For Snowflake
This guide explains how to create and export four Parquet files (columns.parquet, tables.parquet, views.parquet, and query-historys.parquet) from Snowflake.
Generate Input Data Files
tables.parquet
You can run the following SQL in the Snowflake console.
SELECT
    table_catalog AS "table_catalog",
    table_schema AS "table_schema",
    table_name AS "table_name",
    table_type AS "table_type",
    is_transient AS "is_transient",
    is_iceberg AS "is_iceberg",
    is_dynamic AS "is_dynamic",
    table_owner AS "table_owner",
    row_count::BIGINT AS "row_count",
    bytes::BIGINT AS "bytes",
    auto_clustering_on AS "auto_clustering_on",
    clustering_key AS "clustering_key",
    comment AS "comment",
    created AS "created",
    last_altered AS "last_altered"
FROM
    snowflake.account_usage.tables;
Corresponding Parquet schema
message TableItem {
  string table_catalog = 1;
  string table_schema = 2;
  string table_name = 3;
  string table_type = 4;
  string is_transient = 5;
  string is_iceberg = 6;
  string is_dynamic = 7;
  string table_owner = 8;
  int64 row_count = 9;
  int64 bytes = 10;
  string auto_clustering_on = 11;
  string clustering_key = 12;
  string comment = 13;
  int64 created = 14;
  int64 last_altered = 15;
}
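To produce the tables.parquet file itself, one option is to unload the query results to a stage and then download them with SnowSQL. The sketch below is a minimal example assuming the user stage (@~); the stage path and local directory are placeholders for your environment. HEADER = TRUE preserves the quoted lowercase aliases as the Parquet column names, and SINGLE = TRUE writes one file.
COPY INTO @~/export/tables.parquet
FROM (
    SELECT
        table_catalog AS "table_catalog",
        table_schema AS "table_schema",
        table_name AS "table_name",
        table_type AS "table_type",
        is_transient AS "is_transient",
        is_iceberg AS "is_iceberg",
        is_dynamic AS "is_dynamic",
        table_owner AS "table_owner",
        row_count::BIGINT AS "row_count",
        bytes::BIGINT AS "bytes",
        auto_clustering_on AS "auto_clustering_on",
        clustering_key AS "clustering_key",
        comment AS "comment",
        created AS "created",
        last_altered AS "last_altered"
    FROM snowflake.account_usage.tables
)
FILE_FORMAT = (TYPE = PARQUET)
HEADER = TRUE
SINGLE = TRUE
MAX_FILE_SIZE = 4000000000; -- raise the default 16 MB single-file limit if needed

-- Then download the unloaded file with SnowSQL:
GET @~/export/tables.parquet file:///tmp/export/;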
columns.parquet
You can run the following SQL in the Snowflake console.
SELECT
    table_catalog AS "table_catalog",
    table_schema AS "table_schema",
    table_name AS "table_name",
    column_name AS "column_name",
    ordinal_position AS "ordinal_position",
    is_nullable AS "is_nullable",
    data_type AS "data_type",
    numeric_precision AS "numeric_precision",
    numeric_scale AS "numeric_scale",
    datetime_precision AS "datetime_precision"
FROM snowflake.account_usage.columns;
Corresponding Parquet schema
message schema {
  string table_catalog = 1;
  string table_schema = 2;
  string table_name = 3;
  string column_name = 4;
  int32 ordinal_position = 5;
  string is_nullable = 6;
  string data_type = 7;
  bytes numeric_precision = 8;
  bytes numeric_scale = 9;
  bytes datetime_precision = 10;
}
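The same unload pattern works for columns.parquet; only the inner query and the target file name change. A compact sketch, again assuming the user stage:
COPY INTO @~/export/columns.parquet
FROM (
    SELECT table_catalog AS "table_catalog", table_schema AS "table_schema",
           table_name AS "table_name", column_name AS "column_name",
           ordinal_position AS "ordinal_position", is_nullable AS "is_nullable",
           data_type AS "data_type", numeric_precision AS "numeric_precision",
           numeric_scale AS "numeric_scale", datetime_precision AS "datetime_precision"
    FROM snowflake.account_usage.columns
)
FILE_FORMAT = (TYPE = PARQUET) HEADER = TRUE SINGLE = TRUE;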
views.parquet
You can run the following SQL in the Snowflake console.
SELECT
    table_catalog AS "table_catalog",
    table_schema AS "table_schema",
    table_name AS "table_name",
    view_definition AS "view_definition",
    table_owner AS "table_owner"
FROM
    snowflake.account_usage.views;
Corresponding Parquet schema
message schema {
  string table_catalog = 1;
  string table_schema = 2;
  string table_name = 3;
  string view_definition = 4;
  string table_owner = 5;
}
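Likewise for views.parquet, using the views query as the inner SELECT:
COPY INTO @~/export/views.parquet
FROM (
    SELECT table_catalog AS "table_catalog", table_schema AS "table_schema",
           table_name AS "table_name", view_definition AS "view_definition",
           table_owner AS "table_owner"
    FROM snowflake.account_usage.views
)
FILE_FORMAT = (TYPE = PARQUET) HEADER = TRUE SINGLE = TRUE;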
query-historys.parquet
You can run the following SQL in the Snowflake console.
SELECT
    query_id AS "query_id",
    query_text AS "query",
    user_name AS "query_user",
    '' AS "email",
    total_elapsed_time AS "total_elapsed_time",
    END_TIME::TIMESTAMP_NTZ AS "updated_at",
    START_TIME::TIMESTAMP_NTZ AS "start_time",
    END_TIME::TIMESTAMP_NTZ AS "end_time",
    bytes_scanned::BIGINT AS "processed_bytes",
    '' AS "source_tables",
    '' AS "destination_table",
    database_name AS "default_database",
    schema_name AS "default_schema",
    query_type AS "statement_type",
    ROWS_PRODUCED::BIGINT AS "produced_rows",
    EXECUTION_TIME::BIGINT AS "execution_time",
    warehouse_size AS "warehouse_size",
    warehouse_id AS "warehouse_id",
    warehouse_name AS "warehouse_name",
    execution_status AS "execution_status",
    query_tag AS "query_tag",
    0.0 AS "query_credit_cost", -- replace with the query credit cost from your system
    CLUSTER_NUMBER::BIGINT AS "cluster_number"
FROM snowflake.account_usage.query_history;
Corresponding Parquet schema
message schema {
  string query_id = 1;
  string query = 2;
  string query_user = 3;
  string email = 4;
  int64 total_elapsed_time = 5;
  int64 updated_at = 6;
  int64 start_time = 7;
  int64 end_time = 8;
  int64 processed_bytes = 9;
  string source_tables = 10;
  string destination_table = 11;
  string default_database = 12;
  string default_schema = 13;
  string statement_type = 14;
  int64 produced_rows = 15;
  int64 execution_time = 16;
  string warehouse_size = 17;
  string warehouse_id = 18;
  string warehouse_name = 19;
  string execution_status = 20;
  string query_tag = 21;
  double query_credit_cost = 22;
  int64 cluster_number = 23;
}
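The query_history view in account_usage retains up to a year of history, so this file can be large. The sketch below applies the same unload pattern; the commented-out filter is an optional, illustrative way to bound the export to a recent window.
COPY INTO @~/export/query-historys.parquet
FROM (
    SELECT query_id AS "query_id", query_text AS "query", user_name AS "query_user",
           '' AS "email", total_elapsed_time AS "total_elapsed_time",
           END_TIME::TIMESTAMP_NTZ AS "updated_at", START_TIME::TIMESTAMP_NTZ AS "start_time",
           END_TIME::TIMESTAMP_NTZ AS "end_time", bytes_scanned::BIGINT AS "processed_bytes",
           '' AS "source_tables", '' AS "destination_table",
           database_name AS "default_database", schema_name AS "default_schema",
           query_type AS "statement_type", ROWS_PRODUCED::BIGINT AS "produced_rows",
           EXECUTION_TIME::BIGINT AS "execution_time", warehouse_size AS "warehouse_size",
           warehouse_id AS "warehouse_id", warehouse_name AS "warehouse_name",
           execution_status AS "execution_status", query_tag AS "query_tag",
           0.0 AS "query_credit_cost", CLUSTER_NUMBER::BIGINT AS "cluster_number"
    FROM snowflake.account_usage.query_history
    -- WHERE start_time >= DATEADD(day, -30, CURRENT_TIMESTAMP())
)
FILE_FORMAT = (TYPE = PARQUET) HEADER = TRUE SINGLE = TRUE;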