For Snowflake

This guide details the process for creating and exporting four Parquet files (columns.parquet, tables.parquet, views.parquet, and query-historys.parquet) from Snowflake.

Generate Input Data Files

tables.parquet

You can run the following SQL in the Snowflake console.

-- Export table metadata for tables.parquet.
-- Quoted aliases keep the output column names lowercase so they
-- match the parquet schema field names exactly.
SELECT
    table_catalog       AS "table_catalog",
    table_schema        AS "table_schema",
    table_name          AS "table_name",
    table_type          AS "table_type",
    is_transient        AS "is_transient",
    is_iceberg          AS "is_iceberg",
    is_dynamic          AS "is_dynamic",
    table_owner         AS "table_owner",
    row_count::BIGINT   AS "row_count",
    bytes::BIGINT       AS "bytes",
    auto_clustering_on  AS "auto_clustering_on",
    clustering_key      AS "clustering_key",
    comment             AS "comment",
    created             AS "created",
    last_altered        AS "last_altered"
FROM snowflake.account_usage.tables;

Corresponding Parquet schema

// Parquet schema for tables.parquet. Field names must match the quoted
// column aliases produced by the export query above; field numbers fix
// the column order.
message TableItem {
  string table_catalog = 1;
  string table_schema = 2;
  string table_name = 3;
  string table_type = 4;
  // Snowflake exposes these flags as YES/NO strings, hence string not bool.
  string is_transient = 5;
  string is_iceberg = 6;
  string is_dynamic = 7;
  string table_owner = 8;
  int64 row_count = 9;
  int64 bytes = 10;
  string auto_clustering_on = 11;
  string clustering_key = 12;
  string comment = 13;
  // created/last_altered are TIMESTAMP columns serialized as int64 —
  // presumably epoch milliseconds; confirm with your parquet exporter.
  int64 created = 14;
  int64 last_altered = 15;
}

columns.parquet

You can run the following SQL in the Snowflake console.

-- Export column metadata for columns.parquet.
-- Quoted aliases keep the output column names lowercase so they
-- match the parquet schema field names exactly.
SELECT
  table_catalog      AS "table_catalog",
  table_schema       AS "table_schema",
  table_name         AS "table_name",
  column_name        AS "column_name",
  ordinal_position   AS "ordinal_position",
  is_nullable        AS "is_nullable",
  data_type          AS "data_type",
  numeric_precision  AS "numeric_precision",
  numeric_scale      AS "numeric_scale",
  datetime_precision AS "datetime_precision"
FROM
  snowflake.account_usage.columns;

Corresponding Parquet schema

// Parquet schema for columns.parquet. Field names must match the quoted
// column aliases produced by the export query above.
message schema {
  string table_catalog = 1;
  string table_schema = 2;
  string table_name = 3;
  string column_name = 4;
  int32 ordinal_position = 5;
  // YES/NO string flag, as exposed by Snowflake's information views.
  string is_nullable = 6;
  string data_type = 7;
  // NUMBER precision/scale columns are nullable NUMBERs exported as raw
  // bytes — presumably a DECIMAL physical encoding; confirm with your
  // parquet exporter before changing these types.
  bytes numeric_precision = 8;
  bytes numeric_scale = 9;
  bytes datetime_precision = 10;
}

views.parquet

You can run the following SQL in the Snowflake console.

-- Export view metadata for views.parquet.
-- Quoted aliases keep the output column names lowercase so they
-- match the parquet schema field names exactly.
SELECT
  table_catalog   AS "table_catalog",
  table_schema    AS "table_schema",
  table_name      AS "table_name",
  view_definition AS "view_definition",
  table_owner     AS "table_owner"
FROM snowflake.account_usage.views;

Corresponding Parquet schema

// Parquet schema for views.parquet. Field names must match the quoted
// column aliases produced by the export query above.
message schema {
  string table_catalog = 1;
  string table_schema = 2;
  string table_name = 3;
  // Full SQL text of the view definition.
  string view_definition = 4;
  string table_owner = 5;
}

query-historys.parquet

You can run the following SQL in the Snowflake console.

  -- Export query history for query-historys.parquet.
  -- Consistency fixes vs. the original: every alias now uses explicit AS,
  -- source identifiers are uniformly lowercase, and the statement is
  -- terminated with a semicolon like the other export queries.
  -- The ''/0.0 literals are placeholders for fields this view cannot supply.
  SELECT
    query_id AS "query_id",
    query_text AS "query",
    user_name AS "query_user",
    '' AS "email",  -- placeholder: account_usage has no user email
    total_elapsed_time AS "total_elapsed_time",
    end_time::TIMESTAMP_NTZ AS "updated_at",
    start_time::TIMESTAMP_NTZ AS "start_time",
    end_time::TIMESTAMP_NTZ AS "end_time",
    bytes_scanned::BIGINT AS "processed_bytes",
    '' AS "source_tables",      -- placeholder: fill from lineage if available
    '' AS "destination_table",  -- placeholder: fill from lineage if available
    database_name AS "default_database",
    schema_name AS "default_schema",
    query_type AS "statement_type",
    rows_produced::BIGINT AS "produced_rows",
    execution_time::BIGINT AS "execution_time",
    warehouse_size AS "warehouse_size",
    warehouse_id AS "warehouse_id",
    warehouse_name AS "warehouse_name",
    execution_status AS "execution_status",
    query_tag AS "query_tag",
    0.0 AS "query_credit_cost",  -- placeholder: supply query cost from your billing system
    cluster_number::BIGINT AS "cluster_number"
  FROM snowflake.account_usage.query_history;

Corresponding Parquet schema

// Parquet schema for query-historys.parquet. Field names must match the
// quoted column aliases produced by the export query above.
message schema {
  string query_id = 1;
  string query = 2;
  string query_user = 3;
  // Always empty in the provided export query (placeholder literal '').
  string email = 4;
  // Elapsed time as reported by Snowflake — presumably milliseconds; confirm.
  int64 total_elapsed_time = 5;
  // TIMESTAMP_NTZ columns serialized as int64 — presumably epoch
  // milliseconds; confirm with your parquet exporter.
  int64 updated_at = 6;
  int64 start_time = 7;
  int64 end_time = 8;
  int64 processed_bytes = 9;
  // Always empty in the provided export query (placeholder literals '').
  string source_tables = 10;
  string destination_table = 11;
  string default_database = 12;
  string default_schema = 13;
  string statement_type = 14;
  int64 produced_rows = 15;
  int64 execution_time = 16;
  string warehouse_size = 17;
  string warehouse_id = 18;
  string warehouse_name = 19;
  string execution_status = 20;
  string query_tag = 21;
  // Always 0.0 in the provided export query; callers are expected to
  // substitute real credit cost from their billing data.
  double query_credit_cost = 22;
  int64 cluster_number = 23;
}