For Trino and Presto

This guide details the process for creating and exporting 3 Parquet files (tables.parquet, columns.parquet, views.parquet) from the storage and query log from Trino/Presto query engine.

Generate Input Data Files

tables.parquet

message TableItem {
  string table_catalog = 1;
  string table_schema = 2;
  string table_name = 3;
  string table_owner = 4;
  int64 row_count = 5;
  int64 bytes = 6;
  string comment = 7;
  int64 created = 8;
  int64 last_altered = 9;
}

columns.parquet

message schema {
  string table_catalog = 1;
  string table_schema = 2;
  string table_name = 3;
  string column_name = 4;
  int32 ordinal_position = 5;
  string is_nullable = 6;
  string data_type = 7;
  bytes numeric_precision = 8;
  bytes numeric_scale = 9;
  bytes datetime_precision = 10;
}

views.parquet

message schema {
  string table_catalog = 1;
  string table_schema = 2;
  string table_name = 3;
  string view_definition = 4;
  string table_owner = 5;
}

query-historys.json

After you enable the logging for Trino/Presto queries, the below is a sample query history record:

{  
  "instant": {  
    "epochSecond": 1649685758,  
    "nanoOfSecond": 64613000  
  },  
  "thread": "dispatcher-query-0",  
  "level": "INFO",  
  "loggerName": "com.github.trino.querylog.QueryLogListener",  
  "message": {  
    "createTime": {  
      "nano": 39000000,  
      "epochSecond": 1649685758  
    },  
    "context": {  
      "user": "admin",  
      "principal": {  
        "empty": false,  
        "present": true  
      },  
      "traceToken": {  
        "empty": true,  
        "present": false  
      },  
      "remoteClientAddress": {  
        "empty": false,  
        "present": true  
      },  
      "userAgent": {  
        "empty": false,  
        "present": true  
      },  
      "clientInfo": {  
        "empty": true,  
        "present": false  
      },  
      "source": {  
        "empty": true,  
        "present": false  
      },  
      "catalog": {  
        "empty": true,  
        "present": false  
      },  
      "schema": {  
        "empty": true,  
        "present": false  
      },  
      "resourceGroupId": {  
        "empty": false,  
        "present": true  
      },  
      "resourceEstimates": {  
        "executionTime": {  
          "empty": true,  
          "present": false  
        },  
        "cpuTime": {  
          "empty": true,  
          "present": false  
        },  
        "peakMemoryBytes": {  
          "empty": true,  
          "present": false  
        }  
      },  
      "serverAddress": "192.168.56.2",  
      "serverVersion": "355",  
      "environment": "demo",  
      "queryType": {  
        "empty": false,  
        "present": true  
      }  
    },  
    "metadata": {  
      "queryId": "20220411_140237_00000_ai6km",  
      "transactionId": {  
        "empty": false,  
        "present": true  
      },  
      "query": "select \* from hive.dw1.flight where count in (10,264)",  
      "updateType": {  
        "empty": true,  
        "present": false  
      },  
      "preparedQuery": {  
        "empty": true,  
        "present": false  
      },  
      "queryState": "QUEUED",  
      "uri": "<http://192.168.56.2:8080/v1/query/20220411_140237_00000_ai6km">,  
      "plan": {  
        "empty": true,  
        "present": false  
      },  
      "payload": {  
        "empty": true,  
        "present": false  
      }  
    }  
  },  
  "endOfBatch": false,  
  "loggerFqcn": "org.apache.logging.log4j.spi.AbstractLogger",  
  "threadId": 141,  
  "threadPriority": 5  
}