For Trino and Presto
This guide details the process for creating and exporting 3 Parquet files (tables.parquet, columns.parquet, views.parquet) from the storage and query log from Trino/Presto query engine.
Generate Input Data Files
tables.parquet
message TableItem {
string table_catalog = 1;
string table_schema = 2;
string table_name = 3;
string table_owner = 4;
int64 row_count = 5;
int64 bytes = 6;
string comment = 7;
int64 created = 8;
int64 last_altered = 9;
}
columns.parquet
message schema {
string table_catalog = 1;
string table_schema = 2;
string table_name = 3;
string column_name = 4;
int32 ordinal_position = 5;
string is_nullable = 6;
string data_type = 7;
bytes numeric_precision = 8;
bytes numeric_scale = 9;
bytes datetime_precision = 10;
}
views.parquet
message schema {
string table_catalog = 1;
string table_schema = 2;
string table_name = 3;
string view_definition = 4;
string table_owner = 5;
}
query-historys.json
After you enable the logging for Trino/Presto queries, the below is a sample query history record:
{
"instant": {
"epochSecond": 1649685758,
"nanoOfSecond": 64613000
},
"thread": "dispatcher-query-0",
"level": "INFO",
"loggerName": "com.github.trino.querylog.QueryLogListener",
"message": {
"createTime": {
"nano": 39000000,
"epochSecond": 1649685758
},
"context": {
"user": "admin",
"principal": {
"empty": false,
"present": true
},
"traceToken": {
"empty": true,
"present": false
},
"remoteClientAddress": {
"empty": false,
"present": true
},
"userAgent": {
"empty": false,
"present": true
},
"clientInfo": {
"empty": true,
"present": false
},
"source": {
"empty": true,
"present": false
},
"catalog": {
"empty": true,
"present": false
},
"schema": {
"empty": true,
"present": false
},
"resourceGroupId": {
"empty": false,
"present": true
},
"resourceEstimates": {
"executionTime": {
"empty": true,
"present": false
},
"cpuTime": {
"empty": true,
"present": false
},
"peakMemoryBytes": {
"empty": true,
"present": false
}
},
"serverAddress": "192.168.56.2",
"serverVersion": "355",
"environment": "demo",
"queryType": {
"empty": false,
"present": true
}
},
"metadata": {
"queryId": "20220411_140237_00000_ai6km",
"transactionId": {
"empty": false,
"present": true
},
"query": "select \* from hive.dw1.flight where count in (10,264)",
"updateType": {
"empty": true,
"present": false
},
"preparedQuery": {
"empty": true,
"present": false
},
"queryState": "QUEUED",
"uri": "<http://192.168.56.2:8080/v1/query/20220411_140237_00000_ai6km">,
"plan": {
"empty": true,
"present": false
},
"payload": {
"empty": true,
"present": false
}
}
},
"endOfBatch": false,
"loggerFqcn": "org.apache.logging.log4j.spi.AbstractLogger",
"threadId": 141,
"threadPriority": 5
}
Updated about 20 hours ago
What’s Next