Created
December 18, 2024 04:03
-
-
Save rdkls/b3dcbd5c725b50275f8df73a73f29baa to your computer and use it in GitHub Desktop.
tgw flow logs athena table create, parquet format, hive partitions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- src https://docs.aws.amazon.com/athena/latest/ug/vpc-flow-logs-parquet.html | |
CREATE EXTERNAL TABLE IF NOT EXISTS tgw_flow_logs ( | |
version int, | |
account_id string, | |
interface_id string, | |
srcaddr string, | |
dstaddr string, | |
srcport int, | |
dstport int, | |
protocol bigint, | |
packets bigint, | |
bytes bigint, | |
start bigint, | |
`end` bigint, | |
action string, | |
log_status string, | |
vpc_id string, | |
subnet_id string, | |
instance_id string, | |
tcp_flags int, | |
type string, | |
pkt_srcaddr string, | |
pkt_dstaddr string, | |
region string, | |
az_id string, | |
sublocation_type string, | |
sublocation_id string, | |
pkt_src_aws_service string, | |
pkt_dst_aws_service string, | |
flow_direction string, | |
traffic_path int | |
) | |
PARTITIONED BY ( | |
`aws-account-id` string, | |
`aws-service` string, | |
`aws-region` string, | |
`year` string, | |
`month` string, | |
`day` string, | |
`hour` string | |
) | |
ROW FORMAT SERDE | |
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' | |
STORED AS INPUTFORMAT | |
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' | |
OUTPUTFORMAT | |
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' | |
LOCATION | |
's3://amzn-s3-demo-bucket/prefix/AWSLogs/' | |
TBLPROPERTIES ( | |
'EXTERNAL'='true', | |
'skip.header.line.count'='1' | |
) | |
; | |
msck repair table tgw_flow_logs |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment