Skip to content

Instantly share code, notes, and snippets.

@rdkls
Created December 18, 2024 04:03
Show Gist options
  • Save rdkls/b3dcbd5c725b50275f8df73a73f29baa to your computer and use it in GitHub Desktop.
Save rdkls/b3dcbd5c725b50275f8df73a73f29baa to your computer and use it in GitHub Desktop.
tgw flow logs athena table create, parquet format, hive partitions
-- src https://docs.aws.amazon.com/athena/latest/ug/vpc-flow-logs-parquet.html
CREATE EXTERNAL TABLE IF NOT EXISTS tgw_flow_logs (
version int,
account_id string,
interface_id string,
srcaddr string,
dstaddr string,
srcport int,
dstport int,
protocol bigint,
packets bigint,
bytes bigint,
start bigint,
`end` bigint,
action string,
log_status string,
vpc_id string,
subnet_id string,
instance_id string,
tcp_flags int,
type string,
pkt_srcaddr string,
pkt_dstaddr string,
region string,
az_id string,
sublocation_type string,
sublocation_id string,
pkt_src_aws_service string,
pkt_dst_aws_service string,
flow_direction string,
traffic_path int
)
PARTITIONED BY (
`aws-account-id` string,
`aws-service` string,
`aws-region` string,
`year` string,
`month` string,
`day` string,
`hour` string
)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
's3://amzn-s3-demo-bucket/prefix/AWSLogs/'
TBLPROPERTIES (
'EXTERNAL'='true',
'skip.header.line.count'='1'
)
;
msck repair table tgw_flow_logs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment