Instantly share code, notes, and snippets.
Created
November 19, 2024 19:47
-
Star
(0)
0
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save dalelane/9e9feccd0306bf1c8aa04b912eafcd24 to your computer and use it in GitHub Desktop.
Compare activity on social network platforms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE FUNCTION TO_TIMESTAMP_UDF AS 'com.ibm.ei.streamproc.udf.ToTimestampUdf'; | |
CREATE FUNCTION TO_TIMESTAMP_LTZ_UDF AS 'com.ibm.ei.streamproc.udf.ToTimestampLtzUdf'; | |
-- ---------------------------------------------------------- | |
-- xbox posts on Bluesky | |
-- ---------------------------------------------------------- | |
CREATE TABLE `"xbox" on Bluesky___TABLE` | |
( | |
`id` ROW<`uri` STRING, `cid` STRING>, | |
`text` STRING, | |
`langs` ARRAY<STRING>, | |
`createdAt` BIGINT, | |
`author` ROW<`handle` STRING, `displayName` STRING, `avatar` STRING>, | |
`event_time` TIMESTAMP(3) METADATA FROM 'timestamp', | |
WATERMARK FOR `event_time` AS `event_time` - INTERVAL '0' MINUTE | |
) | |
WITH ( | |
'connector' = 'kafka', | |
'topic' = 'BLUESKY.XBOX', | |
'properties.bootstrap.servers' = 'my-kafka-cluster-kafka-bootstrap.event-automation.svc:9095', | |
'format' = 'json', | |
'scan.startup.mode' = 'earliest-offset', | |
... | |
); | |
CREATE TEMPORARY VIEW `"xbox" on Bluesky` AS | |
SELECT | |
`id` AS `id`, | |
`text` AS `text`, | |
`langs` AS `langs`, | |
TO_TIMESTAMP_LTZ(`createdAt`, 3) AS `createdAt`, | |
`author` AS `author`, | |
`event_time` AS `event_time` | |
FROM | |
`"xbox" on Bluesky___TABLE`; | |
-- ---------------------------------------------------------- | |
-- xbox posts on Mastodon | |
-- ---------------------------------------------------------- | |
CREATE TABLE `"#xbox" on Mastodon` | |
( | |
`id` STRING, | |
`uri` STRING, | |
`content` STRING, | |
`isSensitive` BOOLEAN, | |
`language` STRING, | |
`createdAt` STRING, | |
`account` ROW<`username` STRING, `displayName` STRING, `url` STRING, `note` STRING, `avatar` STRING, `avatarStatic` STRING, `bot` BOOLEAN>, | |
`application` ROW<`name` STRING, `website` STRING>, | |
`event_time` TIMESTAMP(3) METADATA FROM 'timestamp', | |
WATERMARK FOR `event_time` AS `event_time` - INTERVAL '0' MINUTE | |
) | |
WITH ( | |
'connector' = 'kafka', | |
'topic' = 'MASTODON.XBOX', | |
'properties.bootstrap.servers' = 'my-kafka-cluster-kafka-bootstrap.event-automation.svc:9095', | |
'format' = 'json', | |
'scan.startup.mode' = 'earliest-offset', | |
... | |
); | |
-- ---------------------------------------------------------- | |
-- netflix posts on Bluesky | |
-- ---------------------------------------------------------- | |
CREATE TABLE `"netflix" on Bluesky___TABLE` | |
( | |
`id` ROW<`uri` STRING, `cid` STRING>, | |
`text` STRING, | |
`langs` ARRAY<STRING>, | |
`createdAt` BIGINT, | |
`author` ROW<`handle` STRING, `displayName` STRING, `avatar` STRING>, | |
`event_time` TIMESTAMP(3) METADATA FROM 'timestamp', | |
WATERMARK FOR `event_time` AS `event_time` - INTERVAL '0' MINUTE | |
) | |
WITH ( | |
'connector' = 'kafka', | |
'topic' = 'BLUESKY.NETFLIX', | |
'properties.bootstrap.servers' = 'my-kafka-cluster-kafka-bootstrap.event-automation.svc:9095', | |
'format' = 'json', | |
'scan.startup.mode' = 'earliest-offset', | |
... | |
); | |
CREATE TEMPORARY VIEW `"netflix" on Bluesky` AS | |
SELECT | |
`id` AS `id`, | |
`text` AS `text`, | |
`langs` AS `langs`, | |
TO_TIMESTAMP_LTZ(`createdAt`, 3) AS `createdAt`, | |
`author` AS `author`, | |
`event_time` AS `event_time` | |
FROM | |
`"netflix" on Bluesky___TABLE`; | |
-- ---------------------------------------------------------- | |
-- netflix posts on Mastodon | |
-- ---------------------------------------------------------- | |
CREATE TABLE `"#netflix" on Mastodon` | |
( | |
`id` STRING, | |
`uri` STRING, | |
`content` STRING, | |
`isSensitive` BOOLEAN, | |
`language` STRING, | |
`createdAt` STRING, | |
`account` ROW<`username` STRING, `displayName` STRING, `url` STRING, `note` STRING, `avatar` STRING, `avatarStatic` STRING, `bot` BOOLEAN>, | |
`application` ROW<`name` STRING, `website` STRING>, | |
`event_time` TIMESTAMP(3) METADATA FROM 'timestamp', | |
WATERMARK FOR `event_time` AS `event_time` - INTERVAL '0' MINUTE | |
) | |
WITH ( | |
'connector' = 'kafka', | |
'topic' = 'MASTODON.NETFLIX', | |
'properties.bootstrap.servers' = 'my-kafka-cluster-kafka-bootstrap.event-automation.svc:9095', | |
'format' = 'json', | |
'scan.startup.mode' = 'earliest-offset', | |
... | |
); | |
-- ---------------------------------------------------------- | |
-- count the number of posts per hour in each network | |
-- ---------------------------------------------------------- | |
CREATE TEMPORARY VIEW `count xbox on Bluesky` AS | |
SELECT | |
`number of posts`, | |
`start`, | |
`end`, | |
`result` | |
FROM ( | |
SELECT | |
COUNT(`id`.`uri`) AS `number of posts`, | |
`window_start` AS `start`, | |
`window_end` AS `end`, | |
`window_time` AS `result` | |
FROM TABLE ( | |
TUMBLE( TABLE `"xbox" on Bluesky`, DESCRIPTOR(`event_time`), INTERVAL '1' HOUR ) | |
) | |
GROUP BY | |
`window_start`, | |
`window_end`, | |
`window_time` | |
); | |
CREATE TEMPORARY VIEW `count xbox on Mastodon` AS | |
SELECT | |
`number of posts`, | |
`start`, | |
`end`, | |
`result` | |
FROM ( | |
SELECT | |
COUNT(`id`) AS `number of posts`, | |
`window_start` AS `start`, | |
`window_end` AS `end`, | |
`window_time` AS `result` | |
FROM TABLE ( | |
TUMBLE( TABLE `"#xbox" on Mastodon`, DESCRIPTOR(`event_time`), INTERVAL '1' HOUR ) | |
) | |
GROUP BY | |
`window_start`, | |
`window_end`, | |
`window_time` | |
); | |
CREATE TEMPORARY VIEW `count netflix on Bluesky` AS | |
SELECT | |
`number of posts`, | |
`start`, | |
`end`, | |
`result` | |
FROM ( | |
SELECT | |
COUNT(`id`.`uri`) AS `number of posts`, | |
`window_start` AS `start`, | |
`window_end` AS `end`, | |
`window_time` AS `result` | |
FROM TABLE ( | |
TUMBLE( TABLE `"netflix" on Bluesky`, DESCRIPTOR(`event_time`), INTERVAL '1' HOUR ) | |
) | |
GROUP BY | |
`window_start`, | |
`window_end`, | |
`window_time` | |
); | |
CREATE TEMPORARY VIEW `count netflix on Mastodon` AS | |
SELECT | |
`number of posts`, | |
`start`, | |
`end`, | |
`result` | |
FROM ( | |
SELECT | |
COUNT(`id`) AS `number of posts`, | |
`window_start` AS `start`, | |
`window_end` AS `end`, | |
`window_time` AS `result` | |
FROM TABLE ( | |
TUMBLE( TABLE `"#netflix" on Mastodon`, DESCRIPTOR(`event_time`), INTERVAL '1' HOUR ) | |
) | |
GROUP BY | |
`window_start`, | |
`window_end`, | |
`window_time` | |
); | |
-- ---------------------------------------------------------- | |
-- combine all of the counts | |
-- ---------------------------------------------------------- | |
SELECT | |
`count xbox on Bluesky`.`start` AS `hour`, | |
`count xbox on Bluesky`.`number of posts` AS `bluesky-xbox`, | |
`count netflix on Bluesky`.`number of posts` AS `bluesky-netflix`, | |
`count xbox on Mastodon`.`number of posts` AS `mastodon-xbox`, | |
`count netflix on Mastodon`.`number of posts` AS `mastodon-netflix` | |
FROM | |
`count xbox on Bluesky` | |
JOIN | |
`count xbox on Mastodon` | |
ON | |
`count xbox on Bluesky`.`start` = `count xbox on Mastodon`.`start` AND | |
`count xbox on Bluesky`.`end` = `count xbox on Mastodon`.`end` | |
JOIN | |
`count netflix on Bluesky` | |
ON | |
`count xbox on Bluesky`.`start` = `count netflix on Bluesky`.`start` AND | |
`count xbox on Bluesky`.`end` = `count netflix on Bluesky`.`end` | |
JOIN | |
`count netflix on Mastodon` | |
ON | |
`count xbox on Bluesky`.`start` = `count netflix on Mastodon`.`start` AND | |
`count xbox on Bluesky`.`end` = `count netflix on Mastodon`.`end`; | |
-- ---------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment