Skip to content

Instantly share code, notes, and snippets.

@dalelane
Created November 19, 2024 19:47
Show Gist options
  • Save dalelane/9e9feccd0306bf1c8aa04b912eafcd24 to your computer and use it in GitHub Desktop.
Save dalelane/9e9feccd0306bf1c8aa04b912eafcd24 to your computer and use it in GitHub Desktop.
Compare activity on social network platforms
CREATE FUNCTION TO_TIMESTAMP_UDF AS 'com.ibm.ei.streamproc.udf.ToTimestampUdf';
CREATE FUNCTION TO_TIMESTAMP_LTZ_UDF AS 'com.ibm.ei.streamproc.udf.ToTimestampLtzUdf';
-- ----------------------------------------------------------
-- xbox posts on Bluesky
-- ----------------------------------------------------------
CREATE TABLE `"xbox" on Bluesky___TABLE`
(
`id` ROW<`uri` STRING, `cid` STRING>,
`text` STRING,
`langs` ARRAY<STRING>,
`createdAt` BIGINT,
`author` ROW<`handle` STRING, `displayName` STRING, `avatar` STRING>,
`event_time` TIMESTAMP(3) METADATA FROM 'timestamp',
WATERMARK FOR `event_time` AS `event_time` - INTERVAL '0' MINUTE
)
WITH (
'connector' = 'kafka',
'topic' = 'BLUESKY.XBOX',
'properties.bootstrap.servers' = 'my-kafka-cluster-kafka-bootstrap.event-automation.svc:9095',
'format' = 'json',
'scan.startup.mode' = 'earliest-offset',
...
);
CREATE TEMPORARY VIEW `"xbox" on Bluesky` AS
SELECT
`id` AS `id`,
`text` AS `text`,
`langs` AS `langs`,
TO_TIMESTAMP_LTZ(`createdAt`, 3) AS `createdAt`,
`author` AS `author`,
`event_time` AS `event_time`
FROM
`"xbox" on Bluesky___TABLE`;
-- ----------------------------------------------------------
-- xbox posts on Mastodon
-- ----------------------------------------------------------
CREATE TABLE `"#xbox" on Mastodon`
(
`id` STRING,
`uri` STRING,
`content` STRING,
`isSensitive` BOOLEAN,
`language` STRING,
`createdAt` STRING,
`account` ROW<`username` STRING, `displayName` STRING, `url` STRING, `note` STRING, `avatar` STRING, `avatarStatic` STRING, `bot` BOOLEAN>,
`application` ROW<`name` STRING, `website` STRING>,
`event_time` TIMESTAMP(3) METADATA FROM 'timestamp',
WATERMARK FOR `event_time` AS `event_time` - INTERVAL '0' MINUTE
)
WITH (
'connector' = 'kafka',
'topic' = 'MASTODON.XBOX',
'properties.bootstrap.servers' = 'my-kafka-cluster-kafka-bootstrap.event-automation.svc:9095',
'format' = 'json',
'scan.startup.mode' = 'earliest-offset',
...
);
-- ----------------------------------------------------------
-- netflix posts on Bluesky
-- ----------------------------------------------------------
CREATE TABLE `"netflix" on Bluesky___TABLE`
(
`id` ROW<`uri` STRING, `cid` STRING>,
`text` STRING,
`langs` ARRAY<STRING>,
`createdAt` BIGINT,
`author` ROW<`handle` STRING, `displayName` STRING, `avatar` STRING>,
`event_time` TIMESTAMP(3) METADATA FROM 'timestamp',
WATERMARK FOR `event_time` AS `event_time` - INTERVAL '0' MINUTE
)
WITH (
'connector' = 'kafka',
'topic' = 'BLUESKY.NETFLIX',
'properties.bootstrap.servers' = 'my-kafka-cluster-kafka-bootstrap.event-automation.svc:9095',
'format' = 'json',
'scan.startup.mode' = 'earliest-offset',
...
);
CREATE TEMPORARY VIEW `"netflix" on Bluesky` AS
SELECT
`id` AS `id`,
`text` AS `text`,
`langs` AS `langs`,
TO_TIMESTAMP_LTZ(`createdAt`, 3) AS `createdAt`,
`author` AS `author`,
`event_time` AS `event_time`
FROM
`"netflix" on Bluesky___TABLE`;
-- ----------------------------------------------------------
-- netflix posts on Mastodon
-- ----------------------------------------------------------
CREATE TABLE `"#netflix" on Mastodon`
(
`id` STRING,
`uri` STRING,
`content` STRING,
`isSensitive` BOOLEAN,
`language` STRING,
`createdAt` STRING,
`account` ROW<`username` STRING, `displayName` STRING, `url` STRING, `note` STRING, `avatar` STRING, `avatarStatic` STRING, `bot` BOOLEAN>,
`application` ROW<`name` STRING, `website` STRING>,
`event_time` TIMESTAMP(3) METADATA FROM 'timestamp',
WATERMARK FOR `event_time` AS `event_time` - INTERVAL '0' MINUTE
)
WITH (
'connector' = 'kafka',
'topic' = 'MASTODON.NETFLIX',
'properties.bootstrap.servers' = 'my-kafka-cluster-kafka-bootstrap.event-automation.svc:9095',
'format' = 'json',
'scan.startup.mode' = 'earliest-offset',
...
);
-- ----------------------------------------------------------
-- count the number of posts per hour in each network
-- ----------------------------------------------------------
CREATE TEMPORARY VIEW `count xbox on Bluesky` AS
SELECT
`number of posts`,
`start`,
`end`,
`result`
FROM (
SELECT
COUNT(`id`.`uri`) AS `number of posts`,
`window_start` AS `start`,
`window_end` AS `end`,
`window_time` AS `result`
FROM TABLE (
TUMBLE( TABLE `"xbox" on Bluesky`, DESCRIPTOR(`event_time`), INTERVAL '1' HOUR )
)
GROUP BY
`window_start`,
`window_end`,
`window_time`
);
CREATE TEMPORARY VIEW `count xbox on Mastodon` AS
SELECT
`number of posts`,
`start`,
`end`,
`result`
FROM (
SELECT
COUNT(`id`) AS `number of posts`,
`window_start` AS `start`,
`window_end` AS `end`,
`window_time` AS `result`
FROM TABLE (
TUMBLE( TABLE `"#xbox" on Mastodon`, DESCRIPTOR(`event_time`), INTERVAL '1' HOUR )
)
GROUP BY
`window_start`,
`window_end`,
`window_time`
);
CREATE TEMPORARY VIEW `count netflix on Bluesky` AS
SELECT
`number of posts`,
`start`,
`end`,
`result`
FROM (
SELECT
COUNT(`id`.`uri`) AS `number of posts`,
`window_start` AS `start`,
`window_end` AS `end`,
`window_time` AS `result`
FROM TABLE (
TUMBLE( TABLE `"netflix" on Bluesky`, DESCRIPTOR(`event_time`), INTERVAL '1' HOUR )
)
GROUP BY
`window_start`,
`window_end`,
`window_time`
);
CREATE TEMPORARY VIEW `count netflix on Mastodon` AS
SELECT
`number of posts`,
`start`,
`end`,
`result`
FROM (
SELECT
COUNT(`id`) AS `number of posts`,
`window_start` AS `start`,
`window_end` AS `end`,
`window_time` AS `result`
FROM TABLE (
TUMBLE( TABLE `"#netflix" on Mastodon`, DESCRIPTOR(`event_time`), INTERVAL '1' HOUR )
)
GROUP BY
`window_start`,
`window_end`,
`window_time`
);
-- ----------------------------------------------------------
-- combine all of the counts
-- ----------------------------------------------------------
SELECT
`count xbox on Bluesky`.`start` AS `hour`,
`count xbox on Bluesky`.`number of posts` AS `bluesky-xbox`,
`count netflix on Bluesky`.`number of posts` AS `bluesky-netflix`,
`count xbox on Mastodon`.`number of posts` AS `mastodon-xbox`,
`count netflix on Mastodon`.`number of posts` AS `mastodon-netflix`
FROM
`count xbox on Bluesky`
JOIN
`count xbox on Mastodon`
ON
`count xbox on Bluesky`.`start` = `count xbox on Mastodon`.`start` AND
`count xbox on Bluesky`.`end` = `count xbox on Mastodon`.`end`
JOIN
`count netflix on Bluesky`
ON
`count xbox on Bluesky`.`start` = `count netflix on Bluesky`.`start` AND
`count xbox on Bluesky`.`end` = `count netflix on Bluesky`.`end`
JOIN
`count netflix on Mastodon`
ON
`count xbox on Bluesky`.`start` = `count netflix on Mastodon`.`start` AND
`count xbox on Bluesky`.`end` = `count netflix on Mastodon`.`end`;
-- ----------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment