Skip to content

Instantly share code, notes, and snippets.

@thinhha
Created August 11, 2020 11:14
Show Gist options
  • Save thinhha/d4bc7e4ec735408aa16ec38c51aa9cdf to your computer and use it in GitHub Desktop.
Save thinhha/d4bc7e4ec735408aa16ec38c51aa9cdf to your computer and use it in GitHub Desktop.
Merkle Tree Calculations in BigQuerySQL based on https://datprotocol.github.io/book/ch01-02-merkle-tree.html
CREATE OR REPLACE FUNCTION udf_sql.uint64be(x INT64) RETURNS BYTES AS (
CAST((SELECT STRING_AGG(FORMAT("%x",x >> bit & 0x1), "" ORDER BY bit DESC)
FROM UNNEST(GENERATE_ARRAY(0, 63)) AS bit) AS BYTES)
)
;
CREATE OR REPLACE FUNCTION udf_sql.custom_hash(input BYTES, is_leaf BOOLEAN, depth INT64) RETURNS BYTES AS (
SHA256( CASE WHEN is_leaf THEN b'0x00' ELSE b'0x01' END || udf_sql.uint64be(depth) || input )
)
;
WITH
data AS (
SELECT
(ROW_NUMBER() OVER (ORDER BY row_num)-1)*2 AS index,
udf_sql.custom_hash(udf_sql.uint64be(depth_1), TRUE, 2) AS row_hash
FROM
merkle.flat_tree -- replace with input data
),
d2 AS (
SELECT
2 AS depth,
depth_2 AS index,
ARRAY_LENGTH(ARRAY_AGG(t.depth_1)) num_elements,
udf_sql.custom_hash(STRING_AGG(row_hash ORDER BY d.index), FALSE, ARRAY_LENGTH(ARRAY_AGG(t.depth_1))) row_hash
FROM
data d
LEFT JOIN
merkle.flat_tree t
ON
d.index = t.depth_1
GROUP BY
depth_2 ),
d3 AS (
SELECT
3 AS depth,
depth_3 AS index,
ARRAY_LENGTH(ARRAY_AGG(t.depth_1)) num_elements,
udf_sql.custom_hash(STRING_AGG(row_hash ORDER BY d.index), FALSE, ARRAY_LENGTH(ARRAY_AGG(t.depth_1))) row_hash
FROM
d2 d
LEFT JOIN
merkle.flat_tree t
ON
d.index = t.depth_2
GROUP BY
depth_3 ),
d4 AS (
SELECT
4 AS depth,
depth_4 AS index,
ARRAY_LENGTH(ARRAY_AGG(t.depth_1)) num_elements,
udf_sql.custom_hash(STRING_AGG(row_hash ORDER BY d.index), FALSE, ARRAY_LENGTH(ARRAY_AGG(t.depth_1))) row_hash
FROM
d3 d
LEFT JOIN
merkle.flat_tree t
ON
d.index = t.depth_3
GROUP BY
depth_4 ),
d5 AS (
SELECT
5 AS depth,
depth_5 AS index,
ARRAY_LENGTH(ARRAY_AGG(t.depth_1)) num_elements,
udf_sql.custom_hash(STRING_AGG(row_hash ORDER BY d.index), FALSE, ARRAY_LENGTH(ARRAY_AGG(t.depth_1))) row_hash
FROM
d4 d
LEFT JOIN
merkle.flat_tree t
ON
d.index = t.depth_4
GROUP BY
depth_5 ),
d6 AS (
SELECT
6 AS depth,
depth_6 AS index,
ARRAY_LENGTH(ARRAY_AGG(t.depth_1)) num_elements,
udf_sql.custom_hash(STRING_AGG(row_hash ORDER BY d.index), FALSE, ARRAY_LENGTH(ARRAY_AGG(t.depth_1))) row_hash
FROM
d5 d
LEFT JOIN
merkle.flat_tree t
ON
d.index = t.depth_5
GROUP BY
depth_6 ),
d7 AS (
SELECT
7 AS depth,
depth_7 AS index,
ARRAY_LENGTH(ARRAY_AGG(t.depth_1)) num_elements,
udf_sql.custom_hash(STRING_AGG(row_hash ORDER BY d.index), FALSE, ARRAY_LENGTH(ARRAY_AGG(t.depth_1))) row_hash
FROM
d6 d
LEFT JOIN
merkle.flat_tree t
ON
d.index = t.depth_6
GROUP BY
depth_7 ),
d8 AS (
SELECT
8 AS depth,
depth_8 AS index,
ARRAY_LENGTH(ARRAY_AGG(t.depth_1)) num_elements,
udf_sql.custom_hash(STRING_AGG(row_hash ORDER BY d.index), FALSE, ARRAY_LENGTH(ARRAY_AGG(t.depth_1))) row_hash
FROM
d7 d
LEFT JOIN
merkle.flat_tree t
ON
d.index = t.depth_7
GROUP BY
depth_8 )
SELECT * FROM d2
UNION ALL
SELECT * FROM d3
UNION ALL
SELECT * FROM d4
UNION ALL
SELECT * FROM d5
UNION ALL
SELECT * FROM d6
UNION ALL
SELECT * FROM d7
UNION ALL
SELECT * FROM d8
;
DECLARE gen_until INT64 DEFAULT 256;
-- CREATE OR REPLACE TABLE merkle.flat_tree AS
SELECT
offset_1 AS row_num,
depth_1,
depth_2,
depth_3,
depth_4,
depth_5,
depth_6,
depth_7,
depth_8,
-- depth_9,
-- depth_10,
-- depth_11,
-- depth_12,
-- depth_13,
-- depth_14,
-- depth_15,
-- depth_16,
FROM
UNNEST(GENERATE_ARRAY(0,gen_until-1,2)) AS depth_1
WITH
OFFSET
offset_1
INNER JOIN
UNNEST(ARRAY(
SELECT
index
FROM
UNNEST(GENERATE_ARRAY(0,1)) AS multipler,
UNNEST(GENERATE_ARRAY(1,gen_until,4)) AS index
ORDER BY
index )) AS depth_2
WITH
OFFSET
offset_2
ON
offset_1=offset_2
INNER JOIN
UNNEST(ARRAY(
SELECT
index
FROM
UNNEST(GENERATE_ARRAY(0,3)) AS multipler,
UNNEST(GENERATE_ARRAY(3,gen_until,8)) AS index
ORDER BY
index )) AS depth_3
WITH
OFFSET
offset_3
ON
offset_1=offset_3
INNER JOIN
UNNEST(ARRAY(
SELECT
index
FROM
UNNEST(GENERATE_ARRAY(0,7)) AS multipler,
UNNEST(GENERATE_ARRAY(7,gen_until,16)) AS index
ORDER BY
index )) AS depth_4
WITH
OFFSET
offset_4
ON
offset_1=offset_4
INNER JOIN
UNNEST(ARRAY(
SELECT
index
FROM
UNNEST(GENERATE_ARRAY(0,15)) AS multipler,
UNNEST(GENERATE_ARRAY(15,gen_until,32)) AS index
ORDER BY
index )) AS depth_5
WITH
OFFSET
offset_5
ON
offset_1=offset_5
INNER JOIN
UNNEST(ARRAY(
SELECT
index
FROM
UNNEST(GENERATE_ARRAY(0,31)) AS multipler,
UNNEST(GENERATE_ARRAY(31,gen_until,64)) AS index
ORDER BY
index )) AS depth_6
WITH
OFFSET
offset_6
ON
offset_1=offset_6
INNER JOIN
UNNEST(ARRAY(
SELECT
index
FROM
UNNEST(GENERATE_ARRAY(0,63)) AS multipler,
UNNEST(GENERATE_ARRAY(63,gen_until,128)) AS index
ORDER BY
index )) AS depth_7
WITH
OFFSET
offset_7
ON
offset_1=offset_7
INNER JOIN
UNNEST(ARRAY(
SELECT
index
FROM
UNNEST(GENERATE_ARRAY(0,127)) AS multipler,
UNNEST(GENERATE_ARRAY(127,gen_until,256)) AS index
ORDER BY
index )) AS depth_8
WITH
OFFSET
offset_8
ON
offset_1=offset_8
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment