Last active
April 11, 2024 17:12
-
-
Save Rudxain/f4d2dd9b1c3e44a2eb4fb46ddf2a76f6 to your computer and use it in GitHub Desktop.
One of the many ways to measure entropy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict' | |
/** | |
Calculates the minimum int-num of payload bits needed to encode the given data. | |
It does so with a simple algorithm: | |
- `boolean`: `1` (duh). | |
- `bigint`: bitcount - Most Significant One + sign-bit (only if negative), `sizeof 0n == 0`. | |
- `number`: Same as `bigint` (MSO is the implicit-bit) + bits after radix-point (not including the point itself). | |
- `string`: Number of code-points multiplied by the bits-per-char. BPC is calculated from min-charset-size. | |
@template {boolean | bigint | number | string} T | |
@param {T} x Value to measure. | |
Shouldn't be a `Symbol`, because its size is `undefined`. | |
@example | |
min_bit_len(0.5) //1 | |
min_bit_len('') //0 | |
min_bit_len('0101') //4 | |
min_bit_len('hey') // 2b/c * 3c = 6b, or just 6 | |
@license Unlicense | |
*/ | |
const min_bit_len = x => { | |
const lb = Math.log2 | |
/** | |
Integer (truncated) Binary Logarithm. | |
@param {number} x | |
*/ | |
const ilb = x => Math.trunc(lb(x)) // I prefer `trunc` over `floor` | |
switch (typeof x) { | |
case 'boolean': return 1 | |
case 'bigint': { | |
const sgn = x < 0n | |
if (sgn) x = -x //abs | |
let i = sgn | |
while (x > 1n) { x >>= 1n; i++ } | |
return i | |
} | |
case 'number': { | |
if (!isFinite(x)) return NaN | |
if (x == 0) return 1 | |
const sgn = x < 0 | |
if (sgn) x = -x //abs | |
/** fractional part */ | |
let f = x % 1 | |
x -= f //trunc | |
let i = 0 | |
while (!Number.isInteger(f)) { f *= 2; i++ } | |
return (x && (ilb(x) + sgn)) + i | |
} | |
// for some reason, | |
// I feel like this is the most beautiful code I've ever written | |
case 'string': { | |
// avoid `lb(0)` | |
if (x == '') return 0 | |
/** charset size (unique code-points) of `x` */ | |
const cs_size = (new Set(x)).size | |
/** total number of code-points in `x` */ | |
const cp_count = [...x].length | |
// special case for unary | |
if (cs_size == 1) return ilb(cp_count) + 1 | |
/** bits per character */ | |
const bpc = Math.ceil(lb(cs_size)) | |
return cp_count * bpc | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment