Last active
October 29, 2024 14:57
-
-
Save jlevy/c246006675becc446360a798e2b2d781 to your computer and use it in GitHub Desktop.
Fast and simple insecure string hash for JavaScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// These hashes are for algorithmic use cases, such as bucketing in hashtables, where security isn't | |
// needed and 32 or 64 bits is enough (that is, rare collisions are acceptable). These are way simpler | |
// than sha1 (and all its deps) or similar, and with a short, clean (base 36 alphanumeric) result. | |
// A simple, *insecure* 32-bit hash that's short, fast, and has no dependencies. | |
// Output is always 7 characters. | |
// Loosely based on the Java version; see | |
// https://stackoverflow.com/questions/6122571/simple-non-secure-hash-function-for-javascript | |
const simpleHash = str => { | |
let hash = 0; | |
for (let i = 0; i < str.length; i++) { | |
const char = str.charCodeAt(i); | |
hash = (hash << 5) - hash + char; | |
} | |
// Convert to 32bit unsigned integer in base 36 and pad with "0" to ensure length is 7. | |
return (hash >>> 0).toString(36).padStart(7, '0'); | |
}; | |
// An improved alternative: | |
// cyrb53 (c) 2018 bryc (github.com/bryc). License: Public domain. Attribution appreciated. | |
// A fast and simple 64-bit (or 53-bit) string hash function with decent collision resistance. | |
// Largely inspired by MurmurHash2/3, but with a focus on speed/simplicity. | |
// See https://stackoverflow.com/questions/7616461/generate-a-hash-from-string-in-javascript/52171480#52171480 | |
// https://github.com/bryc/code/blob/master/jshash/experimental/cyrb53.js | |
const cyrb64 = (str, seed = 0) => { | |
let h1 = 0xdeadbeef ^ seed, h2 = 0x41c6ce57 ^ seed; | |
for(let i = 0, ch; i < str.length; i++) { | |
ch = str.charCodeAt(i); | |
h1 = Math.imul(h1 ^ ch, 2654435761); | |
h2 = Math.imul(h2 ^ ch, 1597334677); | |
} | |
h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507); | |
h1 ^= Math.imul(h2 ^ (h2 >>> 13), 3266489909); | |
h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507); | |
h2 ^= Math.imul(h1 ^ (h1 >>> 13), 3266489909); | |
// For a single 53-bit numeric return value we could return | |
// 4294967296 * (2097151 & h2) + (h1 >>> 0); | |
// but we instead return the full 64-bit value: | |
return [h2>>>0, h1>>>0]; | |
}; | |
// An improved, *insecure* 64-bit hash that's short, fast, and has no dependencies. | |
// Output is always 14 characters. | |
const cyrb64Hash = (str, seed = 0) => { | |
const [h2, h1] = cyrb64(str, seed); | |
return h2.toString(36).padStart(7, '0') + h1.toString(36).padStart(7, '0'); | |
} |
I've gone ahead and updated this to include @bryc's improved hash, as a 64-bit option, based on
https://github.com/bryc/code/blob/master/jshash/experimental/cyrb53.js
https://stackoverflow.com/questions/7616461/generate-a-hash-from-string-in-javascript/52171480#52171480
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Yes, like with hashCode() in Java, this is for algorithmic use, and not use cases where collisions are unacceptable.
I've updated the comments, simplified as suggested, and for convenience padded so that output is always 7 characters.
That said, arguably a more modern algorithm like cyrb53 would be preferable for a lot of use cases.