Skip to content

Instantly share code, notes, and snippets.

@wrunk
Last active August 29, 2015 14:09
Show Gist options
  • Save wrunk/a6f19ff3d9de23a57fb9 to your computer and use it in GitHub Desktop.
Save wrunk/a6f19ff3d9de23a57fb9 to your computer and use it in GitHub Desktop.
Creating GUIDs in python using the first two chars to get a shard
#!/usr/bin/env python
# Written by Warren Runk
# This file is free software in the public domain.
import base64
import uuid
"""
******* Deprecated. I went the route of an explicit shard in the node section.
See: https://gist.github.com/wrunk/b6d340297e7a9f3d97a0
Get a GUID - URL safe, Base32, all lowercase, looks like:
rwc5wtj73fdqjasiii5texzize
aruud7l6ijarfdk2a3kstwqmnu
p4zkxwv5hvfsbmp7o5b7cktb5i
We also provide the ability to get the bucket for the ID (0-1023)
"""
def _get_base_int(c):
    """Map one lowercase base32 character to an integer in the range 0-31.

    The digits '2'-'7' map to 0-5 and the letters 'a'-'z' map to 6-31.
    The input is not validated; other characters yield whatever the
    arithmetic below produces.
    """
    code_point = ord(c)
    # ord('a') == 97: anything below it is treated as a digit and measured
    # from '2' (50). Letters are shifted by 91 so that 'a' lands directly
    # after '7' (97 - 91 == 6).
    offset = 50 if code_point < 97 else 91
    return code_point - offset
def get_bucket_int(gid):
    """
    Compute the bucket of a guid as a base 10 integer between 0 and 1023.

    Only the first 2 characters of the guid are used. Each character can take
    32 different values, so there are 32 * 32 = 1024 buckets: the value of the
    first character is multiplied by 32 and the value of the second character
    is added to it.

    Distribution recorded by the author for generated ids:
        Total # of bucket ids 1024
        [MAX] Buck id (0-1023) (642) # of ids generated in this bucket (1076)
        [MIN] Buck id (0-1023) (12) # of ids generated in this bucket (883)
        Median pos 511 977
        Median pos 512 977
        Median pos 513 977
    """
    high = _get_base_int(gid[0])
    low = _get_base_int(gid[1])
    return high * 32 + low
def get_new_guid():
    """
    Returns a length 26 globally unique ID.

    A random UUID (uuid4, 16 bytes) is base32-encoded, lowercased and has its
    '=' padding removed, which leaves 26 characters drawn from a-z and 2-7.

    The result is always the native text type (``str``) on both Python 2
    and Python 3.
    """
    # Use bytes arguments for replace(): b32encode returns bytes, and on
    # Python 3 mixing bytes with str arguments raises TypeError.
    encoded = base64.b32encode(uuid.uuid4().bytes).lower().replace(b'=', b'')
    # On Python 2 bytes already is ``str``. On Python 3 decode it so that
    # indexing the id yields one-character strings rather than ints.
    if not isinstance(encoded, str):
        encoded = encoded.decode('ascii')
    return encoded
def print_bucket_distro_stats(num_ids=1000000):
    """
    Generate ``num_ids`` guids and print how they spread over the buckets.

    Prints the per-bucket counts, the number of distinct buckets that were
    hit, the fullest and the emptiest bucket, and the counts found at sorted
    positions 511, 512 and 513 (the middle of the list when all 1024 buckets
    are populated).

    num_ids -- how many guids to generate; defaults to 1000000.
    """
    from pprint import pprint

    counts = {}
    for _ in range(num_ids):
        buck = get_bucket_int(get_new_guid())
        # Start from 0 so the first id seen in a bucket is counted once
        # (seeding with 1 and then incrementing would count it twice).
        counts[buck] = counts.get(buck, 0) + 1
    pprint(counts)

    # Each print uses one pre-formatted string in parentheses, which prints
    # identically under the Python 2 statement and the Python 3 function.
    print("Total # of bucket ids %i" % len(counts))
    if not counts:
        # Nothing was generated, so there is no max/min/median to report.
        return

    # max()/min() return the first extreme in iteration order and need no
    # hard-coded starting bound for the smallest count.
    mac, ma = max(counts.items(), key=lambda item: item[1])
    mic, mi = min(counts.items(), key=lambda item: item[1])
    print("[MAX] Buck id (0-1023) (%s) # of ids generated in this bucket (%i)" % (mac, ma))
    print("[MIN] Buck id (0-1023) (%s) # of ids generated in this bucket (%i)" % (mic, mi))

    # Sort once and reuse the result for all three positions.
    ordered = sorted(counts.values())
    for pos in (511, 512, 513):
        # A small sample may not populate enough buckets to reach this index.
        if pos < len(ordered):
            print("Median pos %i %i" % (pos, ordered[pos]))
def print_sample(count=20):
    """
    Print ``count`` freshly generated guids together with their lengths.

    count -- number of sample guids to print; defaults to 20.
    """
    for _ in range(count):
        u = get_new_guid()
        # One pre-formatted string in parentheses prints identically under
        # the Python 2 print statement and the Python 3 print function.
        print("GUID: (%s) LEN (%i)" % (u, len(u)))
if __name__ == "__main__":
    # Script entry point: print the bucket distribution statistics first,
    # then a handful of sample guids.
    print_bucket_distro_stats()
    print_sample()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment