Skip to content

Instantly share code, notes, and snippets.

@annawoodard
Created April 9, 2018 15:21
Show Gist options
  • Save annawoodard/437cddf7fe1bb6300fa1c9b6681b6fd1 to your computer and use it in GitHub Desktop.
Save annawoodard/437cddf7fe1bb6300fa1c9b6681b6fd1 to your computer and use it in GitHub Desktop.
"""
Based on https://gist.github.com/CTimmerman/1f328f02ac2740f4c90d
"""
import _pickle
import json
import marshal
import pickle
import random
from hashlib import md5
from time import perf_counter, time
from zlib import compress, decompress

import cloudpickle
import dill
#import ujson # from http://www.lfd.uci.edu/~gohlke/pythonlibs/#ujson
# 2 to 3 times slower and up to 50% larger output than marshal.
data_size = 100000
test_runs = 10

# Display names of the serializers whose dumps() is called with a pickle
# ``protocol`` keyword.
_PICKLE_LIKE = frozenset(['dill', 'pickle', 'cPickle', 'cloudpickle'])
# Display names of the serializers whose dumps() output is fed to zlib as-is;
# any other module has its output UTF-8 encoded first.
_BINARY_OUTPUT = _PICKLE_LIKE | {'marshal'}


def display_name(module):
    """Return the label printed for *module*; ``_pickle`` is shown as 'cPickle'."""
    return 'cPickle' if module.__name__ == '_pickle' else module.__name__


def make_codec(module):
    """Return a ``(dump, load)`` pair that converts objects to and from bytes.

    * Pickle-like modules are called with ``protocol=-1``, which selects the
      highest protocol the running interpreter supports.
    * ``marshal`` is called with its defaults.
    * Any other module (``json`` in this script) has its ``dumps()`` result
      encoded to UTF-8 on the way out and decoded again on the way in.
    """
    name = display_name(module)
    if name in _PICKLE_LIKE:
        def dump(obj):
            return module.dumps(obj, protocol=-1)
        return dump, module.loads
    if name in _BINARY_OUTPUT:
        return module.dumps, module.loads

    def dump_text(obj):
        return module.dumps(obj).encode('utf8')

    def load_text(blob):
        return module.loads(blob.decode('utf8'))

    return dump_text, load_text


def time_round_trip(module, data, runs):
    """Time *runs* compressed writes and then *runs* decompressed reads.

    Args:
        module: serializer exposing ``dumps``/``loads``.
        data: object to serialize.
        runs: number of repetitions of each phase; must be at least 1.

    Returns:
        ``(write_seconds, read_seconds, size_in_bytes)`` where the size is the
        length of the zlib-compressed serialized form.

    Raises:
        ValueError: if *runs* is less than 1 (nothing would be serialized, so
            there would be nothing to read back or measure).
    """
    if runs < 1:
        raise ValueError("runs must be at least 1, got {!r}".format(runs))
    dump, load = make_codec(module)

    # perf_counter() is used rather than time(): it cannot jump when the
    # system clock is adjusted and has the best resolution available.
    started = perf_counter()
    for _ in range(runs):
        serialized = compress(dump(data))
    write_seconds = perf_counter() - started

    started = perf_counter()
    for _ in range(runs):
        load(decompress(serialized))
    read_seconds = perf_counter() - started

    return write_seconds, read_seconds, len(serialized)


def main():
    """Benchmark each serializer on random float, int and str lists and print a table."""
    payload = {
        "float": [(random.randrange(0, 99) + random.random()) for _ in range(data_size)],
        "int": [random.randrange(0, 9999) for _ in range(data_size)],
        "str": [md5(str(random.random()).encode('utf8')).hexdigest() for _ in range(data_size)],
    }
    modules = [json, pickle, _pickle, dill, cloudpickle, marshal]
    print("{} times {} elements:".format(test_runs, data_size))
    print("{:15s} {:11s} {:10s} {:8s} {:>10s}".format('module', 'payload', 'write', 'read', 'size'))
    for payload_type, data in payload.items():
        for module in modules:
            w, r, size = time_round_trip(module, data, test_runs)
            print("{:15s} {:8s} {:8.3f} s {:8.3f} s {:10.0f} kB ".format(
                display_name(module), payload_type, w, r, size / 1e3))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment