Created
April 9, 2018 15:21
-
-
Save annawoodard/437cddf7fe1bb6300fa1c9b6681b6fd1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Based on https://gist.github.com/CTimmerman/1f328f02ac2740f4c90d
"""
import _pickle
import json
import marshal
import pickle
import random
from hashlib import md5
from time import perf_counter, time
from zlib import compress, decompress

import cloudpickle
import dill
#import ujson # from http://www.lfd.uci.edu/~gohlke/pythonlibs/#ujson
# 2 to 3 times slower and up to 50% larger output than marshal.
data_size = 100000
test_runs = 10

# Display names of the serializers whose ``dumps`` is called with a pickle
# ``protocol`` keyword.
_PICKLE_LIKE = ('dill', 'pickle', 'cPickle', 'cloudpickle')
# Display names of the serializers whose ``dumps`` output is passed straight
# to ``compress`` (json is the only one here that returns text instead).
_BINARY = _PICKLE_LIKE + ('marshal',)


def _display_name(module):
    """Return the label printed for *module* (``_pickle`` is shown as ``cPickle``)."""
    return 'cPickle' if module.__name__ == '_pickle' else module.__name__


def _make_codec(module):
    """Return ``(encode, decode)`` callables mapping an object to/from bytes.

    The thin wrappers add one Python-level call per run, which is negligible
    next to serializing the payload itself.
    """
    name = _display_name(module)
    if name in _PICKLE_LIKE:
        # protocol=-1 asks for the highest pickle protocol the running
        # interpreter supports.
        return (lambda obj: module.dumps(obj, protocol=-1)), module.loads
    if name in _BINARY:
        return module.dumps, module.loads
    # Text serializer (json): encode to UTF-8 so it can be compressed, and
    # decode again before parsing.
    return (
        lambda obj: module.dumps(obj).encode('utf8'),
        lambda raw: module.loads(raw.decode('utf8')),
    )


def benchmark(module, data, runs):
    """Time *runs* compressed round trips of *data* through *module*.

    The write phase measures ``dumps`` followed by ``zlib.compress``; the read
    phase measures ``zlib.decompress`` followed by ``loads``.  Compression is
    deliberately kept inside the timed region, so the figures describe the
    whole "serialize + compress" pipeline rather than the serializer alone.

    Args:
        module: a module exposing ``dumps`` and ``loads``.
        data: the object to serialize.
        runs: how many times each phase is repeated; must be at least 1.

    Returns:
        A tuple ``(write_seconds, read_seconds, serialized, unserialized)``
        where ``serialized`` is the compressed bytes of the last write and
        ``unserialized`` is the object produced by the last read.

    Raises:
        ValueError: if *runs* is smaller than 1 (there would be nothing to
            measure and no serialized output to report).
    """
    if runs < 1:
        raise ValueError("runs must be at least 1, got {!r}".format(runs))

    encode, decode = _make_codec(module)

    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring short durations; wall-clock time() can jump if the system
    # clock is adjusted mid-run.
    start = perf_counter()
    for _ in range(runs):
        serialized = compress(encode(data))
    write_seconds = perf_counter() - start

    start = perf_counter()
    for _ in range(runs):
        unserialized = decode(decompress(serialized))
    read_seconds = perf_counter() - start

    return write_seconds, read_seconds, serialized, unserialized


if __name__ == "__main__":
    payload = {
        "float": [(random.randrange(0, 99) + random.random()) for _ in range(data_size)],
        "int": [random.randrange(0, 9999) for _ in range(data_size)],
        "str": [md5(str(random.random()).encode('utf8')).hexdigest() for _ in range(data_size)],
    }
    modules = [json, pickle, _pickle, dill, cloudpickle, marshal]

    print("{} times {} elements:".format(test_runs, data_size))
    print("{:15s} {:11s} {:10s} {:8s} {:>10s}".format('module', 'payload', 'write', 'read', 'size'))

    for payload_type, data in payload.items():
        for module in modules:
            name = _display_name(module)
            w, r, serialized, unserialized = benchmark(module, data, test_runs)
            print("{:15s} {:8s} {:8.3f} s {:8.3f} s {:10.0f} kB ".format(
                name, payload_type, w, r, len(serialized) / 1e3))
            # Checked outside the timed region: a serializer that does not
            # reproduce the input makes its timings meaningless.
            if unserialized != data:
                print("warning: {} did not round-trip the {} payload".format(name, payload_type))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment