Skip to content

Instantly share code, notes, and snippets.

@nateware
Last active May 31, 2024 18:35
Show Gist options
  • Save nateware/4735384 to your computer and use it in GitHub Desktop.
Save nateware/4735384 to your computer and use it in GitHub Desktop.
Check local files vs what's on S3, and upload any that have changed.
#!/usr/bin/env python
# Compare a file on S3 to see if we have the latest version
# If not, upload it and invalidate CloudFront
import fnmatch
import os
import boto
import pprint
import re
import hashlib
from boto.s3.key import Key
# Where the source tree is checked out locally; S3 keys are paths relative to this.
SOURCE_DIR = '/local/path/to/source/code'
# Target S3 bucket that mirrors SOURCE_DIR.
BUCKET_NAME = 'my-s3-bucket-name'
# Connect to S3 (credentials come from the boto config / environment)
# and grab a handle on the bucket.
conn = boto.connect_s3()
bucket = conn.get_bucket(BUCKET_NAME)
# Shortcut to MD5
def get_md5(filename):
    """Return the hex MD5 digest of *filename*, read in 10 KiB chunks.

    Chunked reads keep memory flat for arbitrarily large files.
    """
    m = hashlib.md5()
    # Fix: use a context manager so the file handle is always closed
    # (the original opened the file and never closed it).
    with open(filename, 'rb') as f:
        while True:
            data = f.read(10240)
            if len(data) == 0:
                break
            m.update(data)
    return m.hexdigest()
def to_uri(filename):
    """Map a local path under SOURCE_DIR to its S3 key (path relative to SOURCE_DIR).

    NOTE(review): re.sub treats SOURCE_DIR as a regex pattern; this is only
    safe while the path contains no regex metacharacters — confirm, or use
    re.escape / str.replace.
    """
    # Bug fix: the original substituted against the global 'f' instead of the
    # 'filename' parameter, and only worked because callers happened to use
    # 'f' as their loop variable.
    return re.sub(SOURCE_DIR, '', filename)
# Assemble a list of every file under SOURCE_DIR, recursively.
files = [
    os.path.join(dirpath, name)
    for dirpath, _subdirs, names in os.walk(SOURCE_DIR)
    for name in names
]
# Compare each local file against its S3 checksum and collect the ones
# that are missing or differ.
# NOTE(review): an S3 ETag equals the object's MD5 only for non-multipart,
# unencrypted uploads — confirm this holds for this bucket.
files_to_upload = []
for f in files:
    key = bucket.get_key(to_uri(f))
    if key is None:
        # Not on S3 yet: schedule the new file for upload.
        files_to_upload.append(f)
        continue
    # Key exists: compare the local MD5 against the S3 ETag.
    md5 = get_md5(f)
    etag = key.etag.strip('"').strip("'")
    if etag != md5:
        print(f + ": " + md5 + " != " + etag)
        files_to_upload.append(f)
# Upload every new/changed file under its SOURCE_DIR-relative key.
for f in files_to_upload:
    new_key = Key(bucket)
    new_key.key = to_uri(f)
    new_key.set_contents_from_filename(f)
    # CloudFront invalidation code goes here
@FilBot3
Copy link

FilBot3 commented May 1, 2018

@viatcheslavmogilevsky if you're on MacOS, Apple rolls their own brand of OpenSSL as of this post, and it may be slightly different than what would be used on an actual Linux VM/Container. If you're using MacOS

@akinom
Copy link

akinom commented Feb 20, 2020

did you consider using an MD5Sum tag on S3 objects -- it seems to me a cleaner solution if you have control over the S3 object upload

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment