Last active
May 31, 2024 18:35
-
-
Save nateware/4735384 to your computer and use it in GitHub Desktop.
Check local files vs what's on S3, and upload any that have changed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Compare a file on S3 to see if we have the latest version | |
# If not, upload it and invalidate CloudFront | |
import fnmatch | |
import os | |
import boto | |
import pprint | |
import re | |
import hashlib | |
from boto.s3.key import Key | |
# S3 bucket the local files are compared against and uploaded to
BUCKET_NAME = 'my-s3-bucket-name'
# Local directory where the source is checked out
SOURCE_DIR = '/local/path/to/source/code'
# Open the S3 connection, then look up the bucket by name
conn = boto.connect_s3()
bucket = conn.get_bucket(BUCKET_NAME)
# Shortcut to MD5 | |
def get_md5(filename): | |
f = open(filename, 'rb') | |
m = hashlib.md5() | |
while True: | |
data = f.read(10240) | |
if len(data) == 0: | |
break | |
m.update(data) | |
return m.hexdigest() | |
def to_uri(filename):
    """Return ``filename`` with the leading SOURCE_DIR prefix removed.

    The result is what the script uses as the S3 key name. Everything after
    the prefix is returned unchanged (including the leading path separator).
    A path that does not start with SOURCE_DIR is returned as-is.
    """
    # Operate on the argument itself rather than a module-level loop
    # variable, and strip a literal leading prefix instead of treating
    # SOURCE_DIR as a regular expression (characters such as '.' or '+' in
    # the path would otherwise be interpreted as regex syntax, and matches
    # in the middle of the path would be removed too).
    if filename.startswith(SOURCE_DIR):
        return filename[len(SOURCE_DIR):]
    return filename
# Walk SOURCE_DIR recursively and collect the full path of every file
files = []
for root, _subdirs, names in os.walk(SOURCE_DIR):
    files.extend(os.path.join(root, name) for name in names)
# Work out which local files must be uploaded: those with no key on S3,
# and those whose local MD5 differs from the key's ETag
files_to_upload = []
for f in files:
    key = bucket.get_key(to_uri(f))
    if key is None:
        # not on S3 yet, upload
        files_to_upload.append(f)
        continue
    # already on S3: compare the local MD5 with the ETag (quotes stripped)
    md5 = get_md5(f)
    etag = key.etag.strip('"').strip("'")
    if etag == md5:
        continue
    print(f + ": " + md5 + " != " + etag)
    files_to_upload.append(f)
# Push every new or changed file to S3 under its URI, then invalidate
for f in files_to_upload:
    uri = to_uri(f)
    # Key(bucket, name) creates the key object with its name already set
    key = Key(bucket, uri)
    key.set_contents_from_filename(f)
    # CloudFront invalidation code goes here
@viatcheslavmogilevsky if you're on MacOS, Apple rolls their own brand of OpenSSL as of this post, and it may be slightly different than what would be used on an actual Linux VM/Container. If you're using MacOS
Did you consider using an MD5Sum tag on S3 objects? It seems to me a cleaner solution if you have control over the S3 object upload.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello,
I am using this Python script to upload files that have changed or been newly created from a local folder to an S3 folder. The script does not work: it fails when getting the bucket. I am using boto with Python 2.7. Any help would be much appreciated.
Many thanks.
Here is the error
Traceback (most recent call last):
  File "s3update.py", line 20, in <module>
    bucket = conn.get_bucket(BUCKET_NAME)
  File "/usr/lib/python2.7/site-packages/boto/s3/connection.py", line 506, in get_bucket
    return self.head_bucket(bucket_name, headers=headers)
  File "/usr/lib/python2.7/site-packages/boto/s3/connection.py", line 539, in head_bucket
    raise err
boto.exception.S3ResponseError: S3ResponseError: 403 Forbidden