Save nateware/4735384 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Sync check: compare local files against their copies on S3 and upload
# any that are new or changed, then invalidate CloudFront for those paths.
import fnmatch
import hashlib
import os
import pprint
import re

import boto
from boto.s3.key import Key

# Local checkout of the source tree to be mirrored.
SOURCE_DIR = '/local/path/to/source/code'
# Target S3 bucket name.
BUCKET_NAME = 'my-s3-bucket-name'

# Open the S3 connection and resolve the bucket up front; credentials come
# from the usual boto configuration/environment.
conn = boto.connect_s3()
bucket = conn.get_bucket(BUCKET_NAME)
# Shortcut to MD5 | |
def get_md5(filename):
    """Return the hex MD5 digest of *filename*.

    Reads the file in fixed-size binary chunks so arbitrarily large files
    can be hashed without loading them into memory.

    :param filename: path to the file to hash
    :return: 32-character lowercase hex digest string
    """
    digest = hashlib.md5()
    # 'with' guarantees the handle is closed; the original leaked it.
    with open(filename, 'rb') as fh:
        # iter(callable, sentinel) stops cleanly at EOF (empty bytes).
        for chunk in iter(lambda: fh.read(10240), b''):
            digest.update(chunk)
    return digest.hexdigest()
def to_uri(filename):
    """Map a local path under SOURCE_DIR to its S3 key.

    Bug fix: the original body referenced the script-level loop variable
    ``f`` instead of its own ``filename`` parameter (it only worked by
    accident of the caller's naming), and passed the directory path to
    ``re.sub`` as a regex pattern, which breaks if the path contains
    regex metacharacters.  Strip the prefix with plain string ops instead.

    :param filename: absolute local path
    :return: the path with the SOURCE_DIR prefix removed
    """
    if filename.startswith(SOURCE_DIR):
        return filename[len(SOURCE_DIR):]
    return filename
# Recursively collect every file path beneath SOURCE_DIR.
files = [
    os.path.join(dirpath, name)
    for dirpath, _subdirs, names in os.walk(SOURCE_DIR)
    for name in names
]
# Work out which files need uploading: keys missing on S3, or keys whose
# stored checksum no longer matches the local file's MD5.
files_to_upload = []
for local_path in files:
    remote_key = bucket.get_key(to_uri(local_path))
    if remote_key is None:
        # Not on S3 yet -- schedule for upload.
        files_to_upload.append(local_path)
        continue
    # NOTE(review): for multipart uploads the ETag is not a plain MD5
    # (it contains a '-'), so such objects will always compare unequal
    # and be re-uploaded -- confirm whether that matters here.
    local_md5 = get_md5(local_path)
    remote_etag = remote_key.etag.strip('"').strip("'")
    if remote_etag != local_md5:
        print(local_path + ": " + local_md5 + " != " + remote_etag)
        files_to_upload.append(local_path)
# Push each new/changed file to S3 under its computed key.
for local_path in files_to_upload:
    s3_key = Key(bucket)
    s3_key.key = to_uri(local_path)
    s3_key.set_contents_from_filename(local_path)
    # CloudFront invalidation code goes here
Use the s3 file header, something like:
head=self.s3.head_object(Bucket=bucket, Key=path)
#HEAD: {u'AcceptRanges': 'bytes', u'ContentType': 'binary/octet-stream', 'ResponseMetadata': {'HTTPStatusCode': 200, 'HostId': 'j0KrohvVeb8gcs3TIjw2j4d/39JU8h3CS4sUe9IndL6RYHDy8Ruc1DUvMto+4/rxVP9f/wxMGHU=', 'RequestId': 'C2819F65ACE2FD7F'}, u'LastModified': datetime.datetime(2017, 3, 15, 12, 54, 5 , tzinfo=tzutc()), u'ContentLength': 441296, u'ETag': '"3c1952332ae963529293922eebde44dc"', u'Metadata': {}}
lastmodified=head['LastModified']
Thanks, it works. But why does the bash command `openssl md5 -binary PATH/TO/FILE | base64`
give a different result? In my case each file is smaller than 5 GB. The command is from the AWS docs ( https://aws.amazon.com/premiumsupport/knowledge-center/data-integrity-s3/ )
Hello,
I am using this Python script to upload files that have changed or were newly created from a local folder to an S3 folder. The script does not work: it fails at getting the bucket. I am using boto with Python 2.7. Any help much appreciated.
Many thanks.
Here is the error
Traceback (most recent call last): File "s3update.py", line 20, in <module> bucket = conn.get_bucket(BUCKET_NAME) File "/usr/lib/python2.7/site-packages/boto/s3/connection.py", line 506, in get_bucket return self.head_bucket(bucket_name, headers=headers) File "/usr/lib/python2.7/site-packages/boto/s3/connection.py", line 539, in head_bucket raise err boto.exception.S3ResponseError: S3ResponseError: 403 Forbidden
@viatcheslavmogilevsky if you're on macOS, Apple rolls their own brand of OpenSSL as of this post, and it may be slightly different from what would be used on an actual Linux VM/container — if you're using macOS, that could explain the different output.
Did you consider using an MD5Sum tag on S3 objects? That seems to me a cleaner solution if you have control over the S3 object upload.
I believe s3 etags do not always equal the file's md5 hash: http://stackoverflow.com/questions/12186993/what-is-the-algorithm-to-compute-the-amazon-s3-etag-for-a-file-larger-than-5gb