import boto
import boto.s3
import boto.s3.connection
import os.path
import sys

# Fill these in - you get them when you sign up for S3
AWS_ACCESS_KEY_ID = ''
AWS_ACCESS_KEY_SECRET = ''

# Fill in info on data to upload
# destination bucket name
bucket_name = 'jwu-testbucket'
# source directory
sourceDir = 'testdata/'
# destination directory name (on s3)
destDir = ''

# max size in bytes before uploading in parts. between 1 and 5 GB recommended
MAX_SIZE = 20 * 1000 * 1000
# size of parts when uploading in parts
PART_SIZE = 6 * 1000 * 1000

conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_KEY_SECRET)

bucket = conn.create_bucket(bucket_name,
    location=boto.s3.connection.Location.DEFAULT)

# collect the file names in the top level of sourceDir
# (the break keeps the walk non-recursive)
uploadFileNames = []
for (sourceDir, dirnames, filenames) in os.walk(sourceDir):
    uploadFileNames.extend(filenames)
    break

def percent_cb(complete, total):
    sys.stdout.write('.')
    sys.stdout.flush()

for filename in uploadFileNames:
    sourcepath = os.path.join(sourceDir + filename)
    destpath = os.path.join(destDir, filename)
    print 'Uploading %s to Amazon S3 bucket %s' % \
        (sourcepath, bucket_name)

    filesize = os.path.getsize(sourcepath)
    if filesize > MAX_SIZE:
        # large file: upload in PART_SIZE chunks via the multipart upload API
        print "multipart upload"
        mp = bucket.initiate_multipart_upload(destpath)
        fp = open(sourcepath, 'rb')
        fp_num = 0
        while fp.tell() < filesize:
            fp_num += 1
            print "uploading part %i" % fp_num
            mp.upload_part_from_file(fp, fp_num, cb=percent_cb, num_cb=10,
                size=PART_SIZE)
        mp.complete_upload()
    else:
        # small file: upload in a single request
        print "singlepart upload"
        k = boto.s3.key.Key(bucket)
        k.key = destpath
        k.set_contents_from_filename(sourcepath,
            cb=percent_cb, num_cb=10)
This also creates the bucket, and errors out if the bucket already exists. Would I simply comment out:
bucket = conn.create_bucket(bucket_name,
location=boto.s3.connection.Location.DEFAULT)
to bypass that?
Ah, got it figured out. Simply replace:
bucket = conn.create_bucket(bucket_name,
location=boto.s3.connection.Location.DEFAULT)
with
bucket = conn.get_bucket(bucket_name)
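If you want the same script to work whether or not the bucket already exists, boto's lookup() returns None for a missing bucket, so a small sketch along these lines (using the names from the script above) avoids the error either way:

# reuse the bucket if it already exists, otherwise create it
bucket = conn.lookup(bucket_name)
if bucket is None:
    bucket = conn.create_bucket(bucket_name,
        location=boto.s3.connection.Location.DEFAULT)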
Using this technique, can you upload files within a subdirectory inside a directory?
This is great, just what I'm after. But how can I extract the AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET in my Python script running within Bitbucket Pipelines? Other scripts I have seen make no reference to these two properties, but connect like this:
import boto3
client = boto3.client('s3')
The methods are different in boto3, though, so it's a bit tricky... any advice?
@mark-norgate The example from Amazon for Bitbucket Pipelines says to set environment variables, which are automatically picked up by the boto3 library. linky
Also, thanks for the script!
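For what it's worth, boto3 reads the standard AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables on its own (note the second name differs from the AWS_ACCESS_KEY_SECRET variable in the script above), so in a Pipelines step you can create the client without passing credentials. A minimal sketch; the file and bucket names are placeholders:

import boto3

# credentials are read from the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
# environment variables set as repository variables in the pipeline
client = boto3.client('s3')
client.upload_file('testdata/example.txt', 'jwu-testbucket', 'example.txt')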
You also need to add a '/' if your sourceDir doesn't already end with one:
sourcepath = os.path.join(sourceDir + '/' + filename)
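Passing the directory and file name as separate arguments also works, since os.path.join inserts the separator itself:

sourcepath = os.path.join(sourceDir, filename)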
This won't do it recursively for sub-directories.
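Right, the break after the first os.walk() iteration keeps it to the top level only. A rough sketch of a recursive variant (single-part uploads only, for brevity); the relative-path key naming via os.path.relpath is my own addition, not part of the original gist:

# walk sourceDir recursively and upload every file,
# preserving the relative path in the S3 key name
for dirpath, dirnames, filenames in os.walk(sourceDir):
    for filename in filenames:
        sourcepath = os.path.join(dirpath, filename)
        relpath = os.path.relpath(sourcepath, sourceDir)
        destpath = os.path.join(destDir, relpath).replace(os.sep, '/')
        k = boto.s3.key.Key(bucket)
        k.key = destpath
        k.set_contents_from_filename(sourcepath, cb=percent_cb, num_cb=10)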
While syncing a directory to AWS with this code, only one file is uploaded even though the directory contains 3 files. Please help me solve this problem.
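Hard to say without more detail, but one quick check is to print the file list before the upload loop runs; files sitting inside subfolders of sourceDir will not appear in it:

# quick check: which files did the top-level walk actually find?
print uploadFileNames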
Thanks Man!
This code is amazing! Thank you @SavvyGuard !
bucket = conn.create_bucket(bucket_name, location='https://s3.eu-central-1.wasabisys.com')
S3ResponseError: S3ResponseError: 403 Forbidden
InvalidAccessKeyId
The AWS Access Key Id you provided does not exist in our records.NOCC4UFL6U659XNJHGFME3M762MSK2KPQJMElWA0cWuCCEAOw9ObIyTn8GGe1ErsEdJeTw8aHfPX5T09QSDYT3jElLqAsGv/LPcIJhH+5ncuBdU=
what's wrong?
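The location argument of create_bucket is an S3 region constraint, not an endpoint URL, so that call still goes to Amazon, which then rejects the Wasabi key. To point boto at an S3-compatible service you connect with an explicit host; a sketch, assuming the Wasabi endpoint above and that path-style addressing is acceptable:

import boto.s3.connection

# connect to the S3-compatible endpoint directly instead of AWS
conn = boto.s3.connection.S3Connection(
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_ACCESS_KEY_SECRET,
    host='s3.eu-central-1.wasabisys.com',
    calling_format=boto.s3.connection.OrdinaryCallingFormat())
bucket = conn.get_bucket(bucket_name)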
Thank you!