Created
November 28, 2021 08:00
-
-
Save Timvrakas/2a4f52a6a7fd164cc0cd9a0ba08e33c8 to your computer and use it in GitHub Desktop.
GlacialFlow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''Highly Redundant AWS Glacier Multipart Uploader''' | |
from multiprocessing import Pool | |
import os | |
import time | |
import boto3 | |
import botocore.utils as utils | |
# Number of bytes in one mebibyte (1024 * 1024).
MEGA_BYTES = 1048576
# Size of each uploaded part, in mebibytes.
CHUNK_MB = 32
# Local file that is read and uploaded.
FILE_PATH = "/mnt/c/Backup/Tim S8 Backup 2017-09-08.rar"
# Name of the Glacier vault the script talks to.
VAULT_NAME = 'Tim-Backup'
# Description attached when a new multipart upload is created.
ARCHIVE_DESCRIPTION = 'Tim S8 Backup 2017-09-08'
# Size of each uploaded part, in bytes.
CHUNK_SIZE = MEGA_BYTES * CHUNK_MB
# Glacier client used by every request in this script.
client = boto3.client('glacier')

# Interactive prompt: keep listing the vault's in-progress multipart uploads
# until the user picks one to resume.  Answering "N" creates a new upload and
# loops again, so the new upload appears as a numbered entry to select.
# On exit, ``upload_id`` holds the id of the chosen upload.
while True:
    uploads = client.list_multipart_uploads(
        vaultName=VAULT_NAME)['UploadsList']
    for num, upload in enumerate(uploads):
        print('{0}->Resume Upload'.format(num))
        print(' ID: {}'.format(upload['MultipartUploadId']))
        print(' Date: {}'.format(upload['CreationDate']))
        # NOTE(review): the description is optional when an upload is
        # created, so the key is presumably absent for such entries; use
        # .get() so one of them cannot crash the listing with a KeyError.
        print(' Description: {}'.format(
            upload.get('ArchiveDescription')))
    print('N-> Create New Upload')
    input_cmd = input('> ').strip()
    if input_cmd.upper() == 'N':
        client.initiate_multipart_upload(
            vaultName=VAULT_NAME,
            archiveDescription=ARCHIVE_DESCRIPTION,
            partSize=str(CHUNK_SIZE))
        continue
    try:
        selection = int(input_cmd)
        # A negative index would silently pick an entry counted from the
        # end of the list, which is never what the menu offered.
        if selection < 0:
            raise IndexError(selection)
        upload = uploads[selection]
    except (ValueError, IndexError):
        print('Invalid Response')
        continue
    upload_id = upload['MultipartUploadId']
    print("Selected Upload {}".format(selection))
    break
# Split the file into fixed-size chunks; chunk ``k`` covers bytes
# [k * CHUNK_SIZE, (k + 1) * CHUNK_SIZE) and the last one may be shorter.
total_bytes = os.path.getsize(FILE_PATH)
print("File Size: " + str(total_bytes))
num_chunks = -(-total_bytes // CHUNK_SIZE)  # Ceiling Division
print("Broken into " + str(num_chunks) +
      " parts, each with " + str(CHUNK_SIZE) + " bytes")
chunks = set(range(num_chunks))
finished_chunks = set()

# Ask Glacier which parts of the selected upload it already holds.
response = client.list_parts(
    vaultName=VAULT_NAME, uploadId=upload_id, limit='1000')

# Chunk indices below are derived by dividing byte offsets by CHUNK_SIZE,
# which is only valid when the upload was created with that same part size.
existing_part_size = response.get('PartSizeInBytes')
if existing_part_size is not None and int(existing_part_size) != CHUNK_SIZE:
    raise SystemExit(
        "Selected upload uses a part size of {} bytes, but this script is "
        "configured for {} bytes".format(existing_part_size, CHUNK_SIZE))

# Walk every page of the listing, following the pagination marker.
while True:
    for part in response['Parts']:
        # RangeInBytes is split on '-'; the first field is the start offset.
        start = part['RangeInBytes'].split('-')[0]
        finished_chunks.add(int(start) // CHUNK_SIZE)
    print("Loading {}/{}...".format(len(finished_chunks), num_chunks), end='\r')
    marker = response.get('Marker')
    if not marker:
        # No marker means there are no further pages.
        break
    response = client.list_parts(
        vaultName=VAULT_NAME, uploadId=upload_id, marker=marker, limit='1000')

print("Already Uploaded: " + str(finished_chunks))
# Chunks that still have to be sent.
remaining_chunks = chunks - finished_chunks
def upload_chunk(i):
    '''Upload chunk number ``i`` of FILE_PATH as one part of the upload.

    Reads up to CHUNK_SIZE bytes starting at offset ``i * CHUNK_SIZE`` and
    sends them with ``client.upload_multipart_part``.  Every exception is
    caught and reported on stdout, so one bad chunk cannot take down the
    worker that runs it.

    Args:
        i: Zero-based index of the chunk to upload.

    Returns:
        True when the part was uploaded, False when any error occurred.
    '''
    try:
        print("Started Upload of Chunk " +
              str(i) + "/" + str(num_chunks))
        byte_start = i * CHUNK_SIZE
        # Inclusive end offset, clamped so the final chunk stops at the
        # last byte of the file.
        byte_end = min(byte_start + CHUNK_SIZE, total_bytes) - 1
        range_str = 'bytes {}-{}/{}'.format(byte_start, byte_end, total_bytes)
        with open(FILE_PATH, 'rb') as file:
            file.seek(byte_start)
            data = file.read(CHUNK_SIZE)
        client.upload_multipart_part(
            vaultName=VAULT_NAME, uploadId=upload_id, range=range_str, body=data)
        print("Finished Upload of Chunk " +
              str(i) + "/" + str(num_chunks))
        return True
    except Exception as e:
        # Name the chunk: several uploads run at once, so a bare error
        # message cannot be matched to the part that failed.
        print("Error! Chunk {}: {}".format(i, e))
        return False
# Upload the outstanding chunks with five worker processes, one chunk per
# task.  The context manager tears the pool down on exit, including when the
# wait loop is interrupted.
with Pool(processes=5) as pool:
    result = pool.map_async(upload_chunk, remaining_chunks, 1)
    while not result.ready():
        # The async result exposes no public progress counter, so the
        # number of tasks still pending is read from its private fields.
        num_remaining = result._number_left * result._chunksize
        num_completed = num_chunks - num_remaining
        percent = (num_completed / num_chunks) * 100
        print("Completed {}/{} ({:.3f}%)".format(
            num_completed, num_chunks, percent), end='\r')
        time.sleep(1)
    outcomes = result.get()

# Stop before the expensive checksum and the completion request if any
# worker explicitly reported failure by returning False.
if any(outcome is False for outcome in outcomes):
    raise SystemExit(
        "One or more chunks failed to upload; run again to resume.")

print("Calculating Checksum...")
# Close the file once the tree hash has been computed.
with open(FILE_PATH, 'rb') as archive_file:
    checksum = utils.calculate_tree_hash(archive_file)
print("Checksum: {}".format(checksum))
print("Finishing Upload...")
response = client.complete_multipart_upload(
    vaultName=VAULT_NAME, uploadId=upload_id, checksum=checksum,
    archiveSize=str(total_bytes))
print(response)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment