Skip to content

Instantly share code, notes, and snippets.

@ungeskriptet
Last active November 21, 2024 07:09
Show Gist options
  • Save ungeskriptet/bad23991058d0ac39b0ccec4c0436fc1 to your computer and use it in GitHub Desktop.
Save ungeskriptet/bad23991058d0ac39b0ccec4c0436fc1 to your computer and use it in GitHub Desktop.
Python script to skip already uploaded files in an item on archive.org
#!/usr/bin/python
from internetarchive import upload
from random import randint
from requests import get
import hashlib
import sys
def info(text):
    """Print *text* to stdout behind a bright-blue ``INFO:`` tag.

    The tag is coloured with ANSI escape sequences (``94`` = bright blue,
    ``00`` = reset), so the message itself is shown in the default colour.
    """
    message = f"\033[94mINFO: \033[00m{text}"
    print(message)
def warning(text):
    """Print *text* to stdout behind a bright-yellow ``WARNING:`` tag.

    The tag is coloured with ANSI escape sequences (``93`` = bright yellow,
    ``00`` = reset), so the message itself is shown in the default colour.
    """
    message = f"\033[93mWARNING: \033[00m{text}"
    print(message)
def file_md5(path, chunk_size=2**12):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in pieces of *chunk_size* bytes so that large files
    never have to fit in memory at once.
    """
    hasher = hashlib.md5()
    with open(path, 'rb') as f:
        while data := f.read(chunk_size):
            hasher.update(data)
    return hasher.hexdigest()


def remote_index(item_metadata):
    """Collect the file names and MD5 checksums listed in *item_metadata*.

    *item_metadata* is the decoded JSON document served by
    ``https://archive.org/metadata/<item>``. Its ``files`` entry is expected
    to be a list of dicts carrying ``name`` and (usually) ``md5`` keys.
    A document without ``files`` yields two empty sets (presumably what the
    endpoint returns for an item that does not exist yet; confirm against
    the Metadata API docs).

    Returns:
        A ``(names, md5s)`` tuple of sets.
    """
    entries = item_metadata.get('files') or []
    names = {entry['name'] for entry in entries if entry.get('name')}
    md5s = {entry['md5'] for entry in entries if entry.get('md5')}
    return names, md5s


def choose_remote_name(file, md5, remote_names, remote_md5s):
    """Decide under which name *file* should be uploaded, if at all.

    Comparisons are exact set lookups, so a short local name such as ``a``
    is not mistaken for an existing remote file merely because the letter
    occurs somewhere in the item's metadata.

    Returns:
        ``None`` if *md5* already exists in the item (content is already
        uploaded), *file* itself if that name is still free, otherwise
        ``<file>_fixme_<n>`` with a random ``n`` in 0..9999 that is not yet
        taken in the item.
    """
    if md5 in remote_md5s:
        return None
    if file not in remote_names:
        return file
    # Same name, different content: pick an unused "_fixme_" name so the
    # existing remote file is not overwritten.
    while True:
        candidate = f'{file}_fixme_{randint(0, 9999)}'
        if candidate not in remote_names:
            return candidate


def main():
    """Upload the files named on the command line, skipping known content.

    Usage: ``script <item> [file ...]``. The item's metadata is fetched once
    and every local file is compared against it by MD5 checksum and by name
    before being queued for upload.
    """
    if len(sys.argv) < 2:
        sys.exit(f'Usage: {sys.argv[0]} <item> [file ...]')
    item = sys.argv[1]

    # Fail loudly on HTTP errors instead of treating an error page as an
    # item with no files; the timeout keeps a stalled connection from
    # hanging the script forever.
    response = get(f'https://archive.org/metadata/{item}', timeout=60)
    response.raise_for_status()
    remote_names, remote_md5s = remote_index(response.json())

    file_queue = {}
    info(f'Uploading to item \'{item}\'')
    for file in sys.argv[2:]:
        remote_name = choose_remote_name(
            file, file_md5(file), remote_names, remote_md5s)
        if remote_name is None:
            warning(f'{file} has already been uploaded')
        elif remote_name != file:
            warning(f'{file} has already been uploaded with a different checksum. Uploading anyway under different file name')
            file_queue[remote_name] = file
        else:
            info(f'Adding {file} to upload queue')
            file_queue[file] = file

    if file_queue:
        upload(identifier=item, files=file_queue, verify=True, checksum=True, verbose=True, retries=9999, retries_sleep=60)
    else:
        info("No need to upload any files")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment