Last active
November 21, 2024 07:09
-
-
Save ungeskriptet/bad23991058d0ac39b0ccec4c0436fc1 to your computer and use it in GitHub Desktop.
Python script to skip already uploaded files in an item on archive.org
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import hashlib
import sys
from random import randint

from internetarchive import upload
from requests import get
def info(text):
    """Print ``text`` to stdout behind a blue ``INFO:`` tag.

    The tag is coloured with ANSI escape sequences (``94`` = bright blue)
    and the colour is reset before the message itself is printed.
    """
    print(f"\033[94mINFO: \033[00m{text}")
def warning(text):
    """Print ``text`` to stdout behind a yellow ``WARNING:`` tag.

    The tag is coloured with ANSI escape sequences (``93`` = bright yellow)
    and the colour is reset before the message itself is printed.
    """
    print(f"\033[93mWARNING: \033[00m{text}")
def md5_of_file(path, chunk_size=2**16):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in ``chunk_size``-byte pieces so that large files are
    never held in memory all at once.
    """
    hasher = hashlib.md5()
    with open(path, 'rb') as f:
        while data := f.read(chunk_size):
            hasher.update(data)
    return hasher.hexdigest()


def pick_remote_name(name, taken):
    """Return a remote file name for ``name`` that does not collide with ``taken``.

    ``name`` is returned unchanged when it is free.  Otherwise a random
    ``_fixme_<0..9999>`` suffix is appended and re-rolled until the result is
    not in ``taken``.
    """
    if name not in taken:
        return name
    candidate = f'{name}_fixme_{randint(0, 9999)}'
    while candidate in taken:
        candidate = f'{name}_fixme_{randint(0, 9999)}'
    return candidate


def main():
    """Upload the files named on the command line to an archive.org item.

    Usage: ``script ITEM [FILE ...]``

    Files whose MD5 already appears in the item's metadata are skipped.
    Files whose name is already taken by different content are uploaded
    under a ``<name>_fixme_<n>`` name instead of overwriting.
    """
    if len(sys.argv) < 2:
        sys.exit(f'Usage: {sys.argv[0] if sys.argv else "upload"} ITEM [FILE ...]')

    item = sys.argv[1]
    response = get(f'https://archive.org/metadata/{item}', timeout=60)
    response.raise_for_status()

    # Compare against parsed metadata rather than the raw JSON text: a
    # substring search would treat "a.txt" as present when only "aa.txt"
    # exists, and depends on the exact whitespace/escaping of the response.
    # An item with no files yet yields an empty "files" list.
    remote_files = response.json().get('files', [])
    remote_md5s = {entry['md5'] for entry in remote_files if 'md5' in entry}
    remote_names = {entry['name'] for entry in remote_files if 'name' in entry}

    file_queue = {}
    info(f'Uploading to item \'{item}\'')
    for file in sys.argv[2:]:
        if md5_of_file(file) in remote_md5s:
            warning(f'{file} has already been uploaded')
            continue

        remote_name = pick_remote_name(file, remote_names)
        if remote_name != file:
            warning(f'{file} has already been uploaded with a different checksum. Uploading anyway under different file name')
        else:
            info(f'Adding {file} to upload queue')
        # Keys are the names used inside the item, values the local paths.
        file_queue[remote_name] = file

    if file_queue:
        upload(identifier=item, files=file_queue, verify=True, checksum=True, verbose=True, retries=9999, retries_sleep=60)
    else:
        info("No need to upload any files")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment