Last active
April 19, 2024 13:44
-
-
Save sueszli/c8bd7ec5d821e281be9cabcf2fa51fef to your computer and use it in GitHub Desktop.
bypassing github storage service
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib | |
import sys | |
import pathlib | |
import subprocess | |
""" | |
github commits are restricted to 25-50 MiB, varying based on the push method [^1]. | |
to handle files beyond this limit, git lfs (large file storage) pointers are necessary, referencing an external lfs server [^2]. | |
however, this method incurs a monthly cloud storage fee to github [^3]. | |
this is a failed attempt at bypassing the file size limit by committing a large file in small chunks: | |
> remote: warning: File huge-ass-file.tar is 60.00 MB; this is larger than GitHub's recommended maximum file size of 50.00 MB | |
> remote: error: Trace: 2fa983a46f7b5205ea9bbef6e118069f7426f07618935e67ed6225df9647d617 | |
> remote: error: See https://gh.io/lfs for more information. | |
> ... | |
> remote: error: File huge-ass-file.tar is 150.00 MB; this exceeds GitHub's file size limit of 100.00 MB | |
> remote: error: File huge-ass-file.tar is 200.00 MB; this exceeds GitHub's file size limit of 100.00 MB | |
[^1]: docs: https://docs.github.com/en/repositories/working-with-files/managing-large-files/about-large-files-on-github#file-size-limits | |
[^2]: nice comment: wokwokwok, 2021 on hackernews, https://news.ycombinator.com/item?id=27134972#:~:text=of%20such%20projects-,wokwokwok,-on%20May%2013 | |
[^3]: https://docs.github.com/en/billing/managing-billing-for-git-large-file-storage/about-billing-for-git-large-file-storage | |
""" | |
def assert_matching_checksums(filepath1: pathlib.Path, filepath2: pathlib.Path) -> None:
    """Raise AssertionError unless both files have identical MD5 digests.

    The digest is computed by streaming each file in 1 MiB chunks so that
    very large files (the whole point of this script) never have to fit in
    memory at once — the original `read_bytes()` approach would buffer the
    entire file.
    """
    print("verifying checksums...")

    def _md5_of(path: pathlib.Path) -> str:
        # read in fixed-size chunks; iter() with a b"" sentinel stops at EOF
        digest = hashlib.md5()
        with open(path, "rb") as fh:
            for chunk in iter(lambda: fh.read(1024 * 1024), b""):
                digest.update(chunk)
        return digest.hexdigest()

    checksum1 = _md5_of(pathlib.Path(filepath1))
    checksum2 = _md5_of(pathlib.Path(filepath2))
    assert checksum1 == checksum2, f"checksums do not match: {checksum1} != {checksum2}"
    print(f"checksums match: {checksum1} == {checksum2}")
def assert_matching_filesizes(filepath1: pathlib.Path, filepath2: pathlib.Path) -> None:
    """Raise AssertionError unless both files are exactly the same size in bytes."""
    print("verifying file sizes...")
    size_a, size_b = (p.stat().st_size for p in (filepath1, filepath2))
    assert size_a == size_b, f"file sizes do not match: {size_a} != {size_b}"
    print(f"file sizes match: {size_a} == {size_b}")
if __name__ == "__main__":
    # usage: python thisscript.py /path/to/huge-file  (run from inside the target git repo)
    file = pathlib.Path(sys.argv[1])

    # sanity checks on the working directory and the source file
    assert pathlib.Path(".git").exists(), "put this script inside the git directory you want to copy the file to"
    assert file.exists(), f"file does not exist: {file}"
    assert file.is_file(), f"not a file: {file}"
    assert not any(sibling.name == ".git" for sibling in file.parent.glob("*")), f"{file} should not be in a .git directory"

    print(f"{file.name} size: {file.stat().st_size}")
    print("copying and committing chunks to github...")

    chunk_size = 30 * 1024 * 1024  # 30 MiB per commit, safely under github's per-push limits
    # integer division + 1 may produce one extra (empty) iteration when the
    # size is an exact multiple; the `if not chunk: break` below handles it
    num_chunks = (file.stat().st_size // chunk_size) + 1

    # truncate any previous local copy so the "ab" appends below start from empty
    with open(file.name, "wb"):
        pass

    # open the source once and read it sequentially, instead of the original
    # reopen-and-seek on every iteration
    with open(file, "rb") as src:
        for i in range(num_chunks):
            chunk = src.read(chunk_size)
            if not chunk:
                print(f"no more chunks to read at iteration {i}")
                break
            # append this chunk to the copy inside the repo
            with open(file.name, "ab") as dst:
                dst.write(chunk)
            # push to github; check=True aborts on a failed add/commit/push
            # instead of silently continuing with an incomplete upload
            subprocess.run(["git", "add", file.name], check=True)
            subprocess.run(["git", "commit", "-m", f"git lfs exploit auto commit: {file.name} - {i}/{num_chunks}"], check=True)
            subprocess.run(["git", "push"], check=True)
            print(f"\033[92mprogress: {i}/{num_chunks} \033[0m")

    # verify the reassembled copy matches the source byte-for-byte
    assert_matching_checksums(file, pathlib.Path(file.name))
    assert_matching_filesizes(file, pathlib.Path(file.name))
    print(f"finished! {file.name} pushed to github")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
update: the script above worked just fine and it was surprisingly easy to set up