Last active
September 8, 2024 08:40
-
-
Save CTimmerman/ccf884f8c8dcc284588f1811ed99be6c to your computer and use it in GitHub Desktop.
Resumable HTTP download: a proof of concept (POC) / minimum viable product (MVP). Please let pip resume interrupted downloads like this, so that an unstable internet connection is no problem when working on the road.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Download with automatic resume. | |
2018-06-28 v1.0 by Cees Timmerman | |
2018-07-09 v1.1 Added If-Unmodified-Since header for consistency.""" | |
import os, shutil, sys, time | |
import requests # python -m pip install requests | |
def _total_size_from_content_range(content_range):
    """Return the total length announced in a Content-Range value, or None.

    Accepts both the ``bytes 0-9/100`` and the ``bytes */100`` form. A missing
    header or an unknown total (``*``) yields None.
    """
    if not content_range:
        return None
    total = content_range.rpartition('/')[2].strip()
    return int(total) if total.isdigit() else None


def download_file(url, local_filename=None, verify=True):
    """Download *url* to *local_filename*, resuming a partial file if present.

    If *local_filename* already exists and is non-empty, a ``Range`` request
    is sent for the missing tail and the response is appended (HTTP 206).
    If the server ignores the range and answers 200, the file is rewritten
    from the start.

    Args:
        url: Address of the resource to fetch.
        local_filename: Target path. Defaults to the last path segment of
            *url*.
        verify: Passed to ``requests.get``. Keep the default (True) so TLS
            certificates are checked; pass False only for hosts you trust
            despite an invalid certificate.

    Raises:
        RuntimeError: The server answered with a status other than 200 or
            206, and it was not a 416 confirming the local file is already
            complete. This includes 412, sent when the remote file changed
            after the partial download was written.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from email.utils import formatdate

    if not local_filename:
        local_filename = url.split('/')[-1]

    # One stat() call yields size and mtime together, so they cannot disagree
    # and a missing file simply means "start from byte 0".
    try:
        partial = os.stat(local_filename)
    except OSError:
        partial = None
    resume_byte_pos = partial.st_size if partial else 0

    # r.raw is written to disk without content decoding, so ask the server not
    # to compress; byte offsets then match the file on disk.
    headers = {'Accept-Encoding': 'identity'}
    if resume_byte_pos:
        headers['Range'] = 'bytes=%d-' % resume_byte_pos
        # If-Range with the local mtime was tried and never matched (it needs
        # the server's exact Last-Modified value), so guard against a changed
        # remote file with If-Unmodified-Since instead. formatdate() emits a
        # locale-independent HTTP date.
        headers['If-Unmodified-Since'] = formatdate(partial.st_mtime, usegmt=True)
    print("Requesting", headers)

    with requests.get(
        url,
        headers=headers,
        stream=True,  # Save RAM.
        verify=verify,
        allow_redirects=True,
    ) as r:
        print("Received", r.headers)
        if r.status_code == 206:
            print('Resuming download')
            mode = 'ab'
        elif r.status_code == 200:
            print('(Re)starting download')
            mode = 'wb'
        elif (
            r.status_code == 416
            and resume_byte_pos
            and _total_size_from_content_range(r.headers.get('Content-Range'))
            == resume_byte_pos
        ):
            # Nothing left to fetch: the local file already has every byte.
            print('Download already complete')
            return
        else:
            raise RuntimeError(
                "Unexpected HTTP status %s for %s" % (r.status_code, url)
            )
        with open(local_filename, mode) as f:
            shutil.copyfileobj(r.raw, f)
if __name__ == "__main__":
    # Usage: python <this script> [URL [LOCAL_FILENAME]]
    # Without arguments the original demo download below is performed.
    #
    # Local self-test, kept for reference. Serve the current directory with
    # `python -m http.server`, then run:
    #
    #   open('test_server_data', 'wb').write(b'1234567890'*20)
    #   sent = open('test_server_data', 'rb').read()
    #   print("Wrote:")
    #   print(sent[:50], sent[-50:])
    #   download_file('http://localhost:8000/test_server_data', 'downloaded_data')
    #   print("Received:")
    #   received = open('downloaded_data', 'rb').read()
    #   print(received[:50], received[-50:])
    cli_args = sys.argv[1:3]
    if cli_args:
        # First argument is the URL, the optional second one the target path.
        download_file(*cli_args)
    else:
        download_file('https://files.pythonhosted.org/packages/5d/85/d174a50e0d6b60aa9113f6a32afb31f25345bec8584992af486235373252/PyQt5-5.11.2-5.11.1-cp35.cp36.cp37.cp38-none-win_amd64.whl')
""" | |
Received {'x-amz-id-2': '62GbFKUpcBCcdmNASaQOnkDKP8pqpkuQyZlJZ7E/E5XKRWH2UYHo3GoLgdH7mjoFGcTKlkgGL1k=', 'x-amz-request-id': 'D197A6E08A790C75', 'Last-Modified': 'Mon, 02 Jul 2018 10:08:26 GMT', 'ETag': '"1423c667f7a669b86fb726047c6622bb-12"', 'x-amz-version-id': 'tGHd1dEMyUmr0S68pWjuVsaUmSE.Cf1P', 'Content-Type': 'binary/octet-stream', 'Server': 'AmazonS3', 'Cache-Control': 'max-age=365000000, immutable, public', 'Accept-Ranges': 'bytes, bytes', 'Age': '617529', 'Content-Length': '93349595', 'Date': 'Mon, 09 Jul 2018 13:41:01 GMT', 'Connection': 'keep-alive', 'X-Served-By': 'cache-sea1036-SEA, cache-ams4431-AMS', 'X-Cache': 'HIT, HIT', 'X-Cache-Hits': '0, 0', 'X-Timer': 'S1531143662.786994,VS0,VE1', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains; preload', 'X-Frame-Options': 'deny', 'X-XSS-Protection': '1; mode=block', 'X-Content-Type-Options': 'nosniff', 'X-Permitted-Cross-Domain-Policies': 'none', 'X-Robots-Header': 'noindex'} | |
Requesting {'Range': 'bytes=58966016-'} | |
Received {'x-amz-id-2': '62GbFKUpcBCcdmNASaQOnkDKP8pqpkuQyZlJZ7E/E5XKRWH2UYHo3GoLgdH7mjoFGcTKlkgGL1k=', 'x-amz-request-id': 'D197A6E08A790C75', 'Last-Modified': 'Mon, 02 Jul 2018 10:08:26 GMT', 'ETag': '"1423c667f7a669b86fb726047c6622bb-12"', 'x-amz-version-id': 'tGHd1dEMyUmr0S68pWjuVsaUmSE.Cf1P', 'Content-Type': 'binary/octet-stream', 'Server': 'AmazonS3', 'Cache-Control': 'max-age=365000000, immutable, public', 'Accept-Ranges': 'bytes, bytes', 'Age': '617810', 'Content-Range': 'bytes 58966016-93349594/93349595', 'Content-Length': '34383579', 'Date': 'Mon, 09 Jul 2018 13:45:43 GMT', 'Connection': 'keep-alive', 'X-Served-By': 'cache-sea1036-SEA, cache-ams4448-AMS', 'X-Cache': 'HIT, HIT', 'X-Cache-Hits': '0, 0', 'X-Timer': 'S1531143943.365115,VS0,VE1', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains; preload', 'X-Frame-Options': 'deny', 'X-XSS-Protection': '1; mode=block', 'X-Content-Type-Options': 'nosniff', 'X-Permitted-Cross-Domain-Policies': 'none', 'X-Robots-Header': 'noindex'} | |
Resuming download | |
""" | |
To add a progress bar, replace the shutil.copyfileobj call with a read/write/report loop: https://stackoverflow.com/a/9740603/819417
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Because even Dutch mobile internet sucks.