Created
February 28, 2017 09:33
-
-
Save alexwlchan/97d7c80bb7227ffb6dc348c5983ab1cb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""Guess a filename from a URL.

Suppose you want to download a file from a URL, and you want to preserve
the name of the original file as closely as possible.  This snippet
provides a function for doing so.
"""
import mimetypes
import os
import urllib
import urllib.parse

import requests
from werkzeug.utils import secure_filename
def guess_filename_from_url(url, *, timeout=10):
    """Given a URL to download, guess a sensible filename.

    The name is taken from the last component of the URL's path and passed
    through ``secure_filename``.  If that name has no extension, a HEAD
    request is made to ``url`` and an extension is guessed from the
    ``Content-Type`` response header.

    Args:
        url: The URL that is going to be downloaded.
        timeout: Seconds to wait for the HEAD request (passed straight to
            ``requests.head``).  Only used when the URL's own filename has
            no extension.

    Returns:
        The guessed filename as a string.  When no extension can be
        determined (missing or unrecognised ``Content-Type``), the
        sanitised name is returned without one.

    Raises:
        requests.RequestException: If the HEAD request fails or times out.
    """
    # Get an initial filename to use for the saved file, which is safe
    # from malicious user input or weird URLs.
    filename = os.path.basename(urllib.parse.urlparse(url).path)
    filename = secure_filename(filename)

    # A filename that already has an extension needs no network round trip.
    _, ext = os.path.splitext(filename)
    if ext:
        return filename

    # Make a HEAD request and guess a file extension based on the
    # Content-Type header of the response.  HEAD requests do not follow
    # redirects by default in requests, so ask for that explicitly:
    # otherwise the header describes the redirect, not the file itself.
    response = requests.head(url, allow_redirects=True, timeout=timeout)
    content_type = response.headers.get('Content-Type')
    if not content_type:
        return filename

    # Drop any parameters ("text/html; charset=utf-8" -> "text/html");
    # guess_extension() only recognises the bare MIME type.
    mime_type = content_type.split(';', 1)[0].strip()

    # guess_extension() returns None for types it does not know, so only
    # append when a real extension came back.
    extension = mimetypes.guess_extension(mime_type)
    if extension:
        filename += extension
    return filename
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment