Skip to content

Instantly share code, notes, and snippets.

@alexwlchan
Created February 28, 2017 09:33
Show Gist options
  • Save alexwlchan/97d7c80bb7227ffb6dc348c5983ab1cb to your computer and use it in GitHub Desktop.
Save alexwlchan/97d7c80bb7227ffb6dc348c5983ab1cb to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""Guess a filename from a URL.
Suppose you want to download a file from a URL, and you want to preserve
the name of the original file as closely as possible. This snippet
provides a function for doing so.
"""
import mimetypes
import os
import urllib
import urllib.parse

import requests
from werkzeug.utils import secure_filename
def guess_filename_from_url(url):
    """Given a URL to download, guess a sensible filename.

    The name is taken from the last component of the URL's path and passed
    through ``secure_filename``.  If the result has no file extension, a
    HEAD request is made to ``url`` and an extension is guessed from the
    Content-Type header of the response.  When no extension can be guessed
    (missing header, unknown media type), the name is returned without one.

    :param url: The URL that is going to be downloaded.
    :returns: The guessed filename, as a string.
    """
    # Get an initial filename to use for the saved file, which is safe
    # from malicious user input or weird URLs.
    filename = os.path.basename(urllib.parse.urlparse(url).path)
    filename = secure_filename(filename)

    # A name that already carries an extension needs no network round trip.
    _, ext = os.path.splitext(filename)
    if ext:
        return filename

    # Make a HEAD request, and guess a file extension based on the
    # Content-Type header of the response.
    resp = requests.head(url)
    content_type = resp.headers.get('Content-Type')
    if not content_type:
        return filename

    # The header may carry parameters ("text/html; charset=utf-8"); the
    # mimetypes lookup only recognises the bare "type/subtype" part.
    mime_type = content_type.split(';', 1)[0].strip()

    # guess_extension() returns None for media types it does not know, so
    # only append when we actually got an extension back.
    extension = mimetypes.guess_extension(mime_type)
    if extension:
        filename += extension

    return filename
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment