Last active
August 6, 2024 00:10
-
-
Save mara004/6fe0ac15d0cf303bed0aea2f22d8531f to your computer and use it in GitHub Desktop.
Safer tar extraction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2023 geisserml <[email protected]>
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause OR MPL-2.0
# Safer tar extraction (hopefully) preventing CVE-2007-4559 etc.
# Tries to use the most elegant strategy available in the caller's python version (>= 3.6)
__all__ = ["safer_tar_unpack"] | |
import sys | |
import tarfile

# PEP 706 extraction filters shipped with Python 3.12 and were backported to
# security releases of older branches, so probe for the feature itself rather
# than comparing interpreter version numbers.
if hasattr(tarfile, "data_filter"):  # PEP 706
    import shutil

    def safer_tar_unpack(archive_path, dest_dir):
        """Extract the tar archive at *archive_path* into *dest_dir*.

        Delegates to ``shutil.unpack_archive`` with the stdlib ``"data"``
        extraction filter. Any exception raised by ``shutil`` / ``tarfile``
        (e.g. for a rejected member) propagates unchanged.
        """
        shutil.unpack_archive(archive_path, dest_dir, format="tar", filter="data")

else:  # workaround
    from pathlib import Path

    def safer_tar_unpack(archive_path, dest_dir):
        """Extract the tar archive at *archive_path* into *dest_dir*.

        Fallback for interpreters without tarfile extraction filters. Every
        member is vetted before anything is written: only regular files and
        directories whose resolved target lies inside *dest_dir* are accepted.

        Raises:
            RuntimeError: if any member is not a regular file or directory,
                or would resolve to a location outside *dest_dir*.
        """
        dest_dir = Path(dest_dir).resolve()
        with tarfile.open(archive_path) as tar:
            vetted = []
            for m in tar.getmembers():
                target = (dest_dir / m.name).resolve()
                if m.isdir() and target == dest_dir:
                    # Root entry ("." / "./") as written by `tar -cf a.tar .`:
                    # it names dest_dir itself and carries no content, so skip
                    # it instead of misreporting a benign archive.
                    continue
                if not ((m.isfile() or m.isdir()) and dest_dir in target.parents):
                    raise RuntimeError("Path traversal, symlink or non-file member in tar archive (probably malicious).")
                vetted.append(m)
            # Extract only the members that passed the check above.
            tar.extractall(dest_dir, members=vetted)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also note: a known limitation of the above code is that existing files in `dir` may be overwritten by the archive. This is expected behavior.