Last active
December 25, 2019 09:54
-
-
Save hemebond/bd4abaa4fa676997147037e3df0adf60 to your computer and use it in GitHub Desktop.
Get file info for all files inside a zip or pak file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import hashlib | |
from zipfile import ZipFile, BadZipFile | |
from datetime import datetime | |
from vgio.quake.pak import PakFile, BadPakFile # https://github.com/joshuaskelly/vgio/ <3 | |
from typing import Union, BinaryIO | |
def get_digest(b: BinaryIO, chunk_size: int = 65536) -> str:
    """Calculate the SHA256 hash of a binary stream.

    The stream is consumed from its current position until ``read()``
    returns an empty value; it is neither rewound nor closed.

    :param b: file-like object opened in binary mode (must provide ``read``)
    :param int chunk_size: number of bytes requested per ``read`` call
    :return: the hex-encoded SHA256 digest of the bytes that were read
    :raises ValueError: if ``chunk_size`` is not a positive integer
    """
    # via https://stackoverflow.com/a/55542529/4828720
    if chunk_size < 1:
        # read(0) would end the loop immediately and a negative size would
        # slurp the whole stream in one go; neither is what the caller meant.
        raise ValueError("chunk_size must be a positive integer")

    h = hashlib.sha256()
    # Larger chunks than the 64-byte hash block size keep the number of
    # Python-level read()/update() calls low for big archive members.
    for chunk in iter(lambda: b.read(chunk_size), b""):
        h.update(chunk)
    return h.hexdigest()
def files_in_archive(archive_file: Union[str, BinaryIO], hashed: bool = True, recursive: bool = True) -> dict:
    """
    Read a .pak or .zip file and return a dict of information about the files inside

    e.g.:

        {
            'readme.txt': {
                'size': 3206,
                'timestamp': '2014-08-15T17:51:54.000Z',
                'sha256': '0a8dbd908c23e3d7a665ee30af4e38eaecb83b5a326b051c64883078d23b2251'
            }
        }

    The archive is first opened as a PAK file; if that raises ``BadPakFile``
    it is opened as a ZIP file instead. An entry gets a ``'timestamp'`` key
    only when its info object has a usable ``date_time`` attribute, a
    ``'sha256'`` key only when ``hashed`` is true, and a ``'files'`` key
    (a nested dict of the same shape) only when ``recursive`` is true and
    the entry name ends in ``.zip`` or ``.pak`` (case-insensitive).

    :param archive_file: Either the path to the file, or a file-like object
    :type archive_file: str or BinaryIO
    :param bool hashed: include a sha256 hash of each file
    :param bool recursive: recurse into any file archives contained inside
    :return: mapping of member path to a dict describing that member
    :raises zipfile.BadZipFile: if the input (or a nested archive) is neither
        a readable PAK nor a readable ZIP file
    """
    try:
        archive = PakFile(archive_file)
    except BadPakFile:
        archive = ZipFile(archive_file)

    files = {}
    # try/finally so the archive handle is released even when a member
    # cannot be read or a nested archive turns out to be corrupt.
    try:
        for file_path in archive.namelist():
            info = archive.getinfo(file_path)
            entry = {'size': info.file_size}
            files[file_path] = entry

            if hasattr(info, 'date_time'):
                # date_time values that do not form a valid calendar date
                # (e.g. a zeroed month or day) make datetime() raise; skip
                # the timestamp rather than abort the whole listing.
                try:
                    stamp = datetime(*info.date_time)
                except (TypeError, ValueError):
                    stamp = None
                if stamp is not None:
                    # NOTE(review): the trailing 'Z' is a literal; no timezone
                    # conversion is applied to the stored value.
                    entry['timestamp'] = stamp.strftime("%Y-%m-%dT%H:%M:%S.000Z")

            if hashed:
                with archive.open(file_path) as f:
                    entry['sha256'] = get_digest(f)

            file_ext = os.path.splitext(file_path)[1].lower()
            if recursive and file_ext in ('.zip', '.pak'):
                # Open a fresh handle: the one used for hashing has already
                # been read to the end, so it cannot be reused for parsing.
                with archive.open(file_path) as f:
                    entry['files'] = files_in_archive(f, hashed=hashed, recursive=recursive)
    finally:
        archive.close()

    return files
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment