Created
July 13, 2024 11:54
-
-
Save andybak/788c07fefb1d6fb9dfe6d546010188e1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scan all subdirectories and count how many files of each type exist.
import json | |
import os | |
import collections | |
from pprint import pprint | |
import matplotlib.pyplot as plt | |
def _load_json_object(file_path, label):
    """Parse *file_path* as JSON and return its top-level object, or None.

    A message is printed and None is returned when the file is not valid
    JSON or when its top-level value is not a JSON object. ``label``
    ("json" or "gltf") is only used in that message.
    """
    with open(file_path, "r", encoding="utf-8", errors="replace") as f:
        try:
            doc = json.load(f)
        except json.JSONDecodeError:
            print(f"Error decoding {label}", file_path)
            return None
    if not isinstance(doc, dict):
        # e.g. a top-level list: there is nothing to look up with .get()
        print(f"Unexpected {label} structure", file_path)
        return None
    return doc


def _format_signature(doc):
    """Return the sorted, comma-joined ``formatType`` values of doc["formats"]."""
    formats = doc.get("formats", [])
    if not isinstance(formats, list):
        formats = []
    return ",".join(sorted(
        str(entry.get("formatType", "MISSING")) if isinstance(entry, dict) else "MISSING"
        for entry in formats
    ))


def _gltf_generator(doc):
    """Return the part of doc["asset"]["generator"] before the first '.', or "unknown"."""
    asset = doc.get("asset", {})
    generator = asset.get("generator", "unknown") if isinstance(asset, dict) else "unknown"
    if not isinstance(generator, str):
        return "unknown"
    return generator.split(".")[0]


def scan_files(root, exclude=("d:/Poly/_Polyscan",)):
    """Walk every immediate subdirectory of *root* and tally what it contains.

    Files that sit directly in *root* are not visited; only its
    subdirectories are walked (recursively).

    Args:
        root: Directory whose subdirectories are scanned.
        exclude: Iterable of directory paths; an immediate subdirectory of
            *root* whose absolute path matches one of them is skipped.

    Returns:
        A tuple ``(files_total, dirs_total, file_count, gltf_stats,
        json_stats, dir_count)`` where

        * ``files_total`` / ``dirs_total`` are the number of files and
          directories visited,
        * ``file_count`` maps lower-cased extension (".jpeg" folded into
          ".jpg") to number of files,
        * ``gltf_stats`` maps the generator prefix of each ``.gltf`` file to
          a count,
        * ``json_stats`` maps the sorted ``formatType`` list of each
          ``.json`` file to a count,
        * ``dir_count`` maps the sorted set of extensions found in a
          directory (".jpeg" is *not* folded here) to a count.
    """
    dirs_total = 0
    files_total = 0
    dir_count = collections.defaultdict(int)
    file_count = collections.defaultdict(int)
    json_stats = collections.defaultdict(int)
    gltf_stats = collections.defaultdict(int)

    # normcase makes the comparison case-insensitive on Windows.
    excluded = {os.path.normcase(os.path.abspath(p)) for p in exclude}

    # List all subdirectories in the base directory
    subdirs = (
        os.path.join(root, d)
        for d in os.listdir(root)
        if os.path.isdir(os.path.join(root, d))
    )

    for subdir in subdirs:
        print(f"Scanning [{files_total} files {dirs_total} dirs]: {subdir} ")

        # Skip the excluded directories
        if os.path.normcase(os.path.abspath(subdir)) in excluded:
            print("excluding", subdir)
            continue

        for path, _dirs, files in os.walk(subdir):
            dirs_total += 1
            extensions = ",".join(sorted({os.path.splitext(x)[1].lower() for x in files}))
            dir_count[extensions] += 1

            for file in files:
                # Count the file up front so that an unparsable file is still
                # included in the total (it is already in file_count).
                files_total += 1

                ext = os.path.splitext(file)[1].lower()
                if ext == ".jpeg":
                    ext = ".jpg"
                file_count[ext] += 1

                if ext == ".json":
                    doc = _load_json_object(os.path.join(path, file), "json")
                    if doc is not None:
                        json_stats[_format_signature(doc)] += 1
                elif ext == ".gltf":
                    doc = _load_json_object(os.path.join(path, file), "gltf")
                    if doc is not None:
                        gltf_stats[_gltf_generator(doc)] += 1

    return files_total, dirs_total, file_count, gltf_stats, json_stats, dir_count
if __name__ == "__main__":
    import sys

    def autopct_func(pct):
        """Return the percentage label for a pie slice; '' for slices of 5% or less."""
        return ('%1.1f%%' % pct) if pct > 5 else ''

    # An optional first command-line argument overrides the default scan root.
    scan_root = sys.argv[1] if len(sys.argv) > 1 else "d:/Poly"

    total_files, total_dirs, file_data, gltf_data, json_data, dir_data = scan_files(scan_root)

    print(f"Total: {total_files} files {total_dirs} dirs")
    print("\n\n")
    pprint(file_data)
    pprint(gltf_data)
    pprint(json_data)
    pprint(dir_data)

    # Generate and display a matplotlib pie chart of the file types.
    # The rcParams are set *before* the figure is created because
    # 'figure.autolayout' is only consulted at figure creation time; setting
    # it afterwards has no effect on the figure that is shown.
    plt.rcParams.update({'font.size': 22, 'figure.autolayout': True})

    width = 1024
    height = 1024
    plt.figure(figsize=(width / 100, height / 100), dpi=100)

    # Hide the text label of extensions with 50 files or fewer
    labels = [key if value > 50 else '' for key, value in file_data.items()]
    plt.pie(list(file_data.values()), labels=labels, autopct=autopct_func)
    plt.show()

    # Alternative (disabled): bar chart of the file types
    # width = 1024
    # height = 768
    # plt.figure(figsize=(width / 100, height / 100), dpi=100)
    # plt.bar(range(len(file_data)), list(file_data.values()), align='center')
    # plt.xticks(range(len(file_data)), list(file_data.keys()))
    # plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment