Created
September 4, 2021 08:53
-
-
Save romunov/b79d63caa8874618e734329eaa3d6950 to your computer and use it in GitHub Desktop.
Skip processed entities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Off the shelf way to skip already processed entities | |
This script processes files, remembers which files have already been
processed, and skips them on the next run.
""" | |
import glob | |
import os | |
import shelve | |
def create_files(file_list):
    """Write a small placeholder file at every path in ``file_list``.

    Each path is opened in text write mode, so an existing file at that
    location is overwritten. The file receives the text ``In <basename>``,
    where ``<basename>`` is the final component of its path.
    """
    for target in file_list:
        label = os.path.basename(target)
        with open(target, mode="wt") as handle:
            handle.write(f"In {label}")
def process_file(x):
    """Print the lines of the text file located at path ``x``.

    The file is read in full and its lines (line endings preserved) are
    printed as one list.
    """
    with open(file=x, mode="rt") as source:
        lines = list(source)
    print(lines)
def list_files(path):
    """Return the sorted paths of entries in ``path`` named ``file*``.

    Only names matching the glob pattern ``file*`` are returned; each result
    is ``path`` joined with the matching name.
    """
    pattern = os.path.join(path, "file*")
    matches = glob.glob(pattern)
    matches.sort()
    return matches
# noinspection PyBroadException
def run_process(files, cache=None):
    """Process each file that has not been recorded as processed yet.

    A file is identified by its base name. When that name is already a key
    in the cache the file is skipped; otherwise it is handed to
    ``process_file`` and then recorded in the cache with the value ``"OK"``.
    A failure while handling one file is reported on stdout and does not
    stop the remaining files from being handled.

    Args:
        files: Iterable of file paths to consider.
        cache: Mapping used to remember processed files. Defaults to the
            module-level ``shelf``.

    Returns:
        A message stating how many files were actually processed during
        this call; skipped and failed files are not counted.
    """
    # Fall back to the shelf opened by the script when no cache is supplied.
    store = shelf if cache is None else cache
    processed = 0
    for obj in files:
        # Deliberately broad: one bad file must not abort the whole run.
        try:
            key = os.path.basename(obj)
            if key in store:
                print(f"Skipping {obj}")
                continue
            process_file(x=obj)
            # Record only after processing succeeded, so that a failed file
            # is retried on the next run.
            store[key] = "OK"
            processed += 1
        except Exception as e:
            print(f"Failed ({e})")
    return f"Processed {processed} files."
# This is a file where memories of processed files go.
CACHE_FILE = "processed.cache"

if __name__ == "__main__":
    # Names of the files this script creates. Only these are deleted during
    # cleanup, so unrelated "file*" entries that already existed in the
    # working directory are never removed.
    created_files = []
    try:
        # `shelf` is bound at module level on purpose: run_process() reads it
        # as a global. The with-block closes the shelf even if a step fails.
        with shelve.open(filename=CACHE_FILE) as shelf:
            # First run: nothing is cached yet, so every file gets processed.
            batch = ["file1", "file2", "file3"]
            created_files.extend(batch)
            create_files(file_list=batch)
            available_files = list_files(path=".")
            run_process(files=available_files)

            # Second run: earlier files are skipped, only new ones processed.
            batch = ["file4", "file5"]
            created_files.extend(batch)
            create_files(file_list=batch)
            available_files = list_files(path=".")
            run_process(files=available_files)
    finally:
        # Remove the files that were created during the execution of this
        # script.
        for path in created_files:
            if os.path.exists(path):
                os.unlink(path)
        # Depending on the dbm backend, shelve may add an extension to the
        # file name and may create more than one file, so remove the known
        # variants as well as the bare name.
        for suffix in ("", ".db", ".dat", ".dir", ".bak", ".pag"):
            cache_path = CACHE_FILE + suffix
            if os.path.exists(cache_path):
                os.unlink(cache_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment