Created
December 5, 2016 18:17
-
-
Save CalebFenton/f08c351abae246b9b788dc4b3b41323c to your computer and use it in GitHub Desktop.
Recursively find and copy files with some file type.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""ferret_files.py: Recursively find and copy files with some file type.""" | |
import hashlib | |
import os | |
import shutil | |
import argh | |
import magic | |
def get_matches(input_dir, target_type):
    """Yield paths of regular files under *input_dir* whose magic type matches.

    Walks *input_dir* recursively with ``os.walk``. Symbolic links are
    reported on stdout and skipped. For every other file the description
    returned by ``magic.from_file`` is compared case-insensitively against
    *target_type*; a file matches when *target_type* occurs anywhere in
    that description.

    Args:
        input_dir: Root directory to search.
        target_type: Substring to look for in the magic description
            (for example ``'mach-o'``). Case is ignored.

    Yields:
        The path of each matching file, joined from the walked directory
        and the file name.
    """
    # The description is lower-cased before comparing, so the needle must be
    # lower-cased too; otherwise a target such as 'Mach-O' could never match.
    needle = target_type.lower()
    for root, _dirnames, filenames in os.walk(input_dir):
        for filename in filenames:
            file_path = os.path.join(root, filename)
            if os.path.islink(file_path):
                # Never follow links: only files that physically live in the
                # tree are inspected.
                print(file_path + " is link")
                continue
            the_type = magic.from_file(file_path)
            if needle in the_type.lower():
                yield file_path
def _sha256_of_file(path, chunk_size=1024 * 1024):
    """Return the hex SHA-256 digest of the file at *path*, read in chunks."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        # Feed the hash incrementally so large binaries are never held in
        # memory all at once.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


@argh.arg('--target_type', default='mach-o', help='String to look for in magic file type')
def main(input_dir, output_dir, target_type='mach-o', pretend=False):
    """Copy every matching file under *input_dir* into *output_dir*.

    Each file yielded by ``get_matches`` is copied to
    ``<output_dir>/<sha256 of its contents>``, so files with identical
    contents end up at the same destination path.

    Args:
        input_dir: Root directory to search recursively.
        output_dir: Directory receiving the copies; created if it does not
            exist (unless *pretend* is set).
        target_type: Substring to look for in the magic file type.
        pretend: When true, only print the planned copies; nothing is
            created or copied.
    """
    # A dry run must leave the filesystem untouched, so the output directory
    # is only created when files will actually be copied.
    if not pretend and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for path in get_matches(input_dir, target_type):
        sha256 = _sha256_of_file(path)
        out_path = os.path.join(output_dir, sha256)
        print("COPY SRC=%s\n DEST=%s" % (path, out_path))
        if not pretend:
            shutil.copyfile(path, out_path)
# When executed as a script (not imported), hand `main` to argh's dispatcher.
if __name__ == "__main__":
    argh.dispatch_command(main)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment