Last active
April 10, 2023 13:12
-
-
Save dataplayer12/400cf2ee6584389dd4a1be144691f147 to your computer and use it in GitHub Desktop.
Download the SA-1B dataset from the Segment Anything paper by FAIR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os
import shutil

import wget
def parse_arguments(argv=None):
    """Parse the command-line options of the download script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which is
            the behavior existing callers rely on.

    Returns:
        argparse.Namespace with the attributes ``links_file``,
        ``start_chunk``, ``end_chunk`` and ``output_dir``.
    """
    p = argparse.ArgumentParser()
    p.add_argument('--links-file', type=str, required=True, help='Path of text file downloaded from https://ai.facebook.com/datasets/segment-anything-downloads/')
    p.add_argument('--start-chunk', type=int, required=False, default=0, help='Index of the starting chunk to download. Note the dataset is divided into ~1000 chunks of tar files')
    p.add_argument('--end-chunk', type=int, required=False, default=-1, help='Index of the end chunk.')
    p.add_argument('--output-dir', type=str, required=False, default='./', help='Output directory to store data in')
    # parse_args(None) falls back to sys.argv[1:], so the no-argument call
    # made by main() behaves exactly as before.
    return p.parse_args(argv)
def download_chunk(name, link, chunk_num, n_chunks, outdir):
    """Download one chunk into *outdir* and store it under *name*.

    Args:
        name: File name the downloaded file is renamed to inside *outdir*.
        link: URL passed to ``wget.download``.
        chunk_num: Position of this chunk, used only in the progress message.
        n_chunks: Total number of chunks, used only in the progress message.
        outdir: Existing directory the file is downloaded into.

    Raises:
        FileNotFoundError: If no file exists at the path reported by
            ``wget.download`` after the download returns.
    """
    print(f'Downloading chunk number {chunk_num} of {n_chunks} with name {name}')
    cwd = os.getcwd()
    # The download is written relative to the current directory, so work
    # from inside outdir for the duration of the transfer.
    os.chdir(outdir)
    try:
        # wget.download returns the name of the file it actually wrote; use
        # that instead of re-deriving the name from the URL.
        downloaded = wget.download(link)
        if not downloaded or not os.path.isfile(downloaded):
            raise FileNotFoundError(f'File {downloaded} not found')
        shutil.move(downloaded, name)
    finally:
        # Always restore the caller's working directory, even on failure.
        os.chdir(cwd)
    print(f'Finished downloading {name}')
def main():
    """Read the links file and download the requested range of chunks.

    The links file is expected to hold one header line followed by
    tab-separated ``name<TAB>link`` rows. Rows from ``--start-chunk`` up to
    (but not including) ``--end-chunk`` are downloaded into ``--output-dir``;
    an end chunk of -1 means "through the last row".
    """
    args = parse_arguments()
    os.makedirs(args.output_dir, exist_ok=True)

    with open(args.links_file, 'r') as f:
        # Skip only the header line. Blank lines (including the empty string
        # after a trailing newline) are filtered instead of blindly dropping
        # the last element, so a file without a final newline keeps its
        # last link.
        rows = f.read().split('\n')[1:]
    names_links = [line.split('\t') for line in rows if line.strip()]

    end = len(names_links) if args.end_chunk == -1 else args.end_chunk
    selected = names_links[args.start_chunk:end]
    # Report the number of chunks actually selected, so the "N of M"
    # progress message stays correct for any start/end combination.
    n_chunks = len(selected)

    for chunk_index, (name, link) in enumerate(selected, start=1):
        download_chunk(name, link, chunk_index, n_chunks, args.output_dir)
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How to use
pip install wget