Last active
January 7, 2023 14:26
-
-
Save nackjicholson/e50b83fb55842854062e16928ce1e985 to your computer and use it in GitHub Desktop.
s3 list paginator tricks.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3

# Module-level S3 client, created once at import time and used by list_dirs.
s3_client = boto3.client('s3')
def list_dirs(bucket, prefix):
    """Yield the names of the direct child folders of the given prefix.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix ("folder") whose immediate children are listed.
            A trailing '/' is appended when missing. An empty prefix is
            left untouched so that it refers to the bucket root.

    Yields:
        Each child folder name, with the parent prefix and the surrounding
        '/' delimiters removed (e.g. "<prefix>/<subdir>/" -> "<subdir>").
    """
    # Only terminate a non-empty prefix: turning '' into '/' would list keys
    # that literally start with '/' instead of the top level of the bucket.
    if prefix and not prefix.endswith('/'):
        prefix += '/'

    paginator = s3_client.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/')
    for page in pages:
        # The loop variable must not be called `prefix`: shadowing the
        # parameter would hand a dict to str.replace() below.
        for common_prefix in page.get('CommonPrefixes', []):
            # Common prefixes look like "<prefix>/<subdir>/".
            # Drop the leading "<prefix>/" and the trailing delimiter,
            # leaving just "<subdir>".
            yield common_prefix['Prefix'].replace(prefix, '', 1).strip('/')
def list_s3_keys(bucket, prefix='', suffix=''):
    """Yield the keys in an S3 bucket that match a prefix and a suffix.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Only keys starting with this value are yielded. When it is
            a single string it is also sent to S3 as a server-side filter.
        suffix: Only keys ending with this value are yielded (checked
            client-side).

    Yields:
        Each matching object key as a string.
    """
    s3_client = boto3.client('s3')
    params = {'Bucket': bucket}
    # S3 accepts only a single string as Prefix; any other value (e.g. a
    # tuple of alternatives, which str.startswith also accepts) is applied
    # solely by the client-side check below.
    if isinstance(prefix, str):
        params['Prefix'] = prefix

    paginator = s3_client.get_paginator('list_objects_v2')
    for page in paginator.paginate(**params):
        # A page with no matching objects has no 'Contents' key at all, so
        # indexing it directly would raise KeyError on an empty listing.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if key.startswith(prefix) and key.endswith(suffix):
                yield key
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@markkvdb Did you try it? I have, and it does work. I don't think it matters that the `prefix` in the loop shadows the outer `prefix` variable because it's defined and only used on line 13, before the loops. A variable name change for the variable in the loop would make it more clear.