Skip to content

Instantly share code, notes, and snippets.

@shadiakiki1986
Created July 29, 2021 16:36
Show Gist options
  • Save shadiakiki1986/b4937be1ccb7b56ad31e224bed79d5cf to your computer and use it in GitHub Desktop.
Save shadiakiki1986/b4937be1ccb7b56ad31e224bed79d5cf to your computer and use it in GitHub Desktop.
S3 object as a file object for reading a tar archive without a full download
# https://stackoverflow.com/questions/56086604/how-to-list-files-inside-tar-in-aws-s3-without-downloading-it
import boto3
import io
import tarfile
class S3File(io.BytesIO):
    """Read-only, seekable file object backed by ranged GETs on one S3 object.

    Every ``read`` issues a ``get_object`` call with an HTTP ``Range`` header,
    so only the bytes that are actually requested get downloaded.  Only
    ``close``, ``read``, ``seek`` and ``tell`` are overridden; any other method
    inherited from ``io.BytesIO`` (``readline``, ``getvalue``, ...) still
    operates on the empty in-memory buffer, not on the S3 object.

    Args:
        bucket_name: Name of the bucket holding the object.
        key_name: Key of the object inside the bucket.
        s3client: Client object exposing ``get_object`` (and ``head_object``
            when seeking relative to the end), e.g. ``boto3.client('s3')``.
    """

    def __init__(self, bucket_name, key_name, s3client):
        super().__init__()
        self.bucket_name = bucket_name
        self.key_name = key_name
        self.s3client = s3client
        self.offset = 0  # current read position within the S3 object
        self.total_download = 0  # number of bytes fetched so far
        self._size = None  # object length, looked up lazily for SEEK_END

    def close(self):
        """Do nothing: no connection is held open between reads."""
        return

    @staticmethod
    def _error_code(err):
        """Return the service error code carried by *err*, or None.

        botocore's ``ClientError`` exposes the parsed error as
        ``err.response['Error']['Code']``; duck-typing on that attribute avoids
        importing botocore here.
        """
        response = getattr(err, "response", None)
        if not isinstance(response, dict):
            return None
        error = response.get("Error")
        if not isinstance(error, dict):
            return None
        return error.get("Code")

    def _object_size(self):
        """Return the total object length, fetched once via ``head_object``."""
        if self._size is None:
            head = self.s3client.head_object(Bucket=self.bucket_name, Key=self.key_name)
            self._size = head['ContentLength']
        return self._size

    def _fetch(self, start, size):
        """Download up to *size* bytes from *start*; ``size=None`` reads to the end.

        Returns ``b""`` when the requested range starts past the end of the
        object (S3 answers ``InvalidRange``).  Any other error is re-raised so
        that failures are not mistaken for end-of-file.
        """
        if size is None:
            byte_range = "bytes=%d-" % start
        else:
            byte_range = "bytes=%d-%d" % (start, start + size - 1)
        try:
            s3_object = self.s3client.get_object(
                Bucket=self.bucket_name, Key=self.key_name, Range=byte_range
            )
        except Exception as err:
            if self._error_code(err) == "InvalidRange":
                return b""
            raise
        return s3_object['Body'].read()

    def read(self, size=-1):
        """Read up to *size* bytes from the current offset.

        Args:
            size: Maximum number of bytes to return.  ``None`` or a negative
                value reads everything up to the end of the object; ``0``
                returns ``b""`` without contacting S3.

        Returns:
            The bytes read; empty at or past the end of the object.

        Raises:
            Exception: whatever ``get_object`` raises, except the
                ``InvalidRange`` error that signals end-of-file.
        """
        start = self.offset
        if size is None or size < 0:
            data = self._fetch(start, None)
        elif size == 0:
            data = b""
        else:
            data = self._fetch(start, size)
        # Account for what was really received: a read near the end of the
        # object returns fewer bytes than requested.
        self.offset = start + len(data)
        self.total_download += len(data)
        print('read: offset = {}, size = {}, total download = {}'.format(start, size, self.total_download))
        return data

    def seek(self, offset, whence=0):
        """Move the read position and return the new absolute offset.

        Args:
            offset: Byte offset, interpreted according to *whence*.
            whence: ``0`` (from start), ``1`` (from current position) or
                ``2`` (from end; triggers one ``head_object`` call).

        Raises:
            ValueError: if *whence* is unknown or the target is negative.
        """
        if whence == io.SEEK_SET:
            target = offset
        elif whence == io.SEEK_CUR:
            target = self.offset + offset
        elif whence == io.SEEK_END:
            target = self._object_size() + offset
        else:
            raise ValueError("invalid whence ({}, should be 0, 1 or 2)".format(whence))
        if target < 0:
            raise ValueError("negative seek position {}".format(target))
        print('seek: offset {} -> {} (diff = {} kB)'.format(self.offset, target, (target-self.offset)//1000))
        self.offset = target
        return target

    def tell(self):
        """Return the current absolute read offset."""
        return self.offset
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment