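S3 object exposed as a file object (fileobj) for reading a tar archive without downloading it in full.
Copied from the Stack Overflow question "How to list files inside tar in AWS S3 without downloading it?"
See my answer there for more details (the question URL is given in the comment below).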
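S3 object exposed as a file object (fileobj) for reading a tar archive without downloading it in full.
Copied from the Stack Overflow question "How to list files inside tar in AWS S3 without downloading it?"
See my answer there for more details (the question URL is given in the comment below).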
# https://stackoverflow.com/questions/56086604/how-to-list-files-inside-tar-in-aws-s3-without-downloading-it | |
import boto3 | |
import io | |
import tarfile | |
class S3File(io.BytesIO):
    """Read-only, seekable file-like view of an S3 object using ranged GETs.

    Each ``read`` issues one ``get_object`` request with an HTTP ``Range``
    header, so a consumer that seeks around (for example a tar reader given
    this object as its ``fileobj``) only downloads the bytes it asks for.

    Only ``read``, ``seek``, ``tell`` and ``close`` are redirected to S3.
    Every other method is inherited from ``io.BytesIO`` and operates on the
    (empty) in-memory buffer created by ``super().__init__()``.

    Attributes:
        bucket_name: Name of the bucket, passed as ``Bucket`` to the client.
        key_name: Object key, passed as ``Key`` to the client.
        s3client: Client object providing ``get_object`` (and ``head_object``
            when seeking relative to the end).
        offset: Current read position in bytes from the object's start.
        total_download: Number of body bytes received so far.
    """

    def __init__(self, bucket_name, key_name, s3client):
        super().__init__()
        self.bucket_name = bucket_name
        self.key_name = key_name
        self.s3client = s3client
        self.offset = 0
        self.total_download = 0
        # Object size in bytes; fetched lazily, only for seek(..., whence=2).
        self._size = None

    def close(self):
        """Do nothing, so the object stays usable after a consumer closes it."""
        return

    def read(self, size=-1):
        """Read up to ``size`` bytes from the current offset.

        Args:
            size: Maximum number of bytes to fetch. ``None`` or a negative
                value reads from the current offset to the end of the object.

        Returns:
            The bytes received, or ``b""`` when ``size`` is 0 or the offset
            is at or past the end of the object.

        Raises:
            Exception: Any error raised by ``get_object`` other than an
                unsatisfiable-range response is propagated unchanged, so a
                failed request is not mistaken for end of file.
        """
        if size == 0:
            # Avoid sending an inverted range such as "bytes=5-4".
            return b""

        start = self.offset
        if size is None or size < 0:
            byte_range = "bytes=%d-" % start
        else:
            byte_range = "bytes=%d-%d" % (start, start + size - 1)

        try:
            s3_object = self.s3client.get_object(
                Bucket=self.bucket_name, Key=self.key_name, Range=byte_range)
        except Exception as exc:
            if self._is_range_not_satisfiable(exc):
                # Start offset is at/after the end of the object: plain EOF.
                return b""
            raise

        result = s3_object['Body'].read()
        # Advance by what was actually received; the last read of an object
        # is usually shorter than requested.
        self.offset = start + len(result)
        self.total_download += len(result)
        print('read: offset = {}, size = {}, total download = {}'.format(
            start, len(result), self.total_download))
        return result

    def seek(self, offset, whence=0):
        """Move the read position and return the new absolute offset.

        Args:
            offset: Byte offset, interpreted according to ``whence``.
            whence: 0 = from the start, 1 = from the current position,
                2 = from the end (needs one ``head_object`` call, cached).

        Raises:
            ValueError: If ``whence`` is not 0, 1 or 2, or the resulting
                position would be negative.
        """
        if whence == 0:
            target = offset
        elif whence == 1:
            target = self.offset + offset
        elif whence == 2:
            target = self._object_size() + offset
        else:
            raise ValueError('invalid whence ({}, should be 0, 1 or 2)'.format(whence))
        if target < 0:
            raise ValueError('negative seek position {}'.format(target))

        print('seek: offset {} -> {} (diff = {} kB)'.format(
            self.offset, target, (target - self.offset) // 1000))
        self.offset = target
        return target

    def tell(self):
        """Return the current read position in bytes."""
        return self.offset

    def _object_size(self):
        """Return the object's size in bytes, querying S3 on first use."""
        if self._size is None:
            head = self.s3client.head_object(
                Bucket=self.bucket_name, Key=self.key_name)
            self._size = head['ContentLength']
        return self._size

    @staticmethod
    def _is_range_not_satisfiable(exc):
        """Tell whether ``exc`` carries an S3 InvalidRange / HTTP 416 response."""
        response = getattr(exc, 'response', None)
        if not isinstance(response, dict):
            return False
        code = response.get('Error', {}).get('Code')
        status = response.get('ResponseMetadata', {}).get('HTTPStatusCode')
        return code == 'InvalidRange' or status == 416