Upload object to OSS in chunks with MinIO


I have this snippet:



import io
import math

from minio import Minio
from minio import helpers


get_client = Minio(
    endpoint='host',
    access_key='',
    secret_key='',
)


put_client = Minio(
    endpoint='host',
    access_key='',
    secret_key='',
)


helpers.MIN_PART_SIZE = 10 * 1024 * 1024


source_bucket = 'source-bucket'
get_object_name = 'object-name'
destination_bucket = 'destination-bucket'
upload_object_name = 'test-bucket-name'


stat = get_client.stat_object(source_bucket, get_object_name)
object_size = stat.size



chunk_size = 6000000
num_chunks = math.ceil(object_size/chunk_size)

print(f'Object size: {object_size} bytes')
print(f'Number of chunks: {num_chunks}')


bytes_store = bytearray()

for i in range(num_chunks):
    offset = i * chunk_size
    length = min(chunk_size, object_size - offset) - 1
    try:
        response = get_client.get_object(
            source_bucket,
            get_object_name,
            offset=offset,
            length=length,
        )
        data = response.read()

        put_client.put_object(
            destination_bucket,
            upload_object_name,
            io.BytesIO(data),
            -1,
            part_size=length,
        )


        print(f'Uploaded chunk {i} ({length} bytes)')
        bytes_store.extend(data)
    except Exception as e:
        print(f'Error processing chunk {i}:', e)
        break

The object size is 42238464 bytes and I am getting this error -> Error processing chunk 7: part size 238463 is not supported; minimum allowed 5MiB

The idea is that I want to upload in chunks with MinIO, but the client has a limitation here: MIN_PART_SIZE is hardcoded in the library to 5 * 1024 * 1024, and I want my parts to be much smaller, but I can't do it. Is there a way to get around that limit and customize it according to my needs?
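For context, the closest working alternative I have found is to let the client do the multipart splitting itself with a single streaming put_object call. This is only a minimal sketch (the bucket/object names and the 10 MiB part size are placeholders from my setup), and it still does not give me parts smaller than 5 MiB, which is what I actually want:

response = get_client.get_object(source_bucket, get_object_name)
try:
    # Stream the source object straight into put_object and let the MinIO
    # client split it into multipart parts internally (part_size >= 5 MiB).
    put_client.put_object(
        destination_bucket,
        upload_object_name,
        response,  # file-like object exposing read()
        length=object_size,
        part_size=10 * 1024 * 1024,
    )
finally:
    response.close()
    response.release_conn()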

A little update, I changed it to this:

import hashlib

helpers.MIN_PART_SIZE = 100000


get_client = Minio(
    endpoint='host',
    access_key='',
    secret_key='',
    secure=False
)


put_client = Minio(
    endpoint='host',
    access_key='',
    secret_key='',
    secure=False
)



source_bucket = 'source-bucket'
get_object_name = 'object-name'
destination_bucket = 'destination-bucket'
upload_object_name = 'test-bucket-name'


stat = get_client.stat_object(source_bucket, get_object_name)
object_size = stat.size



chunk_size = 6000000
num_chunks = math.ceil(object_size/chunk_size)

print(f'Object size: {object_size} bytes')
print(f'Number of chunks: {num_chunks}')


hash_object = hashlib.sha256()
bytes_store = bytearray()

for i in range(num_chunks):
    offset = i * chunk_size
    print(offset)
    length = min(chunk_size, object_size - offset)
    print(length)
    try:
        response = get_client.get_object(
            source_bucket,
            get_object_name,
            offset=offset,
            length=length,
        )
        data = response.read()

        put_client.put_object(
            destination_bucket,
            upload_object_name,
            io.BytesIO(data),
            -1,
            part_size=length,
        )


        print(f'Uploaded chunk {i} ({length} bytes)')
        bytes_store.extend(data)
    except Exception as e:
        print(f'Error processing chunk {i}:', e)
        break

I worked around their limitation, but now the problem is that each upload in the loop overwrites the object with the new chunk, so at the end only the last uploaded chunk remains and the other chunks are lost. Any suggestions on how to deal with that?
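One idea I am considering, since I already collect everything into bytes_store inside the loop, is to upload the re-assembled data once after the loop instead of once per chunk (each put_object call writes a complete object under the same key, so the per-chunk calls just replace each other). A rough sketch:

# After the download loop: upload the accumulated bytes as one object.
data = bytes(bytes_store)
put_client.put_object(
    destination_bucket,
    upload_object_name,
    io.BytesIO(data),
    length=len(data),
)

I would prefer not to hold the whole object in memory though, so I am still looking for a real chunk-by-chunk way to do this.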
