I am migrating some legacy code from Python 2 to Python 3. Most of the migration is done, but the two functions below are throwing errors.
Here is the code:
def upload_part(
    mp,
    fname,
    idx,
    offset,
):
    f = open(fname)
    f.seek(offset)
    content = f.read(CHUNK_SIZE)
    f.close()
    success = False
    for x in range(3):
        try:
            conn = get_connection()
            bucket = conn.lookup(mp.bucket_name)
            p = boto.s3.multipart.MultiPartUpload(bucket)
            p.id = mp.id
            p.key_name = mp.key_name
            p.upload_part_from_file(StringIO(content), idx + 1, replace=True)
            success = True
            break
        except Exception as e:
            logging.critical("Error in part upload - %s %s %s",
                             fname, idx, offset)
            logging.debug(traceback.format_exc())
    assert success, f"Part failed - {fname} {idx} {offset}"


# upload files to s3
def upload(
    options,
    table_id,
    table_nm,
    s3_bucket,
    s3_bucket_folder,
    file_list,
):
    conn = get_connection()
    bck = conn.get_bucket(s3_bucket)
    if bck is None:
        bck = conn.create_bucket(s3_bucket)
    pool = gevent.pool.Pool(options.concurrency)
    for fname in file_list:
        if s3_bucket_folder == ".":
            fpath = os.path.basename(fname)
        else:
            fpath = os.path.join(s3_bucket_folder, os.path.basename(fname))
        logging.debug("Putting: %s -> %s/%s ...", fname, s3_bucket, fpath)
        sys.stdout.flush()
        start = time.time()
        size = os.stat(fname).st_size
        if size > CHUNK_SIZE * 2 and options.concurrency > 1:
            mp = bck.initiate_multipart_upload(
                fpath, reduced_redundancy=options.reduced_redundancy
            )
            greenlets = []
            idx = offset = 0
            while offset < size:
                greenlets.append(pool.spawn(
                    upload_part, mp, fname, idx, offset))
                idx += 1
                offset += CHUNK_SIZE
            gevent.joinall(greenlets)
            cmp = mp.complete_upload()
        else:
            key = bck.new_key(fpath)
            f = open(fname)
            key.set_contents_from_file(
                f, reduced_redundancy=options.reduced_redundancy, replace=True
            )
            f.close()
        size = float(size) / 1024 / 1024
        elapsed = time.time() - start
        speed = int(size * 1000 / elapsed)
        logging.debug(" %6.1f MiB in %.1fs (%d KiB/s) ", size, elapsed, speed)
        file_name = os.path.basename(fname)
        upload_speed = "%d KiB/s" % speed
Here is the error:
2023-04-27 21:32:52,973 - root - CRITICAL - s3_upload.py: - 104: - upload_part: - Error in part upload - /data/analytics/ExportDir/full_sync/clinic_access_control.csv 2 10485760
Traceback (most recent call last):
File "src/gevent/greenlet.py", line 908, in gevent._gevent_cgreenlet.Greenlet.run
File "/home/analytics/data-analytics/DataSync/s3_upload.py", line 108, in upload_part
assert success, f"Part failed - {fname} {idx} {offset}"
AssertionError: Part failed - /data/analytics/ExportDir/full_sync/clinic_access_control.csv 2 10485760
2023-04-27T16:02:52Z <Greenlet at 0x7f0f1adf4f40: upload_part(<MultiPartUpload lybrate_full_sync/clinic_access_c, '/data/analytics/ExportDir/full_sync/clini, 2, 10485760)> failed with AssertionError
File "/home/analytics/data-analytics//DataSync/redshift_load_master.py", line 179, in exec_load_master
s3_upload.s3_upload(con_redshift, options)
File "/home/analytics/data-analytics/DataSync/s3_upload.py", line 288, in s3_upload
upload(
File "/home/analytics/data-analytics/DataSync/s3_upload.py", line 155, in upload
cmp = mp.complete_upload()
File "/home/analytics/.local/lib/python3.8/site-packages/boto/s3/multipart.py", line 318, in complete_upload
return self.bucket.complete_multipart_upload(self.key_name,
File "/home/analytics/.local/lib/python3.8/site-packages/boto/s3/bucket.py", line 1806, in complete_multipart_upload
raise self.connection.provider.storage_response_error(
boto.exception.S3ResponseError: S3ResponseError: 400 Bad Request
MalformedXML: The XML you provided was not well-formed or did not validate against our published schema
T7YNBP91V9SS7XNH5gsOzFhTRDfmgVJeKBNhPto7W5ibemMFjUs/Jv7zOXzo+YYSejjf5EMBY++D+SOr2q1M0f3uQ/c=
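From the CRITICAL log line it looks like every upload_part call fails, so I am guessing complete_upload() ends up being called with no successfully uploaded parts, and that is what produces the MalformedXML response. My best guess for the part failures themselves is the Python 2 leftovers in upload_part: the file is opened in text mode and the chunk is wrapped in StringIO, while under Python 3 the part data presumably has to be bytes. Is that the real cause, or is something else going on? This is roughly the change I was planning to try (just a sketch, assuming CHUNK_SIZE, get_connection() and the rest of s3_upload.py stay exactly as they are):

import io

def upload_part(mp, fname, idx, offset):
    # Read the chunk as bytes so seek()/read() count bytes, not decoded characters.
    with open(fname, "rb") as f:
        f.seek(offset)
        content = f.read(CHUNK_SIZE)

    success = False
    for _ in range(3):
        try:
            conn = get_connection()
            bucket = conn.lookup(mp.bucket_name)
            p = boto.s3.multipart.MultiPartUpload(bucket)
            p.id = mp.id
            p.key_name = mp.key_name
            # io.BytesIO instead of StringIO, since the payload is now bytes.
            p.upload_part_from_file(io.BytesIO(content), idx + 1, replace=True)
            success = True
            break
        except Exception:
            logging.critical("Error in part upload - %s %s %s", fname, idx, offset)
            logging.debug(traceback.format_exc())
    assert success, f"Part failed - {fname} {idx} {offset}"

I assume the plain open(fname) in the small-file branch of upload() has the same text-vs-bytes problem, but the traceback above only shows the multipart path, so I have not touched that yet.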