The XML you provided was not well-formed or did not validate against our published schema

I am migrating some legacy code from Python 2 to Python 3. Most of the migration is done, but the two functions below are throwing errors.

Here is the code:

def upload_part(mp, fname, idx, offset):
    # read this part's chunk from the source file
    f = open(fname)
    f.seek(offset)
    content = f.read(CHUNK_SIZE)
    f.close()

    success = False
    # try the part upload up to three times before giving up
    for x in range(3):
        try:
            conn = get_connection()
            bucket = conn.lookup(mp.bucket_name)

            p = boto.s3.multipart.MultiPartUpload(bucket)
            p.id = mp.id
            p.key_name = mp.key_name

            p.upload_part_from_file(StringIO(content), idx + 1, replace=True)
            success = True
            break
        except Exception as e:
            logging.critical("Error in part upload - %s %s %s",
                             fname, idx, offset)
            logging.debug(traceback.format_exc())

    assert success, f"Part failed - {fname} {idx} {offset}"


# upload files to s3
def upload(
    options,
    table_id,
    table_nm,
    s3_bucket,
    s3_bucket_folder,
    file_list,
):

    conn = get_connection()
    bck = conn.get_bucket(s3_bucket)
    if bck is None:
        bck = conn.create_bucket(s3_bucket)

    pool = gevent.pool.Pool(options.concurrency)

    for fname in file_list:
        if s3_bucket_folder == ".":
            fpath = os.path.basename(fname)
        else:
            fpath = os.path.join(s3_bucket_folder, os.path.basename(fname))

        logging.debug("Putting: %s -> %s/%s ...", fname, s3_bucket, fpath)

        sys.stdout.flush()

        start = time.time()

        size = os.stat(fname).st_size
        # use multipart upload for large files when concurrency is enabled
        if size > CHUNK_SIZE * 2 and options.concurrency > 1:
            mp = bck.initiate_multipart_upload(
                fpath, reduced_redundancy=options.reduced_redundancy
            )

            greenlets = []
            idx = offset = 0
            while offset < size:
                greenlets.append(pool.spawn(
                    upload_part, mp, fname, idx, offset))
                idx += 1
                offset += CHUNK_SIZE

            gevent.joinall(greenlets)
            cmp = mp.complete_upload()
        else:
            key = bck.new_key(fpath)
            f = open(fname)
            key.set_contents_from_file(
                f, reduced_redundancy=options.reduced_redundancy, replace=True
            )
            f.close()

        size = float(size) / 1024 / 1024
        elapsed = time.time() - start
        # size is now in MiB, so multiply by 1024 to report KiB/s
        speed = int(size * 1024 / elapsed)

        logging.debug(" %6.1f MiB in %.1fs (%d KiB/s) ", size, elapsed, speed)

        file_name = os.path.basename(fname)
        upload_speed = "%d KiB/s" % speed

Here is the error:

2023-04-27 21:32:52,973 - root - CRITICAL - s3_upload.py: - 104: - upload_part: - Error in part upload - /data/analytics/ExportDir/full_sync/clinic_access_control.csv 2 10485760
Traceback (most recent call last):
  File "src/gevent/greenlet.py", line 908, in gevent._gevent_cgreenlet.Greenlet.run
  File "/home/analytics/data-analytics/DataSync/s3_upload.py", line 108, in upload_part
    assert success, f"Part failed - {fname} {idx} {offset}"
AssertionError: Part failed - /data/analytics/ExportDir/full_sync/clinic_access_control.csv 2 10485760
2023-04-27T16:02:52Z <Greenlet at 0x7f0f1adf4f40: upload_part(<MultiPartUpload lybrate_full_sync/clinic_access_c, '/data/analytics/ExportDir/full_sync/clini, 2, 10485760)> failed with AssertionError

File "/home/analytics/data-analytics//DataSync/redshift_load_master.py", line 179, in exec_load_master s3_upload.s3_upload(con_redshift, options) File "/home/analytics/data-analytics/DataSync/s3_upload.py", line 288, in s3_upload upload( File "/home/analytics/data-analytics/DataSync/s3_upload.py", line 155, in upload cmp = mp.complete_upload() File "/home/analytics/.local/lib/python3.8/site-packages/boto/s3/multipart.py", line 318, in complete_upload return self.bucket.complete_multipart_upload(self.key_name, File "/home/analytics/.local/lib/python3.8/site-packages/boto/s3/bucket.py", line 1806, in complete_multipart_upload raise self.connection.provider.storage_response_error( boto.exception.S3ResponseError: S3ResponseError: 400 Bad Request MalformedXMLThe XML you provided was not well-formed or did not validate against our published schemaT7YNBP91V9SS7XNH5gsOzFhTRDfmgVJeKBNhPto7W5ibemMFjUs/Jv7zOXzo+YYSejjf5EMBY++D+SOr2q1M0f3uQ/c=
