Unable to figure out why my zip files are corrupted

35 Views Asked by At

So I have this utility that makes use of the archiver package to generate zip files. The source is a stream of s3 files. I am making use of passthrough stream to upload the archive directly on s3. The problem is that on production, the generated zip files are randomnly corrupted. when I try to unzip them I get

End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.

It cannot be issue with multi part archive because the library I am using to generate the zip file doesnt support multi part archives. and I have verified the binary content of the file and the ending bytes look problematic.

Here is the code:

'use strict';

const { S3 } = require('./../libs/s3');
const { Upload } = require('@aws-sdk/lib-storage');

const archiver = require('archiver');
const { PassThrough } = require('stream');
const { basename } = require('path');
const mAsync = require('async');

/**
 *
 * This utility can be used to generate a zip file out of a list of s3 files.
 *
 * @param {object} params
 * @param {Array<LinkObject>} params.sourceLinks 
 * @param {LinkObject} params.targetLink
 * @param {string} params.s3Region
[![enter image description here](https://i.stack.imgur.com/ctrsD.png)](https://i.stack.imgur.com/ctrsD.png) * 
 */
const S3ZipGenerator = function (params, callback) {
  const { sourceLinks, targetLink, s3Region } = params;
  const instance = S3.getInstance({ region: s3Region });

  const archiveStream = archiver('zip');
  archiveStream.on('error', (error) => {
    return callback(error);
  });

  const passthrough = new PassThrough();

  const { bucket, key } = targetLink;
  const putParams = {
    Bucket: bucket,
    Key: key,
    Body: passthrough,
    ContentType: 'application/zip'
  };

  const uploadS3Promise = new Upload({
    client: instance,
    params: putParams
  }).done();

  uploadS3Promise.then(() => {
    callback(null);
  }).catch((err) => {
    callback(err);
  });

  // archiveStream.on('end', finishHandler);
  // archiveStream.on('finish', finishHandler);

  archiveStream.pipe(passthrough);

  mAsync.forEachOf(sourceLinks, (source, index, cb) => {
    const s3Params = {
      Bucket: source.bucket,
      Key: source.key
    };
    S3.getInstance({ region: source.region }).send(S3.getObjectCommand(s3Params), (err, response) => {
      if (err) {
        return cb(err);
      }
      archiveStream.append(response.Body, { name: basename(source.key) });
      return cb(null);
    });
  }, (err) => {
    if (err) {
      return callback(err);
    }
    // when should we call finalize() actually?
    
    archiveStream.finalize();
  });
};

/**
 * @typedef LinkObject
 * @type {object}
 * @property {string} bucket
 * @property {string} key
 */

module.exports = S3ZipGenerator;

0

There are 0 best solutions below