I have a utility that uses the archiver package to generate zip files. The sources are streams of S3 objects, and I pipe the archive through a PassThrough stream so it can be uploaded directly back to S3.
The problem is that, on production, the generated zip files are randomly corrupted. When I try to unzip them I get:
End-of-central-directory signature not found. Either this file is not
a zipfile, or it constitutes one disk of a multi-part archive. In the
latter case the central directory and zipfile comment will be found on
the last disk(s) of this archive.
It cannot be an issue with a multi-part archive, because the library I am using to generate the zip does not support multi-part archives. I have also verified the binary content of the file, and the ending bytes look problematic.
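For reference, this is roughly the check I ran against a downloaded copy of one of the broken archives (corrupted.zip is just a placeholder path): a healthy zip should contain the end-of-central-directory signature PK\x05\x06 near the very end of the file.
'use strict';
const fs = require('fs');

// The end-of-central-directory record starts with the 4-byte signature
// 0x50 0x4B 0x05 0x06 ("PK\x05\x06") and sits within the last
// 22 + 65535 bytes of a valid archive (record size + max comment length).
const buf = fs.readFileSync('corrupted.zip'); // placeholder: local copy of a broken archive
const tail = buf.subarray(Math.max(0, buf.length - 22 - 65535));
const eocdOffset = tail.lastIndexOf(Buffer.from([0x50, 0x4b, 0x05, 0x06]));

if (eocdOffset === -1) {
  console.log('no end-of-central-directory signature in the trailing bytes');
} else {
  console.log(`EOCD signature found ${tail.length - eocdOffset} bytes from the end`);
}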
Here is the code:
'use strict';
const { S3 } = require('./../libs/s3');
const { Upload } = require('@aws-sdk/lib-storage');
const archiver = require('archiver');
const { PassThrough } = require('stream');
const { basename } = require('path');
const mAsync = require('async');
/**
*
* This utility can be used to generate a zip file out of a list of s3 files.
*
* @param {object} params
* @param {Array<LinkObject>} params.sourceLinks
* @param {LinkObject} params.targetLink
* @param {string} params.s3Region
* @param {function} callback
*/
const S3ZipGenerator = function (params, callback) {
  const { sourceLinks, targetLink, s3Region } = params;
  const instance = S3.getInstance({ region: s3Region });

  const archiveStream = archiver('zip');
  archiveStream.on('error', (error) => {
    return callback(error);
  });

  const passthrough = new PassThrough();
  const { bucket, key } = targetLink;
  const putParams = {
    Bucket: bucket,
    Key: key,
    Body: passthrough,
    ContentType: 'application/zip'
  };

  const uploadS3Promise = new Upload({
    client: instance,
    params: putParams
  }).done();

  uploadS3Promise.then(() => {
    callback(null);
  }).catch((err) => {
    callback(err);
  });

  // archiveStream.on('end', finishHandler);
  // archiveStream.on('finish', finishHandler);
  archiveStream.pipe(passthrough);

  mAsync.forEachOf(sourceLinks, (source, index, cb) => {
    const s3Params = {
      Bucket: source.bucket,
      Key: source.key
    };
    S3.getInstance({ region: source.region }).send(S3.getObjectCommand(s3Params), (err, response) => {
      if (err) {
        return cb(err);
      }
      archiveStream.append(response.Body, { name: basename(source.key) });
      return cb(null);
    });
  }, (err) => {
    if (err) {
      return callback(err);
    }
    // when should we call finalize() actually?
    archiveStream.finalize();
  });
};
/**
* @typedef LinkObject
* @type {object}
* @property {string} bucket
* @property {string} key
*/
module.exports = S3ZipGenerator;
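For completeness, the utility is invoked roughly like this (the module path, bucket names, keys, and region below are placeholders):
const S3ZipGenerator = require('./s3-zip-generator'); // placeholder path to the module above

S3ZipGenerator({
  sourceLinks: [
    { bucket: 'source-bucket', key: 'reports/a.pdf', region: 'eu-west-1' },
    { bucket: 'source-bucket', key: 'reports/b.pdf', region: 'eu-west-1' }
  ],
  targetLink: { bucket: 'target-bucket', key: 'archives/reports.zip' },
  s3Region: 'eu-west-1'
}, (err) => {
  if (err) {
    return console.error('zip generation failed', err);
  }
  console.log('zip uploaded');
});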