How to synchronously handle files received from the frontend in a Node API

316 Views Asked by At

First of all, I apologize for my terrible English :D

Hello, I have the following situation that is puzzling me. I have a frontend made in React and a backend in Node that receives requests through Express. The idea is that the frontend sends a PDF file using a POST request, and the backend processes this file, applying the necessary treatments (separating the pages into more files and extracting data from inside the PDF), and at the end returns the treated PDFs. I wanted to return these new files in the POST response, but I am having a problem with asynchronous behavior. To process the file I use the pdf2json library, and this library apparently processes the chosen PDF asynchronously and lets the execution flow continue. My problem is that when I ask the library to process my PDF, it puts the work in the "background" and execution continues; the flow reaches its end and the POST response is sent before the library has finished handling the PDFs.

When a POST request arrives, the program executes the function getPdf():

/**
 * Runs pdfSeparator on the received PDF and then returns the data gathered by
 * getInformationsPdf().
 *
 * @param {string} fileLocation - Location of the PDF received in the request.
 * @returns {Promise<*>} Resolves with the value getInformationsPdf() resolves with.
 */
async function getPdf(fileLocation) {
    // getInformationsPdf() reads from folderTemp, so the separator has to finish first.
    await pdfSeparator(fileLocation, folderTemp);

    // Capture the resolved value; previously an undeclared `arrayObj` was
    // returned and the result of getInformationsPdf() was thrown away.
    const arrayObj = await getInformationsPdf();
    return arrayObj;
}

When it executes getInformationsPdf(), the program runs everything but does not wait for the PDF to be processed by the library. In this function I go through each separated file in a forEach, use pdfParser.loadPDF(fileLocation) to load my PDF, and wait for everything to be read in pdfParser.on("pdfParser_dataReady", pdfData => {}). Because this method is asynchronous, it only registers the call and puts the work in the background, so the flow continues to the end of the block and moves on to the next forEach item while the PDF has not even been processed yet. In the end, the whole forEach has already run and the PDFs have not been processed yet; the program sends the response while the data from the PDFs is still on the backend. Is there a way to force the code to wait for the processing to finish before sending the response?

// Lists the files in folderTemp, asks a PDFParser to load each one, and
// registers handlers that collect nine text fields per file into arrayObjs.
// NOTE(review): nothing in this function awaits the "pdfParser_dataReady"
// handlers, so `return arrayObjs` runs as soon as the forEach loop finishes.
// Per the question text the handlers fire later, which would leave the
// returned array empty at that point.
async function getInformationsPdf() {
    let arrayObjs = []
    fs.readdirSync(folderTemp).forEach(file => {
        var pdfParser = new PDFParser(this, 1);
        // folderTemp is concatenated directly with the file name.
        let fileLocation = folderTemp + file;
        pdfParser.loadPDF(fileLocation);
        
        // Parse failures are only logged; the caller is not informed.
        pdfParser.on("pdfParser_dataError", (errData) => {
            console.error(errData.parserError)
        });

        pdfParser.on("pdfParser_dataReady", (pdfData) => {
            // Entries 32..40 of the first page's Texts, with "%20" turned into spaces.
            let t1 = pdfData.formImage.Pages[0].Texts[32].R[0].T.replace(/%20/g, " ");
            let t2 = pdfData.formImage.Pages[0].Texts[33].R[0].T.replace(/%20/g, " ");
            let t3 = pdfData.formImage.Pages[0].Texts[34].R[0].T.replace(/%20/g, " ");
            let t4 = pdfData.formImage.Pages[0].Texts[35].R[0].T.replace(/%20/g, " ");
            let t5 = pdfData.formImage.Pages[0].Texts[36].R[0].T.replace(/%20/g, " ");
            let t6 = pdfData.formImage.Pages[0].Texts[37].R[0].T.replace(/%20/g, " ");
            let t7 = pdfData.formImage.Pages[0].Texts[38].R[0].T.replace(/%20/g, " ");
            let t8 = pdfData.formImage.Pages[0].Texts[39].R[0].T.replace(/%20/g, " ");
            let t9 = pdfData.formImage.Pages[0].Texts[40].R[0].T.replace(/%20/g, " ");
            let textsPdf = [t1, t2, t3, t4, t5, t6, t7, t8, t9];
            let fileWithTexts = {
                file: fileLocation,
                texts: textsPdf
            }

            renameFileMatch(fileWithTexts);
            arrayObjs.push(fileWithTexts);
        });
    })
    // Reached without waiting for any of the handlers registered above.
    return arrayObjs;
}
2

There are 2 best solutions below

2
On BEST ANSWER

If I've understood the question correctly, getInformationsPdf() goes through a loop for each file in that folder, and it doesn't wait for the processing inside the pdfParser.on("pdfParser_dataReady" to finish before going on, so in this bit of code:

    // Excerpt of getPdf()'s body as posted in the question.
    // NOTE(review): the value resolved by getInformationsPdf() is not stored,
    // and `arrayObj` is not assigned anywhere in this excerpt.
    let pdf = fileLocation;

    await pdfSeparator(pdf, folderTemp);

    await getInformationsPdf();
    return arrayObj

it runs return arrayObj before the pdfs are actually finished processing, right?

So the pattern I think you should use is to make an array of Promises with fs.readdirSync(folderTemp).map, and then make the promise resolve at the end of pdfParser.on("pdfParser_dataReady". Then, you can await Promise.all() all the promises

It might look something like this:

/**
 * Extracts the nine text fields stored at Texts[32]..Texts[40] of the first
 * page of parsed PDF data, replacing every "%20" with a space.
 *
 * @param {object} pdfData - Payload delivered with the "pdfParser_dataReady" event.
 * @returns {string[]} The nine decoded text values, in index order.
 * @throws {Error} When fewer than nine entries exist from index 32 onwards.
 * @throws {TypeError} When the expected formImage/Pages/Texts structure is missing.
 */
function extractPdfTexts(pdfData) {
    const firstIndex = 32;
    const fieldCount = 9;
    const fields = pdfData.formImage.Pages[0].Texts.slice(firstIndex, firstIndex + fieldCount);
    // slice() silently returns a shorter array, so a truncated page must be rejected explicitly.
    if (fields.length !== fieldCount) {
        throw new Error(
            `Expected ${fieldCount} text fields starting at index ${firstIndex}, found ${fields.length}`
        );
    }
    return fields.map((field) => field.R[0].T.replace(/%20/g, " "));
}

/**
 * Parses every file found in folderTemp and resolves once all of them have
 * been handled.
 *
 * Each file gets its own Promise that settles from the parser's events, and
 * Promise.all() waits for the whole set before the function returns.
 *
 * @returns {Promise<Array<{file: string, texts: string[]}>>} One entry per
 *   file, in the order returned by fs.readdirSync (not completion order).
 *   Rejects with the parser's error payload if any file fails to parse, or
 *   with the thrown error if the text extraction or rename step fails.
 */
async function getInformationsPdf() {
    const pending = fs.readdirSync(folderTemp).map((file) => {
        return new Promise((resolve, reject) => {
            // Constructor arguments are kept exactly as in the question's code.
            const pdfParser = new PDFParser(this, 1);
            const fileLocation = folderTemp + file;

            pdfParser.on("pdfParser_dataError", (errData) => {
                console.error(errData.parserError);
                reject(errData);
            });

            pdfParser.on("pdfParser_dataReady", (pdfData) => {
                // An exception thrown inside an event listener would never reach
                // the Promise, leaving Promise.all() pending forever; route it
                // to reject() instead.
                try {
                    const fileWithTexts = {
                        file: fileLocation,
                        texts: extractPdfTexts(pdfData),
                    };
                    renameFileMatch(fileWithTexts);
                    resolve(fileWithTexts);
                } catch (err) {
                    reject(err);
                }
            });

            // Start loading only after both listeners are attached so that no
            // event can be emitted before something is listening for it.
            pdfParser.loadPDF(fileLocation);
        });
    });

    // Promise.all keeps the input order, so no shared array has to be mutated
    // from inside the handlers.
    return Promise.all(pending);
}
0
On

You can use a Promise in your getInformationsPdf() function:

Example

// Sketch from the second answer: wraps the directory loop in a single Promise.
// NOTE(review): in the lines shown, resolve() is only reached when
// fileWithTexts is falsy and reject is never called, so nothing visible here
// settles the promise after the loop completes normally — confirm against the
// elided code.
function getInformationsPdf() {
       return new Promise((resolve, reject) => {
           let arrayObjs = [];
           fs.readdirSync(folderTemp).forEach(file => {
                  ...//your code stuff
                if(!fileWithTexts) resolve(arrayObjs);  //handle your exception
                renameFileMatch(fileWithTexts);
                arrayObjs.push(fileWithTexts);
                
           })
     })
}