Puppeteer cluster.close() "crashes" after calling cluster.queue()

978 Views Asked by At

Long story short, I've made an app for web scraping and in order for it to be able to simultaneously run more then 1 process at a time (more than 1 Chromium opened), i used puppeteer-cluster. I've got it to run several processes at once, but the cluster won't stop afterwards, it permanently runs. Along the way, I've encountered the following error (1)

await cluster.close(); // Gives the following error -> cluster.close is not a function (1)

If i use it like this (2)

(await cluster).close(); // This returns no error (2)

Anywho, the main problem is that when the code gets to the line where cluster gets closed (at the end of the code, check code below), it freezes there. Won't return any error, as i tried catching it, but at the same time it won't crash either.

const puppeteer = require('puppeteer');
const { Cluster }  = require('puppeteer-cluster/');

function delay(time) {
 return new Promise(function(resolve) { 
   setTimeout(resolve, time)
 });
}


(async () => {

  const cluster = Cluster.launch({
    concurrency: Cluster.CONCURRENCY_BROWSER,
    maxConcurrency: 2,
  });

  const test = async () => {

    const user = process.argv[2];
    const pass = process.argv[3];
    const smis = process.argv[4];
    const nrinreg = process.argv[5];



    const browser = await puppeteer.launch({headless: false, defaultViewport: null, args:['--start-fullscreen']}); // you can also use '--start-fullscreen']});
    const page = await browser.newPage();
    //await page.setViewport({width: 1200, height: 900});
    await page._client.send('Page.setDownloadBehavior', {behavior: 'allow', downloadPath: './'+smis+''}).catch(e => {});
    await page.goto('https://aplicatii2014.smis.fonduri-ue.ro/smis2014app/').catch(e => {});
    await delay(2000);
    // await page.waitFor(3000);

    //scriere credentiale pt elementele de tip HTML user&parola
    await page.type('#j_idt38 > .step-content > .step-pane > .col-md-12 > .form-group > input[name="j_idt38:utilizator"]', user).catch(e => {});
    await delay(1000);
    await page.type('#j_idt38 > .step-content > .step-pane > .col-md-12 > .form-group > input[name="j_idt38:pass"]', pass).catch(e => {});
    await delay(1000);
    page.click('#j_idt38 > .actions > a').catch(e => {});

    await delay(2000);

    page.click('#idPanelGroup > #headerPanel > div > #j_idt18 > tbody > tr > #topMenuCell > #j_idt22').catch(e => {});
    await delay(2000);
    await page.type('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > table > tbody > tr > td > input[id="formCereriFinantare:idSmisAll"]', smis).catch(e => {});
    await delay(1000);
    page.click('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > table > tbody > tr > td > a[id="formCereriFinantare:commandBtnSearch"]').catch(e => {});
    await delay(1000);
    page.click('span.ui-chkbox-icon.ui-icon.ui-icon-blank.ui-c').catch(e => {});
    await delay(1000);
    await page.type('#dialogAcordConfidentialitate > .ui-dialog-content > #formAcordConfidentialitate > div[id="formAcordConfidentialitate:j_idt167"] > .ui-scrollpanel-container > .ui-scrollpanel-content > .col-md-12 > .row > .col-md-3 > input[id="formAcordConfidentialitate:CNP"]', pass).catch(e => {});
    await delay(1000);
    page.click('#dialogAcordConfidentialitate > .ui-dialog-content > #formAcordConfidentialitate > div[id="formAcordConfidentialitate:j_idt167"] > .ui-scrollpanel-container > .ui-scrollpanel-content > .col-md-12 > .row > a[id="formAcordConfidentialitate:btnConfirmContent"]').catch(e => {});
    await delay(1000);
    page.click('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > div > div > a > .ui-icon-seek-end').catch(e => {});
    await delay(2000);

    const doc_details = await page.evaluate(() => {
    //Extract each doc's basic details
    let table = document.querySelector('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > div[id="formCereriFinantare:tableCereriFinantare"] > .ui-datatable-tablewrapper > table > tbody');
    let doc_panels = Array.from(table.children); 
    // Loop through each doc and get their details 
    let doc_info = doc_panels.map(doc_panel => {
      let codsmis = doc_panel.querySelector("tr > td:nth-child(1)").textContent;
      let titlu = doc_panel.querySelector("tr > td:nth-child(2)").textContent;
      let versiune = doc_panel.querySelector("tr > td:nth-child(3)").textContent;
      let contractare = doc_panel.querySelector("tr > td:nth-child(4)").textContent;

      return { versiune, contractare };
    });


    return doc_info;
    });


    doc_details.sort((a, b) => (parseInt(a.versiune) < parseInt(b.versiune) ? 1 : -1));
    let res = new Array();
    res = doc_details.filter(a => a.contractare.length > 0);


  /**
   * Get first elem from a array
   * // [...res].shift()
   */
   const [first] = res;
  /**
   * If no element exist 
   */

   if (first === null) {
    page.click('#dialogCereriFinantare > .ui-dialog-content > #formCereriFinantare > div > div > a > .ui-icon-seek-prev').catch(e => { });
  }
  else {

    let version = first["versiune"];

    await delay(1000);

    const example = await page.$('#dialogCereriFinantare > .ui-dialog-titlebar');
    const bounding_box = await example.boundingBox();

    await page.mouse.move(bounding_box.x + bounding_box.width / 2, bounding_box.y + bounding_box.height / 2);
    await page.mouse.down();
    await page.mouse.move(126, 19);
    await page.mouse.up();
    
    await delay(1000);
    await page.waitForXPath("//tr/td[3][contains(., '"+ version +"')]");  
    const [projects] = await page.$x("//tr/td[3][contains(., '"+ version +"')]");
    projects.click().catch(e => {});

    await delay(2000);
    await page.goto("https://aplicatii2014.smis.fonduri-ue.ro/smis2014app/faces/pages/comunicare.xhtml").catch(e => {});
    await delay(2000);

    await page.evaluate(() => {
      document.querySelector('#j_idt68 > div > #idPanelContent > #j_idt140 > #j_idt140_content > #j_idt142 > div > .ui-datatable-tablewrapper > table > tbody').scrollIntoView();
    }).catch(e => {});


    await delay(2000).catch(e => {});
    
    const [com] = await page.$x("//tr/td[1][contains(., '37114')]").catch(e => {});

    if (com){
      com.click().catch(e => {});
    }
    else
    {
      let [com2] = await page.$x("//tr/td[1][contains(., '"+ nrinreg +"')]");
      do{
        page.click('#j_idt68 > div > #idPanelContent > #j_idt140 > #j_idt140_content > #j_idt142 > div > div[id="j_idt142:idComunicareTable_paginator_bottom"] > .ui-paginator-next').catch(e => {});
        await delay(2000).catch(e => {});
        let [com2] = await page.$x("//tr/td[1][contains(., '"+ nrinreg +"')]").catch(e => {});
        if (com2){
          break;
        }
      }
      while(!com2);

      
      let [com3] = await page.$x("//tr/td[1][contains(., '"+ nrinreg +"')]").catch(e => {});
      com3.click().catch(e => {});

      await delay(2000).catch(e => {});

      await page.evaluate(() => {
        document.querySelector('#j_idt68 > div > #idPanelContent > #j_idt140 > div > #idDetaliicomunicare').scrollIntoView();
      }).catch(e => {});

      await delay(2000).catch(e => {});
    }

    await delay(2000);

    const listadownload = await page.$$('#j_idt68 > div > #idPanelContent > div > div > #idDetaliicomunicare > div > div > div > ul > li > .ui-treenode-children  > li > span');

    for (let iteminlistadownload of listadownload){
      await iteminlistadownload.click({button: 'right',}).catch(e => {});
      await delay(2000);
      let [viz] = await page.$x('//*[@id="idDetaliicomunicare:j_idt163"]/ul/li/a').catch(e => {});
      viz.click().catch(e => {});
      await delay(2000);
    }


  }

 
  await delay(3000);
  await browser.close();
  
};


  
  (await cluster).queue(test);
  
  
  (await cluster).idle();

  
  (await cluster).close();

  


})();


I have been scraping the internet for a solution, looking for fixes on GitHub and nothing seems to work. What is it that I'm doing wrong that simply just won't terminate the process? PS : I added the whole code in hopes of it being relevant.

1

There are 1 best solutions below

1
On BEST ANSWER

Cluster.launch return a Promise. If you just call const cluster = Cluster.launch, now cluster is Promise, when you call (await cluster).close();, (await cluster) will return a Cluster instance -> It work!

Let’s use cluster as a Cluster instance instead of a Promise object:

  const cluster = await Cluster.launch({ // wait until it "launch" finish
    concurrency: Cluster.CONCURRENCY_BROWSER,
    maxConcurrency: 2,
  });