How to make synchronous calls in node.js with request and cheerio

179 Views Asked by At

I'm working on a Node.js project in which I'm taking input from a text file and trying to generate a JSON file containing the output.

My text file contains multiple categories of data which are represented by a global variable current_title.

My sample text file:

*category1
item1
item2
*category2
item3
item4

My code looks something like this:

const fs = require('fs');
const request = require('request');
const cheerio = require('cheerio');
.
.
// Title of the category currently being read. function2's callback reads this
// same variable later, once its request has completed.
var current_title = "";
for (let idx = 0; idx < lines.length; idx += 1) {
  const isCategoryHeader = lines[idx].startsWith('*');
  if (!isCategoryHeader) {
    console.log(current_title);
    // Only items listed under "category1" are handed to the scraper. The call
    // is not waited on, so the loop moves straight on to the next line.
    if (current_title.trim() === "category1") {
      function1(lines[idx]);
    }
    continue;
  }
  // Header line: strip the leading '*' in place and remember the title.
  const title = lines[idx].slice(1);
  lines[idx] = title;
  current_title = title;
}
console.log("the end.");

/**
 * Starts the search request for one item and returns immediately; the
 * response is handled later, inside the request callback.
 *
 * @param {string} name - Item line from the input file (not referenced in the
 *   portion of the body shown here).
 */
function function1(name) {
  // Loads the search page and passes the follow-up URL on to function2.
  function parseBody(body) {
    const $ = cheerio.load(body);
    // DOES SOME WEB SCRAPING
    function2(url);
  }

  const onResponse = (error, response, html) => {
    if (error) {
      console.log(error);
      return;
    }
    parseBody(html);
  };

  request(search_url, onResponse);
}
  
/**
 * Requests `url` and, when the response arrives, loads it and records data
 * under whatever `current_title` holds at that moment.
 *
 * @param {string} url - Address to request.
 */
function function2(url) {
  // Runs when the response is in; current_title is read here, not when
  // function2 was called.
  function parseBody(body) {
    const $ = cheerio.load(body);
    // DOES SOME WEB SCRAPING
    console.log(current_title);
    // USES current_title TO INSERT DATA INTO A JSON FILE
  }

  const onResponse = (error, response, html) => {
    if (error) {
      console.log(error);
      return;
    }
    parseBody(html);
  };

  request(url, onResponse);
}

The problem is that the function calls work asynchronously and hence my code doesn't work as expected i.e., the current_title is updated to 'category2' before the function2 tries to insert data of 'category1'. So, my code returns an output which looks something like below:

category1
category1
category2
category2
the end.
category2
category2

How do I solve this problem? And is there a better way to do this?

EDIT:

I've updated the code according to the solution provided by @MrDiamond but my problem still seems to persist.

updated code snippet:

/**
 * Updated attempt at the first scraping step: issues the search request and
 * registers an async callback for the response.
 *
 * The promise returned by this function is not tied to the request: nothing
 * here awaits the callback, so callers cannot use it to wait for completion.
 *
 * NOTE(review): the callback calls parseBody, while the updated snippet
 * defines parseBody1 — presumably a naming slip; confirm which is intended.
 *
 * @param {string} name - Item line from the input file (unused in the lines shown).
 */
async function function1(name) {
  const handleResponse = async (error, response, html) => {
    if (error) {
      console.log(error);
      return;
    }
    await parseBody(html);
  };

  request(search_url, handleResponse);
}

// First-stage handler from the updated attempt: loads the fetched HTML and
// then starts the follow-up step through function2.
async function parseBody1(body) {
  // NOTE(review): cheerio.load is documented as synchronous, so this await
  // presumably adds nothing — confirm against the cheerio version in use.
  const $ = await cheerio.load(body);
  // do something
  // function2's result is neither awaited nor returned, so this function
  // settles without waiting for whatever function2 starts.
  // NOTE(review): `url` is not defined in the visible snippet; presumably it
  // comes from the elided scraping step above.
  function2(url);
}

/**
 * Updated attempt at the second scraping step: requests `url` and registers
 * an async callback that forwards the response body to parseBody2.
 *
 * As written, the returned promise settles as soon as the request has been
 * issued; it does not wait for the callback to run.
 *
 * @param {string} url - Address to request.
 */
async function function2(url) {
  const handleResponse = async (error, response, html) => {
    if (error) {
      console.log(error);
      return;
    }
    await parseBody2(html);
  };

  request(url, handleResponse);
}

// Second-stage handler from the updated attempt: loads the HTML it is given;
// the scraping and the data insertion are elided in the question.
async function parseBody2(body) {
  // NOTE(review): cheerio.load is documented as synchronous, so this await
  // presumably adds nothing — confirm against the cheerio version in use.
  const $ = await cheerio.load(body);
  // do some other thing
  // try inserting data (FAILS BECAUSE OF SYNCHRONICITY)
}
1

There is 1 best solution below

0
MrDiamond On

Use async await with cheerio.

Also, it isn't strictly necessary, but for organization I would move the parseBody function out of the other function and avoid defining it twice.

It seems that function1 and function2 do about the same thing with different variables, so I removed function2. Feel free to add it back in if you feel it is necessary.

Here is some optimized code:

const fs = require('fs');
const request = require('request');
const cheerio = require('cheerio');
...
...
// Title of the category currently being read. Kept at file scope so the
// scraping code can read it when it records a result.
var current_title = "";

/**
 * Walks `lines` in order: a line starting with '*' becomes the current
 * category title (the marker is stripped in place); every other line is an
 * item belonging to that category.
 *
 * Each item is awaited before the next line is read, so `current_title`
 * cannot move on to the next category while an earlier item is still being
 * scraped, and "the end." is printed only after all items are done. This
 * relies on function1 returning a promise that settles when its work is
 * complete.
 *
 * @returns {Promise<void>} Settles after every line has been handled.
 */
async function processLines() {
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (line.startsWith('*')) {
      lines[i] = line.slice(1);
      current_title = lines[i];
      continue;
    }
    console.log(current_title);
    if (current_title.trim() === "category1") {
      // Sequential on purpose: the scraper reads the shared current_title.
      await function1(line);
    }
  }
  console.log("the end.");
}

// Report failures instead of leaving the promise floating.
processLines().catch((err) => console.log(err));

/**
 * Fetches the search page and parses it, settling only once parsing is done,
 * so callers can `await function1(...)` to run items one after another.
 *
 * A failed request is logged and the returned promise still resolves, which
 * matches the log-and-continue handling used throughout this code.
 *
 * @param {string} name - Item line from the input file (not referenced in the
 *   portion of the body shown here).
 * @returns {Promise<void>} Resolves after parseBody has finished, or after a
 *   request error has been logged.
 */
async function function1(name) {
  let body;
  try {
    // request() is callback-based; wrap it so the response can be awaited.
    body = await new Promise((resolve, reject) => {
      request(search_url, (err, res, responseBody) => {
        if (err) reject(err);
        else resolve(responseBody);
      });
    });
  } catch (err) {
    console.log(err);
    return;
  }
  await parseBody(body);
}

/**
 * Loads the fetched HTML into cheerio so it can be queried.
 *
 * cheerio.load is synchronous, so it is called without `await`. The function
 * stays `async` so existing callers can keep awaiting it and so asynchronous
 * work can be added to the body later.
 *
 * @param {string} body - HTML text of the response.
 * @returns {Promise<void>}
 */
async function parseBody(body) {
  const $ = cheerio.load(body);

  // do whatever
}