I am using pdf-parse to extract text from PDF files. It works fine for English PDFs, but for Hindi PDFs it returns garbage text. Are there any npm packages that can parse Hindi text as well? I also tried pdf2json, but it does not work either.
import pdf from "pdf-parse";
import fs from "fs";
/**
 * Reads a PDF file from disk, extracts its text with pdf-parse, and prints
 * the text to stdout.
 *
 * Failures (unreadable file or parse error) are logged to stderr rather than
 * propagated, so read errors and parse errors are handled the same way.
 *
 * @param {string} filePath - Path of the PDF file to read.
 * @returns {Promise<string|undefined>} The extracted text, or `undefined`
 *   when reading or parsing failed.
 */
async function parsePDF(filePath) {
  try {
    // Read inside the try block so a missing/unreadable file is reported like
    // a parse failure instead of escaping as a rejected promise.
    const dataBuffer = await fs.promises.readFile(filePath);
    const { text } = await pdf(dataBuffer);
    console.log(text);
    return text;
  } catch (error) {
    // Errors go to stderr so they never mix with the extracted text on stdout.
    console.error(error);
    return undefined;
  }
}
// Entry point: parse the PDF whose path is given in the PDF_FILEPATH
// environment variable.
const pdfFilePath = process.env.PDF_FILEPATH;
if (!pdfFilePath) {
  // Fail with a clear message instead of passing `undefined` to the file read.
  console.error("PDF_FILEPATH environment variable is not set");
  process.exitCode = 1;
} else {
  // Handle rejections explicitly so the promise is never left floating.
  parsePDF(pdfFilePath).catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });
}
Input and output: here is the output I am getting.