pdftohtml (Poppler utils) not working on CentOS

3.2k Views Asked by At

I am trying to convert PDF to HTML in PHP using the mgufrone library (https://github.com/mgufrone/pdf-to-html). On my Mac it works fine, but when I run it on a CentOS server, the .html file created inside the /vendor/gufy/pdftohtml-pdf/output folder is blank. On my Mac, multiple files are created inside the /output folder, but on the server only a single file with empty content is created. Please help.

2

There are 2 best solutions below

1
On

const path = require('path');
const fs = require('fs');
const { exec } = require('child_process');

/**
 * Converts an uploaded PDF into JPEG page images with `pdftoppm` and replies
 * with the encrypted list of resulting image URLs.
 *
 * Steps: write the upload to `<cwd>/input.pdf`, run `pdftoppm -jpeg` so the
 * pages land in `<cwd>/public/upload/image*`, list that directory, send the
 * response, and finally delete the temporary PDF (on success and on failure).
 *
 * Any failure is logged and answered with a 500 response instead of leaving
 * the request without a reply.
 *
 * @param {{ data: (Buffer|string) }} pdfData - Uploaded file object; only
 *   `.data` is read. Presumably a Buffer from an upload middleware — confirm.
 * @param {object} req - Incoming request; unused, kept for signature
 *   compatibility.
 * @param {object} res - Response object; `res.json` and `res.status` are
 *   called on it (presumably an Express response — confirm).
 * @returns {Promise<void>} Resolves after the response was sent and the
 *   temporary PDF cleanup was attempted.
 */
async function pdfToImageConvert(pdfData, req, res) {
  const scriptDirectory = process.cwd();
  // NOTE(review): a fixed file name means two concurrent requests overwrite
  // each other's input; switch to a per-request name if this runs in parallel.
  const pdfPath = path.join(scriptDirectory, 'input.pdf');
  const outputDirectory = path.join(scriptDirectory, 'public', 'upload');
  const baseURL = 'http://localhost:3001/upload/'; // Change this to your actual base URL
  let pdfWritten = false;

  try {
    // Same conversion as before: a Buffer round-trips unchanged, while a
    // string is interpreted as base64 text.
    const base64Data = pdfData.data.toString('base64');
    fs.writeFileSync(pdfPath, Buffer.from(base64Data, 'base64'));
    pdfWritten = true;

    console.log("outputDirectory", outputDirectory);

    // Quote both paths so a working directory containing spaces does not
    // split the arguments when the command goes through the shell.
    const outputPrefix = path.join(outputDirectory, 'image');
    const command = `pdftoppm -jpeg "${pdfPath}" "${outputPrefix}"`;
    await executeCommand(command);

    // NOTE(review): this also matches `image*` files left over from earlier
    // conversions in the same directory.
    const files = await fs.promises.readdir(outputDirectory);
    const modifiedPDFImages = files
      .filter((file) => file.startsWith('image'))
      .map((filename) => `${baseURL}${filename}`);

    console.log('Images created from the PDF:', modifiedPDFImages);
    console.log('Number of images created:', modifiedPDFImages.length);

    // `encryptData` is expected to be provided elsewhere in the project.
    const resultSuccess = encryptData(
      JSON.stringify({ images: modifiedPDFImages })
    );
    res.json({ result: resultSuccess });
  } catch (error) {
    console.error('Error converting PDF to image:', error);
    // Always answer the request; previously every failure path left it hanging.
    if (!res.headersSent) {
      res.status(500).json({ error: 'Failed to convert PDF to images' });
    }
  } finally {
    // Remove the temporary PDF even when the conversion failed.
    if (pdfWritten) {
      try {
        await fs.promises.unlink(pdfPath);
        console.log('input.pdf deleted.');
      } catch (unlinkError) {
        console.error('Error deleting input.pdf:', unlinkError);
      }
    }
  }
}

/**
 * Runs a shell command through `exec` and exposes the outcome as a Promise.
 *
 * @param {string} command - Full command line handed to the shell.
 * @returns {Promise<void>} Resolves with no value when `exec` reports no
 *   error (stdout is logged, not returned); rejects with the error object
 *   passed to the `exec` callback otherwise.
 */
function executeCommand(command) {
  const run = (settle, fail) => {
    const onFinished = (failure, output) => {
      // Success path first; stderr is intentionally not inspected.
      if (!failure) {
        console.log(`Command executed successfully: ${output}`);
        settle();
        return;
      }

      console.error(`Error executing command: ${failure}`);
      fail(failure);
    };

    exec(command, onFinished);
  };

  return new Promise(run);
}

// Example invocation. `pdfData`, `req`, and `res` are not defined anywhere in
// this snippet; the caller (presumably an HTTP route handler — confirm) must
// supply them.
pdfToImageConvert(pdfData, req, res);

2
On

Herein lies the problem:

sudo yum install poppler-utils

That installs an old version (0.12.4) which does not have pdftohtml command options like "-s" and "-fmt".

Follow this guide, https://medium.com/@jakebathman/building-poppler-utils-for-centos-6-5-really-e52eccffc6ae, to build a later version of poppler-utils. I installed https://poppler.freedesktop.org/poppler-0.22.5.tar.gz instead of the 0.13.4 release used in the guide.

All the best!