I am trying to convert PDF to HTML in PHP using the mgufrone library (https://github.com/mgufrone/pdf-to-html). When I run it on my Mac it works fine, but when I run it on a CentOS server, the .html file created inside the /vendor/gufy/pdftohtml-pdf/output folder is blank. On my Mac, multiple files are created inside the /output folder, but on the server only a single file with empty content is created. Please help.
pdftohtml (Poppler utils) not working on CentOS
3.2k Views Asked by Mir Mumtaz At
2
There are 2 best solutions below
1
On
const path = require('path');
const fs = require('fs');
const { exec } = require('child_process');
/**
 * Converts an uploaded PDF into one JPEG per page with the `pdftoppm` CLI and
 * replies with the encrypted list of image URLs.
 *
 * The PDF is written to a uniquely named temporary file in the current working
 * directory, rendered into `<cwd>/public/upload`, and the temporary file is
 * removed afterwards whether or not the conversion succeeded.
 *
 * @param {{ data: (Buffer|string) }} pdfData - Uploaded file object. `data` is
 *   used as-is when it is a Buffer; otherwise it is decoded as base64, which
 *   is what the previous implementation did.
 * @param {object} req - Incoming request (not read by this function).
 * @param {object} res - Response object. `json()` is required; `status()` and
 *   `headersSent` are used on the failure path (Express-style response —
 *   NOTE(review): confirm against the actual caller).
 * @returns {Promise<*>} Settles after the response has been sent and the
 *   temporary PDF has been cleaned up. Never rejects for conversion errors;
 *   those are logged and reported to the client as HTTP 500.
 */
async function pdfToImageConvert(pdfData, req, res) {
  // Hard-coded as in the original; change this to your actual base URL.
  const baseURL = 'http://localhost:3001/upload/';
  const scriptDirectory = process.cwd();
  const outputDirectory = path.join(scriptDirectory, 'public', 'upload');

  // A per-call token keeps concurrent requests from overwriting each other's
  // input file and from picking up images produced by other conversions.
  // It only contains [0-9a-z-], so it is safe in file names and URLs.
  const jobId = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
  const pdfPath = path.join(scriptDirectory, `input-${jobId}.pdf`);
  // Still starts with "image" so existing cleanup/filters on that prefix keep matching.
  const imagePrefix = `image-${jobId}`;

  try {
    // Skip the Buffer -> base64 -> Buffer round trip when we already hold bytes.
    const pdfBytes = Buffer.isBuffer(pdfData.data)
      ? pdfData.data
      : Buffer.from(pdfData.data.toString('base64'), 'base64');

    // pdftoppm does not create the target directory itself.
    await fs.promises.mkdir(outputDirectory, { recursive: true });
    await fs.promises.writeFile(pdfPath, pdfBytes);
    console.log("outputDirectory", outputDirectory);

    // Quote both paths so a working directory containing spaces does not split
    // the arguments. NOTE(review): a path containing `"` or `$` would still
    // need proper escaping (or execFile with an argument array).
    const outputPrefixPath = path.join(outputDirectory, imagePrefix);
    const command = `pdftoppm -jpeg "${pdfPath}" "${outputPrefixPath}"`;
    await executeCommand(command);

    const files = await fs.promises.readdir(outputDirectory);
    const modifiedPDFImages = files
      .filter((file) => file.startsWith(imagePrefix))
      .sort() // directory listing order is not guaranteed; keep output stable
      .map((filename) => baseURL + filename);
    console.log('Images created from the PDF:', modifiedPDFImages);
    console.log('Number of images created:', modifiedPDFImages.length);

    const resultSuccess = encryptData(
      JSON.stringify({ images: modifiedPDFImages })
    );
    return res.json({ result: resultSuccess });
  } catch (error) {
    console.error('Error converting PDF to image:', error);
    // Always answer the request; previously a failure left the client hanging.
    if (res.headersSent) {
      return undefined;
    }
    return res.status(500).json({ error: 'Failed to convert PDF to images' });
  } finally {
    // Remove the temporary PDF on success and on failure alike.
    try {
      await fs.promises.unlink(pdfPath);
      console.log('input.pdf deleted.');
    } catch (unlinkError) {
      // ENOENT just means we failed before the file was ever written.
      if (unlinkError.code !== 'ENOENT') {
        console.error('Error deleting input.pdf:', unlinkError);
      }
    }
  }
}
/**
 * Runs a command line through the system shell and reports completion as a
 * Promise.
 *
 * @param {string} command - Full command line handed to `exec`.
 * @returns {Promise<void>} Resolves with no value once the command exits
 *   successfully (its stdout is logged); rejects with the error object that
 *   `exec` reported otherwise.
 */
function executeCommand(command) {
  const runInShell = (settle, fail) => {
    const onFinished = (failure, output) => {
      if (!failure) {
        console.log(`Command executed successfully: ${output}`);
        settle();
        return;
      }
      console.error(`Error executing command: ${failure}`);
      fail(failure);
    };
    exec(command, onFinished);
  };
  return new Promise(runInShell);
}
// Kick off the conversion. `pdfData`, `req` and `res` are not defined anywhere
// in this snippet — presumably they come from the surrounding request handler
// (e.g. an upload route); confirm before running this line as-is.
pdfToImageConvert(pdfData, req, res);
Herein lies the problem:
The poppler-utils package that gets installed on CentOS is an old version (0.12.4), which does not support pdftohtml command options such as "-s" and "-fmt".
Follow the guide at https://medium.com/@jakebathman/building-poppler-utils-for-centos-6-5-really-e52eccffc6ae to build a later version of poppler-utils. I installed https://poppler.freedesktop.org/poppler-0.22.5.tar.gz instead of the 0.13.4 release used in the guide.
All the best!