Troubleshooting Issues with PDF Conversion in Updated Docker-Wkhtmltopdf-aas Package

54 Views Asked by At

I've taken on the task of updating an old package available on GitHub => https://github.com/leknoppix/docker-wkhtmltopdf-aas (master branch). This package allows for the quick setup of an API to convert HTML to PDF, based on wkhtmltopdf.

So, I've updated the package.json file with the latest package versions. Here's its content:

{
  "name": "docker-wkhtmltopdf-ass",
  "version": "2.4.0",
  "description": "A PDF Serice API that renders HTML templates to PDF",
  "author": "",
  "main": "app.coffee",
  "license": "ISC",
  "scripts": {
    "version": "coffee --version",
    "start": "while true; do coffee app.coffee; done",
    "test": "nyc --extension .coffee mocha --exit --recursive --compilers coffee:coffee-script/register --require coffee-coverage/register-istanbul test.coffee --timeout 5000 && nyc report --reporter=lcov --extension .coffee",
    "nyc": "nyc",
    "lint": "coffeelint app.coffee test.coffee",
    "ncu": "ncu",
    "ncupdate": "ncu -u"
  },
  "dependencies": {
    "bluebird": "^3.7.2",
    "body-parser": "^1.20.2",
    "bootprint": "^4.0.4",
    "bootprint-openapi": "^4.0.4",
    "child_process": "^1.0.2",
    "child-process-promise": "^2.2.1",
    "coffeescript": "^2.7.0",
    "dotenv": "^16.3.1",
    "express": "^4.18.2",
    "express-basic-auth": "^1.2.1",
    "express-healthcheck": "^0.1.0",
    "express-prom-bundle": "^6.6.0",
    "express-status-monitor": "^1.3.4",
    "forever": "^4.0.3",
    "fs": "^0.0.1-security",
    "fs-writefile-promise": "^3.0.2",
    "helmet": "^7.1.0",
    "http-auth": "^4.2.0",
    "lodash": "^4.17.21",
    "morgan": "^1.10.0",
    "npm-check-updates": "^16.14.6",
    "promisepipe": "^3.0.0",
    "temp-write": "^5.0.0",
    "tmp-promise": "^3.0.3"
  },
  "devDependencies": {
    "chakram": "^1.5.0",
    "codeclimate-test-reporter": "^0.5.1",
    "coffee-coverage": "^3.0.1",
    "coffeelint": "^2.1.0",
    "mocha": "^10.2.0",
    "nyc": "^15.1.0",
    "supertest": "^6.3.3",
    "textract": "^2.5.0"
  }
}

Next, I modified the CoffeeScript code to make it compatible with the new package versions, as well as the new versions of Node and npm:

fileWrite = require 'fs-writefile-promise'
spawn = require('child_process').spawn
prometheusMetrics = require 'express-prom-bundle'
# { spawn } = require 'child-process-promise'
statusMonitor = require 'express-status-monitor'
{flow, map, compact, values, flatMap,
  toPairs, first, last, concat, remove,
  flatten, negate} = require 'lodash/fp'
health = require 'express-healthcheck'
promisePipe = require 'promisepipe'
bodyParser = require 'body-parser'
parallel = require 'bluebird'
tmp = require 'tmp-promise'
express = require 'express'
basicAuth = require('express-basic-auth')
helmet = require 'helmet'
log = require 'morgan'
fs = require 'fs'

require('dotenv').config({ silent: true })

app = express()

payload_limit = process.env.PAYLOAD_LIMIT or '20mb'

app.use helmet()
app.use '/healthcheck', health()
app.use '/', express.static(__dirname + '/documentation')
app.use(basicAuth({
  users: { [process.env.USER]: process.env.PASS },
  challenge: true,
  realm: 'Restricted Area'
}))
# don't work
#app.use(statusMonitor({
#  eventLoop: false
#}))

app.use prometheusMetrics()
app.use log('combined')

app.post '/', bodyParser.json(limit: payload_limit), ({body}, res) ->
  console.log 'Fichier reçu'

  # decode base64
  # comment être sur que le traitement est à 100% ?
  decode = (base64) ->
    Buffer.from(base64, 'base64').toString 'utf8' if base64?

  tmpFile = (ext) ->
    tmp.file(dir: '/tmp', postfix: '.' + ext).then (f) -> f.path

  tmpWrite = (content) ->
    tmpFile('html').then (f) -> fileWrite f, content if content?

  # compile options to arguments
  arg = flow(toPairs, flatMap((i) -> ['--' + first(i), last(i)]), compact)
  
  parallel.join tmpFile('pdf'),
  map(flow(decode, tmpWrite), [body.header, body.footer, body.contents])...,
  (output, header, footer, content) ->
    files = [['--header-html', header],
             ['--footer-html', footer],
             [content, output]]
    # combine arguments and call pdf compiler using shell
    # injection save function 'spawn' goo.gl/zspCaC
    console.log 'wkhtmltopdf', (arg(body.options).concat(flow(remove(negate(last)), flatten)(files)))


    # Create a ChildProcess object for the wkhtmltopdf command
    child = spawn 'wkhtmltopdf', (arg(body.options)
    .concat(flow(remove(negate(last)), flatten)(files)))

    # Wait for the wkhtmltopdf process to finish
    child.on 'exit', (code) ->
      if code is 0
        res.setHeader('Content-type', 'application/pdf');
        fs.createReadStream(output).pipe(res);
      else
        res.status(BAD_REQUEST = 400).send('invalid arguments');

    # Delete the temporary files
    # map fs.unlinkSync, compact([output, header, footer, content])

app.listen process.env.PORT or 6555
module.exports = app

The script below works, but has two issues:

  1. If I intentionally provide it with a very large base64 file (e.g., a sizable HTML page), the decoding step takes longer, but the conversion and the creation of the temporary file aren't complete before the PDF conversion starts. I'm unsure how to ensure that decoding and temporary file creation are complete before launching the PDF conversion.
  2. The second point, although it doesn't block the script's functionality, is related to the express-status-monitor package, which doesn't work correctly. In other words, it doesn't retrieve RAM and CPU usage.

I'm posting here to find out how you would ensure that the decode and tmpWrite functions are fully executed. If you'd like to test, switch to the update_package branch of my repository: https://github.com/leknoppix/docker-wkhtmltopdf-aas/tree/update_package.

If you have any questions, please feel free to let me know.

0

There are 0 best solutions below