I've taken on the task of updating an old package available on GitHub => https://github.com/leknoppix/docker-wkhtmltopdf-aas (master branch). This package allows for the quick setup of an API to convert HTML to PDF, based on wkhtmltopdf.
So, I've updated the package.json file with the latest package versions. Here's its content:
{
"name": "docker-wkhtmltopdf-ass",
"version": "2.4.0",
"description": "A PDF Serice API that renders HTML templates to PDF",
"author": "",
"main": "app.coffee",
"license": "ISC",
"scripts": {
"version": "coffee --version",
"start": "while true; do coffee app.coffee; done",
"test": "nyc --extension .coffee mocha --exit --recursive --compilers coffee:coffee-script/register --require coffee-coverage/register-istanbul test.coffee --timeout 5000 && nyc report --reporter=lcov --extension .coffee",
"nyc": "nyc",
"lint": "coffeelint app.coffee test.coffee",
"ncu": "ncu",
"ncupdate": "ncu -u"
},
"dependencies": {
"bluebird": "^3.7.2",
"body-parser": "^1.20.2",
"bootprint": "^4.0.4",
"bootprint-openapi": "^4.0.4",
"child_process": "^1.0.2",
"child-process-promise": "^2.2.1",
"coffeescript": "^2.7.0",
"dotenv": "^16.3.1",
"express": "^4.18.2",
"express-basic-auth": "^1.2.1",
"express-healthcheck": "^0.1.0",
"express-prom-bundle": "^6.6.0",
"express-status-monitor": "^1.3.4",
"forever": "^4.0.3",
"fs": "^0.0.1-security",
"fs-writefile-promise": "^3.0.2",
"helmet": "^7.1.0",
"http-auth": "^4.2.0",
"lodash": "^4.17.21",
"morgan": "^1.10.0",
"npm-check-updates": "^16.14.6",
"promisepipe": "^3.0.0",
"temp-write": "^5.0.0",
"tmp-promise": "^3.0.3"
},
"devDependencies": {
"chakram": "^1.5.0",
"codeclimate-test-reporter": "^0.5.1",
"coffee-coverage": "^3.0.1",
"coffeelint": "^2.1.0",
"mocha": "^10.2.0",
"nyc": "^15.1.0",
"supertest": "^6.3.3",
"textract": "^2.5.0"
}
}
Next, I modified the CoffeeScript code to make it compatible with the new package versions, as well as the new versions of Node and npm:
fileWrite = require 'fs-writefile-promise'
spawn = require('child_process').spawn
prometheusMetrics = require 'express-prom-bundle'
# { spawn } = require 'child-process-promise'
statusMonitor = require 'express-status-monitor'
{flow, map, compact, values, flatMap,
toPairs, first, last, concat, remove,
flatten, negate} = require 'lodash/fp'
health = require 'express-healthcheck'
promisePipe = require 'promisepipe'
bodyParser = require 'body-parser'
parallel = require 'bluebird'
tmp = require 'tmp-promise'
express = require 'express'
basicAuth = require('express-basic-auth')
helmet = require 'helmet'
log = require 'morgan'
fs = require 'fs'
require('dotenv').config({ silent: true })
app = express()
payload_limit = process.env.PAYLOAD_LIMIT or '20mb'
app.use helmet()
app.use '/healthcheck', health()
app.use '/', express.static(__dirname + '/documentation')
app.use(basicAuth({
users: { [process.env.USER]: process.env.PASS },
challenge: true,
realm: 'Restricted Area'
}))
# don't work
#app.use(statusMonitor({
# eventLoop: false
#}))
app.use prometheusMetrics()
app.use log('combined')
app.post '/', bodyParser.json(limit: payload_limit), ({body}, res) ->
console.log 'Fichier reçu'
# decode base64
# comment être sur que le traitement est à 100% ?
decode = (base64) ->
Buffer.from(base64, 'base64').toString 'utf8' if base64?
tmpFile = (ext) ->
tmp.file(dir: '/tmp', postfix: '.' + ext).then (f) -> f.path
tmpWrite = (content) ->
tmpFile('html').then (f) -> fileWrite f, content if content?
# compile options to arguments
arg = flow(toPairs, flatMap((i) -> ['--' + first(i), last(i)]), compact)
parallel.join tmpFile('pdf'),
map(flow(decode, tmpWrite), [body.header, body.footer, body.contents])...,
(output, header, footer, content) ->
files = [['--header-html', header],
['--footer-html', footer],
[content, output]]
# combine arguments and call pdf compiler using shell
# injection save function 'spawn' goo.gl/zspCaC
console.log 'wkhtmltopdf', (arg(body.options).concat(flow(remove(negate(last)), flatten)(files)))
# Create a ChildProcess object for the wkhtmltopdf command
child = spawn 'wkhtmltopdf', (arg(body.options)
.concat(flow(remove(negate(last)), flatten)(files)))
# Wait for the wkhtmltopdf process to finish
child.on 'exit', (code) ->
if code is 0
res.setHeader('Content-type', 'application/pdf');
fs.createReadStream(output).pipe(res);
else
res.status(BAD_REQUEST = 400).send('invalid arguments');
# Delete the temporary files
# map fs.unlinkSync, compact([output, header, footer, content])
app.listen process.env.PORT or 6555
module.exports = app
The script below works, but has two issues:
- If I intentionally provide it with a very large base64 file (e.g., a sizable HTML page), the decoding step takes longer, but the conversion and the creation of the temporary file aren't complete before the PDF conversion starts. I'm unsure how to ensure that decoding and temporary file creation are complete before launching the PDF conversion.
- The second point, although it doesn't block the script's functionality, is related to the express-status-monitor package, which doesn't work correctly. In other words, it doesn't retrieve RAM and CPU usage.
I'm posting here to find out how you would ensure that the decode and tmpWrite functions are fully executed. If you'd like to test, switch to the update_package branch of my repository: https://github.com/leknoppix/docker-wkhtmltopdf-aas/tree/update_package.
If you have any questions, please feel free to let me know.