I'm currently working on a project where I need to make PDF documents accessible to individuals with disabilities. I'm using PDFBox 3.0 to create these PDFs, but I'm facing challenges in properly tagging the PDFs to meet accessibility compliance, having done everything else to make them compliant (XMP metadata, marked dictionary etc...).

I followed these two guides already posted on stack overflow to no results. I expected that these tags will be reflected in the document however when testing this in Acrobat, only the top level tag under the logical structure is reflected

Here is the code at present, note I have stripped out the some layout code to keep this concise.

object GeneratorImpl {

  protected class PDFContext {
    val document: PDDocument = new PDDocument()
    var page: PDPage = new PDPage(PDRectangle.A4)

    page.getCOSObject.setItem(COSName.STRUCT_PARENTS, COSInteger.get(0))
    var contentStream: PDPageContentStream = new PDPageContentStream(document, page)
    var currentY: Float = 0

    val MARGIN = 50f
    var unsupportedCharsFound = false

    page.setMediaBox(PDRectangle.A4)
    document.addPage(page)

    //Set Document Language
    document.getDocumentCatalog.setLanguage("en")
    document.getDocument.setVersion(1.7F)

    //Set Document Viewer Preferences and add metadata
    document.getDocumentCatalog.setViewerPreferences(new PDViewerPreferences(new COSDictionary()));
    document.getDocumentCatalog.getViewerPreferences.setDisplayDocTitle(true);

    //Mark the PDF as Tagged
    private val markInfo = new PDMarkInfo()
    markInfo.setMarked(true)
    document.getDocumentCatalog.setMarkInfo(markInfo)

    addXMPMetadata(document)

    val structureTreeRoot = new PDStructureTreeRoot()
    document.getDocumentCatalog.setStructureTreeRoot(structureTreeRoot)

    val root = new PDStructureElement(StandardStructureTypes.DOCUMENT, structureTreeRoot)
    val dictionary = new COSDictionary()
    dictionary.setItem(COSName.STRUCT_PARENTS, COSInteger.get(0))

    var pageStructureElement = new PDStructureElement(dictionary)
    root.appendKid(pageStructureElement)

    var currentMCID = 0
    var currentMarkedDictionary = new COSDictionary()
    currentMarkedDictionary.setInt(COSName.MCID, currentMCID)

    var nums = new COSArray()
    var numDictionaries = new COSArray()
  }

  def generateCertificatePDF(fields: Seq[Field]): Unit = {
      var pdfContext = createContext()

      if (fields.isEmpty) throw NonRetryableException("No fields supplied to PDF generator")

      for (field <- fields) {
        field match {
          case header: Header => {
            pdfContext = addTitle(header.contents, pdfContext.ROBOTO_BOLD, 20)(pdfContext)
          }
          // Stripped out the other cases for simplicity
          case _ => throw NonRetryableException("Unknown field type in list. Cannot generate PDF without matching generation logic.")
        }
      }

      pdfContext.contentStream.close()
      addParentTree(pdfContext)

      pdfContext.document.save(new File("./test.pdf"))
      val outputStream = new ByteArrayOutputStream()
      pdfContext.document.save(outputStream)

      outputStream.close()
      pdfContext.document.close()
  }

  private def addTitle(text: String, font: PDType0Font, fontSize: Float)(implicit context: PDFContext): PDFContext = {
    if (text.nonEmpty) {
      setDocumentTitle(text)
      addContentToParent(COSName.P, StandardStructureTypes.H, context.page, context.pageStructureElement)
      addCenteredTextBlock(text.toUpperCase, fontSize, font)
    }

   context
  }

  private def addCenteredTextBlock(text: String, fontSize: Float, font: PDType0Font)(implicit context: PDFContext): PDFContext = {
    var localContext = context
    localContext.contentStream.setFont(font, fontSize)

    // 2 * context.MARGIN for left and right margin * 2 for extra padding
    val maxCentreWidth = localContext.page.getMediaBox.getWidth - (2 * localContext.MARGIN * 2)
    localContext.currentY = localContext.page.getMediaBox.getHeight - (2 * localContext.MARGIN)

    val startX = (localContext.page.getMediaBox.getWidth - maxCentreWidth) / 2f
    val startY = localContext.currentY
    var currentY = startY

    val lines = wrapText(text, font, fontSize, maxCentreWidth)
    for (line <- lines) {
      if (localContext.currentY - fontSize <= localContext.MARGIN) {
        localContext = addPage(fontSize, font)
      }

      val textWidth = fontSize * font.getStringWidth(line) / 1000F
      val currentX = startX + (maxCentreWidth - textWidth) / 2f

      localContext.contentStream.beginText()
      localContext.contentStream.newLineAtOffset(currentX, currentY)

      localContext.contentStream.beginMarkedContent(COSName.P, PDPropertyList.create(context.currentMarkedDictionary))
      localContext.contentStream.showText(line)
      localContext.contentStream.endMarkedContent()
      setNextMarkedDictionary

      if (line.contains("\n") || line.contains("\\n") || line.equals("")) {
        currentY -= fontSize
        localContext.contentStream.beginMarkedContent(COSName.ARTIFACT, PDPropertyList.create(context.currentMarkedDictionary))
        localContext.contentStream.newLineAtOffset(currentX, currentY)
        localContext.contentStream.endMarkedContent()
        setNextMarkedDictionary
      }

      localContext.contentStream.endText()

      currentY -= fontSize
      localContext.currentY = currentY

    }

    localContext
  }

  private def setNextMarkedDictionary(implicit context: PDFContext): Unit = {
    context.currentMCID += 1
    context.currentMarkedDictionary = new COSDictionary()
    context.currentMarkedDictionary.setInt(COSName.MCID, context.currentMCID)
  }

  //Add a structure element to a parent structure element with optional marked content given a non-null name param.//Add a structure element to a parent structure element with optional marked content given a non-null name param.
  private def addContentToParent(name: COSName, structureType: String, currentPage: PDPage, parent: PDStructureElement)(implicit context: PDFContext) = {
    //Create a structure element and add it to the current section.
    var structureElement: PDStructureElement = null
    if (structureType != null) {
      structureElement = new PDStructureElement(structureType, parent)
      structureElement.setPage(currentPage)
    }
    //If COSName is not null then there is marked content.
    if (name != null) {
      //numDict for parent tree
      val numDict = new COSDictionary()
      numDict.setInt(COSName.K, context.currentMCID)
      numDict.setItem(COSName.PG, currentPage.getCOSObject)
      if (structureElement != null) {
        if (!COSName.ARTIFACT.equals(name)) structureElement.appendKid(new PDMarkedContent(name, context.currentMarkedDictionary))
        else structureElement.appendKid(new PDArtifactMarkedContent(context.currentMarkedDictionary))
        numDict.setItem(COSName.P, structureElement.getCOSObject)
      }
      else {
        if (!COSName.ARTIFACT.equals(name)) parent.appendKid(new PDMarkedContent(name, context.currentMarkedDictionary))
        else parent.appendKid(new PDArtifactMarkedContent(context.currentMarkedDictionary))
        numDict.setItem(COSName.P, parent.getCOSObject)
      }
      numDict.setName(COSName.S, name.getName)
      context.numDictionaries.add(numDict)
    }
    if (structureElement != null) parent.appendKid(structureElement)
    structureElement
  }

  //Adds the parent tree to root struct element to identify tagged content//Adds the parent tree to root struct element to identify tagged content

  private def addParentTree(implicit context: PDFContext): Unit = {
    val dict = new COSDictionary()
    context.nums.add(context.numDictionaries)
    var i = 1
    while (i < 1) {
      context.nums.add(COSInteger.get(i))

      i += 1
    }
    dict.setItem(COSName.NUMS, context.nums)
    val numberTreeNode = new PDNumberTreeNode(dict, dict.getClass)
    context.document.getDocumentCatalog.getStructureTreeRoot.setParentTree(numberTreeNode)
    context.document.getDocumentCatalog.getStructureTreeRoot.appendKid(context.root)
  }

}
0

There are 0 best solutions below