How do I add a hyperlink to the top of each page in a PDF in Python?

49 Views Asked by At

We are posting scanned and OCRed documents on a website and need to add a link to each page so that people who find the pages via a search engine can easily get to the parent index of related documents.

I've been trying to work out a way to do this in Python using pypdf and have, so far, not had any luck. My strategy is for each document to create a page containing just the hyperlink (it needs to be different for each document) and then to merge that created page into each page in the document. I've messed around with code I've found in SE (mostly for PyPDF2, which I understand is deprecated, and which needs to be modified significantly for pypdf) without luck.

For example, from the pypdf doc pages, I tried:

from pypdf import PdfWriter, PdfReader

# First page of bg.pdf is the content merged into every page of the source.
background_page = PdfReader("bg.pdf").pages[0]

# Clone source.pdf into a writer so its pages can be modified in place.
output = PdfWriter(clone_from="source.pdf")
for target_page in output.pages:
    # over=False is the watermarking setting
    target_page.merge_page(background_page, over=False)

output.write("out.pdf")

and generated corrupt PDF files.

2

There are 2 best solutions below

0
Martin Thoma On BEST ANSWER

This works like a charm for me:

Prerequisites:

pip install pypdf==4.1.0
pip install fpdf2==2.7.8

code:

import fpdf  # pip install fpdf2
from fpdf.enums import XPos, YPos

import pypdf
from pypdf import PdfReader, PdfWriter


def generate_overlay(target_path: str, text: str, link: str) -> None:
    """Write a one-page PDF whose page header is a clickable hyperlink.

    Args:
        target_path: Where the generated overlay PDF is written.
        text: Visible link text drawn in the page header.
        link: URL attached to the header cell.
    """

    class PDF(fpdf.FPDF):
        def header(self) -> None:
            """Draw the blue, bold link text as a single linked cell."""
            self.set_font("helvetica", "B", 12)
            self.set_text_color(0, 0, 255)  # Blue color for the link
            # Measure with this document (self) rather than relying on the
            # enclosing function's `pdf` local being bound by the time the
            # header is rendered.
            link_width = self.get_string_width(text)
            link_height = 10
            self.cell(
                link_width,
                link_height,
                text=text,
                new_x=XPos.RIGHT,
                new_y=YPos.TOP,
                align="C",
                link=link,
            )

    pdf = PDF()
    pdf.add_page()
    pdf.output(target_path)


def stamp(
    original_path: str,
    stamp_path: str,
    out_path: str,
    *,
    over: bool = False,
) -> None:
    """Merge the first page of ``stamp_path`` into every page of a PDF.

    Args:
        original_path: PDF whose pages receive the stamp.
        stamp_path: PDF whose first page is merged into each page.
        out_path: Where the stamped copy is written.
        over: Forwarded to ``merge_page``. The default ``False`` keeps the
            previous behaviour (stamp merged as a watermark, beneath the
            existing content). Pass ``True`` to draw the stamp on top, e.g.
            when the pages are opaque scanned images that would otherwise
            hide it.
    """
    # Named stamp_page so the local does not shadow this function's own name.
    stamp_page = PdfReader(stamp_path).pages[0]
    writer = PdfWriter(clone_from=original_path)
    for page in writer.pages:
        page.merge_page(stamp_page, over=over)
    writer.write(out_path)



if __name__ == "__main__":
    # Report the library versions this example was run with.
    print(f"pypdf=={pypdf.__version__}")
    print(f"fpdf2=={fpdf.__version__}")
    stamp_path = "stamp.pdf"
    generate_overlay(stamp_path, "py-pdf.github.io", "https://py-pdf.github.io")
    # stamp() takes (original_path, stamp_path, out_path): the document to be
    # stamped comes first, the generated overlay second.
    stamp("GeoTopo.pdf", stamp_path, "out.pdf")

It should print:

pypdf==4.1.0
fpdf2==2.7.8
2
Mahfujur_Rahman On

from PyPDF2 import PdfWriter, PdfReader
from PyPDF2.generic import TextStringObject, Annotation

# Function to add hyperlink to each page
def add_hyperlink_to_page(page, hyperlink_text, hyperlink_url, x=100, y=100):
    """Attach a clickable URI link annotation to ``page`` (modified in place).

    A link annotation is only a clickable rectangle; it draws no visible
    text on the page.

    Args:
        page: PyPDF2 page object to annotate.
        hyperlink_text: Description stored in the annotation's ``/Contents``
            entry.
        hyperlink_url: URI opened when the rectangle is activated.
        x: Left edge of the clickable rectangle, in PDF user-space units.
        y: Bottom edge of the clickable rectangle, in PDF user-space units.
            The rectangle is 100 units wide and 20 units high.
    """
    # Imported locally so the function brings every PyPDF2 name it uses
    # into scope itself.
    from PyPDF2.generic import (
        ArrayObject,
        DictionaryObject,
        FloatObject,
        NameObject,
        NumberObject,
        TextStringObject,
    )

    # URI action fired when the link is clicked.
    uri_action = DictionaryObject()
    uri_action.update({
        NameObject("/Type"): NameObject("/Action"),
        NameObject("/S"): NameObject("/URI"),
        NameObject("/URI"): TextStringObject(hyperlink_url),
    })

    # Every value must be a PDF object; plain Python lists, dicts and numbers
    # cannot be serialised by the writer.
    link_annotation = DictionaryObject()
    link_annotation.update({
        NameObject("/Type"): NameObject("/Annot"),
        NameObject("/Subtype"): NameObject("/Link"),
        NameObject("/Rect"): ArrayObject(
            [FloatObject(x), FloatObject(y), FloatObject(x + 100), FloatObject(y + 20)]
        ),
        NameObject("/Border"): ArrayObject(
            [NumberObject(0), NumberObject(0), NumberObject(0)]
        ),  # No border
        NameObject("/Contents"): TextStringObject(hyperlink_text),
        NameObject("/A"): uri_action,
    })

    # Keep any annotations the page already has instead of overwriting them.
    annotations = ArrayObject()
    if "/Annots" in page:
        annotations.extend(page["/Annots"])
    annotations.append(link_annotation)
    page[NameObject("/Annots")] = annotations

# Read source.pdf, put the hyperlink on every page, and save the result as
# output.pdf. The output is written while the source file is still open.
with open("source.pdf", "rb") as src:
    pdf_in = PdfReader(src)
    pdf_out = PdfWriter()

    # Annotate each page, then hand it to the writer
    for current_page in pdf_in.pages:
        add_hyperlink_to_page(current_page, "Parent Index", "https://your-parent-index-url.com")
        pdf_out.add_page(current_page)

    # Serialise the collected pages
    with open("output.pdf", "wb") as dst:
        pdf_out.write(dst)