from pdfminer.high_level import extract_pages
from pdfminer.layout import LTContainer, LTTextContainer, LTChar


def mostrar_estructura(pagina):
    """Walk a page's layout tree and replace every "O" character with "X".

    Args:
        pagina: Iterable of layout elements, such as a page yielded by
            ``extract_pages``.

    Returns:
        None. Each ``LTChar`` whose text was replaced is printed.
    """
    def buscar_ltchar(elemento):
        """Visit *elemento* recursively, rewriting matching LTChar leaves."""
        if isinstance(elemento, LTChar):
            if elemento._text == "O":
                # NOTE(review): this only mutates the in-memory layout
                # object; nothing here writes back to the PDF file.
                elemento._text = "X"
                print(elemento)

        elif isinstance(elemento, LTContainer):
            for subelemento in elemento:
                buscar_ltchar(subelemento)

    # The driver loop belongs to the outer function: previously it was
    # nested inside the helper's container loop, so the page was never
    # traversed (and the helper would have recursed without end).
    for elemento in pagina:
        buscar_ltchar(elemento)

# Open the PDF file in binary read mode

# Process the document page by page; extract_pages yields pages lazily,
# so the file must stay open for the whole loop.
with open("estructura.pdf", "rb") as pdf_stream:
    paginas = extract_pages(pdf_stream)
    for pagina_num, pagina in enumerate(paginas):
        print(f"Estructura de la página {pagina_num + 1}:")
        mostrar_estructura(pagina)

When I try to save the result, whether with fitz or with PyPDF2's writer, I always get this error:

  File "C:\Users\David\AppData\Local\Programs\Python\Python312\Lib\site-packages\PyPDF2\_writer.py", line 258, in _add_page
    assert cast(str, page[PA.TYPE]) == CO.PAGE
                     ~~~~^^^^^^^^^

The page object is an LTPage, but I don't know how to convert it into a format that lets me save the changes.

0

There are 0 best solutions below