I have this python3 script that accepts pdf files using a pygui dialog box and checks if the URL on every page works fine or if the URL is broken.
This script does not return the list of URLs mentioned in the script code and closes the dialog after I click the Extract URLs button.
Can someone please check what's wrong here and why this is closing the 1st window and not running further as n not displaying the window with Save to File button.
import PyPDF2
import requests
import PySimpleGUI as sg
# Function to extract URLs from a PDF
def extract_urls_from_pdf(file_path):
# Open the PDF file
with open(file_path, 'rb') as pdf_file:
# Create a PDF reader object
pdf_reader = PyPDF2.PdfFileReader(pdf_file)
urls = []
# Loop through each page of the PDF
for page_num in range(pdf_reader.numPages):
# Get the page object
page = pdf_reader.getPage(page_num)
# Extract the annotations from the page
annotations = page.get('/Annots')
# If there are no annotations, skip to the next page
if not annotations:
continue
# Loop through each annotation
for annotation in annotations:
# Get the annotation dictionary
annotation_dict = annotation.getObject()
# If the annotation is a link, extract the URL
if annotation_dict.get('/Subtype') == '/Link':
url_dict = annotation_dict.get('/A')
if url_dict is not None:
url = url_dict.get('/URI')
if url is not None:
urls.append(url)
return urls
# Define the PySimpleGUI layout
layout = [
[sg.Text('Select a PDF file to extract URLs from:')],
[sg.Input(key='file_path'), sg.FileBrowse()],
[sg.Button('Extract URLs'), sg.Button('Exit')],
[sg.Output(size=(80, 20), key='output')]
]
# Create the PySimpleGUI window
window = sg.Window('PDF URL Extractor', layout)
# Event loop to process events and get input from the user
while True:
event, values = window.read()
if event == sg.WINDOW_CLOSED or event == 'Exit':
break
if event == 'Extract URLs':
# Get the file path from the input field
file_path = values['file_path']
# Extract URLs from the PDF
urls = extract_urls_from_pdf(file_path)
# Display the extracted URLs in the output window
output_window_layout = [
[sg.Text('Extracted URLs:', font=('Arial', 12))],
[sg.Multiline('\n'.join(urls), size=(80, 20), key='output', disabled=True)],
[sg.Button('Save to File'), sg.Button('Close')]
]
output_window = sg.Window('URL Extraction Result', output_window_layout)
while True:
output_event, output_values = output_window.read()
if output_event == sg.WINDOW_CLOSED or output_event == 'Close':
break
if output_event == 'Save to File':
# Get the output text from the window
output_text = output_values['output']
# Open a file dialog to save the URLs to a text file
save_layout = [
[sg.Text('Save URLs to Text File')],
[sg.Input(key='save_path'), sg.FileSaveAs()],
[sg.Button('Save'), sg.Button('Cancel')]
]
save_window = sg.Window('Save URLs', save_layout)
save_event, save_values = save_window.read()
if save_event == 'Save':
# Get the save file path
save_file_path = save_values['save_path']
# Save the output text to the file
with open(save_file_path, 'w') as save_file:
save_file.write(output_text)
sg.popup('URLs saved to file.')
save_window.close()
output_window.close()
window.close()
I have tried this script without the pygui code and it works fine but I was expecting if I can add a pygui code like the above, users will be able to run on the local system.