I wrote a program to extract link pictures from webcomics, however, when I run it, it just extracts image links from the last link chapter, not all image links from all chapters. What is the issue with my program? I have tried several ways but not things useful.
from PyQt5 import QtNetwork, QtCore
from requests_html import HTML
from functools import cached_property
from PyQt5.QtCore import QCoreApplication, QUrl
url1 = "https://saytruyen.net/truyen-su-tro-lai-cua-phap-su-hac-am-sau-66666-nam.html"
class Manager:
def __init__(self):
self.manager.finished.connect(self.handle_response)
@cached_property
def manager(self):
return QtNetwork.QNetworkAccessManager()
def start(self):
self.start_request(QtCore.QUrl(url1))
def start_request(self, url):
request = QtNetwork.QNetworkRequest(url)
self.manager.get(request)
def handle_response(self, reply):
err = reply.error()
if err == QtNetwork.QNetworkReply.NoError:
self.process(str(reply.readAll(), 'utf-8'))
else:
print("Error occured: ", err)
print(reply.errorString())
def process(self, data):
html = HTML(html=data)
rs = html.find("#list-chapter a", first=False)
for i in reversed(rs):
url2 = "https://saytruyen.net/" + i.attrs["href"]
#print(url2)
#self.start_request(QtCore.QUrl(url2))
req = QtNetwork.QNetworkRequest(QUrl(url2))
self.nam = QtNetwork.QNetworkAccessManager()
self.nam.finished.connect(self.handleResponse)
self.nam.get(req)
def handleResponse(self, reply):
er = reply.error()
if er == QtNetwork.QNetworkReply.NoError:
bytes_string = reply.readAll()
html2 = HTML(html = str(bytes_string, 'utf-8'))
rs_c = html2.find("#lst_content img")
for x in rs_c:
img ="https://saytruyen.net/" + x.attrs['src']
print(img)
else:
print("Error occured: ", er)
print(reply.errorString())
QCoreApplication.quit()
There are two problems:
The solution is to create a single manager for the download process in the
__init__, and quit as soon as all requests have been received.Note that it's usually enough (and better) to have a single network manager and properly handle responses based on queued requests, but for simple situations like this one having two managers doesn't represent a huge problem.