I know how to do this with the requests library: just a while True loop, and when I get an empty page or a 404 error, I break out of it. But with aiohttp I use gather(), and the only thing I've come up with is to cancel() all tasks when a page turns out to be empty, which also loses the tasks that are not done yet.
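
Roughly, this is what I mean by the requests version (the URL, CSS class and headers here are placeholders for my real ones):

import requests
from bs4 import BeautifulSoup as BS

headers = {}  # placeholder for the real request headers

def get_all_goods_sync():
    page = 1
    while True:
        r = requests.get(f'https://somewebsite?page={page}', headers=headers)
        if r.status_code == 404:  # no such page -> we are past the last one
            break
        soup = BS(r.text, 'lxml')
        goods = soup.find_all('div', class_='js_category-list-item')
        if not goods:  # empty page -> also done
            break
        for el in goods:
            print(el)
        page += 1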

import asyncio

import aiohttp
from bs4 import BeautifulSoup as BS

headers = {}  # placeholder for the real request headers

class SomeError(Exception):
    """Raised when a page contains no goods."""

async def get_goods_from_pages(session, page):
    url = f'https://somewebsite?page={page}'
    async with session.get(url, headers=headers) as r:
        soup = BS(await r.text(), 'lxml')

    all_goods = soup.find_all('div', class_='js_category-list-item')
    if all_goods:
        for el in all_goods:
            print(el)
    else:
        raise SomeError  # empty page -> no more goods

# collect all tasks and run them
async def get_pages_info():
    tasks = []
    async with aiohttp.ClientSession() as session:
        for page in range(1, 150):
            task = asyncio.create_task(get_goods_from_pages(session, page))
            tasks.append(task)
        try:
            group = asyncio.gather(*tasks)
            await group
        except Exception:
            group.cancel()  # this is where the tasks that are not done yet get lost

I also tried a while True loop where I await the function for one page at a time, but the parsing speed was very bad.
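
That slow attempt was basically this: the same coroutine, but awaited for one page at a time, so the requests never overlap:

async def get_pages_info_sequential():
    async with aiohttp.ClientSession() as session:
        page = 1
        while True:
            try:
                await get_goods_from_pages(session, page)  # one page at a time
            except SomeError:
                break  # empty page -> stop
            page += 1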
