Python POST request to a form of a webpage does not retrieve correct HTML

29 Views Asked by At

I'm trying to use python requests to retrieve results from the below URL using these form inputs:

  • "Residential Building" in the "Record Type" field

  • "02/25/2020" and "02/23/2024" in the Start Date and End Date fields

https://aca-prod.accela.com/BROOKLINE/Cap/CapHome.aspx?module=Building&TabName=Building&TabList=Home%7C0%7CBuilding%7C1%7CPublicWorks%7C2%7CFire%7C3%7CClerkOffice%7C4%7CBoardOfHealth%7C5%7CLicenses%7C6%7CPlanning%7C7%7CZoning%7C8%7CHistoric%7C9%7CParking%7C10%7CEnforcement%7C11%7CPolice%7C12%7CCurrentTabIndex%7C1

The problem is that when I send the post request the response is just the javascript and HTML without the table that is generated when you submit the form with inputs.

Here's my code so far:


import requests
from bs4 import BeautifulSoup

with requests.Session() as connection:
    main_url = "https://aca-prod.accela.com/BROOKLINE/Cap/CapHome.aspx?module=Building&TabName=Building&TabList=Home%7C0%7CBuilding%7C1%7CPublicWorks%7C2%7CFire%7C3%7CClerkOffice%7C4%7CBoardOfHealth%7C5%7CLicenses%7C6%7CPlanning%7C7%7CZoning%7C8%7CHistoric%7C9%7CParking%7C10%7CEnforcement%7C11%7CPolice%7C12%7CCurrentTabIndex%7C1"
    
    connection.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) " \
                                       "AppleWebKit/537.36 (KHTML, like Gecko) " \
                                       "Chrome/90.0.4430.212 Safari/537.36"
    r = connection.get(main_url)
    soup = BeautifulSoup(r.content, 'html.parser')
    
    cookie_dict = r.cookies.get_dict()
    cookie_aspx = cookie_dict['.ASPXANONYMOUS']
    cookie_cs_key = cookie_dict['ACA_CS_KEY']
    cookie_ss_store = cookie_dict['ACA_SS_STORE']
    cookie_culture = cookie_dict['ACA_USER_PREFERRED_CULTURE']
    cookie_affinity = cookie_dict['ApplicationGatewayAffinity']
    cookie_affinity_cors = cookie_dict['ApplicationGatewayAffinityCORS']
    cookie_request_time = cookie_dict['LASTEST_REQUEST_TIME']

    cookie_string = f'''TabNav=Home|0|Building|1|PublicWorks|2|Fire|3|ClerkOffice|4|BoardOfHealth|5|Licenses|6|Planning|7|Zoning|8|Historic|9|Parking|10|Enforcement|11|Police|12|CurrentTabIndex|1; ApplicationGatewayAffinityCORS={cookie_affinity_cors}; ApplicationGatewayAffinity={cookie_affinity}; ACA_SS_STORE={cookie_ss_store}; ACA_USER_PREFERRED_CULTURE={cookie_culture}; ACA_CS_KEY={cookie_cs_key}; .ASPXANONYMOUS={cookie_aspx}; LASTEST_REQUEST_TIME={cookie_request_time}; _dd_s=rum=1&id=5489dd8c-5f35-4c34-97a9-4aaf55d42227&created=1708710852124&expire=1708711752124'''
    
    post_url = main_url
    connection.headers.update(
        {
            "Accept": "*/*",
            "Accept-Encoding": "identity",
            "Accept-Language": "en-US,en;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Length": "59009",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Cookie": cookie_string,
            "Host": "aca-prod.accela.com",
            "Origin": "https://aca-prod.accela.com",
            "Referer": "https://aca-prod.accela.com/BROOKLINE/Cap/CapHome.aspx?module=Building&TabName=Building&TabList=Home%7c0%7cBuilding%7c1%7cPublicWorks%7c2%7cFire%7c3%7cClerkOffice%7c4%7cBoardOfHealth%7c5%7cLicenses%7c6%7cPlanning%7c7%7cZoning%7c8%7cHistoric%7c9%7cParking%7c10%7cEnforcement%7c11%7cPolice%7c12%7cCurrentTabIndex%7c1",
            "Sec-Ch-Ua": "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"",
            "Sec-Ch-Ua-Mobile": "?0",
            "Sec-Ch-Ua-Platform": "\"macOS\"",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "X-Requested-With":"XMLHttpRequest",
        }
    )
    
    payload_aca = soup.find('input', id='ACA_CS_FIELD').get('value')
    payload_event = soup.find('input', id='__EVENTTARGET').get('value')
    payload_eventarg = soup.find('input', id='__EVENTARGUMENT').get('value')
    payload_last = soup.find('input', id='__LASTFOCUS').get('value')
    payload_encrypt = soup.find('input', id='__VIEWSTATEENCRYPTED').get('value')    
    payload_view = soup.find('input', id='__VIEWSTATE').get('value')
    payload_generator = soup.find('input', id='__VIEWSTATEGENERATOR').get('value')
    
    payload = {"ctl00$ScriptManager1": "ctl00$PlaceHolderMain$updatePanel|ctl00$PlaceHolderMain$btnNewSearch",
        "ACA_CS_FIELD": payload_aca,
        "__EVENTTARGET": payload_event,
        "__VIEWSTATE": payload_view,
        "__VIEWSTATEENCRYPTED": payload_encrypt,
        "__LASTFOCUS":payload_last,
        "__EVENTARGUMENT":payload_eventarg,
        "__VIEWSTATEGENERATOR": payload_generator,
        "ctl00$PlaceHolderMain$generalSearchForm$ddlGSPermitType": "Building/1&2 Family/Residential/NA",
        "ctl00$HeaderNavigation$hdnShowReportLink": "N",
        "ctl00$PlaceHolderMain$addForMyPermits$collection": "rdoNewCollection",
        "ctl00$PlaceHolderMain$addForMyPermits$txtName": "name",
        "ctl00$PlaceHolderMain$ddlSearchType": '0',
        "ctl00$PlaceHolderMain$generalSearchForm$txtGSStartDate": '02/25/2020',
        "ctl00$PlaceHolderMain$generalSearchForm$txtGSEndDate": '02/23/2024',
        "ctl00$PlaceHolderMain$generalSearchForm$txtGSAppZipSearchPermit_ZipFromAA": '0',
        "Submit": "Submit",
        "__ASYNCPOST": True}
    
    response = connection.post(post_url, data=payload)
    output = BeautifulSoup(response.text, "lxml")

I'm using a session to populate the hidden inputs. Don't know why the results are still not showing up in the response.

Any ideas on how I can resolve this? Does it have something to do with the cookie_string, specifically the "_dd_s" cookie? I believe it is a DataDog cookie to make sure that it's a real user...

TIA!

0

There are 0 best solutions below