I'm trying to use python requests to retrieve results from the below URL using these form inputs:
"Residential Building" in the "Record Type" field
"02/25/2020" and "02/23/2024" in the Start Date and End Date fields
The problem is that when I send the post request the response is just the javascript and HTML without the table that is generated when you submit the form with inputs.
Here's my code so far:
import requests
from bs4 import BeautifulSoup
with requests.Session() as connection:
main_url = "https://aca-prod.accela.com/BROOKLINE/Cap/CapHome.aspx?module=Building&TabName=Building&TabList=Home%7C0%7CBuilding%7C1%7CPublicWorks%7C2%7CFire%7C3%7CClerkOffice%7C4%7CBoardOfHealth%7C5%7CLicenses%7C6%7CPlanning%7C7%7CZoning%7C8%7CHistoric%7C9%7CParking%7C10%7CEnforcement%7C11%7CPolice%7C12%7CCurrentTabIndex%7C1"
connection.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) " \
"AppleWebKit/537.36 (KHTML, like Gecko) " \
"Chrome/90.0.4430.212 Safari/537.36"
r = connection.get(main_url)
soup = BeautifulSoup(r.content, 'html.parser')
cookie_dict = r.cookies.get_dict()
cookie_aspx = cookie_dict['.ASPXANONYMOUS']
cookie_cs_key = cookie_dict['ACA_CS_KEY']
cookie_ss_store = cookie_dict['ACA_SS_STORE']
cookie_culture = cookie_dict['ACA_USER_PREFERRED_CULTURE']
cookie_affinity = cookie_dict['ApplicationGatewayAffinity']
cookie_affinity_cors = cookie_dict['ApplicationGatewayAffinityCORS']
cookie_request_time = cookie_dict['LASTEST_REQUEST_TIME']
cookie_string = f'''TabNav=Home|0|Building|1|PublicWorks|2|Fire|3|ClerkOffice|4|BoardOfHealth|5|Licenses|6|Planning|7|Zoning|8|Historic|9|Parking|10|Enforcement|11|Police|12|CurrentTabIndex|1; ApplicationGatewayAffinityCORS={cookie_affinity_cors}; ApplicationGatewayAffinity={cookie_affinity}; ACA_SS_STORE={cookie_ss_store}; ACA_USER_PREFERRED_CULTURE={cookie_culture}; ACA_CS_KEY={cookie_cs_key}; .ASPXANONYMOUS={cookie_aspx}; LASTEST_REQUEST_TIME={cookie_request_time}; _dd_s=rum=1&id=5489dd8c-5f35-4c34-97a9-4aaf55d42227&created=1708710852124&expire=1708711752124'''
post_url = main_url
connection.headers.update(
{
"Accept": "*/*",
"Accept-Encoding": "identity",
"Accept-Language": "en-US,en;q=0.9",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Length": "59009",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"Cookie": cookie_string,
"Host": "aca-prod.accela.com",
"Origin": "https://aca-prod.accela.com",
"Referer": "https://aca-prod.accela.com/BROOKLINE/Cap/CapHome.aspx?module=Building&TabName=Building&TabList=Home%7c0%7cBuilding%7c1%7cPublicWorks%7c2%7cFire%7c3%7cClerkOffice%7c4%7cBoardOfHealth%7c5%7cLicenses%7c6%7cPlanning%7c7%7cZoning%7c8%7cHistoric%7c9%7cParking%7c10%7cEnforcement%7c11%7cPolice%7c12%7cCurrentTabIndex%7c1",
"Sec-Ch-Ua": "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"",
"Sec-Ch-Ua-Mobile": "?0",
"Sec-Ch-Ua-Platform": "\"macOS\"",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"X-Requested-With":"XMLHttpRequest",
}
)
payload_aca = soup.find('input', id='ACA_CS_FIELD').get('value')
payload_event = soup.find('input', id='__EVENTTARGET').get('value')
payload_eventarg = soup.find('input', id='__EVENTARGUMENT').get('value')
payload_last = soup.find('input', id='__LASTFOCUS').get('value')
payload_encrypt = soup.find('input', id='__VIEWSTATEENCRYPTED').get('value')
payload_view = soup.find('input', id='__VIEWSTATE').get('value')
payload_generator = soup.find('input', id='__VIEWSTATEGENERATOR').get('value')
payload = {"ctl00$ScriptManager1": "ctl00$PlaceHolderMain$updatePanel|ctl00$PlaceHolderMain$btnNewSearch",
"ACA_CS_FIELD": payload_aca,
"__EVENTTARGET": payload_event,
"__VIEWSTATE": payload_view,
"__VIEWSTATEENCRYPTED": payload_encrypt,
"__LASTFOCUS":payload_last,
"__EVENTARGUMENT":payload_eventarg,
"__VIEWSTATEGENERATOR": payload_generator,
"ctl00$PlaceHolderMain$generalSearchForm$ddlGSPermitType": "Building/1&2 Family/Residential/NA",
"ctl00$HeaderNavigation$hdnShowReportLink": "N",
"ctl00$PlaceHolderMain$addForMyPermits$collection": "rdoNewCollection",
"ctl00$PlaceHolderMain$addForMyPermits$txtName": "name",
"ctl00$PlaceHolderMain$ddlSearchType": '0',
"ctl00$PlaceHolderMain$generalSearchForm$txtGSStartDate": '02/25/2020',
"ctl00$PlaceHolderMain$generalSearchForm$txtGSEndDate": '02/23/2024',
"ctl00$PlaceHolderMain$generalSearchForm$txtGSAppZipSearchPermit_ZipFromAA": '0',
"Submit": "Submit",
"__ASYNCPOST": True}
response = connection.post(post_url, data=payload)
output = BeautifulSoup(response.text, "lxml")
I'm using a session to populate the hidden inputs. Don't know why the results are still not showing up in the response.
Any ideas on how I can resolve this? Does it have something to do with the cookie_string, specifically the "_dd_s" cookie? I believe it is a DataDog cookie to make sure that it's a real user...
TIA!