I am trying to get dates that trades occurred from an Opensea NFT exchange collection. My code works for getting several of the values on the page but in order to get the exact date of the trade I need to move the cursor over a general date (2 months) and then get data from a pop up window. The code is able to get all the dates from the initial load but when the page scrolls down I get a "Message: stale element reference: element is not attached to the page document" error.
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
options = Options()
options.add_experimental_option("detach", True)
options.add_argument("start-maximized")
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)
# driver.get("https://opensea.io/activity?search[collections][0]=fvckrender&search[collections][1]=artifex-fvckrender&search[collections][2]=fvck-limited&search[collections][3]=unidentified-contract-kg9mf80eue&search[eventTypes][0]=AUCTION_SUCCESSFUL")
driver.get(
"https://opensea.io/activity/artifex-fvckrender?search[collections][0]=artifex-fvckrender&search[eventTypes][0]=AUCTION_SUCCESSFUL")
driver.implicitly_wait(5)
# Code for scrolling down the page
pre_scroll_height = driver.execute_script('return document.body.scrollHeight;')
run_time, max_run_time = 0, 1
list_of_dates = []
while True:
# Gets all available items that have loaded on the page.
list_items = driver.find_elements(By.XPATH, "//div[@role='listitem']")
# Getting values from parent element by digging down to children elements
for item in list_items:
# Getting time stamps
WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((By.XPATH, ".//a[@class='sc-1f719d57-0 fKAlPV EventTimestamp--link']"))) # Timestamp
time_stamp = item.find_element(
By.XPATH, ".//a[@class='sc-1f719d57-0 fKAlPV EventTimestamp--link']")
ActionChains(driver).move_to_element(time_stamp).perform()
dates_container = item.find_element(
By.XPATH, ".//div[@data-tippy-root]")
dates_parent = item.find_element(
By.XPATH, ".//div[@class='tippy-content']/div[1]")
list_of_dates.append(dates_parent.get_attribute("innerHTML"))
print(list_of_dates)
iteration_start = time.time()
# Scroll webpage, the 100 allows for a more 'aggressive' scroll
driver.execute_script(
'window.scrollTo(0, 100*document.body.scrollHeight);')
time.sleep(5)
post_scroll_height = driver.execute_script(
'return document.body.scrollHeight;')
scrolled = post_scroll_height != pre_scroll_height
timed_out = run_time >= max_run_time
if scrolled:
run_time = 0
pre_scroll_height = post_scroll_height
elif not scrolled and not timed_out:
run_time += time.time() - iteration_start
elif not scrolled and timed_out:
break
print(list_of_dates)
The output of the print(list_of_dates) is:
['April 26, 2022 at 12:30 PM', 'March 9, 2022 at 11:45 PM', 'January 14, 2022 at 6:18 PM', 'October 18, 2021 at 9:03 PM', 'October 22, 2021 at 11:08 AM', 'March 9, 2022 at 5:10 PM', 'March 14, 2022
at 11:48 AM', 'March 13, 2022 at 4:18 AM', 'March 18, 2022 at 8:05 AM', 'November 29, 2022 at 7:37 AM', 'July 8, 2022 at 8:13 AM', 'December 8, 2022 at 4:07 PM']
But it then fails on the second run through after the page has scrolled and I get the message:
Traceback (most recent call last):
File "c:\Users\persi\Projects\Python\data-scrape-opensea\main.py", line 37, in <module>
time_stamp = item.find_element(
^^^^^^^^^^^^^^^^^^
File "C:\Users\persi\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webelement.py", line 433, in find_element
return self._execute(Command.FIND_CHILD_ELEMENT, {"using": by, "value": value})["value"]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\persi\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webelement.py", line 410, in _execute
return self._parent.execute(command, params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\persi\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 444, in execute
self.error_handler.check_response(response)
File "C:\Users\persi\AppData\Local\Programs\Python\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 249, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=109.0.5414.75)
Stacktrace:
Backtrace:
(No symbol) [0x00986643]
(No symbol) [0x0091BE21]
(No symbol) [0x0081DA9D]
(No symbol) [0x008209E4]
(No symbol) [0x008208AD]
(No symbol) [0x00820B30]
(No symbol) [0x00850FAC]
(No symbol) [0x0085147B]
(No symbol) [0x008464C1]
(No symbol) [0x0086FDC4]
(No symbol) [0x0084641F]
(No symbol) [0x008700D4]
(No symbol) [0x00886B09]
(No symbol) [0x0086FB76]
(No symbol) [0x008449C1]
(No symbol) [0x00845E5D]
GetHandleVerifier [0x00BFA142+2497106]
GetHandleVerifier [0x00C285D3+2686691]
GetHandleVerifier [0x00C2BB9C+2700460]
GetHandleVerifier [0x00A33B10+635936]
(No symbol) [0x00924A1F]
(No symbol) [0x0092A418]
(No symbol) [0x0092A505]
(No symbol) [0x0093508B]
BaseThreadInitThunk [0x771B00F9+25]
RtlGetAppContainerNamedObjectPath [0x77457BBE+286]
RtlGetAppContainerNamedObjectPath [0x77457B8E+238]
document
Line 37 corresponds to the time_stamp variable.