I'm trying to create a headless web scraper on cloud function. I have used Selenium to automate the driver provided by the Webdriver manager.
Can you please tell me how to change the wdm.cachePath according to virtual environment? Below is my code and the error I'm getting.
import os
import logging
# selenium 4
os.environ['GH_TOKEN'] = "gkjkjhjkhjhkjhuihjhgjhg"
os.environ['WDM_LOG'] = str(logging.NOTSET)
os.environ['WDM_LOCAL'] = '1'
os.environ['WDM_SSL_VERIFY'] = '0'
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
def hello_world(request):
"""Responds to any HTTP request.
Args:
request (flask.Request): HTTP request object.
Returns:
The response text or any set of values that can be turned into a
Response object using
`make_response <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>`.
"""
# instance of Options class allows
# us to configure Headless Chrome
options = Options()
print("options")
options.headless = True
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager("2.26", cache_valid_range=1, path = r".\\temp\\Drivers").install()
), options=options)
print("driver was initiated")
# this parameter tells Chrome that
# it should be run without UI (Headless)
# initializing webdriver for Chrome with our options
# driver = webdriver.Chrome(options=options)
# driver = webdriver.Chrome(ChromeDriverManager(path = r"/temp/data").install())
request_json = request.get_json()
if request_json and 'url' in request_json:
url = request_json['url']
driver.get('https://www.geeksforgeeks.org')
print(driver.title)
driver.close()
return f'Success!'
else:
return f'Not run'
Error logs -
Traceback (most recent call last): File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 2525, in wsgi_app response = self.full_dispatch_request() File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 1822, in full_dispatch_request rv = self.handle_user_exception(e) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 1820, in full_dispatch_request rv = self.dispatch_request() File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 1796, in dispatch_request return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/functions_framework/__init__.py", line 98, in view_func return function(request._get_current_object()) File "/workspace/main.py", line 28, in hello_world driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager("2.26", cache_valid_range=1, path = r".\\temp\\Drivers").install() File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/chrome.py", line 39, in install driver_path = self._get_driver_path(self.driver) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/core/manager.py", line 31, in _get_driver_path binary_path = self.driver_cache.save_file_to_cache(driver, file) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/core/driver_cache.py", line 45, in save_file_to_cache archive = save_file(file, path) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/core/utils.py", line 38, in save_file os.makedirs(directory, exist_ok=True) File "/layers/google.python.runtime/python/lib/python3.10/os.py", line 215, in makedirs makedirs(head, exist_ok=exist_ok) File "/layers/google.python.runtime/python/lib/python3.10/os.py", line 215, in makedirs makedirs(head, exist_ok=exist_ok) File "/layers/google.python.runtime/python/lib/python3.10/os.py", line 215, in makedirs makedirs(head, exist_ok=exist_ok)
I think the error is caused due to web driver manager trying to save the driver to cache is some static path, I already changed the path setting using
path = r".\\temp\\Drivers"
How to do it correctly?
So I figured this out...