My goal is to get a new IP address on every request (or after N attempts) while scraping. I currently have a scraping process that uses Python + Selenium, and I need the IP to change in order to bypass bot detection.
I read that a new IP can be obtained by using Tor as a rotating proxy (sources: medium & stackoverflow).
I adapted the code from both sources to my own, shown here. I also downloaded Tor Browser for Windows here, which gives me a Firefox executable at the path 'Tor Browser\Browser\firefox.exe', and I use it in my code:
# from selenium import webdriver
from seleniumwire import webdriver
# from webdriver_manager.chrome import ChromeDriverManager
# from selenium.webdriver.chrome.service import Service
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import os
import time
import subprocess
from fake_useragent import UserAgent
# selenium-wire options: send both HTTP and HTTPS traffic to the upstream
# proxy listening on localhost:8088, with no host excluded from proxying.
options_wire = dict(
    proxy=dict(
        http="http://localhost:8088",
        https="https://localhost:8088",
        no_proxy="",
    ),
)
def firefox_init():
    """Restart the local tor process and return a random user-agent string.

    Any running tor process is stopped (best effort), then a new one is
    started with ``--HTTPTunnelPort 8088`` -- the port ``options_wire``
    points at.

    Returns:
        A random user-agent string produced by ``fake_useragent.UserAgent``.

    Raises:
        OSError: if the ``tor`` executable cannot be started (for example
            because it is not on ``PATH``).
    """
    # `killall` is not available on Windows; `taskkill` is the equivalent.
    if os.name == "nt":
        kill_command = ["taskkill", "/F", "/IM", "tor.exe"]
    else:
        kill_command = ["killall", "tor"]
    try:
        # Best effort: a non-zero exit status only means no tor was running.
        subprocess.run(kill_command, check=False)
    except OSError:
        # The kill utility itself is missing, so there is nothing to clean
        # up with; carry on and start a fresh tor below.
        pass
    time.sleep(1)  # give the old process a moment to release its ports

    user_agent = UserAgent().random

    # Argument-list form (no shell): a missing tor binary raises here instead
    # of failing silently inside a shell.
    subprocess.Popen(["tor", "--HTTPTunnelPort", "8088"])
    time.sleep(2)  # wait briefly for tor to start listening
    return user_agent
def get_driver():
    """Launch Tor Browser and return a configured selenium-wire Firefox driver.

    Steps, in order:
      1. Start the Tor Browser executable.
      2. Call ``firefox_init()`` to restart tor and obtain a user agent.
      3. Build Firefox options (images off, notifications off, user-agent
         override, private browsing, manual SOCKS5 proxy on 127.0.0.1:9150).
      4. Create the driver with ``options_wire`` as its selenium-wire options
         and hide ``navigator.webdriver`` on the current page.

    Returns:
        The ``seleniumwire`` Firefox WebDriver instance.

    Raises:
        OSError: if the Tor Browser executable cannot be started.
    """
    # Raw string: in a normal literal the "\f" of "\firefox.exe" is a
    # form-feed escape, which silently corrupts the path.
    tor_binary_path_driver = r'C:\Program Files\Tor Browser\Browser\firefox.exe'
    # Argument-list form so the space in "Program Files" is not split by a
    # shell into a separate command word.
    subprocess.Popen([tor_binary_path_driver])

    user_agent = firefox_init()

    preferences = {
        'permissions.default.image': 2,
        # Boolean pref: pass a real bool, not the string 'false'.
        'dom.ipc.plugins.enabled.libflashplayer.so': False,
        'dom.webnotifications.enabled': False,
        'general.useragent.override': user_agent,
        # The pref lives under "browser.", not "driver.".
        'browser.privatebrowsing.autostart': True,
        'dom.webdriver.enabled': False,
        'network.proxy.type': 1,  # 1 = manual proxy configuration
        # Firefox takes the SOCKS host and port as two separate prefs; a
        # combined "host:port" string is not a valid host.
        'network.proxy.socks': '127.0.0.1',
        'network.proxy.socks_port': 9150,
        'network.proxy.socks_version': 5,
    }
    options = webdriver.FirefoxOptions()
    for pref_name, pref_value in preferences.items():
        options.set_preference(pref_name, pref_value)

    driver = webdriver.Firefox(
        service=Service(GeckoDriverManager().install()),
        options=options,
        seleniumwire_options=options_wire,
    )
    # Make navigator.webdriver read as undefined in the current document.
    driver.execute_script(
        "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
    )
    return driver
# Open a page that echoes the caller's IP and print what the browser sees.
driver = get_driver()
try:
    driver.get('https://httpbin.org/ip') #"https://www.ipify.org"
    current_ip = driver.find_element(By.TAG_NAME, 'body')
    print("Current IP Address:", current_ip.text)
finally:
    # Always shut the browser and geckodriver down, even if the page load or
    # element lookup fails; otherwise the processes are left running.
    driver.quit()
In the last lines I check which IP address is being used by opening a page that reports it, but the IP shown is my actual IP, whereas I expect it to be some random IP.
Thanks for any help, and let me know if you need any clarification.
About my configuration and libraries:
- Windows 11
- Python 3.11.7
- selenium 4.17.2
- seleniumwire 5.1.0
- webdriver_manager 4.0.1