I'm trying to run a code to do some web scraping in Twitter but I keep having an error that I don't manage to solve. I guess it is related to selenium webdriver, but I am not very familiar with it and I haven't found any solutions so far.
This is the code:
import pandas
import numpy
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
class SeleniumClient(object):
def __init__(self):
#Initialization method.
self.chrome_options = webdriver.ChromeOptions('D:/chromedriver_win32/chromedriver', options=self.chrome_options)
self.chrome_options.add_argument('--headless')
self.chrome_options.add_argument('--no-sandbox')
self.chrome_options.add_argument('--disable-setuid-sandbox')
# you need to provide the path of chromdriver in your system
self.browser = webdriver.Chrome('C:\Program Files\Google\chromedriver.exe')
self.base_url = 'https://twitter.com/search?q='
def get_tweets(self, query):
'''
Function to fetch tweets.
'''
try:
self.browser.get(self.base_url+query)
time.sleep(2)
body = self.browser.find_element_by_tag_name('body')
for _ in range(3000):
body.send_keys(Keys.PAGE_DOWN)
time.sleep(0.3)
timeline = self.browser.find_element_by_id('timeline')
tweet_nodes = timeline.find_elements_by_css_selector('.tweet-text')
return pd.DataFrame({'tweets': [tweet_node.text for tweet_node in tweet_nodes]})
except:
print("Selenium - An error occured while fetching tweets.")
#this next line is the one that gives the error, but it must be located in the class
selenium_client = SeleniumClient()
tweets_df = selenium_client.get_tweets('AI and Deep learning')
This is what the error says:
AttributeError Traceback (most recent call last)
<ipython-input-5-3bd40446c1fd> in <module>
----> 1 selenium_client = SeleniumClient()
2 #tweets_df = selenium_client.get_tweets('AI and Deep learning')
<ipython-input-3-f0c81bf234aa> in __init__(self)
4 def __init__(self):
5 #Initialization method.
----> 6 self.chrome_options = webdriver.ChromeOptions('D:/chromedriver_win32/chromedriver', options=self.chrome_options)
7 self.chrome_options.add_argument('--headless')
8 self.chrome_options.add_argument('--no-sandbox')
AttributeError: 'SeleniumClient' object has no attribute 'chrome_options'
Possibly you are using selenium3 and using
Chrome()
the default argument isexecutable_path
which takes the absolute path of the ChromeDriver executable. Where as you have passed the absolute path of the Chrome executable.Moreover,
ChromeOptions()
object doesn't needs any location as an argument.Effectively your code block will be: