I am having some issues:
Traceback (most recent call last):
File "/home/ec2-user/pulltweets/pull1.py", line 113, in <module>
stream_listener.add_rules(tweepy.StreamRule("#ClimateChange")) # Modify with your desired rule
File "/home/ec2-user/.local/lib/python3.9/site-packages/tweepy/streaming.py", line 333, in add_rules
return self._make_request(
File "/home/ec2-user/.local/lib/python3.9/site-packages/tweepy/client.py", line 129, in _make_request
response = self.request(method, route, params=request_params,
File "/home/ec2-user/.local/lib/python3.9/site-packages/tweepy/client.py", line 100, in request
raise Forbidden(response)
tweepy.errors.Forbidden: 403 Forbidden
When authenticating requests to the Twitter API v2 endpoints, you must use keys and tokens from a Twitter developer App that is attached to a Project. You can create a project via the developer portal.
I am trying to extract tweets from the Twitter API on an AWS EC2 instance (via the CLI, connected over PuTTY). I want to load the raw tweet data into AWS S3.
This is my python script:
import json
from datetime import datetime, timezone

import boto3
import pandas as pd
import tweepy
# AWS Secrets Manager
def get_twitter_secrets():
    """Fetch the Twitter API credentials from AWS Secrets Manager.

    Looks up the secret named 'twitter-api' in us-east-1.

    Returns:
        dict: the parsed SecretString payload, or
        None: if the lookup fails or the secret has no SecretString
        (e.g. a binary-only secret).
    """
    secrets_manager_client = boto3.client('secretsmanager', region_name='us-east-1')
    secret_name = 'twitter-api'  # Name of your secret in AWS Secrets Manager
    try:
        # Keep the try narrow: only the network call should be guarded here,
        # so a malformed JSON payload surfaces instead of being swallowed.
        get_secret_value_response = secrets_manager_client.get_secret_value(SecretId=secret_name)
    except Exception as e:  # boto3 raises many ClientError subtypes; report and degrade
        print(f"Error retrieving secret: {e}")
        return None
    if 'SecretString' in get_secret_value_response:
        return json.loads(get_secret_value_response['SecretString'])
    # Explicit (the original fell through implicitly): secret exists but
    # carries no string payload this script can use.
    return None
# --- Credential extraction -------------------------------------------------
# Pull the secrets dict from AWS Secrets Manager and fail fast when either
# the secret itself or the bearer-token entry is missing.
twitter_secrets = get_twitter_secrets()
if not twitter_secrets:
    raise Exception("Failed to retrieve Twitter API secrets. Check AWS Secrets Manager configuration.")
bearer_token = twitter_secrets.get('bearer_token')
if not bearer_token:
    raise Exception("Bearer token not found in the retrieved secrets.")

# --- AWS S3 configuration --------------------------------------------------
s3_client = boto3.client('s3', region_name='us-east-1')
bucket_name = 'raw-tweets'  # Your S3 bucket name

# --- DataFrame buffer for collected tweets ---------------------------------
columns = [
    'datetime', 'user_id', 'username', 'tweet', 'source',
    'likes', 'retweeted', 'retweet_count', 'user_location',
]
tweet_df = pd.DataFrame(columns=columns)

# --- Tweepy v2 client (bearer-token auth) ----------------------------------
client = tweepy.Client(bearer_token=bearer_token)

# Cache of user_id -> user details, shared with the stream listener below.
user_cache = {}
class CustomStreamListener(tweepy.StreamingClient):
    """Filtered-stream listener that collects up to ``max_tweets`` tweets,
    buffers them in the module-level ``tweet_df`` DataFrame, and uploads the
    batch to S3 as a CSV before disconnecting.
    """

    def __init__(self, bearer_token, max_tweets=25):
        super().__init__(bearer_token)
        self.max_tweets = max_tweets
        self.tweet_count = 0
        # Separate v2 REST client used to hydrate author details per tweet.
        self.api = tweepy.Client(bearer_token=bearer_token)

    def get_user_details(self, user_id):
        """Return the tweepy User data for ``user_id``, caching lookups.

        Returns None when the lookup fails (logged, not raised).
        """
        if user_id in user_cache:
            return user_cache[user_id]
        try:
            user_details = self.api.get_user(id=user_id, user_fields=['public_metrics', 'location'])
        except tweepy.TweepyException as e:
            print(f"Error fetching user details: {e}")
            return None
        user_data = user_details.data
        user_cache[user_id] = user_data  # Cache the result
        return user_data

    def on_tweet(self, tweet):
        """Collect one tweet into the shared DataFrame; upload + disconnect
        once ``max_tweets`` have been collected.

        NOTE(review): created_at/author_id/source/public_metrics/
        referenced_tweets are only populated when filter() requests them via
        tweet_fields — the v2 stream sends only id/text by default.
        """
        global tweet_df  # Use the global DataFrame
        if self.tweet_count >= self.max_tweets:
            return
        user_data = self.get_user_details(tweet.author_id)
        # public_metrics is None when the field was not requested; guard it.
        metrics = tweet.public_metrics or {}
        data = {
            'datetime': str(tweet.created_at),
            'user_id': tweet.author_id,
            'username': user_data.username if user_data else 'N/A',
            'tweet': tweet.text,
            'source': tweet.source if 'source' in tweet else 'N/A',
            'likes': metrics.get('like_count', 0),
            'retweeted': 'True' if tweet.referenced_tweets else 'False',
            'retweet_count': metrics.get('retweet_count', 0),
            'user_location': user_data.location if user_data else 'N/A',
        }
        # DataFrame.append() was removed in pandas 2.0; concat a one-row frame.
        tweet_df = pd.concat([tweet_df, pd.DataFrame([data])], ignore_index=True)
        self.tweet_count += 1
        if self.tweet_count == self.max_tweets:
            self.upload_to_s3()
            self.disconnect()

    def upload_to_s3(self):
        """Serialize ``tweet_df`` to CSV and upload it to the S3 bucket
        under a UTC-timestamped key."""
        global tweet_df
        csv_buffer = tweet_df.to_csv(index=False).encode('utf-8')
        # utcnow() is deprecated; use an aware UTC timestamp instead.
        timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%d-%H%M%S')
        filename = f"tweets_{timestamp}.csv"
        try:
            s3_client.put_object(Bucket=bucket_name, Key=filename, Body=csv_buffer)
            # Fixed: the message previously printed the literal "(unknown)".
            print(f"Uploaded {filename} to S3 bucket {bucket_name}")
        except Exception as e:
            print(f"Failed to upload {filename} to S3: {e}")
# Create an instance of the custom stream listener
stream_listener = CustomStreamListener(bearer_token=bearer_token, max_tweets=25)
# Register the filter rule before starting the stream
stream_listener.add_rules(tweepy.StreamRule("#ClimateChange"))  # Modify with your desired rule
print("Starting stream...")
# Request the fields on_tweet reads — the v2 stream delivers only id/text
# unless tweet_fields explicitly asks for them.
stream_listener.filter(
    tweet_fields=['created_at', 'author_id', 'source', 'public_metrics', 'referenced_tweets'],
)
# on_tweet already uploads (and disconnects) once max_tweets is reached;
# upload here only if the stream ended early with a partial batch, to avoid
# writing the same data to S3 twice.
if stream_listener.tweet_count < stream_listener.max_tweets:
    stream_listener.upload_to_s3()
I keep my bearer token in AWS Secrets Manager, and the first section of the code (everything up to the # AWS S3 configuration section) extracts the token from that secret.
I am using the Twitter developer Basic tier.

and my app is linked to a project:

How do I fix this issue?