Getting an error in my code: "NameError: name 'tweets_df' is not defined"
Getting an error in my code: "NameError: name 'tweets_df' is not defined"
我的代码中有一个我无法解决的错误
我的代码
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tweepy as tw # To extarct the twitter data
from tqdm import tqdm
# Twitter API credentials used to build the Tweepy client.
# NOTE(review): these secrets are hard-coded in a publicly shared snippet;
# they should be revoked and loaded from the environment instead.
consumer_api_key = 'vJB7L6fhV3hYPQjXdgSDtzWdy'
consumer_api_secret = 'dUTeph2pJCaojtpuiv7M7UDLeEiuR6qTBhD0fOzdaTuOE8xTZF'
auth = tw.OAuthHandler(consumer_api_key, consumer_api_secret)
api = tw.API(auth, wait_on_rate_limit=True)

# Search query: both hashtags, with retweets filtered out.
# Replace it with your own keywords.
search_words = "#BTC #bitcoin -filter:retweets"

# You can fix a time frame with the date since and date until parameters.
date_since = "2022-04-18"
date_until = "2022-04-19"

# Collect tweets.
# NOTE(review): `since` does not appear among the documented keyword
# arguments of Tweepy v4's API.search_tweets (only `until` does) -- confirm
# whether it is honoured or whether a `since:` operator in the query is needed.
tweets = tw.Cursor(
    api.search_tweets,
    q=search_words,
    lang="en",
    since=date_since,
    until=date_until,
).items(7500)  # Instruct the cursor to return a maximum of 7500 tweets.

# Copy the cursor's results into a list; the DataFrame-building code
# iterates over this list afterwards.
tweets_copy = []
for tweet in tqdm(tweets):
    tweets_copy.append(tweet)

print(f"New tweets retrieved: {len(tweets_copy)}")
# Build one single-row DataFrame per tweet and accumulate the rows in
# `tweets_df`, then write the result to newData.csv.
#
# NOTE: `tweets_df` is never assigned before this loop, so the first
# `tweets_df.append(...)` call raises
#     NameError: name 'tweets_df' is not defined
# This is the error the question is about, so it is intentionally left
# unfixed here; only the lost indentation has been restored.
for tweet in tqdm(tweets_copy):
    # Gather the hashtag texts attached to this tweet (best effort).
    hashtags = []
    try:
        for hashtag in tweet.entities["hashtags"]:
            hashtags.append(hashtag["text"])
    except:
        pass
    tweets_df = tweets_df.append(pd.DataFrame({'user_name': tweet.user.name,
                                               'user_location': tweet.user.location,
                                               'user_description': tweet.user.description,
                                               'user_created': tweet.user.created_at,
                                               'user_followers': tweet.user.followers_count,
                                               'user_friends': tweet.user.friends_count,
                                               'user_favourites': tweet.user.favourites_count,
                                               'user_verified': tweet.user.verified,
                                               'date': tweet.created_at,
                                               'text': tweet.text,
                                               # One-element list: the whole hashtag list
                                               # (or None when empty) fills a single cell.
                                               'hashtags': [hashtags if hashtags else None],
                                               'source': tweet.source,
                                               'is_retweet': tweet.retweeted}, index=[0]))
tweets_df
tweets_df.to_csv('newData.csv',index=False)
我尝试运行时出现的错误是:
NameError                                 Traceback (most recent call last)
Input In [8], in <cell line: 1>()
----> 1 tweets_df
NameError: name 'tweets_df' is not defined
出现 NameError 是因为在循环中第一次调用 tweets_df.append(...) 之前,tweets_df 从未被赋值。您可以先创建一个数据帧列表,然后使用 pandas.concat 方法将列表中的所有数据帧连接成一个数据帧,例如:
# Collect one single-row DataFrame per tweet, then concatenate them once.
# This avoids calling DataFrame.append on an undefined name (and
# DataFrame.append itself was removed in pandas 2.0).
tweets_df_list = []
for tweet in tqdm(tweets_copy):
    # Hashtag texts for this tweet; falls back to an empty list when the
    # entities are missing or malformed.
    try:
        hashtags = [hashtag["text"] for hashtag in tweet.entities["hashtags"]]
    except (AttributeError, KeyError, TypeError):
        hashtags = []
    tweets_df_list.append(pd.DataFrame({
        'user_name': tweet.user.name,
        'user_location': tweet.user.location,
        'user_description': tweet.user.description,
        'user_created': tweet.user.created_at,
        'user_followers': tweet.user.followers_count,
        'user_friends': tweet.user.friends_count,
        'user_favourites': tweet.user.favourites_count,
        'user_verified': tweet.user.verified,
        'date': tweet.created_at,
        'text': tweet.text,
        # One-element list: the whole hashtag list (or None when empty)
        # fills a single cell of the row.
        'hashtags': [hashtags if hashtags else None],
        'source': tweet.source,
        'is_retweet': tweet.retweeted,
    }, index=[0]))

# The result must be bound to `tweets_df` (it was previously assigned to
# `tweet_df`, so the to_csv call below still raised NameError).
# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
if tweets_df_list:
    tweets_df = pd.concat(tweets_df_list, ignore_index=True)
else:
    tweets_df = pd.DataFrame()
tweets_df.to_csv('newData.csv', index=False)
我的代码中有一个我无法解决的错误
我的代码
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tweepy as tw # To extarct the twitter data
from tqdm import tqdm
# Twitter API credentials used to build the Tweepy client.
# NOTE(review): these secrets are hard-coded in a publicly shared snippet;
# they should be revoked and loaded from the environment instead.
consumer_api_key = 'vJB7L6fhV3hYPQjXdgSDtzWdy'
consumer_api_secret = 'dUTeph2pJCaojtpuiv7M7UDLeEiuR6qTBhD0fOzdaTuOE8xTZF'
auth = tw.OAuthHandler(consumer_api_key, consumer_api_secret)
api = tw.API(auth, wait_on_rate_limit=True)

# Search query: both hashtags, with retweets filtered out.
# Replace it with your own keywords.
search_words = "#BTC #bitcoin -filter:retweets"

# You can fix a time frame with the date since and date until parameters.
date_since = "2022-04-18"
date_until = "2022-04-19"

# Collect tweets.
# NOTE(review): `since` does not appear among the documented keyword
# arguments of Tweepy v4's API.search_tweets (only `until` does) -- confirm
# whether it is honoured or whether a `since:` operator in the query is needed.
tweets = tw.Cursor(
    api.search_tweets,
    q=search_words,
    lang="en",
    since=date_since,
    until=date_until,
).items(7500)  # Instruct the cursor to return a maximum of 7500 tweets.

# Copy the cursor's results into a list; the DataFrame-building code
# iterates over this list afterwards.
tweets_copy = []
for tweet in tqdm(tweets):
    tweets_copy.append(tweet)

print(f"New tweets retrieved: {len(tweets_copy)}")
# Build one single-row DataFrame per tweet and accumulate the rows in
# `tweets_df`, then write the result to newData.csv.
#
# NOTE: `tweets_df` is never assigned before this loop, so the first
# `tweets_df.append(...)` call raises
#     NameError: name 'tweets_df' is not defined
# This is the error the question is about, so it is intentionally left
# unfixed here; only the lost indentation has been restored.
for tweet in tqdm(tweets_copy):
    # Gather the hashtag texts attached to this tweet (best effort).
    hashtags = []
    try:
        for hashtag in tweet.entities["hashtags"]:
            hashtags.append(hashtag["text"])
    except:
        pass
    tweets_df = tweets_df.append(pd.DataFrame({'user_name': tweet.user.name,
                                               'user_location': tweet.user.location,
                                               'user_description': tweet.user.description,
                                               'user_created': tweet.user.created_at,
                                               'user_followers': tweet.user.followers_count,
                                               'user_friends': tweet.user.friends_count,
                                               'user_favourites': tweet.user.favourites_count,
                                               'user_verified': tweet.user.verified,
                                               'date': tweet.created_at,
                                               'text': tweet.text,
                                               # One-element list: the whole hashtag list
                                               # (or None when empty) fills a single cell.
                                               'hashtags': [hashtags if hashtags else None],
                                               'source': tweet.source,
                                               'is_retweet': tweet.retweeted}, index=[0]))
tweets_df
tweets_df.to_csv('newData.csv',index=False)
我尝试运行时出现的错误是:
NameError                                 Traceback (most recent call last)
Input In [8], in <cell line: 1>()
----> 1 tweets_df
NameError: name 'tweets_df' is not defined
出现 NameError 是因为在循环中第一次调用 tweets_df.append(...) 之前,tweets_df 从未被赋值。您可以先创建一个数据帧列表,然后使用 pandas.concat 方法将列表中的所有数据帧连接成一个数据帧,例如:
# Collect one single-row DataFrame per tweet, then concatenate them once.
# This avoids calling DataFrame.append on an undefined name (and
# DataFrame.append itself was removed in pandas 2.0).
tweets_df_list = []
for tweet in tqdm(tweets_copy):
    # Hashtag texts for this tweet; falls back to an empty list when the
    # entities are missing or malformed.
    try:
        hashtags = [hashtag["text"] for hashtag in tweet.entities["hashtags"]]
    except (AttributeError, KeyError, TypeError):
        hashtags = []
    tweets_df_list.append(pd.DataFrame({
        'user_name': tweet.user.name,
        'user_location': tweet.user.location,
        'user_description': tweet.user.description,
        'user_created': tweet.user.created_at,
        'user_followers': tweet.user.followers_count,
        'user_friends': tweet.user.friends_count,
        'user_favourites': tweet.user.favourites_count,
        'user_verified': tweet.user.verified,
        'date': tweet.created_at,
        'text': tweet.text,
        # One-element list: the whole hashtag list (or None when empty)
        # fills a single cell of the row.
        'hashtags': [hashtags if hashtags else None],
        'source': tweet.source,
        'is_retweet': tweet.retweeted,
    }, index=[0]))

# The result must be bound to `tweets_df` (it was previously assigned to
# `tweet_df`, so the to_csv call below still raised NameError).
# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
if tweets_df_list:
    tweets_df = pd.concat(tweets_df_list, ignore_index=True)
else:
    tweets_df = pd.DataFrame()
tweets_df.to_csv('newData.csv', index=False)