Tweepy stream: TypeError: 'NoneType' object is not subscriptable
Tweepy stream: TypeError: 'NoneType' object is not subscriptable
我已经创建了一个 Postgres 数据库并正在执行 Tweepy 的 Stream 函数来填充它。截至目前,我能够将正常的推文(非转发和非回复)获取到 Postgres 数据库中。当我添加与转推和回复 (status.retweeted_status.user.id, status.in_reply_to_user_id) 以及位置 (longitude = status.coordinates[0], latitude = status.coordinates[1]) 相对应的变量时,我收到以下错误:
File "stream.py", line 70, in on_status
longitude = status.coordinates[0]
TypeError: 'NoneType' object is not subscriptable
上面提到的转推和回复字段也会出现同样的错误。我知道这与未启用位置信息的推文有关——这类推文的 coordinates 字段为空(None)。我尝试过用 if 语句来处理这些推文,但仍然收到相同的错误。
我的代码:
import time
import user
import tweepy
import psycopg2
# Build the OAuth handler from the consumer key/secret held in the imported `user` module.
auth = tweepy.OAuthHandler(user.CONSUMER_KEY, user.CONSUMER_KEY_SECRET)
# Attach the access token and its secret to the same handler.
auth.set_access_token(user.ACCESS_TOKEN, user.ACCESS_TOKEN_SECRET)
# API client built on those credentials; its `.auth` is reused when the stream is created.
api = tweepy.API(auth)
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that stores English tweets in Postgres for a limited time.

    Every status is flattened into the scalar fields that ``dbConnect``
    expects. Once ``time_limit`` seconds have passed since construction,
    ``on_status`` returns ``False`` so that the stream is disconnected.
    """

    def __init__(self, time_limit=300):
        """Record the start time and the maximum run time.

        Args:
            time_limit: Number of seconds after which ``on_status`` returns
                ``False``.
        """
        self.start_time = time.time()
        self.limit = time_limit
        super(MyStreamListener, self).__init__()

    def on_connect(self):
        """Announce that the streaming connection has been established."""
        print("Connected to Twitter API.")

    @staticmethod
    def _extract_lon_lat(coordinates):
        """Return ``(longitude, latitude)`` from a tweet's ``coordinates`` value.

        The Twitter API delivers ``coordinates`` as a GeoJSON point,
        ``{'type': 'Point', 'coordinates': [longitude, latitude]}``, or as
        ``None`` when the tweet has no exact location. A bare
        ``[longitude, latitude]`` sequence is accepted as well.

        Returns:
            ``(None, None)`` when no coordinates are available.
        """
        if isinstance(coordinates, dict):
            coordinates = coordinates.get('coordinates')
        if not coordinates:
            return None, None
        return coordinates[0], coordinates[1]

    def on_status(self, status):
        """Extract the stored fields from ``status`` and persist English tweets.

        Args:
            status: The status object delivered by the stream.

        Returns:
            ``False`` once the time limit has been exceeded, otherwise
            ``None``.
        """
        print(status.text)

        # ---- tweet table ----
        tweet_id = status.id
        if status.truncated:
            # Truncated statuses carry the complete text and entities in
            # ``extended_tweet``.
            hashtags = status.extended_tweet['entities']['hashtags']
            tweet = status.extended_tweet['full_text']
        else:
            hashtags = status.entities['hashtags']
            tweet = status.text
        created_at = status.created_at
        user_id = status.user.id
        retweet_count = status.retweet_count
        favorite_count = status.favorite_count

        # ---- user table ----
        username = status.user.name
        followers_count = status.user.followers_count
        following_count = status.user.friends_count

        # ---- location table ----
        # ``coordinates`` is None for tweets without an exact location.
        longitude, latitude = self._extract_lon_lat(
            getattr(status, 'coordinates', None))

        # ---- in_reply_to table ----
        # None when the tweet is not a reply.
        replying_to_id = getattr(status, 'in_reply_to_user_id', None)

        # ---- retweeting table ----
        # The ``retweeted_status`` attribute only exists on retweets, so a
        # plain attribute access raises AttributeError for every other tweet.
        retweeted = getattr(status, 'retweeted_status', None)
        retweeting_id = retweeted.user.id if retweeted is not None else None

        # Reduce the hashtag entities to their text.
        hashtags = read_hashtags(hashtags)

        # Only English tweets are stored (retweets are included).
        if status.lang == "en":
            dbConnect(user_id, username, tweet_id, tweet, created_at,
                      retweet_count, favorite_count, hashtags,
                      followers_count, following_count, longitude, latitude,
                      replying_to_id, retweeting_id)

        # Stop the stream once the configured run time has elapsed.
        if (time.time() - self.start_time) > self.limit:
            print(time.time(), self.start_time, self.limit)
            return False

    def on_error(self, status_code):
        """Disconnect the stream on status code 420.

        Returns:
            ``False`` for status code 420; ``None`` for any other code.
        """
        if status_code == 420:
            # Returning False disconnects the stream instead of reconnecting.
            return False
# Extract hashtags
def read_hashtags(tag_list):
    """Return the ``'text'`` value of every hashtag entity in ``tag_list``.

    Args:
        tag_list: Iterable of mappings that each contain a ``'text'`` key.

    Returns:
        A new list with the hashtag texts in their original order.
    """
    return [entity['text'] for entity in tag_list]
# Connection to database server
# need to allow ip address on GCP first - remember to convert to CIDR format with "to" address
# conn = psycopg2.connect(host="***", database="***", user='***', password = '***')
# Create cursor to execute SQL commands
# cur = conn.cursor()
# Insert Tweet data into database
def dbConnect(user_id, username, tweet_id, tweet, created_at,
              retweet_count, favorite_count, hashtags, followers_count,
              following_count, longitude, latitude, replying_to_id, retweeting_id):
    """Insert one tweet and its related rows into the Postgres database.

    A new connection is opened for every call. All inserts are executed on
    one cursor and committed together; the cursor and the connection are
    closed even when one of the statements raises, so a failed insert no
    longer leaks the connection.

    Args:
        user_id: ID of the tweet's author.
        username: Display name of the author.
        tweet_id: ID of the tweet.
        tweet: Text of the tweet.
        created_at: Creation timestamp of the tweet.
        retweet_count: Retweet count of the tweet.
        favorite_count: Favorite count of the tweet.
        hashtags: Iterable of hashtag strings; one row is written per item.
        followers_count: Follower count of the author.
        following_count: Number of accounts the author follows.
        longitude: Longitude of the tweet, or None.
        latitude: Latitude of the tweet, or None.
        replying_to_id: ID of the user being replied to, or None.
        retweeting_id: ID of the user being retweeted, or None.

    Raises:
        Whatever psycopg2 raises for a failed connect or statement; the
        exception propagates after cleanup.
    """
    conn = psycopg2.connect(host="***", database="***", user='***', password='***')
    try:
        cur = conn.cursor()
        try:
            # User row; an already existing user_id is left untouched.
            command = ('INSERT INTO users(user_id, username, followers_count, following_count) VALUES (%s,%s,%s,%s) ON CONFLICT\n'
                       '(user_id) DO NOTHING;')
            cur.execute(command, (user_id, username, followers_count, following_count))

            # Tweet row.
            command = '''INSERT INTO tweet(ID, user_id, created_at,tweet, retweet_count, favorite_count) VALUES (%s,%s,%s,%s,%s,%s);'''
            cur.execute(command, (tweet_id, user_id, created_at, tweet, retweet_count, favorite_count))

            # Retweet relation (retweeting_id may be None).
            command = '''INSERT INTO retweeting(tweet_id, retweeting_id) VALUES (%s,%s);'''
            cur.execute(command, (tweet_id, retweeting_id))

            # Reply relation (replying_to_id may be None).
            command = '''INSERT INTO in_reply_to(tweet_id, replying_to_id) VALUES (%s,%s);'''
            cur.execute(command, (tweet_id, replying_to_id))

            # Location row (longitude/latitude may be None).
            command = '''INSERT INTO location(tweet_id, longitude, latitude) VALUES (%s,%s,%s);'''
            cur.execute(command, (tweet_id, longitude, latitude))

            # One entity row per hashtag.
            command = '''INSERT INTO TwitterEntity (ID, hashtag) VALUES (%s,%s);'''
            for hashtag in hashtags:
                cur.execute(command, (tweet_id, hashtag))

            # Make all of the inserts above permanent.
            conn.commit()
        finally:
            cur.close()
    finally:
        # Closing without a commit discards any uncommitted work.
        conn.close()
# Create the listener with its default 300-second time limit.
myStreamListener = MyStreamListener()
# Open a stream that authenticates with the API client's credentials and
# delivers its events to the listener.
# NOTE(review): tweet_mode="extended" is passed as an extra option here;
# confirm that the streaming endpoint actually honours it.
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener,
tweet_mode="extended")
# Start receiving statuses that match any of the tracked keywords.
myStream.filter(track=['covid','coronavirus','pandemic','covid19','covid-19'])
更新:我采用了 tdelaney 的解决方案,现在可以填充位置信息了。但转推和回复部分仍然报错,现在的错误信息如下:
File "stream.py", line 82, in on_status
if status.retweeted_status is not None:
AttributeError: 'Status' object has no attribute 'retweeted_status'
我已经编辑了上面的代码以反映更新。
您可以先检查坐标是否存在,如果不存在则赋一个默认值。根据您希望这种情况在数据库中如何显示,您可能需要更改这个默认值。
# ##################### location table ##########################
# The API delivers ``coordinates`` as a GeoJSON point,
# {'type': 'Point', 'coordinates': [longitude, latitude]}, so unwrap it
# before indexing; a bare [longitude, latitude] pair is accepted as well.
point = status.coordinates
if isinstance(point, dict):
    point = point.get('coordinates')
if point:
    longitude, latitude = point[0], point[1]
else:
    # No exact location on this tweet: store NULLs.
    longitude = latitude = None
我已经创建了一个 Postgres 数据库并正在执行 Tweepy 的 Stream 函数来填充它。截至目前,我能够将正常的推文(非转发和非回复)获取到 Postgres 数据库中。当我添加与转推和回复 (status.retweeted_status.user.id, status.in_reply_to_user_id) 以及位置 (longitude = status.coordinates[0], latitude = status.coordinates[1]) 相对应的变量时,我收到以下错误:
File "stream.py", line 70, in on_status
longitude = status.coordinates[0]
TypeError: 'NoneType' object is not subscriptable
上面提到的转推和回复字段也会出现同样的错误。我知道这与未启用位置信息的推文有关——这类推文的 coordinates 字段为空(None)。我尝试过用 if 语句来处理这些推文,但仍然收到相同的错误。
我的代码:
import time
import user
import tweepy
import psycopg2
# Build the OAuth handler from the consumer key/secret held in the imported `user` module.
auth = tweepy.OAuthHandler(user.CONSUMER_KEY, user.CONSUMER_KEY_SECRET)
# Attach the access token and its secret to the same handler.
auth.set_access_token(user.ACCESS_TOKEN, user.ACCESS_TOKEN_SECRET)
# API client built on those credentials; its `.auth` is reused when the stream is created.
api = tweepy.API(auth)
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that stores English tweets in Postgres for a limited time.

    Every status is flattened into the scalar fields that ``dbConnect``
    expects. Once ``time_limit`` seconds have passed since construction,
    ``on_status`` returns ``False`` so that the stream is disconnected.
    """

    def __init__(self, time_limit=300):
        """Record the start time and the maximum run time.

        Args:
            time_limit: Number of seconds after which ``on_status`` returns
                ``False``.
        """
        self.start_time = time.time()
        self.limit = time_limit
        super(MyStreamListener, self).__init__()

    def on_connect(self):
        """Announce that the streaming connection has been established."""
        print("Connected to Twitter API.")

    @staticmethod
    def _extract_lon_lat(coordinates):
        """Return ``(longitude, latitude)`` from a tweet's ``coordinates`` value.

        The Twitter API delivers ``coordinates`` as a GeoJSON point,
        ``{'type': 'Point', 'coordinates': [longitude, latitude]}``, or as
        ``None`` when the tweet has no exact location. A bare
        ``[longitude, latitude]`` sequence is accepted as well.

        Returns:
            ``(None, None)`` when no coordinates are available.
        """
        if isinstance(coordinates, dict):
            coordinates = coordinates.get('coordinates')
        if not coordinates:
            return None, None
        return coordinates[0], coordinates[1]

    def on_status(self, status):
        """Extract the stored fields from ``status`` and persist English tweets.

        Args:
            status: The status object delivered by the stream.

        Returns:
            ``False`` once the time limit has been exceeded, otherwise
            ``None``.
        """
        print(status.text)

        # ---- tweet table ----
        tweet_id = status.id
        if status.truncated:
            # Truncated statuses carry the complete text and entities in
            # ``extended_tweet``.
            hashtags = status.extended_tweet['entities']['hashtags']
            tweet = status.extended_tweet['full_text']
        else:
            hashtags = status.entities['hashtags']
            tweet = status.text
        created_at = status.created_at
        user_id = status.user.id
        retweet_count = status.retweet_count
        favorite_count = status.favorite_count

        # ---- user table ----
        username = status.user.name
        followers_count = status.user.followers_count
        following_count = status.user.friends_count

        # ---- location table ----
        # ``coordinates`` is None for tweets without an exact location.
        longitude, latitude = self._extract_lon_lat(
            getattr(status, 'coordinates', None))

        # ---- in_reply_to table ----
        # None when the tweet is not a reply.
        replying_to_id = getattr(status, 'in_reply_to_user_id', None)

        # ---- retweeting table ----
        # The ``retweeted_status`` attribute only exists on retweets, so a
        # plain attribute access raises AttributeError for every other tweet.
        retweeted = getattr(status, 'retweeted_status', None)
        retweeting_id = retweeted.user.id if retweeted is not None else None

        # Reduce the hashtag entities to their text.
        hashtags = read_hashtags(hashtags)

        # Only English tweets are stored (retweets are included).
        if status.lang == "en":
            dbConnect(user_id, username, tweet_id, tweet, created_at,
                      retweet_count, favorite_count, hashtags,
                      followers_count, following_count, longitude, latitude,
                      replying_to_id, retweeting_id)

        # Stop the stream once the configured run time has elapsed.
        if (time.time() - self.start_time) > self.limit:
            print(time.time(), self.start_time, self.limit)
            return False

    def on_error(self, status_code):
        """Disconnect the stream on status code 420.

        Returns:
            ``False`` for status code 420; ``None`` for any other code.
        """
        if status_code == 420:
            # Returning False disconnects the stream instead of reconnecting.
            return False
# Extract hashtags
def read_hashtags(tag_list):
    """Return the ``'text'`` value of every hashtag entity in ``tag_list``.

    Args:
        tag_list: Iterable of mappings that each contain a ``'text'`` key.

    Returns:
        A new list with the hashtag texts in their original order.
    """
    return [entity['text'] for entity in tag_list]
# Connection to database server
# need to allow ip address on GCP first - remember to convert to CIDR format with "to" address
# conn = psycopg2.connect(host="***", database="***", user='***', password = '***')
# Create cursor to execute SQL commands
# cur = conn.cursor()
# Insert Tweet data into database
def dbConnect(user_id, username, tweet_id, tweet, created_at,
              retweet_count, favorite_count, hashtags, followers_count,
              following_count, longitude, latitude, replying_to_id, retweeting_id):
    """Insert one tweet and its related rows into the Postgres database.

    A new connection is opened for every call. All inserts are executed on
    one cursor and committed together; the cursor and the connection are
    closed even when one of the statements raises, so a failed insert no
    longer leaks the connection.

    Args:
        user_id: ID of the tweet's author.
        username: Display name of the author.
        tweet_id: ID of the tweet.
        tweet: Text of the tweet.
        created_at: Creation timestamp of the tweet.
        retweet_count: Retweet count of the tweet.
        favorite_count: Favorite count of the tweet.
        hashtags: Iterable of hashtag strings; one row is written per item.
        followers_count: Follower count of the author.
        following_count: Number of accounts the author follows.
        longitude: Longitude of the tweet, or None.
        latitude: Latitude of the tweet, or None.
        replying_to_id: ID of the user being replied to, or None.
        retweeting_id: ID of the user being retweeted, or None.

    Raises:
        Whatever psycopg2 raises for a failed connect or statement; the
        exception propagates after cleanup.
    """
    conn = psycopg2.connect(host="***", database="***", user='***', password='***')
    try:
        cur = conn.cursor()
        try:
            # User row; an already existing user_id is left untouched.
            command = ('INSERT INTO users(user_id, username, followers_count, following_count) VALUES (%s,%s,%s,%s) ON CONFLICT\n'
                       '(user_id) DO NOTHING;')
            cur.execute(command, (user_id, username, followers_count, following_count))

            # Tweet row.
            command = '''INSERT INTO tweet(ID, user_id, created_at,tweet, retweet_count, favorite_count) VALUES (%s,%s,%s,%s,%s,%s);'''
            cur.execute(command, (tweet_id, user_id, created_at, tweet, retweet_count, favorite_count))

            # Retweet relation (retweeting_id may be None).
            command = '''INSERT INTO retweeting(tweet_id, retweeting_id) VALUES (%s,%s);'''
            cur.execute(command, (tweet_id, retweeting_id))

            # Reply relation (replying_to_id may be None).
            command = '''INSERT INTO in_reply_to(tweet_id, replying_to_id) VALUES (%s,%s);'''
            cur.execute(command, (tweet_id, replying_to_id))

            # Location row (longitude/latitude may be None).
            command = '''INSERT INTO location(tweet_id, longitude, latitude) VALUES (%s,%s,%s);'''
            cur.execute(command, (tweet_id, longitude, latitude))

            # One entity row per hashtag.
            command = '''INSERT INTO TwitterEntity (ID, hashtag) VALUES (%s,%s);'''
            for hashtag in hashtags:
                cur.execute(command, (tweet_id, hashtag))

            # Make all of the inserts above permanent.
            conn.commit()
        finally:
            cur.close()
    finally:
        # Closing without a commit discards any uncommitted work.
        conn.close()
# Create the listener with its default 300-second time limit.
myStreamListener = MyStreamListener()
# Open a stream that authenticates with the API client's credentials and
# delivers its events to the listener.
# NOTE(review): tweet_mode="extended" is passed as an extra option here;
# confirm that the streaming endpoint actually honours it.
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener,
tweet_mode="extended")
# Start receiving statuses that match any of the tracked keywords.
myStream.filter(track=['covid','coronavirus','pandemic','covid19','covid-19'])
更新:我采用了 tdelaney 的解决方案,现在可以填充位置信息了。但转推和回复部分仍然报错,现在的错误信息如下:
File "stream.py", line 82, in on_status
if status.retweeted_status is not None:
AttributeError: 'Status' object has no attribute 'retweeted_status'
我已经编辑了上面的代码以反映更新。
您可以先检查坐标是否存在,如果不存在则赋一个默认值。根据您希望这种情况在数据库中如何显示,您可能需要更改这个默认值。
# ##################### location table ##########################
# The API delivers ``coordinates`` as a GeoJSON point,
# {'type': 'Point', 'coordinates': [longitude, latitude]}, so unwrap it
# before indexing; a bare [longitude, latitude] pair is accepted as well.
point = status.coordinates
if isinstance(point, dict):
    point = point.get('coordinates')
if point:
    longitude, latitude = point[0], point[1]
else:
    # No exact location on this tweet: store NULLs.
    longitude = latitude = None