Tweepy stream: TypeError: 'NoneType' object is not subscriptable

Tweepy stream: TypeError: 'NoneType' object is not subscriptable

我已经创建了一个 Postgres 数据库并正在执行 Tweepy 的 Stream 函数来填充它。截至目前,我能够将正常的推文(非转发和非回复)获取到 Postgres 数据库中。当我添加与转推和回复 (status.retweeted_status.user.id, status.in_reply_to_user_id) 以及位置 (longitude = status.coordinates[0], latitude = status.coordinates[1]) 相对应的变量时,我收到以下错误:

  File "stream.py", line 70, in on_status
    longitude = status.coordinates[0]
TypeError: 'NoneType' object is not subscriptable

对于上面的转推和回复字段,此错误也一直存在。我知道它与在坐标字段中具有 NA 的非启用位置的推文有关,并且我尝试使用 if 语句来捕获这些推文,但我仍然收到相同的错误。

我的代码:

import time
import user
import tweepy
import psycopg2

auth = tweepy.OAuthHandler(user.CONSUMER_KEY, user.CONSUMER_KEY_SECRET)
auth.set_access_token(user.ACCESS_TOKEN, user.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)


class MyStreamListener(tweepy.StreamListener):

    def __init__(self, time_limit=300):
        self.start_time = time.time()
        self.limit = time_limit
        super(MyStreamListener, self).__init__()

    def on_connect(self):
        print("Connected to Twitter API.")

    def on_status(self, status):
        print(status.text)
        ##################### tweet table #########################
        tweet_id = status.id # Tweet ID

        # Tweet
        if status.truncated == True:
            hashtags = status.extended_tweet['entities']['hashtags']
            tweet = status.extended_tweet['full_text']

        else:
            hashtags = status.entities['hashtags']
            tweet = status.text


        created_at = status.created_at # created_at
        user_id = status.user.id # User ID
        retweet_count = status.retweet_count # retweet count
        favorite_count = status.favorite_count

        ##################### user table ##########################
        username = status.user.name # Username
        followers_count = status.user.followers_count
        following_count = status.user.friends_count

        # ##################### location table ##########################
        if status.coordinates is not None:
            longitude = status.coordinates[0]
            latitude = status.coordinates[1]
        else:
            longitude = latitude = None
        
        # ##################### in_reply_to table ##########################
        if status.in_reply_to_user_id is not None:
            replying_to_id = status.in_reply_to_user_id
        else:
            replying_to_id = None

        # # ##################### retweeting table ##########################
        if status.retweeted_status is not None:
            retweeting_id = status.retweeted_status.user.id
        else:
            retweeting_id = None

        # Read hastags
        hashtags = read_hashtags(hashtags)

        # Language
        lang = status.lang

        # If tweet is not a retweet and tweet is in English - not hasattr(status, "retweeted_status") and
        if lang == "en":
            # Connect to database
            dbConnect(user_id, username, tweet_id, tweet, created_at,
                      retweet_count, favorite_count, hashtags, 
 followers_count,following_count,longitude,latitude,replying_to_id,retweeting_id)

        if (time.time() - self.start_time) > self.limit:
            print(time.time(), self.start_time, self.limit)
            return False

    def on_error(self, status_code):
        if status_code == 420:
            # Returning False in on_data disconnects the stream
            return False

# Extract hashtags
def read_hashtags(tag_list):
    hashtags = []
    for tag in tag_list:
        hashtags.append(tag['text'])
    return hashtags

# Connection to database server
# need to allow ip address on GCP first - remember to convert to CIDR format with "to" address
# conn = psycopg2.connect(host="***", database="***", user='***', password = '***')

# Create cursor to execute SQL commands
# cur = conn.cursor()

# Insert Tweet data into database
def dbConnect(user_id, username, tweet_id, tweet, created_at,
                      retweet_count, favorite_count, hashtags, followers_count,
following_count,longitude,latitude,replying_to_id,retweeting_id):

    conn = psycopg2.connect(host="***", database="***", user='***', password = '***')

    cur = conn.cursor()

    # insert user information
    command = '''INSERT INTO users(user_id, username, followers_count, following_count) VALUES (%s,%s,%s,%s) ON CONFLICT
                 (user_id) DO NOTHING;'''
    cur.execute(command, (user_id, username, followers_count, following_count))

    # insert tweet information
    command = '''INSERT INTO tweet(ID, user_id, created_at,tweet, retweet_count, favorite_count) VALUES (%s,%s,%s,%s,%s,%s);'''
    cur.execute(command, (tweet_id, user_id, created_at,tweet, retweet_count, favorite_count))

    # # insert retweeting information
    command = '''INSERT INTO retweeting(tweet_id, retweeting_id) VALUES (%s,%s);'''
    cur.execute(command, (tweet_id, retweeting_id))
    #
    # # insert in_reply_to information
    command = '''INSERT INTO in_reply_to(tweet_id, replying_to_id) VALUES (%s,%s);'''
    cur.execute(command, (tweet_id, replying_to_id))

    # insert location information
    command = '''INSERT INTO location(tweet_id, longitude, latitude) VALUES (%s,%s,%s);'''
    cur.execute(command, (tweet_id, longitude, latitude))

    # insert entity information
    for i in range(len(hashtags)):
        hashtag = hashtags[i]
        command = '''INSERT INTO TwitterEntity (ID, hashtag) VALUES (%s,%s);'''
        cur.execute(command, (tweet_id, hashtag))

    # Commit changes
    conn.commit()

    # Disconnect
    cur.close()
    conn.close()

myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener,
                        tweet_mode="extended")
myStream.filter(track=['covid','coronavirus','pandemic','covid19','covid-19'])

更新:我使用了 tdelaney 的解决方案,现在我可以填充位置。我仍然收到转发和回复错误,现在显示为:

  File "stream.py", line 82, in on_status
    if status.retweeted_status is not None:
AttributeError: 'Status' object has no attribute 'retweeted_status'

我已经编辑了上面的代码以反映更新。

您可以测试坐标是否存在,如果不存在则指定一个默认值。您可能需要更改默认值,具体取决于您希望此案例在数据库中的显示方式。

    # ##################### location table ##########################
    if status.coordinates is not None:
        longitude = status.coordinates[0]
        latitude = status.coordinates[1]
    else:
        longitude = latitude = None