如何解决 "string indices must be integers"(使用 conversation_id 检索推文回复时出错)?

How to solve "string indices must be integers" (error while retrieving replies on tweet using conversation_id)?

我正在使用 TwitterAPI 通过下面提到的 example code 使用 conversation_id 来提取对推文的回复:这个想法是根据列表提取所有回复数以千计的 conversation_ids,而且我确实有学术证书,所以档案搜索应该不是问题。

from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRequestError, TwitterConnectionError, TwitterPager

# NOTE: If conversation is over a week old then it will not get returned.
CONVERSATION_ID = '1369393783482236933'

class TreeNode:
    def __init__(self, data):
        """data is a tweet's json object"""
        self.data = data
        self.children = []
        self.replied_to_tweet = None
        if 'referenced_tweets' in self.data:
            for tweet in self.data['referenced_tweets']: 
                if tweet['type'] == 'replied_to':
                    self.replied_to_tweet = tweet['id']
                    break

    def id(self):
        """a node is identified by its tweet id"""
        return self.data['id']

    def parent(self):
        """the reply-to tweet is the parent of the node"""
        return self.replied_to_tweet

    def find_parent_of(self, node):
        """append a node to the children of it's parent tweet"""
        if node.parent() == self.id():
            self.children.append(node)
            return True
        for child in self.children:
            if child.find_parent_of(node):
                return True
        return False

    def print_tree(self, level):
        """level 0 is the root node, then incremented for subsequent generations"""
        created_at = self.data['created_at']
        username = self.data['author_id']['username']
        text_80chars = self.data['text'][0:80].replace('\n', ' ')
        print(f'{level*"_"}{level}: [{created_at}][{username}] {text_80chars}')
        level += 1
        for child in reversed(self.children):
            child.print_tree(level)

try:
    o = TwitterOAuth.read_file()
    api = TwitterAPI(o.consumer_key, o.consumer_secret, auth_type='oAuth2', api_version='2')

    # GET ROOT OF THE CONVERSATION

    r = api.request(f'tweets/:{CONVERSATION_ID}',
        {
            'expansions':'author_id',
            'tweet.fields':'author_id,conversation_id,created_at,referenced_tweets'
        },
        hydrate_type=HydrateType.APPEND)

    for item in r:
        root = TreeNode(item)
        print(f'ROOT {root.id()}')

    # GET ALL REPLIES IN CONVERSATION
    # (RETURNED IN REVERSE CHRONOLOGICAL ORDER)

    pager = TwitterPager(api, 'tweets/search/recent', 
        {
            'query':f'conversation_id:{CONVERSATION_ID}',
            'expansions':'author_id',
            'tweet.fields':'author_id,conversation_id,created_at,referenced_tweets'
        },
        hydrate_type=HydrateType.APPEND)

    # "wait=2" means wait 2 seconds between each request.
    # The rate limit is 450 requests per 15 minutes, or
    # 15*60/450 = 2 seconds. 

    orphans = []

    for item in pager.get_iterator(wait=2):
        node = TreeNode(item)
        print(f'{node.id()} => {node.parent()}', item['author_id']['username'])
        # COLLECT ANY ORPHANS THAT ARE CHILDREN OF THE NEW NODE
        orphans = [orphan for orphan in orphans if not node.find_parent_of(orphan)]
        # IF THE NEW NODE CANNOT BE PLACED IN TREE, ORPHAN IT UNTIL ITS PARENT IS FOUND
        if not root.find_parent_of(node):
            orphans.append(node)

    print('\nTREE...')
    root.print_tree(0)
    assert len(orphans) == 0, f'{len(orphans)} orphaned tweets'

except TwitterRequestError as e:
    print(e.status_code)
    for msg in iter(e):
        print(msg)

except TwitterConnectionError as e:
    print(e)

except Exception as e:
    print(e)

如果最后两行注释,错误会详细显示。

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
C:\Users\ANEESB~1\AppData\Local\Temp/ipykernel_18696/4104024841.py in <module>
     88 
     89         print('\nTREE...')
---> 90         root.print_tree(0)
     91         assert len(orphans) == 0, f'{len(orphans)} orphaned tweets'
     92 

C:\Users\ANEESB~1\AppData\Local\Temp/ipykernel_18696/4104024841.py in print_tree(self, level)
     37                 """level 0 is the root node, then incremented for subsequent generations"""
     38                 created_at = self.data['created_at']
---> 39                 username = self.data['author_id']['username']
     40                 text_80chars = self.data['text'][0:80].replace('\n', ' ')
     41                 print(f'{level*"_"}{level}: [{created_at}][{username}] {text_80chars}')

TypeError: string indices must be integers

代码应该可以运行,我不知道是什么导致了错误。有什么帮助吗?

Self.data 看起来像这样:

{
    'author_id': '3420477195', 
    'conversation_id': '1369393783482236933', 
    'created_at': '2021-03-09T21:04:54.000Z', 
    'text': "Happy one year anniversary to everyone working from home! Do you feel like if you have one more Zoom meeting you’ll rip your hair out? First of all, please don't do that. Second, we're here to save you from Zoom boredom with these new backgrounds!", 
    'id': '1369393783482236933', 
    'author_id_hydrate': {'id': '3420477195', 'name': 'Full Frontal', 'username': 'FullFrontalSamB'}
}

author_id只是一个字符串,作者的详细信息在author_id_hydrate中。所以 self.data['author_id']['username'] 应该是 self.data['author_id_hydrate']['username']