从 url 获取媒体链接,而不是 api.user_timeline / Tweepy-Python

Get media links from urls, not with api.user_timeline / Tweepy-Python

Tweepy 不支持书签。我已将我的书签链接保存在一个文本文件中(大约 400 个链接)。我想获取这些推文的媒体链接。我可以从时间线中提取推文,但在从时间线中提取推文时,tweepy 会保存额外的信息。我怎样才能对手动添加的推文做同样的事情?比如我如何将链接导入为 https://twitter.com/xyz123/status/12344567902367 并使 tweepy 与它们一起工作?

我的代码现在可以工作,但它需要从文件中读取链接。我是通过 Chrome 扩展程序获得这些链接的,但我仍然想知道是否有更好的方法。

import API_Tokens as t
from tweepy import OAuthHandler, API
import os
import wget


def main():
    """Download the media of every tweet URL listed in ``bookmarks.txt``.

    Reads the URL list from the current working directory, ensures a
    ``bookmarks`` sub-directory exists, switches into it so downloads land
    there, then authenticates and processes the list.
    """
    # Open the list BEFORE changing directory: the path is relative to the
    # directory the script was started from.
    with open("bookmarks.txt", "r") as file1:
        # exist_ok replaces the former bare ``except`` that hid every error.
        os.makedirs('bookmarks', exist_ok=True)
        os.chdir('bookmarks')

        api = authenticate()
        getTweets(api, file1)


def getTweets(api, file1):
    """Fetch each tweet listed in *file1* and download its first media item.

    Args:
        api: Object exposing ``get_status(tweet_id)``; the returned tweet
            must provide an ``entities`` mapping.
        file1: Open text file (or any iterable of lines) with one tweet URL
            per line, e.g. ``https://twitter.com/user/status/1234``.
            It is closed before this function returns.

    Returns:
        list: The fetched tweet objects, in file order.
    """
    all_tweets = []

    try:
        # Iterating the file stops at EOF (the old ``while True`` never did).
        for line in file1:
            url = line.strip()
            if not url:
                continue  # blank line

            # The id of a tweet starts right after "status/".
            _, marker, tail = url.partition("status/")
            if not marker:
                continue  # not a tweet URL

            # Drop anything after the id: "?s=20" queries or "/photo/1".
            tweet_id = tail.split("?", 1)[0].split("/", 1)[0]
            if not tweet_id:
                continue

            tweet = api.get_status(tweet_id)  # fetch the tweet
            all_tweets.append(tweet)

            media = tweet.entities.get('media', [])  # get the media info
            if media:
                # download the image if media exists
                wget.download(media[0]['media_url'])
    finally:
        file1.close()

    return all_tweets


def authenticate():
    """Build a Tweepy API client from the developer credentials in ``t``.

    Returns:
        The ``API`` object constructed from the OAuth handler.
    """
    handler = OAuthHandler(t.CONSUMER_KEY, t.CONSUMER_SECRET)
    handler.set_access_token(t.ACCESS_TOKEN, t.ACCESS_TOKEN_SECRET)
    return API(handler)


# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
    

您现在是为每条推文单独发起一次 API 调用。您可以使用 statuses_lookup 来减少调用次数,每个请求最多可包含 100 个推文 ID。

# Collect the tweet ids from the saved bookmark URLs.
with open("bookmarks.txt", "r") as f:
    tweet_ids = []
    for line in f:
        url = line.strip()
        if not url:
            continue  # tolerate blank lines, e.g. a trailing newline at EOF
        # The id is the last path segment; drop any "?s=20"-style query
        # string and trailing slash first so int() does not fail.
        tweet_ids.append(int(url.split('?', 1)[0].rstrip('/').split('/')[-1]))

n = 100  # maximum number of ids per lookup request
chunks = [tweet_ids[i:i + n] for i in range(0, len(tweet_ids), n)]  # split list in chunks

all_tweets = []

for chunk in chunks:
    # `api` is the authenticated Tweepy API object created elsewhere.
    all_tweets.extend(api.statuses_lookup(chunk))  # retrieve and store tweets