从 url 获取媒体链接,而不是 api.user_timeline / Tweepy-Python
Get media links from urls, not with api.user_timeline / Tweepy-Python
Tweepy 不支持书签。我已将我的书签链接保存在一个文本文件中(大约 400 个链接)。我想获取这些推文的媒体链接。我可以从时间线中提取推文,但在从时间线中提取推文时,tweepy 会保存额外的信息。我怎样才能对手动添加的推文做同样的事情?比如我如何将链接导入为 https://twitter.com/xyz123/status/12344567902367 并使 tweepy 与它们一起工作?
我的代码现在可以工作,但它需要从文件中读取链接。这些链接是我通过一个 Chrome 扩展程序获得的,但我仍然想知道是否有更好的方法。
import API_Tokens as t
from tweepy import OAuthHandler, API
import os
import wget
def main():
    """Download the media of every bookmarked tweet listed in bookmarks.txt.

    The bookmark file is opened relative to the starting working directory.
    The process then switches into a ``bookmarks`` sub-directory (created on
    demand) before the tweets are fetched, because the download call in
    getTweets is made without an explicit output path.
    """
    # Open before chdir: "bookmarks.txt" is relative to the original directory.
    # The context manager guarantees the handle is released even when
    # authentication or a tweet lookup raises.
    with open("bookmarks.txt", "r") as file1:
        # exist_ok replaces the former bare ``except:``, which also swallowed
        # unrelated failures (e.g. permission errors) from mkdir.
        os.makedirs('bookmarks', exist_ok=True)
        os.chdir('bookmarks')
        api = authenticate()
        getTweets(api, file1)
def getTweets(api, file1):
    """Fetch every tweet listed in *file1* and download its first media item.

    Args:
        api: Authenticated client exposing ``get_status(tweet_id)``
            (a tweepy ``API`` object in this script).
        file1: Open text file with one tweet URL per line, e.g.
            ``https://twitter.com/xyz123/status/12344567902367``.
            It is closed before this function returns or raises.

    Returns:
        list: The tweet objects returned by ``api.get_status``, in file order.
    """
    all_tweets = []
    try:
        # Iterating the file stops at EOF; the previous ``while True`` loop
        # kept reading empty strings and ended by requesting an empty ID.
        for line in file1:
            # Everything after "status/" is the tweet ID. Drop the trailing
            # newline, any "?query" suffix and a trailing slash.
            tail = line.partition("status/")[2]
            tweet_id = tail.split("?", 1)[0].strip().rstrip("/")
            if not tweet_id:
                # Blank line or a line that is not a tweet URL.
                continue

            tweet = api.get_status(tweet_id)
            all_tweets.append(tweet)

            # Only the first attached media item is downloaded.
            media = tweet.entities.get('media', [])
            if media:
                wget.download(media[0]['media_url'])
    finally:
        file1.close()
    return all_tweets
def authenticate():
    """Return a tweepy API client built with developer-access credentials.

    The consumer key/secret and access token/secret are read from the
    ``API_Tokens`` module (imported as ``t``).
    """
    handler = OAuthHandler(t.CONSUMER_KEY, t.CONSUMER_SECRET)
    handler.set_access_token(t.ACCESS_TOKEN, t.ACCESS_TOKEN_SECRET)
    return API(handler)
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()
您现在为每条推文都单独发起一次 API 调用。您可以使用 statuses_lookup 来减少调用次数,它的每个请求最多可以包含 100 个推文 ID。
# Batch the lookups: one request per 100 tweet IDs instead of one per tweet.
# `api` must already be an authenticated tweepy API object.
with open("bookmarks.txt", "r") as f:
    # One tweet URL per line; blank lines are dropped so int() never sees "".
    urls = [line.strip() for line in f if line.strip()]

# The tweet ID is the last path segment; strip any "?query" suffix first.
tweet_ids = [int(url.split("?", 1)[0].rstrip("/").split("/")[-1]) for url in urls]

n = 100  # maximum number of IDs per lookup request
chunks = [tweet_ids[i:i + n] for i in range(0, len(tweet_ids), n)]  # split list in chunks

all_tweets = []
for chunk in chunks:
    # NOTE(review): Tweepy 4.x renamed statuses_lookup to lookup_statuses;
    # confirm against the installed version.
    all_tweets.extend(api.statuses_lookup(chunk))  # extend avoids re-copying the list
Tweepy 不支持书签。我已将我的书签链接保存在一个文本文件中(大约 400 个链接)。我想获取这些推文的媒体链接。我可以从时间线中提取推文,但在从时间线中提取推文时,tweepy 会保存额外的信息。我怎样才能对手动添加的推文做同样的事情?比如我如何将链接导入为 https://twitter.com/xyz123/status/12344567902367 并使 tweepy 与它们一起工作?
我的代码现在可以工作,但它需要从文件中读取链接。这些链接是我通过一个 Chrome 扩展程序获得的,但我仍然想知道是否有更好的方法。
import API_Tokens as t
from tweepy import OAuthHandler, API
import os
import wget
def main():
    """Download the media of every bookmarked tweet listed in bookmarks.txt.

    The bookmark file is opened relative to the starting working directory.
    The process then switches into a ``bookmarks`` sub-directory (created on
    demand) before the tweets are fetched, because the download call in
    getTweets is made without an explicit output path.
    """
    # Open before chdir: "bookmarks.txt" is relative to the original directory.
    # The context manager guarantees the handle is released even when
    # authentication or a tweet lookup raises.
    with open("bookmarks.txt", "r") as file1:
        # exist_ok replaces the former bare ``except:``, which also swallowed
        # unrelated failures (e.g. permission errors) from mkdir.
        os.makedirs('bookmarks', exist_ok=True)
        os.chdir('bookmarks')
        api = authenticate()
        getTweets(api, file1)
def getTweets(api, file1):
    """Fetch every tweet listed in *file1* and download its first media item.

    Args:
        api: Authenticated client exposing ``get_status(tweet_id)``
            (a tweepy ``API`` object in this script).
        file1: Open text file with one tweet URL per line, e.g.
            ``https://twitter.com/xyz123/status/12344567902367``.
            It is closed before this function returns or raises.

    Returns:
        list: The tweet objects returned by ``api.get_status``, in file order.
    """
    all_tweets = []
    try:
        # Iterating the file stops at EOF; the previous ``while True`` loop
        # kept reading empty strings and ended by requesting an empty ID.
        for line in file1:
            # Everything after "status/" is the tweet ID. Drop the trailing
            # newline, any "?query" suffix and a trailing slash.
            tail = line.partition("status/")[2]
            tweet_id = tail.split("?", 1)[0].strip().rstrip("/")
            if not tweet_id:
                # Blank line or a line that is not a tweet URL.
                continue

            tweet = api.get_status(tweet_id)
            all_tweets.append(tweet)

            # Only the first attached media item is downloaded.
            media = tweet.entities.get('media', [])
            if media:
                wget.download(media[0]['media_url'])
    finally:
        file1.close()
    return all_tweets
def authenticate():
    """Return a tweepy API client built with developer-access credentials.

    The consumer key/secret and access token/secret are read from the
    ``API_Tokens`` module (imported as ``t``).
    """
    handler = OAuthHandler(t.CONSUMER_KEY, t.CONSUMER_SECRET)
    handler.set_access_token(t.ACCESS_TOKEN, t.ACCESS_TOKEN_SECRET)
    return API(handler)
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()
您现在为每条推文都单独发起一次 API 调用。您可以使用 statuses_lookup 来减少调用次数,它的每个请求最多可以包含 100 个推文 ID。
# Batch the lookups: one request per 100 tweet IDs instead of one per tweet.
# `api` must already be an authenticated tweepy API object.
with open("bookmarks.txt", "r") as f:
    # One tweet URL per line; blank lines are dropped so int() never sees "".
    urls = [line.strip() for line in f if line.strip()]

# The tweet ID is the last path segment; strip any "?query" suffix first.
tweet_ids = [int(url.split("?", 1)[0].rstrip("/").split("/")[-1]) for url in urls]

n = 100  # maximum number of IDs per lookup request
chunks = [tweet_ids[i:i + n] for i in range(0, len(tweet_ids), n)]  # split list in chunks

all_tweets = []
for chunk in chunks:
    # NOTE(review): Tweepy 4.x renamed statuses_lookup to lookup_statuses;
    # confirm against the installed version.
    all_tweets.extend(api.statuses_lookup(chunk))  # extend avoids re-copying the list