Is it possible to obtain the date/time of streamed tweets using Twitter's API V2 in Python?
I don't know if this is even possible, but is there a way to get the date/time of each tweet pulled from Twitter's filtered stream?
I used the sample code provided in the Twitter API V2 documentation as the basis for streaming "filtered stream" tweets. I edited it so that I can search for keywords, and I can get the text of each tweet, but I would also like the tweet's date/time. I can't seem to manage it.
My goal is to count the number of tweets containing my word(s) of interest created in each 15-minute window, but I can't do that without the time each tweet was created.
Here is my code so far:
```python
import requests
import os
import json
import config
import preprocessor as p
from csv import writer

# To set your environment variables in your terminal, run the following line:
# export 'BEARER_TOKEN'='<your_bearer_token>'
bearer_token = config.BEARER_TOKEN


def bearer_oauth(r):
    """
    Method required by bearer token authentication.
    """
    r.headers["Authorization"] = f"Bearer {bearer_token}"
    r.headers["User-Agent"] = "v2FilteredStreamPython"
    return r


def get_rules():
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream/rules", auth=bearer_oauth
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot get rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    return response.json()


def delete_all_rules(rules):
    if rules is None or "data" not in rules:
        return None
    ids = list(map(lambda rule: rule["id"], rules["data"]))
    payload = {"delete": {"ids": ids}}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        auth=bearer_oauth,
        json=payload
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot delete rules (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    print(json.dumps(response.json()))


def set_rules(delete):
    # You can adjust the rules if needed
    sample_rules = [
        {"value": "(AVAX OR #AVAX OR AVAX/USDT OR AVAXUSDT OR AVAXUSD OR AVALANCHEAVAX OR #AVALANCHEAVAX) lang:en -giveaway -jackpot -jackpots -collectable -collectible -collection"},
        # -passive -prize -prizes -giveaways -tag -YouTube -dickhead -rank -ranked -rewards -link -visit -game -promotion -promote -vote -colony -retweet -Regards -discord -jizz -tits -join -airdrop -earn -retweets -contest -shib -shiba -is:retweet -is:reply -has:links
        # {"value": "cat has:images -grumpy", "tag": "cat pictures"},
    ]
    payload = {"add": sample_rules}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        auth=bearer_oauth,
        json=payload,
    )
    if response.status_code != 201:
        raise Exception(
            "Cannot add rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))


def get_stream(set):
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream", auth=bearer_oauth, stream=True,
    )
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Cannot get stream (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    for response_line in response.iter_lines():
        if response_line:
            json_response = json.loads(response_line)
            # print(json.dumps(json_response, indent=4, sort_keys=True))
            tweet = json_response['data']['text']
            tweet = p.clean(tweet)
            print(tweet)
            tweetList = [tweet]
            with open('avaxdata.csv', 'a+', newline='') as write_obj:
                csv_writer = writer(write_obj)
                csv_writer.writerow(tweetList)


def main():
    rules = get_rules()
    delete = delete_all_rules(rules)
    set = set_rules(delete)
    get_stream(set)


if __name__ == "__main__":
    main()
```
Yes, you can add extra field parameters to the endpoint. To get a tweet's creation time, try https://api.twitter.com/2/tweets/search/stream?tweet.fields=created_at
For the full list of optional parameters, check the API reference here.
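For reference, once tweet.fields=created_at is on the request, each streamed line carries the timestamp as an ISO-8601 string under data. A minimal parsing sketch (the payload values below are illustrative, not a real tweet):
```python
import json
from datetime import datetime

# Illustrative line, as it would arrive from response.iter_lines()
raw = '{"data": {"id": "1", "created_at": "2021-11-15T18:57:05.000Z", "text": "..."}}'
tweet = json.loads(raw)["data"]

# v2 timestamps are UTC with millisecond precision and a trailing "Z"
created = datetime.strptime(tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
print(created)  # 2021-11-15 18:57:05
```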
Many thanks to Alan Lee for pointing me in the right direction. I included the link he provided in my get_stream function:
```python
def get_stream(set):
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream?tweet.fields=created_at,text",
        auth=bearer_oauth,
        stream=True,
    )
```
Note that I also added the 'text' parameter at the end, since I want the actual content of the tweet as well as its creation time.
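As an aside, the same request can be written with requests' params argument instead of hand-building the query string; this is just a stylistic alternative (a sketch reusing bearer_oauth from the script above), and the endpoint behaves identically:
```python
import requests

response = requests.get(
    "https://api.twitter.com/2/tweets/search/stream",
    auth=bearer_oauth,  # auth helper from the original script
    params={"tweet.fields": "created_at,text"},  # requests URL-encodes this for you
    stream=True,
)
```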
The full get_stream function, which lets me grab both the date and the text, clean the text, and store them in two separate columns (text, date) in a CSV, is as follows.
Note: make sure you install 'tweet-preprocessor' and NOT 'preprocessor'... they are different packages, and the latter will not work with this code.
```python
import requests
import json
import preprocessor as p
from csv import writer


def get_stream(set):
    # bearer_oauth is the auth helper defined in the original script above
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream?tweet.fields=created_at,text",
        auth=bearer_oauth,
        stream=True,
    )
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Cannot get stream (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    for response_line in response.iter_lines():
        if response_line:
            json_response = json.loads(response_line)
            tweet_text = json_response['data']['text']
            tweet_created_at = json_response['data']['created_at']
            tweet_text = p.clean(tweet_text)  # strip URLs, mentions, hashtags, etc.
            print(tweet_text)
            print(tweet_created_at)
            # One CSV row per tweet: column 0 = cleaned text, column 1 = created_at
            tweetList = [tweet_text, tweet_created_at]
            with open('avaxdata.csv', 'a+', newline='') as write_obj:
                csv_writer = writer(write_obj)
                csv_writer.writerow(tweetList)
```
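With created_at stored next to each tweet, the original goal of counting matching tweets per 15-minute window becomes a short aggregation step. A minimal sketch, assuming pandas is installed and the CSV has the two header-less columns written above (text, then created_at):
```python
import pandas as pd

# avaxdata.csv is written without a header: column 0 = cleaned text,
# column 1 = the ISO-8601 created_at timestamp
df = pd.read_csv("avaxdata.csv", header=None, names=["text", "created_at"])
df["created_at"] = pd.to_datetime(df["created_at"])

# Count the tweets falling in each 15-minute bucket
counts = df.set_index("created_at").resample("15min")["text"].count()
print(counts)
```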
I hope someone finds this useful! It took me much longer than it should have, haha!