每天终止并重启 Python 脚本
Kill and restart Python script every day
我有一个 Python 代码可以通过流 API 提取 Twitter 数据。我想每天使用单独的文件,所以我想让脚本 运行 运行 24 小时,然后终止它并重新启动它,因为随着程序的重新启动,文件的名称将会改变。
如何确保脚本在 00:00 处停止并立即重新启动?
代码可以在下面找到。如果您对我如何每天创建一个新的文本文件有任何其他想法,那就更好了。
import tweepy
import datetime
key_words = ["xx"]
twitter_data_title = "".join([xx, "_", date_today, ".txt"])
class TwitterStreamer():
def __init__(self):
pass
def stream_tweets(self, twitter_data_title, key_words):
listener = StreamListener(twitter_data_title)
auth = tweepy.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_secret_token)
stream = tweepy.Stream(auth, listener)
stream.filter(track=key_words)
class StreamListener(tweepy.StreamListener):
def __init__(self, twitter_data_title):
self.fetched_tweets_filename = twitter_data_title
def on_data(self, data):
try:
print(data)
with open(self.fetched_tweets_filename, 'a') as tf:
tf.write(data)
return True
except BaseException as e:
print("Error on_data %s" % str(e))
return True
def on_exception(self, exception):
print('exception', exception)
stream_tweets(twitter_data_title, key_words)
def on_error(self, status):
print(status)
def stream_tweets(twitter_data_title, key_words):
listener = StreamListener(twitter_data_title)
auth = tweepy.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_secret_token)
stream = tweepy.Stream(auth, listener)
stream.filter(track=key_words)
if __name__ == '__main__':
twitter_streamer = TwitterStreamer()
twitter_streamer.stream_tweets(twitter_data_title, key_words)
我会将此添加到您的代码中:
from threading import Timer
def stopTheScript():
exec(open("anotherscript.py").read())
exit()
Timer(86400, stopTheScript).start() #86400 s = 24 h
您示例中的 'blocking' 代码似乎来自另一个库,因此您没有机会(轻松地)更改内部循环以检查条件并退出。
使用后台进程(不理想)
您可以更改入口点以在后台进程中启动代码,并检查文件的标题是否应该更改:
from multiprocessing import Process
from time import sleep
...
if __name__ == "__main__":
twitter_streamer = TwitterStreamer()
twitter_data_title, process = None, None
while True:
new_data_title = "".join([xx, "_", str(datetime.date.today()), ".txt"])
if new_data_title == twitter_data_title: # Nothing to do.
sleep(60) # Sleep for a minute
continue # And check again
# Set the new title.
twitter_data_title = new_data_title
# If the process is already running, terminate and join it.
if process is not None:
process.terminate()
process.join()
process = Process(target=twitter_streamer.stream_tweets, args=[twitter_data_title, key_words])
process.start()
改变StreamListener
更好的选择可能是将日期知识编码为 StreamListener
。不要传递文件名 (twitter_data_title
),而是传递文件前缀(xx
来自您的示例),并在 属性:
中构建文件名
...
class StreamListener(tweepy.StreamListener):
def __init__(self, file_prefix):
self.prefix = file_prefix
@property
def fetched_tweets_filename(self):
"""The file name for the tweets."""
date = datetime.date.today()
return f"{self.prefix}_{date}.txt"
...
...
if __name__ == "__main__":
twitter_streamer = TwitterStreamer()
twitter_streamer.stream_tweets(xx, key_words)
由于 StreamListener.on_data
从 self.fetched_tweets_filename
获取文件名,这应该意味着推文会在日期更改时写入新文件。
我有一个 Python 代码可以通过流 API 提取 Twitter 数据。我想每天使用单独的文件,所以我想让脚本 运行 运行 24 小时,然后终止它并重新启动它,因为随着程序的重新启动,文件的名称将会改变。
如何确保脚本在 00:00 处停止并立即重新启动? 代码可以在下面找到。如果您对我如何每天创建一个新的文本文件有任何其他想法,那就更好了。
import tweepy
import datetime
key_words = ["xx"]
twitter_data_title = "".join([xx, "_", date_today, ".txt"])
class TwitterStreamer():
def __init__(self):
pass
def stream_tweets(self, twitter_data_title, key_words):
listener = StreamListener(twitter_data_title)
auth = tweepy.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_secret_token)
stream = tweepy.Stream(auth, listener)
stream.filter(track=key_words)
class StreamListener(tweepy.StreamListener):
def __init__(self, twitter_data_title):
self.fetched_tweets_filename = twitter_data_title
def on_data(self, data):
try:
print(data)
with open(self.fetched_tweets_filename, 'a') as tf:
tf.write(data)
return True
except BaseException as e:
print("Error on_data %s" % str(e))
return True
def on_exception(self, exception):
print('exception', exception)
stream_tweets(twitter_data_title, key_words)
def on_error(self, status):
print(status)
def stream_tweets(twitter_data_title, key_words):
listener = StreamListener(twitter_data_title)
auth = tweepy.OAuthHandler(api_key, api_secret_key)
auth.set_access_token(access_token, access_secret_token)
stream = tweepy.Stream(auth, listener)
stream.filter(track=key_words)
if __name__ == '__main__':
twitter_streamer = TwitterStreamer()
twitter_streamer.stream_tweets(twitter_data_title, key_words)
我会将此添加到您的代码中:
from threading import Timer
def stopTheScript():
exec(open("anotherscript.py").read())
exit()
Timer(86400, stopTheScript).start() #86400 s = 24 h
您示例中的 'blocking' 代码似乎来自另一个库,因此您没有机会(轻松地)更改内部循环以检查条件并退出。
使用后台进程(不理想)
您可以更改入口点以在后台进程中启动代码,并检查文件的标题是否应该更改:
from multiprocessing import Process
from time import sleep
...
if __name__ == "__main__":
twitter_streamer = TwitterStreamer()
twitter_data_title, process = None, None
while True:
new_data_title = "".join([xx, "_", str(datetime.date.today()), ".txt"])
if new_data_title == twitter_data_title: # Nothing to do.
sleep(60) # Sleep for a minute
continue # And check again
# Set the new title.
twitter_data_title = new_data_title
# If the process is already running, terminate and join it.
if process is not None:
process.terminate()
process.join()
process = Process(target=twitter_streamer.stream_tweets, args=[twitter_data_title, key_words])
process.start()
改变StreamListener
更好的选择可能是将日期知识编码为 StreamListener
。不要传递文件名 (twitter_data_title
),而是传递文件前缀(xx
来自您的示例),并在 属性:
...
class StreamListener(tweepy.StreamListener):
def __init__(self, file_prefix):
self.prefix = file_prefix
@property
def fetched_tweets_filename(self):
"""The file name for the tweets."""
date = datetime.date.today()
return f"{self.prefix}_{date}.txt"
...
...
if __name__ == "__main__":
twitter_streamer = TwitterStreamer()
twitter_streamer.stream_tweets(xx, key_words)
由于 StreamListener.on_data
从 self.fetched_tweets_filename
获取文件名,这应该意味着推文会在日期更改时写入新文件。