如何在 Python 中获取目录中的更改
How can I get changes in a directory in Python
我正在尝试获取目录中的最后更改,而不仅仅是最后修改的文件。
到目前为止我有这个代码:
import logging
import pathlib
import time

# Show INFO (and DEBUG) messages on the console.
logging.basicConfig(level=logging.DEBUG)

# Directory whose direct children are watched.
source_path = pathlib.Path("U:")
logging.info(f"Source directory is {source_path}")

# Seconds to wait between scans so the loop does not spin at full CPU.
POLL_INTERVAL = 0.5

last_path = None
while True:
    try:
        # Entry with the newest st_ctime; None when the directory is empty
        # (a bare max() would raise ValueError on an empty iterable).
        latest_path = max(
            source_path.glob("*"),
            key=lambda path: path.stat().st_ctime,
            default=None,
        )
    except FileNotFoundError:
        # An entry vanished between listing and stat(); keep the previous
        # result and try again on the next pass.
        latest_path = last_path
    if latest_path is not None and latest_path != last_path:
        logging.info(f"Last changed path is {latest_path}")
        last_path = latest_path
    time.sleep(POLL_INTERVAL)
它的作用是打印出目录(当前为 U:)中最新更改的路径。
这是一些示例输出:
INFO:root:Source directory is U:
INFO:root:Last changed path is U:Website Downloader
INFO:root:Last changed path is U:New Text Document.txt <-- Created it
INFO:root:Last changed path is U:hi.txt <-- Renamed it
INFO:root:Last changed path is U:Website Downloader <-- Deleted hi.txt
INFO:root:Last changed path is U:New folder <-- Created it
INFO:root:Last changed path is U:hi <-- Renamed it
INFO:root:Last changed path is U:Website Downloader <-- Deleted hi (directory)
它遗漏了一些情况,例如保存对基本目录 (U:) 中文件的更改,以及在子目录中新建、修改和删除文件。
我想让它说,比如:
INFO:root:Source directory is U:
INFO:root:File created: U:New Text Document.txt
INFO:root:File modified: U:New Text Document.txt
INFO:root:File deleted: U:New Text Document.txt
INFO:root:Directory created: U:New Folder
INFO:root:File created: U:New Folder\New Text Document.txt
INFO:root:File modified: U:New Folder\New Text Document.txt
INFO:root:File deleted: U:New Folder\New Text Document.txt
INFO:root:Directory created: U:New Folder\New Folder
INFO:root:Directory created: U:New Folder\New Folder\New Folder
INFO:root:Directory deleted: U:New Folder\New Folder\New Folder
INFO:root:Directory deleted: U:New Folder
这在 Python 中甚至可能吗?
提前致谢
您需要的是一个事件驱动的文件夹监视库,例如 watchdog:https://pypi.org/project/watchdog/
有关更多信息,请在 Google 上搜索 “python folder watcher”。
可能与操作系统有关。
这绝对有可能,我的意思是,一切皆有可能!但是它可能有点复杂,所以我建议您使用像 watchdog 这样的库,它挂接到操作系统的文件系统事件监视机制。
尽管如此,如果您确实需要使用轮询扫描器,这里有一个实现。需要注意的是,它有明显的性能代价,并且不会注意到在两次轮询之间出现后又消失的文件。
import time
import pathlib
import logging
# Set the root logger to DEBUG so the INFO messages emitted by log() are shown.
logging.basicConfig(level=logging.DEBUG)
def get_paths(path):
    """Take a snapshot of every entry found recursively below *path*.

    Args:
        path: Directory to walk (anything ``pathlib.Path`` accepts).

    Returns:
        A dict mapping ``str(entry)`` to a ``(stamp, is_dir)`` tuple.
        ``stamp`` is the later of the entry's ``st_mtime`` and ``st_ctime``;
        ``is_dir`` is True for directories. Entries that disappear while
        being inspected are left out.
    """
    snapshot = {}
    for entry in pathlib.Path(path).rglob("*"):
        try:
            info = entry.stat()
            # st_ctime alone is not enough: on Windows it is the creation
            # time, so edits to an existing file would never change it.
            # Taking the later of the two timestamps catches content changes
            # there while still reflecting st_ctime where it is the newer one.
            stamp = max(info.st_mtime, info.st_ctime)
            snapshot[str(entry)] = (stamp, entry.is_dir())
        except FileNotFoundError:
            # Entry vanished between listing and stat(); skip it.
            continue
    return snapshot
def log(name, is_dir, action):
    """Log one change event at INFO level.

    Args:
        name: Path of the entry the event refers to.
        is_dir: True when the entry is a directory, False for a file.
        action: Verb describing the event, e.g. "created".
    """
    if is_dir:
        kind = "Directory"
    else:
        kind = "File"
    message = "{} {}: {}".format(kind, action, name)
    logging.info(message)
def scan(top_dir, sleep_time):
    """Poll *top_dir* forever and log created, deleted and modified entries.

    Args:
        top_dir: Directory passed to get_paths() for each snapshot.
        sleep_time: Seconds to sleep between two consecutive snapshots.

    The function never returns; each pass compares the fresh snapshot with
    the previous one and reports the differences through log().
    """
    previous = get_paths(top_dir)
    previous_names = set(previous)
    while True:
        time.sleep(sleep_time)
        current = get_paths(top_dir)
        current_names = set(current)

        added = current_names - previous_names
        removed = previous_names - current_names

        # Names only in the new snapshot were created since the last pass.
        for name in added:
            log(name, current[name][1], "created")

        # Names only in the old snapshot are gone now.
        for name in removed:
            log(name, previous[name][1], "deleted")

        # Names in both snapshots count as modified when their stamp differs.
        for name in previous_names & current_names:
            stamp_now, is_dir = current[name]
            stamp_before = previous[name][0]
            if stamp_now != stamp_before:
                log(name, is_dir, "modified")

        # The fresh snapshot becomes the baseline for the next pass.
        previous, previous_names = current, current_names
# Script entry: watch the U: drive, taking a snapshot every 10 seconds.
# scan() loops without returning, so nothing after this call would run.
sleep_time = 10
top_dir = "U:"
scan(top_dir=top_dir, sleep_time=sleep_time)
我正在尝试获取目录中的最后更改,而不仅仅是最后修改的文件。
到目前为止我有这个代码:
import logging
import pathlib
import time

# Show INFO (and DEBUG) messages on the console.
logging.basicConfig(level=logging.DEBUG)

# Directory whose direct children are watched.
source_path = pathlib.Path("U:")
logging.info(f"Source directory is {source_path}")

# Seconds to wait between scans so the loop does not spin at full CPU.
POLL_INTERVAL = 0.5

last_path = None
while True:
    try:
        # Entry with the newest st_ctime; None when the directory is empty
        # (a bare max() would raise ValueError on an empty iterable).
        latest_path = max(
            source_path.glob("*"),
            key=lambda path: path.stat().st_ctime,
            default=None,
        )
    except FileNotFoundError:
        # An entry vanished between listing and stat(); keep the previous
        # result and try again on the next pass.
        latest_path = last_path
    if latest_path is not None and latest_path != last_path:
        logging.info(f"Last changed path is {latest_path}")
        last_path = latest_path
    time.sleep(POLL_INTERVAL)
它的作用是打印出目录(当前为 U:)中最新更改的路径。
这是一些示例输出:
INFO:root:Source directory is U:
INFO:root:Last changed path is U:Website Downloader
INFO:root:Last changed path is U:New Text Document.txt <-- Created it
INFO:root:Last changed path is U:hi.txt <-- Renamed it
INFO:root:Last changed path is U:Website Downloader <-- Deleted hi.txt
INFO:root:Last changed path is U:New folder <-- Created it
INFO:root:Last changed path is U:hi <-- Renamed it
INFO:root:Last changed path is U:Website Downloader <-- Deleted hi (directory)
它遗漏了一些情况,例如保存对基本目录 (U:) 中文件的更改,以及在子目录中新建、修改和删除文件。
我想让它说,比如:
INFO:root:Source directory is U:
INFO:root:File created: U:New Text Document.txt
INFO:root:File modified: U:New Text Document.txt
INFO:root:File deleted: U:New Text Document.txt
INFO:root:Directory created: U:New Folder
INFO:root:File created: U:New Folder\New Text Document.txt
INFO:root:File modified: U:New Folder\New Text Document.txt
INFO:root:File deleted: U:New Folder\New Text Document.txt
INFO:root:Directory created: U:New Folder\New Folder
INFO:root:Directory created: U:New Folder\New Folder\New Folder
INFO:root:Directory deleted: U:New Folder\New Folder\New Folder
INFO:root:Directory deleted: U:New Folder
这在 Python 中甚至可能吗?
提前致谢
您需要的是一个事件驱动的文件夹监视库,例如 watchdog:https://pypi.org/project/watchdog/
有关更多信息,请在 Google 上搜索 “python folder watcher”。
可能与操作系统有关。
这绝对有可能,我的意思是,一切皆有可能!但是它可能有点复杂,所以我建议您使用像 watchdog 这样的库,它挂接到操作系统的文件系统事件监视机制。
尽管如此,如果您确实需要使用轮询扫描器,这里有一个实现。需要注意的是,它有明显的性能代价,并且不会注意到在两次轮询之间出现后又消失的文件。
import time
import pathlib
import logging
# Set the root logger to DEBUG so the INFO messages emitted by log() are shown.
logging.basicConfig(level=logging.DEBUG)
def get_paths(path):
    """Take a snapshot of every entry found recursively below *path*.

    Args:
        path: Directory to walk (anything ``pathlib.Path`` accepts).

    Returns:
        A dict mapping ``str(entry)`` to a ``(stamp, is_dir)`` tuple.
        ``stamp`` is the later of the entry's ``st_mtime`` and ``st_ctime``;
        ``is_dir`` is True for directories. Entries that disappear while
        being inspected are left out.
    """
    snapshot = {}
    for entry in pathlib.Path(path).rglob("*"):
        try:
            info = entry.stat()
            # st_ctime alone is not enough: on Windows it is the creation
            # time, so edits to an existing file would never change it.
            # Taking the later of the two timestamps catches content changes
            # there while still reflecting st_ctime where it is the newer one.
            stamp = max(info.st_mtime, info.st_ctime)
            snapshot[str(entry)] = (stamp, entry.is_dir())
        except FileNotFoundError:
            # Entry vanished between listing and stat(); skip it.
            continue
    return snapshot
def log(name, is_dir, action):
    """Log one change event at INFO level.

    Args:
        name: Path of the entry the event refers to.
        is_dir: True when the entry is a directory, False for a file.
        action: Verb describing the event, e.g. "created".
    """
    if is_dir:
        kind = "Directory"
    else:
        kind = "File"
    message = "{} {}: {}".format(kind, action, name)
    logging.info(message)
def scan(top_dir, sleep_time):
    """Poll *top_dir* forever and log created, deleted and modified entries.

    Args:
        top_dir: Directory passed to get_paths() for each snapshot.
        sleep_time: Seconds to sleep between two consecutive snapshots.

    The function never returns; each pass compares the fresh snapshot with
    the previous one and reports the differences through log().
    """
    previous = get_paths(top_dir)
    previous_names = set(previous)
    while True:
        time.sleep(sleep_time)
        current = get_paths(top_dir)
        current_names = set(current)

        added = current_names - previous_names
        removed = previous_names - current_names

        # Names only in the new snapshot were created since the last pass.
        for name in added:
            log(name, current[name][1], "created")

        # Names only in the old snapshot are gone now.
        for name in removed:
            log(name, previous[name][1], "deleted")

        # Names in both snapshots count as modified when their stamp differs.
        for name in previous_names & current_names:
            stamp_now, is_dir = current[name]
            stamp_before = previous[name][0]
            if stamp_now != stamp_before:
                log(name, is_dir, "modified")

        # The fresh snapshot becomes the baseline for the next pass.
        previous, previous_names = current, current_names
# Script entry: watch the U: drive, taking a snapshot every 10 seconds.
# scan() loops without returning, so nothing after this call would run.
sleep_time = 10
top_dir = "U:"
scan(top_dir=top_dir, sleep_time=sleep_time)