Python inotify - 在创建新文件时执行函数

Python inotify - Execute function upon new file creation

在一个 Python 脚本中,我正在监视一个目录,以便发现扫描仪生成的新文件。目前我的代码只对 IN_CLOSE_WRITE 事件做出反应。我知道正确的做法是先等到 IN_CREATE 事件,再等到随后针对同一文件的 IN_CLOSE_WRITE 事件。

我当前的代码如下所示:

import os
import sys
import logging
import inotify.adapters
import ocrmypdf

def DoOCR(filePath, fileName):
    """Run OCR on a single file, writing the result back to the same path.

    The full path is built from ``filePath`` and ``fileName`` and handed to
    ``ocrmypdf.ocr`` as both the input and the output file.

    Args:
        filePath: Directory that contains the file.
        fileName: Name of the file inside ``filePath``.

    Raises:
        Exception: Any error other than ``PriorOcrFoundError`` is reported on
            stdout and then re-raised unchanged.
    """
    print("Processing {}".format(fileName))
    try:
        fullPath = os.path.join(filePath, fileName)
        # Input and output are the same path, so the output is written over
        # the input file.
        ocrmypdf.ocr(fullPath, fullPath, deskew=True, clean=True, language="deu")
    except ocrmypdf.exceptions.PriorOcrFoundError as err:
        # Not treated as a failure: report it and carry on.
        print("Already processed: {0}".format(err))
    except Exception:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit propagate without being reported as "unexpected" errors.
        print("Unexpected error:", sys.exc_info()[0])
        raise

if __name__ == '__main__':
    # Setup logging. DEBUG is verbose; change the level to logging.INFO for
    # quieter output.
    logging.basicConfig(level=logging.DEBUG)

    i = inotify.adapters.Inotify()

    pathToWatch = '/srv/smb/scanneddocs'
    # makedirs(exist_ok=True) also creates missing parent directories and does
    # not fail when the directory already exists, which avoids the
    # check-then-create race of os.path.exists() + os.mkdir().
    os.makedirs(pathToWatch, exist_ok=True)

    # Only creation and close-after-write events are requested for the watch.
    watchMask = inotify.constants.IN_CREATE | inotify.constants.IN_CLOSE_WRITE

    i.add_watch(pathToWatch, watchMask)

    for event in i.event_gen(yield_nones=False):
        (_, type_names, path, filename) = event

        #TODO: Check that a IN_CREATE is followed by a IN_CLOSE_WRITE
        if "IN_CLOSE_WRITE" in type_names:
            print("PATH=[{}] FILENAME=[{}] EVENT_TYPES={}".format(path, filename, type_names))
            _, extension = os.path.splitext(filename)
            print(extension)
            if extension.lower() == ".pdf":
                # Use the directory reported by the event itself rather than
                # the module-level constant.
                DoOCR(path, filename)
            else:
                print("{} is no PDF file. Skipping...".format(filename))

确保两个事件依次发生的最佳方法是什么?

收到 IN_CREATE 事件时,把文件名加入一个集合(set);收到 IN_CLOSE_WRITE 事件时,检查该文件名是否在集合中,只有已在集合中的文件才会被处理,处理前先将其从集合中移除。

    # Names for which IN_CREATE has been seen but no IN_CLOSE_WRITE yet.
    pending = set()
    for _, event_types, watch_dir, entry in i.event_gen(yield_nones=False):
        if "IN_CREATE" in event_types:
            pending.add(entry)

        # Everything below only applies to close-after-write events.
        if "IN_CLOSE_WRITE" not in event_types:
            continue

        # Ignore closes for names that were never announced via IN_CREATE;
        # otherwise drop the name so the set does not keep handled entries.
        try:
            pending.remove(entry)
        except KeyError:
            continue

        print("PATH=[{}] FILENAME=[{}] EVENT_TYPES={}".format(watch_dir, entry, event_types))
        suffix = os.path.splitext(entry)[1]
        print(suffix)
        if suffix.lower() != ".pdf":
            print("{} is no PDF file. Skipping...".format(entry))
        else:
            DoOCR(pathToWatch, entry)