我需要一种更快的方法来递归查找文件元数据标签,我的 VBA 脚本太慢了

I need a faster method to recursively find file metadata Tags, my VBA script is too slow

我在 VBA 中构建了一个脚本,它递归地查看文件夹以查找文件的元数据 "Tags"。做一个小文件结构需要 20 多分钟,我需要它来处理更大的文件结构。 VBA 是不是用错了工具?

它包含我要搜索的标签列表和文件目录

我尝试加快速度的事情:
-一旦在关键字列表中找到一个匹配项,它就会停止查找其他关键字
-如果没有关键字,它甚至不会费心寻找匹配项

子程序如下:

        Sub FolderSearcher(ByVal SourceFolder As String, KeywordList As Variant)
        'A recursive sub that searches metadata Tags for the Keywords and populates the output

        Dim oFSO, oSourceFolder, oSubFolder As Variant 'FSO
        Dim oShell, oDir As Variant 'Shell
        Dim KeywordListSize As Integer
        Dim DirectoryItem As Variant
        Dim vFileName, vFileKeyword As Variant
        Dim k As Integer 'Counter

        'Create FileSystemObject And Shell Application objects
        Set oFSO = CreateObject("Scripting.FileSystemObject")
        Set oSourceFolder = oFSO.GetFolder(SourceFolder)
        Set oShell = CreateObject("Shell.Application")

        KeywordListSize = UBound(KeywordList) - LBound(KeywordList)

        'Loop through all Sub folders
        For Each oSubFolder In oSourceFolder.SubFolders
            ' Look through all the files in the folder
            Set oDir = oShell.Namespace(oSubFolder.Path)
            For Each DirectoryItem In oDir.Items
                vFileName = oDir.GetDetailsOf(DirectoryItem, 0) 'Detail 0 is the file name
                If Right(vFileName, 4) = ".pdf" Then 'check if it's a PDF
                    vFileKeyword = PDFkeyword(oSubFolder.Path, vFileName) 'PDFs work a little differently
                Else
                    vFileKeyword = oDir.GetDetailsOf(DirectoryItem, 18) 'Detail 18 is the file tag
                End If

                If vFileKeyword = "" Then
                    GoTo NextDirItem
                End If

                ' Loop through all the searchlist keywords to check for a match
                For k = 0 To KeywordListSize
                    If vFileKeyword = KeywordList(k) Then
                        Call OutputSubroutine(oSubFolder.Path, vFileName, k)
                        Exit For
                    End If
                Next k

                NextDirItem:
            Next DirectoryItem

            'Recursive search through all the subfolders
            Call FolderSearcher(oSubFolder.Path, KeywordList)
        Next oSubFolder


        'Release Objects
        Set oSubFolder = Nothing
        Set oSourceFolder = Nothing
        Set oFSO = Nothing
        Set oDir = Nothing
        Set oShell = Nothing

    End Sub

输入:
KeywordList 是一个包含字符串的可变长度的一维数组。
SourceFolder 是一个包含我的文件路径的字符串,即 \files\example

我查看了 ,但没有找到有用的答案。

我希望获得有关如何加快速度的提示。或者对可能比 VBA 更有效地执行此工作的其他语言的建议。谢谢!

编辑* 我被要求添加 PDF 关键字 function

Public Function PDFkeyword(InFilePath As Variant, InFileName As Variant) As String
    'This function is used to read the metadata from a PDF
    'Inputs: the folder path and file name
    'Output: the PDF keyword

    Dim oFile As String
    Dim oApp As Object
    Dim oDoc As Object
    Dim strFileName As String
    Dim strKeywords As String

    Set oApp = CreateObject("AcroExch.App")
    Set oDoc = CreateObject("AcroExch.PDDoc")
    oFile = InFilePath & "\" & InFileName

    'Grab the keywords from the PDF file
    With oDoc
      If .Open(oFile) Then
        'strFileName = .GetFileName    'not needed rn but could be handy
        strKeywords = .GetInfo("Keywords")
        .Close
      End If
    End With

    PDFkeyword = strKeywords

    'Release Objects
    Set oDoc = Nothing
    Set oApp = Nothing

End Function

小文件结构只有大约 3 个文件夹级别和 16 个文件。大型结构可能有 5 个以上的文件夹级别和 ~1000 个文件。

这是一个轻微的重写,删除了递归,并将您的 PDF 函数更改为对 Acrobat 对象使用静态变量。

在我的机器上,它在大约 4.5 秒内处理了 3182 个文件。

Sub FolderSearcher(ByVal SourceFolder As String, KeywordList As Variant)

    Dim oFSO, oSourceFolder, oSubFolder As Variant
    Dim oShell, oDir As Variant 'Shell
    Dim KeywordListSize As Long
    Dim DirectoryItem As Variant
    Dim vFileName, vFileKeyword As Variant
    Dim k As Long 'Counter
    Dim colFolders As New Collection
    Dim fldr, subFldr, fCount As Long

    Set oFSO = CreateObject("Scripting.FileSystemObject")
    Set oSourceFolder = oFSO.GetFolder(SourceFolder)
    Set oShell = CreateObject("Shell.Application")

    KeywordListSize = UBound(KeywordList) - LBound(KeywordList)

    colFolders.Add oSourceFolder  '<<< first folder for processing

    Do While colFolders.Count > 0

        Set fldr = colFolders(1)  'get the first folder to process
        colFolders.Remove 1       '...then remove from the collection

        For Each subFldr In fldr.subfolders 'capture subfolders for processing
            colFolders.Add subFldr
        Next subFldr

        Set oDir = oShell.Namespace(fldr.Path)
        For Each DirectoryItem In oDir.Items
            fCount = fCount + 1
            vFileName = oDir.GetDetailsOf(DirectoryItem, 0)
            If LCase(Right(vFileName, 4)) = ".pdf" Then
                vFileKeyword = PDFkeyword(fldr.Path, vFileName) 'PDFs work a little differently
            Else
                vFileKeyword = oDir.GetDetailsOf(DirectoryItem, 18) 'Detail 18 is the file tag
            End If

            If Len(vFileKeyword) > 0 Then
                Debug.Print "Got keyword:" & vFileKeyword
                For k = 0 To KeywordListSize
                    If vFileKeyword = KeywordList(k) Then
                        'Call OutputSubroutine(oSubFolder.Path, vFileName, k)
                        Debug.Print fldr.Path, vFileName, k
                        Exit For
                    End If
                Next k
            End If
        Next DirectoryItem
    Loop
    Debug.Print "Processed " & fCount & " files"
End Sub

Public Function PDFkeyword(InFilePath As Variant, InFileName As Variant) As String
    Dim oFile As String
    Static oApp As Object
    Static oDoc As Object
    Dim strFileName As String, strKeywords As String
    'only create objects once...
    If oApp Is Nothing Then Set oApp = CreateObject("AcroExch.App")
    If oDoc Is Nothing Then Set oDoc = CreateObject("AcroExch.PDDoc")

    oFile = InFilePath & "\" & InFileName
    With oDoc
      If .Open(oFile) Then
        strKeywords = .GetInfo("Keywords")
        .Close
      End If
    End With
    PDFkeyword = strKeywords
End Function