我需要一种更快的方法来递归查找文件元数据标签,我的 VBA 脚本太慢了
I need a faster method to recursively find file metadata Tags, my VBA script is too slow
我在 VBA 中构建了一个脚本,它递归地查看文件夹以查找文件的元数据 "Tags"。做一个小文件结构需要 20 多分钟,我需要它来处理更大的文件结构。
VBA 是不是用错了工具?
它包含我要搜索的标签列表和文件目录
我尝试加快速度的事情:
-一旦在关键字列表中找到一个匹配项,它就会停止查找其他关键字
-如果没有关键字,它甚至不会费心寻找匹配项
子程序如下:
Sub FolderSearcher(ByVal SourceFolder As String, KeywordList As Variant)
'A recursive sub that searches metadata Tags for the Keywords and populates the output
Dim oFSO, oSourceFolder, oSubFolder As Variant 'FSO
Dim oShell, oDir As Variant 'Shell
Dim KeywordListSize As Integer
Dim DirectoryItem As Variant
Dim vFileName, vFileKeyword As Variant
Dim k As Integer 'Counter
'Create FileSystemObject And Shell Application objects
Set oFSO = CreateObject("Scripting.FileSystemObject")
Set oSourceFolder = oFSO.GetFolder(SourceFolder)
Set oShell = CreateObject("Shell.Application")
KeywordListSize = UBound(KeywordList) - LBound(KeywordList)
'Loop through all Sub folders
For Each oSubFolder In oSourceFolder.SubFolders
' Look through all the files in the folder
Set oDir = oShell.Namespace(oSubFolder.Path)
For Each DirectoryItem In oDir.Items
vFileName = oDir.GetDetailsOf(DirectoryItem, 0) 'Detail 0 is the file name
If Right(vFileName, 4) = ".pdf" Then 'check if it's a PDF
vFileKeyword = PDFkeyword(oSubFolder.Path, vFileName) 'PDFs work a little differently
Else
vFileKeyword = oDir.GetDetailsOf(DirectoryItem, 18) 'Detail 18 is the file tag
End If
If vFileKeyword = "" Then
GoTo NextDirItem
End If
' Loop through all the searchlist keywords to check for a match
For k = 0 To KeywordListSize
If vFileKeyword = KeywordList(k) Then
Call OutputSubroutine(oSubFolder.Path, vFileName, k)
Exit For
End If
Next k
NextDirItem:
Next DirectoryItem
'Recursive search through all the subfolders
Call FolderSearcher(oSubFolder.Path, KeywordList)
Next oSubFolder
'Release Objects
Set oSubFolder = Nothing
Set oSourceFolder = Nothing
Set oFSO = Nothing
Set oDir = Nothing
Set oShell = Nothing
End Sub
输入:
KeywordList 是一个包含字符串的可变长度的一维数组。
SourceFolder 是一个包含我的文件路径的字符串,即 \files\example
我查看了 ,但没有找到有用的答案。
我希望获得有关如何加快速度的提示。或者对可能比 VBA 更有效地执行此工作的其他语言的建议。谢谢!
编辑*
我被要求添加 PDF 关键字 function
Public Function PDFkeyword(InFilePath As Variant, InFileName As Variant) As String
'This function is used to read the metadata from a PDF
'Inputs: the folder path and file name
'Output: the PDF keyword
Dim oFile As String
Dim oApp As Object
Dim oDoc As Object
Dim strFileName As String
Dim strKeywords As String
Set oApp = CreateObject("AcroExch.App")
Set oDoc = CreateObject("AcroExch.PDDoc")
oFile = InFilePath & "\" & InFileName
'Grab the keywords from the PDF file
With oDoc
If .Open(oFile) Then
'strFileName = .GetFileName 'not needed rn but could be handy
strKeywords = .GetInfo("Keywords")
.Close
End If
End With
PDFkeyword = strKeywords
'Release Objects
Set oDoc = Nothing
Set oApp = Nothing
End Function
小文件结构只有大约 3 个文件夹级别和 16 个文件。大型结构可能有 5 个以上的文件夹级别和 ~1000 个文件。
这是一个轻微的重写,删除了递归,并将您的 PDF 函数更改为对 Acrobat 对象使用静态变量。
在我的机器上,它在大约 4.5 秒内处理了 3182 个文件。
Sub FolderSearcher(ByVal SourceFolder As String, KeywordList As Variant)
Dim oFSO, oSourceFolder, oSubFolder As Variant
Dim oShell, oDir As Variant 'Shell
Dim KeywordListSize As Long
Dim DirectoryItem As Variant
Dim vFileName, vFileKeyword As Variant
Dim k As Long 'Counter
Dim colFolders As New Collection
Dim fldr, subFldr, fCount As Long
Set oFSO = CreateObject("Scripting.FileSystemObject")
Set oSourceFolder = oFSO.GetFolder(SourceFolder)
Set oShell = CreateObject("Shell.Application")
KeywordListSize = UBound(KeywordList) - LBound(KeywordList)
colFolders.Add oSourceFolder '<<< first folder for processing
Do While colFolders.Count > 0
Set fldr = colFolders(1) 'get the first folder to process
colFolders.Remove 1 '...then remove from the collection
For Each subFldr In fldr.subfolders 'capture subfolders for processing
colFolders.Add subFldr
Next subFldr
Set oDir = oShell.Namespace(fldr.Path)
For Each DirectoryItem In oDir.Items
fCount = fCount + 1
vFileName = oDir.GetDetailsOf(DirectoryItem, 0)
If LCase(Right(vFileName, 4)) = ".pdf" Then
vFileKeyword = PDFkeyword(fldr.Path, vFileName) 'PDFs work a little differently
Else
vFileKeyword = oDir.GetDetailsOf(DirectoryItem, 18) 'Detail 18 is the file tag
End If
If Len(vFileKeyword) > 0 Then
Debug.Print "Got keyword:" & vFileKeyword
For k = 0 To KeywordListSize
If vFileKeyword = KeywordList(k) Then
'Call OutputSubroutine(oSubFolder.Path, vFileName, k)
Debug.Print fldr.Path, vFileName, k
Exit For
End If
Next k
End If
Next DirectoryItem
Loop
Debug.Print "Processed " & fCount & " files"
End Sub
Public Function PDFkeyword(InFilePath As Variant, InFileName As Variant) As String
Dim oFile As String
Static oApp As Object
Static oDoc As Object
Dim strFileName As String, strKeywords As String
'only create objects once...
If oApp Is Nothing Then Set oApp = CreateObject("AcroExch.App")
If oDoc Is Nothing Then Set oDoc = CreateObject("AcroExch.PDDoc")
oFile = InFilePath & "\" & InFileName
With oDoc
If .Open(oFile) Then
strKeywords = .GetInfo("Keywords")
.Close
End If
End With
PDFkeyword = strKeywords
End Function
我在 VBA 中构建了一个脚本,它递归地查看文件夹以查找文件的元数据 "Tags"。做一个小文件结构需要 20 多分钟,我需要它来处理更大的文件结构。 VBA 是不是用错了工具?
它包含我要搜索的标签列表和文件目录
我尝试加快速度的事情:
-一旦在关键字列表中找到一个匹配项,它就会停止查找其他关键字
-如果没有关键字,它甚至不会费心寻找匹配项
子程序如下:
Sub FolderSearcher(ByVal SourceFolder As String, KeywordList As Variant)
'A recursive sub that searches metadata Tags for the Keywords and populates the output
Dim oFSO, oSourceFolder, oSubFolder As Variant 'FSO
Dim oShell, oDir As Variant 'Shell
Dim KeywordListSize As Integer
Dim DirectoryItem As Variant
Dim vFileName, vFileKeyword As Variant
Dim k As Integer 'Counter
'Create FileSystemObject And Shell Application objects
Set oFSO = CreateObject("Scripting.FileSystemObject")
Set oSourceFolder = oFSO.GetFolder(SourceFolder)
Set oShell = CreateObject("Shell.Application")
KeywordListSize = UBound(KeywordList) - LBound(KeywordList)
'Loop through all Sub folders
For Each oSubFolder In oSourceFolder.SubFolders
' Look through all the files in the folder
Set oDir = oShell.Namespace(oSubFolder.Path)
For Each DirectoryItem In oDir.Items
vFileName = oDir.GetDetailsOf(DirectoryItem, 0) 'Detail 0 is the file name
If Right(vFileName, 4) = ".pdf" Then 'check if it's a PDF
vFileKeyword = PDFkeyword(oSubFolder.Path, vFileName) 'PDFs work a little differently
Else
vFileKeyword = oDir.GetDetailsOf(DirectoryItem, 18) 'Detail 18 is the file tag
End If
If vFileKeyword = "" Then
GoTo NextDirItem
End If
' Loop through all the searchlist keywords to check for a match
For k = 0 To KeywordListSize
If vFileKeyword = KeywordList(k) Then
Call OutputSubroutine(oSubFolder.Path, vFileName, k)
Exit For
End If
Next k
NextDirItem:
Next DirectoryItem
'Recursive search through all the subfolders
Call FolderSearcher(oSubFolder.Path, KeywordList)
Next oSubFolder
'Release Objects
Set oSubFolder = Nothing
Set oSourceFolder = Nothing
Set oFSO = Nothing
Set oDir = Nothing
Set oShell = Nothing
End Sub
输入:
KeywordList 是一个包含字符串的可变长度的一维数组。
SourceFolder 是一个包含我的文件路径的字符串,即 \files\example
我查看了
我希望获得有关如何加快速度的提示。或者对可能比 VBA 更有效地执行此工作的其他语言的建议。谢谢!
编辑* 我被要求添加 PDF 关键字 function
Public Function PDFkeyword(InFilePath As Variant, InFileName As Variant) As String
'This function is used to read the metadata from a PDF
'Inputs: the folder path and file name
'Output: the PDF keyword
Dim oFile As String
Dim oApp As Object
Dim oDoc As Object
Dim strFileName As String
Dim strKeywords As String
Set oApp = CreateObject("AcroExch.App")
Set oDoc = CreateObject("AcroExch.PDDoc")
oFile = InFilePath & "\" & InFileName
'Grab the keywords from the PDF file
With oDoc
If .Open(oFile) Then
'strFileName = .GetFileName 'not needed rn but could be handy
strKeywords = .GetInfo("Keywords")
.Close
End If
End With
PDFkeyword = strKeywords
'Release Objects
Set oDoc = Nothing
Set oApp = Nothing
End Function
小文件结构只有大约 3 个文件夹级别和 16 个文件。大型结构可能有 5 个以上的文件夹级别和 ~1000 个文件。
这是一个轻微的重写,删除了递归,并将您的 PDF 函数更改为对 Acrobat 对象使用静态变量。
在我的机器上,它在大约 4.5 秒内处理了 3182 个文件。
Sub FolderSearcher(ByVal SourceFolder As String, KeywordList As Variant)
Dim oFSO, oSourceFolder, oSubFolder As Variant
Dim oShell, oDir As Variant 'Shell
Dim KeywordListSize As Long
Dim DirectoryItem As Variant
Dim vFileName, vFileKeyword As Variant
Dim k As Long 'Counter
Dim colFolders As New Collection
Dim fldr, subFldr, fCount As Long
Set oFSO = CreateObject("Scripting.FileSystemObject")
Set oSourceFolder = oFSO.GetFolder(SourceFolder)
Set oShell = CreateObject("Shell.Application")
KeywordListSize = UBound(KeywordList) - LBound(KeywordList)
colFolders.Add oSourceFolder '<<< first folder for processing
Do While colFolders.Count > 0
Set fldr = colFolders(1) 'get the first folder to process
colFolders.Remove 1 '...then remove from the collection
For Each subFldr In fldr.subfolders 'capture subfolders for processing
colFolders.Add subFldr
Next subFldr
Set oDir = oShell.Namespace(fldr.Path)
For Each DirectoryItem In oDir.Items
fCount = fCount + 1
vFileName = oDir.GetDetailsOf(DirectoryItem, 0)
If LCase(Right(vFileName, 4)) = ".pdf" Then
vFileKeyword = PDFkeyword(fldr.Path, vFileName) 'PDFs work a little differently
Else
vFileKeyword = oDir.GetDetailsOf(DirectoryItem, 18) 'Detail 18 is the file tag
End If
If Len(vFileKeyword) > 0 Then
Debug.Print "Got keyword:" & vFileKeyword
For k = 0 To KeywordListSize
If vFileKeyword = KeywordList(k) Then
'Call OutputSubroutine(oSubFolder.Path, vFileName, k)
Debug.Print fldr.Path, vFileName, k
Exit For
End If
Next k
End If
Next DirectoryItem
Loop
Debug.Print "Processed " & fCount & " files"
End Sub
Public Function PDFkeyword(InFilePath As Variant, InFileName As Variant) As String
Dim oFile As String
Static oApp As Object
Static oDoc As Object
Dim strFileName As String, strKeywords As String
'only create objects once...
If oApp Is Nothing Then Set oApp = CreateObject("AcroExch.App")
If oDoc Is Nothing Then Set oDoc = CreateObject("AcroExch.PDDoc")
oFile = InFilePath & "\" & InFileName
With oDoc
If .Open(oFile) Then
strKeywords = .GetInfo("Keywords")
.Close
End If
End With
PDFkeyword = strKeywords
End Function