Word VBA 搜索和替换 运行 非常慢,消耗大量资源
Word VBA Search and Replace Running very Slow, Consuming to many resources
我有一个由建模程序创建的 800 多页文档。本文档主要由一系列编号的句子(而非编号列表)组成。可以有 10,000 个编号行,这些行将被转换为 5000-7000 个不同长度的编号列表。
Word 脚本的 VBA(见下文)搜索第一个编号段落和未编号段落,然后查找未编号段落之前的最后一个编号段落,然后将范围转换为编号列表。重复此序列直到文档结束。
程序按预期运行。问题是,它消耗了 CPU 的 65-95%,大部分物理内存,需要 5-15 个小时才能 运行。
任何关于将性能提高至少一个数量级(至少一半或四分之一的时间)的想法都将不胜感激。
处理前的示例文档:
处理后的示例文档:
VBA 程序:
' Cleanup Numbered Lists
Sub UpdateNumbering()
Dim rng0, rng1, rng2 As Range
Dim sRegEx()
Dim index As Long
Dim StoryEnd As Long
Dim EscCnt As Long
Dim TotPCnt As Long
' Note name of Method being called
UpdateStatusBar ("UpdateNumbering")
CalledFrom = LastSubroutineVisited
LastSubroutineVisited = "UpdateNumbering"
' Cleanup [Space}[Tab} variances, Convert to [Tab] Only
Set rng0 = Selection.Range
Set rng1 = rng0
rng0.WholeStory
rng1.WholeStory
StoryEnd = rng1.End
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "\. {1,}^9"
.Replacement.Text = ".^t"
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
' Cleanup #.[Space] to #.[Tab]
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "(^13[0-9]{1,}\.) {1,}"
.Replacement.Text = "^t"
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
' Define number formats to be cleaned up
sRegEx = Array("(^13[0-9]{1,2}\.^9)", _
"(^13^9[0-9]{1,2}\.^9)")
' Loop through each RegEx
For index = 0 To 1
' Status Update
UpdateStatusBar ("UpdateNumbering: Pass #" & CStr(index + 1))
LastSubroutineVisited = "UpdateNumbering: Pass #" & CStr(index + 1)
' Find Begin of Doc
' Selection.HomeKey Unit:=wdStory, Extend:=wdExtend
' Selection.Collapse Direction:=wdCollapseStart
EscCnt = 0
TotPCnt = ActiveDocument.Paragraphs.Count
Do
' Find First Line of Next Numbered List
With rng0.Find
.ClearFormatting
.Text = sRegEx(index)
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute
If .Found = False Then Exit Do
End With
' Status Update
UpdateStatusBar ("UpdateNumbering: Pass #" & CStr(index + 1) & " - " & _
Format(ActiveDocument.Range(0, Selection.Paragraphs(1).Range.End).Paragraphs.Count / _
TotPCnt, "Percent") & " Percent Complete")
' Mark beginning of List
rng0.Select
Selection.Collapse Direction:=wdCollapseStart
Selection.MoveDown Unit:=wdParagraph
Set rng1 = Selection.Range
rng1.SetRange Start:=rng1.Start + 1, End:=rng1.End
' Find and Mark last entry in Numbered List
Do
Selection.MoveDown Unit:=wdParagraph
Selection.Expand Unit:=wdParagraph
Set rng2 = Selection.Range
rng2.SetRange Start:=rng2.Start - 1, End:=rng2.End
' Level 1 Numbering
With rng2.Find
.ClearFormatting
.Text = sRegEx(index)
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute
If .Found = False Then Exit Do
End With
Selection.Collapse Direction:=wdCollapseStart
Loop
rng1.SetRange Start:=rng1.Start - 1, End:=rng2.Start
' Remove Numbering
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "[0-9]{1,2}\.^t{1,}" 'allow for 0 or more tabs
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
rng1.SetRange Start:=rng1.Start + 1, End:=rng1.End
' Update Numbered List
rng1.ListFormat.ApplyListTemplate _
ListTemplate:=ListGalleries(wdNumberGallery).ListTemplates(1)
rng1.End = rng1.End + 1
rng0.SetRange Start:=rng1.End, End:=StoryEnd
EscCnt = EscCnt + 1 ' Debug
' If EscCnt > 50 Then Exit Do ' Debug
Loop
Next
LastSubroutineVisited = CalledFrom
Err_Handler:
If (Err.Number <> 0) Then
Call Handle_Error
Err.Clear
End If
End Sub
' Update the Status Bar
Private Sub UpdateStatusBar(status As String)
ActiveDocument.Application.StatusBar = status
End Sub
' Inform user and break into debug mode.
Private Sub Handle_Error()
Dim msgbox_Reply As Integer
msgbox_Reply = MsgBox("An unexpected error has occured:" & vbCrLf & vbCrLf _
& "Subroutine: " & LastSubroutineVisited & vbCrLf & vbCrLf _
& "Error Number: " & Err.Number & vbCrLf _
& "Error Description: " & Err.Description & vbCrLf & vbCrLf _
& "VBA will now enter debug mode.", vbCritical + vbOKOnly, "Error")
' Turn on screen updating.
ActiveDocument.Application.ScreenUpdating = True
' Application.WindowState = wdWindowStateMaximize
' Break into debug mode.
Stop
End Sub
感谢@TechnoDabbler,这是代码的最终版本:
' Cleanup Numbered Lists
Sub UpdateNumbering()
Dim rng0, rng1 As Range
Dim oRegEx As New RegExp
Dim oPar As Paragraph
Dim bNewList As Boolean
Dim sRegEx As String
Dim sTemps As MatchCollection
Dim index1, index2 As Long
Dim TotPCnt As Long
Dim tStart As Variant
tStart = Now()
' Note name of Method being called
UpdateStatusBar ("UpdateNumbering")
CalledFrom = LastSubroutineVisited
LastSubroutineVisited = "UpdateNumbering"
' Cleanup [Space}[Tab} variances, Convert to [Tab] Only
Set rng0 = Selection.Range
Set rng1 = rng0
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "\. {1,}^9"
.Replacement.Text = ".^t"
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
' Cleanup #.[Space] to #.[Tab]
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "(^13[0-9]{1,}\.) {1,}"
.Replacement.Text = "^t"
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
' Define number formats to be cleaned up
sRegEx = "^\t*[0-9]+\.\t+"
' Init Variables
index2 = 0
TotPCnt = ActiveDocument.Paragraphs.Count
With oRegEx
.Pattern = sRegEx
.Global = False
End With
bNewList = False
For Each oPar In ActiveDocument.Paragraphs
' Status Update
index2 = index2 + 1
If index2 Mod 10 = 0 Then
ActiveDocument.Application.StatusBar = _
"UpdateNumbering:" & _
Format(index2 / TotPCnt, "Percent") & " Percent Complete"
End If
' Find First Line of Next Numbered List
If oRegEx.Test(oPar.Range.Text) Then
' Extend the Range of the List and Clean up
Set rng0 = oPar.Range
Set sTemps = oRegEx.Execute(oPar.Range.Text)
index1 = Len(sTemps(0).Value)
rng0.End = rng0.Start + index1
rng0.Delete
If Not bNewList Then
' Mark beginning of List
bNewList = True
Set rng1 = oPar.Range
End If
rng1.End = oPar.Range.End
ElseIf bNewList Then
' Update Numbered List
rng1.ListFormat.ApplyListTemplate _
ListTemplate:=ListGalleries(wdNumberGallery).ListTemplates(1)
bNewList = False
End If
DoEvents
Next oPar
If bNewList Then
' Update Numbered List
rng1.ListFormat.ApplyListTemplate _
ListTemplate:=ListGalleries(wdNumberGallery).ListTemplates(1)
End If
LastSubroutineVisited = CalledFrom
Err_Handler:
If (Err.Number <> 0) Then
Call Handle_Error
Err.Clear
End If
End Sub
这可能是一种解决方案。您可能需要根据您的文档调整正则表达式模式。性能似乎合理:
Option Explicit
Public Sub ConvertDocument()
Dim vParagraph As Paragraph
Dim vRegExp As New RegExp
vRegExp.Pattern = "^[0-9]+.\t"
Application.ScreenUpdating = False
For Each vParagraph In ActiveDocument.Paragraphs
If vRegExp.Test(vParagraph.Range.Text) Then
vParagraph.Range.ListFormat.ApplyNumberDefault
End If
ActiveDocument.UndoClear
DoEvents
Next
Application.ScreenUpdating = True
End Sub
下面是我使用的测试数据生成器;更改 for 循环以生成更多或更少的测试数据。
Public Sub TestDataPopulate()
Dim vCounter As Long
Dim vParagraph As Paragraph
Application.ScreenUpdating = False
For vCounter = 1 To 50
Set vParagraph = ActiveDocument.Paragraphs.Add
vParagraph.Range.Text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " & _
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco " & vbCrLf & _
"Lorem ipsum dolor" & vbCrLf & _
vbCrLf & _
"1." & vbTab & "Lorem ipsum dolor sit amet" & vbCrLf & _
"2." & vbTab & "consectetur adipiscing elit" & vbCrLf & _
"3." & vbTab & "sed do eiusmod tempor incididunt" & vbCrLf & _
vbCrLf
ActiveDocument.UndoClear
Next
Application.ScreenUpdating = True
End Sub
我有一个由建模程序创建的 800 多页文档。本文档主要由一系列编号的句子(而非编号列表)组成。可以有 10,000 个编号行,这些行将被转换为 5000-7000 个不同长度的编号列表。
Word 脚本的 VBA(见下文)搜索第一个编号段落和未编号段落,然后查找未编号段落之前的最后一个编号段落,然后将范围转换为编号列表。重复此序列直到文档结束。
程序按预期运行。问题是,它消耗了 CPU 的 65-95%,大部分物理内存,需要 5-15 个小时才能 运行。
任何关于将性能提高至少一个数量级(至少一半或四分之一的时间)的想法都将不胜感激。
处理前的示例文档:
处理后的示例文档:
VBA 程序:
' Cleanup Numbered Lists
Sub UpdateNumbering()
Dim rng0, rng1, rng2 As Range
Dim sRegEx()
Dim index As Long
Dim StoryEnd As Long
Dim EscCnt As Long
Dim TotPCnt As Long
' Note name of Method being called
UpdateStatusBar ("UpdateNumbering")
CalledFrom = LastSubroutineVisited
LastSubroutineVisited = "UpdateNumbering"
' Cleanup [Space}[Tab} variances, Convert to [Tab] Only
Set rng0 = Selection.Range
Set rng1 = rng0
rng0.WholeStory
rng1.WholeStory
StoryEnd = rng1.End
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "\. {1,}^9"
.Replacement.Text = ".^t"
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
' Cleanup #.[Space] to #.[Tab]
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "(^13[0-9]{1,}\.) {1,}"
.Replacement.Text = "^t"
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
' Define number formats to be cleaned up
sRegEx = Array("(^13[0-9]{1,2}\.^9)", _
"(^13^9[0-9]{1,2}\.^9)")
' Loop through each RegEx
For index = 0 To 1
' Status Update
UpdateStatusBar ("UpdateNumbering: Pass #" & CStr(index + 1))
LastSubroutineVisited = "UpdateNumbering: Pass #" & CStr(index + 1)
' Find Begin of Doc
' Selection.HomeKey Unit:=wdStory, Extend:=wdExtend
' Selection.Collapse Direction:=wdCollapseStart
EscCnt = 0
TotPCnt = ActiveDocument.Paragraphs.Count
Do
' Find First Line of Next Numbered List
With rng0.Find
.ClearFormatting
.Text = sRegEx(index)
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute
If .Found = False Then Exit Do
End With
' Status Update
UpdateStatusBar ("UpdateNumbering: Pass #" & CStr(index + 1) & " - " & _
Format(ActiveDocument.Range(0, Selection.Paragraphs(1).Range.End).Paragraphs.Count / _
TotPCnt, "Percent") & " Percent Complete")
' Mark beginning of List
rng0.Select
Selection.Collapse Direction:=wdCollapseStart
Selection.MoveDown Unit:=wdParagraph
Set rng1 = Selection.Range
rng1.SetRange Start:=rng1.Start + 1, End:=rng1.End
' Find and Mark last entry in Numbered List
Do
Selection.MoveDown Unit:=wdParagraph
Selection.Expand Unit:=wdParagraph
Set rng2 = Selection.Range
rng2.SetRange Start:=rng2.Start - 1, End:=rng2.End
' Level 1 Numbering
With rng2.Find
.ClearFormatting
.Text = sRegEx(index)
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute
If .Found = False Then Exit Do
End With
Selection.Collapse Direction:=wdCollapseStart
Loop
rng1.SetRange Start:=rng1.Start - 1, End:=rng2.Start
' Remove Numbering
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "[0-9]{1,2}\.^t{1,}" 'allow for 0 or more tabs
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
rng1.SetRange Start:=rng1.Start + 1, End:=rng1.End
' Update Numbered List
rng1.ListFormat.ApplyListTemplate _
ListTemplate:=ListGalleries(wdNumberGallery).ListTemplates(1)
rng1.End = rng1.End + 1
rng0.SetRange Start:=rng1.End, End:=StoryEnd
EscCnt = EscCnt + 1 ' Debug
' If EscCnt > 50 Then Exit Do ' Debug
Loop
Next
LastSubroutineVisited = CalledFrom
Err_Handler:
If (Err.Number <> 0) Then
Call Handle_Error
Err.Clear
End If
End Sub
' Update the Status Bar
Private Sub UpdateStatusBar(status As String)
ActiveDocument.Application.StatusBar = status
End Sub
' Inform user and break into debug mode.
Private Sub Handle_Error()
Dim msgbox_Reply As Integer
msgbox_Reply = MsgBox("An unexpected error has occured:" & vbCrLf & vbCrLf _
& "Subroutine: " & LastSubroutineVisited & vbCrLf & vbCrLf _
& "Error Number: " & Err.Number & vbCrLf _
& "Error Description: " & Err.Description & vbCrLf & vbCrLf _
& "VBA will now enter debug mode.", vbCritical + vbOKOnly, "Error")
' Turn on screen updating.
ActiveDocument.Application.ScreenUpdating = True
' Application.WindowState = wdWindowStateMaximize
' Break into debug mode.
Stop
End Sub
感谢@TechnoDabbler,这是代码的最终版本:
' Cleanup Numbered Lists
Sub UpdateNumbering()
Dim rng0, rng1 As Range
Dim oRegEx As New RegExp
Dim oPar As Paragraph
Dim bNewList As Boolean
Dim sRegEx As String
Dim sTemps As MatchCollection
Dim index1, index2 As Long
Dim TotPCnt As Long
Dim tStart As Variant
tStart = Now()
' Note name of Method being called
UpdateStatusBar ("UpdateNumbering")
CalledFrom = LastSubroutineVisited
LastSubroutineVisited = "UpdateNumbering"
' Cleanup [Space}[Tab} variances, Convert to [Tab] Only
Set rng0 = Selection.Range
Set rng1 = rng0
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "\. {1,}^9"
.Replacement.Text = ".^t"
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
' Cleanup #.[Space] to #.[Tab]
With rng1.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "(^13[0-9]{1,}\.) {1,}"
.Replacement.Text = "^t"
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.Execute Replace:=wdReplaceAll
End With
' Define number formats to be cleaned up
sRegEx = "^\t*[0-9]+\.\t+"
' Init Variables
index2 = 0
TotPCnt = ActiveDocument.Paragraphs.Count
With oRegEx
.Pattern = sRegEx
.Global = False
End With
bNewList = False
For Each oPar In ActiveDocument.Paragraphs
' Status Update
index2 = index2 + 1
If index2 Mod 10 = 0 Then
ActiveDocument.Application.StatusBar = _
"UpdateNumbering:" & _
Format(index2 / TotPCnt, "Percent") & " Percent Complete"
End If
' Find First Line of Next Numbered List
If oRegEx.Test(oPar.Range.Text) Then
' Extend the Range of the List and Clean up
Set rng0 = oPar.Range
Set sTemps = oRegEx.Execute(oPar.Range.Text)
index1 = Len(sTemps(0).Value)
rng0.End = rng0.Start + index1
rng0.Delete
If Not bNewList Then
' Mark beginning of List
bNewList = True
Set rng1 = oPar.Range
End If
rng1.End = oPar.Range.End
ElseIf bNewList Then
' Update Numbered List
rng1.ListFormat.ApplyListTemplate _
ListTemplate:=ListGalleries(wdNumberGallery).ListTemplates(1)
bNewList = False
End If
DoEvents
Next oPar
If bNewList Then
' Update Numbered List
rng1.ListFormat.ApplyListTemplate _
ListTemplate:=ListGalleries(wdNumberGallery).ListTemplates(1)
End If
LastSubroutineVisited = CalledFrom
Err_Handler:
If (Err.Number <> 0) Then
Call Handle_Error
Err.Clear
End If
End Sub
这可能是一种解决方案。您可能需要根据您的文档调整正则表达式模式。性能似乎合理:
Option Explicit
Public Sub ConvertDocument()
Dim vParagraph As Paragraph
Dim vRegExp As New RegExp
vRegExp.Pattern = "^[0-9]+.\t"
Application.ScreenUpdating = False
For Each vParagraph In ActiveDocument.Paragraphs
If vRegExp.Test(vParagraph.Range.Text) Then
vParagraph.Range.ListFormat.ApplyNumberDefault
End If
ActiveDocument.UndoClear
DoEvents
Next
Application.ScreenUpdating = True
End Sub
下面是我使用的测试数据生成器;更改 for 循环以生成更多或更少的测试数据。
Public Sub TestDataPopulate()
Dim vCounter As Long
Dim vParagraph As Paragraph
Application.ScreenUpdating = False
For vCounter = 1 To 50
Set vParagraph = ActiveDocument.Paragraphs.Add
vParagraph.Range.Text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt " & _
"ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco " & vbCrLf & _
"Lorem ipsum dolor" & vbCrLf & _
vbCrLf & _
"1." & vbTab & "Lorem ipsum dolor sit amet" & vbCrLf & _
"2." & vbTab & "consectetur adipiscing elit" & vbCrLf & _
"3." & vbTab & "sed do eiusmod tempor incididunt" & vbCrLf & _
vbCrLf
ActiveDocument.UndoClear
Next
Application.ScreenUpdating = True
End Sub