从正文和脚注中搜索一串文本并将其及其后面的 # 个字符复制到 excel 文档中
Searching for a string of text from the main body and footnotes and copying it and its following # characters into an excel document
我有大量文档需要从中提取文件名参考,这些文档分散在大块文本和脚注中。
我目前有一个单词VBA代码,我认为应该搜索一个字符串(例如“This_”),然后是以下#个字符,以及然后将它们粘贴到等待 excel sheet 中。我正在努力让它同时搜索脚注和文本主体。
我一直在使用下面的代码,但我目前的工作让它做了一些奇怪的事情。它会找到我正在搜索的字符串,但随后会从文档的开头复制找到该字符串的次数——而不是该字符串及其后续文本。
如有任何帮助,我们将不胜感激,我相信问题将出自 'return data to array 部分的前半部分。
Option Explicit
Option Base 1
Sub WordDataToExcel()
Dim myObj
Dim myWB
Dim mySh
Dim txt As String, Lgth As Long, Strt As Long
Dim i As Long
Dim oRng As Range
Dim Tgt As String
Dim TgtFile As String
Dim arr()
Dim ArrSize As Long
Dim ArrIncrement As Long
ArrIncrement = 1000
ArrSize = ArrIncrement
ReDim arr(ArrSize)
'Set parameters Change to your path and filename
TgtFile = "File.xlsx"
If IsWindowsOS Then
Tgt = "C:\users\user\" & TgtFile ' Windows OS
Else
Tgt = "MacintoshHD:Users:" & TgtFile 'Mac OS
End If
txt = InputBox("String to find")
Lgth = InputBox("Length of string to return")
Strt = Len(txt)
'Return data to array
ActiveDocument.StoryRanges(wdFootnotesStory).Select
With Selection.Find
.ClearFormatting
.Forward = True
.Text = txt
.MatchCase = True
.Execute
While .Found
i = i + 1
Set oRng = ActiveDocument.Range _
(Start:=Selection.Range.Start + Strt, _
End:=Selection.Range.End + Lgth)
arr(i) = oRng.Text
oRng.Start = oRng.End
.Execute
If i = ArrSize - 20 Then
ArrSize = ArrSize + ArrIncrement
ReDim Preserve arr(ArrSize)
End If
Wend
End With
ReDim Preserve arr(i)
'Set target and write data
Set myObj = CreateObject("Excel.Application")
Set myWB = myObj.workbooks.Open(Tgt)
Set mySh = myWB.sheets(1)
With mySh
.Range(.Cells(1, 1), .Cells(i, 1)) = myObj.transpose(arr)
End With
'Tidy up
myWB.Close True
myObj.Quit
Set mySh = Nothing
Set myWB = Nothing
Set myObj = Nothing
End Sub
Public Function IsWindowsOS() As Boolean
If Application.System.OperatingSystem Like "*Win*" Then
IsWindowsOS = True
Else
IsWindowsOS = False
End If
End Function
例如,以下代码 returns 找到的文本及其页面引用:
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long, j As Long, k As Long, StrFnd As String, StrOut As String
StrFnd = InputBox("String to find")
j = InputBox("String Length to find")
k = j - Len(StrFnd)
For i = 1 To k
StrFnd = StrFnd & "^?"
Next
With ActiveDocument
For i = 1 To 2 ' 1 = wdMainTextStory, 2 = wdFootnotesStory, 3 = wdEndnotesStory, etc.
With .StoryRanges(i)
With .Find
.ClearFormatting
.Text = StrFnd
.Forward = True
.Format = True
.MatchWildcards = False
.Wrap = wdFindStop
.Replacement.Text = ""
End With
Do While .Find.Execute = True
StrOut = StrOut & vbCr & .Text & vbTab
Select Case .StoryType
Case wdMainTextStory
StrOut = StrOut & .Information(wdActiveEndAdjustedPageNumber)
Case wdFootnotesStory
StrOut = StrOut & .Duplicate.Footnotes(1).Reference.Information(wdActiveEndAdjustedPageNumber)
End Select
Loop
End With
Next
End With
MsgBox StrOut
Application.ScreenUpdating = True
End Sub
您的代码有点混乱,因为有 Selection
和 Range
的邪恶组合。避免使用 Selection
是一种很好的做法,因为在 VBA.
中工作时很少需要 select 任何东西
VBA 也有 compiler constants 可用于检测代码是否在 Mac 上 运行。不确定 Mac 常量是否仍然可靠地工作,因为我不再需要测试了。
'Set parameters Change to your path and filename
TgtFile = "File.xlsx"
'This isn't necessary as there is a compiler constant that can be used to identify code is running on Mac
' If IsWindowsOS Then
' Tgt = "C:\users\user\" & TgtFile ' Windows OS
' Else
' Tgt = "MacintoshHD:Users:" & TgtFile 'Mac OS
' End If
#If Mac Then
Tgt = "MacintoshHD:Users:" & TgtFile 'Mac OS
#Else
Tgt = "C:\users\user\" & TgtFile ' Windows OS
#End If
txt = InputBox("String to find")
Lgth = InputBox("Length of string to return")
Strt = Len(txt)
'Return data to array
'not necessary to select the story range
'ActiveDocument.StoryRanges(wdFootnotesStory).Select
Set oRng = ActiveDocument.StoryRanges(wdFootnotesStory)
With oRng
With .Find
.ClearFormatting
.Forward = True
.Text = txt
.MatchCase = True
End With
While .Find.Execute
'a match has been found and oRng redefined to the range of the match
i = i + 1
.MoveEnd wdCharacter, Lgth
arr(i) = .Text
.Collapse wdCollapseEnd
If i = ArrSize - 20 Then
ArrSize = ArrSize + ArrIncrement
ReDim Preserve arr(ArrSize)
End If
Wend
End With
这是一个如何搜索文档的多个部分的示例。请注意,我正在使用 Collection
来收集项目,因此您不必继续增加数组。
Option Explicit
Option Base 1
Sub test()
Dim allFound As Collection
Set allFound = TextFoundReport("This_", 10)
Dim entry As Variant
For Each entry In allFound
Dim partType As Long
Dim text As String
Dim tokens() As String
tokens = Split(entry, "|")
'--- here is where you copy to an Excel sheet
Debug.Print "Part type: " & tokens(0) & " - '" & tokens(1) & "'"
Next entry
End Sub
Private Function TextFoundReport(ByVal text As String, _
ByVal numberOfCharacters As Long) As Collection
Dim whatWeFound As Collection
Set whatWeFound = New Collection
'--- create a list of the document parts to search
Dim docParts As Variant
docParts = Array(wdMainTextStory, wdFootnotesStory, wdEndnotesStory, wdCommentsStory)
Dim foundRng As Range
Dim docPart As Variant
For Each docPart In docParts
ActiveDocument.StoryRanges(docPart).Select
'--- find all occurences in this part and add it to the collection
' the Item in the collection is the story type and the found text
With Selection.Find
.ClearFormatting
.Forward = True
.text = text
.MatchCase = True
.Execute
Do While .found
Set foundRng = ActiveDocument.Range _
(Start:=Selection.Range.Start + Len(text), _
End:=Selection.Range.End + numberOfCharacters)
whatWeFound.Add CLng(docPart) & "|" & foundRng.text
foundRng.Start = foundRng.End
.Execute
Loop
End With
Next docPart
Set TextFoundReport = whatWeFound
End Function
我有大量文档需要从中提取文件名参考,这些文档分散在大块文本和脚注中。
我目前有一个单词VBA代码,我认为应该搜索一个字符串(例如“This_”),然后是以下#个字符,以及然后将它们粘贴到等待 excel sheet 中。我正在努力让它同时搜索脚注和文本主体。
我一直在使用下面的代码,但我目前的工作让它做了一些奇怪的事情。它会找到我正在搜索的字符串,但随后会从文档的开头复制找到该字符串的次数——而不是该字符串及其后续文本。
如有任何帮助,我们将不胜感激,我相信问题将出自 'return data to array 部分的前半部分。
Option Explicit
Option Base 1
Sub WordDataToExcel()
Dim myObj
Dim myWB
Dim mySh
Dim txt As String, Lgth As Long, Strt As Long
Dim i As Long
Dim oRng As Range
Dim Tgt As String
Dim TgtFile As String
Dim arr()
Dim ArrSize As Long
Dim ArrIncrement As Long
ArrIncrement = 1000
ArrSize = ArrIncrement
ReDim arr(ArrSize)
'Set parameters Change to your path and filename
TgtFile = "File.xlsx"
If IsWindowsOS Then
Tgt = "C:\users\user\" & TgtFile ' Windows OS
Else
Tgt = "MacintoshHD:Users:" & TgtFile 'Mac OS
End If
txt = InputBox("String to find")
Lgth = InputBox("Length of string to return")
Strt = Len(txt)
'Return data to array
ActiveDocument.StoryRanges(wdFootnotesStory).Select
With Selection.Find
.ClearFormatting
.Forward = True
.Text = txt
.MatchCase = True
.Execute
While .Found
i = i + 1
Set oRng = ActiveDocument.Range _
(Start:=Selection.Range.Start + Strt, _
End:=Selection.Range.End + Lgth)
arr(i) = oRng.Text
oRng.Start = oRng.End
.Execute
If i = ArrSize - 20 Then
ArrSize = ArrSize + ArrIncrement
ReDim Preserve arr(ArrSize)
End If
Wend
End With
ReDim Preserve arr(i)
'Set target and write data
Set myObj = CreateObject("Excel.Application")
Set myWB = myObj.workbooks.Open(Tgt)
Set mySh = myWB.sheets(1)
With mySh
.Range(.Cells(1, 1), .Cells(i, 1)) = myObj.transpose(arr)
End With
'Tidy up
myWB.Close True
myObj.Quit
Set mySh = Nothing
Set myWB = Nothing
Set myObj = Nothing
End Sub
Public Function IsWindowsOS() As Boolean
If Application.System.OperatingSystem Like "*Win*" Then
IsWindowsOS = True
Else
IsWindowsOS = False
End If
End Function
例如,以下代码 returns 找到的文本及其页面引用:
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long, j As Long, k As Long, StrFnd As String, StrOut As String
StrFnd = InputBox("String to find")
j = InputBox("String Length to find")
k = j - Len(StrFnd)
For i = 1 To k
StrFnd = StrFnd & "^?"
Next
With ActiveDocument
For i = 1 To 2 ' 1 = wdMainTextStory, 2 = wdFootnotesStory, 3 = wdEndnotesStory, etc.
With .StoryRanges(i)
With .Find
.ClearFormatting
.Text = StrFnd
.Forward = True
.Format = True
.MatchWildcards = False
.Wrap = wdFindStop
.Replacement.Text = ""
End With
Do While .Find.Execute = True
StrOut = StrOut & vbCr & .Text & vbTab
Select Case .StoryType
Case wdMainTextStory
StrOut = StrOut & .Information(wdActiveEndAdjustedPageNumber)
Case wdFootnotesStory
StrOut = StrOut & .Duplicate.Footnotes(1).Reference.Information(wdActiveEndAdjustedPageNumber)
End Select
Loop
End With
Next
End With
MsgBox StrOut
Application.ScreenUpdating = True
End Sub
您的代码有点混乱,因为有 Selection
和 Range
的邪恶组合。避免使用 Selection
是一种很好的做法,因为在 VBA.
VBA 也有 compiler constants 可用于检测代码是否在 Mac 上 运行。不确定 Mac 常量是否仍然可靠地工作,因为我不再需要测试了。
'Set parameters Change to your path and filename
TgtFile = "File.xlsx"
'This isn't necessary as there is a compiler constant that can be used to identify code is running on Mac
' If IsWindowsOS Then
' Tgt = "C:\users\user\" & TgtFile ' Windows OS
' Else
' Tgt = "MacintoshHD:Users:" & TgtFile 'Mac OS
' End If
#If Mac Then
Tgt = "MacintoshHD:Users:" & TgtFile 'Mac OS
#Else
Tgt = "C:\users\user\" & TgtFile ' Windows OS
#End If
txt = InputBox("String to find")
Lgth = InputBox("Length of string to return")
Strt = Len(txt)
'Return data to array
'not necessary to select the story range
'ActiveDocument.StoryRanges(wdFootnotesStory).Select
Set oRng = ActiveDocument.StoryRanges(wdFootnotesStory)
With oRng
With .Find
.ClearFormatting
.Forward = True
.Text = txt
.MatchCase = True
End With
While .Find.Execute
'a match has been found and oRng redefined to the range of the match
i = i + 1
.MoveEnd wdCharacter, Lgth
arr(i) = .Text
.Collapse wdCollapseEnd
If i = ArrSize - 20 Then
ArrSize = ArrSize + ArrIncrement
ReDim Preserve arr(ArrSize)
End If
Wend
End With
这是一个如何搜索文档的多个部分的示例。请注意,我正在使用 Collection
来收集项目,因此您不必继续增加数组。
Option Explicit
Option Base 1
Sub test()
Dim allFound As Collection
Set allFound = TextFoundReport("This_", 10)
Dim entry As Variant
For Each entry In allFound
Dim partType As Long
Dim text As String
Dim tokens() As String
tokens = Split(entry, "|")
'--- here is where you copy to an Excel sheet
Debug.Print "Part type: " & tokens(0) & " - '" & tokens(1) & "'"
Next entry
End Sub
Private Function TextFoundReport(ByVal text As String, _
ByVal numberOfCharacters As Long) As Collection
Dim whatWeFound As Collection
Set whatWeFound = New Collection
'--- create a list of the document parts to search
Dim docParts As Variant
docParts = Array(wdMainTextStory, wdFootnotesStory, wdEndnotesStory, wdCommentsStory)
Dim foundRng As Range
Dim docPart As Variant
For Each docPart In docParts
ActiveDocument.StoryRanges(docPart).Select
'--- find all occurences in this part and add it to the collection
' the Item in the collection is the story type and the found text
With Selection.Find
.ClearFormatting
.Forward = True
.text = text
.MatchCase = True
.Execute
Do While .found
Set foundRng = ActiveDocument.Range _
(Start:=Selection.Range.Start + Len(text), _
End:=Selection.Range.End + numberOfCharacters)
whatWeFound.Add CLng(docPart) & "|" & foundRng.text
foundRng.Start = foundRng.End
.Execute
Loop
End With
Next docPart
Set TextFoundReport = whatWeFound
End Function