从正文和脚注中搜索一串文本并将其及其后面的 # 个字符复制到 excel 文档中

Searching for a string of text from the main body and footnotes and copying it and its following # characters into an excel document

我有大量文档需要从中提取文件名参考,这些文档分散在大块文本和脚注中。

我目前有一个单词VBA代码,我认为应该搜索一个字符串(例如“This_”),然后是以下#个字符,以及然后将它们粘贴到等待 excel sheet 中。我正在努力让它同时搜索脚注和文本主体。

我一直在使用下面的代码,但我目前的工作让它做了一些奇怪的事情。它会找到我正在搜索的字符串,但随后会从文档的开头复制找到该字符串的次数——而不是该字符串及其后续文本。

如有任何帮助,我们将不胜感激,我相信问题将出自 'return data to array 部分的前半部分。

Option Explicit
Option Base 1
Sub WordDataToExcel()
Dim myObj
Dim myWB
Dim mySh
Dim txt As String, Lgth As Long, Strt As Long
Dim i As Long
Dim oRng As Range
Dim Tgt As String
Dim TgtFile As String
Dim arr()
Dim ArrSize As Long
Dim ArrIncrement As Long
ArrIncrement = 1000
ArrSize = ArrIncrement
ReDim arr(ArrSize)


'Set parameters Change to your path and filename
TgtFile = "File.xlsx"
If IsWindowsOS Then
Tgt = "C:\users\user\" & TgtFile ' Windows OS
Else
Tgt = "MacintoshHD:Users:" & TgtFile 'Mac OS
End If
txt = InputBox("String to find")
Lgth = InputBox("Length of string to return")
Strt = Len(txt)


'Return data to array
ActiveDocument.StoryRanges(wdFootnotesStory).Select
With Selection.Find
.ClearFormatting
.Forward = True
.Text = txt
.MatchCase = True
.Execute
While .Found
i = i + 1
Set oRng = ActiveDocument.Range _
(Start:=Selection.Range.Start + Strt, _
End:=Selection.Range.End + Lgth)
arr(i) = oRng.Text
oRng.Start = oRng.End
.Execute
If i = ArrSize - 20 Then
ArrSize = ArrSize + ArrIncrement
ReDim Preserve arr(ArrSize)
End If
Wend
End With
ReDim Preserve arr(i)


'Set target and write data
Set myObj = CreateObject("Excel.Application")
Set myWB = myObj.workbooks.Open(Tgt)
Set mySh = myWB.sheets(1)
With mySh
.Range(.Cells(1, 1), .Cells(i, 1)) = myObj.transpose(arr)
End With


'Tidy up
myWB.Close True
myObj.Quit
Set mySh = Nothing
Set myWB = Nothing
Set myObj = Nothing
End Sub


Public Function IsWindowsOS() As Boolean
If Application.System.OperatingSystem Like "*Win*" Then
IsWindowsOS = True
Else
IsWindowsOS = False
End If
End Function

例如,以下代码 returns 找到的文本及其页面引用:

Sub Demo()
Application.ScreenUpdating = False
Dim i As Long, j As Long, k As Long, StrFnd As String, StrOut As String
StrFnd = InputBox("String to find")
j = InputBox("String Length to find")
k = j - Len(StrFnd)
For i = 1 To k
  StrFnd = StrFnd & "^?"
Next
With ActiveDocument
  For i = 1 To 2 ' 1 = wdMainTextStory, 2 = wdFootnotesStory, 3 = wdEndnotesStory, etc.
    With .StoryRanges(i)
      With .Find
        .ClearFormatting
        .Text = StrFnd
        .Forward = True
        .Format = True
        .MatchWildcards = False
        .Wrap = wdFindStop
        .Replacement.Text = ""
      End With
      Do While .Find.Execute = True
        StrOut = StrOut & vbCr & .Text & vbTab
        Select Case .StoryType
          Case wdMainTextStory
            StrOut = StrOut & .Information(wdActiveEndAdjustedPageNumber)
          Case wdFootnotesStory
            StrOut = StrOut & .Duplicate.Footnotes(1).Reference.Information(wdActiveEndAdjustedPageNumber)
        End Select
      Loop
    End With
  Next
End With
MsgBox StrOut
Application.ScreenUpdating = True
End Sub

您的代码有点混乱,因为有 SelectionRange 的邪恶组合。避免使用 Selection 是一种很好的做法,因为在 VBA.

中工作时很少需要 select 任何东西

VBA 也有 compiler constants 可用于检测代码是否在 Mac 上 运行。不确定 Mac 常量是否仍然可靠地工作,因为我不再需要测试了。

    'Set parameters Change to your path and filename
    TgtFile = "File.xlsx"
    'This isn't necessary as there is a compiler constant that can be used to identify code is running on Mac
    '    If IsWindowsOS Then
    '        Tgt = "C:\users\user\" & TgtFile ' Windows OS
    '    Else
    '        Tgt = "MacintoshHD:Users:" & TgtFile 'Mac OS
    '    End If
#If Mac Then
    Tgt = "MacintoshHD:Users:" & TgtFile 'Mac OS
#Else
    Tgt = "C:\users\user\" & TgtFile ' Windows OS
#End If
    txt = InputBox("String to find")
    Lgth = InputBox("Length of string to return")
    Strt = Len(txt)


    'Return data to array
    'not necessary to select the story range
    'ActiveDocument.StoryRanges(wdFootnotesStory).Select
    Set oRng = ActiveDocument.StoryRanges(wdFootnotesStory)
    With oRng
        With .Find
            .ClearFormatting
            .Forward = True
            .Text = txt
            .MatchCase = True
        End With
          
        While .Find.Execute
            'a match has been found and oRng redefined to the range of the match
            i = i + 1
            .MoveEnd wdCharacter, Lgth
            arr(i) = .Text
            .Collapse wdCollapseEnd
            If i = ArrSize - 20 Then
                ArrSize = ArrSize + ArrIncrement
                ReDim Preserve arr(ArrSize)
            End If
        Wend
    End With

这是一个如何搜索文档的多个部分的示例。请注意,我正在使用 Collection 来收集项目,因此您不必继续增加数组。

Option Explicit
Option Base 1

Sub test()
    Dim allFound As Collection
    Set allFound = TextFoundReport("This_", 10)
    
    Dim entry As Variant
    For Each entry In allFound
        Dim partType As Long
        Dim text As String
        Dim tokens() As String
        tokens = Split(entry, "|")
        '--- here is where you copy to an Excel sheet
        Debug.Print "Part type: " & tokens(0) & " - '" & tokens(1) & "'"
    Next entry
End Sub

Private Function TextFoundReport(ByVal text As String, _
                                 ByVal numberOfCharacters As Long) As Collection
    Dim whatWeFound As Collection
    Set whatWeFound = New Collection
    
    '--- create a list of the document parts to search
    Dim docParts As Variant
    docParts = Array(wdMainTextStory, wdFootnotesStory, wdEndnotesStory, wdCommentsStory)
    
    Dim foundRng As Range
    Dim docPart As Variant
    For Each docPart In docParts
        ActiveDocument.StoryRanges(docPart).Select
        '--- find all occurences in this part and add it to the collection
        '    the Item in the collection is the story type and the found text
        With Selection.Find
            .ClearFormatting
            .Forward = True
            .text = text
            .MatchCase = True
            .Execute
            Do While .found
                Set foundRng = ActiveDocument.Range _
                               (Start:=Selection.Range.Start + Len(text), _
                                End:=Selection.Range.End + numberOfCharacters)
                whatWeFound.Add CLng(docPart) & "|" & foundRng.text
                foundRng.Start = foundRng.End
                .Execute
            Loop
        End With
    Next docPart
    
    Set TextFoundReport = whatWeFound
End Function