使用数组比较和共享多个工作簿和工作表之间的数据

Using Arrays to Compare and Share Data Between Multiple Workbooks and Worksheets

这段代码我已经写了几个星期了,它曾经可以工作,编译我正在比较的 49 个工作表花了 2 个小时,但出于某种原因,它现在只是说没有响应。我真的很想尝试切换到使用数组,这样如果我能让它再次工作,它就会运行得更快。然而,即使在阅读了很多关于数组的文章之后,除了知道我需要使用多维数组并具有不同的行大小之外,我还是想不出一种方法来做到这一点。任何人都可以提供任何建议吗?提前致谢!

更多信息,代码会查看 e 列中的内容,如果 e 列中的其他内容匹配,它会采用 t 至 x 列中的值并将它们放置在 t 至 x 行中。如果它们的 t 到 x 为空,它还会为行 e 着色,或者如果它发现它在不应该着色时再次变白。

Sub FindPart_FullWorkbooks()

'If searching multiple worksheets & workbooks

Dim PartNumber As String
Dim Found1 As Integer
Dim Found2 As Boolean
Dim Found3 As Boolean
Dim Found4 As Boolean
Dim Found5 As Boolean
Dim Found6 As Boolean
Dim Found7 As Boolean
Dim Found8 As Boolean
Dim Found9 As Boolean
Dim Found10 As Boolean
Dim Found11 As Boolean
Dim Found12 As Boolean
Dim EOS As String
Dim EOSL As String
Dim EOL As String
Dim Replace As String
Dim AddInfo As String
Dim n As Long
Dim m As Long
Dim LastRow As Long
Dim WS As Worksheet
Dim WS2 As Worksheet
Dim WB As Workbook
Dim WB2 As Workbook

For Each WB In Workbooks

For Each WS In WB.Worksheets

With WS
LastRow = .Range("A1").SpecialCells(xlCellTypeLastCell).Row
End With

For m = 1 To LastRow

    PartNumber = WB.Sheets(WS.Name).Cells(m, 5).Value
    EOS = WB.Sheets(WS.Name).Cells(m, 20).Value
    EOSL = WB.Sheets(WS.Name).Cells(m, 21).Value
    EOL = WB.Sheets(WS.Name).Cells(m, 22).Value
    Replace = WB.Sheets(WS.Name).Cells(m, 23).Value
    AddInfo = WB.Sheets(WS.Name).Cells(m, 24).Value

    Found2 = IsEmpty(WB.Sheets(WS.Name).Cells(m, 5).Value)
    Found4 = IsEmpty(WB.Sheets(WS.Name).Cells(m, 20).Value)
    Found5 = IsEmpty(WB.Sheets(WS.Name).Cells(m, 21).Value)
    Found6 = IsEmpty(WB.Sheets(WS.Name).Cells(m, 22).Value)
    Found7 = IsEmpty(WB.Sheets(WS.Name).Cells(m, 23).Value)
    Found8 = IsEmpty(WB.Sheets(WS.Name).Cells(m, 24).Value)

    If Found2 = True Then
    GoTo NextIndex

        Else

        For Each WB2 In Workbooks
        For Each WS2 In WB2.Worksheets

            For n = 1 To LastRow

                Found1 = InStr(WB2.Sheets(WS2.Name).Cells(n, 5).Value, PartNumber)

                Found3 = IsEmpty(WB2.Sheets(WS2.Name).Cells(n, 20).Value)
                Found9 = IsEmpty(WB2.Sheets(WS2.Name).Cells(n, 21).Value)
                Found10 = IsEmpty(WB2.Sheets(WS2.Name).Cells(n, 22).Value)
                Found11 = IsEmpty(WB2.Sheets(WS2.Name).Cells(n, 23).Value)
                Found12 = IsEmpty(WB2.Sheets(WS2.Name).Cells(n, 24).Value)

                If Found3 = True And Found9 = True And Found10 = True And Found11 = True And Found12 = True Then

                    If Found1 = 1 Then
                        WB2.Sheets(WS2.Name).Cells(n, 20).Value = EOS
                        WB2.Sheets(WS2.Name).Cells(n, 21).Value = EOSL
                        WB2.Sheets(WS2.Name).Cells(n, 22).Value = EOL
                        WB2.Sheets(WS2.Name).Cells(n, 23).Value = Replace
                        WB2.Sheets(WS2.Name).Cells(n, 24).Value = AddInfo

                    End If
                End If
            Next n

        If Found4 = True And Found5 = True And Found6 = True And Found7 = True And Found8 = True Then

        WB.Sheets(WS.Name).Cells(m, 5).Interior.Color = RGB(255, 0, 255)

        ElseIf WB.Sheets(WS.Name).Cells(m, 5).Interior.Color = RGB(255, 0, 255) Then

        WB.Sheets(WS.Name).Cells(m, 5).Interior.Color = RGB(255, 255, 255)

        End If

        'MsgBox (WB2.Name & " " & WS2.Name)

        Next WS2
        Next WB2

    End If
'MsgBox (m)
NextIndex:

 Next m
'MsgBox (WB.Name & " " & WS.Name)

 Next WS
 Next WB

End Sub

这个答案是针对 Code Review site 的,但这个问题暂时搁置,所以我会在这里提供它

从性能的角度来看,您设法编写了最坏情况的代码 - 完成任务所需的最大工作量。您这样做可能只是为了让它正常工作,我对这个问题投了赞成票,因为您做出了寻求帮助的正确决定

为了说明,我们有 10 个文件,每个文件有 3 个 sheet,每个 sheet 包含 1,000 行(部分)。您的算法所做的是循环遍历每个文件,并且对于每个文件再次循环遍历每个文件(!),每个 sheet,以及每一行:

结果:10 个文件 * 3 sheets * 1,000 行 = 30,000 次搜索 - 与范围

的交互

还有其他问题:

  • 您多次覆盖所有数据,包括用空字符串覆盖有效数据
  • 由于 InStr()
  • ,搜索零件号不准确
  • 更不用说一些基本问题,例如使代码难以阅读的命名约定,以及对任何问题都无济于事的 GoTo 语句

提高性能的第一步是你想到的:转换为数组,但即使这样也不能很好地应对大量的工作,因为仍然有很多文件交互(通过它们移动一遍又一遍),所以下一步就是优化逻辑

转换为数组时,要理解的主要概念是数组与 sheet 上的数据具有相同的结构 - 您可以想象内存中的 sheet 使用行和列,除了列不使用字母,所以如果你这样做将数据复制到内存:dataArray = Sheet1.UsedRange,你访问它的方式相同:

  • Sheet1.UsedRange.Cells(1, 1) = A1
  • dataArray(1, 1) = A1

除了数组的速度呈指数级增长。你不需要担心数组的二维,如果这会使事情变得复杂,因为 VBA 在这个简单的赋值 dataArray = Sheet1.UsedRange 中生成正确的数组,其中 dataArray 应定义为 Variant

然后,在所有处理完成后唯一需要的额外步骤是用这条语句将数据复制回sheet Sheet1.UsedRange = dataArray

所以我做的第一个版本是原始的(低效的)逻辑,转换成数组,只是为了演示如何做

第二个版本是一个改进的算法,迭代所有文件,只迭代两次

  1. 一次将所有零件号读入字典
  2. 第二次更新所有文件中的所有部件号(缺少 T 至 X 列中的详细信息)

我的数据结果(3 个文件,每个文件有 3 个 sheet,每个 sheet 包含 1,000 行):

- v1: Time: 4399.262 sec (1.22 hrs) - your version
- v2: Time:  770.797 sec (12.8 min) - your version converted to arrays
- v3: Time:    2.684 sec            - optimized logic (arrays + dictionary)

版本 2(数组):

Public Sub FindPart_FullWorkbooks3()    '-----------------------------------------------
    Const FR = 2    'First row, after header
    Dim wb1 As Workbook, wb2 As Workbook, ws1 As Worksheet, ws2 As Worksheet
    Dim ur1 As Variant, ur2 As Variant, info1 As String,info2 As String, updt As Boolean
    Dim lr1 As Long, lr2 As Long, lc1 As Long, lc2 As Long, samePart As Boolean
    Dim m(1 To 6), i As Byte, cel As Range, yColor As Long, nColor As Long
    Dim r1 As Long, c1 As Long, r2 As Long, c2 As Long, y As Range, n As Range

    yColor = RGB(255, 255, 255)
    nColor = RGB(255, 0, 0)

    m(1) = 5
    m(2) = 20
    m(3) = 21
    m(4) = 22
    m(5) = 23
    m(6) = 24

    For Each wb1 In Workbooks
        For Each ws1 In wb1.Worksheets
            ur1 = ws1.UsedRange
            lr1 = UBound(ur1, 1)    'last row
            lc1 = UBound(ur1, 2)    'last col
            If lc1 >= 24 Then
                For r1 = FR To lr1
                    If Len(ur1(r1, m(1))) > 0 Then
                      info1 = ur1(r1, m(2)) & ur1(r1, m(3)) & ur1(r1, m(4))
                      info1 = info1 & ur1(r1, m(5)) & ur1(r1, m(6))
                      Set cel = ws1.Cells(r1, m(1))
                      If Len(info1) > 0 Then
                        For Each wb2 In Workbooks
                          For Each ws2 In wb2.Worksheets
                            ur2 = ws2.UsedRange
                            lr2 = UBound(ur2, 1)
                            lc2 = UBound(ur2, 2)
                            If lc2 >= 24 Then
                              For r2 = FR To lr2
                                info2 = ur2(r2, m(2)) & ur2(r2, m(3)) & ur2(r2, m(4))
                                info2 = info2 & ur2(r2, m(5)) & ur2(r2, m(6))
                                samePart = InStr(ur2(r2, m(1)), ur1(r1, m(1))) = 1
                                If (samePart And Len(info2) = 0) Then
                                  For i = 1 To 6
                                      ur2(r2, m(i)) = ur1(r1, m(i))
                                  Next
                                  updt = True
                                End If
                              Next
                            End If
                            If updt Then
                              ws2.UsedRange = ur2
                              updt = False
                            End If
                          Next
                        Next
                        If y Is Nothing Then Set y = cel Else Set y = Union(y, cel)
                      Else
                        If n Is Nothing Then Set n = cel Else Set n = Union(n, cel)
                      End If
                    End If
                Next
                If Not y Is Nothing Then
                    If y.Interior.Color = nColor Then y.Interior.Color = yColor
                    Set y = Nothing
                End If
                If Not n Is Nothing Then
                    n.Interior.Color = nColor
                    Set n = Nothing
                End If
            End If
        Next
    Next
End Sub

版本 3(数组和字典)

Public Function UpdateAllParts() As Long    '------------------------------------------
    Const FR = 2    'First row, after header
    Const DELIM = "<*>"
    Dim wb As Workbook, ws As Worksheet, ur As Variant, i As Byte, iter As Long
    Dim lr As Long, lc As Long, m(1 To 6), inf As String, frst As Boolean
    Dim yColor As Long, nColor As Long, y As Range, n As Range, d As Dictionary
    Dim cel As Range, lenDelim As Long, vals As Variant, r As Long, c As Long

    yColor = RGB(255, 255, 255):    nColor = RGB(255, 0, 0):    Set d = New Dictionary
    m(1) = 5:   m(2) = 20:  m(3) = 21:  m(4) = 22:  m(5) = 23:  m(6) = 24

    lenDelim = Len(DELIM) * 4
    For iter = 1 To 2
      frst = iter = 1
      For Each wb In Workbooks
        For Each ws In wb.Worksheets
          ur = ws.Range(ws.Cells(1), ws.Cells.SpecialCells(xlCellTypeLastCell))
          lr = UBound(ur, 1): lc = UBound(ur, 2)
          If lc >= 24 Then
            For r = FR To lr
              If Len(ur(r, m(1))) > 0 Then
                If frst Then Set cel = ws.Cells(r, m(1))
                inf = ur(r, m(2)) & DELIM & ur(r, m(3)) & DELIM & ur(r, m(4))
                inf = inf & DELIM & ur(r, m(5)) & DELIM & ur(r, m(6))
                If frst Then
                    If Len(inf) > lenDelim Then
                        d(ur(r, m(1))) = inf 'add all to dict
                        If cel.Interior.Color = nColor Then
                            If y Is Nothing Then Set y = cel Else Set y = Union(y, cel)
                        End If
                    Else
                        If n Is Nothing Then Set n = cel Else Set n = Union(n, cel)
                    End If
                Else
                  If Len(inf) = lenDelim Then
                    If d.Exists(ur(r, m(1))) Then
                      vals = Split(d(ur(r, m(1))), DELIM)
                      For i = 0 To 4
                        ur(r, m(i + 2)) = vals(i)
                      Next
                    End If
                  End If
                End If
              End If
            Next
            If frst Then
              If Not y Is Nothing Then
                If y.Interior.Color = nColor Then y.Interior.Color = yColor
                Set y = Nothing
              End If
              If Not n Is Nothing Then
                n.Interior.Color = nColor
                Set n = Nothing
              End If
            Else
              ws.Range(ws.Cells(1), ws.Cells.SpecialCells(xlCellTypeLastCell)) = ur
            End If
          End If
        Next
      Next
    Next
    UpdateAllParts = d.Count
End Function

测试数据:

之前 - 所有缺少数据的文件:


之后 - 所有文件,v1(你的) - 注意蓝色轮廓的记录 - 无效数据


之后 - 所有文件,v2 - 与 v1 中的问题相同,数组实现更加突出


之后 - 所有文件,v3