比较具有不同记录数和不同顺序的两个数据集

Comparing two datasets with different number of records and different order

我在一个工作簿中有两个数据集(Master data 和 BAZA OLD)。两个工作表中的每条记录都有 11 列(A:K)。Master data 工作表会不时通过 Power Query 更新。我想将更新后的新数据(Master data)与旧数据(BAZA OLD)进行比较,并将所有不匹配的记录复制粘贴到名为 Output 的工作表中,同时在 L 列写入“新发票”标记。我找到了一些能部分满足需求的宏,但问题在于:Master data 工作表中记录的顺序在通过 Power Query 刷新后可能会发生变化。以下是我目前的代码。

Public Sub Comparison()

' Order-independent comparison of "BAZA OLD" against "Master data" (columns A:K).
'
' Every non-blank row of "BAZA OLD" is copied to "Output"; column L is left empty
' when an identical, not yet matched row exists in "Master data", otherwise it
' receives a "not found" note. Afterwards every "Master data" row that was never
' matched is appended to "Output" and flagged "NEW INVOICE" in column L.

'Number of compared columns (A:K)
Const COL_COUNT As Long = 11

'Each variable needs its own "As" clause, otherwise it silently becomes a Variant.
'Row counters are Long because Integer overflows above 32767 rows.
Dim dumpSheet As Worksheet, icdSheet As Worksheet, outputSheet As Worksheet
Dim startRow As Long, outputRow As Long, tempDumpRow As Long
Dim tempICDRow As Long, icdRowCount As Long, colIndex As Long
Dim finishedICD() As Boolean
Dim isExist As Boolean, rowHasData As Boolean, rowsMatch As Boolean

'Set sheets
Set dumpSheet = Sheets("BAZA OLD")
Set icdSheet = Sheets("Master data")
Set outputSheet = Sheets("Output")

'Start row of each sheet for data (row 1 holds the headers)
startRow = 2
outputRow = 2

'Last used row of the master sheet. UsedRange is used instead of End(xlDown),
'which stops at the first gap and returns the last sheet row on an empty sheet.
icdRowCount = icdSheet.UsedRange.Row + icdSheet.UsedRange.Rows.Count - 1

'One flag per master row, indexed directly by row number.
'A Boolean array replaces Filter() on a String array, which matches substrings
'(row 1 was treated as finished as soon as row 10, 11, 21 ... had been stored).
ReDim finishedICD(0 To icdRowCount)

'Start row
tempDumpRow = startRow

Do
    'Stop at the first row in which all of the columns A:K are empty
    rowHasData = False
    For colIndex = 1 To COL_COUNT
        If dumpSheet.Cells(tempDumpRow, colIndex).Value <> "" Then
            rowHasData = True
            Exit For
        End If
    Next colIndex
    If Not rowHasData Then Exit Do

    'Reset exist flag
    isExist = False

    'Loop over all data rows of the master sheet (header row excluded)
    For tempICDRow = startRow To icdRowCount

        'Only rows that have not been paired with an old row yet
        If Not finishedICD(tempICDRow) Then

            'Compare A:K column by column (each column with the same column)
            rowsMatch = True
            For colIndex = 1 To COL_COUNT
                If dumpSheet.Cells(tempDumpRow, colIndex).Value <> _
                   icdSheet.Cells(tempICDRow, colIndex).Value Then
                    rowsMatch = False
                    Exit For
                End If
            Next colIndex

            If rowsMatch Then
                'Set true to exist flag and remember the consumed master row
                isExist = True
                finishedICD(tempICDRow) = True
                Exit For
            End If

        End If

    Next tempICDRow

    'Show result: copy A:K of the old row in one assignment
    outputSheet.Cells(outputRow, 1).Resize(1, COL_COUNT).Value = _
        dumpSheet.Cells(tempDumpRow, 1).Resize(1, COL_COUNT).Value

    If isExist Then
        outputSheet.Range("L" & outputRow) = ""
    Else
        outputSheet.Range("L" & outputRow) = "Item found in ""BAZA OLD"" but not in ""Saldeo"""
    End If

    'increase output row
    outputRow = outputRow + 1

    'go next row
    tempDumpRow = tempDumpRow + 1

Loop

'Append every master row that was never matched
For tempICDRow = startRow To icdRowCount

    If Not finishedICD(tempICDRow) Then

        'Skip rows that are completely blank in A:K (UsedRange may contain gaps)
        rowHasData = False
        For colIndex = 1 To COL_COUNT
            If icdSheet.Cells(tempICDRow, colIndex).Value <> "" Then
                rowHasData = True
                Exit For
            End If
        Next colIndex

        If rowHasData Then

            'Show result
            outputSheet.Cells(outputRow, 1).Resize(1, COL_COUNT).Value = _
                icdSheet.Cells(tempICDRow, 1).Resize(1, COL_COUNT).Value

            'outputSheet.Range("P" & outputRow) = "Item found in ""Baza Faktur Saldeo"" but not in ""BAZA OLD"""
            outputSheet.Range("L" & outputRow) = "NEW INVOICE"

            'increase output row
            outputRow = outputRow + 1

        End If

    End If

Next tempICDRow

End Sub

将每一行的单元格值连接成一个键字符串,然后使用 Dictionary(字典)对象比较两个工作表中的键。

Sub Comparison()
    ' Copies every "Master data" row (columns A:K) that has no identical row in
    ' "BAZA OLD" to the "Output" sheet, starting at row 2, and writes
    ' "new invoice" into the column right after the compared ones (L).
    ' Rows are identified by a key built from their joined cell values, so the
    ' order of the rows on either sheet does not matter.
    ' A message box reports duplicate keys found on "BAZA OLD" and, at the end,
    ' the number of rows written.

    Const KEY_WIDTH = 11    ' number of compared columns (A-K)
    Const DELIM = "~"       ' separator placed between the cell values of a key

    Dim wsOld As Worksheet, wsMaster As Worksheet, wsOut As Worksheet
    Dim rowCells As Range
    Dim r As Long, finalRow As Long, nextOut As Long
    Dim seen As Object, rowKey As String

    ' Sheets of the workbook that contains this macro
    Set wsOld = ThisWorkbook.Sheets("BAZA OLD")
    Set wsMaster = ThisWorkbook.Sheets("Master data")
    Set wsOut = ThisWorkbook.Sheets("Output")

    ' Pass 1: index the old data, key -> row number
    Set seen = CreateObject("Scripting.Dictionary")
    With wsOld.UsedRange
        finalRow = .Row + .Rows.Count - 1
    End With

    For r = 2 To finalRow
        Set rowCells = wsOld.Cells(r, 1).Resize(1, KEY_WIDTH)
        ' Transposing twice turns the one-row range into a 1-D array for Join
        rowKey = Join(Application.Transpose(Application.Transpose(rowCells)), DELIM)

        ' A completely blank row yields only the separators (KEY_WIDTH - 1 chars)
        If Len(rowKey) < KEY_WIDTH Then
            ' blank row, nothing to index
        ElseIf seen.exists(rowKey) Then
            MsgBox "Key not unique '" & rowKey & "'", vbCritical, wsOld.Name & " Row " & r & " and Row " & seen(rowKey)
        Else
            seen.Add rowKey, r
        End If
    Next r

    ' Pass 2: report master rows whose key is unknown
    nextOut = 2
    With wsMaster.UsedRange
        finalRow = .Row + .Rows.Count - 1
    End With

    For r = 2 To finalRow
        Set rowCells = wsMaster.Cells(r, 1).Resize(1, KEY_WIDTH)
        rowKey = Join(Application.Transpose(Application.Transpose(rowCells)), DELIM)

        If Len(rowKey) >= KEY_WIDTH Then
            If Not seen.exists(rowKey) Then
                wsOut.Cells(nextOut, 1).Resize(1, KEY_WIDTH).Value2 = rowCells.Value2
                wsOut.Cells(nextOut, KEY_WIDTH + 1) = "new invoice"
                nextOut = nextOut + 1
            End If
        End If
    Next r

    ' Summary
    Select Case nextOut
        Case 2
            MsgBox "No new invoices", vbInformation
        Case Else
            MsgBox nextOut - 2 & " new invoices", vbInformation
    End Select

End Sub