比较两个 excel 文件的快速方法?

Fast way to compare two excel files?

我想比较 2 excels 个文件 [只有 1 个 sheet] 有 10-15 列和行将超过 30K。我们得到了一个 excel 宏文件,它在 5-10 分钟内完成了比较。此宏的局限性在于它一次只能比较 2-3 列。所以每次我们都需要多次运行这个宏,这是一个耗时的过程。所以我创建了一个实用程序文件 [.vbs 文件],它一次性执行此任务,但大约需要 1-3 小时。 有没有其他方法可以在短时间内一次性完成这种比较?

startTime=Timer()
Set objExcel=Createobject("Excel.application")
objExcel.Visible=True
Set objWorkbook=objExcel.Workbooks.Open("E:\QTP trial version\Data.xls")

'Set deleteAnalysis_CopySheet=objWorkbook.sheets("Analysis_Copy")
'deleteAnalysis_CopySheet.delete
'Set deleteSummarySheet=objWorkbook.sheets("Summary")
'deleteSummarySheet.delete

Set objAnalysis_Copy=objWorkbook.sheets.add
objAnalysis_Copy.name="Analysis_Copy"

Set objSummary=objWorkbook.sheets.add
objSummary.name="Summary"
objSummary.Cells(1,1)="Analysis Row Count"
objSummary.Cells(2,1)="Reporting Row Count"
objSummary.Cells(3,1)="Analysis Column Count"
objSummary.Cells(4,1)="Reporting Column Count"
objSummary.Cells(5,1)="Difference of Row Count"
objSummary.Cells(6,1)="Difference of Column Count"
objSummary.Cells(7,1)="False Count"

' ------------------------1st Check - Verify the position of ''Metrics' in Analysis and Reporting tab. It must be same---------------------
'Get the control of Analysis tab
Set objAnalysis=objExcel.Worksheets.Item("Analysis")
intAnalysisRowCount=objAnalysis.Usedrange.rows.count
objSummary.Cells(1,2)=intAnalysisRowCount
intAnalysisColCount=objAnalysis.Usedrange.Columns.count
objSummary.Cells(3,2)=intAnalysisColCount

'Get Column number of 'Metric' Column from Analysis tab
For intMetricAnalysis=1 to intAnalysisColCount
        If(Trim(Lcase(objAnalysis.Cells(1,intMetricAnalysis)))=Trim(Lcase("Metrics"))) Then
            Exit for
        End If
Next

'Get all Analysis columns in 1 string
strAnalysisColumnOrder=""
For intAnalysisColumnOrder=1 to intAnalysisColCount
        strAnalysisColumnOrder=strAnalysisColumnOrder&"*"&objAnalysis.Cells(1,intAnalysisColumnOrder)               

        If(intAnalysisColumnOrder=1) then
            strAnalysisColumnOrder=Replace(strAnalysisColumnOrder,"*","")
        End If      
Next

Set objReporting=objExcel.Worksheets.Item("Reporting")
intReportingRowCount=objReporting.Usedrange.rows.count
objSummary.Cells(2,2)=intReportingRowCount
intReportingColCount=objReporting.Usedrange.Columns.count
objSummary.Cells(4,2)=intReportingColCount

''Get Column number of 'Metric' Column from Reporting tab
For intMetricReporting=1 to intReportingColCount
        If(Trim(Lcase(objReporting.Cells(1,intMetricReporting)))=Trim(Lcase("Metrics"))) Then
            Exit for
        End If
Next

'Get all Reporting columns in 1 string
strReportingColumnOrder=""
For intReportingColumnOrder=1 to intAnalysisColCount
            strReportingColumnOrder=strReportingColumnOrder&"*"&objReporting.Cells(1,intReportingColumnOrder)               

        If(intReportingColumnOrder=1) then
            strReportingColumnOrder=Replace(strReportingColumnOrder,"*","")
        End If      
Next


''Metric' column number must be same
If(intMetricAnalysis<>intMetricReporting) then
    msgbox "Merics column is  at  "&intMetricAnalysis&" position in 'Analysis' Tab And  at "&intMetricReporting&" position in 'Reporting' tab. 'Metrics' column should be at same position in both tab."
    strMetricsFlag=False
Else
    strMetricsFlag=True
End IF

'-----------2nd Check, Verify count of columns in 'Analysis'  And 'Reporting' tab . It Must be same
If  intAnalysisColCount<>intReportingColCount Then
    msgbox "Column count of 'Reporting' Tab is not same as of 'Analysis tab'."
    strAnalysisColCount=False
Else
    strAnalysisColCount=True
End If

''---------------3rd Check , Verify Order of columns in 'Analysis'  And 'Reporting' tab . It Must be same
If Trim(Lcase(strAnalysisColumnOrder))<>Trim(Lcase(strReportingColumnOrder)) then
    msgbox "Column order of 'Reporting' Tab is not same as of 'Analysis tab'. Reporting column order should be  "&strAnalysisColumnOrder
    strAnalysisColumnOrderFlag=False
Else
    strAnalysisColumnOrderFlag=True
End IF

'Creare 'Analysis_Copy' tab and add headers
Set objAnalysisCopy=objExcel.Worksheets.Item("Analysis_Copy")

strFirstCoulmn_AggKeys=""
For intHeaderAggkey=1 to intMetricAnalysis-1
        strFirstCoulmn_AggKeys=strFirstCoulmn_AggKeys&"*"&objAnalysis.Cells(1,intHeaderAggkey)              

        If(intHeaderAggkey=1) then
            strFirstCoulmn_AggKeys=Replace(strFirstCoulmn_AggKeys,"*","")
        End If      
Next

objAnalysisCopy.Cells(1,1)=strFirstCoulmn_AggKeys

strSecondCoulmn_AnalysisMetrics=""

For intHeaderAnalysisMetrics=intMetricAnalysis+1 to intAnalysisColCount
        strSecondCoulmn_AnalysisMetrics=strSecondCoulmn_AnalysisMetrics&"*"&objAnalysis.Cells(1,intHeaderAnalysisMetrics)   

        If(intHeaderAnalysisMetrics=intMetricAnalysis+1 ) then
            strSecondCoulmn_AnalysisMetrics=Replace(strSecondCoulmn_AnalysisMetrics,"*","")
        End If                          
Next

objAnalysisCopy.Cells(1,2)="Analysis_"&strSecondCoulmn_AnalysisMetrics
objAnalysisCopy.Cells(1,3)="Reporting_"&strSecondCoulmn_AnalysisMetrics
objAnalysisCopy.Cells(1,4)="Status"

objWorkbook.Save

'$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

If  strAnalysisColumnOrderFlag=False OR strMetricsFlag=False OR strAnalysisColCount=False Then

            msgbox "So Data Comparision can not be done"
            objWorkbook.Save
            objWorkbook.Close
            objExcel.Quit

Else

        intFalseCount=0

        For intAnalysisRow=2 to intAnalysisRowCount
        
        '   ------ Get the control of  ''Analysis' tab and the string of  Aggrecate Keys [strAnalysisAggrData]  and  respective metrics [strAnalysisMetricsData]
                      Set objAnalysis=objExcel.Worksheets.Item("Analysis")
                       ' Append all data of  each row  which is before 'Metrics' column 
                        strAnalysisAggrData=""
                        For intAnalysisColumn=1 to intMetricAnalysis-1
                                strAnalysisAggrData=strAnalysisAggrData&"*"&objAnalysis.Cells(intAnalysisRow,intAnalysisColumn)             
                    
                                If(intAnalysisColumn=1) then
                                    strAnalysisAggrData=Replace(strAnalysisAggrData,"*","")
                                End If      
                        Next
                    
        '              ' Append all data of  each row  which is after 'Metrics' column 
                        strAnalysisMetricsData=""
                        For intFromMetric=intMetricAnalysis+1 to intAnalysisColCount

                                strAnalysisMetricsData=strAnalysisMetricsData&"*"&objAnalysis.Cells(intAnalysisRow,intFromMetric)   
                    
                                If(intFromMetric=intMetricAnalysis+1 ) then
                                    strAnalysisMetricsData=Replace(strAnalysisMetricsData,"*","")
                                End If                          
                        Next
                    
                '   ------ Get the control of  ''Reporting' tab and the string of  Aggrecate Keys [strAnalysisAggrData]  and  respective metrics [strAnalysisMetricsData]
                    Set objReporting=objExcel.Worksheets.Item("Reporting")
            
                    For  intReportingRow=1 to intReportingRowCount
            
                            ' Append all data of  each row  which is before 'Metrics' column 
                            strReportingAggrData=""
                            For intBeforeMetricReporting=1 to intMetricReporting-1
                                    strReportingAggrData=strReportingAggrData&"*"&objReporting.Cells(intReportingRow,intBeforeMetricReporting)              
                        
                                    If(intBeforeMetricReporting=1) then
                                        strReportingAggrData=Replace(strReportingAggrData,"*","")
                                    End If      
                            Next
                        
                            ' Append all data of  each row  which is after 'Metrics' column 
                            strReportingMetricsData=""
                            For intFromReportingMetric=intMetricReporting+1 to intReportingColCount

                                  strReportingMetricsData=strReportingMetricsData&"*"&objReporting.Cells(intReportingRow,intFromReportingMetric)    
                        
                                    If(intFromReportingMetric=intMetricReporting+1 ) then
                                        strReportingMetricsData=Replace(strReportingMetricsData,"*","")
                                    End If                          
                            Next

        '------------------------------------------------------------  Actual Comparision will be from here ------------------------------------------
            
                            If  Trim(LCase(strAnalysisAggrData))=Trim(LCase(strReportingAggrData)) Then
        
                                    objAnalysisCopy.Cells(intAnalysisRow,1)=strAnalysisAggrData
                                    objAnalysisCopy.Cells(intAnalysisRow,2)=strAnalysisMetricsData
                                    objAnalysisCopy.Cells(intAnalysisRow,3)=strReportingMetricsData

                                    'Compare Metrics Data
                                    If  Trim(LCase(strAnalysisMetricsData))=Trim(LCase(strReportingMetricsData)) Then                                   
                                            objAnalysisCopy.Cells(intAnalysisRow,4)="PASS"
                                            objAnalysisCopy.Cells(intAnalysisRow,4).font.color=vbGreen  
                                    Else
                                            objAnalysisCopy.Cells(intAnalysisRow,4)="FAIL"
                                            intFalseCount=intFalseCount+1
                                            objAnalysisCopy.Cells(intAnalysisRow,4).font.color=vbRed    
                                    End If

                                    Exit For
                                
                            End If                          
        
                    Next        
        Next

            objSummary.Cells(5,2)=intAnalysisRowCount-intReportingRowCount
            objSummary.Cells(6,2)=intAnalysisColCount-intReportingColCount
            objSummary.Cells(7,2)=intFalseCount
            objSummary.Cells(7,2).font.color=vbRed      

            objWorkbook.Save
            objWorkbook.Close
            objExcel.Quit

            EndTime=Timer()

            TotalTime=EndTime-startTime
            
            msgbox "Data Comparision is Completed. Comparision time is "&TotalTime&"Secs"

End If

使用 dictionary 可以避免嵌套循环,每个 sheet 只扫描一次。例如作为 VBA 宏(未测试)

Sub compare()

     Dim wb As Workbook
     Dim ws(2) As Worksheet, wsSum As Worksheet, wsCopy As Worksheet
     Dim rowCount(2) As Long, colCount(2) As Integer, colMetric(2) As Integer
     Dim colsMetric(2) As String, colsAll(2) As String, colsKeys(2) As String
     Dim bMetricsFlag As Boolean, bColCountFlag As Boolean, bColOrderFlag As Boolean
     Dim i As Long, ar, msg As String, intFalseCount As Long
     
     Dim t0 as Single
     t0 = Timer

     Set wb = ThisWorkbook
     Set ws(1) = wb.Sheets("Analysis")
     Set ws(2) = wb.Sheets("Reporting")
     Set wsSum = wb.Sheets("Summary")
     wsSum.Cells.Clear
     wsSum.Range("A1:A7") = WorksheetFunction.Transpose(Array("Analysis Row Count", _
           "Reporting Row Count", "Analysis Column Count", "Reporting Column Count", _
           "Difference of Row Count", "Difference of Column Count", "False Count"))

     Set wsCopy = wb.Sheets("Analysis_Copy")
     wsCopy.Cells.Clear

     ' get stats for each sheet 1-Analyis 2=Reporting
     For i = 1 To 2
         ar = Stats(ws(i))
         rowCount(i) = ar(0)
         colCount(i) = ar(1)
         colMetric(i) = ar(2)
         colsAll(i) = ar(3)
         colsMetric(i) = ar(4)
         colsKeys(i) = ar(5)
     Next

     ' summary
     With wsSum
         .Cells(1, 2) = rowCount(1)
         .Cells(2, 2) = rowCount(2)
         .Cells(3, 2) = colCount(1)
         .Cells(4, 2) = colCount(2)
     End With

     ' check stats
     'Metric' column number must be same
     If colMetric(1) = 0 Or colMetric(2) = 0 Or colMetric(1) <> colMetric(2) Then
         msg = "Metrics columns not the same or missing : " & vbCr & _
         "Analysis : " & colMetric(1) & vbCr & _
         "Reporting : " & colMetric(2)
         MsgBox msg, vbCritical
         bMetricsFlag = False
     Else
         bMetricsFlag = True
     End If

     ' Verify count of columns
     If colCount(1) <> colCount(2) Then
         msg = "Column counts not the same : " & vbCr & _
         "Analysis : " & colCount(1) & vbCr & _
         "Reporting : " & colCount(2)
         MsgBox msg, vbCritical
         bColCountFlag = False
     Else
         bColCountFlag = True
     End If

    'Verify Order of columns
     If colsAll(1) <> colsAll(2) Then
         msg = "Column order not the same : " & vbCr & _
         "Analysis : " & colsAll(1) & vbCr & _
         "Reporting : " & colsAll(2)
         MsgBox msg, vbCritical
         bColOrderFlag = False
     Else
         bColOrderFlag = True
     End If

     With wsCopy
         .Cells(1, 1) = colsKeys(1)
         .Cells(1, 2) = "Analysis_" & colsMetric(1)
         .Cells(1, 3) = "Reporting_" & colsMetric(2)
         .Cells(1, 4) = "Status"
     End With

     ' checks OK ?
     If bColOrderFlag And bMetricsFlag And bColCountFlag Then
         ' ok
     Else
         MsgBox "So Data Comparision can not be done", vbCritical
         Exit Sub
     End If

     ' start comparison
     Dim dict As Object, m As Long, c As Long, s As String
     Dim sKey As String, sMetric As String
     Set dict = CreateObject("Scripting.Dictionary")

     ' scan Reporting sheet to build dictionary
     m = colMetric(2)
     For i = 1 To rowCount(2)
         'join cols up to and after metric col
         sMetric = "": sKey = ""
         For c = 1 To colCount(2)
             s = Trim(ws(2).Cells(i, c))
             If c < m Then
                 If sMetric <> "" Then sMetric = sMetric & "*"
                 sMetric = sMetric & s
             ElseIf c > m Then
                 If sKey <> "" Then sKey = sKey & "*"
                 sKey = sKey & s
             End If
         Next
         dict(sKey) = sMetric
     Next

    ' scan Analysis sheet to compare dictionary
     m = colMetric(1)
     For i = 2 To rowCount(1)
         'join cols up to and after metric col
         sMetric = "": sKey = ""
         For c = 1 To colCount(1)
             s = Trim(ws(1).Cells(i, c))
             If c < m Then
                 If sMetric <> "" Then sMetric = sMetric & "*"
                 sMetric = sMetric & s
             ElseIf c > m Then
                 If sKey <> "" Then sKey = sKey & "*"
                 sKey = sKey & s
             End If
         Next

         ' result
         wsCopy.Cells(i, 1) = sKey
         wsCopy.Cells(i, 2) = sMetric
         wsCopy.Cells(i, 3) = dict(sKey)

         ' pass or fail
         If sMetric = dict(sKey) Then
             wsCopy.Cells(i, 4) = "PASS"
             wsCopy.Cells(i, 4).Font.Color = vbGreen
         Else
             wsCopy.Cells(i, 4) = "FAIL"
             wsCopy.Cells(i, 4).Font.Color = vbRed
             intFalseCount = intFalseCount + 1
         End If
     Next

     With wsSum
         .Cells(5, 2) = rowCount(1) - rowCount(2)
         .Cells(6, 2) = colCount(1) - colCount(2)
         .Cells(7, 2) = intFalseCount
         .Cells(7, 2).Font.Color = vbRed
     End With

     MsgBox i - 2 & " rows scanned " & vbCrLf & _
         intFalseCount & " FAILED", vbInformation, Int(Timer - t0) & "seconds"

End Sub

Function Stats(ws As Worksheet) As Variant

     Dim c As Integer, ar(5) As Variant, s As String
     ar(0) = ws.UsedRange.Rows.Count
     ar(1) = ws.UsedRange.Columns.Count
     ar(2) = 0 'metric column
     ar(3) = "" ' col aggregated
     ar(4) = "" ' cols upto not including metric
     ar(5) = "" ' cols after metric

     For c = 1 To ar(1)
        s = LCase(Trim(ws.Cells(1, c)))
        If s = "metric" Then
            ar(2) = c
        End If

        ' aggregate headers before/after metric
        If ar(2) = 0 Then
            If ar(4) <> "" Then ar(4) = ar(4) & "*"
            ar(4) = ar(4) & s
        ElseIf c > ar(2) Then
            If ar(5) <> "" Then ar(5) = ar(5) & "*"
            ar(5) = ar(5) & s
        End If

        ' aggregate all
        If ar(3) <> "" Then ar(3) = ar(3) & "*"
        ar(3) = ar(3) & s
     Next
     Stats = ar
End Function

测试数据生成器

Sub testdata()
   Dim ws As Worksheet, n, r, c, ar
   ar = Array("", "Analysis", "Reporting")
   For n = 1 To 2
        Set ws = Sheets(ar(n))
        For r = 1 To 30000
           For c = 1 To 15
               ws.Cells(r, c) = Chr(64 + c) & r & "_abcdefghijklmnopqrstuvwxyz_"
           Next
        Next
        ws.Cells(1, 10) = "metric" ' col J
   Next
   MsgBox "test data created"
End Sub