如何将每个唯一 ID 的不同文本字符串合并为每个 ID 的 1 行

How to combine different strings of text for each unique ID into 1 line for each ID

下图显示了我拥有的数据示例 table(A 到 B 列),右边的 table(e 到 f 列)显示了我想要的输出。

我只是不知道从哪里开始,因为我需要为每个单独的 ID 将所有不同的数据项组合在一起。 每个 ID 都是唯一的,但可以有任意多个相同的 ID。同一个ID可以多次复制数据,也可以包含不同的数据。

如果超过 1 个数据项,数据项将始终以逗号分隔,并且可以是多种长度的数字和字母的混合(即使我的示例显示单个字符)。所需数据始终位于每个逗号之间,其中存在逗号(即,单个数据项除外。)

ID 总是数字。

因此,我正在努力为 Excel 2010(作为 VBA 的新手)提出一些 VBA 代码来实现此要求。任何帮助将不胜感激?

这会根据提供的输入数据生成组合输出。该代码使用 Dictionaries 来帮助获取唯一的值集。

Option Explicit

Public Sub Test()

    Dim sourceWksht As Worksheet
    Set sourceWksht = Application.ActiveWorkbook.Worksheets.("Sheet1")
    
    Dim rawData As Variant
    rawData = sourceWksht.Range("A2:B12").Value2
    
    Dim rawInputDictionary As Dictionary
    Set rawInputDictionary = New Dictionary
    
    Dim csvValue As String
    
    Dim rawIndex As Long
    For rawIndex = LBound(rawData, 1) To UBound(rawData, 1)
        csvValue = Trim$(rawData(rawIndex, 2))
        If Not rawInputDictionary.Exists(rawData(rawIndex, 1)) And Len(csvValue) > 0 Then
            rawInputDictionary.Add rawData(rawIndex, 1), csvValue
        ElseIf Len(csvValue) > 0 Then
            rawInputDictionary.Item(rawData(rawIndex, 1)) _
                = rawInputDictionary.Item(rawData(rawIndex, 1)) & "," & csvValue
        End If
    Next
    
    GenerateOutput rawInputDictionary, sourceWksht
    
End Sub

Private Sub GenerateOutput(ByVal rawInputDictionary As Dictionary, ByVal wksht As Worksheet)
    
    Dim outputArray As Variant
    ReDim outputArray(1 To rawInputDictionary.Count, 1 To 2)
    
    Dim outputArrayIndex As Long
    outputArrayIndex = 1
    
    Dim idKey As Variant
    For Each idKey In rawInputDictionary.Keys
        outputArray(outputArrayIndex, 1) = idKey
        outputArray(outputArrayIndex, 2) = GenerateCombinedData(rawInputDictionary.Item(idKey))
        outputArrayIndex = outputArrayIndex + 1
    Next
    
    Dim outputRange As Range
    Set outputRange = wksht.Range("E2:F" & CStr(rawInputDictionary.Count + 1))
    outputRange.Value = outputArray
End Sub

Private Function GenerateCombinedData(ByVal idValues As String) As String
    
    Dim combinedData As String
    combinedData = vbNullString
    
    Dim outputDictionary As Dictionary
    Set outputDictionary = New Dictionary
    
    Dim valuesArrayIndex As Long
    
    Dim valuesArray As Variant
    valuesArray = Split(idValues, ",")
    For valuesArrayIndex = LBound(valuesArray) To UBound(valuesArray)
        If Not outputDictionary.Exists(valuesArray(valuesArrayIndex)) Then
            combinedData = combinedData & valuesArray(valuesArrayIndex) & ","
            'Use the outputDictionary 'Keys' to ignore duplicate values
            outputDictionary.Add valuesArray(valuesArrayIndex), ""
        End If
    Next

    'Trim the trailing comma
    combinedData = Left$(combinedData, Len(combinedData) - 1)
    
    GenerateCombinedData = combinedData
End Function


请使用下一种方式。正如我在评论中所说,它使用字典来提取唯一键和其他四个数组来保留中间值并构建最终值。以下代码能够处理两种(可能的)分隔符:逗号“,”和逗号后跟一个或多个空格“、”、“、”、“、”。它只在内存中工作并且应该非常快,即使对于大范围也是如此:

 Sub extractUniqueIDsUniqueData()
   Dim sh As Worksheet, lastR As Long, arr, arrItem, arrIt, arrFin
   Dim i As Long, mtch, El, dict As Object
   
   Set sh = ActiveSheet
   lastR = sh.Range("A" & sh.rows.count).End(xlUp).row
   arr = sh.Range("A2:B" & lastR).value 'place the range in an array for faster iteration

   Set dict = CreateObject("Scripting.Dictionary")
   'fill the dictionary:
   For i = 1 To UBound(arr)
        If Not dict.Exists(arr(i, 1)) Then
            arrItem = Split(Replace(arr(i, 2), " ", ""), ",") 'replace spaces before splitting
            dict.Add arr(i, 1), arrItem
        Else
            If arr(i, 2) <> "" Then  'skip the empty strings in B:B
                arrIt = Split(Replace(arr(i, 2), " ", ""), ",")
                arrItem = dict(arr(i, 1))
                If UBound(arrItem) = -1 Then 'if no any element in the item array
                    arrItem = arrIt                  'use the existing processed B:B value instead
                Else
                    For Each El In arrIt
                        mtch = Application.match(El, arrItem, 0)
                        If IsError(mtch) Then 'not existing in the item array
                            ReDim Preserve arrItem(UBound(arrItem) + 1)
                            arrItem(UBound(arrItem)) = El 'add the new element in the item array
                        End If
                    Next El
                End If
                dict(arr(i, 1)) = arrItem    'place the array back as dictionary item
            End If
        End If
   Next i
   
   'Process the dictionary content:
   ReDim arrFin(1 To dict.count + 1, 1 To 2) 'redim the array to keep all dictionary elements
   
   'fill the header in the final array:
   arrFin(1, 1) = "FinalList": arrFin(1, 2) = "Combined DATA"
   'fill the rest of the final array rows
   For i = 0 To dict.count - 1
        arrFin(i + 2, 1) = dict.Keys()(i)
        arrFin(i + 2, 2) = Join(dict.items()(i), ", ")
   Next i
   'drop the final array content at once:
   With sh.Range("E1").Resize(UBound(arrFin), UBound(arrFin, 2))
        .value = arrFin
        .EntireColumn.AutoFit
   End With
   MsgBox "Ready..."
 End Sub

使用字典中的字典合并唯一数据和分隔数据

Option Explicit

Sub CombineData()
    
    ' Source
    Const sName As String = "Sheet1"
    Const sDelimiter As String = ", "
    ' Destination
    Const dName As String = "Sheet1"
    Const dFirstCellAddress As String = "E2"
    Const dDelimiter As String = ", "
    
    ' Source range to an array.
    
    Dim Data As Variant
    Dim rCount As Long
    
    With ThisWorkbook.Worksheets(sName).Range("A1").CurrentRegion
        rCount = .Rows.Count - 1
        If rCount < 1 Then Exit Sub ' no data or only headers
        Data = .Resize(rCount, 2).Offset(1).Value
    End With
    
    ' Array to a dictionary of dictionaries.
    
    Dim dict As Object: Set dict = CreateObject("Scripting.Dictionary")
    dict.CompareMode = vbTextCompare
    
    Dim Key As Variant
    Dim Item As Variant
    Dim r As Long
    Dim n As Long
    
    For r = 1 To rCount
        Item = CStr(Data(r, 2))
        If Not IsError(Item) Then
            If Len(Item) > 0 Then
                Key = Data(r, 1)
                If Not IsError(Key) Then
                    If Len(Key) > 0 Then
                        Item = Split(Item, sDelimiter)
                        If Not dict.Exists(Key) Then
                            Set dict(Key) = CreateObject("Scripting.Dictionary")
                        End If
                        For n = 0 To UBound(Item)
                            dict(Key)(Item(n)) = Empty
                        Next n
                    End If
                End If
            End If
        End If
    Next r

    rCount = dict.Count
    If rCount = 0 Then Exit Sub ' only error values or blanks
     
    ' Dictionary of dictionaries to the array.
    
    ReDim Data(1 To rCount, 1 To 2)
    r = 0
    
    For Each Key In dict.Keys
        r = r + 1
        Data(r, 1) = Key
        Data(r, 2) = Join(dict(Key).Keys, dDelimiter)
    Next Key
    
    ' Array to the destination range.
    
    With ThisWorkbook.Worksheets(dName).Range(dFirstCellAddress).Resize(, 2)
        .Resize(rCount).Value = Data
        .Resize(.Worksheet.Rows.Count - .Row - rCount + 1).Offset(rCount).Clear
    End With

    MsgBox "Data combined.", vbInformation

End Sub