如何将每个唯一 ID 的不同文本字符串合并为每个 ID 的 1 行
How to combine different strings of text for each unique ID into 1 line for each ID
下图显示了我拥有的数据示例 table(A 到 B 列),右边的 table(e 到 f 列)显示了我想要的输出。
我只是不知道从哪里开始,因为我需要为每个单独的 ID 将所有不同的数据项组合在一起。
每个 ID 都是唯一的,但可以有任意多个相同的 ID。同一个ID可以多次复制数据,也可以包含不同的数据。
如果超过 1 个数据项,数据项将始终以逗号分隔,并且可以是多种长度的数字和字母的混合(即使我的示例显示单个字符)。所需数据始终位于每个逗号之间,其中存在逗号(即,单个数据项除外。)
ID 总是数字。
因此,我正在努力为 Excel 2010(作为 VBA 的新手)提出一些 VBA 代码来实现此要求。任何帮助将不胜感激?
这会根据提供的输入数据生成组合输出。该代码使用 Dictionaries
来帮助获取唯一的值集。
Option Explicit
Public Sub Test()
Dim sourceWksht As Worksheet
Set sourceWksht = Application.ActiveWorkbook.Worksheets.("Sheet1")
Dim rawData As Variant
rawData = sourceWksht.Range("A2:B12").Value2
Dim rawInputDictionary As Dictionary
Set rawInputDictionary = New Dictionary
Dim csvValue As String
Dim rawIndex As Long
For rawIndex = LBound(rawData, 1) To UBound(rawData, 1)
csvValue = Trim$(rawData(rawIndex, 2))
If Not rawInputDictionary.Exists(rawData(rawIndex, 1)) And Len(csvValue) > 0 Then
rawInputDictionary.Add rawData(rawIndex, 1), csvValue
ElseIf Len(csvValue) > 0 Then
rawInputDictionary.Item(rawData(rawIndex, 1)) _
= rawInputDictionary.Item(rawData(rawIndex, 1)) & "," & csvValue
End If
Next
GenerateOutput rawInputDictionary, sourceWksht
End Sub
Private Sub GenerateOutput(ByVal rawInputDictionary As Dictionary, ByVal wksht As Worksheet)
Dim outputArray As Variant
ReDim outputArray(1 To rawInputDictionary.Count, 1 To 2)
Dim outputArrayIndex As Long
outputArrayIndex = 1
Dim idKey As Variant
For Each idKey In rawInputDictionary.Keys
outputArray(outputArrayIndex, 1) = idKey
outputArray(outputArrayIndex, 2) = GenerateCombinedData(rawInputDictionary.Item(idKey))
outputArrayIndex = outputArrayIndex + 1
Next
Dim outputRange As Range
Set outputRange = wksht.Range("E2:F" & CStr(rawInputDictionary.Count + 1))
outputRange.Value = outputArray
End Sub
Private Function GenerateCombinedData(ByVal idValues As String) As String
Dim combinedData As String
combinedData = vbNullString
Dim outputDictionary As Dictionary
Set outputDictionary = New Dictionary
Dim valuesArrayIndex As Long
Dim valuesArray As Variant
valuesArray = Split(idValues, ",")
For valuesArrayIndex = LBound(valuesArray) To UBound(valuesArray)
If Not outputDictionary.Exists(valuesArray(valuesArrayIndex)) Then
combinedData = combinedData & valuesArray(valuesArrayIndex) & ","
'Use the outputDictionary 'Keys' to ignore duplicate values
outputDictionary.Add valuesArray(valuesArrayIndex), ""
End If
Next
'Trim the trailing comma
combinedData = Left$(combinedData, Len(combinedData) - 1)
GenerateCombinedData = combinedData
End Function
请使用下一种方式。正如我在评论中所说,它使用字典来提取唯一键和其他四个数组来保留中间值并构建最终值。以下代码能够处理两种(可能的)分隔符:逗号“,”和逗号后跟一个或多个空格“、”、“、”、“、”。它只在内存中工作并且应该非常快,即使对于大范围也是如此:
Sub extractUniqueIDsUniqueData()
Dim sh As Worksheet, lastR As Long, arr, arrItem, arrIt, arrFin
Dim i As Long, mtch, El, dict As Object
Set sh = ActiveSheet
lastR = sh.Range("A" & sh.rows.count).End(xlUp).row
arr = sh.Range("A2:B" & lastR).value 'place the range in an array for faster iteration
Set dict = CreateObject("Scripting.Dictionary")
'fill the dictionary:
For i = 1 To UBound(arr)
If Not dict.Exists(arr(i, 1)) Then
arrItem = Split(Replace(arr(i, 2), " ", ""), ",") 'replace spaces before splitting
dict.Add arr(i, 1), arrItem
Else
If arr(i, 2) <> "" Then 'skip the empty strings in B:B
arrIt = Split(Replace(arr(i, 2), " ", ""), ",")
arrItem = dict(arr(i, 1))
If UBound(arrItem) = -1 Then 'if no any element in the item array
arrItem = arrIt 'use the existing processed B:B value instead
Else
For Each El In arrIt
mtch = Application.match(El, arrItem, 0)
If IsError(mtch) Then 'not existing in the item array
ReDim Preserve arrItem(UBound(arrItem) + 1)
arrItem(UBound(arrItem)) = El 'add the new element in the item array
End If
Next El
End If
dict(arr(i, 1)) = arrItem 'place the array back as dictionary item
End If
End If
Next i
'Process the dictionary content:
ReDim arrFin(1 To dict.count + 1, 1 To 2) 'redim the array to keep all dictionary elements
'fill the header in the final array:
arrFin(1, 1) = "FinalList": arrFin(1, 2) = "Combined DATA"
'fill the rest of the final array rows
For i = 0 To dict.count - 1
arrFin(i + 2, 1) = dict.Keys()(i)
arrFin(i + 2, 2) = Join(dict.items()(i), ", ")
Next i
'drop the final array content at once:
With sh.Range("E1").Resize(UBound(arrFin), UBound(arrFin, 2))
.value = arrFin
.EntireColumn.AutoFit
End With
MsgBox "Ready..."
End Sub
使用字典中的字典合并唯一数据和分隔数据
Option Explicit
Sub CombineData()
' Source
Const sName As String = "Sheet1"
Const sDelimiter As String = ", "
' Destination
Const dName As String = "Sheet1"
Const dFirstCellAddress As String = "E2"
Const dDelimiter As String = ", "
' Source range to an array.
Dim Data As Variant
Dim rCount As Long
With ThisWorkbook.Worksheets(sName).Range("A1").CurrentRegion
rCount = .Rows.Count - 1
If rCount < 1 Then Exit Sub ' no data or only headers
Data = .Resize(rCount, 2).Offset(1).Value
End With
' Array to a dictionary of dictionaries.
Dim dict As Object: Set dict = CreateObject("Scripting.Dictionary")
dict.CompareMode = vbTextCompare
Dim Key As Variant
Dim Item As Variant
Dim r As Long
Dim n As Long
For r = 1 To rCount
Item = CStr(Data(r, 2))
If Not IsError(Item) Then
If Len(Item) > 0 Then
Key = Data(r, 1)
If Not IsError(Key) Then
If Len(Key) > 0 Then
Item = Split(Item, sDelimiter)
If Not dict.Exists(Key) Then
Set dict(Key) = CreateObject("Scripting.Dictionary")
End If
For n = 0 To UBound(Item)
dict(Key)(Item(n)) = Empty
Next n
End If
End If
End If
End If
Next r
rCount = dict.Count
If rCount = 0 Then Exit Sub ' only error values or blanks
' Dictionary of dictionaries to the array.
ReDim Data(1 To rCount, 1 To 2)
r = 0
For Each Key In dict.Keys
r = r + 1
Data(r, 1) = Key
Data(r, 2) = Join(dict(Key).Keys, dDelimiter)
Next Key
' Array to the destination range.
With ThisWorkbook.Worksheets(dName).Range(dFirstCellAddress).Resize(, 2)
.Resize(rCount).Value = Data
.Resize(.Worksheet.Rows.Count - .Row - rCount + 1).Offset(rCount).Clear
End With
MsgBox "Data combined.", vbInformation
End Sub
下图显示了我拥有的数据示例 table(A 到 B 列),右边的 table(e 到 f 列)显示了我想要的输出。
我只是不知道从哪里开始,因为我需要为每个单独的 ID 将所有不同的数据项组合在一起。 每个 ID 都是唯一的,但可以有任意多个相同的 ID。同一个ID可以多次复制数据,也可以包含不同的数据。
如果超过 1 个数据项,数据项将始终以逗号分隔,并且可以是多种长度的数字和字母的混合(即使我的示例显示单个字符)。所需数据始终位于每个逗号之间,其中存在逗号(即,单个数据项除外。)
ID 总是数字。
因此,我正在努力为 Excel 2010(作为 VBA 的新手)提出一些 VBA 代码来实现此要求。任何帮助将不胜感激?
这会根据提供的输入数据生成组合输出。该代码使用 Dictionaries
来帮助获取唯一的值集。
Option Explicit
Public Sub Test()
Dim sourceWksht As Worksheet
Set sourceWksht = Application.ActiveWorkbook.Worksheets.("Sheet1")
Dim rawData As Variant
rawData = sourceWksht.Range("A2:B12").Value2
Dim rawInputDictionary As Dictionary
Set rawInputDictionary = New Dictionary
Dim csvValue As String
Dim rawIndex As Long
For rawIndex = LBound(rawData, 1) To UBound(rawData, 1)
csvValue = Trim$(rawData(rawIndex, 2))
If Not rawInputDictionary.Exists(rawData(rawIndex, 1)) And Len(csvValue) > 0 Then
rawInputDictionary.Add rawData(rawIndex, 1), csvValue
ElseIf Len(csvValue) > 0 Then
rawInputDictionary.Item(rawData(rawIndex, 1)) _
= rawInputDictionary.Item(rawData(rawIndex, 1)) & "," & csvValue
End If
Next
GenerateOutput rawInputDictionary, sourceWksht
End Sub
Private Sub GenerateOutput(ByVal rawInputDictionary As Dictionary, ByVal wksht As Worksheet)
Dim outputArray As Variant
ReDim outputArray(1 To rawInputDictionary.Count, 1 To 2)
Dim outputArrayIndex As Long
outputArrayIndex = 1
Dim idKey As Variant
For Each idKey In rawInputDictionary.Keys
outputArray(outputArrayIndex, 1) = idKey
outputArray(outputArrayIndex, 2) = GenerateCombinedData(rawInputDictionary.Item(idKey))
outputArrayIndex = outputArrayIndex + 1
Next
Dim outputRange As Range
Set outputRange = wksht.Range("E2:F" & CStr(rawInputDictionary.Count + 1))
outputRange.Value = outputArray
End Sub
Private Function GenerateCombinedData(ByVal idValues As String) As String
Dim combinedData As String
combinedData = vbNullString
Dim outputDictionary As Dictionary
Set outputDictionary = New Dictionary
Dim valuesArrayIndex As Long
Dim valuesArray As Variant
valuesArray = Split(idValues, ",")
For valuesArrayIndex = LBound(valuesArray) To UBound(valuesArray)
If Not outputDictionary.Exists(valuesArray(valuesArrayIndex)) Then
combinedData = combinedData & valuesArray(valuesArrayIndex) & ","
'Use the outputDictionary 'Keys' to ignore duplicate values
outputDictionary.Add valuesArray(valuesArrayIndex), ""
End If
Next
'Trim the trailing comma
combinedData = Left$(combinedData, Len(combinedData) - 1)
GenerateCombinedData = combinedData
End Function
请使用下一种方式。正如我在评论中所说,它使用字典来提取唯一键和其他四个数组来保留中间值并构建最终值。以下代码能够处理两种(可能的)分隔符:逗号“,”和逗号后跟一个或多个空格“、”、“、”、“、”。它只在内存中工作并且应该非常快,即使对于大范围也是如此:
Sub extractUniqueIDsUniqueData()
Dim sh As Worksheet, lastR As Long, arr, arrItem, arrIt, arrFin
Dim i As Long, mtch, El, dict As Object
Set sh = ActiveSheet
lastR = sh.Range("A" & sh.rows.count).End(xlUp).row
arr = sh.Range("A2:B" & lastR).value 'place the range in an array for faster iteration
Set dict = CreateObject("Scripting.Dictionary")
'fill the dictionary:
For i = 1 To UBound(arr)
If Not dict.Exists(arr(i, 1)) Then
arrItem = Split(Replace(arr(i, 2), " ", ""), ",") 'replace spaces before splitting
dict.Add arr(i, 1), arrItem
Else
If arr(i, 2) <> "" Then 'skip the empty strings in B:B
arrIt = Split(Replace(arr(i, 2), " ", ""), ",")
arrItem = dict(arr(i, 1))
If UBound(arrItem) = -1 Then 'if no any element in the item array
arrItem = arrIt 'use the existing processed B:B value instead
Else
For Each El In arrIt
mtch = Application.match(El, arrItem, 0)
If IsError(mtch) Then 'not existing in the item array
ReDim Preserve arrItem(UBound(arrItem) + 1)
arrItem(UBound(arrItem)) = El 'add the new element in the item array
End If
Next El
End If
dict(arr(i, 1)) = arrItem 'place the array back as dictionary item
End If
End If
Next i
'Process the dictionary content:
ReDim arrFin(1 To dict.count + 1, 1 To 2) 'redim the array to keep all dictionary elements
'fill the header in the final array:
arrFin(1, 1) = "FinalList": arrFin(1, 2) = "Combined DATA"
'fill the rest of the final array rows
For i = 0 To dict.count - 1
arrFin(i + 2, 1) = dict.Keys()(i)
arrFin(i + 2, 2) = Join(dict.items()(i), ", ")
Next i
'drop the final array content at once:
With sh.Range("E1").Resize(UBound(arrFin), UBound(arrFin, 2))
.value = arrFin
.EntireColumn.AutoFit
End With
MsgBox "Ready..."
End Sub
使用字典中的字典合并唯一数据和分隔数据
Option Explicit
Sub CombineData()
' Source
Const sName As String = "Sheet1"
Const sDelimiter As String = ", "
' Destination
Const dName As String = "Sheet1"
Const dFirstCellAddress As String = "E2"
Const dDelimiter As String = ", "
' Source range to an array.
Dim Data As Variant
Dim rCount As Long
With ThisWorkbook.Worksheets(sName).Range("A1").CurrentRegion
rCount = .Rows.Count - 1
If rCount < 1 Then Exit Sub ' no data or only headers
Data = .Resize(rCount, 2).Offset(1).Value
End With
' Array to a dictionary of dictionaries.
Dim dict As Object: Set dict = CreateObject("Scripting.Dictionary")
dict.CompareMode = vbTextCompare
Dim Key As Variant
Dim Item As Variant
Dim r As Long
Dim n As Long
For r = 1 To rCount
Item = CStr(Data(r, 2))
If Not IsError(Item) Then
If Len(Item) > 0 Then
Key = Data(r, 1)
If Not IsError(Key) Then
If Len(Key) > 0 Then
Item = Split(Item, sDelimiter)
If Not dict.Exists(Key) Then
Set dict(Key) = CreateObject("Scripting.Dictionary")
End If
For n = 0 To UBound(Item)
dict(Key)(Item(n)) = Empty
Next n
End If
End If
End If
End If
Next r
rCount = dict.Count
If rCount = 0 Then Exit Sub ' only error values or blanks
' Dictionary of dictionaries to the array.
ReDim Data(1 To rCount, 1 To 2)
r = 0
For Each Key In dict.Keys
r = r + 1
Data(r, 1) = Key
Data(r, 2) = Join(dict(Key).Keys, dDelimiter)
Next Key
' Array to the destination range.
With ThisWorkbook.Worksheets(dName).Range(dFirstCellAddress).Resize(, 2)
.Resize(rCount).Value = Data
.Resize(.Worksheet.Rows.Count - .Row - rCount + 1).Offset(rCount).Clear
End With
MsgBox "Data combined.", vbInformation
End Sub