VBA/Excel - 统计每个单元格中包含多个单词的列中的唯一单词

VBA/Excel - Count unique words in columns with multiple words in each cell

我正在使用以下数据集。对于每家公司,我想了解他们订购了多少种不同的产品。

例如:公司'AAA'订购了 6 种不同的产品(产品 1、2、3、4、5、7)。

我不确定是否需要先把每个单元格中的单词拆分开,然后在循环中逐个计数,还是有更快的方法?我必须使用 VBA,因为我的数据集有 10 万多行。

这个答案可能看起来很傻,但是由于您用逗号分隔不同的产品,为什么不简单地计算逗号的数量并加 1,例如:

=LEN(C2)-LEN(SUBSTITUTE(C2,",",""))+1

在辅助列中添加此内容后,您可以使用 Excel 的基本 Subtotals 功能来查找每个客户的总和。

假设数据在 A1:C?:

Sub Test()
    ' Purpose:
    '   For every company in column A of the active worksheet, count how many
    '   DISTINCT products appear in the comma-separated lists in column C
    '   (data assumed to start on row 2, row 1 being a header), and print
    '   "<company>-<count>" to the Immediate window.
    '
    ' Notes:
    '   * Product names are trimmed, so "P1, P2" and "P1,P2" are equivalent.
    '   * Empty tokens (blank cells, trailing commas) are ignored.
    '   * Comparison is case-sensitive (Scripting.Dictionary default).

    Dim ws As Worksheet
    Dim arr As Variant
    Dim company As Variant, token As Variant
    Dim lr As Long, x As Long
    Dim product As String
    Dim dict1 As Object     ' company -> Dictionary of that company's distinct products
    Dim products As Object

    Set dict1 = CreateObject("Scripting.Dictionary")

    ' Qualify every range reference with an explicit worksheet instead of
    ' relying on the implicit active-sheet lookup.
    Set ws = ActiveSheet
    lr = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row

    ' Nothing below the header: avoid reading "A2:C1", which would wrongly
    ' include the header row as data.
    If lr < 2 Then Exit Sub

    ' Read the whole block in one go; always a 2-D array because it spans 3 columns.
    arr = ws.Range("A2:C" & lr).Value

    ' One pass over the data: collect each company's products in a nested
    ' dictionary, which de-duplicates as it goes.
    For x = LBound(arr, 1) To UBound(arr, 1)
        company = arr(x, 1)
        If Not dict1.Exists(company) Then
            dict1.Add company, CreateObject("Scripting.Dictionary")
        End If
        Set products = dict1(company)

        For Each token In Split(arr(x, 3), ",")
            product = Trim$(token)
            If Len(product) > 0 Then products(product) = 1
        Next token
    Next x

    ' Report the result. Iterating the keys directly avoids rebuilding the
    ' Keys()/Items() arrays on every loop iteration.
    For Each company In dict1.Keys
        Debug.Print company & "-" & dict1(company).Count
    Next company

End Sub

请测试下面的代码。它会返回(在下面的代码中是写入下一个工作表,但也可以写入任何工作表)唯一的客户,接着是产品总数,然后在后续各列中列出所订购的产品:

Sub ProductsPerClient()
    ' Purpose:
    '   Build a per-client summary from the active worksheet (clients in
    '   column A, comma-separated product lists in column C, data from row 2)
    '   and write it to the next worksheet starting at A2:
    '       column A      : client
    '       column B      : number of DISTINCT products ordered
    '       columns C...  : the distinct products, one per column
    '
    ' Notes:
    '   * Each product is trimmed and de-duplicated per client; empty tokens
    '     (blank cells, trailing commas) are ignored.
    '   * Comparison is case-sensitive (Scripting.Dictionary default).
    '   * If the active sheet is the last one, a new worksheet is added after
    '     it to receive the output.

    Dim sh As Worksheet, sh1 As Worksheet
    Dim lastR As Long, colMax As Long
    Dim i As Long, j As Long
    Dim arr As Variant, arrFin As Variant
    Dim client As Variant, token As Variant
    Dim product As String
    Dim dict As Object      ' client -> Dictionary of that client's distinct products
    Dim products As Object

    Set sh = ActiveSheet
    Set sh1 = sh.Next 'use here the sheet you need
    ' sh.Next is Nothing when sh is the last sheet; create a target instead of
    ' failing on the final write.
    If sh1 Is Nothing Then Set sh1 = sh.Parent.Worksheets.Add(After:=sh)

    lastR = sh.Range("A" & sh.Rows.Count).End(xlUp).Row
    If lastR < 2 Then Exit Sub      ' header only: nothing to summarise

    arr = sh.Range("A2:C" & lastR).Value
    Set dict = CreateObject("Scripting.Dictionary")

    ' Single pass: gather distinct products per client and track the widest
    ' product list so the output array can be sized once.
    For i = 1 To UBound(arr, 1)
        client = arr(i, 1)
        If Not dict.Exists(client) Then
            dict.Add client, CreateObject("Scripting.Dictionary")
        End If
        Set products = dict(client)

        For Each token In Split(arr(i, 3), ",")
            product = Trim$(token)
            If Len(product) > 0 Then products(product) = 1
        Next token

        If products.Count > colMax Then colMax = products.Count
    Next i

    ' +2 columns for the client name and the product count.
    ReDim arrFin(1 To dict.Count, 1 To colMax + 2)

    i = 0
    For Each client In dict.Keys
        i = i + 1
        Set products = dict(client)
        arrFin(i, 1) = client
        arrFin(i, 2) = products.Count
        j = 2
        For Each token In products.Keys
            j = j + 1
            arrFin(i, j) = token
        Next token
    Next client

    'drop the final array content:
    sh1.Range("A2").Resize(UBound(arrFin, 1), UBound(arrFin, 2)).Value = arrFin
End Sub