如何提取每个 K-means 聚类中的观察值

how to extract observation values for each cluster of Kmeans

我有来自两个分布函数的数据(混合数据)。我用 2 个中心对数据拟合了 k-means,并得到了聚类结果。我想要的是把数据本身分成两组,而不是只得到每个观察值所属的聚类编号。也就是说,第一组包含属于第一个聚类的数据,第二组同理(我的数据是一个二维矩阵)。

这是我的尝试:

# Fit k-means with two centres to `Sim`, then print the cluster label
# assigned to each row.
kme <- kmeans(x = Sim, centers = 2)
kme[["cluster"]]

这给出了这个:

kme$cluster
  [1] 1 2 2 1 1 1 2 2 2 1 2 2 1 2 1 2 1 2 2 1 2 1 2 2 2 1 2 1 2 1 1 2 1 1 1 2 2 1 1 1 1 1 2 2 1 1 1 2 2 1 2 1 2 2 2
 [56] 1 2 1 2 2 1 2 1 1 2 2 1 2 2 1 1 2 2 1 2 2 1 1 1 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1 1 2 2 1 2

我知道这表示矩阵的第一行(即第一行中的观察值)属于第一个聚类,第二行和第三行属于第二个聚类。但我想要的不是这些编号,而是两组数据:一组是第一个聚类中的观察值(是观察值本身,而不是聚类编号),另一组是第二个聚类中的观察值。

例如,

          [,1]      [,2]  [,3]
[1,] 0.8026952 0.8049413    1
[2,] 0.4333745 0.5063472    2 
[3,] 0.3587946 0.4091627    2
[4,] 0.9067146 0.9211618    1 
[5,] 0.6663730 0.6644439    1 
[6,] 0.9752217 0.8299001    1 

因此,我想要这样:

Group_1
         [,1]      [,2]  
    [1,] 0.8026952 0.8049413    
    [2,] 0.9067146 0.9211618    
    [3,] 0.6663730 0.6644439    
    [4,] 0.9752217 0.8299001  



Group_2

    [2,] 0.4333745 0.5063472    
    [3,] 0.3587946 0.4091627    




## my data 
structure(c(0.8026952064848, 0.433374540465373, 0.35879457564118, 
0.906714606331661, 0.666372966486961, 0.975221659988165, 0.146514602801487, 
0.185211665343342, 0.266845172200967, 0.9316249943804, 0.458760005421937, 
0.260092565789819, 0.546946153900359, 0.320214906940237, 0.998543527442962, 
0.264783770404576, 0.940526409307495, 0.218771387590095, 0.00109510733232848, 
0.909367726704406, 0.195467973826453, 0.853418850837688, 0.257240866776556, 
0.18492349224921, 0.0350681275368262, 0.743108308431699, 0.120800079312176, 
0.536067422405767, 0.387076289858669, 0.859893148997799, 0.962759922724217, 
0.0288314732712864, 0.878663770621642, 0.98208610656754, 0.98423704248853, 
0.0850008164197942, 0.415692074922845, 0.725441533140838, 0.514739896170795, 
0.564903213409707, 0.65493689605431, 0.551635805051774, 0.20452569425106, 
0.0509099354967475, 0.646801606381046, 0.656341063790023, 0.706781879998744, 
0.244539211907925, 0.43318469475677, 0.848426640266553, 0.26359805940462, 
0.730860544172275, 0.405211122473702, 0.401496034115553, 0.432796132021846, 
0.654138915939257, 0.00803712895140052, 0.991968845921972, 0.0311756118742527, 
0.0648601313587278, 0.733741108178729, 0.0431173096876591, 0.619796682847664, 
0.804308546474203, 0.0934691624715924, 0.520366458455101, 0.833598382357762, 
0.373484763782471, 0.261487311183624, 0.822368689114228, 0.88254910800606, 
0.261728620579622, 0.109025254459585, 0.661885950024542, 0.231851563323289, 
0.46855820226483, 0.909970719134435, 0.799321972066537, 0.646252158097923, 
0.233985049184412, 0.309839888018159, 0.129971102112904, 0.0901338488329202, 
0.460395671925082, 0.274646409088746, 0.675003502921675, 0.00289221783168614, 
0.336108531044562, 0.371105678845197, 0.607435576152056, 0.156731446506456, 
0.246894558891654, 0.418194083335386, 0.000669385509081014, 0.929943428778418, 
0.972200238145888, 0.503282874496368, 0.126382717164233, 0.683936105109751, 
0.21720214970307, 0.804941252722838, 0.506347232734472, 0.409162739287115, 
0.921161751145135, 0.664443932378791, 0.829900114789874, 0.0660539097664178, 
0.296326436845226, 0.120007439729838, 0.768823563807157, 0.449026418114183, 
0.268668511775742, 0.733763495587273, 0.365402223476625, 0.97980160509396, 
0.335119241818387, 0.929315469866307, 0.253016166717649, 0.00521095494948787, 
0.870041067705, 0.215020805969677, 0.858896143709886, 0.167998804405928, 
0.204213777320881, 0.050652931423494, 0.731499125526297, 0.166061290725948, 
0.520575411719918, 0.370579454420263, 0.655607928337889, 0.978414469097905, 
0.00268175014874324, 0.937587480238656, 0.992468047261219, 0.856301580636229, 
0.106064732119751, 0.530228247677302, 0.502227925225818, 0.66462369930413, 
0.526988978414104, 0.394591213637187, 0.623968017885322, 0.222666427921132, 
0.0707407196787662, 0.715361864683925, 0.561951996212598, 0.874765155771585, 
0.217631973951671, 0.576708062239157, 0.910641489550344, 0.215463715360162, 
0.761807500922947, 0.417110771840405, 0.497162608159201, 0.530665309105489, 
0.689703677933362, 0.00811876221245061, 0.991245541114815, 0.0518070069187705, 
0.0733367055960226, 0.803126294581356, 0.0291602667026993, 0.724848517465592, 
0.682316094846719, 0.0914714514707226, 0.426956537783392, 0.826985575416605, 
0.3128962286514, 0.295208624024388, 0.58934716401092, 0.856718183582533, 
0.183019143019377, 0.302561606994597, 0.666755501118539, 0.176298329811281, 
0.389183841328174, 0.86253900906311, 0.753736534075238, 0.627220192419063, 
0.319958512526359, 0.321602248149364, 0.161772830672492, 0.103166641060684, 
0.339980194505715, 0.218533019046996, 0.689884789678819, 0.00251942038852481, 
0.174792447835404, 0.509071373135409, 0.647835095901117, 0.22572898134156, 
0.287369659385574, 0.538675651472693, 0.000995476493411555, 0.939528694637273, 
0.961510166904661, 0.452822116916426, 0.2061782381611, 0.722694525115558, 
0.328404467661884), .Dim = c(100L, 2L))

将 kme$cluster 的值添加到原始数据框中,然后根据 kme$cluster 中的值创建一个新的数据框,其中每个聚类对应一列。

在没有数据样本的情况下,我只能尽力给出如下代码:

library(tidyverse)

# `Sim` is a plain numeric matrix and dplyr verbs have no matrix method, so
# convert it to a data frame first (unnamed columns become V1, V2, ...).
Sim <- as.data.frame(Sim)

# `value` holds the names of the columns that were passed to kmeans(); it is
# captured before the cluster label column is added.
value <- names(Sim)

# Attach each row's cluster label (kme$cluster has one entry per row).
Sim <- Sim %>%
  mutate(cluster_group = kme$cluster)

# One data frame of observations per cluster. The clusters generally contain
# different numbers of rows, so they cannot be columns of a single
# data.frame(); a named list keeps `df_final$Group1` / `df_final$Group2`
# working.
df_final <- list(
  Group1 = Sim %>%
    filter(cluster_group == 1) %>%
    select(all_of(value)),
  Group2 = Sim %>%
    filter(cluster_group == 2) %>%
    select(all_of(value))
)

其中 value 指的是 Sim 中用于 kmeans 的值(列)。

希望这就是您要找的。

  • 我必须先把矩阵转换为数据框,这样使用 split 函数时才能保留其结构;否则它会把整个矩阵逐个元素地拆分,因为矩阵实际上只是一个带有 dim 属性的向量,所以其行为与向量相同。
  • split 函数将数据框或向量按 f 定义的分组进行拆分;在您的例子中,f 就是各个唯一的聚类编号。
# Cluster the rows of `Sim` into two groups and print the per-row labels.
kme <- kmeans(x = Sim, centers = 2)
kme[["cluster"]]

# Bind the labels on as a third column and convert to a data frame so that
# split() works row-wise; with an unnamed two-column matrix the label column
# is auto-named V3.
Sim2 <- as.data.frame(cbind(Sim, kme[["cluster"]]))

# Split the rows by cluster label and name the pieces "Group 1", "Group 2".
setNames(
  split(Sim2, Sim2[["V3"]]),
  paste("Group", sort(unique(kme[["cluster"]])))
)

$`Group 1`
              V1           V2 V3
2   0.4333745405 0.5063472327  1
3   0.3587945756 0.4091627393  1
7   0.1465146028 0.0660539098  1
8   0.1852116653 0.2963264368  1
9   0.2668451722 0.1200074397  1
11  0.4587600054 0.4490264181  1
12  0.2600925658 0.2686685118  1
14  0.3202149069 0.3654022235  1
16  0.2647837704 0.3351192418  1
18  0.2187713876 0.2530161667  1
19  0.0010951073 0.0052109549  1
21  0.1954679738 0.2150208060  1
23  0.2572408668 0.1679988044  1
24  0.1849234922 0.2042137773  1
25  0.0350681275 0.0506529314  1
27  0.1208000793 0.1660612907  1
29  0.3870762899 0.3705794544  1
32  0.0288314733 0.0026817501  1
36  0.0850008164 0.1060647321  1
37  0.4156920749 0.5302282477  1
43  0.2045256943 0.2226664279  1
44  0.0509099355 0.0707407197  1
48  0.2445392119 0.2176319740  1
49  0.4331846948 0.5767080622  1
51  0.2635980594 0.2154637154  1
53  0.4052111225 0.4171107718  1
54  0.4014960341 0.4971626082  1
55  0.4327961320 0.5306653091  1
57  0.0080371290 0.0081187622  1
59  0.0311756119 0.0518070069  1
60  0.0648601314 0.0733367056  1
62  0.0431173097 0.0291602667  1
65  0.0934691625 0.0914714515  1
66  0.5203664585 0.4269565378  1
68  0.3734847638 0.3128962287  1
69  0.2614873112 0.2952086240  1
72  0.2617286206 0.1830191430  1
73  0.1090252545 0.3025616070  1
75  0.2318515633 0.1762983298  1
76  0.4685582023 0.3891838413  1
80  0.2339850492 0.3199585125  1
81  0.3098398880 0.3216022481  1
82  0.1299711021 0.1617728307  1
83  0.0901338488 0.1031666411  1
84  0.4603956719 0.3399801945  1
85  0.2746464091 0.2185330190  1
87  0.0028922178 0.0025194204  1
88  0.3361085310 0.1747924478  1
89  0.3711056788 0.5090713731  1
91  0.1567314465 0.2257289813  1
92  0.2468945589 0.2873696594  1
93  0.4181940833 0.5386756515  1
94  0.0006693855 0.0009954765  1
97  0.5032828745 0.4528221169  1
98  0.1263827172 0.2061782382  1
100 0.2172021497 0.3284044677  1

$`Group 2`
          V1        V2 V3
1  0.8026952 0.8049413  2
4  0.9067146 0.9211618  2
5  0.6663730 0.6644439  2
6  0.9752217 0.8299001  2
10 0.9316250 0.7688236  2
13 0.5469462 0.7337635  2
15 0.9985435 0.9798016  2
17 0.9405264 0.9293155  2
20 0.9093677 0.8700411  2
22 0.8534189 0.8588961  2
26 0.7431083 0.7314991  2
28 0.5360674 0.5205754  2
30 0.8598931 0.6556079  2
31 0.9627599 0.9784145  2
33 0.8786638 0.9375875  2
34 0.9820861 0.9924680  2
35 0.9842370 0.8563016  2
38 0.7254415 0.5022279  2
39 0.5147399 0.6646237  2
40 0.5649032 0.5269890  2
41 0.6549369 0.3945912  2
42 0.5516358 0.6239680  2
45 0.6468016 0.7153619  2
46 0.6563411 0.5619520  2
47 0.7067819 0.8747652  2
50 0.8484266 0.9106415  2
52 0.7308605 0.7618075  2
56 0.6541389 0.6897037  2
58 0.9919688 0.9912455  2
61 0.7337411 0.8031263  2
63 0.6197967 0.7248485  2
64 0.8043085 0.6823161  2
67 0.8335984 0.8269856  2
70 0.8223687 0.5893472  2
71 0.8825491 0.8567182  2
74 0.6618860 0.6667555  2
77 0.9099707 0.8625390  2
78 0.7993220 0.7537365  2
79 0.6462522 0.6272202  2
86 0.6750035 0.6898848  2
90 0.6074356 0.6478351  2
95 0.9299434 0.9395287  2
96 0.9722002 0.9615102  2
99 0.6839361 0.7226945  2