如何为每个 Kmeans 集群提取观察值
how to extract observation values for each cluster of Kmeans
我有来自两个分布函数的数据(混合数据)。我用 $2$ 个中心对数据拟合了 k-means,并得到了聚类结果。我想要的是把数据本身按聚类分成两组,而不是只得到每个观察值所属聚类的编号。也就是说,第一组包含属于第一个聚类的观察值,第二组包含属于第二个聚类的观察值(我的数据是一个二维矩阵)。
这是我的尝试:
# Fit k-means with two centres to the rows of the matrix `Sim`.
kme <- kmeans(x = Sim, centers = 2)
# Show the cluster label assigned to each row of `Sim`.
kme[["cluster"]]
这给出了这个:
kme$cluster
[1] 1 2 2 1 1 1 2 2 2 1 2 2 1 2 1 2 1 2 2 1 2 1 2 2 2 1 2 1 2 1 1 2 1 1 1 2 2 1 1 1 1 1 2 2 1 1 1 2 2 1 2 1 2 2 2
[56] 1 2 1 2 2 1 2 1 1 2 2 1 2 2 1 1 2 2 1 2 2 1 1 1 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1 1 2 2 1 2
我知道这表示矩阵的第一行(第一行中的观察值)属于第一个聚类,第二行和第三行属于第二个聚类。但我想要的不是这些编号,而是两组数据:一组是第一个聚类的观察值(观察值本身,而不是聚类编号),另一组是第二个聚类的观察值。
例如,
[,1] [,2] [,3]
[1,] 0.8026952 0.8049413 1
[2,] 0.4333745 0.5063472 2
[3,] 0.3587946 0.4091627 2
[4,] 0.9067146 0.9211618 1
[5,] 0.6663730 0.6644439 1
[6,] 0.9752217 0.8299001 1
因此,我想要这样:
Group_1
[,1] [,2]
[1,] 0.8026952 0.8049413
[2,] 0.9067146 0.9211618
[3,] 0.6663730 0.6644439
[4,] 0.9752217 0.8299001
Group_2
[2,] 0.4333745 0.5063472
[3,] 0.3587946 0.4091627
## my data
structure(c(0.8026952064848, 0.433374540465373, 0.35879457564118,
0.906714606331661, 0.666372966486961, 0.975221659988165, 0.146514602801487,
0.185211665343342, 0.266845172200967, 0.9316249943804, 0.458760005421937,
0.260092565789819, 0.546946153900359, 0.320214906940237, 0.998543527442962,
0.264783770404576, 0.940526409307495, 0.218771387590095, 0.00109510733232848,
0.909367726704406, 0.195467973826453, 0.853418850837688, 0.257240866776556,
0.18492349224921, 0.0350681275368262, 0.743108308431699, 0.120800079312176,
0.536067422405767, 0.387076289858669, 0.859893148997799, 0.962759922724217,
0.0288314732712864, 0.878663770621642, 0.98208610656754, 0.98423704248853,
0.0850008164197942, 0.415692074922845, 0.725441533140838, 0.514739896170795,
0.564903213409707, 0.65493689605431, 0.551635805051774, 0.20452569425106,
0.0509099354967475, 0.646801606381046, 0.656341063790023, 0.706781879998744,
0.244539211907925, 0.43318469475677, 0.848426640266553, 0.26359805940462,
0.730860544172275, 0.405211122473702, 0.401496034115553, 0.432796132021846,
0.654138915939257, 0.00803712895140052, 0.991968845921972, 0.0311756118742527,
0.0648601313587278, 0.733741108178729, 0.0431173096876591, 0.619796682847664,
0.804308546474203, 0.0934691624715924, 0.520366458455101, 0.833598382357762,
0.373484763782471, 0.261487311183624, 0.822368689114228, 0.88254910800606,
0.261728620579622, 0.109025254459585, 0.661885950024542, 0.231851563323289,
0.46855820226483, 0.909970719134435, 0.799321972066537, 0.646252158097923,
0.233985049184412, 0.309839888018159, 0.129971102112904, 0.0901338488329202,
0.460395671925082, 0.274646409088746, 0.675003502921675, 0.00289221783168614,
0.336108531044562, 0.371105678845197, 0.607435576152056, 0.156731446506456,
0.246894558891654, 0.418194083335386, 0.000669385509081014, 0.929943428778418,
0.972200238145888, 0.503282874496368, 0.126382717164233, 0.683936105109751,
0.21720214970307, 0.804941252722838, 0.506347232734472, 0.409162739287115,
0.921161751145135, 0.664443932378791, 0.829900114789874, 0.0660539097664178,
0.296326436845226, 0.120007439729838, 0.768823563807157, 0.449026418114183,
0.268668511775742, 0.733763495587273, 0.365402223476625, 0.97980160509396,
0.335119241818387, 0.929315469866307, 0.253016166717649, 0.00521095494948787,
0.870041067705, 0.215020805969677, 0.858896143709886, 0.167998804405928,
0.204213777320881, 0.050652931423494, 0.731499125526297, 0.166061290725948,
0.520575411719918, 0.370579454420263, 0.655607928337889, 0.978414469097905,
0.00268175014874324, 0.937587480238656, 0.992468047261219, 0.856301580636229,
0.106064732119751, 0.530228247677302, 0.502227925225818, 0.66462369930413,
0.526988978414104, 0.394591213637187, 0.623968017885322, 0.222666427921132,
0.0707407196787662, 0.715361864683925, 0.561951996212598, 0.874765155771585,
0.217631973951671, 0.576708062239157, 0.910641489550344, 0.215463715360162,
0.761807500922947, 0.417110771840405, 0.497162608159201, 0.530665309105489,
0.689703677933362, 0.00811876221245061, 0.991245541114815, 0.0518070069187705,
0.0733367055960226, 0.803126294581356, 0.0291602667026993, 0.724848517465592,
0.682316094846719, 0.0914714514707226, 0.426956537783392, 0.826985575416605,
0.3128962286514, 0.295208624024388, 0.58934716401092, 0.856718183582533,
0.183019143019377, 0.302561606994597, 0.666755501118539, 0.176298329811281,
0.389183841328174, 0.86253900906311, 0.753736534075238, 0.627220192419063,
0.319958512526359, 0.321602248149364, 0.161772830672492, 0.103166641060684,
0.339980194505715, 0.218533019046996, 0.689884789678819, 0.00251942038852481,
0.174792447835404, 0.509071373135409, 0.647835095901117, 0.22572898134156,
0.287369659385574, 0.538675651472693, 0.000995476493411555, 0.939528694637273,
0.961510166904661, 0.452822116916426, 0.2061782381611, 0.722694525115558,
0.328404467661884), .Dim = c(100L, 2L))
将 `kme$cluster` 的值作为新列添加到原始数据框,然后根据 `kme$cluster` 中的值创建新的数据框,把各个聚类的数据分别放在其中。
由于没有数据样本,下面的代码是按我的理解写的:
library(tidyverse)

# dplyr verbs have no method for a bare matrix, so convert `Sim` to a data
# frame before attaching the cluster label of each row.
Sim <- as.data.frame(Sim) %>%
  mutate(cluster_group = kme$cluster)

# Clusters rarely contain the same number of rows, so data.frame() cannot hold
# them side by side; keep one data frame per cluster in a named list instead.
# Dropping `cluster_group` keeps every clustered column, rather than a single
# placeholder column called `value`.
df_final <- list(
  Group1 = Sim %>%
    filter(cluster_group == 1) %>%
    select(-cluster_group),
  Group2 = Sim %>%
    filter(cluster_group == 2) %>%
    select(-cluster_group)
)
其中 `value` 指 `Sim` 中用于 kmeans 的值所在的列。
希望这就是您要找的。
- 我必须先将矩阵转换为数据框,这样在使用 `split` 函数时结构才会被保留;否则它会逐个元素地拆分整个矩阵,因为矩阵实际上是一个带有 `dim` 属性的向量,所以它的行为就像一个向量。
- `split` 函数将数据框或向量分成由 `f` 定义的组,在您的情况下 `f` 就是各个唯一的聚类值。
# Cluster the rows of `Sim` around two centres and show each row's label.
kme <- kmeans(x = Sim, centers = 2)
kme[["cluster"]]
# Append the labels as an extra column; when `Sim` has no column names,
# as.data.frame() names the columns V1, V2, V3.
Sim2 <- as.data.frame(cbind(Sim, kme[["cluster"]]))
# One data frame per cluster label, renamed to "Group 1", "Group 2".
setNames(
  split(Sim2, Sim2[["V3"]]),
  paste("Group", sort(unique(kme[["cluster"]])))
)
$`Group 1`
V1 V2 V3
2 0.4333745405 0.5063472327 1
3 0.3587945756 0.4091627393 1
7 0.1465146028 0.0660539098 1
8 0.1852116653 0.2963264368 1
9 0.2668451722 0.1200074397 1
11 0.4587600054 0.4490264181 1
12 0.2600925658 0.2686685118 1
14 0.3202149069 0.3654022235 1
16 0.2647837704 0.3351192418 1
18 0.2187713876 0.2530161667 1
19 0.0010951073 0.0052109549 1
21 0.1954679738 0.2150208060 1
23 0.2572408668 0.1679988044 1
24 0.1849234922 0.2042137773 1
25 0.0350681275 0.0506529314 1
27 0.1208000793 0.1660612907 1
29 0.3870762899 0.3705794544 1
32 0.0288314733 0.0026817501 1
36 0.0850008164 0.1060647321 1
37 0.4156920749 0.5302282477 1
43 0.2045256943 0.2226664279 1
44 0.0509099355 0.0707407197 1
48 0.2445392119 0.2176319740 1
49 0.4331846948 0.5767080622 1
51 0.2635980594 0.2154637154 1
53 0.4052111225 0.4171107718 1
54 0.4014960341 0.4971626082 1
55 0.4327961320 0.5306653091 1
57 0.0080371290 0.0081187622 1
59 0.0311756119 0.0518070069 1
60 0.0648601314 0.0733367056 1
62 0.0431173097 0.0291602667 1
65 0.0934691625 0.0914714515 1
66 0.5203664585 0.4269565378 1
68 0.3734847638 0.3128962287 1
69 0.2614873112 0.2952086240 1
72 0.2617286206 0.1830191430 1
73 0.1090252545 0.3025616070 1
75 0.2318515633 0.1762983298 1
76 0.4685582023 0.3891838413 1
80 0.2339850492 0.3199585125 1
81 0.3098398880 0.3216022481 1
82 0.1299711021 0.1617728307 1
83 0.0901338488 0.1031666411 1
84 0.4603956719 0.3399801945 1
85 0.2746464091 0.2185330190 1
87 0.0028922178 0.0025194204 1
88 0.3361085310 0.1747924478 1
89 0.3711056788 0.5090713731 1
91 0.1567314465 0.2257289813 1
92 0.2468945589 0.2873696594 1
93 0.4181940833 0.5386756515 1
94 0.0006693855 0.0009954765 1
97 0.5032828745 0.4528221169 1
98 0.1263827172 0.2061782382 1
100 0.2172021497 0.3284044677 1
$`Group 2`
V1 V2 V3
1 0.8026952 0.8049413 2
4 0.9067146 0.9211618 2
5 0.6663730 0.6644439 2
6 0.9752217 0.8299001 2
10 0.9316250 0.7688236 2
13 0.5469462 0.7337635 2
15 0.9985435 0.9798016 2
17 0.9405264 0.9293155 2
20 0.9093677 0.8700411 2
22 0.8534189 0.8588961 2
26 0.7431083 0.7314991 2
28 0.5360674 0.5205754 2
30 0.8598931 0.6556079 2
31 0.9627599 0.9784145 2
33 0.8786638 0.9375875 2
34 0.9820861 0.9924680 2
35 0.9842370 0.8563016 2
38 0.7254415 0.5022279 2
39 0.5147399 0.6646237 2
40 0.5649032 0.5269890 2
41 0.6549369 0.3945912 2
42 0.5516358 0.6239680 2
45 0.6468016 0.7153619 2
46 0.6563411 0.5619520 2
47 0.7067819 0.8747652 2
50 0.8484266 0.9106415 2
52 0.7308605 0.7618075 2
56 0.6541389 0.6897037 2
58 0.9919688 0.9912455 2
61 0.7337411 0.8031263 2
63 0.6197967 0.7248485 2
64 0.8043085 0.6823161 2
67 0.8335984 0.8269856 2
70 0.8223687 0.5893472 2
71 0.8825491 0.8567182 2
74 0.6618860 0.6667555 2
77 0.9099707 0.8625390 2
78 0.7993220 0.7537365 2
79 0.6462522 0.6272202 2
86 0.6750035 0.6898848 2
90 0.6074356 0.6478351 2
95 0.9299434 0.9395287 2
96 0.9722002 0.9615102 2
99 0.6839361 0.7226945 2
我有来自两个分布函数的数据(混合数据)。我用 $2$ 个中心对数据拟合了 k-means,并得到了聚类结果。我想要的是把数据本身按聚类分成两组,而不是只得到每个观察值所属聚类的编号。也就是说,第一组包含属于第一个聚类的观察值,第二组包含属于第二个聚类的观察值(我的数据是一个二维矩阵)。
这是我的尝试:
# Fit k-means with two centres to the rows of the matrix `Sim`.
kme <- kmeans(x = Sim, centers = 2)
# Show the cluster label assigned to each row of `Sim`.
kme[["cluster"]]
这给出了这个:
kme$cluster
[1] 1 2 2 1 1 1 2 2 2 1 2 2 1 2 1 2 1 2 2 1 2 1 2 2 2 1 2 1 2 1 1 2 1 1 1 2 2 1 1 1 1 1 2 2 1 1 1 2 2 1 2 1 2 2 2
[56] 1 2 1 2 2 1 2 1 1 2 2 1 2 2 1 1 2 2 1 2 2 1 1 1 2 2 2 2 2 2 1 2 2 2 1 2 2 2 2 1 1 2 2 1 2
我知道这表示矩阵的第一行(第一行中的观察值)属于第一个聚类,第二行和第三行属于第二个聚类。但我想要的不是这些编号,而是两组数据:一组是第一个聚类的观察值(观察值本身,而不是聚类编号),另一组是第二个聚类的观察值。
例如,
[,1] [,2] [,3]
[1,] 0.8026952 0.8049413 1
[2,] 0.4333745 0.5063472 2
[3,] 0.3587946 0.4091627 2
[4,] 0.9067146 0.9211618 1
[5,] 0.6663730 0.6644439 1
[6,] 0.9752217 0.8299001 1
因此,我想要这样:
Group_1
[,1] [,2]
[1,] 0.8026952 0.8049413
[2,] 0.9067146 0.9211618
[3,] 0.6663730 0.6644439
[4,] 0.9752217 0.8299001
Group_2
[2,] 0.4333745 0.5063472
[3,] 0.3587946 0.4091627
## my data
structure(c(0.8026952064848, 0.433374540465373, 0.35879457564118,
0.906714606331661, 0.666372966486961, 0.975221659988165, 0.146514602801487,
0.185211665343342, 0.266845172200967, 0.9316249943804, 0.458760005421937,
0.260092565789819, 0.546946153900359, 0.320214906940237, 0.998543527442962,
0.264783770404576, 0.940526409307495, 0.218771387590095, 0.00109510733232848,
0.909367726704406, 0.195467973826453, 0.853418850837688, 0.257240866776556,
0.18492349224921, 0.0350681275368262, 0.743108308431699, 0.120800079312176,
0.536067422405767, 0.387076289858669, 0.859893148997799, 0.962759922724217,
0.0288314732712864, 0.878663770621642, 0.98208610656754, 0.98423704248853,
0.0850008164197942, 0.415692074922845, 0.725441533140838, 0.514739896170795,
0.564903213409707, 0.65493689605431, 0.551635805051774, 0.20452569425106,
0.0509099354967475, 0.646801606381046, 0.656341063790023, 0.706781879998744,
0.244539211907925, 0.43318469475677, 0.848426640266553, 0.26359805940462,
0.730860544172275, 0.405211122473702, 0.401496034115553, 0.432796132021846,
0.654138915939257, 0.00803712895140052, 0.991968845921972, 0.0311756118742527,
0.0648601313587278, 0.733741108178729, 0.0431173096876591, 0.619796682847664,
0.804308546474203, 0.0934691624715924, 0.520366458455101, 0.833598382357762,
0.373484763782471, 0.261487311183624, 0.822368689114228, 0.88254910800606,
0.261728620579622, 0.109025254459585, 0.661885950024542, 0.231851563323289,
0.46855820226483, 0.909970719134435, 0.799321972066537, 0.646252158097923,
0.233985049184412, 0.309839888018159, 0.129971102112904, 0.0901338488329202,
0.460395671925082, 0.274646409088746, 0.675003502921675, 0.00289221783168614,
0.336108531044562, 0.371105678845197, 0.607435576152056, 0.156731446506456,
0.246894558891654, 0.418194083335386, 0.000669385509081014, 0.929943428778418,
0.972200238145888, 0.503282874496368, 0.126382717164233, 0.683936105109751,
0.21720214970307, 0.804941252722838, 0.506347232734472, 0.409162739287115,
0.921161751145135, 0.664443932378791, 0.829900114789874, 0.0660539097664178,
0.296326436845226, 0.120007439729838, 0.768823563807157, 0.449026418114183,
0.268668511775742, 0.733763495587273, 0.365402223476625, 0.97980160509396,
0.335119241818387, 0.929315469866307, 0.253016166717649, 0.00521095494948787,
0.870041067705, 0.215020805969677, 0.858896143709886, 0.167998804405928,
0.204213777320881, 0.050652931423494, 0.731499125526297, 0.166061290725948,
0.520575411719918, 0.370579454420263, 0.655607928337889, 0.978414469097905,
0.00268175014874324, 0.937587480238656, 0.992468047261219, 0.856301580636229,
0.106064732119751, 0.530228247677302, 0.502227925225818, 0.66462369930413,
0.526988978414104, 0.394591213637187, 0.623968017885322, 0.222666427921132,
0.0707407196787662, 0.715361864683925, 0.561951996212598, 0.874765155771585,
0.217631973951671, 0.576708062239157, 0.910641489550344, 0.215463715360162,
0.761807500922947, 0.417110771840405, 0.497162608159201, 0.530665309105489,
0.689703677933362, 0.00811876221245061, 0.991245541114815, 0.0518070069187705,
0.0733367055960226, 0.803126294581356, 0.0291602667026993, 0.724848517465592,
0.682316094846719, 0.0914714514707226, 0.426956537783392, 0.826985575416605,
0.3128962286514, 0.295208624024388, 0.58934716401092, 0.856718183582533,
0.183019143019377, 0.302561606994597, 0.666755501118539, 0.176298329811281,
0.389183841328174, 0.86253900906311, 0.753736534075238, 0.627220192419063,
0.319958512526359, 0.321602248149364, 0.161772830672492, 0.103166641060684,
0.339980194505715, 0.218533019046996, 0.689884789678819, 0.00251942038852481,
0.174792447835404, 0.509071373135409, 0.647835095901117, 0.22572898134156,
0.287369659385574, 0.538675651472693, 0.000995476493411555, 0.939528694637273,
0.961510166904661, 0.452822116916426, 0.2061782381611, 0.722694525115558,
0.328404467661884), .Dim = c(100L, 2L))
将 `kme$cluster` 的值作为新列添加到原始数据框,然后根据 `kme$cluster` 中的值创建新的数据框,把各个聚类的数据分别放在其中。
由于没有数据样本,下面的代码是按我的理解写的:
library(tidyverse)

# dplyr verbs have no method for a bare matrix, so convert `Sim` to a data
# frame before attaching the cluster label of each row.
Sim <- as.data.frame(Sim) %>%
  mutate(cluster_group = kme$cluster)

# Clusters rarely contain the same number of rows, so data.frame() cannot hold
# them side by side; keep one data frame per cluster in a named list instead.
# Dropping `cluster_group` keeps every clustered column, rather than a single
# placeholder column called `value`.
df_final <- list(
  Group1 = Sim %>%
    filter(cluster_group == 1) %>%
    select(-cluster_group),
  Group2 = Sim %>%
    filter(cluster_group == 2) %>%
    select(-cluster_group)
)
其中 `value` 指 `Sim` 中用于 kmeans 的值所在的列。
希望这就是您要找的。
- 我必须先将矩阵转换为数据框,这样在使用 `split` 函数时结构才会被保留;否则它会逐个元素地拆分整个矩阵,因为矩阵实际上是一个带有 `dim` 属性的向量,所以它的行为就像一个向量。
- `split` 函数将数据框或向量分成由 `f` 定义的组,在您的情况下 `f` 就是各个唯一的聚类值。
# Cluster the rows of `Sim` around two centres and show each row's label.
kme <- kmeans(x = Sim, centers = 2)
kme[["cluster"]]
# Append the labels as an extra column; when `Sim` has no column names,
# as.data.frame() names the columns V1, V2, V3.
Sim2 <- as.data.frame(cbind(Sim, kme[["cluster"]]))
# One data frame per cluster label, renamed to "Group 1", "Group 2".
setNames(
  split(Sim2, Sim2[["V3"]]),
  paste("Group", sort(unique(kme[["cluster"]])))
)
$`Group 1`
V1 V2 V3
2 0.4333745405 0.5063472327 1
3 0.3587945756 0.4091627393 1
7 0.1465146028 0.0660539098 1
8 0.1852116653 0.2963264368 1
9 0.2668451722 0.1200074397 1
11 0.4587600054 0.4490264181 1
12 0.2600925658 0.2686685118 1
14 0.3202149069 0.3654022235 1
16 0.2647837704 0.3351192418 1
18 0.2187713876 0.2530161667 1
19 0.0010951073 0.0052109549 1
21 0.1954679738 0.2150208060 1
23 0.2572408668 0.1679988044 1
24 0.1849234922 0.2042137773 1
25 0.0350681275 0.0506529314 1
27 0.1208000793 0.1660612907 1
29 0.3870762899 0.3705794544 1
32 0.0288314733 0.0026817501 1
36 0.0850008164 0.1060647321 1
37 0.4156920749 0.5302282477 1
43 0.2045256943 0.2226664279 1
44 0.0509099355 0.0707407197 1
48 0.2445392119 0.2176319740 1
49 0.4331846948 0.5767080622 1
51 0.2635980594 0.2154637154 1
53 0.4052111225 0.4171107718 1
54 0.4014960341 0.4971626082 1
55 0.4327961320 0.5306653091 1
57 0.0080371290 0.0081187622 1
59 0.0311756119 0.0518070069 1
60 0.0648601314 0.0733367056 1
62 0.0431173097 0.0291602667 1
65 0.0934691625 0.0914714515 1
66 0.5203664585 0.4269565378 1
68 0.3734847638 0.3128962287 1
69 0.2614873112 0.2952086240 1
72 0.2617286206 0.1830191430 1
73 0.1090252545 0.3025616070 1
75 0.2318515633 0.1762983298 1
76 0.4685582023 0.3891838413 1
80 0.2339850492 0.3199585125 1
81 0.3098398880 0.3216022481 1
82 0.1299711021 0.1617728307 1
83 0.0901338488 0.1031666411 1
84 0.4603956719 0.3399801945 1
85 0.2746464091 0.2185330190 1
87 0.0028922178 0.0025194204 1
88 0.3361085310 0.1747924478 1
89 0.3711056788 0.5090713731 1
91 0.1567314465 0.2257289813 1
92 0.2468945589 0.2873696594 1
93 0.4181940833 0.5386756515 1
94 0.0006693855 0.0009954765 1
97 0.5032828745 0.4528221169 1
98 0.1263827172 0.2061782382 1
100 0.2172021497 0.3284044677 1
$`Group 2`
V1 V2 V3
1 0.8026952 0.8049413 2
4 0.9067146 0.9211618 2
5 0.6663730 0.6644439 2
6 0.9752217 0.8299001 2
10 0.9316250 0.7688236 2
13 0.5469462 0.7337635 2
15 0.9985435 0.9798016 2
17 0.9405264 0.9293155 2
20 0.9093677 0.8700411 2
22 0.8534189 0.8588961 2
26 0.7431083 0.7314991 2
28 0.5360674 0.5205754 2
30 0.8598931 0.6556079 2
31 0.9627599 0.9784145 2
33 0.8786638 0.9375875 2
34 0.9820861 0.9924680 2
35 0.9842370 0.8563016 2
38 0.7254415 0.5022279 2
39 0.5147399 0.6646237 2
40 0.5649032 0.5269890 2
41 0.6549369 0.3945912 2
42 0.5516358 0.6239680 2
45 0.6468016 0.7153619 2
46 0.6563411 0.5619520 2
47 0.7067819 0.8747652 2
50 0.8484266 0.9106415 2
52 0.7308605 0.7618075 2
56 0.6541389 0.6897037 2
58 0.9919688 0.9912455 2
61 0.7337411 0.8031263 2
63 0.6197967 0.7248485 2
64 0.8043085 0.6823161 2
67 0.8335984 0.8269856 2
70 0.8223687 0.5893472 2
71 0.8825491 0.8567182 2
74 0.6618860 0.6667555 2
77 0.9099707 0.8625390 2
78 0.7993220 0.7537365 2
79 0.6462522 0.6272202 2
86 0.6750035 0.6898848 2
90 0.6074356 0.6478351 2
95 0.9299434 0.9395287 2
96 0.9722002 0.9615102 2
99 0.6839361 0.7226945 2