R:对多个 m 值使用 combn 函数并定义输出变量名
R: implement combn function with several m and define output variables
我正在使用以下代码获取名称以 "form" 开头的变量的所有可能组合 (m=2) 的平均值。
# Positions of the columns whose names start with "form".
k=which(grepl("^form",colnames(data)))
# Row means of every pair (m = 2) of the selected columns; one result column per pair.
combined <- combn(data[,k], 2, FUN = rowMeans)
# Name each result column by concatenating the names of the two source columns.
colnames(combined) <- combn(names(data[,k]), 2, paste0, collapse="")
# Append the pairwise means to the original data.
data <- cbind(data, combined)
数据集 "data" 如下:
structure(list(id = c(5309039, 5284969, 5300279, 5270289, 5259957,
5267086, 5173196, 5057536, 5246135, 5255558, 5241070, 5280194,
5112387, 444459, 5054590, 5048412, 5296390, 5093742, 5293520),
form13 = c(1300.81321145176, 1130.23869905075, 1292.03253463863,
1358.23586808642, 1250.66417156907, 1388.37813595599, 1277.89625553694,
1242.17552321015, 1275.95068420011, 1449.97932094858, 1494.93158409261,
1183.72005024492, 1319.72081010904, 1153.43556746197, 1451.47500658524,
1502.05308533551, 1641.66472289938, 1407.07852441646, 1444.3815517771
), form12 = c(1329.6, 1104.4, 1272, 1322.8, 1195.5, 1487.4,
1195.6, 1258, 1256.4, 1455, 1524, 1170, 1291.4, 1224.6, 1414,
1606, 1765.2, 1441, 1406.8), form11 = c(1325.578, 1201.752,
1346.42, 1424.884, 1328.03, 1367.262, 1294.928, 1278.99,
1330.482, 1493.54, 1524.19, 1242.21, 1379.522, 1178.458,
1438.37, 1475.15, 1611.236, 1426.11, 1431.014), form10 = c(1056.7264,
940.4956, 1076.29, 1149.9412, 1059.028, 1095.8536, 1027.9564,
1012.996, 1061.3296, 1214.386, 1243.156, 978.472, 1107.3616,
918.6304, 1162.6, 1197.124, 1324.8628, 1151.092, 1155.6952
), form9 = c(1265.95883621535, 1104.13796282321, 1292.61038190038,
1391.60226122629, 1269.10247448997, 1319.10781736395, 1226.47462059388,
1205.80097696249, 1272.24391797013, 1476.61400008329, 1514.11964245256,
1157.70450530205, 1334.62450699242, 1072.96302932, 1408.41424685422,
1453.98138963552, 1619.24856353662, 1393.1329826012, 1399.25113387699
), form8 = c(1482.14960970768, 1302.96011430734, 1455.11530997823,
1507.60187999797, 1403.62372119021, 1590.3115445541, 1392.70107590683,
1422.72772811208, 1440.68241714823, 1606.14610155669, 1656.53381495283,
1357.47229571355, 1476.63693689195, 1356.28387443873, 1567.80354390345,
1697.01564123702, 1829.93948069795, 1581.30521692185, 1561.45650301116
), form7 = c(1444.56088362196, 1256.09569669502, 1416.12716131828,
1471.33068319787, 1361.97012558123, 1558.32178921338, 1350.4820727773,
1382.06304580259, 1400.94715403591, 1574.97601740197, 1627.97203596215,
1313.42968513872, 1438.7628489193, 1312.17974558614, 1534.64866852904,
1670.54939207752, 1810.35399499291, 1548.84925168016, 1527.97307493173
), form6 = c(1199.39256844313, 1030.51525282711, 1173.91406615889,
1223.38008553142, 1125.38576782367, 1301.32988998026, 1115.09171006788,
1143.39035787661, 1160.31177216137, 1316.25318375141, 1363.74113364133,
1081.8903116367, 1194.19714454337, 1080.77028284113, 1280.11720270038,
1401.89327051093, 1527.16747332837, 1292.84186767351, 1274.13542778885
), form5 = c(1297.78687926793, 1159.12885718351, 1290.6491699916,
1344.46508388198, 1257.02131246849, 1368.96738018114, 1239.89545043121,
1250.12098970015, 1277.57642224122, 1419.04226152712, 1455.58342941928,
1202.60322079507, 1313.15664462902, 1177.98531965952, 1380.99558290387,
1461.37241431927, 1574.8610783177, 1384.16870680163, 1375.22939662201
), form4 = c(1335.97776730397, 1108.36308048125, 1324.2608292059,
1412.60257966574, 1269.05887158687, 1452.82443206729, 1240.94583733479,
1257.73161635649, 1302.80120256198, 1535.02507407783, 1595.00938916382,
1179.7286135352, 1361.20807332313, 1139.31698950533, 1472.56938122075,
1604.51232282192, 1790.81013902909, 1477.77823673001, 1463.10387273464
), form3 = c(1354.228, 1167.277, 1385.695, 1504.159, 1357.93,
1417.162, 1307.953, 1283.89, 1361.632, 1607.815, 1654.09,
1228.36, 1435.672, 1132.108, 1524.52, 1580.05, 1785.511,
1506.01, 1513.414), form2 = c(2275.7324829005, 1960.23260237236,
2259.163108513, 2384.94888103794, 2181.57337654262, 2442.86896126772,
2142.36120747078, 2165.7494001933, 2228.9072421228, 2562.48497832825,
2650.8148703194, 2057.68931533889, 2311.5302827576, 2002.33637794664,
2471.44922673607, 2664.88828208925, 2945.12448823488, 2479.00498842122,
2457.73611045874), form1 = c(1180.88828860349, 1056.82591443514,
1162.17101167316, 1198.5102427986, 1126.52065872992, 1255.77452231775,
1118.95833314255, 1139.74737411054, 1152.17835587263, 1266.73762443072,
1301.62370599969, 1094.56758356167, 1177.07157336578, 1093.7447765967,
1240.19104186727, 1329.65141749175, 1421.68162869499, 1249.53896489237,
1235.79664943772)), row.names = c(NA, -19L), class = c("tbl_df",
"tbl", "data.frame"))
>
这段代码运行良好。现在我想把它扩展到 m 从 2 到 8 的所有可能组合。我尝试了以下代码,但它不起作用。
x<-2:8
k=which(grepl("^form",colnames(data)))
# NOTE: seq_along(x) is the vector 1:7 (not 2:8), and combn() accepts only a
# single combination size m, so both combn() calls fail with the error below.
combined <- combn(data[,k], seq_along(x), FUN = rowMeans)
colnames(combined) <- combn(names(data[,k]), seq_along(x), paste0, collapse="")
data <- cbind(data, combined)
因为我收到以下错误:
> x<-2:8
> k=which(grepl("^form",colnames(data)))
> combined <- combn(data[,k], seq_along(x), FUN = rowMeans)
**Error in combn(data[, k], seq_along(x), FUN = rowMeans) :
length(m) == 1L is not TRUE**
> colnames(combined) <- combn(names(data[,k]), seq_along(x), paste0, collapse="")
**Error in combn(names(data[, k]), seq_along(x), paste0, collapse = "") :
length(m) == 1L is not TRUE**
> data <- cbind(data, combined)
我哪里错了?
此外,我想在所有生成的变量的名称中添加以下前缀 "comb_"。我该如何修改上面的代码?
谢谢!
函数 combn 的参数 m(每个组合的元素个数)一次只能取一个值,所以你需要用 lapply 对各个 m 进行迭代,最后用 do.call(cbind, ...) 把结果合并起来:
首先我们定义一个函数,对给定的组合大小 x 进行计算:
# Row means for every size-x combination of the columns of DATA.
#
# Arguments:
#   x      A single combination size (combn() accepts only one m per call).
#   DATA   Data frame whose columns are combined.
#   prefix Optional string prepended to every generated column name, e.g.
#          "comb_". Defaults to "" so existing calls produce the same names.
#
# Returns the result of combn(DATA, x, FUN = rowMeans) with column names built
# by concatenating the names of the columns in each combination.
func <- function(x, DATA, prefix = "") {
  mat <- combn(DATA, x, FUN = rowMeans)
  # combn() enumerates the names in the same order as the columns above, so
  # the labels line up with the values.
  colnames(mat) <- paste0(prefix, combn(names(DATA), x, paste0, collapse = ""))
  mat
}
然后我们迭代:
# Positions of the columns whose names begin with "form".
k <- grep("^form", colnames(data))
# One matrix of combination means per size m = 2..8, bound column-wise.
combined <- do.call(
  cbind,
  lapply(2:8, function(m) func(m, DATA = data[, k]))
)
如果你熟悉purrr,你也可以这样做:
library(purrr)
library(dplyr)
# as_tibble() replaces the deprecated as.tibble(). Each per-size matrix from
# func() is converted to a tibble and the pieces are bound column-wise.
combined <- 2:8 %>%
  map(~ as_tibble(func(.x, DATA = data[, k]))) %>%
  bind_cols()
原因很简单,combn
一次只取一个m
。只需使用 sapply
遍历 m
即可。为了一步得到列名,我们可以使用'colnames<-()'
。 'colnames<-'(x, names)
实际上与 colnames(x) <- names
相同,但优点是一切都在 RHS(赋值号右侧)上。 列名中的 "form"
前缀可以用gsub
删除。
# Locate the "form*" columns by name instead of hard-coding positions 2:14.
k <- grep("^form", colnames(data))
# For each combination size m, compute the row means of every m-column
# combination and label each result column "comb.<a>.<b>..." using the source
# names with "form" removed. simplify = FALSE guarantees a list (one matrix
# per m) even in cases where sapply() would otherwise collapse the results
# into an array, which do.call(cbind, ...) could not bind correctly.
combined.2.lst <- sapply(2:8, function(m) {
  `colnames<-`(
    combn(data[, k], m, rowMeans),
    combn(names(data[, k]), m, function(x) {
      paste0("comb.", paste0(gsub("form", "", x), collapse = "."))
    })
  )
}, simplify = FALSE)
这会得到一个列表,然后可以用 cbind
将其按列合并。
# Bind the elements of combined.2.lst column-wise into a single object.
combined.2 <- do.call(cbind, combined.2.lst)
# Number of rows and columns of the combined result.
dim(combined.2)
# [1] 19 7085
结果
combined.2[1:5, c(1, 50, 100, 500, 1000, 5000)] # example columns
# comb.13.12 comb.9.1 comb.13.10.9 comb.13.10.2.1 comb.9.5.4.3 comb.13.7.6.5.4.3.2
# [1,] 1315.207 1223.424 1207.833 1453.540 1313.488 1458.356
# [2,] 1117.319 1080.482 1058.291 1271.948 1134.727 1258.836
# [3,] 1282.016 1227.391 1220.311 1447.414 1323.304 1448.835
# [4,] 1340.518 1295.056 1299.926 1522.909 1413.207 1528.446
# [5,] 1223.082 1197.812 1192.932 1404.447 1288.278 1400.515
最后只用cbind(data, combined.2)
。
您需要使用 lapply()
或 sapply()
对 m<-2:8
进行迭代。我试图保留您代码的主要结构并进行最少的更改以使其正常工作:
# Combination sizes to evaluate; combn() takes one size per call, so iterate.
m <- 2:8
k <- which(grepl("^form", colnames(data)))
form_data <- data[, k]
# One matrix of row means per combination size, bound column-wise in one call.
combined <- do.call(
  cbind,
  lapply(m, function(size) combn(form_data, size, FUN = rowMeans))
)
# Column names follow the same size-then-combination order as the values; the
# "comb_" prefix requested in the question marks the generated variables.
colnames(combined) <- paste0(
  "comb_",
  unlist(lapply(m, function(size) {
    combn(names(form_data), size, paste0, collapse = "")
  }))
)
data <- cbind(data, combined)
我正在使用以下代码获取名称以 "form" 开头的变量的所有可能组合 (m=2) 的平均值。
# Positions of the columns whose names start with "form".
k=which(grepl("^form",colnames(data)))
# Row means of every pair (m = 2) of the selected columns; one result column per pair.
combined <- combn(data[,k], 2, FUN = rowMeans)
# Name each result column by concatenating the names of the two source columns.
colnames(combined) <- combn(names(data[,k]), 2, paste0, collapse="")
# Append the pairwise means to the original data.
data <- cbind(data, combined)
数据集 "data" 如下:
structure(list(id = c(5309039, 5284969, 5300279, 5270289, 5259957,
5267086, 5173196, 5057536, 5246135, 5255558, 5241070, 5280194,
5112387, 444459, 5054590, 5048412, 5296390, 5093742, 5293520),
form13 = c(1300.81321145176, 1130.23869905075, 1292.03253463863,
1358.23586808642, 1250.66417156907, 1388.37813595599, 1277.89625553694,
1242.17552321015, 1275.95068420011, 1449.97932094858, 1494.93158409261,
1183.72005024492, 1319.72081010904, 1153.43556746197, 1451.47500658524,
1502.05308533551, 1641.66472289938, 1407.07852441646, 1444.3815517771
), form12 = c(1329.6, 1104.4, 1272, 1322.8, 1195.5, 1487.4,
1195.6, 1258, 1256.4, 1455, 1524, 1170, 1291.4, 1224.6, 1414,
1606, 1765.2, 1441, 1406.8), form11 = c(1325.578, 1201.752,
1346.42, 1424.884, 1328.03, 1367.262, 1294.928, 1278.99,
1330.482, 1493.54, 1524.19, 1242.21, 1379.522, 1178.458,
1438.37, 1475.15, 1611.236, 1426.11, 1431.014), form10 = c(1056.7264,
940.4956, 1076.29, 1149.9412, 1059.028, 1095.8536, 1027.9564,
1012.996, 1061.3296, 1214.386, 1243.156, 978.472, 1107.3616,
918.6304, 1162.6, 1197.124, 1324.8628, 1151.092, 1155.6952
), form9 = c(1265.95883621535, 1104.13796282321, 1292.61038190038,
1391.60226122629, 1269.10247448997, 1319.10781736395, 1226.47462059388,
1205.80097696249, 1272.24391797013, 1476.61400008329, 1514.11964245256,
1157.70450530205, 1334.62450699242, 1072.96302932, 1408.41424685422,
1453.98138963552, 1619.24856353662, 1393.1329826012, 1399.25113387699
), form8 = c(1482.14960970768, 1302.96011430734, 1455.11530997823,
1507.60187999797, 1403.62372119021, 1590.3115445541, 1392.70107590683,
1422.72772811208, 1440.68241714823, 1606.14610155669, 1656.53381495283,
1357.47229571355, 1476.63693689195, 1356.28387443873, 1567.80354390345,
1697.01564123702, 1829.93948069795, 1581.30521692185, 1561.45650301116
), form7 = c(1444.56088362196, 1256.09569669502, 1416.12716131828,
1471.33068319787, 1361.97012558123, 1558.32178921338, 1350.4820727773,
1382.06304580259, 1400.94715403591, 1574.97601740197, 1627.97203596215,
1313.42968513872, 1438.7628489193, 1312.17974558614, 1534.64866852904,
1670.54939207752, 1810.35399499291, 1548.84925168016, 1527.97307493173
), form6 = c(1199.39256844313, 1030.51525282711, 1173.91406615889,
1223.38008553142, 1125.38576782367, 1301.32988998026, 1115.09171006788,
1143.39035787661, 1160.31177216137, 1316.25318375141, 1363.74113364133,
1081.8903116367, 1194.19714454337, 1080.77028284113, 1280.11720270038,
1401.89327051093, 1527.16747332837, 1292.84186767351, 1274.13542778885
), form5 = c(1297.78687926793, 1159.12885718351, 1290.6491699916,
1344.46508388198, 1257.02131246849, 1368.96738018114, 1239.89545043121,
1250.12098970015, 1277.57642224122, 1419.04226152712, 1455.58342941928,
1202.60322079507, 1313.15664462902, 1177.98531965952, 1380.99558290387,
1461.37241431927, 1574.8610783177, 1384.16870680163, 1375.22939662201
), form4 = c(1335.97776730397, 1108.36308048125, 1324.2608292059,
1412.60257966574, 1269.05887158687, 1452.82443206729, 1240.94583733479,
1257.73161635649, 1302.80120256198, 1535.02507407783, 1595.00938916382,
1179.7286135352, 1361.20807332313, 1139.31698950533, 1472.56938122075,
1604.51232282192, 1790.81013902909, 1477.77823673001, 1463.10387273464
), form3 = c(1354.228, 1167.277, 1385.695, 1504.159, 1357.93,
1417.162, 1307.953, 1283.89, 1361.632, 1607.815, 1654.09,
1228.36, 1435.672, 1132.108, 1524.52, 1580.05, 1785.511,
1506.01, 1513.414), form2 = c(2275.7324829005, 1960.23260237236,
2259.163108513, 2384.94888103794, 2181.57337654262, 2442.86896126772,
2142.36120747078, 2165.7494001933, 2228.9072421228, 2562.48497832825,
2650.8148703194, 2057.68931533889, 2311.5302827576, 2002.33637794664,
2471.44922673607, 2664.88828208925, 2945.12448823488, 2479.00498842122,
2457.73611045874), form1 = c(1180.88828860349, 1056.82591443514,
1162.17101167316, 1198.5102427986, 1126.52065872992, 1255.77452231775,
1118.95833314255, 1139.74737411054, 1152.17835587263, 1266.73762443072,
1301.62370599969, 1094.56758356167, 1177.07157336578, 1093.7447765967,
1240.19104186727, 1329.65141749175, 1421.68162869499, 1249.53896489237,
1235.79664943772)), row.names = c(NA, -19L), class = c("tbl_df",
"tbl", "data.frame"))
>
这段代码运行良好。现在我想把它扩展到 m 从 2 到 8 的所有可能组合。我尝试了以下代码,但它不起作用。
x<-2:8
k=which(grepl("^form",colnames(data)))
# NOTE: seq_along(x) is the vector 1:7 (not 2:8), and combn() accepts only a
# single combination size m, so both combn() calls fail with the error below.
combined <- combn(data[,k], seq_along(x), FUN = rowMeans)
colnames(combined) <- combn(names(data[,k]), seq_along(x), paste0, collapse="")
data <- cbind(data, combined)
因为我收到以下错误:
> x<-2:8
> k=which(grepl("^form",colnames(data)))
> combined <- combn(data[,k], seq_along(x), FUN = rowMeans)
**Error in combn(data[, k], seq_along(x), FUN = rowMeans) :
length(m) == 1L is not TRUE**
> colnames(combined) <- combn(names(data[,k]), seq_along(x), paste0, collapse="")
**Error in combn(names(data[, k]), seq_along(x), paste0, collapse = "") :
length(m) == 1L is not TRUE**
> data <- cbind(data, combined)
我哪里错了?
此外,我想在所有生成的变量的名称中添加以下前缀 "comb_"。我该如何修改上面的代码?
谢谢!
函数 combn 的参数 m(每个组合的元素个数)一次只能取一个值,所以你需要用 lapply 对各个 m 进行迭代,最后用 do.call(cbind, ...) 把结果合并起来:
首先我们定义一个函数,对给定的组合大小 x 进行计算:
# Row means for every size-x combination of the columns of DATA.
#
# Arguments:
#   x      A single combination size (combn() accepts only one m per call).
#   DATA   Data frame whose columns are combined.
#   prefix Optional string prepended to every generated column name, e.g.
#          "comb_". Defaults to "" so existing calls produce the same names.
#
# Returns the result of combn(DATA, x, FUN = rowMeans) with column names built
# by concatenating the names of the columns in each combination.
func <- function(x, DATA, prefix = "") {
  mat <- combn(DATA, x, FUN = rowMeans)
  # combn() enumerates the names in the same order as the columns above, so
  # the labels line up with the values.
  colnames(mat) <- paste0(prefix, combn(names(DATA), x, paste0, collapse = ""))
  mat
}
然后我们迭代:
# Positions of the columns whose names begin with "form".
k <- grep("^form", colnames(data))
# One matrix of combination means per size m = 2..8, bound column-wise.
combined <- do.call(
  cbind,
  lapply(2:8, function(m) func(m, DATA = data[, k]))
)
如果你熟悉purrr,你也可以这样做:
library(purrr)
library(dplyr)
# as_tibble() replaces the deprecated as.tibble(). Each per-size matrix from
# func() is converted to a tibble and the pieces are bound column-wise.
combined <- 2:8 %>%
  map(~ as_tibble(func(.x, DATA = data[, k]))) %>%
  bind_cols()
原因很简单,combn
一次只取一个m
。只需使用 sapply
遍历 m
即可。为了一步得到列名,我们可以使用'colnames<-()'
。 'colnames<-'(x, names)
实际上与 colnames(x) <- names
相同,但优点是一切都在 RHS(赋值号右侧)上。 列名中的 "form"
前缀可以用gsub
删除。
# Locate the "form*" columns by name instead of hard-coding positions 2:14.
k <- grep("^form", colnames(data))
# For each combination size m, compute the row means of every m-column
# combination and label each result column "comb.<a>.<b>..." using the source
# names with "form" removed. simplify = FALSE guarantees a list (one matrix
# per m) even in cases where sapply() would otherwise collapse the results
# into an array, which do.call(cbind, ...) could not bind correctly.
combined.2.lst <- sapply(2:8, function(m) {
  `colnames<-`(
    combn(data[, k], m, rowMeans),
    combn(names(data[, k]), m, function(x) {
      paste0("comb.", paste0(gsub("form", "", x), collapse = "."))
    })
  )
}, simplify = FALSE)
这会得到一个列表,然后可以用 cbind
将其按列合并。
# Bind the elements of combined.2.lst column-wise into a single object.
combined.2 <- do.call(cbind, combined.2.lst)
# Number of rows and columns of the combined result.
dim(combined.2)
# [1] 19 7085
结果
combined.2[1:5, c(1, 50, 100, 500, 1000, 5000)] # example columns
# comb.13.12 comb.9.1 comb.13.10.9 comb.13.10.2.1 comb.9.5.4.3 comb.13.7.6.5.4.3.2
# [1,] 1315.207 1223.424 1207.833 1453.540 1313.488 1458.356
# [2,] 1117.319 1080.482 1058.291 1271.948 1134.727 1258.836
# [3,] 1282.016 1227.391 1220.311 1447.414 1323.304 1448.835
# [4,] 1340.518 1295.056 1299.926 1522.909 1413.207 1528.446
# [5,] 1223.082 1197.812 1192.932 1404.447 1288.278 1400.515
最后只用cbind(data, combined.2)
。
您需要使用 lapply()
或 sapply()
对 m<-2:8
进行迭代。我试图保留您代码的主要结构并进行最少的更改以使其正常工作:
# Combination sizes to evaluate; combn() takes one size per call, so iterate.
m <- 2:8
k <- which(grepl("^form", colnames(data)))
form_data <- data[, k]
# One matrix of row means per combination size, bound column-wise in one call.
combined <- do.call(
  cbind,
  lapply(m, function(size) combn(form_data, size, FUN = rowMeans))
)
# Column names follow the same size-then-combination order as the values; the
# "comb_" prefix requested in the question marks the generated variables.
colnames(combined) <- paste0(
  "comb_",
  unlist(lapply(m, function(size) {
    combn(names(form_data), size, paste0, collapse = "")
  }))
)
data <- cbind(data, combined)