在 R 中简化条件 table 循环而不使用矩阵表示法
Simplify conditional table loop without matrix notation in R
参考下面的示例，我想知道是否有更高效的包或函数（例如 data.table 包、dplyr 包，或类似 lapply() 的函数）可以对匹配的字符串元素进行条件计数并制表？
# Full list of produce items; repeated entries are intentional.
produce <- c(
  "apple", "blueberry", "blueberry", "corn",
  "horseradish", "rutabega", "rutabega", "tomato"
)
# Shorter list of vegetables to look up in `produce`.
veggies <- c("carrot", "corn", "horseradish", "rutabega")

# Zero-filled count matrix: one row per distinct veggie, one column per
# distinct produce item.
basket <- matrix(
  0,
  nrow = length(unique(veggies)),
  ncol = length(unique(produce)),
  dimnames = list(unique(veggies), unique(produce))
)
basket
输出:
# apple blueberry corn horseradish rutabega tomato
# carrot 0 0 0 0 0 0
# corn 0 0 0 0 0 0
# horseradish 0 0 0 0 0 0
# rutabega 0 0 0 0 0 0
统计两个列表中共有元素的出现次数：
# For every veggie that also appears in `produce`, count how many times it
# occurs there and add that count to the matching [veggie, veggie] cell of
# `basket`.
#
# Cells are addressed by row/column *name* rather than by position in
# `veggies`, so the update stays correct when `veggies` contains duplicates
# (the rows of `basket` are built from unique(veggies)).
shared <- intersect(veggies, produce)
if (length(shared) > 0L) {
  # Restricting the factor levels to `shared` drops every other produce item
  # and yields the counts in the same order as `shared`.
  shared_counts <- as.vector(table(factor(produce, levels = shared)))
  # A two-column character matrix indexes a matrix by (row name, column name).
  shared_cells <- cbind(shared, shared)
  basket[shared_cells] <- basket[shared_cells] + shared_counts
}
basket
我希望用更快/更优雅的方法得到的结果：
# apple blueberry corn horseradish rutabega tomato
# carrot 0 0 0 0 0 0
# corn 0 0 1 0 0 0
# horseradish 0 0 0 1 0 0
# rutabega 0 0 0 0 2 0
# data.table approach: wrap `produce` in a one-column data.table, cast it wide
# with `produce` on both sides of the formula, then subset the result with
# `veggies`.
# NOTE(review): no aggregate function is passed; presumably dcast falls back
# to counting rows (length) here, and the cast result is keyed on `produce` so
# that `[veggies]` acts as a keyed lookup -- confirm against the data.table docs.
# Per the sample output shown after this snippet, a veggie that never occurs
# in `produce` ("carrot") comes back as a row of NA rather than 0.
library(data.table)
dcast(data.table(produce), produce~produce)[veggies]
# produce apple blueberry corn horseradish rutabega tomato
#1: carrot NA NA NA NA NA NA
#2: corn 0 0 1 0 0 0
#3: horseradish 0 0 0 1 0 0
#4: rutabega 0 0 0 0 2 0
我在 base R 中能想到的最不丑陋的解决方案:
# Give the factor every name from both lists as a level, so that veggies which
# never occur in `produce` (e.g. "carrot") still get an all-zero row.
newprod <- factor(produce, levels = union(produce, veggies))
# Cross-tabulate the factor against itself and keep only the veggie rows.
table(newprod, newprod)[veggies, ]
# newprod
#newprod apple blueberry corn horseradish rutabega tomato carrot
# carrot 0 0 0 0 0 0 0
# corn 0 0 1 0 0 0 0
# horseradish 0 0 0 1 0 0 0
# rutabega 0 0 0 0 2 0 0
或者全部在一行丑陋的代码中:
# Same cross-tabulation as a single expression: the factor (levels drawn from
# both lists) is passed to table() twice, then only the veggie rows are kept.
do.call(table, rep(list(factor(produce, levels = union(produce, veggies))), 2L))[veggies, ]
参考下面的示例，我想知道是否有更高效的包或函数（例如 data.table 包、dplyr 包，或类似 lapply() 的函数）可以对匹配的字符串元素进行条件计数并制表？
# Full list of produce items; repeated entries are intentional.
produce <- c(
  "apple", "blueberry", "blueberry", "corn",
  "horseradish", "rutabega", "rutabega", "tomato"
)
# Shorter list of vegetables to look up in `produce`.
veggies <- c("carrot", "corn", "horseradish", "rutabega")

# Zero-filled count matrix: one row per distinct veggie, one column per
# distinct produce item.
basket <- matrix(
  0,
  nrow = length(unique(veggies)),
  ncol = length(unique(produce)),
  dimnames = list(unique(veggies), unique(produce))
)
basket
输出:
# apple blueberry corn horseradish rutabega tomato
# carrot 0 0 0 0 0 0
# corn 0 0 0 0 0 0
# horseradish 0 0 0 0 0 0
# rutabega 0 0 0 0 0 0
统计两个列表中共有元素的出现次数：
# For every veggie that also appears in `produce`, count how many times it
# occurs there and add that count to the matching [veggie, veggie] cell of
# `basket`.
#
# Cells are addressed by row/column *name* rather than by position in
# `veggies`, so the update stays correct when `veggies` contains duplicates
# (the rows of `basket` are built from unique(veggies)).
shared <- intersect(veggies, produce)
if (length(shared) > 0L) {
  # Restricting the factor levels to `shared` drops every other produce item
  # and yields the counts in the same order as `shared`.
  shared_counts <- as.vector(table(factor(produce, levels = shared)))
  # A two-column character matrix indexes a matrix by (row name, column name).
  shared_cells <- cbind(shared, shared)
  basket[shared_cells] <- basket[shared_cells] + shared_counts
}
basket
我希望用更快/更优雅的方法得到的结果：
# apple blueberry corn horseradish rutabega tomato
# carrot 0 0 0 0 0 0
# corn 0 0 1 0 0 0
# horseradish 0 0 0 1 0 0
# rutabega 0 0 0 0 2 0
# data.table approach: wrap `produce` in a one-column data.table, cast it wide
# with `produce` on both sides of the formula, then subset the result with
# `veggies`.
# NOTE(review): no aggregate function is passed; presumably dcast falls back
# to counting rows (length) here, and the cast result is keyed on `produce` so
# that `[veggies]` acts as a keyed lookup -- confirm against the data.table docs.
# Per the sample output shown after this snippet, a veggie that never occurs
# in `produce` ("carrot") comes back as a row of NA rather than 0.
library(data.table)
dcast(data.table(produce), produce~produce)[veggies]
# produce apple blueberry corn horseradish rutabega tomato
#1: carrot NA NA NA NA NA NA
#2: corn 0 0 1 0 0 0
#3: horseradish 0 0 0 1 0 0
#4: rutabega 0 0 0 0 2 0
我在 base R 中能想到的最不丑陋的解决方案:
# Give the factor every name from both lists as a level, so that veggies which
# never occur in `produce` (e.g. "carrot") still get an all-zero row.
newprod <- factor(produce, levels = union(produce, veggies))
# Cross-tabulate the factor against itself and keep only the veggie rows.
table(newprod, newprod)[veggies, ]
# newprod
#newprod apple blueberry corn horseradish rutabega tomato carrot
# carrot 0 0 0 0 0 0 0
# corn 0 0 1 0 0 0 0
# horseradish 0 0 0 1 0 0 0
# rutabega 0 0 0 0 2 0 0
或者全部在一行丑陋的代码中:
# Same cross-tabulation as a single expression: the factor (levels drawn from
# both lists) is passed to table() twice, then only the veggie rows are kept.
do.call(table, rep(list(factor(produce, levels = union(produce, veggies))), 2L))[veggies, ]