在 R 中整理分类数据的格式
Formatting data in R categorical
设有如下数据:
# Sample data: 13 observations, each with a reason code ("v"/"s"), a
# location code ("c"/"h") and a binary 0/1 outcome.
reason <- rep(c("v", "s", "v", "s"), times = c(5, 3, 3, 2))
location <- rep(c("c", "h"), times = c(8, 5))
zero_one <- c(1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0)
df <- data.frame(
  reason = reason,
  location = location,
  zero_one = zero_one
)
有没有一种简单的方法可以将 "df" 转换为 "DF",其中 "DF" 具有以下形状:
reason location #zeros #ones
v c 1 4
s c 1 2
v h 2 1
s h 1 1
你可以用 dplyr 非常简单地做到这一点:
library(dplyr)

# Count how many 0s and 1s occur in each reason/location group.
# The column is named `zero_one`; the earlier `zero_ones` did not exist
# in df and made summarize() fail with "object not found".
df %>%
  group_by(reason, location) %>%
  summarize(
    zeros = sum(zero_one == 0),
    ones = sum(zero_one == 1)
  )
# reason location zeros ones
#1 s c 1 2
#2 s h 1 1
#3 v c 1 4
#4 v h 2 1
您也可以使用 reshape2 包中的 dcast:
library(reshape2)

# Relabel zero_one as a factor with levels "zeros"/"ones", then cast to
# wide form: one row per remaining-column combination, one column per
# label, each cell holding the number of matching rows.
dcast(
  transform(
    df,
    zero_one = factor(zero_one, levels = 0:1, labels = c("zeros", "ones"))
  ),
  ... ~ zero_one,
  value.var = "zero_one",
  fun.aggregate = length
)
# reason location zeros ones
#1 s c 1 2
#2 s h 1 1
#3 v c 1 4
#4 v h 2 1
或使用 data.table(与 @jalapic 的方法类似):
# setDT() comes from data.table, so the package must be attached first.
library(data.table)

# Convert df to a data.table, then count zero and non-zero values of
# zero_one within each reason/location group.
setDT(df)[
  ,
  list(zeros = sum(zero_one == 0), ones = sum(zero_one != 0)),
  by = .(reason, location)
][]
# reason location zeros ones
#1: v c 1 4
#2: s c 1 2
#3: v h 2 1
#4: s h 1 1
或者使用 base R:
# Sum logical indicators for "is zero" and "is non-zero" over every
# combination of the remaining columns of df.
aggregate(
  cbind(zeros = !zero_one, ones = !!zero_one) ~ .,
  data = df,
  FUN = sum
)
# reason location zeros ones
#1 s c 1 2
#2 v c 1 4
#3 s h 1 1
#4 v h 2 1
设有如下数据:
# Sample data: 13 observations, each with a reason code ("v"/"s"), a
# location code ("c"/"h") and a binary 0/1 outcome.
reason <- rep(c("v", "s", "v", "s"), times = c(5, 3, 3, 2))
location <- rep(c("c", "h"), times = c(8, 5))
zero_one <- c(1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0)
df <- data.frame(
  reason = reason,
  location = location,
  zero_one = zero_one
)
有没有一种简单的方法可以将 "df" 转换为 "DF",其中 "DF" 具有以下形状:
reason location #zeros #ones
v c 1 4
s c 1 2
v h 2 1
s h 1 1
你可以用 dplyr 非常简单地做到这一点:
library(dplyr)

# Count how many 0s and 1s occur in each reason/location group.
# The column is named `zero_one`; the earlier `zero_ones` did not exist
# in df and made summarize() fail with "object not found".
df %>%
  group_by(reason, location) %>%
  summarize(
    zeros = sum(zero_one == 0),
    ones = sum(zero_one == 1)
  )
# reason location zeros ones
#1 s c 1 2
#2 s h 1 1
#3 v c 1 4
#4 v h 2 1
您也可以使用 reshape2 包中的 dcast:
library(reshape2)

# Relabel zero_one as a factor with levels "zeros"/"ones", then cast to
# wide form: one row per remaining-column combination, one column per
# label, each cell holding the number of matching rows.
dcast(
  transform(
    df,
    zero_one = factor(zero_one, levels = 0:1, labels = c("zeros", "ones"))
  ),
  ... ~ zero_one,
  value.var = "zero_one",
  fun.aggregate = length
)
# reason location zeros ones
#1 s c 1 2
#2 s h 1 1
#3 v c 1 4
#4 v h 2 1
或使用 data.table(与 @jalapic 的方法类似):
# setDT() comes from data.table, so the package must be attached first.
library(data.table)

# Convert df to a data.table, then count zero and non-zero values of
# zero_one within each reason/location group.
setDT(df)[
  ,
  list(zeros = sum(zero_one == 0), ones = sum(zero_one != 0)),
  by = .(reason, location)
][]
# reason location zeros ones
#1: v c 1 4
#2: s c 1 2
#3: v h 2 1
#4: s h 1 1
或者使用 base R:
# Sum logical indicators for "is zero" and "is non-zero" over every
# combination of the remaining columns of df.
aggregate(
  cbind(zeros = !zero_one, ones = !!zero_one) ~ .,
  data = df,
  FUN = sum
)
# reason location zeros ones
#1 s c 1 2
#2 v c 1 4
#3 s h 1 1
#4 v h 2 1