在 ddply 中将字符串作为参数传递
passing strings as arguments in ddply
# Reproducible example data: 100 rows with a category, three geographic
# levels, a hospital id and a binary indicator flag.
set.seed(1)
df <- data.frame(
  # rep() produces only 50 values here; data.frame() recycles them to 100 rows.
  category = rep(LETTERS[1:5], each = 10),
  superregion = sample(c("EMEA", "LATAM", "AMER", "APAC"), 100, replace = TRUE),
  country = sample(paste0("Country", 1:8), 100, replace = TRUE),
  market = sample(paste0("Market", 1:12), 100, replace = TRUE),
  hospitalID = sample(seq(1, 50, 1), 100, replace = TRUE),
  IndicatorFlag = sample(seq(0, 1, 1), 100, replace = TRUE)
)
我正在尝试创建一个按级别(level)和二元指标 IndicatorFlag 分组的 SummaryTab,其中级别是一个参数,可以是国家(country)或市场(market)。因此,我想把级别作为参数传入,从而将以下两个示例合并为一个。
# Baseline 1: number of distinct hospitals per market and IndicatorFlag.
SummaryTab1 <- ddply(
  df,
  .(market, IndicatorFlag),
  summarize,
  counts = length(unique(hospitalID))
)
# Baseline 2: the same count per country and IndicatorFlag
# (overwrites the previous SummaryTab1).
SummaryTab1 <- ddply(
  df,
  .(country, IndicatorFlag),
  summarize,
  counts = length(unique(hospitalID))
)
以级别为参数,我尝试了以下操作:
# Attempt 1: build the grouping specification as text and hand it to .().
level<-c("market")
# Produces the single string "market , IndicatorFlag".
string<-paste(level,"IndicatorFlag",sep=" , ")
# NOTE(review): .() presumably captures the name `string` unevaluated, so this
# does not group by the market and IndicatorFlag columns — confirm in plyr docs.
SummaryTab1 = ddply(df,.(string) , summarize, counts=length(unique(hospitalID)))
但这样只会得到一个字符串。
我也试过这个
# Attempt 2: assemble the whole ddply() call as text, with the chosen level
# spliced in, and pass that text to as.formula().
# NOTE(review): as.formula() is meant for formula text; presumably it is not
# the right tool for running a constructed call — confirm.
SummaryTab1 =as.formula(paste0("ddply(df,.(",level,",IndicatorFlag),summarize, counts=length(unique(hospitalID)))"))
有什么关于如何做到这一点的建议吗?
我要做的是按级别和 IndicatorFlag 分组
根据 Miha 的建议,我正在尝试这样做(均无效):
library(dplyr)
# Attempt with dplyr: my_level is passed together with the bare name
# IndicatorFlag.
# NOTE(review): my_level is not defined in the question's visible code; it
# presumably comes from the suggested answer — confirm.
SumTab<-df %>% group_by_(my_level,IndicatorFlag) %>% summarise(counts = length(unique(hospitalID)))
# Attempt with plyr: .() receives the expression my_level[2] and IndicatorFlag.
SumTab<-ddply(df, .(my_level[2],IndicatorFlag), summarize, counts=length(unique(hospitalID)))
这就是你想要的吗...
library(plyr)
library(dplyr)
数据
# Reproducible example data: 100 rows with a category, three geographic
# levels, a hospital id and a binary indicator flag.
set.seed(1)
df <- data.frame(
  # rep() produces only 50 values here; data.frame() recycles them to 100 rows.
  category = rep(LETTERS[1:5], each = 10),
  superregion = sample(c("EMEA", "LATAM", "AMER", "APAC"), 100, replace = TRUE),
  country = sample(paste0("Country", 1:8), 100, replace = TRUE),
  market = sample(paste0("Market", 1:12), 100, replace = TRUE),
  hospitalID = sample(seq(1, 50, 1), 100, replace = TRUE),
  IndicatorFlag = sample(seq(0, 1, 1), 100, replace = TRUE)
)
解决方案
dplyr
根据 docendo discimus 在下面的评论部分给出的建议更新了解决方案。
# Name of the grouping column, supplied as a string.
my_level <- "market"

# Count the distinct hospitals within each value of the chosen column.
# group_by_() is deprecated; across(all_of()) is the supported way to group
# by column names held in a character vector.
df %>%
  group_by(across(all_of(my_level))) %>%
  summarise(counts = n_distinct(hospitalID))
market counts
1 Market1 5
2 Market10 4
3 Market11 3
4 Market12 9
5 Market2 12
6 Market3 10
7 Market4 12
8 Market5 8
9 Market6 9
10 Market7 7
11 Market8 4
12 Market9 5
# Multiple levels: candidate grouping columns, selected by position below.
my_level <- c("market", "country", "IndicatorFlag")

# Group by elements 1 and 3 (market and IndicatorFlag) and count the distinct
# hospitals per group. across(all_of()) replaces the deprecated
# group_by_(.dots = ...) form.
df %>%
  group_by(across(all_of(my_level[c(1, 3)]))) %>%
  summarise(counts = n_distinct(hospitalID))
market IndicatorFlag counts
1 Market1 0 1
2 Market1 1 5
3 Market10 0 2
4 Market10 1 3
5 Market11 1 3
6 Market12 0 7
7 Market12 1 3
8 Market2 0 4
9 Market2 1 10
10 Market3 0 7
.. ... ... ...
# Using all levels: group by every column named in my_level and count the
# distinct hospitals per group. across(all_of()) replaces the deprecated
# group_by_(.dots = ...) form.
df %>%
  group_by(across(all_of(my_level))) %>%
  summarise(counts = n_distinct(hospitalID))
market country IndicatorFlag counts
1 Market1 Country1 1 1
2 Market1 Country2 1 1
3 Market1 Country3 1 2
4 Market1 Country6 0 1
5 Market1 Country8 1 1
6 Market10 Country2 1 1
7 Market10 Country3 0 1
8 Market10 Country3 1 1
9 Market10 Country5 1 1
10 Market10 Country7 0 1
.. ... ... ... ...
plyr
# plyr variant: the grouping columns are passed to ddply() as a character
# vector (elements 1 and 3 of my_level); only the first rows are shown.
ddply(
  df,
  my_level[c(1, 3)],
  summarize,
  counts = n_distinct(hospitalID)
) %>%
  head()
market IndicatorFlag counts
1 Market1 0 1
2 Market1 1 5
3 Market10 0 2
4 Market10 1 3
5 Market11 1 3
6 Market12 0 7
# Reproducible example data: 100 rows with a category, three geographic
# levels, a hospital id and a binary indicator flag.
set.seed(1)
df <- data.frame(
  # rep() produces only 50 values here; data.frame() recycles them to 100 rows.
  category = rep(LETTERS[1:5], each = 10),
  superregion = sample(c("EMEA", "LATAM", "AMER", "APAC"), 100, replace = TRUE),
  country = sample(paste0("Country", 1:8), 100, replace = TRUE),
  market = sample(paste0("Market", 1:12), 100, replace = TRUE),
  hospitalID = sample(seq(1, 50, 1), 100, replace = TRUE),
  IndicatorFlag = sample(seq(0, 1, 1), 100, replace = TRUE)
)
我正在尝试创建一个按级别(level)和二元指标 IndicatorFlag 分组的 SummaryTab,其中级别是一个参数,可以是国家(country)或市场(market)。因此,我想把级别作为参数传入,从而将以下两个示例合并为一个。
# Baseline 1: number of distinct hospitals per market and IndicatorFlag.
SummaryTab1 <- ddply(
  df,
  .(market, IndicatorFlag),
  summarize,
  counts = length(unique(hospitalID))
)
# Baseline 2: the same count per country and IndicatorFlag
# (overwrites the previous SummaryTab1).
SummaryTab1 <- ddply(
  df,
  .(country, IndicatorFlag),
  summarize,
  counts = length(unique(hospitalID))
)
以级别为参数,我尝试了以下操作:
# Attempt 1: build the grouping specification as text and hand it to .().
level<-c("market")
# Produces the single string "market , IndicatorFlag".
string<-paste(level,"IndicatorFlag",sep=" , ")
# NOTE(review): .() presumably captures the name `string` unevaluated, so this
# does not group by the market and IndicatorFlag columns — confirm in plyr docs.
SummaryTab1 = ddply(df,.(string) , summarize, counts=length(unique(hospitalID)))
但这样只会得到一个字符串。
我也试过这个
# Attempt 2: assemble the whole ddply() call as text, with the chosen level
# spliced in, and pass that text to as.formula().
# NOTE(review): as.formula() is meant for formula text; presumably it is not
# the right tool for running a constructed call — confirm.
SummaryTab1 =as.formula(paste0("ddply(df,.(",level,",IndicatorFlag),summarize, counts=length(unique(hospitalID)))"))
有什么关于如何做到这一点的建议吗?
我要做的是按级别和 IndicatorFlag 分组
根据 Miha 的建议,我正在尝试这样做(均无效):
library(dplyr)
# Attempt with dplyr: my_level is passed together with the bare name
# IndicatorFlag.
# NOTE(review): my_level is not defined in the question's visible code; it
# presumably comes from the suggested answer — confirm.
SumTab<-df %>% group_by_(my_level,IndicatorFlag) %>% summarise(counts = length(unique(hospitalID)))
# Attempt with plyr: .() receives the expression my_level[2] and IndicatorFlag.
SumTab<-ddply(df, .(my_level[2],IndicatorFlag), summarize, counts=length(unique(hospitalID)))
这就是你想要的吗...
library(plyr)
library(dplyr)
数据
# Reproducible example data: 100 rows with a category, three geographic
# levels, a hospital id and a binary indicator flag.
set.seed(1)
df <- data.frame(
  # rep() produces only 50 values here; data.frame() recycles them to 100 rows.
  category = rep(LETTERS[1:5], each = 10),
  superregion = sample(c("EMEA", "LATAM", "AMER", "APAC"), 100, replace = TRUE),
  country = sample(paste0("Country", 1:8), 100, replace = TRUE),
  market = sample(paste0("Market", 1:12), 100, replace = TRUE),
  hospitalID = sample(seq(1, 50, 1), 100, replace = TRUE),
  IndicatorFlag = sample(seq(0, 1, 1), 100, replace = TRUE)
)
解决方案
dplyr
根据 docendo discimus 在下面的评论部分给出的建议更新了解决方案。
# Name of the grouping column, supplied as a string.
my_level <- "market"

# Count the distinct hospitals within each value of the chosen column.
# group_by_() is deprecated; across(all_of()) is the supported way to group
# by column names held in a character vector.
df %>%
  group_by(across(all_of(my_level))) %>%
  summarise(counts = n_distinct(hospitalID))
market counts
1 Market1 5
2 Market10 4
3 Market11 3
4 Market12 9
5 Market2 12
6 Market3 10
7 Market4 12
8 Market5 8
9 Market6 9
10 Market7 7
11 Market8 4
12 Market9 5
# Multiple levels: candidate grouping columns, selected by position below.
my_level <- c("market", "country", "IndicatorFlag")

# Group by elements 1 and 3 (market and IndicatorFlag) and count the distinct
# hospitals per group. across(all_of()) replaces the deprecated
# group_by_(.dots = ...) form.
df %>%
  group_by(across(all_of(my_level[c(1, 3)]))) %>%
  summarise(counts = n_distinct(hospitalID))
market IndicatorFlag counts
1 Market1 0 1
2 Market1 1 5
3 Market10 0 2
4 Market10 1 3
5 Market11 1 3
6 Market12 0 7
7 Market12 1 3
8 Market2 0 4
9 Market2 1 10
10 Market3 0 7
.. ... ... ...
# Using all levels: group by every column named in my_level and count the
# distinct hospitals per group. across(all_of()) replaces the deprecated
# group_by_(.dots = ...) form.
df %>%
  group_by(across(all_of(my_level))) %>%
  summarise(counts = n_distinct(hospitalID))
market country IndicatorFlag counts
1 Market1 Country1 1 1
2 Market1 Country2 1 1
3 Market1 Country3 1 2
4 Market1 Country6 0 1
5 Market1 Country8 1 1
6 Market10 Country2 1 1
7 Market10 Country3 0 1
8 Market10 Country3 1 1
9 Market10 Country5 1 1
10 Market10 Country7 0 1
.. ... ... ... ...
plyr
# plyr variant: the grouping columns are passed to ddply() as a character
# vector (elements 1 and 3 of my_level); only the first rows are shown.
ddply(
  df,
  my_level[c(1, 3)],
  summarize,
  counts = n_distinct(hospitalID)
) %>%
  head()
market IndicatorFlag counts
1 Market1 0 1
2 Market1 1 5
3 Market10 0 2
4 Market10 1 3
5 Market11 1 3
6 Market12 0 7