如何在使用 `complete` 时让变量保持 `cut` 的顺序
How to keep the variable order in `cut` order while using `complete`
我正在使用 cut
生成各年龄组的人数小计。示例数据和代码为:
# Reproducible sample: 100 ages drawn with replacement from 0..110.
set.seed(12345)
AGE <- sample(0:110, 100, replace = TRUE)
Sample.data <- data.frame(AGE)

# Bin the ages with cut() and count the people in each bin.
# Assumes dplyr is attached (for %>%, group_by(), summarise() and n()).
# NOTE(review): labels are assigned by position to the intervals
# (-Inf,0], (0,0.001], (0.001,0.083], (0.083,2], (2,13], (13,65], (65,1000],
# which do not match the ranges written in the labels from "Neonate"
# onward (there is no break at 18) -- confirm the intended bins.
age_breaks <- c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000)
age_labels <- c(
  "Foetus(0 yr)",
  "Neonate (0.001 - 0.082 yr)",
  "Infant(0.083-1.999 yrs)",
  "Child(2-12.999 yrs)",
  "Adolescent(13-17.999 yrs)",
  "Adult(18-64.999 yrs.)",
  "Elderly(65-199 yrs)"
)
summary_data <- Sample.data %>%
  group_by(
    grp = cut(AGE, breaks = age_breaks, right = TRUE, labels = age_labels)
  ) %>%
  summarise(TotalPeople = n())
这是我得到的:
我想在表中保留没有观测值的年龄组,所以我添加了 complete 和 fill。这样可以为空的年龄组填入 0,但年龄组的顺序也随之改变了,这不是我想要的。代码和结果如下:
# Same binning as the first attempt, then pad the summary with the age
# groups that have no observations.
# Assumes dplyr and tidyr are attached and Sample.data already exists.
age_breaks <- c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000)
age_labels <- c(
  "Foetus(0 yr)",
  "Neonate (0.001 - 0.082 yr)",
  "Infant(0.083-1.999 yrs)",
  "Child(2-12.999 yrs)",
  "Adolescent(13-17.999 yrs)",
  "Adult(18-64.999 yrs.)",
  "Elderly(65-199 yrs)"
)
summary_data <- Sample.data %>%
  group_by(
    grp = cut(AGE, breaks = age_breaks, right = TRUE, labels = age_labels)
  ) %>%
  summarise(TotalPeople = n()) %>%
  # levels(grp) is a character vector, so grp is completed from plain text
  # values rather than from the factor itself; this is the step after which
  # the age groups no longer appear in cut() order.
  complete(grp = levels(grp), fill = list(TotalPeople = 0))
有没有办法让年龄组按 cut 的切分顺序排列?或者有没有其他方法可以保留观测数为 0 的年龄组,同时不改变年龄组的顺序?理想的结果应该是这样的:
您可以通过将 .drop = FALSE
添加到 group_by
来防止删除组:
library(dplyr)

# Reproducible sample: 100 ages drawn with replacement from 0..110.
set.seed(12345)
AGE <- sample(0:110, 100, replace = TRUE)
Sample.data <- data.frame(AGE)

# Bin the ages and count per bin. `.drop = FALSE` keeps the factor levels
# that have no rows, so empty age groups appear with a count of 0 and the
# rows stay in factor-level (cut) order.
age_breaks <- c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000)
age_labels <- c(
  "Foetus(0 yr)",
  "Neonate (0.001 - 0.082 yr)",
  "Infant(0.083-1.999 yrs)",
  "Child(2-12.999 yrs)",
  "Adolescent(13-17.999 yrs)",
  "Adult(18-64.999 yrs.)",
  "Elderly(65-199 yrs)"
)
summary_data <- Sample.data %>%
  group_by(
    grp = cut(AGE, breaks = age_breaks, right = TRUE, labels = age_labels),
    .drop = FALSE
  ) %>%
  summarise(TotalPeople = n())
#> `summarise()` ungrouping output (override with `.groups` argument)
summary_data
#> # A tibble: 7 x 2
#> grp TotalPeople
#> <fct> <int>
#> 1 Foetus(0 yr) 1
#> 2 Neonate (0.001 - 0.082 yr) 0
#> 3 Infant(0.083-1.999 yrs) 0
#> 4 Child(2-12.999 yrs) 2
#> 5 Adolescent(13-17.999 yrs) 14
#> 6 Adult(18-64.999 yrs.) 37
#> 7 Elderly(65-199 yrs) 46
您可以在 cut 中添加 ordered_result = TRUE。使用 table 可以得到计数,再用 as.data.frame 把每个组的计数转换为 data.frame。
# Bin the ages into an ordered factor, then tabulate it. table() reports
# every level of the factor, so groups without observations show up with a
# frequency of 0 and the rows follow the level order.
# Expects Sample.data (with an AGE column) to exist already.
age_breaks <- c(-Inf, 0, 0.082, 2, 13, 18, 65, Inf)
age_labels <- c(
  "Foetus(0 yr)",
  "Neonate (0 - 0.082] yrs",
  "Infant (0.082-2] yrs",
  "Child (2-13] yrs",
  "Adolescent (13-18] yrs",
  "Adult (18-65] yrs",
  "Elderly >65 yrs"
)
grp <- cut(
  Sample.data$AGE,
  breaks = age_breaks,
  labels = age_labels,
  ordered_result = TRUE
)
as.data.frame(table(grp))
# grp Freq
#1 Foetus(0 yr) 1
#2 Neonate (0 - 0.082] yrs 0
#3 Infant (0.082-2] yrs 2
#4 Child (2-13] yrs 14
#5 Adolescent (13-18] yrs 5
#6 Adult (18-65] yrs 32
#7 Elderly >65 yrs 46
要获得额外的平均年龄,您可以使用 xtabs
# Head count and mean age per group.
# Expects AGE (numeric vector) and grp (factor from cut()) to exist already.
# xtabs() sums AGE within each level of grp; dividing by the counts in x
# gives the mean. A group with no observations yields 0 / 0 = NaN.
x <- table(grp)
cbind(
  TotalPeople = x,
  # Pass a data frame whose columns are named AGE and grp so that both
  # formula terms are looked up in `data` instead of being picked up from
  # the calling environment.
  meanAge = xtabs(AGE ~ grp, data.frame(AGE, grp)) / x
)
# TotalPeople meanAge
#Foetus(0 yr) 1 0.000000
#Neonate (0 - 0.082] yrs 0 NaN
#Infant (0.082-2] yrs 2 1.500000
#Child (2-13] yrs 14 9.071429
#Adolescent (13-18] yrs 5 15.000000
#Adult (18-65] yrs 32 41.093750
#Elderly >65 yrs 46 87.434783
或者您可以使用 aggregate
# Head count and mean age per group in a single aggregate() call.
# Expects AGE (numeric vector) and grp (factor from cut()) to exist already.
# `drop = FALSE` keeps groups without observations; their summary columns
# are NA. The data frame carries columns named AGE and grp so that both
# formula terms are resolved from `data` rather than from the calling
# environment.
aggregate(
  AGE ~ grp,
  data.frame(AGE, grp),
  function(x) c(TotalPeople = length(x), meanAge = mean(x)),
  drop = FALSE
)
# grp AGE.TotalPeople AGE.meanAge
#1 Foetus(0 yr) 1.000000 0.000000
#2 Neonate (0 - 0.082] yrs NA NA
#3 Infant (0.082-2] yrs 2.000000 1.500000
#4 Child (2-13] yrs 14.000000 9.071429
#5 Adolescent (13-18] yrs 5.000000 15.000000
#6 Adult (18-65] yrs 32.000000 41.093750
#7 Elderly >65 yrs 46.000000 87.434783
或者您可以使用 by
# Apply one summary function to the ages in each level of grp.
# Expects AGE (numeric vector) and grp (factor from cut()) to exist already.
# by() prints one section per group; a group without observations prints
# NULL.
by(
  AGE,
  grp,
  function(ages) {
    c(TotalPeople = length(ages), meanAge = mean(ages))
  }
)
#grp: Foetus(0 yr)
#TotalPeople meanAge
# 1 0
#------------------------------------------------------------
#grp: Neonate (0 - 0.082] yrs
#NULL
#------------------------------------------------------------
#grp: Infant (0.082-2] yrs
#TotalPeople meanAge
# 2.0 1.5
#------------------------------------------------------------
#grp: Child (2-13] yrs
#TotalPeople meanAge
# 14.000000 9.071429
#------------------------------------------------------------
#grp: Adolescent (13-18] yrs
#TotalPeople meanAge
# 5 15
#------------------------------------------------------------
#grp: Adult (18-65] yrs
#TotalPeople meanAge
# 32.00000 41.09375
#------------------------------------------------------------
#grp: Elderly >65 yrs
#TotalPeople meanAge
# 46.00000 87.43478
我正在使用 cut
生成各年龄组的人数小计。示例数据和代码为:
# Reproducible sample: 100 ages drawn with replacement from 0..110.
set.seed(12345)
AGE <- sample(0:110, 100, replace = TRUE)
Sample.data <- data.frame(AGE)

# Bin the ages with cut() and count the people in each bin.
# Assumes dplyr is attached (for %>%, group_by(), summarise() and n()).
# NOTE(review): labels are assigned by position to the intervals
# (-Inf,0], (0,0.001], (0.001,0.083], (0.083,2], (2,13], (13,65], (65,1000],
# which do not match the ranges written in the labels from "Neonate"
# onward (there is no break at 18) -- confirm the intended bins.
age_breaks <- c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000)
age_labels <- c(
  "Foetus(0 yr)",
  "Neonate (0.001 - 0.082 yr)",
  "Infant(0.083-1.999 yrs)",
  "Child(2-12.999 yrs)",
  "Adolescent(13-17.999 yrs)",
  "Adult(18-64.999 yrs.)",
  "Elderly(65-199 yrs)"
)
summary_data <- Sample.data %>%
  group_by(
    grp = cut(AGE, breaks = age_breaks, right = TRUE, labels = age_labels)
  ) %>%
  summarise(TotalPeople = n())
这是我得到的:
我想在表中保留没有观测值的年龄组,所以我添加了 complete 和 fill。这样可以为空的年龄组填入 0,但年龄组的顺序也随之改变了,这不是我想要的。代码和结果如下:
# Same binning as the first attempt, then pad the summary with the age
# groups that have no observations.
# Assumes dplyr and tidyr are attached and Sample.data already exists.
age_breaks <- c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000)
age_labels <- c(
  "Foetus(0 yr)",
  "Neonate (0.001 - 0.082 yr)",
  "Infant(0.083-1.999 yrs)",
  "Child(2-12.999 yrs)",
  "Adolescent(13-17.999 yrs)",
  "Adult(18-64.999 yrs.)",
  "Elderly(65-199 yrs)"
)
summary_data <- Sample.data %>%
  group_by(
    grp = cut(AGE, breaks = age_breaks, right = TRUE, labels = age_labels)
  ) %>%
  summarise(TotalPeople = n()) %>%
  # levels(grp) is a character vector, so grp is completed from plain text
  # values rather than from the factor itself; this is the step after which
  # the age groups no longer appear in cut() order.
  complete(grp = levels(grp), fill = list(TotalPeople = 0))
有没有办法让年龄组按 cut 的切分顺序排列?或者有没有其他方法可以保留观测数为 0 的年龄组,同时不改变年龄组的顺序?理想的结果应该是这样的:
您可以通过将 .drop = FALSE
添加到 group_by
来防止删除组:
library(dplyr)

# Reproducible sample: 100 ages drawn with replacement from 0..110.
set.seed(12345)
AGE <- sample(0:110, 100, replace = TRUE)
Sample.data <- data.frame(AGE)

# Bin the ages and count per bin. `.drop = FALSE` keeps the factor levels
# that have no rows, so empty age groups appear with a count of 0 and the
# rows stay in factor-level (cut) order.
age_breaks <- c(-Inf, 0, 0.001, 0.083, 2, 13, 65, 1000)
age_labels <- c(
  "Foetus(0 yr)",
  "Neonate (0.001 - 0.082 yr)",
  "Infant(0.083-1.999 yrs)",
  "Child(2-12.999 yrs)",
  "Adolescent(13-17.999 yrs)",
  "Adult(18-64.999 yrs.)",
  "Elderly(65-199 yrs)"
)
summary_data <- Sample.data %>%
  group_by(
    grp = cut(AGE, breaks = age_breaks, right = TRUE, labels = age_labels),
    .drop = FALSE
  ) %>%
  summarise(TotalPeople = n())
#> `summarise()` ungrouping output (override with `.groups` argument)
summary_data
#> # A tibble: 7 x 2
#> grp TotalPeople
#> <fct> <int>
#> 1 Foetus(0 yr) 1
#> 2 Neonate (0.001 - 0.082 yr) 0
#> 3 Infant(0.083-1.999 yrs) 0
#> 4 Child(2-12.999 yrs) 2
#> 5 Adolescent(13-17.999 yrs) 14
#> 6 Adult(18-64.999 yrs.) 37
#> 7 Elderly(65-199 yrs) 46
您可以在 cut 中添加 ordered_result = TRUE。使用 table 可以得到计数,再用 as.data.frame 把每个组的计数转换为 data.frame。
# Bin the ages into an ordered factor, then tabulate it. table() reports
# every level of the factor, so groups without observations show up with a
# frequency of 0 and the rows follow the level order.
# Expects Sample.data (with an AGE column) to exist already.
age_breaks <- c(-Inf, 0, 0.082, 2, 13, 18, 65, Inf)
age_labels <- c(
  "Foetus(0 yr)",
  "Neonate (0 - 0.082] yrs",
  "Infant (0.082-2] yrs",
  "Child (2-13] yrs",
  "Adolescent (13-18] yrs",
  "Adult (18-65] yrs",
  "Elderly >65 yrs"
)
grp <- cut(
  Sample.data$AGE,
  breaks = age_breaks,
  labels = age_labels,
  ordered_result = TRUE
)
as.data.frame(table(grp))
# grp Freq
#1 Foetus(0 yr) 1
#2 Neonate (0 - 0.082] yrs 0
#3 Infant (0.082-2] yrs 2
#4 Child (2-13] yrs 14
#5 Adolescent (13-18] yrs 5
#6 Adult (18-65] yrs 32
#7 Elderly >65 yrs 46
要获得额外的平均年龄,您可以使用 xtabs
# Head count and mean age per group.
# Expects AGE (numeric vector) and grp (factor from cut()) to exist already.
# xtabs() sums AGE within each level of grp; dividing by the counts in x
# gives the mean. A group with no observations yields 0 / 0 = NaN.
x <- table(grp)
cbind(
  TotalPeople = x,
  # Pass a data frame whose columns are named AGE and grp so that both
  # formula terms are looked up in `data` instead of being picked up from
  # the calling environment.
  meanAge = xtabs(AGE ~ grp, data.frame(AGE, grp)) / x
)
# TotalPeople meanAge
#Foetus(0 yr) 1 0.000000
#Neonate (0 - 0.082] yrs 0 NaN
#Infant (0.082-2] yrs 2 1.500000
#Child (2-13] yrs 14 9.071429
#Adolescent (13-18] yrs 5 15.000000
#Adult (18-65] yrs 32 41.093750
#Elderly >65 yrs 46 87.434783
或者您可以使用 aggregate
# Head count and mean age per group in a single aggregate() call.
# Expects AGE (numeric vector) and grp (factor from cut()) to exist already.
# `drop = FALSE` keeps groups without observations; their summary columns
# are NA. The data frame carries columns named AGE and grp so that both
# formula terms are resolved from `data` rather than from the calling
# environment.
aggregate(
  AGE ~ grp,
  data.frame(AGE, grp),
  function(x) c(TotalPeople = length(x), meanAge = mean(x)),
  drop = FALSE
)
# grp AGE.TotalPeople AGE.meanAge
#1 Foetus(0 yr) 1.000000 0.000000
#2 Neonate (0 - 0.082] yrs NA NA
#3 Infant (0.082-2] yrs 2.000000 1.500000
#4 Child (2-13] yrs 14.000000 9.071429
#5 Adolescent (13-18] yrs 5.000000 15.000000
#6 Adult (18-65] yrs 32.000000 41.093750
#7 Elderly >65 yrs 46.000000 87.434783
或者您可以使用 by
# Apply one summary function to the ages in each level of grp.
# Expects AGE (numeric vector) and grp (factor from cut()) to exist already.
# by() prints one section per group; a group without observations prints
# NULL.
by(
  AGE,
  grp,
  function(ages) {
    c(TotalPeople = length(ages), meanAge = mean(ages))
  }
)
#grp: Foetus(0 yr)
#TotalPeople meanAge
# 1 0
#------------------------------------------------------------
#grp: Neonate (0 - 0.082] yrs
#NULL
#------------------------------------------------------------
#grp: Infant (0.082-2] yrs
#TotalPeople meanAge
# 2.0 1.5
#------------------------------------------------------------
#grp: Child (2-13] yrs
#TotalPeople meanAge
# 14.000000 9.071429
#------------------------------------------------------------
#grp: Adolescent (13-18] yrs
#TotalPeople meanAge
# 5 15
#------------------------------------------------------------
#grp: Adult (18-65] yrs
#TotalPeople meanAge
# 32.00000 41.09375
#------------------------------------------------------------
#grp: Elderly >65 yrs
#TotalPeople meanAge
# 46.00000 87.43478