在 R 中根据行生成列联表(contingency table)
Make contingency table from rows in R
我有一个 covid 数据框,共 7 行、376 列,记录了 7 个国家/地区在 376 个不同日期的 covid 感染数。我已经为这些数值匹配了不同的严重性类别,现在想制作一个列联表(contingency table),以严重性类别为列、国家/地区为行。我已经编写了一个函数并且它可以工作,但我仍想知道是否有更优雅的解决方案,例如使用 table() 之类的函数对每一行进行汇总。
我的代码:
#' Count severity categories per country.
#'
#' Builds a contingency table with one row per row of `x` and one column per
#' severity category; each cell holds how often that category occurs in the
#' corresponding row of `x`.
#'
#' @param x Matrix or data frame of severity labels (one row per country,
#'   one column per day).
#' @param countries Row names for the result, one per row of `x`.
#' @param sev Severity categories to count; they become the column names.
#' @return An integer matrix with `nrow(x)` rows and `length(sev)` columns.
severity <- function(x, countries, sev = c("Leicht", "Mittel", "Schwer")) {
  # Count on the argument `x` (not on a global object) and compare on a plain
  # matrix so that data frames and matrices are handled the same way.
  cells <- as.matrix(x)
  # Size the result from the input instead of assuming seven countries.
  res <- matrix(0L, nrow = nrow(cells), ncol = length(sev))
  colnames(res) <- sev
  rownames(res) <- countries
  for (s in seq_along(sev)) {
    # NA cells match no category and are left out of the counts.
    res[, s] <- as.integer(rowSums(cells == sev[s], na.rm = TRUE))
  }
  res
}
# Tabulate the severities per country: covid_world2 supplies the country
# names (its first column), x2 holds the data with the categories.
r <- severity(x2, covid_world2[, 1])
# Append a "Z" row of column totals, then an "S" column of row totals.
x <- rbind(r, Z = colSums(r))
ctable <- cbind(x, S = rowSums(x))
这只是 x2 中前两行的示例(即代表国家加拿大、德国)
dput(head(covid_world2[, 1:20]))
输出为:
structure(list(Country = c("Canada", "France", "Germany", "Italy",
"Japan", "United Kingdom"), X1_22_20 = c(0, 0, 0, 0, 1.58132191886678e-08,
0), X1_23_20 = c(0, 0, 0, 0, 1.58132191886678e-08, 0), X1_24_20 = c(0,
3.06403006266968e-08, 0, 0, 1.58132191886678e-08, 0), X1_25_20 = c(0,
4.59604509400452e-08, 0, 0, 1.58132191886678e-08, 0), X1_26_20 = c(2.6495573093152e-08,
4.59604509400452e-08, 0, 0, 3.16264383773357e-08, 0), X1_27_20 = c(2.6495573093152e-08,
4.59604509400452e-08, 1.19354613321966e-08, 0, 3.16264383773357e-08,
0), X1_28_20 = c(5.2991146186304e-08, 6.12806012533936e-08, 4.77418453287863e-08,
0, 5.53462671603374e-08, 0), X1_29_20 = c(5.2991146186304e-08,
7.6600751566742e-08, 4.77418453287863e-08, 0, 5.53462671603374e-08,
0), X1_30_20 = c(5.2991146186304e-08, 7.6600751566742e-08, 4.77418453287863e-08,
0, 8.69727055376731e-08, 0), X1_31_20 = c(1.05982292372608e-07,
7.6600751566742e-08, 5.96773066609828e-08, 3.3078723093808e-08,
1.18599143915009e-07, 2.94611506927399e-08), X02_01_2020 = c(1.05982292372608e-07,
9.19209018800904e-08, 9.54836906575725e-08, 3.3078723093808e-08,
1.58132191886678e-07, 2.94611506927399e-08), X02_02_2020 = c(1.05982292372608e-07,
9.19209018800904e-08, 1.19354613321966e-07, 3.3078723093808e-08,
1.58132191886678e-07, 2.94611506927399e-08), X02_03_2020 = c(1.05982292372608e-07,
9.19209018800904e-08, 1.43225535986359e-07, 3.3078723093808e-08,
1.58132191886678e-07, 1.1784460277096e-07), X02_04_2020 = c(1.05982292372608e-07,
9.19209018800904e-08, 1.43225535986359e-07, 3.3078723093808e-08,
1.73945411075346e-07, 1.1784460277096e-07), X02_05_2020 = c(1.3247786546576e-07,
9.19209018800904e-08, 1.43225535986359e-07, 3.3078723093808e-08,
1.8185202066968e-07, 1.3257517811733e-07), X02_06_2020 = c(1.3247786546576e-07,
9.19209018800904e-08, 1.43225535986359e-07, 3.3078723093808e-08,
1.8185202066968e-07, 1.3257517811733e-07), X02_07_2020 = c(1.85469011652064e-07,
9.19209018800904e-08, 1.55160997318555e-07, 4.9618084640712e-08,
1.8185202066968e-07, 1.3257517811733e-07), X02_08_2020 = c(1.85469011652064e-07,
1.68521653446832e-07, 1.55160997318555e-07, 4.9618084640712e-08,
1.89758630264014e-07, 1.9149747950281e-07), X02_09_2020 = c(1.85469011652064e-07,
1.68521653446832e-07, 1.67096458650752e-07, 4.9618084640712e-08,
1.89758630264014e-07, 2.06228054849179e-07)), row.names = c(NA,
6L), class = "data.frame")
library(dplyr)
library(tidyr)
# Labels for the three severity bands and the break points between them.
# NOTE(review): the question spells the middle category "Mittel"; the label
# here is "Mitte" -- confirm which spelling is intended.
severity <- c("Leicht", "Mitte", "Schwer")
ranges <- c(0, 0.01, 0.05, Inf)
df %>%
  # Bin every "X..." column into a severity factor. right = FALSE closes the
  # intervals on the left: [0, 0.01), [0.01, 0.05), [0.05, Inf).
  dplyr::mutate(
    across(starts_with("X"), ~ cut(., ranges, right = FALSE, labels = severity))
  ) %>%
  tidyr::pivot_longer(
    cols = -Country,
    names_to = "Date",
    values_to = "Severity"
  ) %>%
  # Optional step: turn the former column names into Date values. It is not
  # needed for the table below and can be deleted.
  dplyr::mutate(Date = as.Date(Date, tryFormats = "X%m_%d_%y")) %>%
  # Count the occurrences of each severity per country; a category that never
  # occurs for a country is reported as 0 instead of NA.
  tidyr::pivot_wider(
    id_cols = Country,
    names_from = Severity,
    values_from = Severity,
    values_fn = length,
    values_fill = 0L
  )
Country Leicht
<chr> <int>
1 Canada 19
2 France 19
3 Germany 19
4 Italy 19
5 Japan 19
6 United Kingdom 19
工作原理
mutate 会对所有以“X”开头的列应用函数 cut。cut 按照提供的区间(不包含上限)对数值进行划分,并用向量 severity 中对应的标签标记落在各区间内的值。
# Pass the break vector `ranges`; `range` is a base R function, not the breaks.
cut(0.01, ranges, right = FALSE)
[1] [0.01,0.05)
Levels: [0,0.01) [0.01,0.05) [0.05,Inf)
您可以看到 0.01 落在 [0.01,0.05) 区间内,因为 right = F 使每个区间的右边界不包含在内(左边界包含在内)。在此函数中加入 labels = severity 后,该区间就会得到正确的标签 "Mitte"。
tidyr::pivot_longer 会将除 Country 之外的所有列转换为长格式。原来的列名将存储在名为 Date 的新列中,经过 mutate 处理后的各列的值将存储在名为 Severity 的新列中。
- 最后的 mutate 将 Date 列转换为日期向量,因此 "X2_01_20" 会变成日期 2020-02-01。注意:如果你只关心下一步生成的表格,可以删除管道中的这一步。我把它包括在内,以防您需要按日期查看内容。
tidyr::pivot_wider 将统计每个国家/地区各严重性类别出现的次数。
我有一个 covid 数据框,共 7 行、376 列,记录了 7 个国家/地区在 376 个不同日期的 covid 感染数。我已经为这些数值匹配了不同的严重性类别,现在想制作一个列联表(contingency table),以严重性类别为列、国家/地区为行。我已经编写了一个函数并且它可以工作,但我仍想知道是否有更优雅的解决方案,例如使用 table() 之类的函数对每一行进行汇总。
我的代码:
#' Count severity categories per country.
#'
#' Builds a contingency table with one row per row of `x` and one column per
#' severity category; each cell holds how often that category occurs in the
#' corresponding row of `x`.
#'
#' @param x Matrix or data frame of severity labels (one row per country,
#'   one column per day).
#' @param countries Row names for the result, one per row of `x`.
#' @param sev Severity categories to count; they become the column names.
#' @return An integer matrix with `nrow(x)` rows and `length(sev)` columns.
severity <- function(x, countries, sev = c("Leicht", "Mittel", "Schwer")) {
  # Count on the argument `x` (not on a global object) and compare on a plain
  # matrix so that data frames and matrices are handled the same way.
  cells <- as.matrix(x)
  # Size the result from the input instead of assuming seven countries.
  res <- matrix(0L, nrow = nrow(cells), ncol = length(sev))
  colnames(res) <- sev
  rownames(res) <- countries
  for (s in seq_along(sev)) {
    # NA cells match no category and are left out of the counts.
    res[, s] <- as.integer(rowSums(cells == sev[s], na.rm = TRUE))
  }
  res
}
# Tabulate the severities per country: covid_world2 supplies the country
# names (its first column), x2 holds the data with the categories.
r <- severity(x2, covid_world2[, 1])
# Append a "Z" row of column totals, then an "S" column of row totals.
x <- rbind(r, Z = colSums(r))
ctable <- cbind(x, S = rowSums(x))
这只是 x2 中前两行的示例(即代表国家加拿大、德国)
dput(head(covid_world2[, 1:20]))
输出为:
structure(list(Country = c("Canada", "France", "Germany", "Italy",
"Japan", "United Kingdom"), X1_22_20 = c(0, 0, 0, 0, 1.58132191886678e-08,
0), X1_23_20 = c(0, 0, 0, 0, 1.58132191886678e-08, 0), X1_24_20 = c(0,
3.06403006266968e-08, 0, 0, 1.58132191886678e-08, 0), X1_25_20 = c(0,
4.59604509400452e-08, 0, 0, 1.58132191886678e-08, 0), X1_26_20 = c(2.6495573093152e-08,
4.59604509400452e-08, 0, 0, 3.16264383773357e-08, 0), X1_27_20 = c(2.6495573093152e-08,
4.59604509400452e-08, 1.19354613321966e-08, 0, 3.16264383773357e-08,
0), X1_28_20 = c(5.2991146186304e-08, 6.12806012533936e-08, 4.77418453287863e-08,
0, 5.53462671603374e-08, 0), X1_29_20 = c(5.2991146186304e-08,
7.6600751566742e-08, 4.77418453287863e-08, 0, 5.53462671603374e-08,
0), X1_30_20 = c(5.2991146186304e-08, 7.6600751566742e-08, 4.77418453287863e-08,
0, 8.69727055376731e-08, 0), X1_31_20 = c(1.05982292372608e-07,
7.6600751566742e-08, 5.96773066609828e-08, 3.3078723093808e-08,
1.18599143915009e-07, 2.94611506927399e-08), X02_01_2020 = c(1.05982292372608e-07,
9.19209018800904e-08, 9.54836906575725e-08, 3.3078723093808e-08,
1.58132191886678e-07, 2.94611506927399e-08), X02_02_2020 = c(1.05982292372608e-07,
9.19209018800904e-08, 1.19354613321966e-07, 3.3078723093808e-08,
1.58132191886678e-07, 2.94611506927399e-08), X02_03_2020 = c(1.05982292372608e-07,
9.19209018800904e-08, 1.43225535986359e-07, 3.3078723093808e-08,
1.58132191886678e-07, 1.1784460277096e-07), X02_04_2020 = c(1.05982292372608e-07,
9.19209018800904e-08, 1.43225535986359e-07, 3.3078723093808e-08,
1.73945411075346e-07, 1.1784460277096e-07), X02_05_2020 = c(1.3247786546576e-07,
9.19209018800904e-08, 1.43225535986359e-07, 3.3078723093808e-08,
1.8185202066968e-07, 1.3257517811733e-07), X02_06_2020 = c(1.3247786546576e-07,
9.19209018800904e-08, 1.43225535986359e-07, 3.3078723093808e-08,
1.8185202066968e-07, 1.3257517811733e-07), X02_07_2020 = c(1.85469011652064e-07,
9.19209018800904e-08, 1.55160997318555e-07, 4.9618084640712e-08,
1.8185202066968e-07, 1.3257517811733e-07), X02_08_2020 = c(1.85469011652064e-07,
1.68521653446832e-07, 1.55160997318555e-07, 4.9618084640712e-08,
1.89758630264014e-07, 1.9149747950281e-07), X02_09_2020 = c(1.85469011652064e-07,
1.68521653446832e-07, 1.67096458650752e-07, 4.9618084640712e-08,
1.89758630264014e-07, 2.06228054849179e-07)), row.names = c(NA,
6L), class = "data.frame")
library(dplyr)
library(tidyr)
# Labels for the three severity bands and the break points between them.
# NOTE(review): the question spells the middle category "Mittel"; the label
# here is "Mitte" -- confirm which spelling is intended.
severity <- c("Leicht", "Mitte", "Schwer")
ranges <- c(0, 0.01, 0.05, Inf)
df %>%
  # Bin every "X..." column into a severity factor. right = FALSE closes the
  # intervals on the left: [0, 0.01), [0.01, 0.05), [0.05, Inf).
  dplyr::mutate(
    across(starts_with("X"), ~ cut(., ranges, right = FALSE, labels = severity))
  ) %>%
  tidyr::pivot_longer(
    cols = -Country,
    names_to = "Date",
    values_to = "Severity"
  ) %>%
  # Optional step: turn the former column names into Date values. It is not
  # needed for the table below and can be deleted.
  dplyr::mutate(Date = as.Date(Date, tryFormats = "X%m_%d_%y")) %>%
  # Count the occurrences of each severity per country; a category that never
  # occurs for a country is reported as 0 instead of NA.
  tidyr::pivot_wider(
    id_cols = Country,
    names_from = Severity,
    values_from = Severity,
    values_fn = length,
    values_fill = 0L
  )
Country Leicht
<chr> <int>
1 Canada 19
2 France 19
3 Germany 19
4 Italy 19
5 Japan 19
6 United Kingdom 19
工作原理
mutate 会对所有以“X”开头的列应用函数 cut。cut 按照提供的区间(不包含上限)对数值进行划分,并用向量 severity 中对应的标签标记落在各区间内的值。
# Pass the break vector `ranges`; `range` is a base R function, not the breaks.
cut(0.01, ranges, right = FALSE)
[1] [0.01,0.05)
Levels: [0,0.01) [0.01,0.05) [0.05,Inf)
您可以看到 0.01 落在 [0.01,0.05) 区间内,因为 right = F 使每个区间的右边界不包含在内(左边界包含在内)。在此函数中加入 labels = severity 后,该区间就会得到正确的标签 "Mitte"。
tidyr::pivot_longer 会将除 Country 之外的所有列转换为长格式。原来的列名将存储在名为 Date 的新列中,经过 mutate 处理后的各列的值将存储在名为 Severity 的新列中。
- 最后的 mutate 将 Date 列转换为日期向量,因此 "X2_01_20" 会变成日期 2020-02-01。注意:如果你只关心下一步生成的表格,可以删除管道中的这一步。我把它包括在内,以防您需要按日期查看内容。
tidyr::pivot_wider 将统计每个国家/地区各严重性类别出现的次数。