计算病人消耗最多的食物
Calculate the most consumed food items by Ill Patients
我有一个数据集如下,其中有很多列。有些列的标题是:
baked_hamburgur,spinach,mashed_potato,cabbages,jello,rolls,brown,milk,coffee,water,cakes,vanilla,chocolate,fruitsalad
数据集中还有其他列,但我目前只对上述这些列感兴趣。
这些列中每一行的取值只有两种:yes 或 no。
由于无法在问题中直接附上或分享该文件,下面给出这份数据的屏幕截图。
dput(head()) 输出如下:
> dput(head(illness_data))
structure(list(Age = structure(c(18L, 26L, 22L, 25L, 29L, 13L
), .Label = c("10", "106", "11", "12", "14", "15", "16", "17",
"18", "19", "2", "20", "22", "23", "24", "25", "26", "27", "28",
"30", "31", "32", "33", "34", "36", "38", "39", "4", "42", "43",
"44", "45", "46", "48", "5", "7", "8", "9", "seven"), class = "factor"),
sex = structure(c(3L, 2L, 3L, 3L, 2L, 3L), .Label = c("-1",
"Female", "Male"), class = "factor"), timesupper = c(2000L,
1830L, 1830L, 1930L, 1930L, 1930L), ill = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "yes", class = "factor"), onsetdate = structure(c(4L,
4L, 4L, 1L, 1L, 4L), .Label = c("18-Apr", "18-Jun", "18/4",
"19-Apr"), class = "factor"), onsettime = c(30L, 30L, 30L,
2230L, 2230L, 200L), baked_hamburgur = structure(c(2L, 2L,
2L, 2L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
spinach = structure(c(2L, 2L, 2L, 2L, 2L, 1L), .Label = c("no",
"yes"), class = "factor"), mashed_potato = structure(c(2L,
2L, 1L, 1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
cabbages = structure(c(1L, 2L, 1L, 2L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), jello = structure(c(1L, 1L, 1L,
2L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
rolls = structure(c(2L, 1L, 1L, 1L, 2L, 1L), .Label = c("no",
"yes"), class = "factor"), brown = structure(c(1L, 1L, 1L,
1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
milk = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), coffee = structure(c(2L, 2L, 2L,
1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
water = structure(c(1L, 1L, 1L, 2L, 2L, 1L), .Label = c("no",
"yes"), class = "factor"), cakes = structure(c(1L, 1L, 2L,
1L, 1L, 1L), .Label = c("no", "yes"), class = "factor"),
vanilla = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("no",
"yes"), class = "factor"), chocolate = structure(c(1L, 2L,
2L, 1L, 1L, 2L), .Label = c("no", "yes"), class = "factor"),
fruitsalad = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes", "yes</pre></body></html>Ztext/plain\b\v5(F]l~Ó_Ý6R[=11=]2[=11=]1"
), class = "factor")), .Names = c("Age", "sex", "timesupper",
"ill", "onsetdate", "onsettime", "baked_hamburgur", "spinach",
"mashed_potato", "cabbages", "jello", "rolls", "brown", "milk",
"coffee", "water", "cakes", "vanilla", "chocolate", "fruitsalad"
), row.names = c(NA, 6L), class = "data.frame")
一个完整的dput命令输出如下:
> dput(illness_data)
structure(list(Age = structure(c(18L, 26L, 22L, 25L, 29L, 13L,
36L, 8L, 11L, 7L, 24L, 10L, 8L, 35L, 34L, 6L, 22L, 39L, 12L,
9L, 36L, 17L, 9L, 20L, 37L, 27L, 32L, 30L, 21L, 24L, 3L, 18L,
33L, 16L, 5L, 31L, 28L, 14L, 19L, 38L, 2L, 4L, 23L, 1L, 18L,
15L), .Label = c("10", "106", "11", "12", "14", "15", "16", "17",
"18", "19", "2", "20", "22", "23", "24", "25", "26", "27", "28",
"30", "31", "32", "33", "34", "36", "38", "39", "4", "42", "43",
"44", "45", "46", "48", "5", "7", "8", "9", "seven"), class = "factor"),
sex = structure(c(3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L,
3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 1L, 3L, 3L, 3L,
2L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 2L, 3L, 2L, 3L), .Label = c("-1", "Female", "Male"
), class = "factor"), timesupper = c(2000L, 1830L, 1830L,
1930L, 1930L, 1930L, 2200L, 1900L, 1930L, NA, NA, NA, NA,
2200L, NA, NA, NA, 2200L, NA, NA, 2200L, 2200L, NA, NA, 2200L,
NA, NA, NA, NA, NA, 1900L, NA, 1100L, NA, NA, NA, 2200L,
1930L, 1930L, 2200L, NA, NA, 1930L, 1930L, NA, NA), ill = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "yes", class = "factor"), onsetdate = structure(c(4L,
4L, 4L, 1L, 1L, 4L, 4L, 2L, 4L, 4L, 4L, 1L, 1L, 4L, 1L, 3L,
1L, 4L, 1L, 1L, 4L, 4L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L,
1L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 1L, 4L, 1L
), .Label = c("18-Apr", "18-Jun", "18/4", "19-Apr"), class = "factor"),
onsettime = c(30L, 30L, 30L, 2230L, 2230L, 200L, 100L, 2300L,
200L, 1030L, 30L, 2215L, 2200L, 100L, 2300L, 2145L, 2145L,
100L, 2300L, 2100L, 100L, 100L, 2115L, 2330L, 100L, 2130L,
230L, 200L, 2130L, 30L, 100L, 2230L, 1500L, 2400L, 2300L,
2230L, 100L, 230L, 2330L, 100L, 30L, 30L, 100L, 2400L, 215L,
2300L), baked_hamburgur = structure(c(2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("no",
"yes"), class = "factor"), spinach = structure(c(2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L), .Label = c("no",
"yes"), class = "factor"), mashed_potato = structure(c(2L,
2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), cabbages = structure(c(1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L
), .Label = c("no", "yes"), class = "factor"), jello = structure(c(1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), rolls = structure(c(2L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), brown = structure(c(1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), milk = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("no", "yes"), class = "factor"), coffee = structure(c(2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), water = structure(c(1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L
), .Label = c("no", "yes"), class = "factor"), cakes = structure(c(1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), vanilla = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), chocolate = structure(c(1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, NA, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L
), .Label = c("no", "yes"), class = "factor"), fruitsalad = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L
), .Label = c("no", "yes", "yes</pre></body></html>Ztext/plain\b\v5(F]l~Ó_Ý6R[=12=]2[=12=]1"
), class = "factor")), .Names = c("Age", "sex", "timesupper",
"ill", "onsetdate", "onsettime", "baked_hamburgur", "spinach",
"mashed_potato", "cabbages", "jello", "rolls", "brown", "milk",
"coffee", "water", "cakes", "vanilla", "chocolate", "fruitsalad"
), class = "data.frame", row.names = c(NA, -46L))
R 已将这些列正确读取为因子类型变量。
现在,这些列中的每一列都对应于医院中每位病人的消费量。
我想使用 R 了解病人食用最多的食物。
请指教一个好的方法。谢谢!
请注意,除下面 URL 中提到的选项外,我没有尝试过任何其他选项。但是,我无法让它工作。
Count Factor Columns Using R
我不完全确定你想要的是什么,但下面的代码可以统计每种食物被食用的次数(使用 tidyverse 包):
library(tidyverse)
# Count how often each food was eaten: recode every food column to
# 1 ("yes") / 0 (any other answer), then add up the 1s per column.
# Both steps exclude the same non-food columns, including `ill`, so only
# food columns are recoded and summed. Missing answers stay NA through
# ifelse() and are skipped by na.rm = TRUE.
illness_data_summed <- illness_data %>%
  mutate_at(
    vars(-Age, -sex, -timesupper, -onsetdate, -onsettime, -ill),
    ~ ifelse(. == "yes", 1, 0)
  ) %>%
  summarise_at(
    vars(-Age, -sex, -timesupper, -onsetdate, -onsettime, -ill),
    ~ sum(., na.rm = TRUE)
  )
# Keep every column whose total equals the maximum, so ties are all reported.
illness_data_summed[which(illness_data_summed == max(illness_data_summed))]
首先,我把 yes 转换为 1、把 no 转换为 0,这样每一列的总和就表示该食物被食用的次数。除了你不感兴趣的列(在 vars 中用 - 标出)之外,我对所有列都做了这一转换;如果需要,你也可以反过来写,直接列出要转换的列(例如,当需要转换的列比不需要转换的列少时)。
最后一部分将产生:
vanilla
1 43
因为所有行都满足 ill = 'yes',我们可以直接统计每一列中取值为 yes 的个数。一种基础 R 的做法是:
# Count the "yes" answers in each food column (columns 7 to 20) and show the
# five largest counts.
# na.rm = TRUE is required: chocolate contains a missing answer, which would
# otherwise make its column sum NA, and sort() drops NA entries by default,
# silently removing chocolate from the ranking.
head(sort(colSums(illness_data[7:20] == "yes", na.rm = TRUE), decreasing = TRUE), 5)
# vanilla baked_hamburgur cakes spinach chocolate
# 43 29 27 26 25
我选取了第 7 至 20 列,因为只有这些列是食品列。此外,我只取了前 5 个值;你可以修改 head 命令中的数字 5 来选取任意个数。
我有一个数据集如下,其中有很多列。有些列的标题是:
baked_hamburgur,spinach,mashed_potato,cabbages,jello,rolls,brown,milk,coffee,water,cakes,vanilla,chocolate,fruitsalad
数据集中还有其他列,但我目前只对上述这些列感兴趣。
这些列中每一行的取值只有两种:yes 或 no。
由于无法在问题中直接附上或分享该文件,下面给出这份数据的屏幕截图。
dput(head()) 输出如下:
> dput(head(illness_data))
structure(list(Age = structure(c(18L, 26L, 22L, 25L, 29L, 13L
), .Label = c("10", "106", "11", "12", "14", "15", "16", "17",
"18", "19", "2", "20", "22", "23", "24", "25", "26", "27", "28",
"30", "31", "32", "33", "34", "36", "38", "39", "4", "42", "43",
"44", "45", "46", "48", "5", "7", "8", "9", "seven"), class = "factor"),
sex = structure(c(3L, 2L, 3L, 3L, 2L, 3L), .Label = c("-1",
"Female", "Male"), class = "factor"), timesupper = c(2000L,
1830L, 1830L, 1930L, 1930L, 1930L), ill = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "yes", class = "factor"), onsetdate = structure(c(4L,
4L, 4L, 1L, 1L, 4L), .Label = c("18-Apr", "18-Jun", "18/4",
"19-Apr"), class = "factor"), onsettime = c(30L, 30L, 30L,
2230L, 2230L, 200L), baked_hamburgur = structure(c(2L, 2L,
2L, 2L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
spinach = structure(c(2L, 2L, 2L, 2L, 2L, 1L), .Label = c("no",
"yes"), class = "factor"), mashed_potato = structure(c(2L,
2L, 1L, 1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
cabbages = structure(c(1L, 2L, 1L, 2L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), jello = structure(c(1L, 1L, 1L,
2L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
rolls = structure(c(2L, 1L, 1L, 1L, 2L, 1L), .Label = c("no",
"yes"), class = "factor"), brown = structure(c(1L, 1L, 1L,
1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
milk = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), coffee = structure(c(2L, 2L, 2L,
1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
water = structure(c(1L, 1L, 1L, 2L, 2L, 1L), .Label = c("no",
"yes"), class = "factor"), cakes = structure(c(1L, 1L, 2L,
1L, 1L, 1L), .Label = c("no", "yes"), class = "factor"),
vanilla = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("no",
"yes"), class = "factor"), chocolate = structure(c(1L, 2L,
2L, 1L, 1L, 2L), .Label = c("no", "yes"), class = "factor"),
fruitsalad = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes", "yes</pre></body></html>Ztext/plain\b\v5(F]l~Ó_Ý6R[=11=]2[=11=]1"
), class = "factor")), .Names = c("Age", "sex", "timesupper",
"ill", "onsetdate", "onsettime", "baked_hamburgur", "spinach",
"mashed_potato", "cabbages", "jello", "rolls", "brown", "milk",
"coffee", "water", "cakes", "vanilla", "chocolate", "fruitsalad"
), row.names = c(NA, 6L), class = "data.frame")
一个完整的dput命令输出如下:
> dput(illness_data)
structure(list(Age = structure(c(18L, 26L, 22L, 25L, 29L, 13L,
36L, 8L, 11L, 7L, 24L, 10L, 8L, 35L, 34L, 6L, 22L, 39L, 12L,
9L, 36L, 17L, 9L, 20L, 37L, 27L, 32L, 30L, 21L, 24L, 3L, 18L,
33L, 16L, 5L, 31L, 28L, 14L, 19L, 38L, 2L, 4L, 23L, 1L, 18L,
15L), .Label = c("10", "106", "11", "12", "14", "15", "16", "17",
"18", "19", "2", "20", "22", "23", "24", "25", "26", "27", "28",
"30", "31", "32", "33", "34", "36", "38", "39", "4", "42", "43",
"44", "45", "46", "48", "5", "7", "8", "9", "seven"), class = "factor"),
sex = structure(c(3L, 2L, 3L, 3L, 2L, 3L, 3L, 3L, 2L, 3L,
3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 1L, 3L, 3L, 3L,
2L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 2L, 3L, 2L, 3L), .Label = c("-1", "Female", "Male"
), class = "factor"), timesupper = c(2000L, 1830L, 1830L,
1930L, 1930L, 1930L, 2200L, 1900L, 1930L, NA, NA, NA, NA,
2200L, NA, NA, NA, 2200L, NA, NA, 2200L, 2200L, NA, NA, 2200L,
NA, NA, NA, NA, NA, 1900L, NA, 1100L, NA, NA, NA, 2200L,
1930L, 1930L, 2200L, NA, NA, 1930L, 1930L, NA, NA), ill = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "yes", class = "factor"), onsetdate = structure(c(4L,
4L, 4L, 1L, 1L, 4L, 4L, 2L, 4L, 4L, 4L, 1L, 1L, 4L, 1L, 3L,
1L, 4L, 1L, 1L, 4L, 4L, 1L, 1L, 4L, 1L, 4L, 4L, 1L, 4L, 4L,
1L, 1L, 1L, 1L, 1L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 1L, 4L, 1L
), .Label = c("18-Apr", "18-Jun", "18/4", "19-Apr"), class = "factor"),
onsettime = c(30L, 30L, 30L, 2230L, 2230L, 200L, 100L, 2300L,
200L, 1030L, 30L, 2215L, 2200L, 100L, 2300L, 2145L, 2145L,
100L, 2300L, 2100L, 100L, 100L, 2115L, 2330L, 100L, 2130L,
230L, 200L, 2130L, 30L, 100L, 2230L, 1500L, 2400L, 2300L,
2230L, 100L, 230L, 2330L, 100L, 30L, 30L, 100L, 2400L, 215L,
2300L), baked_hamburgur = structure(c(2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("no",
"yes"), class = "factor"), spinach = structure(c(2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L), .Label = c("no",
"yes"), class = "factor"), mashed_potato = structure(c(2L,
2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), cabbages = structure(c(1L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L
), .Label = c("no", "yes"), class = "factor"), jello = structure(c(1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), rolls = structure(c(2L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), brown = structure(c(1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), milk = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("no", "yes"), class = "factor"), coffee = structure(c(2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), water = structure(c(1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L
), .Label = c("no", "yes"), class = "factor"), cakes = structure(c(1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), vanilla = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("no", "yes"), class = "factor"), chocolate = structure(c(1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, NA, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L
), .Label = c("no", "yes"), class = "factor"), fruitsalad = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L
), .Label = c("no", "yes", "yes</pre></body></html>Ztext/plain\b\v5(F]l~Ó_Ý6R[=12=]2[=12=]1"
), class = "factor")), .Names = c("Age", "sex", "timesupper",
"ill", "onsetdate", "onsettime", "baked_hamburgur", "spinach",
"mashed_potato", "cabbages", "jello", "rolls", "brown", "milk",
"coffee", "water", "cakes", "vanilla", "chocolate", "fruitsalad"
), class = "data.frame", row.names = c(NA, -46L))
R 已将这些列正确读取为因子类型变量。
现在,这些列中的每一列都对应于医院中每位病人的消费量。
我想使用 R 了解病人食用最多的食物。
请指教一个好的方法。谢谢!
请注意,除下面 URL 中提到的选项外,我没有尝试过任何其他选项。但是,我无法让它工作。
Count Factor Columns Using R
我不完全确定你想要的是什么,但下面的代码可以统计每种食物被食用的次数(使用 tidyverse 包):
library(tidyverse)
# Count how often each food was eaten: recode every food column to
# 1 ("yes") / 0 (any other answer), then add up the 1s per column.
# Both steps exclude the same non-food columns, including `ill`, so only
# food columns are recoded and summed. Missing answers stay NA through
# ifelse() and are skipped by na.rm = TRUE.
illness_data_summed <- illness_data %>%
  mutate_at(
    vars(-Age, -sex, -timesupper, -onsetdate, -onsettime, -ill),
    ~ ifelse(. == "yes", 1, 0)
  ) %>%
  summarise_at(
    vars(-Age, -sex, -timesupper, -onsetdate, -onsettime, -ill),
    ~ sum(., na.rm = TRUE)
  )
# Keep every column whose total equals the maximum, so ties are all reported.
illness_data_summed[which(illness_data_summed == max(illness_data_summed))]
首先,我把 yes 转换为 1、把 no 转换为 0,这样每一列的总和就表示该食物被食用的次数。除了你不感兴趣的列(在 vars 中用 - 标出)之外,我对所有列都做了这一转换;如果需要,你也可以反过来写,直接列出要转换的列(例如,当需要转换的列比不需要转换的列少时)。
最后一部分将产生:
vanilla
1 43
因为所有行都满足 ill = 'yes',我们可以直接统计每一列中取值为 yes 的个数。一种基础 R 的做法是:
# Count the "yes" answers in each food column (columns 7 to 20) and show the
# five largest counts.
# na.rm = TRUE is required: chocolate contains a missing answer, which would
# otherwise make its column sum NA, and sort() drops NA entries by default,
# silently removing chocolate from the ranking.
head(sort(colSums(illness_data[7:20] == "yes", na.rm = TRUE), decreasing = TRUE), 5)
# vanilla baked_hamburgur cakes spinach chocolate
# 43 29 27 26 25
我选取了第 7 至 20 列,因为只有这些列是食品列。此外,我只取了前 5 个值;你可以修改 head 命令中的数字 5 来选取任意个数。