R:根据多个条件填充向量
R: populate vector based on multiple conditions
我正在尝试想出一种高效的方法来填充名为 Cohort 的新列。问题是我不擅长在 R 中编写条件函数或使用循环。也许某种 sapply 函数会起作用。
这是起始数据...
> dput(as.data.frame(wi.age.count))
structure(list(Year = c("2008", "2009", "2010", "2011", "2012",
"2013", "2014", "2015", "2016", "2017", "2018", "2007", "2007",
"2007", "2007", "2008", "2008", "2008", "2009", "2009", "2009",
"2009", "2009", "2009", "2009", "2010", "2010", "2010", "2010",
"2010", "2011", "2011", "2011", "2011", "2011", "2011", "2011",
"2011", "2011", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2013", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2014", "2014", "2014", "2014", "2014", "2014",
"2014", "2014", "2014", "2015", "2015", "2015", "2015", "2015",
"2015", "2015", "2015", "2015", "2016", "2016", "2016", "2016",
"2016", "2016", "2016", "2017", "2017", "2017", "2017", "2017",
"2017", "2017", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018"), Age = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 3L, 6L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 2L, 3L, 4L, 5L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 10L, 2L, 3L, 4L, 5L, 6L, 7L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("0", "1",
"2", "3", "4", "5", "6", "7", "8", "9"), class = "factor"), n = c(166,
28, 34, 77, 170, 18, 3, 22, 43, 50, 151, 1, 8, 17, 1, 4, 19,
1, 1, 46, 37, 52, 5, 1, 1, 19, 41, 15, 16, 1, 1, 13, 4, 26, 12,
11, 1, 1, 1, 1, 87, 15, 13, 27, 13, 17, 1, 1, 32, 30, 3, 4, 1,
1, 1, 1, 24, 15, 23, 6, 2, 1, 2, 2, 4, 18, 13, 31, 28, 3, 3,
6, 1, 4, 6, 1, 5, 9, 1, 1, 1, 16, 16, 8, 1, 1, 4, 1, 12, 4, 7,
2, 1, 2, 1), id = c("YOY", "YOY", "YOY", "YOY", "YOY", "YOY",
"YOY", "YOY", "YOY", "YOY", "YOY", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult")), row.names = c(NA, -95L), class = "data.frame")
>
这就是我要找的....
> dput(as.data.frame(wi.age.count))
structure(list(Year = c("2008", "2009", "2010", "2011", "2012",
"2013", "2014", "2015", "2016", "2017", "2018", "2007", "2007",
"2007", "2007", "2008", "2008", "2008", "2009", "2009", "2009",
"2009", "2009", "2009", "2009", "2010", "2010", "2010", "2010",
"2010", "2011", "2011", "2011", "2011", "2011", "2011", "2011",
"2011", "2011", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2013", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2014", "2014", "2014", "2014", "2014", "2014",
"2014", "2014", "2014", "2015", "2015", "2015", "2015", "2015",
"2015", "2015", "2015", "2015", "2016", "2016", "2016", "2016",
"2016", "2016", "2016", "2017", "2017", "2017", "2017", "2017",
"2017", "2017", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018"), Age = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 3L, 6L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 2L, 3L, 4L, 5L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 10L, 2L, 3L, 4L, 5L, 6L, 7L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("0", "1",
"2", "3", "4", "5", "6", "7", "8", "9"), class = "factor"), n = c(166,
28, 34, 77, 170, 18, 3, 22, 43, 50, 151, 1, 8, 17, 1, 4, 19,
1, 1, 46, 37, 52, 5, 1, 1, 19, 41, 15, 16, 1, 1, 13, 4, 26, 12,
11, 1, 1, 1, 1, 87, 15, 13, 27, 13, 17, 1, 1, 32, 30, 3, 4, 1,
1, 1, 1, 24, 15, 23, 6, 2, 1, 2, 2, 4, 18, 13, 31, 28, 3, 3,
6, 1, 4, 6, 1, 5, 9, 1, 1, 1, 16, 16, 8, 1, 1, 4, 1, 12, 4, 7,
2, 1, 2, 1), id = c("YOY", "YOY", "YOY", "YOY", "YOY", "YOY",
"YOY", "YOY", "YOY", "YOY", "YOY", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult"), Cohort = c("2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2007", "2006", "2005", "2002", "2007", "2006", "2005",
"2009", "2008", "2007", "2006", "2005", "2004", "2003", "2009",
"2008", "2007", "2006", "2001", "2011", "2010", "2009", "2008",
"2007", "2006", "2005", "2004", "2003", "2012", "2011", "2010",
"2009", "2008", "2007", "2006", "2005", "2013", "2012", "2011",
"2010", "2009", "2008", "2007", "2006", "2014", "2013", "2012",
"2011", "2010", "2009", "2008", "2007", "2006", "2015", "2014",
"2013", "2012", "2011", "2010", "2009", "2008", "2006", "2015",
"2014", "2013", "2012", "2011", "2010", "2007", "2017", "2016",
"2015", "2014", "2013", "2012", "2011", "2018", "2017", "2016",
"2015", "2014", "2013", "2012", "2011")), row.names = c(NA, -95L
), class = "data.frame")
我用了大约 200 行 ifelse 语句才完成这项工作。我知道一定有更简单的方法,希望有人能提供一些建议。
这是我用来执行此操作的代码的(小)示例,就像我说的那样非常乏味......
# Excerpt of the manual approach: each statement overwrites Cohort with a fixed
# cohort string for the rows matching one (Year, Age) pair and keeps the current
# Cohort value for every other row. In each line the assigned cohort equals
# Year minus Age (e.g. 2012 - 9 = 2003, 2007 - 3 = 2004).
# NOTE(review): the fallback reads wi.age.count$Cohort, so the column presumably
# has to exist before the first statement runs -- confirm against the full script.
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2012", ifelse(wi.age.count$Age == "9", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2013", ifelse(wi.age.count$Age == "10", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2014", ifelse(wi.age.count$Age == "11", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2015", ifelse(wi.age.count$Age == "12", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2016", ifelse(wi.age.count$Age == "13", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2017", ifelse(wi.age.count$Age == "14", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2018", ifelse(wi.age.count$Age == "15", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2019", ifelse(wi.age.count$Age == "16", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2004", ifelse(wi.age.count$Age == "0", "2004", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2005", ifelse(wi.age.count$Age == "1", "2004", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2006", ifelse(wi.age.count$Age == "2", "2004", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2007", ifelse(wi.age.count$Age == "3", "2004", wi.age.count$Cohort), wi.age.count$Cohort)
Cohort 是个体的出生年份吗?如果是这样,那么:
# Cohort (birth year) = sampling year minus age, computed for all rows at once.
# Age is a factor, so as.numeric(Age) would return the internal level codes
# (1..10) instead of the labels ("0".."9") and shift every cohort by one year.
# Convert through as.character() first so the real ages are subtracted.
# The result is numeric; wrap it in as.character() if a string column is needed.
wi.age.count$Cohort <- as.numeric(wi.age.count$Year) -
  as.numeric(as.character(wi.age.count$Age))
我正在尝试想出一种高效的方法来填充名为 Cohort 的新列。问题是我不擅长在 R 中编写条件函数或使用循环。也许某种 sapply 函数会起作用。
这是起始数据...
> dput(as.data.frame(wi.age.count))
structure(list(Year = c("2008", "2009", "2010", "2011", "2012",
"2013", "2014", "2015", "2016", "2017", "2018", "2007", "2007",
"2007", "2007", "2008", "2008", "2008", "2009", "2009", "2009",
"2009", "2009", "2009", "2009", "2010", "2010", "2010", "2010",
"2010", "2011", "2011", "2011", "2011", "2011", "2011", "2011",
"2011", "2011", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2013", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2014", "2014", "2014", "2014", "2014", "2014",
"2014", "2014", "2014", "2015", "2015", "2015", "2015", "2015",
"2015", "2015", "2015", "2015", "2016", "2016", "2016", "2016",
"2016", "2016", "2016", "2017", "2017", "2017", "2017", "2017",
"2017", "2017", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018"), Age = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 3L, 6L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 2L, 3L, 4L, 5L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 10L, 2L, 3L, 4L, 5L, 6L, 7L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("0", "1",
"2", "3", "4", "5", "6", "7", "8", "9"), class = "factor"), n = c(166,
28, 34, 77, 170, 18, 3, 22, 43, 50, 151, 1, 8, 17, 1, 4, 19,
1, 1, 46, 37, 52, 5, 1, 1, 19, 41, 15, 16, 1, 1, 13, 4, 26, 12,
11, 1, 1, 1, 1, 87, 15, 13, 27, 13, 17, 1, 1, 32, 30, 3, 4, 1,
1, 1, 1, 24, 15, 23, 6, 2, 1, 2, 2, 4, 18, 13, 31, 28, 3, 3,
6, 1, 4, 6, 1, 5, 9, 1, 1, 1, 16, 16, 8, 1, 1, 4, 1, 12, 4, 7,
2, 1, 2, 1), id = c("YOY", "YOY", "YOY", "YOY", "YOY", "YOY",
"YOY", "YOY", "YOY", "YOY", "YOY", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult")), row.names = c(NA, -95L), class = "data.frame")
>
这就是我要找的....
> dput(as.data.frame(wi.age.count))
structure(list(Year = c("2008", "2009", "2010", "2011", "2012",
"2013", "2014", "2015", "2016", "2017", "2018", "2007", "2007",
"2007", "2007", "2008", "2008", "2008", "2009", "2009", "2009",
"2009", "2009", "2009", "2009", "2010", "2010", "2010", "2010",
"2010", "2011", "2011", "2011", "2011", "2011", "2011", "2011",
"2011", "2011", "2012", "2012", "2012", "2012", "2012", "2012",
"2012", "2012", "2013", "2013", "2013", "2013", "2013", "2013",
"2013", "2013", "2014", "2014", "2014", "2014", "2014", "2014",
"2014", "2014", "2014", "2015", "2015", "2015", "2015", "2015",
"2015", "2015", "2015", "2015", "2016", "2016", "2016", "2016",
"2016", "2016", "2016", "2017", "2017", "2017", "2017", "2017",
"2017", "2017", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018"), Age = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 3L, 6L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 2L, 3L, 4L, 5L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 10L, 2L, 3L, 4L, 5L, 6L, 7L, 10L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("0", "1",
"2", "3", "4", "5", "6", "7", "8", "9"), class = "factor"), n = c(166,
28, 34, 77, 170, 18, 3, 22, 43, 50, 151, 1, 8, 17, 1, 4, 19,
1, 1, 46, 37, 52, 5, 1, 1, 19, 41, 15, 16, 1, 1, 13, 4, 26, 12,
11, 1, 1, 1, 1, 87, 15, 13, 27, 13, 17, 1, 1, 32, 30, 3, 4, 1,
1, 1, 1, 24, 15, 23, 6, 2, 1, 2, 2, 4, 18, 13, 31, 28, 3, 3,
6, 1, 4, 6, 1, 5, 9, 1, 1, 1, 16, 16, 8, 1, 1, 4, 1, 12, 4, 7,
2, 1, 2, 1), id = c("YOY", "YOY", "YOY", "YOY", "YOY", "YOY",
"YOY", "YOY", "YOY", "YOY", "YOY", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult", "Adult", "Adult", "Adult",
"Adult", "Adult", "Adult", "Adult"), Cohort = c("2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2007", "2006", "2005", "2002", "2007", "2006", "2005",
"2009", "2008", "2007", "2006", "2005", "2004", "2003", "2009",
"2008", "2007", "2006", "2001", "2011", "2010", "2009", "2008",
"2007", "2006", "2005", "2004", "2003", "2012", "2011", "2010",
"2009", "2008", "2007", "2006", "2005", "2013", "2012", "2011",
"2010", "2009", "2008", "2007", "2006", "2014", "2013", "2012",
"2011", "2010", "2009", "2008", "2007", "2006", "2015", "2014",
"2013", "2012", "2011", "2010", "2009", "2008", "2006", "2015",
"2014", "2013", "2012", "2011", "2010", "2007", "2017", "2016",
"2015", "2014", "2013", "2012", "2011", "2018", "2017", "2016",
"2015", "2014", "2013", "2012", "2011")), row.names = c(NA, -95L
), class = "data.frame")
我用了大约 200 行 ifelse 语句才完成这项工作。我知道一定有更简单的方法,希望有人能提供一些建议。
这是我用来执行此操作的代码的(小)示例,就像我说的那样非常乏味......
# Excerpt of the manual approach: each statement overwrites Cohort with a fixed
# cohort string for the rows matching one (Year, Age) pair and keeps the current
# Cohort value for every other row. In each line the assigned cohort equals
# Year minus Age (e.g. 2012 - 9 = 2003, 2007 - 3 = 2004).
# NOTE(review): the fallback reads wi.age.count$Cohort, so the column presumably
# has to exist before the first statement runs -- confirm against the full script.
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2012", ifelse(wi.age.count$Age == "9", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2013", ifelse(wi.age.count$Age == "10", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2014", ifelse(wi.age.count$Age == "11", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2015", ifelse(wi.age.count$Age == "12", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2016", ifelse(wi.age.count$Age == "13", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2017", ifelse(wi.age.count$Age == "14", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2018", ifelse(wi.age.count$Age == "15", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2019", ifelse(wi.age.count$Age == "16", "2003", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2004", ifelse(wi.age.count$Age == "0", "2004", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2005", ifelse(wi.age.count$Age == "1", "2004", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2006", ifelse(wi.age.count$Age == "2", "2004", wi.age.count$Cohort), wi.age.count$Cohort)
wi.age.count$Cohort <- ifelse(wi.age.count$Year == "2007", ifelse(wi.age.count$Age == "3", "2004", wi.age.count$Cohort), wi.age.count$Cohort)
Cohort 是个体的出生年份吗?如果是这样,那么:
# Cohort (birth year) = sampling year minus age, computed for all rows at once.
# Age is a factor, so as.numeric(Age) would return the internal level codes
# (1..10) instead of the labels ("0".."9") and shift every cohort by one year.
# Convert through as.character() first so the real ages are subtracted.
# The result is numeric; wrap it in as.character() if a string column is needed.
wi.age.count$Cohort <- as.numeric(wi.age.count$Year) -
  as.numeric(as.character(wi.age.count$Age))