使用 dplyr 按条件重新编码变量

Recode variable conditioning using dplyr

我正在尝试根据第二个变量对数据帧中的一个值进行重新编码:当 age 为 0 时,将 age2 的值 4 重新编码为 1。

adm_id  sex age counts  age2    year    rate1       rate2
1       1   0   547     1       2009    0.02387617  1.632533
1       1   1   492     4       2009    0.02387617  1.468384
1       1   2   568     4       2009    0.02387617  1.695208
1       1   3   533     4       2009    0.02387617  1.59075
1       1   4   519     4       2009    0.02387617  1.548966
1       1   5   539     5       2009    0.02387617  1.608657
1       1   6   514     5       2009    0.02387617  1.534044
1       1   7   513     5       2009    0.02387617  1.531059
1       1   8   496     5       2009    0.02387617  1.480322
1       1   9   548     5       2009    0.02387617  1.635518

为了获得 age2 我使用了 mutate:

# Bin `age` into 5-year groups: breaks run -1, 4, 9, ..., 79 and are closed
# off with max(age); the groups are labelled 4, 5, 10, ..., 80.
dt %>%
  mutate(
    age2 = cut(
      age,
      breaks = c(seq(-1, 80, by = 5), max(age)),
      labels = c(4, seq(5, 80, by = 5))
    )
  )

但是当 age 为 0 时,我无法用 1 替换 age2
structure(list(adm_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), sex = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), age = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 
70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L), 
counts = c(547L, 492L, 568L, 533L, 519L, 539L, 514L, 513L, 496L, 548L, 541L, 549L, 468L, 444L, 478L, 492L, 366L, 323L, 329L, 306L, 285L, 241L, 269L, 253L, 276L, 259L, 256L, 236L, 178L, 306L, 276L, 186L, 216L, 223L, 244L, 202L, 206L,     247L, 170L, 198L, 254L, 193L, 155L, 171L, 171L, 169L, 146L, 149L, 139L, 185L, 
143L, 141L, 115L, 109L, 103L, 138L, 108L, 129L, 91L, 89L, 101L, 81L, 71L, 63L, 
79L, 76L, 59L, 160L, 50L, 59L, 62L, 45L, 54L, 35L, 33L, 34L, 33L, 50L, 39L, 
66L, 26L, 12L, 17L, 7L, 23L, 18L, 10L, 16L, 7L, 33L, 13L, 8L, 6L, 4L, 8L, 
4L, 7L, 4L, 6L, 2L, 21L, 515L, 492L, 481L, 528L, 481L, 487L, 454L, 411L, 
395L, 515L, 470L, 435L, 445L, 431L, 413L, 398L, 357L, 337L, 331L, 319L,     309L, 232L, 283L, 335L, 323L, 300L, 279L, 274L, 211L, 326L, 280L, 206L, 227L,     226L, 250L, 236L, 215L, 292L, 179L, 220L, 223L, 186L, 176L, 166L, 190L, 180L,     179L, 180L, 167L, 194L, 180L, 136L, 108L, 125L, 110L, 134L, 128L, 131L, 94L,     122L, 99L, 109L, 83L, 57L, 
80L, 78L, 44L, 106L, 35L, 75L, 71L, 42L, 34L, 30L, 37L, 32L, 37L, 37L, 24L,     69L, 27L, 14L, 13L, 8L, 19L, 7L, 16L, 10L, 6L, 38L, 8L, 13L, 5L, 4L, 11L, 2L, 7L, 1L, 1L, 3L, 23L), 
year = c(2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,     2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,     2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,     2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,     2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,     2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,     2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,     2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,     2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,         2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L,     2009L, 
2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L, 2009L), 
rate1 = c(0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 
0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168, 0.023876168), 
rate2 = c(1.632533018, 1.46838436, 1.69520796, 1.590749723, 1.548966428, 1.608656849, 1.534043823, 1.531059302, 1.480322444, 1.635517539, 1.614625892, 1.63850206, 1.396755854, 1.325127349, 1.426601065, 1.46838436, 1.092334707, 0.964000301, 0.981907428, 0.913263443, 0.850588501, 0.719269575, 0.802836164, 0.755083827, 0.823727812, 0.772990954, 0.76403739, 0.704346969, 0.531244748, 0.913263443, 0.823727812, 0.555120916, 0.644656548, 0.665548196, 0.728223138, 0.602873253, 0.614811338, 0.737176701, 0.50736858, 0.590935169, 0.758068348, 0.576012564, 0.462600764, 0.510353101, 0.510353101, 0.504384059, 0.435740074, 0.444693637, 0.414848427, 0.552136395, 0.426786511, 0.420817469, 0.343219921, 0.325312795, 0.307405669, 0.411863906, 0.322328274, 0.385003216, 0.271591416, 0.265622374, 0.301436627, 0.241746206, 0.211900995, 0.188024827, 0.235777163, 0.2268236, 0.176086742, 0.477523369, 0.149226053, 0.176086742, 0.185040305, 0.134303448, 0.161164137, 0.104458237, 0.098489195, 0.101473716, 0.098489195, 0.149226053, 0.116396321, 0.19697839, 0.077597547, 0.035814253, 0.050736858, 0.020891647, 0.068643984, 0.053721379,0.029845211, 0.047752337, 0.020891647, 0.098489195, 0.038798774,0.023876168, 0.017907126, 0.011938084, 0.023876168, 0.011938084,0.020891647, 0.011938084, 0.017907126, 0.005969042, 0.062674942,1.537028344, 1.46838436, 1.435554628, 1.575827118, 1.435554628, 1.453461754, 1.35497256, 1.226638154, 1.178885817, 1.537028344, 1.402724897, 1.29826666, 1.32811187, 1.286328575, 1.232607196, 1.18783938, 1.065474017, 1.005783596, 0.98787647, 0.952062217, 0.922217006, 0.692408885, 0.844619459, 0.999814554, 0.964000301, 0.895356317, 0.832681375, 0.817758769, 0.629733943, 0.972953864, 0.835665896, 0.614811338, 0.67748628, 0.674501759, 0.746130264, 0.704346969, 0.641672027, 0.871480148, 0.534229269, 0.656594632, 0.665548196, 0.555120916, 0.525275706, 0.495430495, 0.567059001, 0.53721379, 0.534229269, 0.53721379, 0.498415016, 0.578997085, 0.53721379, 0.405894864, 0.322328274, 
0.373065132, 0.328297316, 0.399925822, 0.382018695, 0.390972258, 0.280544979, 0.364111569, 0.295467585, 0.325312795, 0.247715248, 0.1701177, 0.238761685, 0.232792642, 0.131318926, 0.316359232, 0.104458237, 0.223839079, 0.211900995, 0.125349884, 0.101473716, 0.089535632, 0.110427279, 0.095504674, 0.110427279, 0.110427279, 0.071628505, 0.205931953, 0.080582069, 0.041783295, 0.038798774, 0.023876168, 0.0567059, 0.020891647, 0.047752337, 0.029845211, 0.017907126, 0.1134118, 0.023876168, 0.038798774, 0.014922605, 0.011938084, 0.032829732, 0.005969042, 0.020891647, 0.002984521, 0.002984521, 0.008953563, 0.068643984)), .Names = c("adm_id", "sex", "age", "counts", "year", "rate1", "rate2"), 
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -202L), spec = structure(list(cols = structure(list(adm_id = structure(list(), class = c("collector_integer", "collector")), sex = structure(list(), class = c("collector_integer", "collector")), age = structure(list(), class = c("collector_integer", "collector")), counts = structure(list(), class = c("collector_integer", "collector")), year = structure(list(), class = c("collector_integer", "collector")), rate1 = structure(list(), class = c("collector_double", "collector")), rate2 = structure(list(), class = c("collector_double", "collector"))), .Names = c("adm_id", "sex", "age", "counts", "year", "rate1", "rate2")), default = structure(list(), class = c("collector_guess", "collector"))), .Names = c("cols", "default"), class = "col_spec"))

像这样,

# Bin `age` into 5-year groups labelled by each interval's upper bound
# (0, 5, 10, ...); the first interval (-Inf, 0] holds age 0 on its own.
# Breaks and labels are derived from the `age` column of the data being
# piped, so the snippet does not depend on a separate `df` object or on
# `age` being the third column.
# NOTE: if max(age) is not a multiple of 5, ages above the last break fall
# outside every interval and become NA.
dta <- dta %>%
  mutate(
    age2 = cut(
      age,
      breaks = c(-Inf, seq(0, max(age), by = 5)),
      labels = seq(0, max(age), by = 5)
    )
  )

我确实喜欢默认标签,但是……

始终确保在重新编码时获得所需的结果,

# Cross-tabulate age against the recoded age2 (NA counts shown when present)
# to verify the recode. The recoded column is stored in `dta`, so that is the
# object to inspect.
with(dta, table(age, age2, useNA = "ifany"))

您的工作示例不直接使用 dta <- tibble(age = 0:80),有什么原因吗?

更新于 2017-10-18 11:42:11Z

如果你只是想替换某个值,可以像下面这样做(这里的 mutate 并不是对你问题的回答,只是为了演示 ifelse()。)

# Demonstration of ifelse(): bin a toy age vector, then overwrite one value.
# library() is used instead of require() so a missing package fails loudly.
library(tibble)
library(dplyr)

dta <- tibble(age = 0:9)

# Bin age into three groups labelled 1:3: (-Inf, 0], (0, 5], (5, Inf].
# labels = 1:3 matches the three intervals produced for this toy data,
# where seq(0, max(age), by = 5) yields the two inner breaks 0 and 5.
dta <- dta %>%
  mutate(
    age2 = cut(
      age,
      breaks = c(-Inf, seq(0, max(age), by = 5), Inf),
      labels = 1:3
    )
  )

# Replace age2 when age is 0, using base R. The else branch must return
# age2 (not age), otherwise the binning above is thrown away. age2 is a
# factor, so its labels are converted back to integers explicitly rather
# than relying on ifelse() exposing the internal factor codes.
dta$age2 <- with(
  dta,
  ifelse(age == 0, 667, as.integer(as.character(age2)))
)