如何在R中的纵向数据中添加带条件的新变量

How to add new variable with condition in longitudinal data in R

在下面的数据中,我想添加另一个变量,比如 z。

mydata
                y  x  sl
    1   199.92989  1   1
    2    27.73883  2   1
    3   144.00000  3   1
    4    72.00000  4   1
    5     0.00000  5   1
    6   392.60636  1   2
    7   749.52499  2   2
    8  3120.00000  3   2
    9  1600.00000  4   2
    10 1000.00000  5   2
    11 5840.00000  6   2
    12 3960.00000  7   2
    13 4700.00000  8   2
    14 1660.00000  9   2
    15 5620.00000 10   2
    16    0.00000  1 585
    17    0.00000  2 585
    18    0.00000  3 585
    19 3062.32962  1 587
    20 2048.97458  2 587
    21 1280.00000  3 587
    22 1440.00000  4 587
    23 2960.00000  5 587
    24  460.00000  6 587
    25  530.00000  7 587
    26 5190.00000  8 587
    27 3200.00000  9 587
    28 4620.00000 10 587
    29    0.00000  1 651
    30    0.00000  2 651
    31    0.00000  3 651
    32    0.00000  4 651

z=c(5,7,8):值 5 应该重复 5 次,对应 sl=1;值 7 应该重复 10 次,对应 sl=2;值 8 应该重复 10 次,对应 sl=587。如果某个 sl(这里是 585 和 651)的所有 y 观测值都为 0,那么该 sl 的 z 必须取值 0。z 列应该像这样:z=c(rep(5,5), rep(7,10), rep(0,3), rep(8,10), rep(0,4))=c(5 5 5 5 5 7 7 7 7 7 7 7 7 7 7 0 0 0 8 8 8 8 8 8 8 8 8 8 0 0 0 0)

如何实现以上条件?

我们可以使用dplyr中的case_when并指定条件。

library(dplyr)

# Add column `z` to `df`: a fixed value for each known `sl`, and 0 for the
# `sl` groups 585 and 651 when all of their `y` values are 0.
#
# all(y[sl == 585] == 0) is a single TRUE/FALSE that case_when() recycles over
# every row, so on its own it would also match rows of other `sl` values.
# Each zero rule is therefore combined with its own `sl == ...` test.
# Rows that match no rule are left as NA (the case_when() default).
df %>%
  mutate(z = case_when(sl == 1 ~ 5,
                       sl == 2 ~ 7,
                       sl == 587 ~ 8,
                       sl == 585 & all(y[sl == 585] == 0) ~ 0,
                       sl == 651 & all(y[sl == 651] == 0) ~ 0))

返回结果:

#            y  x  sl z
#1   199.92989  1   1 5
#2    27.73883  2   1 5
#3   144.00000  3   1 5
#4    72.00000  4   1 5
#5     0.00000  5   1 5
#6   392.60636  1   2 7
#7   749.52499  2   2 7
#8  3120.00000  3   2 7
#9  1600.00000  4   2 7
#10 1000.00000  5   2 7
#11 5840.00000  6   2 7
#12 3960.00000  7   2 7
#13 4700.00000  8   2 7
#14 1660.00000  9   2 7
#15 5620.00000 10   2 7
#16    0.00000  1 585 0
#17    0.00000  2 585 0
#18    0.00000  3 585 0
#19 3062.32962  1 587 8
#20 2048.97458  2 587 8
#21 1280.00000  3 587 8
#22 1440.00000  4 587 8
#23 2960.00000  5 587 8
#24  460.00000  6 587 8
#25  530.00000  7 587 8
#26 5190.00000  8 587 8
#27 3200.00000  9 587 8
#28 4620.00000 10 587 8
#29    0.00000  1 651 0
#30    0.00000  2 651 0
#31    0.00000  3 651 0
#32    0.00000  4 651 0

如果我们事先不知道哪个 sl 的 y 会全为 0,或者有多个这样的 sl,可以使用:

# Add column `z` to `df` without naming the all-zero `sl` groups in advance.
# Step 1: assign z for the known `sl` values; rows with any other `sl` are
# left as NA by case_when().
df %>%
  mutate(z = case_when(sl == 1 ~ 5,
                       sl == 2 ~ 7,
                       sl == 587 ~ 8)) %>%
  # Step 2: within each `sl` group, all(y == 0) is a single TRUE/FALSE, so
  # replace() overwrites either every z of the group with 0 or none of them.
  group_by(sl) %>%
  mutate(z = replace(z, all(y == 0), 0)) %>%
  # Drop the grouping so later verbs are not silently computed per `sl`.
  ungroup()

数据

# Example data: 32 rows in five `sl` groups. `x` restarts at 1 inside each
# group, and the row names are the strings "1" to "32".
df <- local({
  sl_ids <- c(1L, 2L, 585L, 587L, 651L)
  rows_per_sl <- c(5L, 10L, 3L, 10L, 4L)

  y_obs <- c(
    # sl = 1
    199.92989, 27.73883, 144, 72, 0,
    # sl = 2
    392.60636, 749.52499, 3120, 1600, 1000, 5840, 3960, 4700, 1660, 5620,
    # sl = 585
    0, 0, 0,
    # sl = 587
    3062.32962, 2048.97458, 1280, 1440, 2960, 460, 530, 5190, 3200, 4620,
    # sl = 651
    0, 0, 0, 0
  )

  structure(
    list(
      y = y_obs,
      # Integer counter 1..n within each group.
      x = sequence(rows_per_sl),
      sl = rep(sl_ids, times = rows_per_sl)
    ),
    class = "data.frame",
    row.names = as.character(seq_along(y_obs))
  )
})