R只保留符合特定条件的行

R: Only keep rows up to a certain condition

我有一个数据框如下

   head(data)

   subject block trial timeLeft timeRight stim1 stim2 Chosen
1        1    13     0        0         0     2     1      2
2        1    13     1        0         1     3     2      2
3        1    13     3        0         0     3     1      1
4        1    13     4        2         0     2     3      3
5        1    13     6        1         1     1     3      1
6        1    13     7        2         2     2     1      1
...   

     
454     1006    14     0        0         0     6     5      5
455     1006    14     1        0         0     6     4      6
456     1006    14     3        0         1     4     5      4
457     1006    14     4        1         1     4     5      4
458     1006    14     6        1         2     6     4      6
  

我的目标是按 subject 和 block 分组,并且在每组中只保留到 timeLeft 和 timeRight 都等于 0 的那一行为止(包括该行)的所有行

在这种情况下,输出将是

       subject block trial timeLeft timeRight stim1 stim2 Chosen
    1        1    13     0        0         0     2     1      2
    2        1    13     1        0         1     3     2      2
    3        1    13     3        0         0     3     1      1
...
   454     1006    14     0        0         0     6     5      5
   455     1006    14     1        0         0     6     4      6

提前致谢!

这里是数据的结构

'data.frame':   64748 obs. of  8 variables:
 $ subject  : num  1 1 1 1 1 1 1 1 1 1 ...
 $ block    : int  13 13 13 13 13 13 13 13 13 13 ...
 $ trial    : int  0 1 3 4 6 7 9 10 12 13 ...
 $ timeLeft : int  0 0 0 2 1 2 2 1 3 4 ...
 $ timeRight: int  0 1 0 0 1 2 1 3 4 4 ...
 $ stim1    : int  2 3 3 2 1 2 2 3 2 2 ...
 $ stim2    : int  1 2 1 3 3 1 3 1 1 1 ...
 $ Chosen   : int  2 2 1 3 1 1 2 1 2 2 ...

您可以只过滤符合条件的行,然后分组

# Keep only the rows where both timers are exactly zero, then group the
# result by subject and block. The comparison is `== 0` because the stated
# condition is "timeLeft and timeRight equal to 0"; `> 0` would select the
# opposite set of rows.
# NOTE: this is a plain row filter, so rows lying between two zero/zero rows
# are dropped as well.
data %>%
  filter(timeLeft == 0 & timeRight == 0) %>%
  group_by(subject, block)

您可以在自定义函数的帮助下完成此操作 -

library(dplyr)

# Row positions to keep within one group.
#
# timeLeft, timeRight: vectors of equal length holding the two timers.
#
# Returns the integer positions running from the first to the second element
# at which both timers are zero. When fewer than two such elements exist,
# returns 0, which selects no rows when used as a row index.
select_rows <- function(timeLeft, timeRight) {
  zero_rows <- which(timeLeft == 0 & timeRight == 0)
  if (length(zero_rows) < 2) {
    return(0)
  }
  zero_rows[1]:zero_rows[2]
}


# For every (subject, block) group, keep only the row positions returned by
# select_rows(), then drop the grouping from the result.
data %>%
  group_by(subject, block) %>%
  slice(select_rows(timeLeft, timeRight)) %>%
  ungroup()

#   subject block trial timeLeft timeRight stim1 stim2 Chosen
#    <int> <int> <int>    <int>     <int> <int> <int>  <int>
#1       1    13     0        0         0     2     1      2
#2       1    13     1        0         1     3     2      2
#3       1    13     3        0         0     3     1      1
#4    1006    14     0        0         0     6     5      5
#5    1006    14     1        0         0     6     4      6

如果数据量很大,您也可以使用 data.table -

library(data.table)
# Convert `data` to a data.table by reference, then subset each
# (subject, block) group to the row positions returned by select_rows().
setDT(data)[
  ,
  .SD[select_rows(timeLeft, timeRight)],
  by = .(subject, block)
]

数据

如果您以可重现的格式(reproducible format)提供数据,会更容易提供帮助
# Reproducible sample: six trials for subject 1 (block 13) followed by five
# trials for subject 1006 (block 14). All columns are integer; the row names
# are the character labels "1".."6" and "454".."458".
data <- data.frame(
  subject = rep(c(1L, 1006L), times = c(6L, 5L)),
  block = rep(c(13L, 14L), times = c(6L, 5L)),
  trial = c(0L, 1L, 3L, 4L, 6L, 7L, 0L, 1L, 3L, 4L, 6L),
  timeLeft = c(0L, 0L, 0L, 2L, 1L, 2L, 0L, 0L, 0L, 1L, 1L),
  timeRight = c(0L, 1L, 0L, 0L, 1L, 2L, 0L, 0L, 1L, 1L, 2L),
  stim1 = c(2L, 3L, 3L, 2L, 1L, 2L, 6L, 6L, 4L, 4L, 6L),
  stim2 = c(1L, 2L, 1L, 3L, 3L, 1L, 5L, 4L, 5L, 5L, 4L),
  Chosen = c(2L, 2L, 1L, 3L, 1L, 1L, 5L, 6L, 4L, 4L, 6L),
  row.names = c("1", "2", "3", "4", "5", "6", "454", "455", "456", "457", "458")
)

如果你想保留最后一个 timeLeft 和 timeRight 都为 0 的行之前(包括该行)的所有行,可以试试这个方法。

数据

   subject block trial timeLeft timeRight stim1 stim2 Chosen
1        1    13     0        0         0     2     1      2
2        1    13     1        0         1     3     2      2
3        1    13     3        0         0     3     1      1
4        1    13     4        2         0     2     3      3
5        1    13     6        1         1     1     3      1
6        1    13     7        2         2     2     1      1
7     1006    14     0        0         1     6     5      5
8     1006    14     0        0         0     6     5      5
9     1006    14     1        0         0     6     4      6
10    1006    14     3        0         1     4     5      4
11    1006    14     4        1         1     4     5      4
12    1006    14     6        1         2     6     4      6

我为 subject:1006 添加了一行,以使第一行不是 0,0

代码

# For each subject, keep every row from the first one up to and including the
# last row where both timers are zero.
# NOTE(review): grouping is by subject only; add `block` to group_by() if a
# subject can span several blocks.
df %>%
  group_by(subject) %>%
  # `key` is the position of the last zero/zero row in the group. The leading
  # 0L makes max() return 0 (instead of -Inf with a warning) when the group
  # has no such row.
  mutate(key = max(0L, which(timeLeft == 0 & timeRight == 0))) %>%
  # `key` is a column (one value repeated per row), so take its first element;
  # seq_len() yields no rows when key is 0, where `1:key` would not.
  slice(seq_len(key[1]))

  subject block trial timeLeft timeRight stim1 stim2 Chosen   key
    <int> <int> <int>    <int>     <int> <int> <int>  <int> <int>
1       1    13     0        0         0     2     1      2     3
2       1    13     1        0         1     3     2      2     3
3       1    13     3        0         0     3     1      1     3
4    1006    14     0        0         1     6     5      5     3
5    1006    14     0        0         0     6     5      5     3
6    1006    14     1        0         0     6     4      6     3