按模式确定的时间区间对数据框取子集

Subset dataframe on time interval determined by pattern

我有这样的数据:

# Example data frame used by the snippets below: 17 rows whose row names
# (129..145) mirror the values in `Line`.
df <- structure(
  list(
    Line = as.character(129:145),
    Actor = c(
      "R", "R", "R", "R", "R", "B", "R", "B", "B", "B",
      "M", "M", "M", "M", "M", "W", "M"
    ),
    Act_cat = c(
      "SpeechRec", "ver", "SpeechRec", "ges", "ges", "gaze", "ges",
      "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze",
      "gaze", "gaze"
    ),
    Activity = c(
      "hey", "dort drüben die sparrenburg", "schwert",
      "D-onset", "D-peak", "~", "D-retract",
      "@tum", "~", "@tum", "~", "@tum", "~", "@tum", "~", "~", "@tum"
    ),
    Starttime_ms = c(
      46616, 48825, 48865, 49220, 50080, 50730,
      50900, 51009, 51191, 51270, 51486, 51809,
      52251, 52333, 53227, 53267, 53429
    ),
    Endtime_ms = c(
      47616, 53035, 49865, 50080, 50900, 51009,
      52220, 51191, 51270, 53474, 51808, 52250,
      52332, 53226, 53428, 53524, 53606
    )
  ),
  row.names = 129:145,
  class = "data.frame"
)

我需要做的是筛选(slice/filter)出这样的行子集:其 `Starttime_ms` 大于等于 `Activity` 中匹配模式 `sparrenburg` 的那一行的 `Starttime_ms`,并且其 `Endtime_ms` 小于等于同一模式所在行的 `Endtime_ms`。

我已经尝试了这两种子集化方法,但都无法正常工作:

library(dplyr)
# Attempt 1 (does not give the wanted rows): build a row range `from:to` for
# slice() out of two which() calls.
# - Each which() returns the positions of ALL rows satisfying its comparison,
#   not a single position, and `:` only uses the first element of each side,
#   so the range is not the intended "first matching row to last matching row".
# - str_detect() is a stringr function; it is only available here if stringr
#   is attached in addition to dplyr.
df %>% slice(which(Starttime_ms >= Starttime_ms[str_detect(Activity, "sparrenburg")])
             :
             which(Endtime_ms <= Endtime_ms[str_detect(Activity, "sparrenburg")]))

和:

# Attempt 2 (does not give the wanted rows): between(x, left, right) checks
# whether x lies between two boundary VALUES. Here the boundaries passed are
# the logical results of the two comparisons, and x is the character column
# `Line`, so the call does not express the intended time window.
df %>% filter(between(Line, 
                      Starttime_ms >= Starttime_ms[str_detect(Activity, "sparrenburg")], 
                      Endtime_ms <= Endtime_ms[str_detect(Activity, "sparrenburg")]))

我如何进行子集化才能得到这样的结果:

130  130     R       ver dort drüben die sparrenburg        48825      53035
131  131     R SpeechRec                     schwert        48865      49865
132  132     R       ges                     D-onset        49220      50080
133  133     R       ges                      D-peak        50080      50900
134  134     B      gaze                           ~        50730      51009
135  135     R       ges                   D-retract        50900      52220
136  136     B      gaze                        @tum        51009      51191
137  137     B      gaze                           ~        51191      51270
138  138     B      gaze                        @tum        51270      53474
139  139     M      gaze                           ~        51486      51808
140  140     M      gaze                        @tum        51809      52250
141  141     M      gaze                           ~        52251      52332
142  142     M      gaze                        @tum        52333      53226

您设定的条件已经很接近了,但需要用逻辑与运算符 & 把两个条件连接起来再传给 dplyr::filter(),以要求两者同时成立。另外,由于可能有多行满足 str_detect(Activity, "sparrenburg"),可以分别用 min() 和 max() 取最早的开始时间和最晚的结束时间作为边界。

library(tidyverse)

# Same example data as in the question, laid out one column per group of lines.
df <- structure(
  list(
    Line = as.character(129:145),
    Actor = c(
      "R", "R", "R", "R", "R", "B", "R", "B", "B", "B", "M", "M", "M", "M",
      "M", "W", "M"
    ),
    Act_cat = c(
      "SpeechRec", "ver", "SpeechRec", "ges", "ges", "gaze", "ges", "gaze",
      "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze"
    ),
    Activity = c(
      "hey", "dort drüben die sparrenburg", "schwert", "D-onset", "D-peak",
      "~", "D-retract", "@tum", "~", "@tum", "~", "@tum", "~", "@tum", "~",
      "~", "@tum"
    ),
    Starttime_ms = c(
      46616, 48825, 48865, 49220, 50080, 50730, 50900, 51009, 51191,
      51270, 51486, 51809, 52251, 52333, 53227, 53267, 53429
    ),
    Endtime_ms = c(
      47616, 53035, 49865, 50080, 50900, 51009, 52220, 51191, 51270,
      53474, 51808, 52250, 52332, 53226, 53428, 53524, 53606
    )
  ),
  row.names = 129:145,
  class = "data.frame"
)

# Keep the rows lying inside the time window spanned by the row(s) whose
# Activity matches "sparrenburg": start no earlier than the earliest matching
# start time AND end no later than the latest matching end time.
# min()/max() reduce each bound to a single value in case several rows match.
df %>%
  filter(
    Starttime_ms >=
      min(Starttime_ms[str_detect(Activity, "sparrenburg")], na.rm = TRUE) &
      Endtime_ms <=
        max(Endtime_ms[str_detect(Activity, "sparrenburg")], na.rm = TRUE)
  )
#>    Line Actor   Act_cat                    Activity Starttime_ms Endtime_ms
#> 1   130     R       ver dort drüben die sparrenburg        48825      53035
#> 2   131     R SpeechRec                     schwert        48865      49865
#> 3   132     R       ges                     D-onset        49220      50080
#> 4   133     R       ges                      D-peak        50080      50900
#> 5   134     B      gaze                           ~        50730      51009
#> 6   135     R       ges                   D-retract        50900      52220
#> 7   136     B      gaze                        @tum        51009      51191
#> 8   137     B      gaze                           ~        51191      51270
#> 9   139     M      gaze                           ~        51486      51808
#> 10  140     M      gaze                        @tum        51809      52250
#> 11  141     M      gaze                           ~        52251      52332

由 reprex 包 (v2.0.1) 创建于 2022-02-16

您也可以使用基础 R 的索引方式来做到这一点。

# Rows whose Activity mentions the pattern; computed once and reused for both
# bounds.
is_match <- stringr::str_detect(df$Activity, "sparrenburg")
# Reduce each bound to a single value so that several matching rows are not
# recycled element-wise against the columns in the comparison below.
starttime <- min(df[["Starttime_ms"]][is_match], na.rm = TRUE)
stoptime <- max(df[["Endtime_ms"]][is_match], na.rm = TRUE)
# `[[` extracts plain vectors, so the row index is an ordinary logical vector
# rather than a one-column logical matrix.
slice_df <- df[
  df[["Starttime_ms"]] >= starttime & df[["Endtime_ms"]] <= stoptime,
]
> slice_df
    Line Actor   Act_cat                    Activity Starttime_ms Endtime_ms
130  130     R       ver dort drüben die sparrenburg        48825      53035
131  131     R SpeechRec                     schwert        48865      49865
132  132     R       ges                     D-onset        49220      50080
133  133     R       ges                      D-peak        50080      50900
134  134     B      gaze                           ~        50730      51009
135  135     R       ges                   D-retract        50900      52220
136  136     B      gaze                        @tum        51009      51191
137  137     B      gaze                           ~        51191      51270
139  139     M      gaze                           ~        51486      51808
140  140     M      gaze                        @tum        51809      52250
141  141     M      gaze                           ~        52251      52332