按模式确定的时间区间对数据框取子集
Subset dataframe on time interval determined by pattern
我有这样的数据:
# Sample data frame used throughout: 17 rows (row names 129:145) with the
# columns Line, Actor, Act_cat, Activity, Starttime_ms and Endtime_ms.
# The row whose Activity contains "sparrenburg" (Line 130) defines the
# reference interval for the subsetting question below.
# NOTE(review): times are presumably milliseconds, per the column names.
df <- structure(list(Line = c("129", "130", "131", "132", "133", "134", "135",
"136", "137", "138", "139", "140", "141", "142", "143", "144",
"145"),
Actor = c("R", "R", "R", "R", "R", "B", "R", "B", "B", "B",
"M", "M", "M", "M", "M", "W", "M"),
Act_cat = c("SpeechRec", "ver", "SpeechRec","ges", "ges", "gaze", "ges", "gaze", "gaze", "gaze", "gaze",
"gaze", "gaze", "gaze", "gaze", "gaze", "gaze"),
Activity = c("hey", "dort drüben die sparrenburg",
"schwert", "D-onset", "D-peak", "~", "D-retract", "@tum", "~", "@tum", "~", "@tum",
"~", "@tum", "~", "~", "@tum"),
Starttime_ms = c(46616, 48825, 48865, 49220, 50080, 50730, 50900, 51009, 51191, 51270, 51486, 51809,
52251, 52333, 53227, 53267, 53429),
Endtime_ms = c(47616,53035, 49865, 50080, 50900, 51009, 52220, 51191, 51270, 53474, 51808, 52250,
52332, 53226, 53428, 53524, 53606)),
row.names = 129:145, class = "data.frame")
我需要做的是 slice/filter 出满足以下条件的行子集:Starttime_ms >= Activity 列中匹配模式 sparrenburg 的那一行的 Starttime_ms,并且 Endtime_ms <= Activity 列中匹配同一模式 sparrenburg 的那一行的 Endtime_ms。
我已经尝试了这两种子集化方法,但都无法正常工作:
library(dplyr)
# Attempt 1 (does NOT produce the wanted rows): tries to build a row-index
# range with `:` between two which() results. Each which() returns a whole
# vector of indices, and `:` uses only the first element of each operand,
# so the resulting sequence is not the intended interval.
# NOTE(review): str_detect() is a stringr function, but only dplyr is
# attached above - confirm stringr is loaded before running this.
df %>% slice(which(Starttime_ms >= Starttime_ms[str_detect(Activity, "sparrenburg")])
:
which(Endtime_ms <= Endtime_ms[str_detect(Activity, "sparrenburg")]))
和:
# Attempt 2 (does NOT work): between() is given the character column `Line`
# as the value to test and two logical comparison vectors as its lower and
# upper bounds, instead of the two conditions being combined directly.
df %>% filter(between(Line,
Starttime_ms >= Starttime_ms[str_detect(Activity, "sparrenburg")],
Endtime_ms <= Endtime_ms[str_detect(Activity, "sparrenburg")]))
我如何进行子集化才能得到这样的结果:
130 130 R ver dort drüben die sparrenburg 48825 53035
131 131 R SpeechRec schwert 48865 49865
132 132 R ges D-onset 49220 50080
133 133 R ges D-peak 50080 50900
134 134 B gaze ~ 50730 51009
135 135 R ges D-retract 50900 52220
136 136 B gaze @tum 51009 51191
137 137 B gaze ~ 51191 51270
138 138 B gaze @tum 51270 53474
139 139 M gaze ~ 51486 51808
140 140 M gaze @tum 51809 52250
141 141 M gaze ~ 52251 52332
142 142 M gaze @tum 52333 53226
您设置的条件已经很接近了,但需要用逻辑与运算符 & 将它们连接起来传给 dplyr::filter(),以要求两个条件同时成立。由于可能有多行满足条件 str_detect(Activity, "sparrenburg"),您可以直接取 min() 和 max(),得到最极端的取值。
library(tidyverse)
# Same sample data as in the question (17 rows, row names 129:145), repeated
# here so the example below is self-contained.
df <- structure(list(Line = c("129", "130", "131", "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142", "143", "144", "145"), Actor = c("R", "R", "R", "R", "R", "B", "R", "B", "B", "B", "M", "M", "M", "M", "M", "W", "M"), Act_cat = c("SpeechRec", "ver", "SpeechRec","ges", "ges", "gaze", "ges", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze"), Activity = c("hey", "dort drüben die sparrenburg", "schwert", "D-onset", "D-peak", "~", "D-retract", "@tum", "~", "@tum", "~", "@tum", "~", "@tum", "~", "~", "@tum"), Starttime_ms = c(46616, 48825, 48865, 49220, 50080, 50730, 50900, 51009, 51191, 51270, 51486, 51809, 52251, 52333, 53227, 53267, 53429), Endtime_ms = c(47616,53035, 49865, 50080, 50900, 51009, 52220, 51191, 51270, 53474, 51808, 52250, 52332, 53226, 53428, 53524, 53606)), row.names = 129:145, class = "data.frame")
# Keep the rows whose [Starttime_ms, Endtime_ms] span lies entirely inside the
# interval of the row(s) whose Activity matches "sparrenburg".
# min()/max() collapse several matching rows into the widest interval, and
# both conditions are joined with `&` so a row must satisfy both.
# `na.rm = TRUE` is spelled out because `T` is an ordinary, reassignable
# variable rather than a reserved word.
df %>%
  filter(
    Starttime_ms >=
      min(Starttime_ms[str_detect(Activity, "sparrenburg")], na.rm = TRUE) &
      Endtime_ms <=
        max(Endtime_ms[str_detect(Activity, "sparrenburg")], na.rm = TRUE)
  )
#> Line Actor Act_cat Activity Starttime_ms Endtime_ms
#> 1 130 R ver dort drüben die sparrenburg 48825 53035
#> 2 131 R SpeechRec schwert 48865 49865
#> 3 132 R ges D-onset 49220 50080
#> 4 133 R ges D-peak 50080 50900
#> 5 134 B gaze ~ 50730 51009
#> 6 135 R ges D-retract 50900 52220
#> 7 136 B gaze @tum 51009 51191
#> 8 137 B gaze ~ 51191 51270
#> 9 139 M gaze ~ 51486 51808
#> 10 140 M gaze @tum 51809 52250
#> 11 141 M gaze ~ 52251 52332
由 reprex package (v2.0.1)
创建于 2022-02-16
您可以使用基本索引来做到这一点。
# Base-R subsetting of `df` to the rows that lie inside the time interval of
# the row(s) whose Activity matches "sparrenburg".

# Detect the reference row(s) once instead of once per bound.
is_target <- stringr::str_detect(df$Activity, "sparrenburg")

# Collapse to scalar bounds: if several rows match, a vector bound would be
# silently recycled against the columns and select the wrong rows.
starttime <- min(df[["Starttime_ms"]][is_target], na.rm = TRUE)
stoptime <- max(df[["Endtime_ms"]][is_target], na.rm = TRUE)

# `[[` extracts plain vectors, so the mask is a logical vector rather than the
# one-column matrix that comparing `df["col"]` produces. which() drops rows
# whose comparison is NA instead of returning all-NA rows.
in_interval <- df[["Starttime_ms"]] >= starttime &
  df[["Endtime_ms"]] <= stoptime
slice_df <- df[which(in_interval), ]
> slice_df
Line Actor Act_cat Activity Starttime_ms Endtime_ms
130 130 R ver dort drüben die sparrenburg 48825 53035
131 131 R SpeechRec schwert 48865 49865
132 132 R ges D-onset 49220 50080
133 133 R ges D-peak 50080 50900
134 134 B gaze ~ 50730 51009
135 135 R ges D-retract 50900 52220
136 136 B gaze @tum 51009 51191
137 137 B gaze ~ 51191 51270
139 139 M gaze ~ 51486 51808
140 140 M gaze @tum 51809 52250
141 141 M gaze ~ 52251 52332
我有这样的数据:
# Sample data frame used throughout: 17 rows (row names 129:145) with the
# columns Line, Actor, Act_cat, Activity, Starttime_ms and Endtime_ms.
# The row whose Activity contains "sparrenburg" (Line 130) defines the
# reference interval for the subsetting question below.
# NOTE(review): times are presumably milliseconds, per the column names.
df <- structure(list(Line = c("129", "130", "131", "132", "133", "134", "135",
"136", "137", "138", "139", "140", "141", "142", "143", "144",
"145"),
Actor = c("R", "R", "R", "R", "R", "B", "R", "B", "B", "B",
"M", "M", "M", "M", "M", "W", "M"),
Act_cat = c("SpeechRec", "ver", "SpeechRec","ges", "ges", "gaze", "ges", "gaze", "gaze", "gaze", "gaze",
"gaze", "gaze", "gaze", "gaze", "gaze", "gaze"),
Activity = c("hey", "dort drüben die sparrenburg",
"schwert", "D-onset", "D-peak", "~", "D-retract", "@tum", "~", "@tum", "~", "@tum",
"~", "@tum", "~", "~", "@tum"),
Starttime_ms = c(46616, 48825, 48865, 49220, 50080, 50730, 50900, 51009, 51191, 51270, 51486, 51809,
52251, 52333, 53227, 53267, 53429),
Endtime_ms = c(47616,53035, 49865, 50080, 50900, 51009, 52220, 51191, 51270, 53474, 51808, 52250,
52332, 53226, 53428, 53524, 53606)),
row.names = 129:145, class = "data.frame")
我需要做的是 slice/filter 出满足以下条件的行子集:Starttime_ms >= Activity 列中匹配模式 sparrenburg 的那一行的 Starttime_ms,并且 Endtime_ms <= Activity 列中匹配同一模式 sparrenburg 的那一行的 Endtime_ms。
我已经尝试了这两种子集化方法,但都无法正常工作:
library(dplyr)
# Attempt 1 (does NOT produce the wanted rows): tries to build a row-index
# range with `:` between two which() results. Each which() returns a whole
# vector of indices, and `:` uses only the first element of each operand,
# so the resulting sequence is not the intended interval.
# NOTE(review): str_detect() is a stringr function, but only dplyr is
# attached above - confirm stringr is loaded before running this.
df %>% slice(which(Starttime_ms >= Starttime_ms[str_detect(Activity, "sparrenburg")])
:
which(Endtime_ms <= Endtime_ms[str_detect(Activity, "sparrenburg")]))
和:
# Attempt 2 (does NOT work): between() is given the character column `Line`
# as the value to test and two logical comparison vectors as its lower and
# upper bounds, instead of the two conditions being combined directly.
df %>% filter(between(Line,
Starttime_ms >= Starttime_ms[str_detect(Activity, "sparrenburg")],
Endtime_ms <= Endtime_ms[str_detect(Activity, "sparrenburg")]))
我如何进行子集化才能得到这样的结果:
130 130 R ver dort drüben die sparrenburg 48825 53035
131 131 R SpeechRec schwert 48865 49865
132 132 R ges D-onset 49220 50080
133 133 R ges D-peak 50080 50900
134 134 B gaze ~ 50730 51009
135 135 R ges D-retract 50900 52220
136 136 B gaze @tum 51009 51191
137 137 B gaze ~ 51191 51270
138 138 B gaze @tum 51270 53474
139 139 M gaze ~ 51486 51808
140 140 M gaze @tum 51809 52250
141 141 M gaze ~ 52251 52332
142 142 M gaze @tum 52333 53226
您设置的条件已经很接近了,但需要用逻辑与运算符 & 将它们连接起来传给 dplyr::filter(),以要求两个条件同时成立。由于可能有多行满足条件 str_detect(Activity, "sparrenburg"),您可以直接取 min() 和 max(),得到最极端的取值。
library(tidyverse)
# Same sample data as in the question (17 rows, row names 129:145), repeated
# here so the example below is self-contained.
df <- structure(list(Line = c("129", "130", "131", "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142", "143", "144", "145"), Actor = c("R", "R", "R", "R", "R", "B", "R", "B", "B", "B", "M", "M", "M", "M", "M", "W", "M"), Act_cat = c("SpeechRec", "ver", "SpeechRec","ges", "ges", "gaze", "ges", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze"), Activity = c("hey", "dort drüben die sparrenburg", "schwert", "D-onset", "D-peak", "~", "D-retract", "@tum", "~", "@tum", "~", "@tum", "~", "@tum", "~", "~", "@tum"), Starttime_ms = c(46616, 48825, 48865, 49220, 50080, 50730, 50900, 51009, 51191, 51270, 51486, 51809, 52251, 52333, 53227, 53267, 53429), Endtime_ms = c(47616,53035, 49865, 50080, 50900, 51009, 52220, 51191, 51270, 53474, 51808, 52250, 52332, 53226, 53428, 53524, 53606)), row.names = 129:145, class = "data.frame")
# Keep the rows whose [Starttime_ms, Endtime_ms] span lies entirely inside the
# interval of the row(s) whose Activity matches "sparrenburg".
# min()/max() collapse several matching rows into the widest interval, and
# both conditions are joined with `&` so a row must satisfy both.
# `na.rm = TRUE` is spelled out because `T` is an ordinary, reassignable
# variable rather than a reserved word.
df %>%
  filter(
    Starttime_ms >=
      min(Starttime_ms[str_detect(Activity, "sparrenburg")], na.rm = TRUE) &
      Endtime_ms <=
        max(Endtime_ms[str_detect(Activity, "sparrenburg")], na.rm = TRUE)
  )
#> Line Actor Act_cat Activity Starttime_ms Endtime_ms
#> 1 130 R ver dort drüben die sparrenburg 48825 53035
#> 2 131 R SpeechRec schwert 48865 49865
#> 3 132 R ges D-onset 49220 50080
#> 4 133 R ges D-peak 50080 50900
#> 5 134 B gaze ~ 50730 51009
#> 6 135 R ges D-retract 50900 52220
#> 7 136 B gaze @tum 51009 51191
#> 8 137 B gaze ~ 51191 51270
#> 9 139 M gaze ~ 51486 51808
#> 10 140 M gaze @tum 51809 52250
#> 11 141 M gaze ~ 52251 52332
由 reprex package (v2.0.1)
创建于 2022-02-16
您可以使用基本索引来做到这一点。
# Base-R subsetting of `df` to the rows that lie inside the time interval of
# the row(s) whose Activity matches "sparrenburg".

# Detect the reference row(s) once instead of once per bound.
is_target <- stringr::str_detect(df$Activity, "sparrenburg")

# Collapse to scalar bounds: if several rows match, a vector bound would be
# silently recycled against the columns and select the wrong rows.
starttime <- min(df[["Starttime_ms"]][is_target], na.rm = TRUE)
stoptime <- max(df[["Endtime_ms"]][is_target], na.rm = TRUE)

# `[[` extracts plain vectors, so the mask is a logical vector rather than the
# one-column matrix that comparing `df["col"]` produces. which() drops rows
# whose comparison is NA instead of returning all-NA rows.
in_interval <- df[["Starttime_ms"]] >= starttime &
  df[["Endtime_ms"]] <= stoptime
slice_df <- df[which(in_interval), ]
> slice_df
Line Actor Act_cat Activity Starttime_ms Endtime_ms
130 130 R ver dort drüben die sparrenburg 48825 53035
131 131 R SpeechRec schwert 48865 49865
132 132 R ges D-onset 49220 50080
133 133 R ges D-peak 50080 50900
134 134 B gaze ~ 50730 51009
135 135 R ges D-retract 50900 52220
136 136 B gaze @tum 51009 51191
137 137 B gaze ~ 51191 51270
139 139 M gaze ~ 51486 51808
140 140 M gaze @tum 51809 52250
141 141 M gaze ~ 52251 52332