在 R 中,如果某列下一行的值与指定值匹配,则创建新变量
Create new variable if second row of a column matches a value in R
我想创建一个新变量 A_row_error:如果 first_A_row = TRUE
且下一行的 message = ERROR,则 A_row_error = TRUE(如果下一行
是任何其他消息,则 A_row_error = FALSE)。请注意,下一行由时间戳确定,且时间戳必须按升序排列。A_row_error
的 TRUE 值还必须与 first_A_row = TRUE
位于同一行(所需输出请参见下面的表格)。
示例数据集:
# Sample data: one row per (participant, timestamp) event.
# Note that `first_A_row` holds the strings "TRUE"/"FALSE", not logical values.
participant_id <- c(
  "ps1", "ps1", "ps1", "ps1", "ps2", "ps2", "ps3", "ps3", "ps3", "ps3"
)
timestamp <- c(0.01, 0.02, 0.03, 0.04, 0.01, 0.02, 0.01, 0.02, 0.03, 0.04)
event <- c("A", "A", "A", "B", "B", "A", "A", "A", "B", "A")
first_A_row <- c(
  "TRUE", "FALSE", "FALSE", "FALSE", "FALSE",
  "TRUE", "TRUE", "FALSE", "FALSE", "FALSE"
)
message <- c(
  "ACCEPTED", "ERROR", "DECLINED", "ACCEPTED", "HELLO",
  "BYE", "ACCEPTED", "BYE", "ERROR", "ACCEPTED"
)
# Keep the data frame under the name `df1` so it can be reused by later code
# instead of being discarded, then print it as before.
df1 <- data.frame(participant_id, timestamp, event, first_A_row, message)
df1
期望的输出:
participant_id | timestamp | event | first_A_row | message | A_row_error |
---|---|---|---|---|---|
ps1 | 0.01 | A | TRUE | ACCEPTED | TRUE |
ps1 | 0.02 | A | FALSE | ERROR | FALSE |
ps1 | 0.03 | A | FALSE | DECLINED | FALSE |
ps1 | 0.04 | B | FALSE | ACCEPTED | FALSE |
ps2 | 0.01 | B | FALSE | HELLO | FALSE |
ps2 | 0.02 | A | TRUE | BYE | FALSE |
ps3 | 0.01 | A | TRUE | ACCEPTED | FALSE |
ps3 | 0.02 | A | FALSE | BYE | FALSE |
ps3 | 0.03 | B | FALSE | ERROR | FALSE |
ps3 | 0.04 | A | FALSE | ACCEPTED | FALSE |
我们可以按 'participant_id' 分组,并用 lead 取得 'message' 的下一个值
来构造逻辑条件:当下一个值是 'ERROR' 且当前行的 'first_A_row' 为 TRUE 时,结果为 TRUE(first_A_row
列是字符型而不是逻辑型,因此先用 as.logical
转换)。
library(dplyr)
# Flag rows where `first_A_row` is "TRUE" and the next message of the same
# participant (next by ascending `timestamp`) is "ERROR".
# Assumes `df1` holds the sample data frame with the columns used below.
df1 %>%
  group_by(participant_id) %>%
  mutate(
    # `order_by = timestamp` makes "next row" follow time order rather than
    # whatever order the rows happen to be stored in.
    # `lead()` yields NA for the last row of each group; `%in%` turns that NA
    # into FALSE, so a group's last row is never compared with its own message.
    # `first_A_row` is stored as character, hence the `as.logical()` conversion.
    A_row_error = as.logical(first_A_row) &
      lead(message, order_by = timestamp) %in% "ERROR"
  ) %>%
  ungroup()
-输出
# A tibble: 10 × 6
participant_id timestamp event first_A_row message A_row_error
<chr> <dbl> <chr> <chr> <chr> <lgl>
1 ps1 0.01 A TRUE ACCEPTED TRUE
2 ps1 0.02 A FALSE ERROR FALSE
3 ps1 0.03 A FALSE DECLINED FALSE
4 ps1 0.04 B FALSE ACCEPTED FALSE
5 ps2 0.01 B FALSE HELLO FALSE
6 ps2 0.02 A TRUE BYE FALSE
7 ps3 0.01 A TRUE ACCEPTED FALSE
8 ps3 0.02 A FALSE BYE FALSE
9 ps3 0.03 B FALSE ERROR FALSE
10 ps3 0.04 A FALSE ACCEPTED FALSE
我想创建一个新变量 A_row_error:如果 first_A_row = TRUE
且下一行的 message = ERROR,则 A_row_error = TRUE(如果下一行
是任何其他消息,则 A_row_error = FALSE)。请注意,下一行由时间戳确定,且时间戳必须按升序排列。A_row_error
的 TRUE 值还必须与 first_A_row = TRUE
位于同一行(所需输出请参见下面的表格)。
示例数据集:
# Sample data: one row per (participant, timestamp) event.
# Note that `first_A_row` holds the strings "TRUE"/"FALSE", not logical values.
participant_id <- c(
  "ps1", "ps1", "ps1", "ps1", "ps2", "ps2", "ps3", "ps3", "ps3", "ps3"
)
timestamp <- c(0.01, 0.02, 0.03, 0.04, 0.01, 0.02, 0.01, 0.02, 0.03, 0.04)
event <- c("A", "A", "A", "B", "B", "A", "A", "A", "B", "A")
first_A_row <- c(
  "TRUE", "FALSE", "FALSE", "FALSE", "FALSE",
  "TRUE", "TRUE", "FALSE", "FALSE", "FALSE"
)
message <- c(
  "ACCEPTED", "ERROR", "DECLINED", "ACCEPTED", "HELLO",
  "BYE", "ACCEPTED", "BYE", "ERROR", "ACCEPTED"
)
# Keep the data frame under the name `df1` so it can be reused by later code
# instead of being discarded, then print it as before.
df1 <- data.frame(participant_id, timestamp, event, first_A_row, message)
df1
期望的输出:
participant_id | timestamp | event | first_A_row | message | A_row_error |
---|---|---|---|---|---|
ps1 | 0.01 | A | TRUE | ACCEPTED | TRUE |
ps1 | 0.02 | A | FALSE | ERROR | FALSE |
ps1 | 0.03 | A | FALSE | DECLINED | FALSE |
ps1 | 0.04 | B | FALSE | ACCEPTED | FALSE |
ps2 | 0.01 | B | FALSE | HELLO | FALSE |
ps2 | 0.02 | A | TRUE | BYE | FALSE |
ps3 | 0.01 | A | TRUE | ACCEPTED | FALSE |
ps3 | 0.02 | A | FALSE | BYE | FALSE |
ps3 | 0.03 | B | FALSE | ERROR | FALSE |
ps3 | 0.04 | A | FALSE | ACCEPTED | FALSE |
我们可以按 'participant_id' 分组,并用 lead 取得 'message' 的下一个值
来构造逻辑条件:当下一个值是 'ERROR' 且当前行的 'first_A_row' 为 TRUE 时,结果为 TRUE(first_A_row
列是字符型而不是逻辑型,因此先用 as.logical
转换)。
library(dplyr)
# Flag rows where `first_A_row` is "TRUE" and the next message of the same
# participant (next by ascending `timestamp`) is "ERROR".
# Assumes `df1` holds the sample data frame with the columns used below.
df1 %>%
  group_by(participant_id) %>%
  mutate(
    # `order_by = timestamp` makes "next row" follow time order rather than
    # whatever order the rows happen to be stored in.
    # `lead()` yields NA for the last row of each group; `%in%` turns that NA
    # into FALSE, so a group's last row is never compared with its own message.
    # `first_A_row` is stored as character, hence the `as.logical()` conversion.
    A_row_error = as.logical(first_A_row) &
      lead(message, order_by = timestamp) %in% "ERROR"
  ) %>%
  ungroup()
-输出
# A tibble: 10 × 6
participant_id timestamp event first_A_row message A_row_error
<chr> <dbl> <chr> <chr> <chr> <lgl>
1 ps1 0.01 A TRUE ACCEPTED TRUE
2 ps1 0.02 A FALSE ERROR FALSE
3 ps1 0.03 A FALSE DECLINED FALSE
4 ps1 0.04 B FALSE ACCEPTED FALSE
5 ps2 0.01 B FALSE HELLO FALSE
6 ps2 0.02 A TRUE BYE FALSE
7 ps3 0.01 A TRUE ACCEPTED FALSE
8 ps3 0.02 A FALSE BYE FALSE
9 ps3 0.03 B FALSE ERROR FALSE
10 ps3 0.04 A FALSE ACCEPTED FALSE