添加列,取一列中由另一列分组的值的差异
Add column taking difference of values in one column grouped by other column
我有一个名为 diff_colour_valid_int1 的 df:
> head(diff_colour_valid_int1)
# A tibble: 6 x 5
# Groups: search_difficulty, cue_validity [3]
search_difficulty cue_validity cue_colour meanrt stdev
<fct> <fct> <fct> <dbl> <dbl>
1 difficult FALSE Match (Color) cue 0.990 0.158
2 difficult FALSE Mismatch (Onset) cue 0.972 0.150
3 difficult TRUE Match (Color) cue 0.828 0.133
4 difficult TRUE Mismatch (Onset) cue 0.881 0.177
5 easy FALSE Match (Color) cue 0.813 0.132
6 easy FALSE Mismatch (Onset) cue 0.801 0.137
>
我想添加一个名为 cue_effect 的列,用于计算每个 cue_validity 对(例如前两个 FALSE FALSE)的平均值之间的差异。因此该列的前六个值将是:
cue_effect
<dbl>
0.018
0.018
-0.053
-0.053
0.012
0.012
如有任何建议,我们将不胜感激。谢谢
我们可以使用 data.table 包中的 rleid 创建分组列:
library(dplyr)
library(data.table)
# Number each run of consecutive, identical cue_validity values with rleid().
# The run ids are computed on the ungrouped table, so they are unique across
# all rows; each (search_difficulty, grp) pair then forms one group.
diff_colour_valid_int1 %>%
  ungroup() %>%
  mutate(grp = rleid(cue_validity)) %>%
  group_by(search_difficulty, grp) %>%
  # -diff() yields "first meanrt minus second meanrt"; that single value is
  # recycled to both rows, so every group is expected to hold exactly two rows.
  mutate(cue_effect = -diff(meanrt))
输出:
# A tibble: 6 x 7
# Groups: search_difficulty, grp [3]
# search_difficulty cue_validity cue_colour meanrt stdev grp cue_effect
# <chr> <lgl> <chr> <dbl> <dbl> <int> <dbl>
#1 difficult FALSE Match (Color) cue 0.99 0.158 1 0.018
#2 difficult FALSE Mismatch (Onset) cue 0.972 0.15 1 0.018
#3 difficult TRUE Match (Color) cue 0.828 0.133 2 -0.053
#4 difficult TRUE Mismatch (Onset) cue 0.881 0.177 2 -0.053
#5 easy FALSE Match (Color) cue 0.813 0.132 3 0.0120
#6 easy FALSE Mismatch (Onset) cue 0.801 0.137 3 0.0120
数据
# Reproducible example data: the six rows printed in the question, stored as a
# plain data.frame with character search_difficulty / cue_colour columns and a
# logical cue_validity column.
diff_colour_valid_int1 <- structure(
  list(
    search_difficulty = rep(c("difficult", "easy"), times = c(4L, 2L)),
    cue_validity = rep(c(FALSE, TRUE, FALSE), each = 2L),
    cue_colour = rep(
      c("Match (Color) cue", "Mismatch (Onset) cue"),
      times = 3L
    ),
    meanrt = c(0.99, 0.972, 0.828, 0.881, 0.813, 0.801),
    stdev = c(0.158, 0.15, 0.133, 0.177, 0.132, 0.137)
  ),
  class = "data.frame",
  # Row names are kept as the character labels "1".."6", as in the dput output.
  row.names = as.character(1:6)
)
每次 cue_validity 的值发生变化时,您可以使用 cumsum 和 lag 创建一个新组,并计算每个组中两个 meanrt 值的差值。
library(dplyr)
# Start a new group id every time cue_validity differs from the previous row.
# The id is computed on the ungrouped table: the comparison with lag() is TRUE
# at each change, and cumsum() turns those change flags into a running id.
diff_colour_valid_int1 %>%
  ungroup() %>%
  mutate(
    group = cumsum(
      cue_validity != lag(cue_validity, default = first(cue_validity))
    )
  ) %>%
  group_by(search_difficulty, group) %>%
  # lag(meanrt) - meanrt is NA on a group's first row; na.omit() drops it, and
  # the remaining value ("previous minus current") is recycled over the group.
  mutate(cue_effect = na.omit(lag(meanrt) - meanrt)) %>%
  ungroup() %>%
  # The helper id is only needed for grouping, so remove it from the result.
  select(-group)
# search_difficulty cue_validity cue_colour meanrt stdev cue_effect
# <chr> <lgl> <chr> <dbl> <dbl> <dbl>
#1 difficult FALSE Match (Color) cue 0.99 0.158 0.018
#2 difficult FALSE Mismatch (Onset) cue 0.972 0.15 0.018
#3 difficult TRUE Match (Color) cue 0.828 0.133 -0.053
#4 difficult TRUE Mismatch (Onset) cue 0.881 0.177 -0.053
#5 easy FALSE Match (Color) cue 0.813 0.132 0.0120
#6 easy FALSE Mismatch (Onset) cue 0.801 0.137 0.0120
我有一个名为 diff_colour_valid_int1 的 df:
> head(diff_colour_valid_int1)
# A tibble: 6 x 5
# Groups: search_difficulty, cue_validity [3]
search_difficulty cue_validity cue_colour meanrt stdev
<fct> <fct> <fct> <dbl> <dbl>
1 difficult FALSE Match (Color) cue 0.990 0.158
2 difficult FALSE Mismatch (Onset) cue 0.972 0.150
3 difficult TRUE Match (Color) cue 0.828 0.133
4 difficult TRUE Mismatch (Onset) cue 0.881 0.177
5 easy FALSE Match (Color) cue 0.813 0.132
6 easy FALSE Mismatch (Onset) cue 0.801 0.137
>
我想添加一个名为 cue_effect 的列,用于计算每个 cue_validity 对(例如前两个 FALSE FALSE)的平均值之间的差异。因此该列的前六个值将是:
cue_effect
<dbl>
0.018
0.018
-0.053
-0.053
0.012
0.012
如有任何建议,我们将不胜感激。谢谢
我们可以使用 data.table 包中的 rleid 创建分组列:
library(dplyr)
library(data.table)
# Number each run of consecutive, identical cue_validity values with rleid().
# The run ids are computed on the ungrouped table, so they are unique across
# all rows; each (search_difficulty, grp) pair then forms one group.
diff_colour_valid_int1 %>%
  ungroup() %>%
  mutate(grp = rleid(cue_validity)) %>%
  group_by(search_difficulty, grp) %>%
  # -diff() yields "first meanrt minus second meanrt"; that single value is
  # recycled to both rows, so every group is expected to hold exactly two rows.
  mutate(cue_effect = -diff(meanrt))
输出:
# A tibble: 6 x 7
# Groups: search_difficulty, grp [3]
# search_difficulty cue_validity cue_colour meanrt stdev grp cue_effect
# <chr> <lgl> <chr> <dbl> <dbl> <int> <dbl>
#1 difficult FALSE Match (Color) cue 0.99 0.158 1 0.018
#2 difficult FALSE Mismatch (Onset) cue 0.972 0.15 1 0.018
#3 difficult TRUE Match (Color) cue 0.828 0.133 2 -0.053
#4 difficult TRUE Mismatch (Onset) cue 0.881 0.177 2 -0.053
#5 easy FALSE Match (Color) cue 0.813 0.132 3 0.0120
#6 easy FALSE Mismatch (Onset) cue 0.801 0.137 3 0.0120
数据
# Reproducible example data: the six rows printed in the question, stored as a
# plain data.frame with character search_difficulty / cue_colour columns and a
# logical cue_validity column.
diff_colour_valid_int1 <- structure(
  list(
    search_difficulty = rep(c("difficult", "easy"), times = c(4L, 2L)),
    cue_validity = rep(c(FALSE, TRUE, FALSE), each = 2L),
    cue_colour = rep(
      c("Match (Color) cue", "Mismatch (Onset) cue"),
      times = 3L
    ),
    meanrt = c(0.99, 0.972, 0.828, 0.881, 0.813, 0.801),
    stdev = c(0.158, 0.15, 0.133, 0.177, 0.132, 0.137)
  ),
  class = "data.frame",
  # Row names are kept as the character labels "1".."6", as in the dput output.
  row.names = as.character(1:6)
)
每次 cue_validity 的值发生变化时,您可以使用 cumsum 和 lag 创建一个新组,并计算每个组中两个 meanrt 值的差值。
library(dplyr)
# Start a new group id every time cue_validity differs from the previous row.
# The id is computed on the ungrouped table: the comparison with lag() is TRUE
# at each change, and cumsum() turns those change flags into a running id.
diff_colour_valid_int1 %>%
  ungroup() %>%
  mutate(
    group = cumsum(
      cue_validity != lag(cue_validity, default = first(cue_validity))
    )
  ) %>%
  group_by(search_difficulty, group) %>%
  # lag(meanrt) - meanrt is NA on a group's first row; na.omit() drops it, and
  # the remaining value ("previous minus current") is recycled over the group.
  mutate(cue_effect = na.omit(lag(meanrt) - meanrt)) %>%
  ungroup() %>%
  # The helper id is only needed for grouping, so remove it from the result.
  select(-group)
# search_difficulty cue_validity cue_colour meanrt stdev cue_effect
# <chr> <lgl> <chr> <dbl> <dbl> <dbl>
#1 difficult FALSE Match (Color) cue 0.99 0.158 0.018
#2 difficult FALSE Mismatch (Onset) cue 0.972 0.15 0.018
#3 difficult TRUE Match (Color) cue 0.828 0.133 -0.053
#4 difficult TRUE Mismatch (Onset) cue 0.881 0.177 -0.053
#5 easy FALSE Match (Color) cue 0.813 0.132 0.0120
#6 easy FALSE Mismatch (Onset) cue 0.801 0.137 0.0120