使用 R 的数据整理将一列分成两列
Splitting one column into two columns using data wrangling with R
非常感谢您在使用 R 进行数据整理方面提供的帮助。我有一个数据,我想在适用时将一列(变量)分成两列,以其他变量为条件。例如,根据下面的示例,数据表示出现在不同阅读时间(块)中的某些单词(项目)的反应时间测量值(RT1 和 RT2)。我想看看块 3、4 和 5 中的 RT1 和 RT2 值是否与块 1 中同一项目的 RT1 和 RT2 值相关。出现在块 1 中并在后续块中重新出现的目标项目被编码为'EI' 在 'condition' 列中,而编码为 'E' 或 'I' 的项目仅出现一次。
dput(d1)
structure(list(RECORDING_SESSION_LABEL = c(26, 26, 26, 26, 26,
26, 26, 26), RT1 = c(5171, 3857, 3447, 314, 460, 731, 957, 1253
), RT2 = c(357, 328, 122, 39, 86, 132, 173, 215), item = c("foreign",
"detailed", "large", "foreign", "foreign", "large", "large",
"disputable"), block = c(1, 1, 1, 3, 4, 3, 4, 3), condition = c("EI",
"E", "EI", "EI", "EI", "EI", "EI", "I")), row.names = c(NA, -8L
), class = c("tbl_df", "tbl", "data.frame"))
数据样本如下所示:
> d1
# A tibble: 8 x 6
RECORDING_SESSION_LABEL RT1 RT2 item block condition
<dbl> <dbl> <dbl> <chr> <dbl> <chr>
1 26 5171 357 foreign 1 EI
2 26 3857 328 detailed 1 E
3 26 3447 122 large 1 EI
4 26 314 39 foreign 3 EI
5 26 460 86 foreign 4 EI
6 26 731 132 large 3 EI
7 26 957 173 large 4 EI
8 26 1253 215 disputable 3 I
为了以 R 可以理解的格式呈现,我想要实现的目标数据框将类似于下面的数据框(应添加突出显示的列)。这些列的空白行表示不重复出现的项目(条件未编码为 'EI');因此,它们是无关紧要的,应编码为 'NA'。
dput(d2)
structure(list(RECORDING_SESSION_LABEL = c(26, 26, 26, 26, 26,
26, 26, 26), `RT 1` = c(5171, 3857, 3447, 314, 460, 731, 957,
1253), RT2 = c(357, 328, 122, 39, 86, 132, 173, 215), item = c("foreign",
"detailed", "large", "foreign", "foreign", "large", "large",
"disputable"), block = c(1, 1, 1, 3, 4, 3, 4, 3), condition = c("EI",
"E", "EI", "EI", "EI", "EI", "EI", "I"), `RT 1_at_block1` = c(NA,
NA, NA, 5171, 5171, 3447, 3447, NA), RT2_at_block1 = c(NA, NA,
NA, 357, 357, 122, 122, NA)), row.names = c(NA, -8L), class = c("tbl_df",
"tbl", "data.frame"))
目标数据格式示例如下所示:
> d2
# A tibble: 8 x 8
RECORDING_SESSI~ `RT 1` RT2 item block condition `RT 1_at_block1`
<dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
1 26 5171 357 fore~ 1 EI NA
2 26 3857 328 deta~ 1 E NA
3 26 3447 122 large 1 EI NA
4 26 314 39 fore~ 3 EI 5171
5 26 460 86 fore~ 4 EI 5171
6 26 731 132 large 3 EI 3447
7 26 957 173 large 4 EI 3447
8 26 1253 215 disp~ 3 I NA
# ... with 1 more variable: RT2_at_block1 <dbl>
> head(d2)
# A tibble: 6 x 8
RECORDING_SESSION_LABEL `RT 1` RT2 item block condition `RT 1_at_block1` RT2_at_block1
<dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
1 26 5171 357 foreign 1 EI NA NA
2 26 3857 328 detailed 1 E NA NA
3 26 3447 122 large 1 EI NA NA
4 26 314 39 foreign 3 EI 5171 357
5 26 460 86 foreign 4 EI 5171 357
6 26 731 132 large 3 EI 3447 122
在此先感谢您的帮助。
使用dplyr
的可能解决方案:
d1 <- structure(list(RECORDING_SESSION_LABEL = c(26, 26, 26, 26, 26, 26, 26, 26),
RT1 = c(5171, 3857, 3447, 314, 460, 731, 957, 1253),
RT2 = c(357, 328, 122, 39, 86, 132, 173, 215),
item = c("foreign", "detailed", "large", "foreign", "foreign", "large", "large", "disputable"),
block = c(1, 1, 1, 3, 4, 3, 4, 3), condition = c("EI", "E", "EI", "EI", "EI", "EI", "EI", "I")),
row.names = c(NA, -8L), class = c("tbl_df", "tbl", "data.frame"))
library(dplyr)
d2 <- d1 %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT1_at_block1 = RT1)) %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT2_at_block1 = RT2))
之后,d2
看起来像这样:
RECORDING_SESSION_LABEL RT1 RT2 item block condition RT1_at_block1 RT2_at_block1
<dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
1 26 5171 357 foreign 1 EI 5171 357
2 26 3857 328 detailed 1 E 3857 328
3 26 3447 122 large 1 EI 3447 122
4 26 314 39 foreign 3 EI 5171 357
5 26 460 86 foreign 4 EI 5171 357
6 26 731 132 large 3 EI 3447 122
编辑:如果要将块 1 的值设置为 NA
,则添加 mutate
:
d2 <- d1 %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT1_at_block1 = RT1)) %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT2_at_block1 = RT2)) %>%
mutate(RT1_at_block1 = ifelse(block == 1, NA, RT1_at_block1),
RT2_at_block1 = ifelse(block == 1, NA, RT2_at_block1))
非常感谢您在使用 R 进行数据整理方面提供的帮助。我有一个数据,我想在适用时将一列(变量)分成两列,以其他变量为条件。例如,根据下面的示例,数据表示出现在不同阅读时间(块)中的某些单词(项目)的反应时间测量值(RT1 和 RT2)。我想看看块 3、4 和 5 中的 RT1 和 RT2 值是否与块 1 中同一项目的 RT1 和 RT2 值相关。出现在块 1 中并在后续块中重新出现的目标项目被编码为'EI' 在 'condition' 列中,而编码为 'E' 或 'I' 的项目仅出现一次。
dput(d1)
structure(list(RECORDING_SESSION_LABEL = c(26, 26, 26, 26, 26,
26, 26, 26), RT1 = c(5171, 3857, 3447, 314, 460, 731, 957, 1253
), RT2 = c(357, 328, 122, 39, 86, 132, 173, 215), item = c("foreign",
"detailed", "large", "foreign", "foreign", "large", "large",
"disputable"), block = c(1, 1, 1, 3, 4, 3, 4, 3), condition = c("EI",
"E", "EI", "EI", "EI", "EI", "EI", "I")), row.names = c(NA, -8L
), class = c("tbl_df", "tbl", "data.frame"))
数据样本如下所示:
> d1
# A tibble: 8 x 6
RECORDING_SESSION_LABEL RT1 RT2 item block condition
<dbl> <dbl> <dbl> <chr> <dbl> <chr>
1 26 5171 357 foreign 1 EI
2 26 3857 328 detailed 1 E
3 26 3447 122 large 1 EI
4 26 314 39 foreign 3 EI
5 26 460 86 foreign 4 EI
6 26 731 132 large 3 EI
7 26 957 173 large 4 EI
8 26 1253 215 disputable 3 I
为了以 R 可以理解的格式呈现,我想要实现的目标数据框将类似于下面的数据框(应添加突出显示的列)。这些列的空白行表示不重复出现的项目(条件未编码为 'EI');因此,它们是无关紧要的,应编码为 'NA'。
dput(d2)
structure(list(RECORDING_SESSION_LABEL = c(26, 26, 26, 26, 26,
26, 26, 26), `RT 1` = c(5171, 3857, 3447, 314, 460, 731, 957,
1253), RT2 = c(357, 328, 122, 39, 86, 132, 173, 215), item = c("foreign",
"detailed", "large", "foreign", "foreign", "large", "large",
"disputable"), block = c(1, 1, 1, 3, 4, 3, 4, 3), condition = c("EI",
"E", "EI", "EI", "EI", "EI", "EI", "I"), `RT 1_at_block1` = c(NA,
NA, NA, 5171, 5171, 3447, 3447, NA), RT2_at_block1 = c(NA, NA,
NA, 357, 357, 122, 122, NA)), row.names = c(NA, -8L), class = c("tbl_df",
"tbl", "data.frame"))
目标数据格式示例如下所示:
> d2
# A tibble: 8 x 8
RECORDING_SESSI~ `RT 1` RT2 item block condition `RT 1_at_block1`
<dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl>
1 26 5171 357 fore~ 1 EI NA
2 26 3857 328 deta~ 1 E NA
3 26 3447 122 large 1 EI NA
4 26 314 39 fore~ 3 EI 5171
5 26 460 86 fore~ 4 EI 5171
6 26 731 132 large 3 EI 3447
7 26 957 173 large 4 EI 3447
8 26 1253 215 disp~ 3 I NA
# ... with 1 more variable: RT2_at_block1 <dbl>
> head(d2)
# A tibble: 6 x 8
RECORDING_SESSION_LABEL `RT 1` RT2 item block condition `RT 1_at_block1` RT2_at_block1
<dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
1 26 5171 357 foreign 1 EI NA NA
2 26 3857 328 detailed 1 E NA NA
3 26 3447 122 large 1 EI NA NA
4 26 314 39 foreign 3 EI 5171 357
5 26 460 86 foreign 4 EI 5171 357
6 26 731 132 large 3 EI 3447 122
在此先感谢您的帮助。
使用dplyr
的可能解决方案:
d1 <- structure(list(RECORDING_SESSION_LABEL = c(26, 26, 26, 26, 26, 26, 26, 26),
RT1 = c(5171, 3857, 3447, 314, 460, 731, 957, 1253),
RT2 = c(357, 328, 122, 39, 86, 132, 173, 215),
item = c("foreign", "detailed", "large", "foreign", "foreign", "large", "large", "disputable"),
block = c(1, 1, 1, 3, 4, 3, 4, 3), condition = c("EI", "E", "EI", "EI", "EI", "EI", "EI", "I")),
row.names = c(NA, -8L), class = c("tbl_df", "tbl", "data.frame"))
library(dplyr)
d2 <- d1 %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT1_at_block1 = RT1)) %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT2_at_block1 = RT2))
之后,d2
看起来像这样:
RECORDING_SESSION_LABEL RT1 RT2 item block condition RT1_at_block1 RT2_at_block1
<dbl> <dbl> <dbl> <chr> <dbl> <chr> <dbl> <dbl>
1 26 5171 357 foreign 1 EI 5171 357
2 26 3857 328 detailed 1 E 3857 328
3 26 3447 122 large 1 EI 3447 122
4 26 314 39 foreign 3 EI 5171 357
5 26 460 86 foreign 4 EI 5171 357
6 26 731 132 large 3 EI 3447 122
编辑:如果要将块 1 的值设置为 NA
,则添加 mutate
:
d2 <- d1 %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT1_at_block1 = RT1)) %>%
left_join(d1 %>% filter(block == 1) %>% select(RECORDING_SESSION_LABEL, item, RT2_at_block1 = RT2)) %>%
mutate(RT1_at_block1 = ifelse(block == 1, NA, RT1_at_block1),
RT2_at_block1 = ifelse(block == 1, NA, RT2_at_block1))