将特定列转置为 R 中的行
Transposing specific columns to the rows in R
我需要把某些列的值转置成行,但只针对特定的行。
这是一个示例数据集:
# Example data: one row per (student, question). A/B/C carry the component
# scores and are NA for questions that have no breakdown.
df <- data.frame(
  student_id  = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score       = c(2, 3, 4, 2, 1, 5),
  A           = c(NA, NA, 1, NA, NA, 1),
  B           = c(NA, NA, 2, NA, NA, 1),
  C           = c(NA, NA, 1, NA, NA, 3)
)
> df
student_id question_id score A B C
1 1 10 2 NA NA NA
2 1 11 3 NA NA NA
3 1 12 4 1 2 1
4 2 20 2 NA NA NA
5 2 21 1 NA NA NA
6 2 22 5 1 1 3
对于那些在 A、B 和 C 列中具有值的行,我需要将这些值提取到 score 列中,并给 question_id 添加后缀加以区分。例如,第三行的 score 是这三列(A,B,C)的总和。
我想要的输出如下。
> df
student_id question_id score
1 1 10 2
2 1 11 3
3 1 12_A 1
4 1 12_B 2
5 1 12_C 1
6 2 20 2
7 2 21 1
8 2 22_A 1
9 2 22_B 1
10 2 22_C 3
数据
# Example data for this answer: one row per (student, question). C/E/O carry
# the component scores and are NA for questions that have no breakdown.
df <- data.frame(
  student_id  = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score       = c(2, 3, 4, 2, 1, 5),
  C           = c(NA, NA, 1, NA, NA, 1),
  E           = c(NA, NA, 2, NA, NA, 1),
  O           = c(NA, NA, 1, NA, NA, 3)
)
如何
library(tidyverse)

# Stack the C/E/O columns into name/value pairs. A stacked row that carries a
# component value gets that value as its score and the column name appended to
# question_id; a row without one keeps its original question_id and score.
# (Filtering on !is.na(value) instead would drop questions 10/11/20/21 and
# leave the summed score in place.)
df %>%
  pivot_longer(cols = c(C, E, O)) %>%
  mutate(
    question_id = if_else(
      is.na(value),
      as.character(question_id),
      str_c(question_id, name, sep = "_")
    ),
    score = coalesce(value, score)
  ) %>%
  select(-name, -value) %>%
  # A question without a breakdown yields three identical rows; keep one.
  distinct()
#> # A tibble: 10 x 3
#>    student_id question_id score
#>         <dbl> <chr>       <dbl>
#>  1          1 10              2
#>  2          1 11              3
#>  3          1 12_C            1
#>  4          1 12_E            2
#>  5          1 12_O            1
#>  6          2 20              2
#>  7          2 21              1
#>  8          2 22_C            1
#>  9          2 22_E            1
#> 10          2 22_O            3
library(dplyr)
library(tidyr)

# Stack A/B/C into name/value pairs. Where a component value exists it becomes
# the score and its column name is appended to question_id; otherwise the row
# keeps its original id (as text) and score. distinct() then collapses the
# repeated rows produced for questions that have no breakdown.
df %>%
  pivot_longer(cols = c(A, B, C)) %>%
  mutate(
    question_id = if_else(
      is.na(value),
      as.character(question_id),
      paste(question_id, name, sep = "_")
    ),
    score = coalesce(value, score)
  ) %>%
  select(student_id, question_id, score) %>%
  distinct()
#> # A tibble: 10 x 3
#> student_id question_id score
#> <dbl> <chr> <dbl>
#> 1 1 10 2
#> 2 1 11 3
#> 3 1 12_A 1
#> 4 1 12_B 2
#> 5 1 12_C 1
#> 6 2 20 2
#> 7 2 21 1
#> 8 2 22_A 1
#> 9 2 22_B 1
#> 10 2 22_C 3
由 reprex package (v2.0.0)
于 2021-09-15 创建
数据
# Example data: one row per (student, question). A/B/C carry the component
# scores and are NA for questions that have no breakdown.
df <- data.frame(
  student_id  = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score       = c(2, 3, 4, 2, 1, 5),
  A           = c(NA, NA, 1, NA, NA, 1),
  B           = c(NA, NA, 2, NA, NA, 1),
  C           = c(NA, NA, 1, NA, NA, 3)
)
我不确定这是否比另一个答案中仅在末尾添加 distinct() 的做法更好,但我很好奇能否只对含有值的行进行透视(不去透视全为 NA 的行)来完成此操作。所以有了下面的写法:
# Example data: one row per (student, question). A/B/C carry the component
# scores and are NA for questions that have no breakdown.
df <- data.frame(
  student_id  = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score       = c(2, 3, 4, 2, 1, 5),
  A           = c(NA, NA, 1, NA, NA, 1),
  B           = c(NA, NA, 2, NA, NA, 1),
  C           = c(NA, NA, 1, NA, NA, 3)
)
library(dplyr, warn.conflicts = FALSE)
library(tidyr)

# Split the rows into two groups -- those where A, B and C are all present and
# the rest -- and reshape only the former, so all-NA rows are never pivoted.
df %>%
  mutate(make_long = !is.na(A) & !is.na(B) & !is.na(C)) %>%
  group_by(ml = make_long) %>%
  group_modify(function(rows, key) {
    # The flag is read from `make_long`, a copy of the grouping column `ml`
    # that is constant within each group.
    if (first(rows$make_long)) {
      rows %>%
        pivot_longer(c(A, B, C)) %>%
        transmute(
          student_id,
          question_id = paste(question_id, name, sep = '_'),
          score = value
        )
    } else {
      transmute(
        rows,
        student_id,
        question_id = as.character(question_id),
        score
      )
    }
  }) %>%
  ungroup() %>%
  select(-ml)
#> # A tibble: 10 × 3
#> student_id question_id score
#> <dbl> <chr> <dbl>
#> 1 1 10 2
#> 2 1 11 3
#> 3 2 20 2
#> 4 2 21 1
#> 5 1 12_A 1
#> 6 1 12_B 2
#> 7 1 12_C 1
#> 8 2 22_A 1
#> 9 2 22_B 1
#> 10 2 22_C 3
由 reprex package (v2.0.1)
于 2021-09-15 创建
另一种选择是先用 NA 替换求和得到的 score(即问题 12 和 22)。接着对 score:C 这几列使用 pivot_longer,并设置 values_drop_na 以丢弃 NA 值。然后把 name 列中值为 score 的项替换为 NA。最后用 unite 合并 question_id 和 name 两列。
# Blank out the summed score on every row that has at least one A/B/C
# component (checking all three columns, not just A), then stack score and
# A:C into name/value pairs and drop the NA cells. Rows that came from the
# plain score column get name = NA so that unite() leaves their question_id
# without a suffix.
df %>%
  mutate(score = ifelse(if_any(A:C, ~ !is.na(.x)), NA, score)) %>%
  pivot_longer(score:C, values_drop_na = TRUE) %>%
  mutate(name = na_if(name, "score")) %>%
  unite("question_id", c(question_id, name), na.rm = TRUE)
#------
# A tibble: 10 x 3
student_id question_id value
<dbl> <chr> <dbl>
1 1 10 2
2 1 11 3
3 1 12_A 1
4 1 12_B 2
5 1 12_C 1
6 2 20 2
7 2 21 1
8 2 22_A 1
9 2 22_B 1
10 2 22_C 3
我需要把某些列的值转置成行,但只针对特定的行。
这是一个示例数据集:
# Example data: one row per (student, question). A/B/C carry the component
# scores and are NA for questions that have no breakdown.
df <- data.frame(
  student_id  = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score       = c(2, 3, 4, 2, 1, 5),
  A           = c(NA, NA, 1, NA, NA, 1),
  B           = c(NA, NA, 2, NA, NA, 1),
  C           = c(NA, NA, 1, NA, NA, 3)
)
> df
student_id question_id score A B C
1 1 10 2 NA NA NA
2 1 11 3 NA NA NA
3 1 12 4 1 2 1
4 2 20 2 NA NA NA
5 2 21 1 NA NA NA
6 2 22 5 1 1 3
对于那些在 A、B 和 C 列中具有值的行,我需要将这些值提取到 score 列中,并给 question_id 添加后缀加以区分。例如,第三行的 score 是这三列(A,B,C)的总和。
我想要的输出如下。
> df
student_id question_id score
1 1 10 2
2 1 11 3
3 1 12_A 1
4 1 12_B 2
5 1 12_C 1
6 2 20 2
7 2 21 1
8 2 22_A 1
9 2 22_B 1
10 2 22_C 3
数据
# Example data for this answer: one row per (student, question). C/E/O carry
# the component scores and are NA for questions that have no breakdown.
df <- data.frame(
  student_id  = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score       = c(2, 3, 4, 2, 1, 5),
  C           = c(NA, NA, 1, NA, NA, 1),
  E           = c(NA, NA, 2, NA, NA, 1),
  O           = c(NA, NA, 1, NA, NA, 3)
)
如何
library(tidyverse)

# Stack the C/E/O columns into name/value pairs. A stacked row that carries a
# component value gets that value as its score and the column name appended to
# question_id; a row without one keeps its original question_id and score.
# (Filtering on !is.na(value) instead would drop questions 10/11/20/21 and
# leave the summed score in place.)
df %>%
  pivot_longer(cols = c(C, E, O)) %>%
  mutate(
    question_id = if_else(
      is.na(value),
      as.character(question_id),
      str_c(question_id, name, sep = "_")
    ),
    score = coalesce(value, score)
  ) %>%
  select(-name, -value) %>%
  # A question without a breakdown yields three identical rows; keep one.
  distinct()
#> # A tibble: 10 x 3
#>    student_id question_id score
#>         <dbl> <chr>       <dbl>
#>  1          1 10              2
#>  2          1 11              3
#>  3          1 12_C            1
#>  4          1 12_E            2
#>  5          1 12_O            1
#>  6          2 20              2
#>  7          2 21              1
#>  8          2 22_C            1
#>  9          2 22_E            1
#> 10          2 22_O            3
library(dplyr)
library(tidyr)

# Stack A/B/C into name/value pairs. Where a component value exists it becomes
# the score and its column name is appended to question_id; otherwise the row
# keeps its original id (as text) and score. distinct() then collapses the
# repeated rows produced for questions that have no breakdown.
df %>%
  pivot_longer(cols = c(A, B, C)) %>%
  mutate(
    question_id = if_else(
      is.na(value),
      as.character(question_id),
      paste(question_id, name, sep = "_")
    ),
    score = coalesce(value, score)
  ) %>%
  select(student_id, question_id, score) %>%
  distinct()
#> # A tibble: 10 x 3
#> student_id question_id score
#> <dbl> <chr> <dbl>
#> 1 1 10 2
#> 2 1 11 3
#> 3 1 12_A 1
#> 4 1 12_B 2
#> 5 1 12_C 1
#> 6 2 20 2
#> 7 2 21 1
#> 8 2 22_A 1
#> 9 2 22_B 1
#> 10 2 22_C 3
由 reprex package (v2.0.0)
于 2021-09-15 创建
数据
# Example data: one row per (student, question). A/B/C carry the component
# scores and are NA for questions that have no breakdown.
df <- data.frame(
  student_id  = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score       = c(2, 3, 4, 2, 1, 5),
  A           = c(NA, NA, 1, NA, NA, 1),
  B           = c(NA, NA, 2, NA, NA, 1),
  C           = c(NA, NA, 1, NA, NA, 3)
)
我不确定这是否比另一个答案中仅在末尾添加 distinct() 的做法更好,但我很好奇能否只对含有值的行进行透视(不去透视全为 NA 的行)来完成此操作。所以有了下面的写法:
# Example data: one row per (student, question). A/B/C carry the component
# scores and are NA for questions that have no breakdown.
df <- data.frame(
  student_id  = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score       = c(2, 3, 4, 2, 1, 5),
  A           = c(NA, NA, 1, NA, NA, 1),
  B           = c(NA, NA, 2, NA, NA, 1),
  C           = c(NA, NA, 1, NA, NA, 3)
)
library(dplyr, warn.conflicts = FALSE)
library(tidyr)

# Split the rows into two groups -- those where A, B and C are all present and
# the rest -- and reshape only the former, so all-NA rows are never pivoted.
df %>%
  mutate(make_long = !is.na(A) & !is.na(B) & !is.na(C)) %>%
  group_by(ml = make_long) %>%
  group_modify(function(rows, key) {
    # The flag is read from `make_long`, a copy of the grouping column `ml`
    # that is constant within each group.
    if (first(rows$make_long)) {
      rows %>%
        pivot_longer(c(A, B, C)) %>%
        transmute(
          student_id,
          question_id = paste(question_id, name, sep = '_'),
          score = value
        )
    } else {
      transmute(
        rows,
        student_id,
        question_id = as.character(question_id),
        score
      )
    }
  }) %>%
  ungroup() %>%
  select(-ml)
#> # A tibble: 10 × 3
#> student_id question_id score
#> <dbl> <chr> <dbl>
#> 1 1 10 2
#> 2 1 11 3
#> 3 2 20 2
#> 4 2 21 1
#> 5 1 12_A 1
#> 6 1 12_B 2
#> 7 1 12_C 1
#> 8 2 22_A 1
#> 9 2 22_B 1
#> 10 2 22_C 3
由 reprex package (v2.0.1)
于 2021-09-15 创建
另一种选择是先用 NA 替换求和得到的 score(即问题 12 和 22)。接着对 score:C 这几列使用 pivot_longer,并设置 values_drop_na 以丢弃 NA 值。然后把 name 列中值为 score 的项替换为 NA。最后用 unite 合并 question_id 和 name 两列。
# Blank out the summed score on every row that has at least one A/B/C
# component (checking all three columns, not just A), then stack score and
# A:C into name/value pairs and drop the NA cells. Rows that came from the
# plain score column get name = NA so that unite() leaves their question_id
# without a suffix.
df %>%
  mutate(score = ifelse(if_any(A:C, ~ !is.na(.x)), NA, score)) %>%
  pivot_longer(score:C, values_drop_na = TRUE) %>%
  mutate(name = na_if(name, "score")) %>%
  unite("question_id", c(question_id, name), na.rm = TRUE)
#------
# A tibble: 10 x 3
student_id question_id value
<dbl> <chr> <dbl>
1 1 10 2
2 1 11 3
3 1 12_A 1
4 1 12_B 2
5 1 12_C 1
6 2 20 2
7 2 21 1
8 2 22_A 1
9 2 22_B 1
10 2 22_C 3