使用 kappa 检验评分者之间的一致性:用 tidyverse 和循环函数对数据(数据集)进行透视(pivot)
Agreement between raters with kappa, using tidyverse and looping functions to pivot the data (data set)
假设在我的研究中,每个学生在一学年内由多个评分者进行两次评估,这在我的领域很常见。因此,我有一个具有以下格式的数据集:
我的目标是检查这些评估之间的一致性。我可以对数据集做透视(pivot)变换,来检查每个学生的所有评分者之间的一致性。
# Agreement check for a single student column (student_1) using unweighted
# Cohen's kappa.
# Steps: spread the raters into list-columns, expand those lists back into
# rows, then transpose so that each rater becomes a row and the expanded
# ratings land in the columns V1 and V2 that kappa2() compares.
ds_test %>%
  select(rater, student_1) %>%
  pivot_wider(
    names_from = "rater",
    values_from = "student_1",
    values_fn = list(student_1 = list)
  ) %>%
  unchop(everything()) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("rater") %>%
  select(V1, V2) %>%
  irr::kappa2(weight = "unweighted")
但是,我的目标是写一个可以同时动态检查所有学生的函数。使用当前代码,我必须逐个手动更改 student_1 参数。
我知道这是可能的,而且我知道我必须嵌套数据集。但是,我现在无法解决这个难题。
我想留在 tidyverse 环境中。
所有代码都在这里:
# Example data set, rebuilt from dput() output as a tibble with 14 rows.
# `rater` names the rater for each row; note that "rater15" occurs twice.
# student_1 ... student_28 hold the letter value ("A"-"D" in this sample) that
# the row's rater recorded for that student.
# NA presumably marks a student that rater did not grade -- TODO confirm.
ds_test <- structure(list(rater = c("rater30", "rater14", "rater24", "rater31",
"rater1", "rater15", "rater32", "rater11", "rater34", "rater35",
"rater15", "rater3", "rater27", "rater9"), student_1 = c("C",
"C", "C", "C", "C", "C", "C", "C", "C", "A", "C", "C", "C", "C"
), student_2 = c("D", "D", "C", "D", "D", "D", "D", "D", "D",
"D", "D", "D", "D", "D"), student_3 = c("A", "A", "D", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A"), student_4 = c("C",
"B", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"
), student_5 = c("A", "C", "B", "C", "B", "B", "C", "C", "C",
"A", "C", "B", "B", "B"), student_6 = c("D", "A", "B", "D", "D",
"D", "D", "D", "D", "C", "D", "D", "D", "A"), student_7 = c("B",
"B", "A", "C", "D", "B", "C", "C", "C", "D", "C", "D", "A", "C"
), student_8 = c("A", "B", "D", "B", "D", "D", "B", "B", "B",
"C", "A", "D", "B", "B"), student_9 = c("A", "D", "C", "C", "C",
"D", "D", "D", "D", NA, "C", "C", "B", "D"), student_10 = c("B",
"B", "C", "B", "C", "B", "B", "B", "B", "C", "B", "C", "B", "B"
), student_11 = c("A", "C", "D", "C", "A", "B", "C", "C", "C",
"C", "C", "A", "C", "C"), student_12 = c("A", "B", NA, "D", "C",
"C", "B", "B", "B", "C", "B", "C", "B", "B"), student_13 = c("A",
"A", NA, "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"
), student_14 = c("C", "D", NA, NA, "D", "D", "D", "D", "D",
"A", "A", "D", "D", "D"), student_15 = c("C", NA, NA, NA, "A",
"C", "B", "B", "B", NA, "B", "A", "B", "B"), student_16 = c("C",
NA, NA, NA, "A", "B", "B", "A", "A", "D", "C", "A", NA, "B"),
student_17 = c("D", NA, NA, NA, "A", "D", "D", "D", "D",
"D", "C", "A", NA, "D"), student_18 = c(NA, NA, NA, NA, "B",
"A", "C", "C", "C", "B", "C", "B", NA, "C"), student_19 = c(NA,
NA, NA, NA, "D", "D", "A", "A", "A", "A", "A", "D", NA, "A"
), student_20 = c(NA, NA, NA, NA, "D", "D", "D", "D", "D",
"D", "D", "D", NA, "D"), student_21 = c(NA, NA, NA, NA, NA,
NA, "B", "B", "B", "B", "B", NA, NA, "B"), student_22 = c(NA,
NA, NA, NA, NA, NA, "D", "D", "D", "C", "C", NA, NA, "A"),
student_23 = c(NA, NA, NA, NA, NA, NA, "C", "C", "D", NA,
"B", NA, NA, "C"), student_24 = c(NA, NA, NA, NA, NA, NA,
"A", "A", "A", NA, "A", NA, NA, "A"), student_25 = c(NA,
NA, NA, NA, NA, NA, "A", "A", "D", NA, "A", NA, NA, "D"),
student_26 = c(NA, NA, NA, NA, NA, NA, "A", "C", "A", "C",
"C", NA, NA, "C"), student_27 = c(NA, NA, NA, NA, NA, NA,
"D", "D", "B", NA, "D", NA, NA, NA), student_28 = c(NA, NA,
NA, NA, NA, NA, "A", "A", NA, NA, NA, NA, NA, NA)), row.names = c(NA,
-14L), class = c("tbl_df", "tbl", "data.frame"))
# Agreement check for a single student column (student_1) using unweighted
# Cohen's kappa.
# Steps: spread the raters into list-columns, expand those lists back into
# rows, then transpose so that each rater becomes a row and the expanded
# ratings land in the columns V1 and V2 that kappa2() compares.
ds_test %>%
  select(rater, student_1) %>%
  pivot_wider(
    names_from = "rater",
    values_from = "student_1",
    values_fn = list(student_1 = list)
  ) %>%
  unchop(everything()) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("rater") %>%
  select(V1, V2) %>%
  irr::kappa2(weight = "unweighted")
如果先把数据转换成长格式,会更容易处理。
library(dplyr)
library(tidyr)
# Compute an unweighted Cohen's kappa per student from long-format data.
# pivot_longer() stacks every column except `rater`: `name` holds the original
# column name (student_1, student_2, ...) and `value1` the recorded rating.
# The result has one row per student; `answer` is a list-column holding the
# object returned by irr::kappa2() for that student.
# NOTE(review): value2 is an exact copy of value1, so each kappa2() call
# compares a rating column with itself -- confirm this is the intended pairing.
# The two rating columns are handed over as a tibble built inside summarise()
# rather than via select(cur_data(), ...), because cur_data() is deprecated
# since dplyr 1.1.0 and emits a warning there.
result <- ds_test %>%
  pivot_longer(cols = -rater, values_to = "value1") %>%
  mutate(value2 = value1) %>%
  group_by(name) %>%
  summarise(
    answer = list(
      irr::kappa2(tibble(value1, value2), weight = "unweighted")
    )
  )
其中每个学生的结果可以分别通过 result$answer[[1]]、result$answer[[2]] 等查看。
这个怎么样:
# Replacement example data: each of the five raters appears twice (10 rows),
# so every (rater, student) pair carries two ratings.
# Grades are drawn at random from A-E and no seed is set, so the values differ
# from run to run.
ds_test <- tibble(
  rater = rep(paste0("rater", 1:5), times = 2),
  student_1 = sample(LETTERS[1:5], size = 10, replace = TRUE),
  student_2 = sample(LETTERS[1:5], size = 10, replace = TRUE),
  student_3 = sample(LETTERS[1:5], size = 10, replace = TRUE),
  student_4 = sample(LETTERS[1:5], size = 10, replace = TRUE),
  student_5 = sample(LETTERS[1:5], size = 10, replace = TRUE)
)
# Put the repeated ratings of each (rater, student) pair side by side:
# go long first, label each pair's ratings in order of appearance
# (rating_1, rating_2, ...), then spread those labels into columns.
# The result is still grouped by rater and student.
ds_test %>%
  pivot_longer(cols = -rater, names_to = "student", values_to = "grade") %>%
  group_by(rater, student) %>%
  mutate(n = paste0("rating_", row_number())) %>%
  pivot_wider(names_from = n, values_from = grade)
输出
# A tibble: 25 x 4
# Groups: rater, student [25]
rater student rating_1 rating_2
<chr> <chr> <chr> <chr>
1 rater1 student_1 D D
2 rater1 student_2 C B
3 rater1 student_3 B A
4 rater1 student_4 D E
5 rater1 student_5 E C
6 rater2 student_1 C B
7 rater2 student_2 A D
8 rater2 student_3 D D
9 rater2 student_4 A B
10 rater2 student_5 D A
本质上,我用了一个小技巧:记录每个(评分者、学生)组合在数据框中出现的次数,并将其存入新列 n,然后把 n 用作 pivot_wider() 的列名来源(names_from)。
假设在我的研究中,每个学生在一学年内由多个评分者进行两次评估,这在我的领域很常见。因此,我有一个具有以下格式的数据集:
我的目标是检查这些评估之间的一致性。我可以对数据集做透视(pivot)变换,来检查每个学生的所有评分者之间的一致性。
# Agreement check for a single student column (student_1) using unweighted
# Cohen's kappa.
# Steps: spread the raters into list-columns, expand those lists back into
# rows, then transpose so that each rater becomes a row and the expanded
# ratings land in the columns V1 and V2 that kappa2() compares.
ds_test %>%
  select(rater, student_1) %>%
  pivot_wider(
    names_from = "rater",
    values_from = "student_1",
    values_fn = list(student_1 = list)
  ) %>%
  unchop(everything()) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("rater") %>%
  select(V1, V2) %>%
  irr::kappa2(weight = "unweighted")
但是,我的目标是写一个可以同时动态检查所有学生的函数。使用当前代码,我必须逐个手动更改 student_1 参数。
我知道这是可能的,而且我知道我必须嵌套数据集。但是,我现在无法解决这个难题。
我想留在 tidyverse 环境中。 所有代码都在这里:
# Example data set, rebuilt from dput() output as a tibble with 14 rows.
# `rater` names the rater for each row; note that "rater15" occurs twice.
# student_1 ... student_28 hold the letter value ("A"-"D" in this sample) that
# the row's rater recorded for that student.
# NA presumably marks a student that rater did not grade -- TODO confirm.
ds_test <- structure(list(rater = c("rater30", "rater14", "rater24", "rater31",
"rater1", "rater15", "rater32", "rater11", "rater34", "rater35",
"rater15", "rater3", "rater27", "rater9"), student_1 = c("C",
"C", "C", "C", "C", "C", "C", "C", "C", "A", "C", "C", "C", "C"
), student_2 = c("D", "D", "C", "D", "D", "D", "D", "D", "D",
"D", "D", "D", "D", "D"), student_3 = c("A", "A", "D", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A"), student_4 = c("C",
"B", "A", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"
), student_5 = c("A", "C", "B", "C", "B", "B", "C", "C", "C",
"A", "C", "B", "B", "B"), student_6 = c("D", "A", "B", "D", "D",
"D", "D", "D", "D", "C", "D", "D", "D", "A"), student_7 = c("B",
"B", "A", "C", "D", "B", "C", "C", "C", "D", "C", "D", "A", "C"
), student_8 = c("A", "B", "D", "B", "D", "D", "B", "B", "B",
"C", "A", "D", "B", "B"), student_9 = c("A", "D", "C", "C", "C",
"D", "D", "D", "D", NA, "C", "C", "B", "D"), student_10 = c("B",
"B", "C", "B", "C", "B", "B", "B", "B", "C", "B", "C", "B", "B"
), student_11 = c("A", "C", "D", "C", "A", "B", "C", "C", "C",
"C", "C", "A", "C", "C"), student_12 = c("A", "B", NA, "D", "C",
"C", "B", "B", "B", "C", "B", "C", "B", "B"), student_13 = c("A",
"A", NA, "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"
), student_14 = c("C", "D", NA, NA, "D", "D", "D", "D", "D",
"A", "A", "D", "D", "D"), student_15 = c("C", NA, NA, NA, "A",
"C", "B", "B", "B", NA, "B", "A", "B", "B"), student_16 = c("C",
NA, NA, NA, "A", "B", "B", "A", "A", "D", "C", "A", NA, "B"),
student_17 = c("D", NA, NA, NA, "A", "D", "D", "D", "D",
"D", "C", "A", NA, "D"), student_18 = c(NA, NA, NA, NA, "B",
"A", "C", "C", "C", "B", "C", "B", NA, "C"), student_19 = c(NA,
NA, NA, NA, "D", "D", "A", "A", "A", "A", "A", "D", NA, "A"
), student_20 = c(NA, NA, NA, NA, "D", "D", "D", "D", "D",
"D", "D", "D", NA, "D"), student_21 = c(NA, NA, NA, NA, NA,
NA, "B", "B", "B", "B", "B", NA, NA, "B"), student_22 = c(NA,
NA, NA, NA, NA, NA, "D", "D", "D", "C", "C", NA, NA, "A"),
student_23 = c(NA, NA, NA, NA, NA, NA, "C", "C", "D", NA,
"B", NA, NA, "C"), student_24 = c(NA, NA, NA, NA, NA, NA,
"A", "A", "A", NA, "A", NA, NA, "A"), student_25 = c(NA,
NA, NA, NA, NA, NA, "A", "A", "D", NA, "A", NA, NA, "D"),
student_26 = c(NA, NA, NA, NA, NA, NA, "A", "C", "A", "C",
"C", NA, NA, "C"), student_27 = c(NA, NA, NA, NA, NA, NA,
"D", "D", "B", NA, "D", NA, NA, NA), student_28 = c(NA, NA,
NA, NA, NA, NA, "A", "A", NA, NA, NA, NA, NA, NA)), row.names = c(NA,
-14L), class = c("tbl_df", "tbl", "data.frame"))
# Agreement check for a single student column (student_1) using unweighted
# Cohen's kappa.
# Steps: spread the raters into list-columns, expand those lists back into
# rows, then transpose so that each rater becomes a row and the expanded
# ratings land in the columns V1 and V2 that kappa2() compares.
ds_test %>%
  select(rater, student_1) %>%
  pivot_wider(
    names_from = "rater",
    values_from = "student_1",
    values_fn = list(student_1 = list)
  ) %>%
  unchop(everything()) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("rater") %>%
  select(V1, V2) %>%
  irr::kappa2(weight = "unweighted")
如果先把数据转换成长格式,会更容易处理。
library(dplyr)
library(tidyr)
# Compute an unweighted Cohen's kappa per student from long-format data.
# pivot_longer() stacks every column except `rater`: `name` holds the original
# column name (student_1, student_2, ...) and `value1` the recorded rating.
# The result has one row per student; `answer` is a list-column holding the
# object returned by irr::kappa2() for that student.
# NOTE(review): value2 is an exact copy of value1, so each kappa2() call
# compares a rating column with itself -- confirm this is the intended pairing.
# The two rating columns are handed over as a tibble built inside summarise()
# rather than via select(cur_data(), ...), because cur_data() is deprecated
# since dplyr 1.1.0 and emits a warning there.
result <- ds_test %>%
  pivot_longer(cols = -rater, values_to = "value1") %>%
  mutate(value2 = value1) %>%
  group_by(name) %>%
  summarise(
    answer = list(
      irr::kappa2(tibble(value1, value2), weight = "unweighted")
    )
  )
其中每个学生的结果可以分别通过 result$answer[[1]]、result$answer[[2]] 等查看。
这个怎么样:
# Replacement example data: each of the five raters appears twice (10 rows),
# so every (rater, student) pair carries two ratings.
# Grades are drawn at random from A-E and no seed is set, so the values differ
# from run to run.
ds_test <- tibble(
  rater = rep(paste0("rater", 1:5), times = 2),
  student_1 = sample(LETTERS[1:5], size = 10, replace = TRUE),
  student_2 = sample(LETTERS[1:5], size = 10, replace = TRUE),
  student_3 = sample(LETTERS[1:5], size = 10, replace = TRUE),
  student_4 = sample(LETTERS[1:5], size = 10, replace = TRUE),
  student_5 = sample(LETTERS[1:5], size = 10, replace = TRUE)
)
# Put the repeated ratings of each (rater, student) pair side by side:
# go long first, label each pair's ratings in order of appearance
# (rating_1, rating_2, ...), then spread those labels into columns.
# The result is still grouped by rater and student.
ds_test %>%
  pivot_longer(cols = -rater, names_to = "student", values_to = "grade") %>%
  group_by(rater, student) %>%
  mutate(n = paste0("rating_", row_number())) %>%
  pivot_wider(names_from = n, values_from = grade)
输出
# A tibble: 25 x 4
# Groups: rater, student [25]
rater student rating_1 rating_2
<chr> <chr> <chr> <chr>
1 rater1 student_1 D D
2 rater1 student_2 C B
3 rater1 student_3 B A
4 rater1 student_4 D E
5 rater1 student_5 E C
6 rater2 student_1 C B
7 rater2 student_2 A D
8 rater2 student_3 D D
9 rater2 student_4 A B
10 rater2 student_5 D A
本质上,我通过跟踪每个(评分者、学生)组合出现在数据框中的次数并将其存储到一个新列 n
中来做一个小技巧,然后我用作 pivot_wider()
.