如何按组计算准确率

How do I get accuracy values by group

我无法按 Chart_type 和 Condition 分组,计算 Correct_answer 列的平均准确度(即 TRUE 值所占的比例)。

数据

# Sample data, one row per response. Element, Correct_answer, Chart_type and
# Condition are stored as factors; Response_time is numeric.
# The literal is bound to `data` because the snippets that follow refer to the
# data frame by that name.
data <- structure(
  list(
    Element = structure(c(1L, 1L, 1L, 1L, 1L),
                        .Label = c("1", "2", "3", "4", "5", "6"),
                        class = "factor"),
    Correct_answer = structure(c(2L, 2L, 2L, 1L, 2L),
                               .Label = c("FALSE", "TRUE"),
                               class = "factor"),
    Response_time = c(25.155, 6.74, 28.649, 16.112, 105.5906238),
    Chart_type = structure(c(2L, 2L, 1L, 1L, 1L),
                           .Label = c("Box", "Violin"),
                           class = "factor"),
    Condition = structure(c(1L, 2L, 1L, 2L, 1L),
                          .Label = c("0", "1"),
                          class = "factor")
  ),
  row.names = c(NA, 5L),
  class = "data.frame"
)

按 Chart_type 求平均:

# Question's original attempt, grouped by chart type: mean and sd are applied
# to every non-grouping column, including the factor column Correct_answer.
av_data_chartType <- data %>%
  group_by(Chart_type) %>%
  summarise_each(funs(mean, sd))

按 Condition 求平均:

# Question's original attempt, grouped by condition: mean and sd are applied
# to every non-grouping column, including the factor column Correct_answer.
av_data_conition <- data %>%
  group_by(Condition) %>%
  summarise_each(funs(mean, sd))

结果中没有生成准确度的平均值。

本应显示准确度的位置出现的是 NA 值。

这应该有效:

# Convert the factor to logical first. as.logical() on a factor works from the
# level labels ("TRUE"/"FALSE"), so mean() of the result is the share of TRUE.
data$Correct_answer <- as.logical(data$Correct_answer)

# Accuracy (mean) and its standard deviation per chart type. Only the grouping
# column and Correct_answer are selected, so no other column is summarised.
# NOTE: summarise_each() and funs() are deprecated in current dplyr releases.
av_data_chartType <- data %>%
  select(Chart_type, Correct_answer) %>%
  group_by(Chart_type) %>%
  summarise_each(funs(mean, sd))

# Same summary per condition, stored under its own name so that it does not
# overwrite the chart-type result.
av_data_condition <- data %>%
  select(Condition, Correct_answer) %>%
  group_by(Condition) %>%
  summarise_each(funs(mean, sd))

您有 2 个问题:

  1. 您的 Correct_answer 是一个因子(factor),而不是逻辑值。

  2. 您试图对每一列都计算这些函数。

你可能需要

library(dplyr)

# Accuracy for every Chart_type x Condition combination. The factor is turned
# into a logical inside mean(), so avg is the share of TRUE answers per group.
data %>%
  group_by(Chart_type, Condition) %>%
  summarise(avg = mean(as.logical(Correct_answer)))

或者,如果您需要分别按每个变量分组:

# Accuracy per chart type: share of TRUE answers within each Chart_type.
data %>%
  group_by(Chart_type) %>%
  summarise(avg = mean(as.logical(Correct_answer)))

# Accuracy per condition: share of TRUE answers within each Condition.
data %>%
  group_by(Condition) %>%
  summarise(avg = mean(as.logical(Correct_answer)))

重现您的代码时,我收到了一条警告,它让我找到了答案:不应该对因子变量计算统计量。如果您清楚自己在做什么,可以先将它们转换为数值:

# Sample data for the reprex: a 5-row data frame in which Element,
# Correct_answer, Chart_type and Condition are factors and Response_time is
# numeric. Correct_answer has the levels "FALSE" (code 1) and "TRUE" (code 2).
data <- structure(list(Element = structure(c(1L, 1L, 1L, 1L, 1L), 
                                         .Label = c("1", "2", "3", "4", "5", "6"), 
                                         class = "factor"), 
                     Correct_answer = structure(c(2L, 2L, 2L, 1L, 2L), 
                                                .Label = c("FALSE", "TRUE"), 
                                                class = "factor"), 
                     Response_time = c(25.155, 6.74, 28.649, 16.112, 105.5906238
                     ), 
                     Chart_type = structure(c(2L, 2L, 1L, 1L, 1L), 
                                            .Label = c("Box", 
                                                       "Violin"), 
                                            class = "factor"), 
                     Condition = structure(c(1L, 2L, 1L, 2L, 1L), 
                                           .Label = c("0", "1"), 
                                           class = "factor")),
                row.names = c(NA, 5L), class = "data.frame")

library("dplyr", warn.conflicts = FALSE)
# Convert the data frame to a tibble before summarising.
data <- as_tibble(data)

# av_data_chartType
# Per-Chart_type mean and sd of every other column. Factor columns are first
# converted with as.numeric(), which yields their integer level codes rather
# than the labels. For Correct_answer (levels "FALSE" = 1, "TRUE" = 2) the
# reported mean is therefore 1 + the proportion of TRUE: 1.67 for Box means
# about 67% correct, and 2 for Violin means 100% correct.
# As the message in the output states, mutate_if() skips the grouping column,
# so Chart_type itself stays a factor.
data %>% 
        group_by(Chart_type) %>%
        mutate_if(.predicate = is.factor, .funs = as.numeric) %>% 
        summarise_each(list( ~mean, ~sd))
#> `mutate_if()` ignored the following grouping variables:
#> Column `Chart_type`
#> # A tibble: 2 x 9
#>   Chart_type Element_mean Correct_answer_~ Response_time_m~ Condition_mean
#>   <fct>             <dbl>            <dbl>            <dbl>          <dbl>
#> 1 Box                   1             1.67             50.1           1.33
#> 2 Violin                1             2                15.9           1.5 
#> # ... with 4 more variables: Element_sd <dbl>, Correct_answer_sd <dbl>,
#> #   Response_time_sd <dbl>, Condition_sd <dbl>

# av_data_condition
# Per-Condition mean and sd of every other column. Factor columns are first
# converted with as.numeric(), which yields their integer level codes rather
# than the labels. For Correct_answer (levels "FALSE" = 1, "TRUE" = 2) the
# reported mean is therefore 1 + the proportion of TRUE: 2 for condition 0
# means 100% correct, and 1.5 for condition 1 means 50% correct.
# As the message in the output states, mutate_if() skips the grouping column,
# so Condition itself stays a factor.
data %>% 
        group_by(Condition) %>%
        mutate_if(.predicate = is.factor, .funs = as.numeric) %>% 
        summarise_each(list( ~mean, ~sd))
#> `mutate_if()` ignored the following grouping variables:
#> Column `Condition`
#> # A tibble: 2 x 9
#>   Condition Element_mean Correct_answer_~ Response_time_m~ Chart_type_mean
#>   <fct>            <dbl>            <dbl>            <dbl>           <dbl>
#> 1 0                    1              2               53.1            1.33
#> 2 1                    1              1.5             11.4            1.5 
#> # ... with 4 more variables: Element_sd <dbl>, Correct_answer_sd <dbl>,
#> #   Response_time_sd <dbl>, Chart_type_sd <dbl>

由 reprex 包 (v0.2.1) 创建于 2019-06-11