按 class 计算两个不同量表上两个变量之间的相关性
Calculate correlation between two variables on two different scales by class
我把两个数据集合并成了一个包含三个变量的数据集:class、total1、total2。total1 是每个学生的个人平均分,total2 是老师给每个 class 的平均分。这两个平均分来自两个不同的量表。
df
class total1 total2
A 4.9 6.7
A 3.8 6.7
A 4.2 6.7
B 4.5 7.2
B 3.9 7.2
B 4.1 7.2
C 3.5 6.5
C 4.4 6.5
C 3.6 6.5
我想计算 total1 和 total2 之间的相关性,并得到 r 值和 p 值。我使用了下面这段代码,但无法按 class 计算 total1 的平均分数:
library("ggpubr")
# Scatter plot of total1 (y axis) against total2 (x axis) with a regression
# line, its confidence band and the Spearman correlation annotated on the
# plot. `df` is passed as-is, so every row is plotted individually; nothing
# here averages total1 within a class.
ggscatter(
  df,
  x = "total2",
  y = "total1",
  xlab = "Teacher score",
  ylab = "Student score",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)
# Example data as inline text: one row per student, holding the class label,
# the student's score (total1) and the score attached to the class (total2).
s <- "class total1 total2
A 4.9 6.7
A 3.8 6.7
A 4.2 6.7
B 4.5 7.2
B 3.9 7.2
B 4.1 7.2
C 3.5 6.5
C 4.4 6.5
C 3.6 6.5
"
df <- read.table(text = s, header = TRUE)

# One data frame per class.
dfs <- split(df, df$class)

# Collapse to one row per class so that each class contributes a single
# (mean total1, mean total2) pair. The score columns are selected by name
# rather than by dropping the first column, and vapply() pins the result to
# exactly one numeric value per score column, so the shape of `avg_m` cannot
# silently change the way it can with sapply().
score_cols <- c("total1", "total2")
avg_m <- t(vapply(
  dfs,
  function(class_df) colMeans(class_df[, score_cols, drop = FALSE]),
  FUN.VALUE = numeric(length(score_cols))
))

# Pearson correlation (the cor.test() default) between the class-level means.
res <- cor.test(x = avg_m[, "total1"], y = avg_m[, "total2"])
res$estimate  # cor 0.5
res$p.value   # [1] 0.6666667
您可以使用 dplyr 库。
library(dplyr)
# Example data: three rows for each of the classes A, B and C.
# total1 differs on every row; total2 repeats a single value within a class.
class <- rep(c("A", "B", "C"), each = 3)
total1 <- c(
  4.9, 3.8, 4.2,
  4.5, 3.9, 4.1,
  3.5, 4.4, 3.6
)
total2 <- rep(c(6.7, 7.2, 6.5), each = 3)
df <- data.frame(class = class, total1 = total1, total2 = total2)

# Average both score columns within each class: one output row per class.
sum_data <- df %>%
  group_by(class) %>%
  summarise(
    total1_mean = mean(total1),
    total2_mean = mean(total2)
  )

sum_data
# # A tibble: 3 x 3
#   class total1_mean total2_mean
#   <chr>       <dbl>       <dbl>
# 1 A            4.3          6.7
# 2 B            4.17         7.2
# 3 C            3.83         6.5

# Correlation test on the class-level means (Pearson is the default method).
res <- cor.test(sum_data$total1_mean, sum_data$total2_mean)

res
# Pearson's product-moment correlation
# data: sum_data$total1_mean and sum_data$total2_mean
# t = 0.57735, df = 1, p-value = 0.6667
# alternative hypothesis: true correlation is not equal to 0
# sample estimates:
# cor
# 0.5

res$p.value   # [1] 0.6666667
res$estimate  # cor 0.5
我把两个数据集合并成了一个包含三个变量的数据集:class、total1、total2。total1 是每个学生的个人平均分,total2 是老师给每个 class 的平均分。这两个平均分来自两个不同的量表。
df
class total1 total2
A 4.9 6.7
A 3.8 6.7
A 4.2 6.7
B 4.5 7.2
B 3.9 7.2
B 4.1 7.2
C 3.5 6.5
C 4.4 6.5
C 3.6 6.5
我想计算 total1 和 total2 之间的相关性,并得到 r 值和 p 值。我使用了下面这段代码,但无法按 class 计算 total1 的平均分数:
library("ggpubr")
# Scatter plot of total1 (y axis) against total2 (x axis) with a regression
# line, its confidence band and the Spearman correlation annotated on the
# plot. `df` is passed as-is, so every row is plotted individually; nothing
# here averages total1 within a class.
ggscatter(
  df,
  x = "total2",
  y = "total1",
  xlab = "Teacher score",
  ylab = "Student score",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)
# Example data as inline text: one row per student, holding the class label,
# the student's score (total1) and the score attached to the class (total2).
s <- "class total1 total2
A 4.9 6.7
A 3.8 6.7
A 4.2 6.7
B 4.5 7.2
B 3.9 7.2
B 4.1 7.2
C 3.5 6.5
C 4.4 6.5
C 3.6 6.5
"
df <- read.table(text = s, header = TRUE)

# One data frame per class.
dfs <- split(df, df$class)

# Collapse to one row per class so that each class contributes a single
# (mean total1, mean total2) pair. The score columns are selected by name
# rather than by dropping the first column, and vapply() pins the result to
# exactly one numeric value per score column, so the shape of `avg_m` cannot
# silently change the way it can with sapply().
score_cols <- c("total1", "total2")
avg_m <- t(vapply(
  dfs,
  function(class_df) colMeans(class_df[, score_cols, drop = FALSE]),
  FUN.VALUE = numeric(length(score_cols))
))

# Pearson correlation (the cor.test() default) between the class-level means.
res <- cor.test(x = avg_m[, "total1"], y = avg_m[, "total2"])
res$estimate  # cor 0.5
res$p.value   # [1] 0.6666667
您可以使用 dplyr 库。
library(dplyr)
# Example data: three rows for each of the classes A, B and C.
# total1 differs on every row; total2 repeats a single value within a class.
class <- rep(c("A", "B", "C"), each = 3)
total1 <- c(
  4.9, 3.8, 4.2,
  4.5, 3.9, 4.1,
  3.5, 4.4, 3.6
)
total2 <- rep(c(6.7, 7.2, 6.5), each = 3)
df <- data.frame(class = class, total1 = total1, total2 = total2)

# Average both score columns within each class: one output row per class.
sum_data <- df %>%
  group_by(class) %>%
  summarise(
    total1_mean = mean(total1),
    total2_mean = mean(total2)
  )

sum_data
# # A tibble: 3 x 3
#   class total1_mean total2_mean
#   <chr>       <dbl>       <dbl>
# 1 A            4.3          6.7
# 2 B            4.17         7.2
# 3 C            3.83         6.5

# Correlation test on the class-level means (Pearson is the default method).
res <- cor.test(sum_data$total1_mean, sum_data$total2_mean)

res
# Pearson's product-moment correlation
# data: sum_data$total1_mean and sum_data$total2_mean
# t = 0.57735, df = 1, p-value = 0.6667
# alternative hypothesis: true correlation is not equal to 0
# sample estimates:
# cor
# 0.5

res$p.value   # [1] 0.6666667
res$estimate  # cor 0.5