如何以一种方式为每个变量选择一个具有正态分布的图
How to outline in one way for each variables selected a graph with normality distribution
我试图对我正在处理的数据集的一系列特定变量执行正态分布测试,并且我编写了以下命令
as.data.frame(d) %>%
dplyr::select(
age, T1.rt, CR.rt) %>%
na.omit() %>%
apply(., 2, ad.test)
但是,我不知道如何以一种方式为每个变量输入轮廓命令,我选择了包含正态分布和正态曲线的直方图。具体来说,我感兴趣的直方图代码应该具有以下特点:
取变量'age'
windows(width=7, height=7); par(lwd=1, las=1, family="sans", cex=1,
mgp=c(3.0,1,0))
hist2(d$age, freq=F, main="", xlab="age", ylab="", col="darkgray")
curve(dnorm(x, mean=mean(d$age[!is.na(d$age)]),
sd=sd(d$age[!is.na(d$age)])), add=T)
skewness.kurtosis(d$age)
ks.test(d$age, "pnorm", mean=mean(d$age[!is.na(d$age)]),
sd=sd(d$age[!is.na(d$age)]))
我只是在这里报告我正在处理的数据集的一些观察结果:
dput(head(d,50))
structure(list(ID = c("P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323"), gender = c("F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F"), age = c(19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19), fixation_time = c(60,
60, 60, 60, 60, 70, 50, 50, 50, 70, 70, 60, 50, 60, 70, 70, 50,
70, 70, 60, 70, 50, 50, 50, 60, 70, 60, 50, 60, 70, 60, 70, 50,
60, 70, 50, 50, 70, 70, 70, 70, 50, 60, 50, 60, 60, 70, 50, 60,
60), block = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), t1.key = c("None", "None",
"None", "space", "None", "space", "None", "None", "None", "space",
"None", "None", "space", "None", "None", "space", "None", "None",
"space", "None", "space", "space", "space", "None", "None", "None",
"space", "space", "None", "None", "space", "None", "None", "None",
"None", "None", "None", "space", "space", "None", "None", "None",
"None", "space", "None", "None", "space", "None", "space", "None"
), T1.response = c(0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0), COND = c("NR",
"NR", "NR", "R", "NR", "R", "NR", "NR", "NR", "R", "NR", "NR",
"R", "NR", "NR", "R", "NR", "NR", "R", "NR", "R", "R", "R", "NR",
"NR", "NR", "R", "R", "NR", "NR", "R", "NR", "NR", "NR", "NR",
"NR", "NR", "R", "R", "NR", "NR", "NR", "NR", "R", "NR", "NR",
"R", "NR", "R", "NR"), T1.rt = c(NA, NA, NA, 0.812299799988978,
NA, 0.72336569998879, NA, NA, NA, 0.772733500052709, NA, NA,
0.606754800013732, NA, NA, 0.601030899968464, NA, NA, 0.838272600027267,
NA, 0.305548300035298, 0.849945599969942, 0.748269900039304,
NA, NA, NA, 0.859215400007088, 0.95704890001798, NA, NA, 0.874362500035204,
NA, NA, NA, NA, NA, NA, 0.270455699996091, 0.75726039998699,
NA, NA, NA, NA, 0.762694000033662, NA, NA, 0.789715700026136,
NA, 0.90579859999707, NA), CR.key = c("p", "p", "p", "p", "p",
"p", "p", "p", "p", "p", "p", "p", "p", "p", "o", "p", "i", "i",
"h", "u", "i", "u", "o", "o", "p", "p", "p", "o", "p", "i", "o",
"p", "p", "p", "o", "o", "o", "p", "i", "p", "p", "o", "o", "i",
"i", "o", "o", "i", "i", "u"), CR.rt = c(0.651771800010465, 0.585048799985088,
0.652350199990906, 0.69888829998672, 1.01917029998731, 0.550036200031173,
0.0361186999944039, 0.568817299965303, 0.452191599993966, 0.514980700041633,
0.619590600021184, 0.719264700019266, 0.466181399999186, 0.45217840000987,
0.668881699966732, 0.914478300022893, 1.01910460001091, 1.40315000002738,
1.69993370003067, 1.71914210001705, 1.29938790004235, 0.698139799991623,
0.848338100011461, 0.651829700043891, 0.486136299965438, 0.703567499993369,
0.76673849998042, 0.54929809999885, 0.718664799991529, 0.768383099988569,
0.898415500007104, 0.819344500021543, 0.61898209998617, 0.737225699995179,
1.03654629999073, 0.971092400024645, 1.4362695000018, 0.999490200018045,
0.932840399967972, 0.586312200000975, 0.786785800009966, 1.01987839996582,
0.93673920002766, 0.715710600023158, 0.819960499997251, 0.75370900001144,
0.818668299994897, 0.903600800025742, 1.1176545000053, 1.10352450003847
), trial_num = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49,
50, 51, 52, 53), ldots = c(48, 48, 52, 55, 51, 51, 52, 49, 45,
55, 49, 49, 51, 49, 48, 52, 45, 49, 45, 55, 51, 48, 55, 51, 45,
45, 52, 48, 48, 48, 55, 51, 49, 48, 49, 51, 51, 55, 51, 49, 45,
55, 51, 55, 55, 52, 52, 48, 49, 52), rdots = c(52, 52, 48, 45,
49, 49, 48, 51, 55, 45, 51, 51, 49, 51, 52, 48, 55, 51, 55, 45,
49, 52, 45, 49, 55, 55, 48, 52, 52, 52, 45, 49, 51, 52, 51, 49,
49, 45, 49, 51, 55, 45, 49, 45, 45, 48, 48, 52, 51, 48), TASK = c("left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left"), T1.correct = c(0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1), Go.Nogo..whether.a.person.should.respond. = c("NR",
"NR", "R", "R", "R", "R", "R", "NR", "NR", "R", "NR", "NR", "R",
"NR", "NR", "R", "NR", "NR", "NR", "R", "R", "NR", "R", "R",
"NR", "NR", "R", "NR", "NR", "NR", "R", "R", "NR", "NR", "NR",
"R", "R", "R", "R", "NR", "NR", "R", "R", "R", "R", "R", "R",
"NR", "NR", "R"), T1.ACC = c(1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0), CR = c(4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 2, 2, 9, 1, 2, 1,
3, 3, 4, 4, 4, 3, 4, 2, 3, 4, 4, 4, 3, 3, 3, 4, 2, 4, 4, 3, 3,
2, 2, 3, 3, 2, 2, 1), difficulty = c("medium", "medium", "medium",
"easy", "hard", "hard", "medium", "hard", "easy", "easy", "hard",
"hard", "hard", "hard", "medium", "medium", "easy", "hard", "easy",
"easy", "hard", "medium", "easy", "hard", "easy", "easy", "medium",
"medium", "medium", "medium", "easy", "hard", "hard", "medium",
"hard", "hard", "hard", "easy", "hard", "hard", "easy", "easy",
"hard", "easy", "easy", "medium", "medium", "medium", "hard",
"medium")), row.names = c(NA, -50L), class = c("tbl_df", "tbl",
"data.frame"))
有人知道吗?我应该如何保留前面提到的代码?此外,我希望这些图表可以在同一个 window 中绘制在一起,而不是重叠。
好的,有了你的数据,我们可以再做一次。
- 我们加载您的数据
df=structure(list(ID = c("P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323"), gender = c("F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F"), age = c(19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19), fixation_time = c(60,
60, 60, 60, 60, 70, 50, 50, 50, 70, 70, 60, 50, 60, 70, 70, 50,
70, 70, 60, 70, 50, 50, 50, 60, 70, 60, 50, 60, 70, 60, 70, 50,
60, 70, 50, 50, 70, 70, 70, 70, 50, 60, 50, 60, 60, 70, 50, 60,
60), block = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), t1.key = c("None", "None",
"None", "space", "None", "space", "None", "None", "None", "space",
"None", "None", "space", "None", "None", "space", "None", "None",
"space", "None", "space", "space", "space", "None", "None", "None",
"space", "space", "None", "None", "space", "None", "None", "None",
"None", "None", "None", "space", "space", "None", "None", "None",
"None", "space", "None", "None", "space", "None", "space", "None"
), T1.response = c(0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0), COND = c("NR",
"NR", "NR", "R", "NR", "R", "NR", "NR", "NR", "R", "NR", "NR",
"R", "NR", "NR", "R", "NR", "NR", "R", "NR", "R", "R", "R", "NR",
"NR", "NR", "R", "R", "NR", "NR", "R", "NR", "NR", "NR", "NR",
"NR", "NR", "R", "R", "NR", "NR", "NR", "NR", "R", "NR", "NR",
"R", "NR", "R", "NR"), T1.rt = c(NA, NA, NA, 0.812299799988978,
NA, 0.72336569998879, NA, NA, NA, 0.772733500052709, NA, NA,
0.606754800013732, NA, NA, 0.601030899968464, NA, NA, 0.838272600027267,
NA, 0.305548300035298, 0.849945599969942, 0.748269900039304,
NA, NA, NA, 0.859215400007088, 0.95704890001798, NA, NA, 0.874362500035204,
NA, NA, NA, NA, NA, NA, 0.270455699996091, 0.75726039998699,
NA, NA, NA, NA, 0.762694000033662, NA, NA, 0.789715700026136,
NA, 0.90579859999707, NA), CR.key = c("p", "p", "p", "p", "p",
"p", "p", "p", "p", "p", "p", "p", "p", "p", "o", "p", "i", "i",
"h", "u", "i", "u", "o", "o", "p", "p", "p", "o", "p", "i", "o",
"p", "p", "p", "o", "o", "o", "p", "i", "p", "p", "o", "o", "i",
"i", "o", "o", "i", "i", "u"), CR.rt = c(0.651771800010465, 0.585048799985088,
0.652350199990906, 0.69888829998672, 1.01917029998731, 0.550036200031173,
0.0361186999944039, 0.568817299965303, 0.452191599993966, 0.514980700041633,
0.619590600021184, 0.719264700019266, 0.466181399999186, 0.45217840000987,
0.668881699966732, 0.914478300022893, 1.01910460001091, 1.40315000002738,
1.69993370003067, 1.71914210001705, 1.29938790004235, 0.698139799991623,
0.848338100011461, 0.651829700043891, 0.486136299965438, 0.703567499993369,
0.76673849998042, 0.54929809999885, 0.718664799991529, 0.768383099988569,
0.898415500007104, 0.819344500021543, 0.61898209998617, 0.737225699995179,
1.03654629999073, 0.971092400024645, 1.4362695000018, 0.999490200018045,
0.932840399967972, 0.586312200000975, 0.786785800009966, 1.01987839996582,
0.93673920002766, 0.715710600023158, 0.819960499997251, 0.75370900001144,
0.818668299994897, 0.903600800025742, 1.1176545000053, 1.10352450003847
), trial_num = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49,
50, 51, 52, 53), ldots = c(48, 48, 52, 55, 51, 51, 52, 49, 45,
55, 49, 49, 51, 49, 48, 52, 45, 49, 45, 55, 51, 48, 55, 51, 45,
45, 52, 48, 48, 48, 55, 51, 49, 48, 49, 51, 51, 55, 51, 49, 45,
55, 51, 55, 55, 52, 52, 48, 49, 52), rdots = c(52, 52, 48, 45,
49, 49, 48, 51, 55, 45, 51, 51, 49, 51, 52, 48, 55, 51, 55, 45,
49, 52, 45, 49, 55, 55, 48, 52, 52, 52, 45, 49, 51, 52, 51, 49,
49, 45, 49, 51, 55, 45, 49, 45, 45, 48, 48, 52, 51, 48), TASK = c("left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left"), T1.correct = c(0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1), Go.Nogo..whether.a.person.should.respond. = c("NR",
"NR", "R", "R", "R", "R", "R", "NR", "NR", "R", "NR", "NR", "R",
"NR", "NR", "R", "NR", "NR", "NR", "R", "R", "NR", "R", "R",
"NR", "NR", "R", "NR", "NR", "NR", "R", "R", "NR", "NR", "NR",
"R", "R", "R", "R", "NR", "NR", "R", "R", "R", "R", "R", "R",
"NR", "NR", "R"), T1.ACC = c(1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0), CR = c(4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 2, 2, 9, 1, 2, 1,
3, 3, 4, 4, 4, 3, 4, 2, 3, 4, 4, 4, 3, 3, 3, 4, 2, 4, 4, 3, 3,
2, 2, 3, 3, 2, 2, 1), difficulty = c("medium", "medium", "medium",
"easy", "hard", "hard", "medium", "hard", "easy", "easy", "hard",
"hard", "hard", "hard", "medium", "medium", "easy", "hard", "easy",
"easy", "hard", "medium", "easy", "hard", "easy", "easy", "medium",
"medium", "medium", "medium", "easy", "hard", "hard", "medium",
"hard", "hard", "hard", "easy", "hard", "hard", "easy", "easy",
"hard", "easy", "easy", "medium", "medium", "medium", "hard",
"medium")), row.names = c(NA, -50L), class = c("tbl_df", "tbl",
"data.frame"))
- 现在我们将更改它们的表示形式,使行包含单独的变量名称和数据(
tibble
in tibble
)
library(tidyverse)
df1 = df %>%
pivot_longer(where(is.numeric), names_to = "variable", values_to = "value") %>%
group_by(variable) %>%
nest()
输出df1
# A tibble: 12 x 2
# Groups: variable [12]
variable data
<chr> <list>
1 age <tibble [50 x 9]>
2 fixation_time <tibble [50 x 9]>
3 block <tibble [50 x 9]>
4 T1.response <tibble [50 x 9]>
5 T1.rt <tibble [50 x 9]>
6 CR.rt <tibble [50 x 9]>
7 trial_num <tibble [50 x 9]>
8 ldots <tibble [50 x 9]>
9 rdots <tibble [50 x 9]>
10 T1.correct <tibble [50 x 9]>
11 T1.ACC <tibble [50 x 9]>
12 CR <tibble [50 x 9]>
tibble
中的tibble
是什么?让我们检查一下。
df1$data[[6]] %>% select(ID, value)
输出
# A tibble: 50 x 2
ID value
<chr> <dbl>
1 P1323 0.652
2 P1323 0.585
3 P1323 0.652
4 P1323 0.699
5 P1323 1.02
6 P1323 0.550
7 P1323 0.0361
8 P1323 0.569
9 P1323 0.452
10 P1323 0.515
# ... with 40 more rows
好吧,既然我们有单独的数据 tibble
,让我们创建一个函数来 returns 我们感兴趣的统计数据。
add.statistic = function(data) {
x = data$value[!is.na(data$value)]
suppressWarnings(tibble(
n = length(x),
nunique = length(unique(x)),
mean = mean(x),
sd = sd(x),
skew = ifelse(nunique>1,moments::skewness(x), NA),
kur = ifelse(nunique>1,moments::kurtosis(x), NA),
ks.p = ifelse(nunique>1,stats::ks.test(x, "pnorm", mean, sd)$p.value, NA),
ad.stat = ifelse(nunique>1,nortest::ad.test(x)$statistic, NA),
ad.p = ifelse(nunique>1,nortest::ad.test(x)$p.value, NA),
sw.stat = ifelse(nunique>1,stats::shapiro.test(x)$statistic, NA),
sw.p = ifelse(nunique>1,stats::shapiro.test(x)$p.value, NA)
))
}
这里需要一些评论。变量可以包含所有相同的值。那么统计skewness
、 kurtosis
、shapiro
之类的数据就没有意义了。为此,我在其中添加了 ifelse
函数和 nunique
变量。
此外,测试 ks.test
可能会生成警告。为此,我使用 suppressWarnings
将其静音。
现在让我们看看我们的 add.statistic
函数是如何工作的
add.statistic(df1$data[[6]])
输出
# A tibble: 1 x 11
n nunique mean sd skew kur ks.p ad.stat ad.p sw.stat sw.p
<int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 50 50 0.818 0.313 0.849 4.53 0.456 1.17 0.00421 0.928 0.00465
宾果!我们有统计数据!
让我们现在把它放在一起。
df1 = df %>%
pivot_longer(where(is.numeric), names_to = "variable", values_to = "value") %>%
group_by(variable) %>%
nest() %>%
mutate(stat = map(data, add.statistic))
df1 %>% unnest(stat)
输出
# A tibble: 12 x 13
# Groups: variable [12]
variable data n nunique mean sd skew kur ks.p ad.stat ad.p sw.stat sw.p
<chr> <list> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 age <tibble [50 x 9]> 50 1 19 0 NA NA NA NA NA NA NA
2 fixation_time <tibble [50 x 9]> 50 3 60.4 8.07 -0.0719 1.57 0.0139 3.90 7.17e-10 0.799 8.36e- 7
3 block <tibble [50 x 9]> 50 1 0 0 NA NA NA NA NA NA NA
4 T1.response <tibble [50 x 9]> 50 2 0.34 0.479 0.676 1.46 0.0000000391 10.1 3.7 e-24 0.599 1.85e-10
5 T1.rt <tibble [50 x 9]> 17 17 0.731 0.191 -1.42 4.15 0.209 1.20 2.74e- 3 0.819 3.78e- 3
6 CR.rt <tibble [50 x 9]> 50 50 0.818 0.313 0.849 4.53 0.456 1.17 4.21e- 3 0.928 4.65e- 3
7 trial_num <tibble [50 x 9]> 50 50 26.5 16.0 -0.0204 1.78 0.854 0.582 1.23e- 1 0.952 4.28e- 2
8 ldots <tibble [50 x 9]> 50 6 50.2 3.05 0.0230 2.28 0.297 1.27 2.35e- 3 0.918 2.06e- 3
9 rdots <tibble [50 x 9]> 50 6 49.8 3.05 -0.0230 2.28 0.297 1.27 2.35e- 3 0.918 2.06e- 3
10 T1.correct <tibble [50 x 9]> 50 2 0.52 0.505 -0.0801 1.01 0.0000101 8.84 8.98e-22 0.636 6.99e-10
11 T1.ACC <tibble [50 x 9]> 50 2 0.66 0.479 -0.676 1.46 0.0000000391 10.1 3.7 e-24 0.599 1.85e-10
12 CR <tibble [50 x 9]> 50 5 3.32 1.25 1.39 9.82 0.00112 3.66 2.77e- 9 0.755 9.19e- 8
一切正常。因此,让我们创建一个生成图形的函数。但是,仅适用于 nunique
> 1!
的变量
create.plot = function(df, group){
stat = df$stat[[1]]
data = df$data[[1]]
hist.val = hist(data$value, 30)
if(stat$nunique ==1) return(NULL)
statlab = stat %>%
pivot_longer(everything(), names_to = "stat", values_to = "val") %>%
mutate(x=hist.val$breaks[2],
y=seq(max(hist.val$density)*0.1,
max(hist.val$density)*0.7,
length.out=length(stat)),
lab = paste(stat,":",signif(val,3)))
data %>% ggplot(aes(value))+
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="red", col="red")+
stat_function(fun = dnorm, args = list(mean = stat$mean, sd = stat$sd),
col="blue")+
geom_label(data=statlab, aes(x=x, y=y, label = lab))+
xlab(group)
}
让我们检查一下我们的 create.plot
函数如何作用于所选变量
create.plot(df1[6,], df1[6,1])
不知道你是否真的期待,但我想你可能会喜欢这样的剧情。
现在我们所要做的就是将所有内容组合在一个简洁的命令中
df1 %>% group_by(variable) %>%
group_map(create.plot)
以下是部分精选图表
as.data.frame(d) %>%
dplyr::select(
age, T1.rt, CR.rt) %>%
na.omit() %>%
apply(., 2, ad.test)
但是,我不知道如何以一种方式为每个变量输入轮廓命令,我选择了包含正态分布和正态曲线的直方图。具体来说,我感兴趣的直方图代码应该具有以下特点:
取变量'age'
windows(width=7, height=7); par(lwd=1, las=1, family="sans", cex=1,
mgp=c(3.0,1,0))
hist2(d$age, freq=F, main="", xlab="age", ylab="", col="darkgray")
curve(dnorm(x, mean=mean(d$age[!is.na(d$age)]),
sd=sd(d$age[!is.na(d$age)])), add=T)
skewness.kurtosis(d$age)
ks.test(d$age, "pnorm", mean=mean(d$age[!is.na(d$age)]),
sd=sd(d$age[!is.na(d$age)]))
我只是在这里报告我正在处理的数据集的一些观察结果:
dput(head(d,50))
structure(list(ID = c("P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323"), gender = c("F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F"), age = c(19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19), fixation_time = c(60,
60, 60, 60, 60, 70, 50, 50, 50, 70, 70, 60, 50, 60, 70, 70, 50,
70, 70, 60, 70, 50, 50, 50, 60, 70, 60, 50, 60, 70, 60, 70, 50,
60, 70, 50, 50, 70, 70, 70, 70, 50, 60, 50, 60, 60, 70, 50, 60,
60), block = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), t1.key = c("None", "None",
"None", "space", "None", "space", "None", "None", "None", "space",
"None", "None", "space", "None", "None", "space", "None", "None",
"space", "None", "space", "space", "space", "None", "None", "None",
"space", "space", "None", "None", "space", "None", "None", "None",
"None", "None", "None", "space", "space", "None", "None", "None",
"None", "space", "None", "None", "space", "None", "space", "None"
), T1.response = c(0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0), COND = c("NR",
"NR", "NR", "R", "NR", "R", "NR", "NR", "NR", "R", "NR", "NR",
"R", "NR", "NR", "R", "NR", "NR", "R", "NR", "R", "R", "R", "NR",
"NR", "NR", "R", "R", "NR", "NR", "R", "NR", "NR", "NR", "NR",
"NR", "NR", "R", "R", "NR", "NR", "NR", "NR", "R", "NR", "NR",
"R", "NR", "R", "NR"), T1.rt = c(NA, NA, NA, 0.812299799988978,
NA, 0.72336569998879, NA, NA, NA, 0.772733500052709, NA, NA,
0.606754800013732, NA, NA, 0.601030899968464, NA, NA, 0.838272600027267,
NA, 0.305548300035298, 0.849945599969942, 0.748269900039304,
NA, NA, NA, 0.859215400007088, 0.95704890001798, NA, NA, 0.874362500035204,
NA, NA, NA, NA, NA, NA, 0.270455699996091, 0.75726039998699,
NA, NA, NA, NA, 0.762694000033662, NA, NA, 0.789715700026136,
NA, 0.90579859999707, NA), CR.key = c("p", "p", "p", "p", "p",
"p", "p", "p", "p", "p", "p", "p", "p", "p", "o", "p", "i", "i",
"h", "u", "i", "u", "o", "o", "p", "p", "p", "o", "p", "i", "o",
"p", "p", "p", "o", "o", "o", "p", "i", "p", "p", "o", "o", "i",
"i", "o", "o", "i", "i", "u"), CR.rt = c(0.651771800010465, 0.585048799985088,
0.652350199990906, 0.69888829998672, 1.01917029998731, 0.550036200031173,
0.0361186999944039, 0.568817299965303, 0.452191599993966, 0.514980700041633,
0.619590600021184, 0.719264700019266, 0.466181399999186, 0.45217840000987,
0.668881699966732, 0.914478300022893, 1.01910460001091, 1.40315000002738,
1.69993370003067, 1.71914210001705, 1.29938790004235, 0.698139799991623,
0.848338100011461, 0.651829700043891, 0.486136299965438, 0.703567499993369,
0.76673849998042, 0.54929809999885, 0.718664799991529, 0.768383099988569,
0.898415500007104, 0.819344500021543, 0.61898209998617, 0.737225699995179,
1.03654629999073, 0.971092400024645, 1.4362695000018, 0.999490200018045,
0.932840399967972, 0.586312200000975, 0.786785800009966, 1.01987839996582,
0.93673920002766, 0.715710600023158, 0.819960499997251, 0.75370900001144,
0.818668299994897, 0.903600800025742, 1.1176545000053, 1.10352450003847
), trial_num = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49,
50, 51, 52, 53), ldots = c(48, 48, 52, 55, 51, 51, 52, 49, 45,
55, 49, 49, 51, 49, 48, 52, 45, 49, 45, 55, 51, 48, 55, 51, 45,
45, 52, 48, 48, 48, 55, 51, 49, 48, 49, 51, 51, 55, 51, 49, 45,
55, 51, 55, 55, 52, 52, 48, 49, 52), rdots = c(52, 52, 48, 45,
49, 49, 48, 51, 55, 45, 51, 51, 49, 51, 52, 48, 55, 51, 55, 45,
49, 52, 45, 49, 55, 55, 48, 52, 52, 52, 45, 49, 51, 52, 51, 49,
49, 45, 49, 51, 55, 45, 49, 45, 45, 48, 48, 52, 51, 48), TASK = c("left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left"), T1.correct = c(0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1), Go.Nogo..whether.a.person.should.respond. = c("NR",
"NR", "R", "R", "R", "R", "R", "NR", "NR", "R", "NR", "NR", "R",
"NR", "NR", "R", "NR", "NR", "NR", "R", "R", "NR", "R", "R",
"NR", "NR", "R", "NR", "NR", "NR", "R", "R", "NR", "NR", "NR",
"R", "R", "R", "R", "NR", "NR", "R", "R", "R", "R", "R", "R",
"NR", "NR", "R"), T1.ACC = c(1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0), CR = c(4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 2, 2, 9, 1, 2, 1,
3, 3, 4, 4, 4, 3, 4, 2, 3, 4, 4, 4, 3, 3, 3, 4, 2, 4, 4, 3, 3,
2, 2, 3, 3, 2, 2, 1), difficulty = c("medium", "medium", "medium",
"easy", "hard", "hard", "medium", "hard", "easy", "easy", "hard",
"hard", "hard", "hard", "medium", "medium", "easy", "hard", "easy",
"easy", "hard", "medium", "easy", "hard", "easy", "easy", "medium",
"medium", "medium", "medium", "easy", "hard", "hard", "medium",
"hard", "hard", "hard", "easy", "hard", "hard", "easy", "easy",
"hard", "easy", "easy", "medium", "medium", "medium", "hard",
"medium")), row.names = c(NA, -50L), class = c("tbl_df", "tbl",
"data.frame"))
有人知道吗?我应该如何保留前面提到的代码?此外,我希望这些图表可以在同一个 window 中绘制在一起,而不是重叠。
好的,有了你的数据,我们可以再做一次。
- 我们加载您的数据
df=structure(list(ID = c("P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323", "P1323", "P1323", "P1323", "P1323",
"P1323", "P1323", "P1323"), gender = c("F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F"), age = c(19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19), fixation_time = c(60,
60, 60, 60, 60, 70, 50, 50, 50, 70, 70, 60, 50, 60, 70, 70, 50,
70, 70, 60, 70, 50, 50, 50, 60, 70, 60, 50, 60, 70, 60, 70, 50,
60, 70, 50, 50, 70, 70, 70, 70, 50, 60, 50, 60, 60, 70, 50, 60,
60), block = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), t1.key = c("None", "None",
"None", "space", "None", "space", "None", "None", "None", "space",
"None", "None", "space", "None", "None", "space", "None", "None",
"space", "None", "space", "space", "space", "None", "None", "None",
"space", "space", "None", "None", "space", "None", "None", "None",
"None", "None", "None", "space", "space", "None", "None", "None",
"None", "space", "None", "None", "space", "None", "space", "None"
), T1.response = c(0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0), COND = c("NR",
"NR", "NR", "R", "NR", "R", "NR", "NR", "NR", "R", "NR", "NR",
"R", "NR", "NR", "R", "NR", "NR", "R", "NR", "R", "R", "R", "NR",
"NR", "NR", "R", "R", "NR", "NR", "R", "NR", "NR", "NR", "NR",
"NR", "NR", "R", "R", "NR", "NR", "NR", "NR", "R", "NR", "NR",
"R", "NR", "R", "NR"), T1.rt = c(NA, NA, NA, 0.812299799988978,
NA, 0.72336569998879, NA, NA, NA, 0.772733500052709, NA, NA,
0.606754800013732, NA, NA, 0.601030899968464, NA, NA, 0.838272600027267,
NA, 0.305548300035298, 0.849945599969942, 0.748269900039304,
NA, NA, NA, 0.859215400007088, 0.95704890001798, NA, NA, 0.874362500035204,
NA, NA, NA, NA, NA, NA, 0.270455699996091, 0.75726039998699,
NA, NA, NA, NA, 0.762694000033662, NA, NA, 0.789715700026136,
NA, 0.90579859999707, NA), CR.key = c("p", "p", "p", "p", "p",
"p", "p", "p", "p", "p", "p", "p", "p", "p", "o", "p", "i", "i",
"h", "u", "i", "u", "o", "o", "p", "p", "p", "o", "p", "i", "o",
"p", "p", "p", "o", "o", "o", "p", "i", "p", "p", "o", "o", "i",
"i", "o", "o", "i", "i", "u"), CR.rt = c(0.651771800010465, 0.585048799985088,
0.652350199990906, 0.69888829998672, 1.01917029998731, 0.550036200031173,
0.0361186999944039, 0.568817299965303, 0.452191599993966, 0.514980700041633,
0.619590600021184, 0.719264700019266, 0.466181399999186, 0.45217840000987,
0.668881699966732, 0.914478300022893, 1.01910460001091, 1.40315000002738,
1.69993370003067, 1.71914210001705, 1.29938790004235, 0.698139799991623,
0.848338100011461, 0.651829700043891, 0.486136299965438, 0.703567499993369,
0.76673849998042, 0.54929809999885, 0.718664799991529, 0.768383099988569,
0.898415500007104, 0.819344500021543, 0.61898209998617, 0.737225699995179,
1.03654629999073, 0.971092400024645, 1.4362695000018, 0.999490200018045,
0.932840399967972, 0.586312200000975, 0.786785800009966, 1.01987839996582,
0.93673920002766, 0.715710600023158, 0.819960499997251, 0.75370900001144,
0.818668299994897, 0.903600800025742, 1.1176545000053, 1.10352450003847
), trial_num = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49,
50, 51, 52, 53), ldots = c(48, 48, 52, 55, 51, 51, 52, 49, 45,
55, 49, 49, 51, 49, 48, 52, 45, 49, 45, 55, 51, 48, 55, 51, 45,
45, 52, 48, 48, 48, 55, 51, 49, 48, 49, 51, 51, 55, 51, 49, 45,
55, 51, 55, 55, 52, 52, 48, 49, 52), rdots = c(52, 52, 48, 45,
49, 49, 48, 51, 55, 45, 51, 51, 49, 51, 52, 48, 55, 51, 55, 45,
49, 52, 45, 49, 55, 55, 48, 52, 52, 52, 45, 49, 51, 52, 51, 49,
49, 45, 49, 51, 55, 45, 49, 45, 45, 48, 48, 52, 51, 48), TASK = c("left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left", "left", "left", "left", "left", "left", "left", "left",
"left"), T1.correct = c(0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0,
0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1), Go.Nogo..whether.a.person.should.respond. = c("NR",
"NR", "R", "R", "R", "R", "R", "NR", "NR", "R", "NR", "NR", "R",
"NR", "NR", "R", "NR", "NR", "NR", "R", "R", "NR", "R", "R",
"NR", "NR", "R", "NR", "NR", "NR", "R", "R", "NR", "NR", "NR",
"R", "R", "R", "R", "NR", "NR", "R", "R", "R", "R", "R", "R",
"NR", "NR", "R"), T1.ACC = c(1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0), CR = c(4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 2, 2, 9, 1, 2, 1,
3, 3, 4, 4, 4, 3, 4, 2, 3, 4, 4, 4, 3, 3, 3, 4, 2, 4, 4, 3, 3,
2, 2, 3, 3, 2, 2, 1), difficulty = c("medium", "medium", "medium",
"easy", "hard", "hard", "medium", "hard", "easy", "easy", "hard",
"hard", "hard", "hard", "medium", "medium", "easy", "hard", "easy",
"easy", "hard", "medium", "easy", "hard", "easy", "easy", "medium",
"medium", "medium", "medium", "easy", "hard", "hard", "medium",
"hard", "hard", "hard", "easy", "hard", "hard", "easy", "easy",
"hard", "easy", "easy", "medium", "medium", "medium", "hard",
"medium")), row.names = c(NA, -50L), class = c("tbl_df", "tbl",
"data.frame"))
- 现在我们将更改它们的表示形式,使行包含单独的变量名称和数据(
tibble
intibble
)
library(tidyverse)
df1 = df %>%
pivot_longer(where(is.numeric), names_to = "variable", values_to = "value") %>%
group_by(variable) %>%
nest()
输出df1
# A tibble: 12 x 2
# Groups: variable [12]
variable data
<chr> <list>
1 age <tibble [50 x 9]>
2 fixation_time <tibble [50 x 9]>
3 block <tibble [50 x 9]>
4 T1.response <tibble [50 x 9]>
5 T1.rt <tibble [50 x 9]>
6 CR.rt <tibble [50 x 9]>
7 trial_num <tibble [50 x 9]>
8 ldots <tibble [50 x 9]>
9 rdots <tibble [50 x 9]>
10 T1.correct <tibble [50 x 9]>
11 T1.ACC <tibble [50 x 9]>
12 CR <tibble [50 x 9]>
tibble
中的tibble
是什么?让我们检查一下。
df1$data[[6]] %>% select(ID, value)
输出
# A tibble: 50 x 2
ID value
<chr> <dbl>
1 P1323 0.652
2 P1323 0.585
3 P1323 0.652
4 P1323 0.699
5 P1323 1.02
6 P1323 0.550
7 P1323 0.0361
8 P1323 0.569
9 P1323 0.452
10 P1323 0.515
# ... with 40 more rows
好吧,既然我们有单独的数据 tibble
,让我们创建一个函数来 returns 我们感兴趣的统计数据。
add.statistic = function(data) {
x = data$value[!is.na(data$value)]
suppressWarnings(tibble(
n = length(x),
nunique = length(unique(x)),
mean = mean(x),
sd = sd(x),
skew = ifelse(nunique>1,moments::skewness(x), NA),
kur = ifelse(nunique>1,moments::kurtosis(x), NA),
ks.p = ifelse(nunique>1,stats::ks.test(x, "pnorm", mean, sd)$p.value, NA),
ad.stat = ifelse(nunique>1,nortest::ad.test(x)$statistic, NA),
ad.p = ifelse(nunique>1,nortest::ad.test(x)$p.value, NA),
sw.stat = ifelse(nunique>1,stats::shapiro.test(x)$statistic, NA),
sw.p = ifelse(nunique>1,stats::shapiro.test(x)$p.value, NA)
))
}
这里需要一些评论。变量可以包含所有相同的值。那么统计skewness
、 kurtosis
、shapiro
之类的数据就没有意义了。为此,我在其中添加了 ifelse
函数和 nunique
变量。
此外,测试 ks.test
可能会生成警告。为此,我使用 suppressWarnings
将其静音。
现在让我们看看我们的 add.statistic
函数是如何工作的
add.statistic(df1$data[[6]])
输出
# A tibble: 1 x 11
n nunique mean sd skew kur ks.p ad.stat ad.p sw.stat sw.p
<int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 50 50 0.818 0.313 0.849 4.53 0.456 1.17 0.00421 0.928 0.00465
宾果!我们有统计数据! 让我们现在把它放在一起。
df1 = df %>%
pivot_longer(where(is.numeric), names_to = "variable", values_to = "value") %>%
group_by(variable) %>%
nest() %>%
mutate(stat = map(data, add.statistic))
df1 %>% unnest(stat)
输出
# A tibble: 12 x 13
# Groups: variable [12]
variable data n nunique mean sd skew kur ks.p ad.stat ad.p sw.stat sw.p
<chr> <list> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 age <tibble [50 x 9]> 50 1 19 0 NA NA NA NA NA NA NA
2 fixation_time <tibble [50 x 9]> 50 3 60.4 8.07 -0.0719 1.57 0.0139 3.90 7.17e-10 0.799 8.36e- 7
3 block <tibble [50 x 9]> 50 1 0 0 NA NA NA NA NA NA NA
4 T1.response <tibble [50 x 9]> 50 2 0.34 0.479 0.676 1.46 0.0000000391 10.1 3.7 e-24 0.599 1.85e-10
5 T1.rt <tibble [50 x 9]> 17 17 0.731 0.191 -1.42 4.15 0.209 1.20 2.74e- 3 0.819 3.78e- 3
6 CR.rt <tibble [50 x 9]> 50 50 0.818 0.313 0.849 4.53 0.456 1.17 4.21e- 3 0.928 4.65e- 3
7 trial_num <tibble [50 x 9]> 50 50 26.5 16.0 -0.0204 1.78 0.854 0.582 1.23e- 1 0.952 4.28e- 2
8 ldots <tibble [50 x 9]> 50 6 50.2 3.05 0.0230 2.28 0.297 1.27 2.35e- 3 0.918 2.06e- 3
9 rdots <tibble [50 x 9]> 50 6 49.8 3.05 -0.0230 2.28 0.297 1.27 2.35e- 3 0.918 2.06e- 3
10 T1.correct <tibble [50 x 9]> 50 2 0.52 0.505 -0.0801 1.01 0.0000101 8.84 8.98e-22 0.636 6.99e-10
11 T1.ACC <tibble [50 x 9]> 50 2 0.66 0.479 -0.676 1.46 0.0000000391 10.1 3.7 e-24 0.599 1.85e-10
12 CR <tibble [50 x 9]> 50 5 3.32 1.25 1.39 9.82 0.00112 3.66 2.77e- 9 0.755 9.19e- 8
一切正常。因此,让我们创建一个生成图形的函数。但是,仅适用于 nunique
> 1!
create.plot = function(df, group){
stat = df$stat[[1]]
data = df$data[[1]]
hist.val = hist(data$value, 30)
if(stat$nunique ==1) return(NULL)
statlab = stat %>%
pivot_longer(everything(), names_to = "stat", values_to = "val") %>%
mutate(x=hist.val$breaks[2],
y=seq(max(hist.val$density)*0.1,
max(hist.val$density)*0.7,
length.out=length(stat)),
lab = paste(stat,":",signif(val,3)))
data %>% ggplot(aes(value))+
geom_histogram(aes(y=..density..), colour="black", fill="white")+
geom_density(alpha=.2, fill="red", col="red")+
stat_function(fun = dnorm, args = list(mean = stat$mean, sd = stat$sd),
col="blue")+
geom_label(data=statlab, aes(x=x, y=y, label = lab))+
xlab(group)
}
让我们检查一下我们的 create.plot
函数如何作用于所选变量
create.plot(df1[6,], df1[6,1])
不知道你是否真的期待,但我想你可能会喜欢这样的剧情。
现在我们所要做的就是将所有内容组合在一个简洁的命令中
df1 %>% group_by(variable) %>%
group_map(create.plot)
以下是部分精选图表