使用 panel.linejoin 缺失数据
Using panel.linejoin with missing data
这个问题与收到的问题和答案非常相关here,其中@Mr. Flick 帮我解决了一个关于 lattice
包中的 xyplot
的问题。但是看到我现在正在对一些代码进行故障排除,我想我会向 "broader public" 寻求帮助。
我们论文的审稿人要求我提供患者体重指数随访数据,类似于我们在上面提供的 link 中提供术中数据的方式。
当我以模拟方式绘制数据时,代表 "mean" 的黑线停在三个月处,但我希望它经过所有时间点。见下图。
这是我的数据 bmi_data
dput(bmi_data)
structure(list(StudyID = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("P1",
"P2", "P3", "P4", "P5", "P6", "P7"), class = "factor"), BMI = c(37.5,
43.82794785, 48.87848306, 39.93293705, 42.76788399, 39.44207394,
50.78043704, 25.61728395, 37.91099773, 39.02185224, 36.00823045,
37.75602259, 34.06360931, 39.12591051, 25.98765432, 34.89937642,
32.95178633, 35.62719098, 35.75127802, 32.27078777, NA, 23.61111111,
32.34835601, NA, 34.33165676, NA, 26.53375883, 35.79604579, 23.20987654,
31.71060091, NA, 34.29355281, NA, NA, NA), BMITIME2 = structure(c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L,
4L, 4L), .Label = c("12 months FU", "3 months FU", "6 months FU",
"Over 12 months FU", "Preoperative BMI"), class = "factor"),
TIME2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Preoperative BMI",
"3 months FU", "6 months FU", "12 months FU", "Over 12 months FU"
), class = "factor")), .Names = c("StudyID", "BMI", "BMITIME2",
"TIME2"), class = "data.frame", row.names = c(NA, -35L))
一些 data.frame 操作以获得我的时间点的正确顺序。
bmi_data$TIME2 <- factor(bmi_data$BMITIME2, unique(bmi_data$BMITIME2))
现在我的代码似乎无法正常工作。
require(lattice)
stderr <- function(x) sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
panel.sem <- function(x, y, col.se=plot.line$col, alpha.se=.10, ...) {
plot.line <- trellis.par.get("plot.line")
xs <- if(is.factor(x)) {
factor(c(levels(x) , rev(levels(x))), levels=levels(x))
} else {
xx <- sort(unique(x))
c(xx, rev(xx))
}
means <- tapply(y,x, mean, na.rm=T)
stderr <- tapply(y,x, stderr)
panel.polygon(xs, c(means+stderr, rev(means-stderr)), col=col.se, alpha=alpha.se)}
xyplot(BMI~bmi_data$TIME2, groups=StudyID, data=bmi_data, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.sem(x,y, col.se="grey")
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE ,..., col="black", lty=1, lwd=4)}
,xlab="Measurement Time Point",
ylab=expression("BMI"~"(kg/m^2)"))
导致此图:
非常感谢解决此问题的任何帮助!!!
这是一个使用 ggplot + dplyr 但不知道 lattice 的方法:
if (!require("pacman")) install.packages("pacman")
pacman::p_load(ggplot2, dplyr)
ave_data <- bmi_data %>%
group_by(TIME2) %>%
summarize(BMI = mean(BMI, na.rm=TRUE)) %>%
mutate(ave = TRUE)
ggplot(bmi_data, aes(y=BMI, x=TIME2)) +
geom_point(aes(color = StudyID), shape=21) +
geom_smooth(aes(group=1), alpha=.1) +
geom_line(size=.8, aes(group=StudyID, color = StudyID)) +
geom_path(data=ave_data, color="black", size=1.2, aes(group=ave)) +
xlab("Measurement Time Point") + theme_bw() +
ylab(expression("BMI"~"(kg/m^2)")) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position=c(.87, .70)
) +
guides(fill=guide_legend(title="ID"))
问题是您在此数据集中缺少数据 (NA) 值。 panel.linejoin()
在每个 x 的观察值上调用 mean()
,如果有 NA 值,默认情况下平均值将为 NA,然后不会绘制一条线。要更改它,您可以为 panel.linejoin 指定一个函数包装器。尝试
xyplot(BMI~bmi_data$TIME2, groups=StudyID, data=bmi_data, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.sem(x,y, col.se="grey")
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE ,..., col="black",
lty=1, lwd=4, na.rm=T,
fun=function(x) mean(x, na.rm=T))
},
xlab="Measurement Time Point",
ylab=expression("BMI"~"(kg/m^2)")
)
这个问题与收到的问题和答案非常相关here,其中@Mr. Flick 帮我解决了一个关于 lattice
包中的 xyplot
的问题。但是看到我现在正在对一些代码进行故障排除,我想我会向 "broader public" 寻求帮助。
我们论文的审稿人要求我提供患者体重指数随访数据,类似于我们在上面提供的 link 中提供术中数据的方式。
当我以模拟方式绘制数据时,代表 "mean" 的黑线停在三个月处,但我希望它经过所有时间点。见下图。
这是我的数据 bmi_data
dput(bmi_data)
structure(list(StudyID = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("P1",
"P2", "P3", "P4", "P5", "P6", "P7"), class = "factor"), BMI = c(37.5,
43.82794785, 48.87848306, 39.93293705, 42.76788399, 39.44207394,
50.78043704, 25.61728395, 37.91099773, 39.02185224, 36.00823045,
37.75602259, 34.06360931, 39.12591051, 25.98765432, 34.89937642,
32.95178633, 35.62719098, 35.75127802, 32.27078777, NA, 23.61111111,
32.34835601, NA, 34.33165676, NA, 26.53375883, 35.79604579, 23.20987654,
31.71060091, NA, 34.29355281, NA, NA, NA), BMITIME2 = structure(c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L,
4L, 4L), .Label = c("12 months FU", "3 months FU", "6 months FU",
"Over 12 months FU", "Preoperative BMI"), class = "factor"),
TIME2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Preoperative BMI",
"3 months FU", "6 months FU", "12 months FU", "Over 12 months FU"
), class = "factor")), .Names = c("StudyID", "BMI", "BMITIME2",
"TIME2"), class = "data.frame", row.names = c(NA, -35L))
一些 data.frame 操作以获得我的时间点的正确顺序。
bmi_data$TIME2 <- factor(bmi_data$BMITIME2, unique(bmi_data$BMITIME2))
现在我的代码似乎无法正常工作。
require(lattice)
stderr <- function(x) sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
panel.sem <- function(x, y, col.se=plot.line$col, alpha.se=.10, ...) {
plot.line <- trellis.par.get("plot.line")
xs <- if(is.factor(x)) {
factor(c(levels(x) , rev(levels(x))), levels=levels(x))
} else {
xx <- sort(unique(x))
c(xx, rev(xx))
}
means <- tapply(y,x, mean, na.rm=T)
stderr <- tapply(y,x, stderr)
panel.polygon(xs, c(means+stderr, rev(means-stderr)), col=col.se, alpha=alpha.se)}
xyplot(BMI~bmi_data$TIME2, groups=StudyID, data=bmi_data, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.sem(x,y, col.se="grey")
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE ,..., col="black", lty=1, lwd=4)}
,xlab="Measurement Time Point",
ylab=expression("BMI"~"(kg/m^2)"))
导致此图:
非常感谢解决此问题的任何帮助!!!
这是一个使用 ggplot + dplyr 但不知道 lattice 的方法:
if (!require("pacman")) install.packages("pacman")
pacman::p_load(ggplot2, dplyr)
ave_data <- bmi_data %>%
group_by(TIME2) %>%
summarize(BMI = mean(BMI, na.rm=TRUE)) %>%
mutate(ave = TRUE)
ggplot(bmi_data, aes(y=BMI, x=TIME2)) +
geom_point(aes(color = StudyID), shape=21) +
geom_smooth(aes(group=1), alpha=.1) +
geom_line(size=.8, aes(group=StudyID, color = StudyID)) +
geom_path(data=ave_data, color="black", size=1.2, aes(group=ave)) +
xlab("Measurement Time Point") + theme_bw() +
ylab(expression("BMI"~"(kg/m^2)")) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position=c(.87, .70)
) +
guides(fill=guide_legend(title="ID"))
问题是您在此数据集中缺少数据 (NA) 值。 panel.linejoin()
在每个 x 的观察值上调用 mean()
,如果有 NA 值,默认情况下平均值将为 NA,然后不会绘制一条线。要更改它,您可以为 panel.linejoin 指定一个函数包装器。尝试
xyplot(BMI~bmi_data$TIME2, groups=StudyID, data=bmi_data, ty=c("l", "p"),
panel = function(x, y, ...) {
panel.sem(x,y, col.se="grey")
panel.xyplot(x, y, ...)
panel.linejoin(x, y, horizontal = FALSE ,..., col="black",
lty=1, lwd=4, na.rm=T,
fun=function(x) mean(x, na.rm=T))
},
xlab="Measurement Time Point",
ylab=expression("BMI"~"(kg/m^2)")
)