自定义列名作为稳定性 R 包函数中的参数
Custom column names as arguments in the functions of stability R package
我开发了 stability
R 包,可以从 CRAN 安装。
install.packages("stability")
但是,我很难让它将自定义列名作为函数参数。这是 add_anova
函数
的示例
library(stability)
data(ge_data)
YieldANOVA <-
add_anova(
.data = ge_data
, .y = Yield
, .rep = Rep
, .gen = Gen
, .env = Env
)
YieldANOVA
以上代码运行良好。但是,当我更改 data.frame 的列名时,它不起作用,如下所示:
df1 <- ge_data
names(df1) <- c("G", "Institute", "R", "Block", "E", "Y")
fm1 <-
add_anova(
.data = df1
, .y = Y
, .rep = Rep
, .gen = G
, .env = E
)
Error in model.frame.default(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E + :
invalid type (NULL) for variable '.data$Rep'
类似的另一个函数stab_reg
fm1Reg <-
stab_reg(
.data = df1
, .y = Y
, .gen = G
, .env = E
)
Error in eval(predvars, data, env) : object 'Gen' not found
这些函数的代码可以通过
访问
getAnywhere(add_anova.default)
function (.data, .y, .rep, .gen, .env)
{
Y <- enquo(.y)
Rep <- enquo(.rep)
G <- enquo(.gen)
E <- enquo(.env)
fm1 <- lm(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E +
.data$G + .data$G:.data$E, keep.order = TRUE), data = .data)
fm1ANOVA <- anova(fm1)
rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env",
"Residuals")
fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
fm1ANOVA[2, 4] <- NA
fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1,
1], fm1ANOVA[2, 1])
fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2,
1], fm1ANOVA[5, 1])
class(fm1ANOVA) <- c("anova", "data.frame")
return(list(anova = fm1ANOVA))
}
<bytecode: 0xc327c28>
<environment: namespace:stability>
和
getAnywhere(stab_reg.default)
function (.data, .y, .rep, .gen, .env)
{
Y <- enquo(.y)
Rep <- enquo(.rep)
G <- enquo(.gen)
E <- enquo(.env)
g <- length(levels(.data$G))
e <- length(levels(.data$E))
r <- length(levels(.data$Rep))
g_means <- .data %>% dplyr::group_by(!!G) %>% dplyr::summarize(Mean = mean(!!Y))
names(g_means) <- c("G", "Mean")
DataNew <- .data %>% dplyr::group_by(!!G, !!E) %>% dplyr::summarize(GEMean = mean(!!Y)) %>%
dplyr::group_by(!!E) %>% dplyr::mutate(EnvMean = mean(GEMean))
IndvReg <- lme4::lmList(GEMean ~ EnvMean | Gen, data = DataNew)
IndvRegFit <- summary(IndvReg)
StabIndvReg <- tibble::as_tibble(data.frame(g_means, Slope = coef(IndvRegFit)[,
, 2][, 1], LCI = confint(IndvReg)[, , 2][, 1], UCI = confint(IndvReg)[,
, 2][, 2], R.Sqr = IndvRegFit$r.squared, RMSE = IndvRegFit$sigma,
SSE = IndvRegFit$sigma^2 * IndvRegFit$df[, 2], Delta = IndvRegFit$sigma^2 *
IndvRegFit$df[, 2]/r))
MeanSlopePlot <- ggplot(data = StabIndvReg, mapping = aes(x = Slope,
y = Mean)) + geom_point() + geom_text(aes(label = G),
size = 2.5, vjust = 1.25, colour = "black") + geom_vline(xintercept = 1,
linetype = "dotdash") + geom_hline(yintercept = mean(StabIndvReg$Mean),
linetype = "dotdash") + labs(x = "Slope", y = "Mean") +
scale_x_continuous(sec.axis = dup_axis(), labels = scales::comma) +
scale_y_continuous(sec.axis = dup_axis(), labels = scales::comma) +
theme_bw()
return(list(StabIndvReg = StabIndvReg, MeanSlopePlot = MeanSlopePlot))
}
<bytecode: 0xe431010>
<environment: namespace:stability>
数据 'df1' 中的一个问题是列名是 'R' 而不是传递给函数的 "Rep" 。其次,传递到公式中的项是 quosures。我们可以用 quo_names
将其更改为字符串,然后用 paste
构造公式
add_anova1 <- function (.data, .y, .rep, .gen, .env) {
y1 <- quo_name(enquo(.y))
r1 <- quo_name(enquo(.rep))
g1 <- quo_name(enquo(.gen))
e1 <- quo_name(enquo(.env))
fm <- formula(paste0(y1, "~", paste(e1, paste(r1, e1, sep=":"),
g1, paste(g1, e1, sep=":"), sep="+")))
fm1 <- lm(terms(fm, keep.order = TRUE), data = .data)
fm1ANOVA <- anova(fm1)
rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env",
"Residuals")
fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
fm1ANOVA[2, 4] <- NA
fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1,
1], fm1ANOVA[2, 1])
fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2,
1], fm1ANOVA[5, 1])
class(fm1ANOVA) <- c("anova", "data.frame")
return(list(anova = fm1ANOVA))
}
YieldANOVA2 <- add_anova1(
.data = df1
, .y = Y
, .rep = R
, .gen = G
, .env = E
)
-检查使用 'ge_data' 生成的输出,而不更改列名
all.equal(YieldANOVA, YieldANOVA2, check.attributes = FALSE)
#[1] TRUE
同样stab_reg
可以改变
我开发了 stability
R 包,可以从 CRAN 安装。
install.packages("stability")
但是,我很难让它将自定义列名作为函数参数。这是 add_anova
函数
library(stability)
data(ge_data)
YieldANOVA <-
add_anova(
.data = ge_data
, .y = Yield
, .rep = Rep
, .gen = Gen
, .env = Env
)
YieldANOVA
以上代码运行良好。但是,当我更改 data.frame 的列名时,它不起作用,如下所示:
df1 <- ge_data
names(df1) <- c("G", "Institute", "R", "Block", "E", "Y")
fm1 <-
add_anova(
.data = df1
, .y = Y
, .rep = Rep
, .gen = G
, .env = E
)
Error in model.frame.default(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E + :
invalid type (NULL) for variable '.data$Rep'
类似的另一个函数stab_reg
fm1Reg <-
stab_reg(
.data = df1
, .y = Y
, .gen = G
, .env = E
)
Error in eval(predvars, data, env) : object 'Gen' not found
这些函数的代码可以通过
访问getAnywhere(add_anova.default)
function (.data, .y, .rep, .gen, .env)
{
Y <- enquo(.y)
Rep <- enquo(.rep)
G <- enquo(.gen)
E <- enquo(.env)
fm1 <- lm(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E +
.data$G + .data$G:.data$E, keep.order = TRUE), data = .data)
fm1ANOVA <- anova(fm1)
rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env",
"Residuals")
fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
fm1ANOVA[2, 4] <- NA
fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1,
1], fm1ANOVA[2, 1])
fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2,
1], fm1ANOVA[5, 1])
class(fm1ANOVA) <- c("anova", "data.frame")
return(list(anova = fm1ANOVA))
}
<bytecode: 0xc327c28>
<environment: namespace:stability>
和
getAnywhere(stab_reg.default)
function (.data, .y, .rep, .gen, .env)
{
Y <- enquo(.y)
Rep <- enquo(.rep)
G <- enquo(.gen)
E <- enquo(.env)
g <- length(levels(.data$G))
e <- length(levels(.data$E))
r <- length(levels(.data$Rep))
g_means <- .data %>% dplyr::group_by(!!G) %>% dplyr::summarize(Mean = mean(!!Y))
names(g_means) <- c("G", "Mean")
DataNew <- .data %>% dplyr::group_by(!!G, !!E) %>% dplyr::summarize(GEMean = mean(!!Y)) %>%
dplyr::group_by(!!E) %>% dplyr::mutate(EnvMean = mean(GEMean))
IndvReg <- lme4::lmList(GEMean ~ EnvMean | Gen, data = DataNew)
IndvRegFit <- summary(IndvReg)
StabIndvReg <- tibble::as_tibble(data.frame(g_means, Slope = coef(IndvRegFit)[,
, 2][, 1], LCI = confint(IndvReg)[, , 2][, 1], UCI = confint(IndvReg)[,
, 2][, 2], R.Sqr = IndvRegFit$r.squared, RMSE = IndvRegFit$sigma,
SSE = IndvRegFit$sigma^2 * IndvRegFit$df[, 2], Delta = IndvRegFit$sigma^2 *
IndvRegFit$df[, 2]/r))
MeanSlopePlot <- ggplot(data = StabIndvReg, mapping = aes(x = Slope,
y = Mean)) + geom_point() + geom_text(aes(label = G),
size = 2.5, vjust = 1.25, colour = "black") + geom_vline(xintercept = 1,
linetype = "dotdash") + geom_hline(yintercept = mean(StabIndvReg$Mean),
linetype = "dotdash") + labs(x = "Slope", y = "Mean") +
scale_x_continuous(sec.axis = dup_axis(), labels = scales::comma) +
scale_y_continuous(sec.axis = dup_axis(), labels = scales::comma) +
theme_bw()
return(list(StabIndvReg = StabIndvReg, MeanSlopePlot = MeanSlopePlot))
}
<bytecode: 0xe431010>
<environment: namespace:stability>
数据 'df1' 中的一个问题是列名是 'R' 而不是传递给函数的 "Rep" 。其次,传递到公式中的项是 quosures。我们可以用 quo_names
将其更改为字符串,然后用 paste
add_anova1 <- function (.data, .y, .rep, .gen, .env) {
y1 <- quo_name(enquo(.y))
r1 <- quo_name(enquo(.rep))
g1 <- quo_name(enquo(.gen))
e1 <- quo_name(enquo(.env))
fm <- formula(paste0(y1, "~", paste(e1, paste(r1, e1, sep=":"),
g1, paste(g1, e1, sep=":"), sep="+")))
fm1 <- lm(terms(fm, keep.order = TRUE), data = .data)
fm1ANOVA <- anova(fm1)
rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env",
"Residuals")
fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
fm1ANOVA[2, 4] <- NA
fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1,
1], fm1ANOVA[2, 1])
fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2,
1], fm1ANOVA[5, 1])
class(fm1ANOVA) <- c("anova", "data.frame")
return(list(anova = fm1ANOVA))
}
YieldANOVA2 <- add_anova1(
.data = df1
, .y = Y
, .rep = R
, .gen = G
, .env = E
)
-检查使用 'ge_data' 生成的输出,而不更改列名
all.equal(YieldANOVA, YieldANOVA2, check.attributes = FALSE)
#[1] TRUE
同样stab_reg
可以改变