将数据帧列表传递给 lm() 并查看结果
Passing list of data frames into lm() and viewing the results
我有三个数据框:dfLON、dfMOS 和 dfATA。它们具有相同的变量:y 是连续变量,a、b 和 c 是二元分类变量,其中还有一些 NA。
我想建立单独的线性回归模型,每个数据集一个。
使用我当前的代码,我已经设法制作了一个数据框列表并将其传递给 lm()。但是,有没有比逐个写 fitdfLON <- DfList[[1]] 更简洁的查看结果的方式呢?我在此示例中只提供了三个数据框,但实际上我有约 25 个,这样我就必须输入 25 次!
如有任何帮助,我们将不胜感激。
起点(dfs):
# Example inputs: three data frames sharing the columns y, a, b and c.
# dfLON$a and dfATA$c each contain one NA.
dfLON <- data.frame(
  y = c(1.23, 2.32, 3.21, 2.43),
  a = c(1, NA, 1, 2),
  b = c(1, 1, 2, 2),
  c = c(2, 1, 2, 1)
)
dfMOS <- data.frame(
  y = c(4.56, 6.54, 4.43, 5.78),
  a = c(2, 1, 2, 1),
  b = c(2, 1, 1, 2),
  c = c(1, 2, 1, 2)
)
dfATA <- data.frame(
  y = c(1.22, 6.54, 3.23, 4.23),
  a = c(2, 2, 2, 1),
  b = c(1, 2, 1, 2),
  c = c(1, NA, 1, 2)
)
当前代码:
# Fit the linear model y ~ a + b + c to a single data frame.
#
# df: a data frame containing the columns y, a, b and c.
# Returns the fitted "lm" object.
Mylm <- function(df) {
  lm(y ~ a + b + c, data = df)
}
# Fit one model per data frame. The input list is unnamed, so the resulting
# list of fits is unnamed too and each fit can only be reached by position.
DfList <- lapply(list(dfLON, dfMOS, dfATA), Mylm)
# Manual extraction: one assignment per data frame.
fitdfLON <- DfList[[1]]
fitdfMOS <- DfList[[2]]
fitdfATA <- DfList[[3]]
如果各个 data.frame 的名称有共同的模式,您可以组合使用 mget 和 ls 来提取它们,然后使用 lapply 对每个数据框运行 lm:
# ls(pattern = ...) lists the object names starting with "df" followed by
# three capital letters; mget() fetches those objects as a named list, so the
# list of fits is named after the data frames.
# The lambda argument stays `x` because it shows up in each fit's printed Call.
fit <- lapply(
  X = mget(ls(pattern = "^df[A-Z]{3}")),
  FUN = function(x) lm(formula = y ~ a + b + c, data = x)
)
fit[["dfATA"]]
#Call:
#lm(formula = y ~ a + b + c, data = x)
#Coefficients:
#(Intercept) a b c
# 6.235 -2.005 NA NA
如果你只想要所有的系数,你可以这样做
# Build a matrix of coefficients with one row per data frame.
# coef() is the documented accessor for a fitted model's coefficients; it
# replaces indexing the lm object by position ([[1]]), which relies on the
# internal element order. Coefficients that could not be estimated stay in
# the vector as NA, so every row has the same length and rbind() lines up.
do.call(
  rbind,
  lapply(
    X = mget(ls(pattern = "^df[A-Z]{3}")),
    FUN = function(x) coef(lm(formula = y ~ a + b + c, data = x))
  )
)
# (Intercept) a b c
#dfATA 6.2350 -2.005 NA NA
#dfLON 0.0300 -0.780 1.980 NA
#dfMOS 8.2975 -1.665 -0.315 NA
除了使用 ls(pattern = "^df[A-Z]{3}") 之外,您也可以直接提供一个包含所有 data.frame 名称的字符向量。
每当您需要在许多不同的数据集上拟合模型时,使用 broom 包来整理结果是很有意义的。它会为每个模型生成一个整洁的数据框,您可以直接输出,也可以用于下游分析。
最简单的例子:
library(broom)
# Fit y ~ a + b + c to one data frame and return broom's tidy() summary of
# the fit instead of the raw "lm" object.
#
# df: a data frame containing the columns y, a, b and c.
Mylm <- function(df) {
  tidy(lm(y ~ a + b + c, data = df))
}
# Apply Mylm to each data frame; written as a plain base call, no pipe needed.
lapply(list(dfLON, dfMOS, dfATA), Mylm)
#[[1]]
# term estimate std.error statistic p.value
#1 (Intercept) 0.03 NaN NaN NaN
#2 a -0.78 NaN NaN NaN
#3 b 1.98 NaN NaN NaN
#
#[[2]]
# term estimate std.error statistic p.value
#1 (Intercept) 8.2975 0.969855 8.5554025 0.07407531
#2 a -1.6650 0.445000 -3.7415730 0.16626155
#3 b -0.3150 0.445000 -0.7078652 0.60785169
#
#[[3]]
# term estimate std.error statistic p.value
#1 (Intercept) 6.235 3.015000 2.067993 0.2867398
#2 a -2.005 1.740711 -1.151828 0.4551559
现在您可以将它与 purrr 的 map_dfr() 函数结合起来,把所有结果合并成一个数据框:
library(purrr)
# The list names ("LON", "MOS", "ATA") end up in the "model" column via .id;
# with an unnamed list you would only get a model number there.
map_dfr(
  list("LON" = dfLON, "MOS" = dfMOS, "ATA" = dfATA),
  Mylm,
  .id = "model"
)
# model term estimate std.error statistic p.value
#1 LON (Intercept) 0.0300 NaN NaN NaN
#2 LON a -0.7800 NaN NaN NaN
#3 LON b 1.9800 NaN NaN NaN
#4 MOS (Intercept) 8.2975 0.969855 8.5554025 0.07407531
#5 MOS a -1.6650 0.445000 -3.7415730 0.16626155
#6 MOS b -0.3150 0.445000 -0.7078652 0.60785169
#7 ATA (Intercept) 6.2350 3.015000 2.0679934 0.28673976
#8 ATA a -2.0050 1.740711 -1.1518281 0.45515586
为了让代码更紧凑,您可以直接在 map_dfr 内部定义匿名函数。当您所做的只是拟合线性模型时,这样做很合适。
# Same as calling map_dfr() with Mylm, but the model-fitting step is written
# inline as an anonymous function.
map_dfr(
  list("LON" = dfLON, "MOS" = dfMOS, "ATA" = dfATA),
  function(d) tidy(lm(y ~ a + b + c, data = d)),
  .id = "model"
)
# model term estimate std.error statistic p.value
#1 LON (Intercept) 0.0300 NaN NaN NaN
#2 LON a -0.7800 NaN NaN NaN
#3 LON b 1.9800 NaN NaN NaN
#4 MOS (Intercept) 8.2975 0.969855 8.5554025 0.07407531
#5 MOS a -1.6650 0.445000 -3.7415730 0.16626155
#6 MOS b -0.3150 0.445000 -0.7078652 0.60785169
#7 ATA (Intercept) 6.2350 3.015000 2.0679934 0.28673976
#8 ATA a -2.0050 1.740711 -1.1518281 0.45515586
# Make a named list of all the data frames
df <- list(dfATA = dfATA, dfLON = dfLON, dfMOS = dfMOS)
# Fit the model to each data frame.
# The formula uses bare column names resolved through `data = x`. Writing
# x$y ~ x$a + ... bypasses `data`, labels the coefficients "x$a", "x$b", ...
# and breaks predict() with new data.
lmr <- lapply(df, function(x) lm(y ~ a + b + c, data = x))
# Get the coefficients of each model, then flatten them into one named vector
# (names take the form "<data frame>.<term>")
coefficients <- lapply(lmr, coef)
coefficients <- unlist(coefficients)
我有三个数据框:dfLON、dfMOS 和 dfATA。它们具有相同的变量:y 是连续变量,a、b 和 c 是二元分类变量,其中还有一些 NA。
我想建立单独的线性回归模型,每个数据集一个。
使用我当前的代码,我已经设法制作了一个数据框列表并将其传递给 lm()。但是,有没有比逐个写 fitdfLON <- DfList[[1]] 更简洁的查看结果的方式呢?我在此示例中只提供了三个数据框,但实际上我有约 25 个,这样我就必须输入 25 次!
如有任何帮助,我们将不胜感激。
起点(dfs):
# Example inputs: three data frames sharing the columns y, a, b and c.
# dfLON$a and dfATA$c each contain one NA.
dfLON <- data.frame(
  y = c(1.23, 2.32, 3.21, 2.43),
  a = c(1, NA, 1, 2),
  b = c(1, 1, 2, 2),
  c = c(2, 1, 2, 1)
)
dfMOS <- data.frame(
  y = c(4.56, 6.54, 4.43, 5.78),
  a = c(2, 1, 2, 1),
  b = c(2, 1, 1, 2),
  c = c(1, 2, 1, 2)
)
dfATA <- data.frame(
  y = c(1.22, 6.54, 3.23, 4.23),
  a = c(2, 2, 2, 1),
  b = c(1, 2, 1, 2),
  c = c(1, NA, 1, 2)
)
当前代码:
# Fit the linear model y ~ a + b + c to a single data frame.
#
# df: a data frame containing the columns y, a, b and c.
# Returns the fitted "lm" object.
Mylm <- function(df) {
  lm(y ~ a + b + c, data = df)
}
# Fit one model per data frame. The input list is unnamed, so the resulting
# list of fits is unnamed too and each fit can only be reached by position.
DfList <- lapply(list(dfLON, dfMOS, dfATA), Mylm)
# Manual extraction: one assignment per data frame.
fitdfLON <- DfList[[1]]
fitdfMOS <- DfList[[2]]
fitdfATA <- DfList[[3]]
如果各个 data.frame 的名称有共同的模式,您可以组合使用 mget 和 ls 来提取它们,然后使用 lapply 对每个数据框运行 lm:
# ls(pattern = ...) lists the object names starting with "df" followed by
# three capital letters; mget() fetches those objects as a named list, so the
# list of fits is named after the data frames.
# The lambda argument stays `x` because it shows up in each fit's printed Call.
fit <- lapply(
  X = mget(ls(pattern = "^df[A-Z]{3}")),
  FUN = function(x) lm(formula = y ~ a + b + c, data = x)
)
fit[["dfATA"]]
#Call:
#lm(formula = y ~ a + b + c, data = x)
#Coefficients:
#(Intercept) a b c
# 6.235 -2.005 NA NA
如果你只想要所有的系数,你可以这样做
# Build a matrix of coefficients with one row per data frame.
# coef() is the documented accessor for a fitted model's coefficients; it
# replaces indexing the lm object by position ([[1]]), which relies on the
# internal element order. Coefficients that could not be estimated stay in
# the vector as NA, so every row has the same length and rbind() lines up.
do.call(
  rbind,
  lapply(
    X = mget(ls(pattern = "^df[A-Z]{3}")),
    FUN = function(x) coef(lm(formula = y ~ a + b + c, data = x))
  )
)
# (Intercept) a b c
#dfATA 6.2350 -2.005 NA NA
#dfLON 0.0300 -0.780 1.980 NA
#dfMOS 8.2975 -1.665 -0.315 NA
除了使用 ls(pattern = "^df[A-Z]{3}") 之外,您也可以直接提供一个包含所有 data.frame 名称的字符向量。
每当您需要在许多不同的数据集上拟合模型时,使用 broom 包来整理结果是很有意义的。它会为每个模型生成一个整洁的数据框,您可以直接输出,也可以用于下游分析。
最简单的例子:
library(broom)
# Fit y ~ a + b + c to one data frame and return broom's tidy() summary of
# the fit instead of the raw "lm" object.
#
# df: a data frame containing the columns y, a, b and c.
Mylm <- function(df) {
  tidy(lm(y ~ a + b + c, data = df))
}
# Apply Mylm to each data frame; written as a plain base call, no pipe needed.
lapply(list(dfLON, dfMOS, dfATA), Mylm)
#[[1]]
# term estimate std.error statistic p.value
#1 (Intercept) 0.03 NaN NaN NaN
#2 a -0.78 NaN NaN NaN
#3 b 1.98 NaN NaN NaN
#
#[[2]]
# term estimate std.error statistic p.value
#1 (Intercept) 8.2975 0.969855 8.5554025 0.07407531
#2 a -1.6650 0.445000 -3.7415730 0.16626155
#3 b -0.3150 0.445000 -0.7078652 0.60785169
#
#[[3]]
# term estimate std.error statistic p.value
#1 (Intercept) 6.235 3.015000 2.067993 0.2867398
#2 a -2.005 1.740711 -1.151828 0.4551559
现在您可以将它与 purrr 的 map_dfr() 函数结合起来,把所有结果合并成一个数据框:
library(purrr)
# The list names ("LON", "MOS", "ATA") end up in the "model" column via .id;
# with an unnamed list you would only get a model number there.
map_dfr(
  list("LON" = dfLON, "MOS" = dfMOS, "ATA" = dfATA),
  Mylm,
  .id = "model"
)
# model term estimate std.error statistic p.value
#1 LON (Intercept) 0.0300 NaN NaN NaN
#2 LON a -0.7800 NaN NaN NaN
#3 LON b 1.9800 NaN NaN NaN
#4 MOS (Intercept) 8.2975 0.969855 8.5554025 0.07407531
#5 MOS a -1.6650 0.445000 -3.7415730 0.16626155
#6 MOS b -0.3150 0.445000 -0.7078652 0.60785169
#7 ATA (Intercept) 6.2350 3.015000 2.0679934 0.28673976
#8 ATA a -2.0050 1.740711 -1.1518281 0.45515586
为了让代码更紧凑,您可以直接在 map_dfr 内部定义匿名函数。当您所做的只是拟合线性模型时,这样做很合适。
# Same as calling map_dfr() with Mylm, but the model-fitting step is written
# inline as an anonymous function.
map_dfr(
  list("LON" = dfLON, "MOS" = dfMOS, "ATA" = dfATA),
  function(d) tidy(lm(y ~ a + b + c, data = d)),
  .id = "model"
)
# model term estimate std.error statistic p.value
#1 LON (Intercept) 0.0300 NaN NaN NaN
#2 LON a -0.7800 NaN NaN NaN
#3 LON b 1.9800 NaN NaN NaN
#4 MOS (Intercept) 8.2975 0.969855 8.5554025 0.07407531
#5 MOS a -1.6650 0.445000 -3.7415730 0.16626155
#6 MOS b -0.3150 0.445000 -0.7078652 0.60785169
#7 ATA (Intercept) 6.2350 3.015000 2.0679934 0.28673976
#8 ATA a -2.0050 1.740711 -1.1518281 0.45515586
# Make a named list of all the data frames
df <- list(dfATA = dfATA, dfLON = dfLON, dfMOS = dfMOS)
# Fit the model to each data frame.
# The formula uses bare column names resolved through `data = x`. Writing
# x$y ~ x$a + ... bypasses `data`, labels the coefficients "x$a", "x$b", ...
# and breaks predict() with new data.
lmr <- lapply(df, function(x) lm(y ~ a + b + c, data = x))
# Get the coefficients of each model, then flatten them into one named vector
# (names take the form "<data frame>.<term>")
coefficients <- lapply(lmr, coef)
coefficients <- unlist(coefficients)