如何对多个模型使用 broom::tidy?
How to use broom::tidy with multiple models?
我正在尝试使用 broom 来汇总 19 个多项式回归模型的结果。我参考了另一篇帖子(原文此处的链接未能保留),并尝试将其中的做法与 broom::tidy 结合使用。我的脚本如下:
-- Runs an R script through sp_execute_external_script for one study: the
-- query held in @inquery is supplied as the R input data set and the R
-- object named res is declared as the output data set.
-- The R script is kept exactly as posted in the question, including the
-- broom::tidy() call on a list of summary objects that the question is about.
ALTER PROCEDURE [dbo].[spRegressionPeak]
@StudyID int
AS
BEGIN
--NOTE(review): @sStudyID is assigned here but is not referenced again in
--this procedure.
Declare @sStudyID VARCHAR(50)
Set @sStudyID = CONVERT(VARCHAR(50),@StudyID)
--We are selecting the distinct StudyID, Productnumber, ResponseID and mean
--values 1 through 6 from the CodeMeans table.
--NOTE(review): the query below actually reads StudyID, RespID, LikingOrder
--and the averages of C1..C19 from ClosedStudyResponses - confirm which
--description is intended.
--Note that spCodeMeans must be run before running this stored procedure to
--ensure response data exists in the CodeMeans table.
--We use IsNull values to pass zeroes where an average wasn't calculated so that
--the polynomial regression can be calculated.
DECLARE @inquery AS NVARCHAR(MAX) = '
Select
c.StudyID, c.RespID, c.LikingOrder, avg(isnull(C1,0)) as C1, avg(isnull(C2,0)) as C2, avg(isnull(C3,0)) as C3, avg(isnull(C4,0)) as C4,
avg(isnull(C5,0)) as C5, avg(isnull(C6,0)) as C6, avg(isnull(C7,0)) as C7, avg(isnull(C8,0)) as C8, avg(isnull(C9,0)) as C9,
avg(isnull(C10,0)) as C10, avg(isnull(C11,0)) as C11, avg(isnull(C12,0)) as C12, avg(isnull(C13,0)) as C13, avg(isnull(C14,0)) as C14,
avg(isnull(C15,0)) as C15, avg(isnull(C16,0)) as C16, avg(isnull(C17,0)) as C17, avg(isnull(C18,0)) as C18, avg(isnull(C19,0)) as C19
from ClosedStudyResponses c
where c.StudyID = @StudyID
group by StudyID, RespID, LikingOrder
order by RespID
'
--We are setting @inquery aka InputDataSet to be our initial dataset.
--R Services requires that a data.frame be passed to any calculations being
--generated. As such, df is simply data framing the @inquery data.
--The res object holds the polynomial regression results by RespondentID and
--LikingOrder for each of the averages in the @inquery resultset.
EXEC sp_execute_external_script @language = N'R'
, @script = N'
library(tidyr, broom)
studymeans <- InputDataSet
df <- data.frame(studymeans)
lin.mod.1 <- lm(df$LikingOrder ~ poly(df$C1,3, raw=TRUE))
lin.mod.2 <- lm(df$LikingOrder ~ poly(df$C2,3, raw=TRUE))
lin.mod.3 <- lm(df$LikingOrder ~ poly(df$C3,3, raw=TRUE))
lin.mod.4 <- lm(df$LikingOrder ~ poly(df$C4,3, raw=TRUE))
lin.mod.5 <- lm(df$LikingOrder ~ poly(df$C5,3, raw=TRUE))
lin.mod.6 <- lm(df$LikingOrder ~ poly(df$C6,3, raw=TRUE))
lin.mod.7 <- lm(df$LikingOrder ~ poly(df$C7,3, raw=TRUE))
lin.mod.8 <- lm(df$LikingOrder ~ poly(df$C8,3, raw=TRUE))
lin.mod.9 <- lm(df$LikingOrder ~ poly(df$C9,3, raw=TRUE))
lin.mod.10 <- lm(df$LikingOrder ~ poly(df$C10,3, raw=TRUE))
lin.mod.11 <- lm(df$LikingOrder ~ poly(df$C11,3, raw=TRUE))
lin.mod.12 <- lm(df$LikingOrder ~ poly(df$C12,3, raw=TRUE))
lin.mod.13 <- lm(df$LikingOrder ~ poly(df$C13,3, raw=TRUE))
lin.mod.14 <- lm(df$LikingOrder ~ poly(df$C14,3, raw=TRUE))
lin.mod.15 <- lm(df$LikingOrder ~ poly(df$C15,3, raw=TRUE))
lin.mod.16 <- lm(df$LikingOrder ~ poly(df$C16,3, raw=TRUE))
lin.mod.17 <- lm(df$LikingOrder ~ poly(df$C17,3, raw=TRUE))
lin.mod.18 <- lm(df$LikingOrder ~ poly(df$C18,3, raw=TRUE))
lin.mod.19 <- lm(df$LikingOrder ~ poly(df$C19,3, raw=TRUE))
lst <- lapply(ls(pattern="lin.mod"), get)
allmodels <- lapply(lst, summary)
res <- broom::tidy(allmodels)
'
, @input_data_1 = @inquery
, @output_data_1_name = N'res'
, @params = N'@StudyID int'
,@StudyID = @StudyID
--- Edit this line to handle the output data frame.
--WITH RESULT SETS ((StudyID int, RespID int, LikingOrder int, NewColumn int,
--res varchar(max)));
END;
上面的脚本在向其传递有效的 StudyID 输入参数时抛出以下错误:
Error in setNames(data.frame(data), value.name) :
'names' attribute [1] must be the same length as the vector [0]
Calls: source ... <Anonymous> -> <Anonymous> -> melt.default -> setNames
In addition: There were 50 or more warnings (use warnings() to see the first
50)
我的输入数据如下(原帖此处的数据示例未能保留):
期望的结果是在 data.frame 中获得所有 19 个模型的摘要。我该如何解决错误并修改我的代码以实现最终结果?
您没有提供可重现的示例,不过下面的做法似乎可行。几个潜在的问题:应当直接对模型对象调用 tidy,而不是对模型的摘要(summary)调用;另外,最好避免在模型公式中使用 $ 索引。
library(purrr)

# Worked example on mtcars: build one cubic raw-polynomial formula per
# predictor column, fit each with lm(), then tidy every fitted model.
df <- mtcars
## all columns but the first; in your case this would be paste0("C",1:19)
predvars <- colnames(mtcars)[-1]
respvar <- "mpg" ## would be "LikingOrder"
predpolys <- sprintf("poly(%s,3,raw=TRUE)", predvars)
forms <- map(predpolys, reformulate,
  response = respvar
) ## construct formulas
names(forms) <- predvars ## names will be inherited by model lists
modList <- map(forms, lm, data = df) ## fit all models
res <- map(modList, broom::tidy) ## tidy all models
如果需要,您可以在此之后用 dplyr::bind_rows(res, .id = "predvar") 把各模型的结果合并成一个数据框,或者把最后一步的 map() 替换为 map_dfr(..., .id = "predvar")。
...
由于无法使用您的工作环境,我不确定您的数据是如何组织的,但看起来您是想用同一个因变量,分别对多个预测变量列拟合模型。根据 broom 和 dplyr 的使用说明(vignette),我认为您缺少的是对 rowwise 的调用,不过我并不完全确定。下面是一个使用 mtcars 数据集的可运行示例。请注意,其结构是在一个按行(rowwise)组织、且带有存放模型的列表列的数据框上调用 tidy,而不是直接对列表调用。您还可以通过对包含预测变量列的数据框进行映射来直接创建模型,而不必把模型零散地存放在环境中、再借助 get 和 ls 取回。每当您发现自己在使用 ls 时,请考虑能否改为把这些元素放进一个列表中!
编辑:重新阅读了这个问题让我想起的那篇使用说明(vignette)之后,我意识到其实可以像下面这样写一个简短的管道(此前的做法请参阅编辑历史记录;原文此处的行内代码在提取时丢失,残留为“[=17=”)。先用 gather 把数据转换成适合分组拟合模型的长格式,就能更简洁地得到想要的结果!
library(tidyverse)
library(broom)

# Fit mpg ~ measure on the rows of a single predictor and return the
# coefficient table produced by tidy().
fit_predictor <- function(rows) {
  tidy(lm(mpg ~ measure, data = rows))
}

mtcars %>%
  # Long format: every column other than mpg becomes (predictor, measure) rows.
  gather(key = predictor, value = measure, -mpg) %>%
  group_by(predictor) %>%
  # `.` stands for the rows of the current predictor group.
  do(fit_predictor(.))
#> # A tibble: 20 x 6
#> # Groups: predictor [10]
#> predictor term estimate std.error statistic p.value
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 am (Intercept) 17.1 1.12 15.2 1.13e-15
#> 2 am measure 7.24 1.76 4.11 2.85e- 4
#> 3 carb (Intercept) 25.9 1.84 14.1 9.22e-15
#> 4 carb measure -2.06 0.569 -3.62 1.08e- 3
#> 5 cyl (Intercept) 37.9 2.07 18.3 8.37e-18
#> 6 cyl measure -2.88 0.322 -8.92 6.11e-10
#> 7 disp (Intercept) 29.6 1.23 24.1 3.58e-21
#> 8 disp measure -0.0412 0.00471 -8.75 9.38e-10
#> 9 drat (Intercept) -7.52 5.48 -1.37 1.80e- 1
#> 10 drat measure 7.68 1.51 5.10 1.78e- 5
#> 11 gear (Intercept) 5.62 4.92 1.14 2.62e- 1
#> 12 gear measure 3.92 1.31 3.00 5.40e- 3
#> 13 hp (Intercept) 30.1 1.63 18.4 6.64e-18
#> 14 hp measure -0.0682 0.0101 -6.74 1.79e- 7
#> 15 qsec (Intercept) -5.11 10.0 -0.510 6.14e- 1
#> 16 qsec measure 1.41 0.559 2.53 1.71e- 2
#> 17 vs (Intercept) 16.6 1.08 15.4 8.85e-16
#> 18 vs measure 7.94 1.63 4.86 3.42e- 5
#> 19 wt (Intercept) 37.3 1.88 19.9 8.24e-19
#> 20 wt measure -5.34 0.559 -9.56 1.29e-10
由 reprex package (v0.2.0) 创建于 2018-07-10。
我正在尝试使用 broom 来汇总 19 个多项式回归模型的结果。我参考了另一篇帖子(原文此处的链接未能保留),并尝试将其中的做法与 broom::tidy 结合使用。我的脚本如下:
-- Runs an R script through sp_execute_external_script for one study: the
-- query held in @inquery is supplied as the R input data set and the R
-- object named res is declared as the output data set.
-- The R script is kept exactly as posted in the question, including the
-- broom::tidy() call on a list of summary objects that the question is about.
ALTER PROCEDURE [dbo].[spRegressionPeak]
@StudyID int
AS
BEGIN
--NOTE(review): @sStudyID is assigned here but is not referenced again in
--this procedure.
Declare @sStudyID VARCHAR(50)
Set @sStudyID = CONVERT(VARCHAR(50),@StudyID)
--We are selecting the distinct StudyID, Productnumber, ResponseID and mean
--values 1 through 6 from the CodeMeans table.
--NOTE(review): the query below actually reads StudyID, RespID, LikingOrder
--and the averages of C1..C19 from ClosedStudyResponses - confirm which
--description is intended.
--Note that spCodeMeans must be run before running this stored procedure to
--ensure response data exists in the CodeMeans table.
--We use IsNull values to pass zeroes where an average wasn't calculated so that
--the polynomial regression can be calculated.
DECLARE @inquery AS NVARCHAR(MAX) = '
Select
c.StudyID, c.RespID, c.LikingOrder, avg(isnull(C1,0)) as C1, avg(isnull(C2,0)) as C2, avg(isnull(C3,0)) as C3, avg(isnull(C4,0)) as C4,
avg(isnull(C5,0)) as C5, avg(isnull(C6,0)) as C6, avg(isnull(C7,0)) as C7, avg(isnull(C8,0)) as C8, avg(isnull(C9,0)) as C9,
avg(isnull(C10,0)) as C10, avg(isnull(C11,0)) as C11, avg(isnull(C12,0)) as C12, avg(isnull(C13,0)) as C13, avg(isnull(C14,0)) as C14,
avg(isnull(C15,0)) as C15, avg(isnull(C16,0)) as C16, avg(isnull(C17,0)) as C17, avg(isnull(C18,0)) as C18, avg(isnull(C19,0)) as C19
from ClosedStudyResponses c
where c.StudyID = @StudyID
group by StudyID, RespID, LikingOrder
order by RespID
'
--We are setting @inquery aka InputDataSet to be our initial dataset.
--R Services requires that a data.frame be passed to any calculations being
--generated. As such, df is simply data framing the @inquery data.
--The res object holds the polynomial regression results by RespondentID and
--LikingOrder for each of the averages in the @inquery resultset.
EXEC sp_execute_external_script @language = N'R'
, @script = N'
library(tidyr, broom)
studymeans <- InputDataSet
df <- data.frame(studymeans)
lin.mod.1 <- lm(df$LikingOrder ~ poly(df$C1,3, raw=TRUE))
lin.mod.2 <- lm(df$LikingOrder ~ poly(df$C2,3, raw=TRUE))
lin.mod.3 <- lm(df$LikingOrder ~ poly(df$C3,3, raw=TRUE))
lin.mod.4 <- lm(df$LikingOrder ~ poly(df$C4,3, raw=TRUE))
lin.mod.5 <- lm(df$LikingOrder ~ poly(df$C5,3, raw=TRUE))
lin.mod.6 <- lm(df$LikingOrder ~ poly(df$C6,3, raw=TRUE))
lin.mod.7 <- lm(df$LikingOrder ~ poly(df$C7,3, raw=TRUE))
lin.mod.8 <- lm(df$LikingOrder ~ poly(df$C8,3, raw=TRUE))
lin.mod.9 <- lm(df$LikingOrder ~ poly(df$C9,3, raw=TRUE))
lin.mod.10 <- lm(df$LikingOrder ~ poly(df$C10,3, raw=TRUE))
lin.mod.11 <- lm(df$LikingOrder ~ poly(df$C11,3, raw=TRUE))
lin.mod.12 <- lm(df$LikingOrder ~ poly(df$C12,3, raw=TRUE))
lin.mod.13 <- lm(df$LikingOrder ~ poly(df$C13,3, raw=TRUE))
lin.mod.14 <- lm(df$LikingOrder ~ poly(df$C14,3, raw=TRUE))
lin.mod.15 <- lm(df$LikingOrder ~ poly(df$C15,3, raw=TRUE))
lin.mod.16 <- lm(df$LikingOrder ~ poly(df$C16,3, raw=TRUE))
lin.mod.17 <- lm(df$LikingOrder ~ poly(df$C17,3, raw=TRUE))
lin.mod.18 <- lm(df$LikingOrder ~ poly(df$C18,3, raw=TRUE))
lin.mod.19 <- lm(df$LikingOrder ~ poly(df$C19,3, raw=TRUE))
lst <- lapply(ls(pattern="lin.mod"), get)
allmodels <- lapply(lst, summary)
res <- broom::tidy(allmodels)
'
, @input_data_1 = @inquery
, @output_data_1_name = N'res'
, @params = N'@StudyID int'
,@StudyID = @StudyID
--- Edit this line to handle the output data frame.
--WITH RESULT SETS ((StudyID int, RespID int, LikingOrder int, NewColumn int,
--res varchar(max)));
END;
上面的脚本在向其传递有效的 StudyID 输入参数时抛出以下错误:
Error in setNames(data.frame(data), value.name) :
'names' attribute [1] must be the same length as the vector [0]
Calls: source ... <Anonymous> -> <Anonymous> -> melt.default -> setNames
In addition: There were 50 or more warnings (use warnings() to see the first
50)
我的输入数据如下(原帖此处的数据示例未能保留):
您没有提供可重现的示例,不过下面的做法似乎可行。几个潜在的问题:应当直接对模型对象调用 tidy,而不是对模型的摘要(summary)调用;另外,最好避免在模型公式中使用 $ 索引。
library(purrr)

# Worked example on mtcars: build one cubic raw-polynomial formula per
# predictor column, fit each with lm(), then tidy every fitted model.
df <- mtcars
## all columns but the first; in your case this would be paste0("C",1:19)
predvars <- colnames(mtcars)[-1]
respvar <- "mpg" ## would be "LikingOrder"
predpolys <- sprintf("poly(%s,3,raw=TRUE)", predvars)
forms <- map(predpolys, reformulate,
  response = respvar
) ## construct formulas
names(forms) <- predvars ## names will be inherited by model lists
modList <- map(forms, lm, data = df) ## fit all models
res <- map(modList, broom::tidy) ## tidy all models
如果需要,您可以在此之后用 dplyr::bind_rows(res, .id = "predvar") 把各模型的结果合并成一个数据框,或者把最后一步的 map() 替换为 map_dfr(..., .id = "predvar")。
...
由于无法使用您的工作环境,我不确定您的数据是如何组织的,但看起来您是想用同一个因变量,分别对多个预测变量列拟合模型。根据 broom 和 dplyr 的使用说明(vignette),我认为您缺少的是对 rowwise 的调用,不过我并不完全确定。下面是一个使用 mtcars 数据集的可运行示例。请注意,其结构是在一个按行(rowwise)组织、且带有存放模型的列表列的数据框上调用 tidy,而不是直接对列表调用。您还可以通过对包含预测变量列的数据框进行映射来直接创建模型,而不必把模型零散地存放在环境中、再借助 get 和 ls 取回。每当您发现自己在使用 ls 时,请考虑能否改为把这些元素放进一个列表中!
编辑:重新阅读了这个问题让我想起的那篇使用说明(vignette)之后,我意识到其实可以像下面这样写一个简短的管道(此前的做法请参阅编辑历史记录;原文此处的行内代码在提取时丢失,残留为“[=17=”)。先用 gather 把数据转换成适合分组拟合模型的长格式,就能更简洁地得到想要的结果!
library(tidyverse)
library(broom)

# Fit mpg ~ measure on the rows of a single predictor and return the
# coefficient table produced by tidy().
fit_predictor <- function(rows) {
  tidy(lm(mpg ~ measure, data = rows))
}

mtcars %>%
  # Long format: every column other than mpg becomes (predictor, measure) rows.
  gather(key = predictor, value = measure, -mpg) %>%
  group_by(predictor) %>%
  # `.` stands for the rows of the current predictor group.
  do(fit_predictor(.))
#> # A tibble: 20 x 6
#> # Groups: predictor [10]
#> predictor term estimate std.error statistic p.value
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 am (Intercept) 17.1 1.12 15.2 1.13e-15
#> 2 am measure 7.24 1.76 4.11 2.85e- 4
#> 3 carb (Intercept) 25.9 1.84 14.1 9.22e-15
#> 4 carb measure -2.06 0.569 -3.62 1.08e- 3
#> 5 cyl (Intercept) 37.9 2.07 18.3 8.37e-18
#> 6 cyl measure -2.88 0.322 -8.92 6.11e-10
#> 7 disp (Intercept) 29.6 1.23 24.1 3.58e-21
#> 8 disp measure -0.0412 0.00471 -8.75 9.38e-10
#> 9 drat (Intercept) -7.52 5.48 -1.37 1.80e- 1
#> 10 drat measure 7.68 1.51 5.10 1.78e- 5
#> 11 gear (Intercept) 5.62 4.92 1.14 2.62e- 1
#> 12 gear measure 3.92 1.31 3.00 5.40e- 3
#> 13 hp (Intercept) 30.1 1.63 18.4 6.64e-18
#> 14 hp measure -0.0682 0.0101 -6.74 1.79e- 7
#> 15 qsec (Intercept) -5.11 10.0 -0.510 6.14e- 1
#> 16 qsec measure 1.41 0.559 2.53 1.71e- 2
#> 17 vs (Intercept) 16.6 1.08 15.4 8.85e-16
#> 18 vs measure 7.94 1.63 4.86 3.42e- 5
#> 19 wt (Intercept) 37.3 1.88 19.9 8.24e-19
#> 20 wt measure -5.34 0.559 -9.56 1.29e-10
由 reprex package (v0.2.0) 创建于 2018-07-10。