在 R 中逐行循环逻辑回归
looping logistic regression row wise in R
我正在尝试 运行 针对数据框“dat”的 C1 行从 R1 到 R6 的每一行进行逻辑回归分析
Input data frame
dat <- data.frame(list(X1 = c(1, 1, 0, 1, 1, 1, 0), X2 = c(1, 1, 0, 1, 1, 1, 0), X3 = c(1, 1, 1, 0, 1, 1, 0), X4 = c(1, 1, 0, 1, 1, 1, 0), X5 = c(0, 1, 0, 0, 1, 1, 0), X6 = c(1, 1, 1, 0, 1, 1, 0), X7 = c(0, 1, 0, 1, 1, 1, 0), X8 = c(0, 1, 1, 0, 1, 0, 0), X9 = c(0, 1, 1, 0, 1, 0, 0), X10 = c(1, 1, 1, 0, 1, 0, 0), X11 = c(1, 1, 1, 1, 1, 0, 0), X12 = c(0, 1, 0, 0, 1, 0, 0 ), X13 = c(0, 1, 0, 1, 1, 0, 0), X14 = c(1, 1, 0, 1, 1, 0, 0), X15 = c(1, 1, 1, 1, 1, 0, 0), X16 = c(0, 1, 1, 1, 1, 0, 0), X17 = c(0, 1, 1, 0, 1, 0, 0), X18 = c(0, 1, 1, 0, 1, 0, 0), X19 = c(1, 1, 0, 0, 1, 0, 0), X20 = c(1, 1, 1, 0, 1, 0, 0), X21 = c(0, 1, 0, 1, 1, 0, 0), X22 = c(1, 1, 0, 0, 1, 0, 0), X23 = c(1, 1, 1, 0, 1, 0, 0), X24 = c(1, 1, 0, 0, 1, 0, 0), X25 = c(1, 1, 0, 0, 1, 0, 0), X26 = c(0, 1, 1, 1, 1, 0, 0), X27 = c(0, 1, 0, 0, 1, 0, 0), X28 = c(1, 1, 0, 0, 1, 0, 0), X29 = c(1, 1, 1, 1, 0, 0, 0), X30 = c(1, 1, 0, 0, 0, 1, 0)), row.names = c("r1", "r2", "r3", "r4", "r5", "r6", "C1"))
logistic regression analyses
R1 <- glm(r1 ~ C1, data=dat, family=binomial); coef(summary(R1))[,2]
R2 <- glm(r2 ~ C1, data=dat, family=binomial); coef(summary(R2))[,2]
R3 <- glm(r3 ~ C1, data=dat, family=binomial); coef(summary(R3))[,2]
R4 <- glm(r4 ~ C1, data=dat, family=binomial); coef(summary(R4))[,2]
R5 <- glm(r5 ~ C1, data=dat, family=binomial); coef(summary(R5))[,2]
R6 <- glm(r6 ~ C1, data=dat, family=binomial); coef(summary(R6))[,2]
实际数据有 6000 行,因此无法针对 C1 逐行执行。
有没有办法在循环中针对 C1 从 R1 到 R6 的每一行计算 glm,并将输出提取到新列中?
library(tidyverse)
independent_var <- 'C1'
dependent_vars <- setdiff(rownames(dat),independent_var)
compare <- NULL
for(i in dependent_vars){
modelname <- toupper(i)
filtered_data <- dat %>%
t %>%
data.frame %>%
select(all_of(c(i,'C1')))
eval(parse(text=sprintf('%s <- glm(%s ~ %s,data=filtered_data,family=binomial)',modelname,i,independent_var)))
eval(parse(text=sprintf('newrow <- data.frame(model="%s",coef=as.numeric(%s$coefficients[-2]))',modelname,modelname)))
compare <- rbind(compare,newrow)
}
compare
输出;
model coef
<chr> <dbl>
1 R1 0.405
2 R2 25.6
3 R3 -0.134
4 R4 -0.405
5 R5 2.64
6 R6 -1.01
我想这就是您要找的。
library(tidyverse)
dat %>%
rownames_to_column("var") %>%
pivot_longer(cols = -var) %>%
mutate(grp = ifelse(grepl("r", var), "x", "y")) %>%
group_split(grp) %>%
reduce(full_join, by = "name") %>%
nest(data = -var.x) %>%
mutate(reg = map_dbl(data, ~glm(value.x ~ value.y, data=.x, family=binomial) %>%
summary() %>%
coef() %>%
.[,2])) %>%
select(-data)
#> # A tibble: 6 x 2
#> var.x reg
#> <chr> <dbl>
#> 1 r1 0.373
#> 2 r2 39436.
#> 3 r3 0.366
#> 4 r4 0.373
#> 5 r5 0.732
#> 6 r6 0.413
首先我们使用 rownames_to_column
将您的行名称变成一个变量名称,然后我们使用 pivot_longer
将数据帧从宽到长。然后我们根据您的“x”变量和“y”变量拆分数据框以进行逻辑回归。然后我将拆分数据框与其自身连接起来,以获得 x 和 y 的每个组合。最后,我 nest
数据框和 运行 每个“行”的所有回归。
如果有任何不清楚的地方,请告诉我。
可以对任意数量的行执行此操作。但是,首先要注意一点。在您的数据中,行 C1
包含所有值 0
。这当然不适合 glm
函数。所以我不得不稍微编辑一下 C1
行。
library(tidyverse)
dat <- data.frame(list(X1 = c(1, 1, 0, 1, 1, 1, 1), X2 = c(1, 1, 0, 1, 1, 1, 1), X3 = c(1, 1, 1, 0, 1, 1, 1), X4 = c(1, 1, 0, 1, 1, 1, 0), X5 = c(0, 1, 0, 0, 1, 1, 0), X6 = c(1, 1, 1, 0, 1, 1, 0), X7 = c(0, 1, 0, 1, 1, 1, 0), X8 = c(0, 1, 1, 0, 1, 0, 0), X9 = c(0, 1, 1, 0, 1, 0, 0), X10 = c(1, 1, 1, 0, 1, 0, 0), X11 = c(1, 1, 1, 1, 1, 0, 0), X12 = c(0, 1, 0, 0, 1, 0, 0 ), X13 = c(0, 1, 0, 1, 1, 0, 1), X14 = c(1, 1, 0, 1, 1, 0, 1), X15 = c(1, 1, 1, 1, 1, 0, 0), X16 = c(0, 1, 1, 1, 1, 0, 0), X17 = c(0, 1, 1, 0, 1, 0, 0), X18 = c(0, 1, 1, 0, 1, 0, 0), X19 = c(1, 1, 0, 0, 1, 0, 0), X20 = c(1, 1, 1, 0, 1, 0, 0), X21 = c(0, 1, 0, 1, 1, 0, 0), X22 = c(1, 1, 0, 0, 1, 0, 0), X23 = c(1, 1, 1, 0, 1, 0, 0), X24 = c(1, 1, 0, 0, 1, 0, 0), X25 = c(1, 1, 0, 0, 1, 0, 0), X26 = c(0, 1, 1, 1, 1, 0, 0), X27 = c(0, 1, 0, 0, 1, 0, 0), X28 = c(1, 1, 0, 0, 1, 0, 0), X29 = c(1, 1, 1, 1, 0, 0, 0), X30 = c(1, 1, 0, 0, 0, 1, 0)), row.names = c("r1", "r2", "r3", "r4", "r5", "r6", "C1"))
输出
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30
r1 1 1 1 1 0 1 0 0 0 1 1 0 0 1 1 0 0 0 1 1 0 1 1 1 1 0 0 1 1 1
r2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
r3 0 0 1 0 0 1 0 1 1 1 1 0 0 0 1 1 1 1 0 1 0 0 1 0 0 1 0 0 1 0
r4 1 1 0 1 0 0 1 0 0 0 1 0 1 1 1 1 0 0 0 0 1 0 0 0 0 1 0 0 1 0
r5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
r6 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
C1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
然后我将你的数据分成 r
行和 C1
行。
dfR = dat %>% as_tibble() %>% slice_head(n=nrow(.)-1)
dfC = dat %>% as_tibble() %>% slice_tail()
输出dfR
# A tibble: 6 x 30
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 1 0 1 0 0 0 1 1 0 0 1 1 0 0 0 1 1 0
2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
3 0 0 1 0 0 1 0 1 1 1 1 0 0 0 1 1 1 1 0 1 0
4 1 1 0 1 0 0 1 0 0 0 1 0 1 1 1 1 0 0 0 0 1
5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
6 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# ... with 9 more variables: X22 <dbl>, X23 <dbl>, X24 <dbl>, X25 <dbl>, X26 <dbl>, X27 <dbl>, X28 <dbl>, X29 <dbl>, X30 <dbl>
输出dfC
# A tibble: 1 x 30
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
# ... with 9 more variables: X22 <dbl>, X23 <dbl>, X24 <dbl>, X25 <dbl>, X26 <dbl>, X27 <dbl>, X28 <dbl>, X29 <dbl>, X30 <dbl>
接下来我准备了四个简单的函数。 fGetRowValue
returns 具有一行值的向量。 fGetData
使用两个变量 x
和 C
创建一个 tibble
,其中 x
是单行的值,C
当然是值来自您的 C1
行。 fglm
returns 我的模型和 fCoef
returns tibble
包含模型的系数。
fGetRowValue = function(data) data %>%
pivot_longer(everything()) %>% pull(value)
fGetData = function(dataR, dataC) tibble(
x = fGetRowValue(dataR),
C = fGetRowValue(dataC)
)
fglm = function(data) glm(x~C, binomial, data)
fCoef = function(model) tibble(
Intercept = coef(model)[1],
C = coef(model)[2]
)
现在是正确计算的时候了。
dfR %>% mutate(row = 1:nrow(.)) %>%
group_by(row) %>%
nest() %>% #1
group_modify(~fGetData(.x$data[[1]], dfC)) %>% #2
nest() %>% #3
mutate(model = map(data, ~fglm(.x))) %>% #4
mutate(Ceof = map(model, ~fCoef(.x))) %>% #5
unnest(Ceof)
输出
# A tibble: 6 x 5
# Groups: row [6]
row data model Intercept C
<int> <list> <list> <dbl> <dbl>
1 1 <tibble [30 x 2]> <glm> 0.241 1.15e+0
2 2 <tibble [30 x 2]> <glm> 25.6 -1.98e-9
3 3 <tibble [30 x 2]> <glm> 0.0800 -1.47e+0
4 4 <tibble [30 x 2]> <glm> -0.754 2.14e+0
5 5 <tibble [30 x 2]> <glm> 2.44 1.71e+1
6 6 <tibble [30 x 2]> <glm> -1.39 1.79e+0
由于我的计算方式可能有点混乱,让我逐年描述一下(见评论编号)。
在第一步之后,我们收到了这种形式的数据
# A tibble: 6 x 2
# Groups: row [6]
row data
<int> <list>
1 1 <tibble [1 x 30]>
2 2 <tibble [1 x 30]>
3 3 <tibble [1 x 30]>
4 4 <tibble [1 x 30]>
5 5 <tibble [1 x 30]>
6 6 <tibble [1 x 30]>
变量数据包含一个 tibble
列表,其中一行包含值。
第二步之后,我们就这样了
# A tibble: 180 x 3
# Groups: row [6]
row x C
<int> <dbl> <dbl>
1 1 1 1
2 1 1 1
3 1 1 1
4 1 1 0
5 1 0 0
6 1 1 0
7 1 0 0
8 1 0 0
9 1 0 0
10 1 1 0
# ... with 170 more rows
我们在第三步回滚它
# A tibble: 6 x 2
# Groups: row [6]
row data
<int> <list>
1 1 <tibble [30 x 2]>
2 2 <tibble [30 x 2]>
3 3 <tibble [30 x 2]>
4 4 <tibble [30 x 2]>
5 5 <tibble [30 x 2]>
6 6 <tibble [30 x 2]>
然后在第四步中,我们使用包含 glm
个模型列表的变量完成我们的 tibble
。
# A tibble: 6 x 3
# Groups: row [6]
row data model
<int> <list> <list>
1 1 <tibble [30 x 2]> <glm>
2 2 <tibble [30 x 2]> <glm>
3 3 <tibble [30 x 2]> <glm>
4 4 <tibble [30 x 2]> <glm>
5 5 <tibble [30 x 2]> <glm>
6 6 <tibble [30 x 2]> <glm>
我们已经完成了最后一步,即从我们的模型中下载系数。
# A tibble: 6 x 4
# Groups: row [6]
row data model Ceof
<int> <list> <list> <list>
1 1 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
2 2 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
3 3 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
4 4 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
5 5 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
6 6 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
现在您需要做的就是unnest
,我们有您想要的结果。
# A tibble: 6 x 5
# Groups: row [6]
row data model Intercept C
<int> <list> <list> <dbl> <dbl>
1 1 <tibble [30 x 2]> <glm> 0.241 1.15e+0
2 2 <tibble [30 x 2]> <glm> 25.6 -1.98e-9
3 3 <tibble [30 x 2]> <glm> 0.0800 -1.47e+0
4 4 <tibble [30 x 2]> <glm> -0.754 2.14e+0
5 5 <tibble [30 x 2]> <glm> 2.44 1.71e+1
6 6 <tibble [30 x 2]> <glm> -1.39 1.79e+0
请注意,顺便说一句,所有用于构建它们的模型和数据都已保留。您可以自由使用它进行进一步的计算。
我正在尝试 运行 针对数据框“dat”的 C1 行从 R1 到 R6 的每一行进行逻辑回归分析
Input data frame
dat <- data.frame(list(X1 = c(1, 1, 0, 1, 1, 1, 0), X2 = c(1, 1, 0, 1, 1, 1, 0), X3 = c(1, 1, 1, 0, 1, 1, 0), X4 = c(1, 1, 0, 1, 1, 1, 0), X5 = c(0, 1, 0, 0, 1, 1, 0), X6 = c(1, 1, 1, 0, 1, 1, 0), X7 = c(0, 1, 0, 1, 1, 1, 0), X8 = c(0, 1, 1, 0, 1, 0, 0), X9 = c(0, 1, 1, 0, 1, 0, 0), X10 = c(1, 1, 1, 0, 1, 0, 0), X11 = c(1, 1, 1, 1, 1, 0, 0), X12 = c(0, 1, 0, 0, 1, 0, 0 ), X13 = c(0, 1, 0, 1, 1, 0, 0), X14 = c(1, 1, 0, 1, 1, 0, 0), X15 = c(1, 1, 1, 1, 1, 0, 0), X16 = c(0, 1, 1, 1, 1, 0, 0), X17 = c(0, 1, 1, 0, 1, 0, 0), X18 = c(0, 1, 1, 0, 1, 0, 0), X19 = c(1, 1, 0, 0, 1, 0, 0), X20 = c(1, 1, 1, 0, 1, 0, 0), X21 = c(0, 1, 0, 1, 1, 0, 0), X22 = c(1, 1, 0, 0, 1, 0, 0), X23 = c(1, 1, 1, 0, 1, 0, 0), X24 = c(1, 1, 0, 0, 1, 0, 0), X25 = c(1, 1, 0, 0, 1, 0, 0), X26 = c(0, 1, 1, 1, 1, 0, 0), X27 = c(0, 1, 0, 0, 1, 0, 0), X28 = c(1, 1, 0, 0, 1, 0, 0), X29 = c(1, 1, 1, 1, 0, 0, 0), X30 = c(1, 1, 0, 0, 0, 1, 0)), row.names = c("r1", "r2", "r3", "r4", "r5", "r6", "C1"))
logistic regression analyses
R1 <- glm(r1 ~ C1, data=dat, family=binomial); coef(summary(R1))[,2]
R2 <- glm(r2 ~ C1, data=dat, family=binomial); coef(summary(R2))[,2]
R3 <- glm(r3 ~ C1, data=dat, family=binomial); coef(summary(R3))[,2]
R4 <- glm(r4 ~ C1, data=dat, family=binomial); coef(summary(R4))[,2]
R5 <- glm(r5 ~ C1, data=dat, family=binomial); coef(summary(R5))[,2]
R6 <- glm(r6 ~ C1, data=dat, family=binomial); coef(summary(R6))[,2]
实际数据有 6000 行,因此无法针对 C1 逐行执行。
有没有办法在循环中针对 C1 从 R1 到 R6 的每一行计算 glm,并将输出提取到新列中?
library(tidyverse)
independent_var <- 'C1'
dependent_vars <- setdiff(rownames(dat),independent_var)
compare <- NULL
for(i in dependent_vars){
modelname <- toupper(i)
filtered_data <- dat %>%
t %>%
data.frame %>%
select(all_of(c(i,'C1')))
eval(parse(text=sprintf('%s <- glm(%s ~ %s,data=filtered_data,family=binomial)',modelname,i,independent_var)))
eval(parse(text=sprintf('newrow <- data.frame(model="%s",coef=as.numeric(%s$coefficients[-2]))',modelname,modelname)))
compare <- rbind(compare,newrow)
}
compare
输出;
model coef
<chr> <dbl>
1 R1 0.405
2 R2 25.6
3 R3 -0.134
4 R4 -0.405
5 R5 2.64
6 R6 -1.01
我想这就是您要找的。
library(tidyverse)
dat %>%
rownames_to_column("var") %>%
pivot_longer(cols = -var) %>%
mutate(grp = ifelse(grepl("r", var), "x", "y")) %>%
group_split(grp) %>%
reduce(full_join, by = "name") %>%
nest(data = -var.x) %>%
mutate(reg = map_dbl(data, ~glm(value.x ~ value.y, data=.x, family=binomial) %>%
summary() %>%
coef() %>%
.[,2])) %>%
select(-data)
#> # A tibble: 6 x 2
#> var.x reg
#> <chr> <dbl>
#> 1 r1 0.373
#> 2 r2 39436.
#> 3 r3 0.366
#> 4 r4 0.373
#> 5 r5 0.732
#> 6 r6 0.413
首先我们使用 rownames_to_column
将您的行名称变成一个变量名称,然后我们使用 pivot_longer
将数据帧从宽到长。然后我们根据您的“x”变量和“y”变量拆分数据框以进行逻辑回归。然后我将拆分数据框与其自身连接起来,以获得 x 和 y 的每个组合。最后,我 nest
数据框和 运行 每个“行”的所有回归。
如果有任何不清楚的地方,请告诉我。
可以对任意数量的行执行此操作。但是,首先要注意一点。在您的数据中,行 C1
包含所有值 0
。这当然不适合 glm
函数。所以我不得不稍微编辑一下 C1
行。
library(tidyverse)
dat <- data.frame(list(X1 = c(1, 1, 0, 1, 1, 1, 1), X2 = c(1, 1, 0, 1, 1, 1, 1), X3 = c(1, 1, 1, 0, 1, 1, 1), X4 = c(1, 1, 0, 1, 1, 1, 0), X5 = c(0, 1, 0, 0, 1, 1, 0), X6 = c(1, 1, 1, 0, 1, 1, 0), X7 = c(0, 1, 0, 1, 1, 1, 0), X8 = c(0, 1, 1, 0, 1, 0, 0), X9 = c(0, 1, 1, 0, 1, 0, 0), X10 = c(1, 1, 1, 0, 1, 0, 0), X11 = c(1, 1, 1, 1, 1, 0, 0), X12 = c(0, 1, 0, 0, 1, 0, 0 ), X13 = c(0, 1, 0, 1, 1, 0, 1), X14 = c(1, 1, 0, 1, 1, 0, 1), X15 = c(1, 1, 1, 1, 1, 0, 0), X16 = c(0, 1, 1, 1, 1, 0, 0), X17 = c(0, 1, 1, 0, 1, 0, 0), X18 = c(0, 1, 1, 0, 1, 0, 0), X19 = c(1, 1, 0, 0, 1, 0, 0), X20 = c(1, 1, 1, 0, 1, 0, 0), X21 = c(0, 1, 0, 1, 1, 0, 0), X22 = c(1, 1, 0, 0, 1, 0, 0), X23 = c(1, 1, 1, 0, 1, 0, 0), X24 = c(1, 1, 0, 0, 1, 0, 0), X25 = c(1, 1, 0, 0, 1, 0, 0), X26 = c(0, 1, 1, 1, 1, 0, 0), X27 = c(0, 1, 0, 0, 1, 0, 0), X28 = c(1, 1, 0, 0, 1, 0, 0), X29 = c(1, 1, 1, 1, 0, 0, 0), X30 = c(1, 1, 0, 0, 0, 1, 0)), row.names = c("r1", "r2", "r3", "r4", "r5", "r6", "C1"))
输出
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30
r1 1 1 1 1 0 1 0 0 0 1 1 0 0 1 1 0 0 0 1 1 0 1 1 1 1 0 0 1 1 1
r2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
r3 0 0 1 0 0 1 0 1 1 1 1 0 0 0 1 1 1 1 0 1 0 0 1 0 0 1 0 0 1 0
r4 1 1 0 1 0 0 1 0 0 0 1 0 1 1 1 1 0 0 0 0 1 0 0 0 0 1 0 0 1 0
r5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
r6 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
C1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
然后我将你的数据分成 r
行和 C1
行。
dfR = dat %>% as_tibble() %>% slice_head(n=nrow(.)-1)
dfC = dat %>% as_tibble() %>% slice_tail()
输出dfR
# A tibble: 6 x 30
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 1 0 1 0 0 0 1 1 0 0 1 1 0 0 0 1 1 0
2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
3 0 0 1 0 0 1 0 1 1 1 1 0 0 0 1 1 1 1 0 1 0
4 1 1 0 1 0 0 1 0 0 0 1 0 1 1 1 1 0 0 0 0 1
5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
6 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# ... with 9 more variables: X22 <dbl>, X23 <dbl>, X24 <dbl>, X25 <dbl>, X26 <dbl>, X27 <dbl>, X28 <dbl>, X29 <dbl>, X30 <dbl>
输出dfC
# A tibble: 1 x 30
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
# ... with 9 more variables: X22 <dbl>, X23 <dbl>, X24 <dbl>, X25 <dbl>, X26 <dbl>, X27 <dbl>, X28 <dbl>, X29 <dbl>, X30 <dbl>
接下来我准备了四个简单的函数。 fGetRowValue
returns 具有一行值的向量。 fGetData
使用两个变量 x
和 C
创建一个 tibble
,其中 x
是单行的值,C
当然是值来自您的 C1
行。 fglm
returns 我的模型和 fCoef
returns tibble
包含模型的系数。
fGetRowValue = function(data) data %>%
pivot_longer(everything()) %>% pull(value)
fGetData = function(dataR, dataC) tibble(
x = fGetRowValue(dataR),
C = fGetRowValue(dataC)
)
fglm = function(data) glm(x~C, binomial, data)
fCoef = function(model) tibble(
Intercept = coef(model)[1],
C = coef(model)[2]
)
现在是正确计算的时候了。
dfR %>% mutate(row = 1:nrow(.)) %>%
group_by(row) %>%
nest() %>% #1
group_modify(~fGetData(.x$data[[1]], dfC)) %>% #2
nest() %>% #3
mutate(model = map(data, ~fglm(.x))) %>% #4
mutate(Ceof = map(model, ~fCoef(.x))) %>% #5
unnest(Ceof)
输出
# A tibble: 6 x 5
# Groups: row [6]
row data model Intercept C
<int> <list> <list> <dbl> <dbl>
1 1 <tibble [30 x 2]> <glm> 0.241 1.15e+0
2 2 <tibble [30 x 2]> <glm> 25.6 -1.98e-9
3 3 <tibble [30 x 2]> <glm> 0.0800 -1.47e+0
4 4 <tibble [30 x 2]> <glm> -0.754 2.14e+0
5 5 <tibble [30 x 2]> <glm> 2.44 1.71e+1
6 6 <tibble [30 x 2]> <glm> -1.39 1.79e+0
由于我的计算方式可能有点混乱,让我逐年描述一下(见评论编号)。
在第一步之后,我们收到了这种形式的数据
# A tibble: 6 x 2
# Groups: row [6]
row data
<int> <list>
1 1 <tibble [1 x 30]>
2 2 <tibble [1 x 30]>
3 3 <tibble [1 x 30]>
4 4 <tibble [1 x 30]>
5 5 <tibble [1 x 30]>
6 6 <tibble [1 x 30]>
变量数据包含一个 tibble
列表,其中一行包含值。
第二步之后,我们就这样了
# A tibble: 180 x 3
# Groups: row [6]
row x C
<int> <dbl> <dbl>
1 1 1 1
2 1 1 1
3 1 1 1
4 1 1 0
5 1 0 0
6 1 1 0
7 1 0 0
8 1 0 0
9 1 0 0
10 1 1 0
# ... with 170 more rows
我们在第三步回滚它
# A tibble: 6 x 2
# Groups: row [6]
row data
<int> <list>
1 1 <tibble [30 x 2]>
2 2 <tibble [30 x 2]>
3 3 <tibble [30 x 2]>
4 4 <tibble [30 x 2]>
5 5 <tibble [30 x 2]>
6 6 <tibble [30 x 2]>
然后在第四步中,我们使用包含 glm
个模型列表的变量完成我们的 tibble
。
# A tibble: 6 x 3
# Groups: row [6]
row data model
<int> <list> <list>
1 1 <tibble [30 x 2]> <glm>
2 2 <tibble [30 x 2]> <glm>
3 3 <tibble [30 x 2]> <glm>
4 4 <tibble [30 x 2]> <glm>
5 5 <tibble [30 x 2]> <glm>
6 6 <tibble [30 x 2]> <glm>
我们已经完成了最后一步,即从我们的模型中下载系数。
# A tibble: 6 x 4
# Groups: row [6]
row data model Ceof
<int> <list> <list> <list>
1 1 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
2 2 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
3 3 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
4 4 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
5 5 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
6 6 <tibble [30 x 2]> <glm> <tibble [1 x 2]>
现在您需要做的就是unnest
,我们有您想要的结果。
# A tibble: 6 x 5
# Groups: row [6]
row data model Intercept C
<int> <list> <list> <dbl> <dbl>
1 1 <tibble [30 x 2]> <glm> 0.241 1.15e+0
2 2 <tibble [30 x 2]> <glm> 25.6 -1.98e-9
3 3 <tibble [30 x 2]> <glm> 0.0800 -1.47e+0
4 4 <tibble [30 x 2]> <glm> -0.754 2.14e+0
5 5 <tibble [30 x 2]> <glm> 2.44 1.71e+1
6 6 <tibble [30 x 2]> <glm> -1.39 1.79e+0
请注意,顺便说一句,所有用于构建它们的模型和数据都已保留。您可以自由使用它进行进一步的计算。