在 R 中的多个列上执行 lm() 和 segmented()
Performing lm() and segmented() on multiple columns in R
我正在尝试在 R 中使用相同的自变量 (x) 和多个相关响应变量(Curve1、Curve2 等)一个一个地执行 lm() 和 segmented()。我希望为每个响应变量提取估计的断点和模型系数。我在下面包含了我的数据示例。
x Curve1 Curve2 Curve3
1 -0.236422 98.8169 95.6828 101.7910
2 -0.198083 98.3260 95.4185 101.5170
3 -0.121406 97.3442 94.8899 100.9690
4 0.875399 84.5815 88.0176 93.8424
5 0.913738 84.1139 87.7533 93.5683
6 1.795530 73.3582 78.1278 82.9956
7 1.833870 72.8905 77.7093 82.7039
8 1.872200 72.4229 77.3505 82.4123
9 2.907350 59.2070 67.6652 74.5374
10 3.865810 46.4807 58.5158 65.0220
11 3.904150 45.9716 58.1498 64.7121
12 3.942490 45.4626 57.8099 64.4022
13 4.939300 33.3040 48.9742 56.3451
14 4.977640 32.9641 48.6344 56.0352
15 5.936100 24.4682 36.4758 47.0485
16 5.936100 24.4682 36.4758 47.0485
17 6.012780 23.7885 35.9667 46.5002
18 6.971250 20.7387 29.6035 39.6476
19 7.009580 20.6167 29.3490 39.3930
20 8.006390 18.7209 22.7313 32.7753
21 8.121410 18.5022 22.3914 32.1292
22 9.041530 16.4722 19.6728 26.9604
23 9.079870 16.3877 19.5595 26.7450
我可以使用以下代码一次完成一条曲线。但是,我的完整数据集有超过 1000 条曲线,因此我希望能够以某种方式在每一列上重复此代码。我一直没有成功地尝试在每一列上循环它,所以如果有人能告诉我如何做这样的事情并创建一个类似于下面代码生成的摘要数据框,但包含每一列,我会万分感激。谢谢!
model <- lm(Curve1~x, dat) # Linear model
seg_model <- segmented(model, seg.Z = ~x) # Segmented model
breakpoint <- as.matrix(seg_model$psi.history[[5]]) # Extract breakpoint
coefficients <- as.matrix(seg_model$coefficients) # Extract coefficients
summary_curve1 <- as.data.frame(rbind(breakpoint, coefficients)) # combine breakpoint and coefficeints
colnames(summary_curve1) <- "Curve_1" # header name
summary_curve1 # display summary
您可以将整个事情包装在一个函数中,将列名和数据作为参数,并在列名上使用 lapply
,如下所示:
library(segmented)
run_mod <- function(varname, data){
data$Y <- data[,varname]
model <- lm(Y ~ x, data) # Linear model
seg_model <- segmented(model, seg.Z = ~x) # Segmented model
breakpoint <- as.matrix(seg_model$psi.history[[5]]) # Extract breakpoint
coefficients <- as.matrix(seg_model$coefficients) # Extract coefficients
summary_curve1 <- as.data.frame(rbind(breakpoint, coefficients))
colnames(summary_curve1) <- varname
return(summary_curve1)
}
lapply(names(dat)[2:ncol(dat)], function(x)run_mod(x, dat))
其中给出了每条拟合曲线的摘要(不确定您真正想要的输出)。
这是一种使用 tidyverse
和 broom
到 return 包含每个 Curve
列的结果的数据框的方法:
library(broom)
library(tidyverse)
model.results = setNames(names(dat[,-1]), names(dat[,-1])) %>%
map(~ lm(paste0(.x, " ~ x"), data=dat) %>%
segmented(seg.Z=~x) %>%
list(model=tidy(.),
psi=data.frame(term="breakpoint", estimate=.[["psi.history"]][[5]]))) %>%
map_df(~.[2:3] %>% bind_rows, .id="Curve")
model.results
Curve term estimate std.error statistic p.value
1 Curve1 (Intercept) 95.866127 0.14972382 640.286416 1.212599e-42
2 Curve1 x -12.691455 0.05220412 -243.112130 1.184191e-34
3 Curve1 U1.x 10.185816 0.11080880 91.922447 1.233602e-26
4 Curve1 psi1.x 0.000000 0.02821843 0.000000 1.000000e+00
5 Curve1 breakpoint 5.595706 NA NA NA
6 Curve2 (Intercept) 94.826309 0.45750667 207.267599 2.450058e-33
7 Curve2 x -9.489342 0.11156425 -85.057193 5.372730e-26
8 Curve2 U1.x 6.532312 1.17332640 5.567344 2.275438e-05
9 Curve2 psi1.x 0.000000 0.23845241 0.000000 1.000000e+00
10 Curve2 breakpoint 7.412087 NA NA NA
11 Curve3 (Intercept) 100.027990 0.29453941 339.608175 2.069087e-37
12 Curve3 x -8.931163 0.08154534 -109.523900 4.447569e-28
13 Curve3 U1.x 2.807215 0.36046013 7.787865 2.492325e-07
14 Curve3 psi1.x 0.000000 0.26319757 0.000000 1.000000e+00
15 Curve3 breakpoint 6.362132 NA NA NA
我遇到了同样的问题,正在尝试调整建议的答案,但结果如下:
Error in model.frame.default(formula = Y ~ Prof, data = data, drop.unused.levels = TRUE) :
invalid type (list) for variable 'Y'
我运行这个代码:
run_mod <- function(varname, data){
data$Y <- data[,varname]
model <- lm(Y ~ Prof, data) # Linear model
seg_model <- segmented(model, seg.Z = ~ Prof) # Segmented model
breakpoint <- as.matrix(seg_model$psi.history[[5]]) # Extract breakpoint
coefficients <- as.matrix(seg_model$coefficients) # Extract coefficients
summary_curve1 <- as.data.frame(rbind(breakpoint, coefficients))
colnames(summary_curve1) <- varname
return(summary_curve1)
}
lapply(names(DATApiv)[3:ncol(DATApiv)], function(Prof)run_mod(Prof, DATApiv))
注意:Prof = 是我的 DF 中的列,对应于自变量作为本示例的 x 列)。 DataPiv 是我的数据库。
我正在尝试在 R 中使用相同的自变量 (x) 和多个相关响应变量(Curve1、Curve2 等)一个一个地执行 lm() 和 segmented()。我希望为每个响应变量提取估计的断点和模型系数。我在下面包含了我的数据示例。
x Curve1 Curve2 Curve3
1 -0.236422 98.8169 95.6828 101.7910
2 -0.198083 98.3260 95.4185 101.5170
3 -0.121406 97.3442 94.8899 100.9690
4 0.875399 84.5815 88.0176 93.8424
5 0.913738 84.1139 87.7533 93.5683
6 1.795530 73.3582 78.1278 82.9956
7 1.833870 72.8905 77.7093 82.7039
8 1.872200 72.4229 77.3505 82.4123
9 2.907350 59.2070 67.6652 74.5374
10 3.865810 46.4807 58.5158 65.0220
11 3.904150 45.9716 58.1498 64.7121
12 3.942490 45.4626 57.8099 64.4022
13 4.939300 33.3040 48.9742 56.3451
14 4.977640 32.9641 48.6344 56.0352
15 5.936100 24.4682 36.4758 47.0485
16 5.936100 24.4682 36.4758 47.0485
17 6.012780 23.7885 35.9667 46.5002
18 6.971250 20.7387 29.6035 39.6476
19 7.009580 20.6167 29.3490 39.3930
20 8.006390 18.7209 22.7313 32.7753
21 8.121410 18.5022 22.3914 32.1292
22 9.041530 16.4722 19.6728 26.9604
23 9.079870 16.3877 19.5595 26.7450
我可以使用以下代码一次完成一条曲线。但是,我的完整数据集有超过 1000 条曲线,因此我希望能够以某种方式在每一列上重复此代码。我一直没有成功地尝试在每一列上循环它,所以如果有人能告诉我如何做这样的事情并创建一个类似于下面代码生成的摘要数据框,但包含每一列,我会万分感激。谢谢!
model <- lm(Curve1~x, dat) # Linear model
seg_model <- segmented(model, seg.Z = ~x) # Segmented model
breakpoint <- as.matrix(seg_model$psi.history[[5]]) # Extract breakpoint
coefficients <- as.matrix(seg_model$coefficients) # Extract coefficients
summary_curve1 <- as.data.frame(rbind(breakpoint, coefficients)) # combine breakpoint and coefficeints
colnames(summary_curve1) <- "Curve_1" # header name
summary_curve1 # display summary
您可以将整个事情包装在一个函数中,将列名和数据作为参数,并在列名上使用 lapply
,如下所示:
library(segmented)
run_mod <- function(varname, data){
data$Y <- data[,varname]
model <- lm(Y ~ x, data) # Linear model
seg_model <- segmented(model, seg.Z = ~x) # Segmented model
breakpoint <- as.matrix(seg_model$psi.history[[5]]) # Extract breakpoint
coefficients <- as.matrix(seg_model$coefficients) # Extract coefficients
summary_curve1 <- as.data.frame(rbind(breakpoint, coefficients))
colnames(summary_curve1) <- varname
return(summary_curve1)
}
lapply(names(dat)[2:ncol(dat)], function(x)run_mod(x, dat))
其中给出了每条拟合曲线的摘要(不确定您真正想要的输出)。
这是一种使用 tidyverse
和 broom
到 return 包含每个 Curve
列的结果的数据框的方法:
library(broom)
library(tidyverse)
model.results = setNames(names(dat[,-1]), names(dat[,-1])) %>%
map(~ lm(paste0(.x, " ~ x"), data=dat) %>%
segmented(seg.Z=~x) %>%
list(model=tidy(.),
psi=data.frame(term="breakpoint", estimate=.[["psi.history"]][[5]]))) %>%
map_df(~.[2:3] %>% bind_rows, .id="Curve")
model.results
Curve term estimate std.error statistic p.value 1 Curve1 (Intercept) 95.866127 0.14972382 640.286416 1.212599e-42 2 Curve1 x -12.691455 0.05220412 -243.112130 1.184191e-34 3 Curve1 U1.x 10.185816 0.11080880 91.922447 1.233602e-26 4 Curve1 psi1.x 0.000000 0.02821843 0.000000 1.000000e+00 5 Curve1 breakpoint 5.595706 NA NA NA 6 Curve2 (Intercept) 94.826309 0.45750667 207.267599 2.450058e-33 7 Curve2 x -9.489342 0.11156425 -85.057193 5.372730e-26 8 Curve2 U1.x 6.532312 1.17332640 5.567344 2.275438e-05 9 Curve2 psi1.x 0.000000 0.23845241 0.000000 1.000000e+00 10 Curve2 breakpoint 7.412087 NA NA NA 11 Curve3 (Intercept) 100.027990 0.29453941 339.608175 2.069087e-37 12 Curve3 x -8.931163 0.08154534 -109.523900 4.447569e-28 13 Curve3 U1.x 2.807215 0.36046013 7.787865 2.492325e-07 14 Curve3 psi1.x 0.000000 0.26319757 0.000000 1.000000e+00 15 Curve3 breakpoint 6.362132 NA NA NA
我遇到了同样的问题,正在尝试调整建议的答案,但结果如下:
Error in model.frame.default(formula = Y ~ Prof, data = data, drop.unused.levels = TRUE) :
invalid type (list) for variable 'Y'
我运行这个代码:
run_mod <- function(varname, data){
data$Y <- data[,varname]
model <- lm(Y ~ Prof, data) # Linear model
seg_model <- segmented(model, seg.Z = ~ Prof) # Segmented model
breakpoint <- as.matrix(seg_model$psi.history[[5]]) # Extract breakpoint
coefficients <- as.matrix(seg_model$coefficients) # Extract coefficients
summary_curve1 <- as.data.frame(rbind(breakpoint, coefficients))
colnames(summary_curve1) <- varname
return(summary_curve1)
}
lapply(names(DATApiv)[3:ncol(DATApiv)], function(Prof)run_mod(Prof, DATApiv))
注意:Prof = 是我的 DF 中的列,对应于自变量作为本示例的 x 列)。 DataPiv 是我的数据库。