将线性回归模型应用于成对列
Apply linear regression model to pairwise columns
我有一些数据如下:
BNBBTC NULSBTC NEOBTC LINKBTC IOTABTC ETCBTC
2021-05-15 14:49:00 0.012036 0.00001983 0.001972 0.00089782 0.00003891 0.0021133
2021-05-15 14:50:00 0.012039 0.00001983 0.001968 0.00089699 0.00003889 0.0021148
2021-05-15 14:51:00 0.012032 0.00001975 0.001972 0.00089625 0.00003890 0.0021139
2021-05-15 14:52:00 0.012017 0.00001974 0.001969 0.00089484 0.00003885 0.0021152
2021-05-15 14:53:00 0.012026 0.00001975 0.001967 0.00089538 0.00003885 0.0021140
2021-05-15 14:54:00 0.012012 0.00001973 0.001966 0.00089511 0.00003885 0.0021104
2021-05-15 14:55:00 0.012019 0.00001973 0.001965 0.00089463 0.00003882 0.0021097
2021-05-15 14:56:00 0.012022 0.00001977 0.001966 0.00089541 0.00003879 0.0021131
2021-05-15 14:57:00 0.012022 0.00001977 0.001967 0.00089546 0.00003879 0.0021113
2021-05-15 14:58:00 0.012005 0.00001977 0.001967 0.00089500 0.00003886 0.0021110
2021-05-15 14:59:00 0.012015 0.00001984 0.001965 0.00089540 0.00003887 0.0021107
2021-05-15 15:00:00 0.012016 0.00001984 0.001967 0.00089574 0.00003882 0.0021088
2021-05-15 15:01:00 0.012032 0.00001980 0.001966 0.00089636 0.00003896 0.0021155
2021-05-15 15:02:00 0.012029 0.00001969 0.001966 0.00089600 0.00003894 0.0021191
2021-05-15 15:03:00 0.012014 0.00001974 0.001966 0.00089401 0.00003891 0.0021166
2021-05-15 15:04:00 0.012017 0.00001975 0.001966 0.00089410 0.00003890 0.0021187
2021-05-15 15:05:00 0.012017 0.00001975 0.001968 0.00089554 0.00003889 0.0021109
2021-05-15 15:06:00 0.012021 0.00001980 0.001967 0.00089600 0.00003898 0.0021052
2021-05-15 15:07:00 0.012035 0.00001980 0.001969 0.00089589 0.00003897 0.0020955
2021-05-15 15:08:00 0.012026 0.00001983 0.001970 0.00089409 0.00003881 0.0020878
我正在尝试对所有不同的列组合运行线性回归。例如:
lm(dat$BNBBTC ~ dat$NULSBTC)
lm(dat$BNBBTC ~ dat$NEOBTC)
lm(dat$BNBBTC ~ dat$LINKBTC)
...
我尝试先展开网格并创建公式。
# Attempted approach: build every (y, x) pair of column names, turn each pair
# into a formula string, then try to fit one model per formula inside mutate().
expand.grid(y = colnames(dat), x = colnames(dat), KEEP.OUT.ATTRS = FALSE) %>%
mutate(
formula = paste(y, "~", x, sep = " ")
) %>%
group_by(formula) %>%
mutate(
# NOTE(review): `$r.squared` is applied to the lm() result before summary()
# is called, and the value is not wrapped in list().
mod = summary(lm(formula, data = dat)$r.squared)
)
但这会产生错误,因为 `mutate` 函数并不能直接处理线性模型。我想将线性回归模型存储在嵌套列中,以便访问系数、p 值并执行一些检验。
# Reproducible example data: a 20 x 6 numeric matrix of class xts/zoo with
# columns BNBBTC, NULSBTC, NEOBTC, LINKBTC, IOTABTC and ETCBTC. The index
# holds 20 POSIXct timestamps spaced 60 seconds apart.
dat <- structure(c(0.012036, 0.012039, 0.012032, 0.012017, 0.012026,
0.012012, 0.012019, 0.012022, 0.012022, 0.012005, 0.012015, 0.012016,
0.012032, 0.012029, 0.012014, 0.012017, 0.012017, 0.012021, 0.012035,
0.012026, 0.00001983, 0.00001983, 0.00001975, 0.00001974, 0.00001975,
0.00001973, 0.00001973, 0.00001977, 0.00001977, 0.00001977, 0.00001984,
0.00001984, 0.0000198, 0.00001969, 0.00001974, 0.00001975, 0.00001975,
0.0000198, 0.0000198, 0.00001983, 0.001972, 0.001968, 0.001972,
0.001969, 0.001967, 0.001966, 0.001965, 0.001966, 0.001967, 0.001967,
0.001965, 0.001967, 0.001966, 0.001966, 0.001966, 0.001966, 0.001968,
0.001967, 0.001969, 0.00197, 0.00089782, 0.00089699, 0.00089625,
0.00089484, 0.00089538, 0.00089511, 0.00089463, 0.00089541, 0.00089546,
0.000895, 0.0008954, 0.00089574, 0.00089636, 0.000896, 0.00089401,
0.0008941, 0.00089554, 0.000896, 0.00089589, 0.00089409, 0.00003891,
0.00003889, 0.0000389, 0.00003885, 0.00003885, 0.00003885, 0.00003882,
0.00003879, 0.00003879, 0.00003886, 0.00003887, 0.00003882, 0.00003896,
0.00003894, 0.00003891, 0.0000389, 0.00003889, 0.00003898, 0.00003897,
0.00003881, 0.0021133, 0.0021148, 0.0021139, 0.0021152, 0.002114,
0.0021104, 0.0021097, 0.0021131, 0.0021113, 0.002111, 0.0021107,
0.0021088, 0.0021155, 0.0021191, 0.0021166, 0.0021187, 0.0021109,
0.0021052, 0.0020955, 0.0020878), class = c("xts", "zoo"), index = structure(c(1621082940,
1621083000, 1621083060, 1621083120, 1621083180, 1621083240, 1621083300,
1621083360, 1621083420, 1621083480, 1621083540, 1621083600, 1621083660,
1621083720, 1621083780, 1621083840, 1621083900, 1621083960, 1621084020,
1621084080), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(20L,
6L), .Dimnames = list(NULL, c("BNBBTC", "NULSBTC", "NEOBTC",
"LINKBTC", "IOTABTC", "ETCBTC")))
编辑:
这可能会得到我想要的
# Character vector with one "y ~ x" formula string per (y, x) pair of column
# names, including each column paired with itself.
myFormulas <- with(
  expand.grid(y = colnames(dat), x = colnames(dat), KEEP.OUT.ATTRS = FALSE),
  paste(y, "~", x)
)

# Fit one lm() per formula string; the result is a list of fitted models.
map(myFormulas, ~ lm(.x, data = dat))
我们可以使用 `rowwise` 或 `map` 来遍历公式,并创建一个保存模型摘要输出的 `list` 列:
# Fit one simple regression per (y, x) pair of column names, working row by
# row, and keep each model summary in the list column `mod`.
out <- expand.grid(
  y = colnames(dat),
  x = colnames(dat),
  KEEP.OUT.ATTRS = FALSE
) %>%
  mutate(formula = paste(y, "~", x)) %>%
  rowwise() %>%
  # list() wraps the summary so it fits in a single cell of the row
  mutate(mod = list(summary(lm(as.formula(formula), data = dat)))) %>%
  ungroup()
-输出
out
# A tibble: 36 x 4
y x formula mod
<fct> <fct> <chr> <list>
1 BNBBTC BNBBTC BNBBTC ~ BNBBTC <smmry.lm>
2 NULSBTC BNBBTC NULSBTC ~ BNBBTC <smmry.lm>
3 NEOBTC BNBBTC NEOBTC ~ BNBBTC <smmry.lm>
4 LINKBTC BNBBTC LINKBTC ~ BNBBTC <smmry.lm>
5 IOTABTC BNBBTC IOTABTC ~ BNBBTC <smmry.lm>
6 ETCBTC BNBBTC ETCBTC ~ BNBBTC <smmry.lm>
7 BNBBTC NULSBTC BNBBTC ~ NULSBTC <smmry.lm>
8 NULSBTC NULSBTC NULSBTC ~ NULSBTC <smmry.lm>
9 NEOBTC NULSBTC NEOBTC ~ NULSBTC <smmry.lm>
10 LINKBTC NULSBTC LINKBTC ~ NULSBTC <smmry.lm>
# … with 26 more rows
或使用map2
library(purrr)
library(tidyr)

# Iterate over the predictor (x) and response (y) names in parallel, build
# each formula with reformulate(), and store the model summaries in the list
# column `mod`.
out <- crossing(y = colnames(dat), x = colnames(dat)) %>%
  mutate(
    mod = map2(
      x, y,
      function(.x, .y) summary(lm(reformulate(.x, response = .y), data = dat))
    )
  )
'mod' 是一个 `list` 列,可以用 `pull` 提取出来:
out$mod
或
out %>%
pull(mod)
我有一些数据如下:
BNBBTC NULSBTC NEOBTC LINKBTC IOTABTC ETCBTC
2021-05-15 14:49:00 0.012036 0.00001983 0.001972 0.00089782 0.00003891 0.0021133
2021-05-15 14:50:00 0.012039 0.00001983 0.001968 0.00089699 0.00003889 0.0021148
2021-05-15 14:51:00 0.012032 0.00001975 0.001972 0.00089625 0.00003890 0.0021139
2021-05-15 14:52:00 0.012017 0.00001974 0.001969 0.00089484 0.00003885 0.0021152
2021-05-15 14:53:00 0.012026 0.00001975 0.001967 0.00089538 0.00003885 0.0021140
2021-05-15 14:54:00 0.012012 0.00001973 0.001966 0.00089511 0.00003885 0.0021104
2021-05-15 14:55:00 0.012019 0.00001973 0.001965 0.00089463 0.00003882 0.0021097
2021-05-15 14:56:00 0.012022 0.00001977 0.001966 0.00089541 0.00003879 0.0021131
2021-05-15 14:57:00 0.012022 0.00001977 0.001967 0.00089546 0.00003879 0.0021113
2021-05-15 14:58:00 0.012005 0.00001977 0.001967 0.00089500 0.00003886 0.0021110
2021-05-15 14:59:00 0.012015 0.00001984 0.001965 0.00089540 0.00003887 0.0021107
2021-05-15 15:00:00 0.012016 0.00001984 0.001967 0.00089574 0.00003882 0.0021088
2021-05-15 15:01:00 0.012032 0.00001980 0.001966 0.00089636 0.00003896 0.0021155
2021-05-15 15:02:00 0.012029 0.00001969 0.001966 0.00089600 0.00003894 0.0021191
2021-05-15 15:03:00 0.012014 0.00001974 0.001966 0.00089401 0.00003891 0.0021166
2021-05-15 15:04:00 0.012017 0.00001975 0.001966 0.00089410 0.00003890 0.0021187
2021-05-15 15:05:00 0.012017 0.00001975 0.001968 0.00089554 0.00003889 0.0021109
2021-05-15 15:06:00 0.012021 0.00001980 0.001967 0.00089600 0.00003898 0.0021052
2021-05-15 15:07:00 0.012035 0.00001980 0.001969 0.00089589 0.00003897 0.0020955
2021-05-15 15:08:00 0.012026 0.00001983 0.001970 0.00089409 0.00003881 0.0020878
我正在尝试对所有不同的列组合运行线性回归。例如:
lm(dat$BNBBTC ~ dat$NULSBTC)
lm(dat$BNBBTC ~ dat$NEOBTC)
lm(dat$BNBBTC ~ dat$LINKBTC)
...
我尝试先展开网格并创建公式。
# Attempted approach: build every (y, x) pair of column names, turn each pair
# into a formula string, then try to fit one model per formula inside mutate().
expand.grid(y = colnames(dat), x = colnames(dat), KEEP.OUT.ATTRS = FALSE) %>%
mutate(
formula = paste(y, "~", x, sep = " ")
) %>%
group_by(formula) %>%
mutate(
# NOTE(review): `$r.squared` is applied to the lm() result before summary()
# is called, and the value is not wrapped in list().
mod = summary(lm(formula, data = dat)$r.squared)
)
但这会产生错误,因为 `mutate` 函数并不能直接处理线性模型。我想将线性回归模型存储在嵌套列中,以便访问系数、p 值并执行一些检验。
# Reproducible example data: a 20 x 6 numeric matrix of class xts/zoo with
# columns BNBBTC, NULSBTC, NEOBTC, LINKBTC, IOTABTC and ETCBTC. The index
# holds 20 POSIXct timestamps spaced 60 seconds apart.
dat <- structure(c(0.012036, 0.012039, 0.012032, 0.012017, 0.012026,
0.012012, 0.012019, 0.012022, 0.012022, 0.012005, 0.012015, 0.012016,
0.012032, 0.012029, 0.012014, 0.012017, 0.012017, 0.012021, 0.012035,
0.012026, 0.00001983, 0.00001983, 0.00001975, 0.00001974, 0.00001975,
0.00001973, 0.00001973, 0.00001977, 0.00001977, 0.00001977, 0.00001984,
0.00001984, 0.0000198, 0.00001969, 0.00001974, 0.00001975, 0.00001975,
0.0000198, 0.0000198, 0.00001983, 0.001972, 0.001968, 0.001972,
0.001969, 0.001967, 0.001966, 0.001965, 0.001966, 0.001967, 0.001967,
0.001965, 0.001967, 0.001966, 0.001966, 0.001966, 0.001966, 0.001968,
0.001967, 0.001969, 0.00197, 0.00089782, 0.00089699, 0.00089625,
0.00089484, 0.00089538, 0.00089511, 0.00089463, 0.00089541, 0.00089546,
0.000895, 0.0008954, 0.00089574, 0.00089636, 0.000896, 0.00089401,
0.0008941, 0.00089554, 0.000896, 0.00089589, 0.00089409, 0.00003891,
0.00003889, 0.0000389, 0.00003885, 0.00003885, 0.00003885, 0.00003882,
0.00003879, 0.00003879, 0.00003886, 0.00003887, 0.00003882, 0.00003896,
0.00003894, 0.00003891, 0.0000389, 0.00003889, 0.00003898, 0.00003897,
0.00003881, 0.0021133, 0.0021148, 0.0021139, 0.0021152, 0.002114,
0.0021104, 0.0021097, 0.0021131, 0.0021113, 0.002111, 0.0021107,
0.0021088, 0.0021155, 0.0021191, 0.0021166, 0.0021187, 0.0021109,
0.0021052, 0.0020955, 0.0020878), class = c("xts", "zoo"), index = structure(c(1621082940,
1621083000, 1621083060, 1621083120, 1621083180, 1621083240, 1621083300,
1621083360, 1621083420, 1621083480, 1621083540, 1621083600, 1621083660,
1621083720, 1621083780, 1621083840, 1621083900, 1621083960, 1621084020,
1621084080), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(20L,
6L), .Dimnames = list(NULL, c("BNBBTC", "NULSBTC", "NEOBTC",
"LINKBTC", "IOTABTC", "ETCBTC")))
编辑:
这可能会得到我想要的
# Character vector with one "y ~ x" formula string per (y, x) pair of column
# names, including each column paired with itself.
myFormulas <- with(
  expand.grid(y = colnames(dat), x = colnames(dat), KEEP.OUT.ATTRS = FALSE),
  paste(y, "~", x)
)

# Fit one lm() per formula string; the result is a list of fitted models.
map(myFormulas, ~ lm(.x, data = dat))
我们可以使用 `rowwise` 或 `map` 来遍历公式,并创建一个保存模型摘要输出的 `list` 列:
# Fit one simple regression per (y, x) pair of column names, working row by
# row, and keep each model summary in the list column `mod`.
out <- expand.grid(
  y = colnames(dat),
  x = colnames(dat),
  KEEP.OUT.ATTRS = FALSE
) %>%
  mutate(formula = paste(y, "~", x)) %>%
  rowwise() %>%
  # list() wraps the summary so it fits in a single cell of the row
  mutate(mod = list(summary(lm(as.formula(formula), data = dat)))) %>%
  ungroup()
-输出
out
# A tibble: 36 x 4
y x formula mod
<fct> <fct> <chr> <list>
1 BNBBTC BNBBTC BNBBTC ~ BNBBTC <smmry.lm>
2 NULSBTC BNBBTC NULSBTC ~ BNBBTC <smmry.lm>
3 NEOBTC BNBBTC NEOBTC ~ BNBBTC <smmry.lm>
4 LINKBTC BNBBTC LINKBTC ~ BNBBTC <smmry.lm>
5 IOTABTC BNBBTC IOTABTC ~ BNBBTC <smmry.lm>
6 ETCBTC BNBBTC ETCBTC ~ BNBBTC <smmry.lm>
7 BNBBTC NULSBTC BNBBTC ~ NULSBTC <smmry.lm>
8 NULSBTC NULSBTC NULSBTC ~ NULSBTC <smmry.lm>
9 NEOBTC NULSBTC NEOBTC ~ NULSBTC <smmry.lm>
10 LINKBTC NULSBTC LINKBTC ~ NULSBTC <smmry.lm>
# … with 26 more rows
或使用map2
library(purrr)
library(tidyr)

# Iterate over the predictor (x) and response (y) names in parallel, build
# each formula with reformulate(), and store the model summaries in the list
# column `mod`.
out <- crossing(y = colnames(dat), x = colnames(dat)) %>%
  mutate(
    mod = map2(
      x, y,
      function(.x, .y) summary(lm(reformulate(.x, response = .y), data = dat))
    )
  )
'mod' 是一个 `list` 列,可以用 `pull` 提取出来:
out$mod
或
out %>%
pull(mod)