如何计算曲线上不同间隔的回归线斜率并应用于多列
How to calculate slope of regression line at different intervals on a curve and apply to multiple columns
我想计算曲线上不同区间内回归线的斜率,并将该计算应用于数据框 a 中的多个列,以生成新的数据框 Curve_9.20。下面是一个经过大幅精简的数据框,以及我用来运行函数 Curve 的代码,但它产生了如下所示的错误。
# Reproducible sample data (dput output): a RunTime column plus two O2
# columns, 41 rows, with row names 142:182.
a <- structure(list(RunTime = c(7.043, 7.093, 7.143, 7.193, 7.243,
7.293, 7.343, 7.393, 7.443, 7.493, 7.543, 7.593, 7.643, 7.693,
7.743, 7.793, 7.843, 7.893, 7.943, 7.993, 8.043, 8.093, 8.142,
8.193, 8.243, 8.293, 8.343, 8.393, 8.443, 8.493, 8.543, 8.593,
8.643, 8.693, 8.743, 8.793, 8.843, 8.893, 8.943, 8.993, 9.043
), O2_229 = c(216.272, 216.006, 216.164, 215.976, 215.937, 216.012,
216.224, 216.354, 216.584, 216.477, 216.347, 216.538, 216.315,
216.385, 216.297, 216.363, 216.353, 216.301, 216.379, 216.594,
216.192, 215.955, 216.134, 216.155, 216.254, 216.316, 216.313,
216.413, 216.477, 216.225, 216.347, 216.16, 215.976, 216.093,
216.224, 216.179, 216.239, 216.349, 216.085, 216.089, 216.213
), O2_35 = c(183.962, 184.032, 184.143, 184.134, 183.979, 184.028,
184.176, 184.055, 184.119, 184.137, 184.094, 184.114, 184.308,
184.097, 184.2, 184.287, 184.048, 184.263, 184.263, 184.346,
184.322, 184.256, 184.462, 184.155, 184.246, 184.328, 184.232,
184.418, 184.379, 184.331, 184.367, 184.298, 184.351, 184.375,
184.333, 184.351, 184.33, 184.247, 184.388, 184.404, 184.392)), class =
"data.frame", row.names = 142:182)
# Regress `y` on RunTime within two fixed RunTime windows of the global data
# frame `a` and return the second coefficient (the RunTime slope) of each fit.
# NOTE: this version fails when `y` is a full-length column of `a`: `y` is
# resolved to the function argument (all rows) while `RunTime` is taken from
# the subset, so lm() stops with "variable lengths differ".
Curve <- function(y)
{
c(coef(lm(y ~ RunTime, subset(a, RunTime >= 7.043 & RunTime <=
8.043)))[2], # 10%
coef(lm(y ~ RunTime, subset(a, RunTime >= 9.000 & RunTime <=
9.043)))[2]) # 20%
}
# Apply Curve to columns 2 and 3 of `a` (each passed as a vector of values)
# and bind the results next to the light-intensity labels.
Curve_9.20 <- data.frame("LightIntensity" = c("10%", "20%"),
lapply(a[c(2,3)], Curve))
Error in model.frame.default(formula = y ~ RunTime, data =
subset(a, : variable lengths differ (found for 'RunTime')
您传给 lm 的 y 变量来自原始(完整)数据集,而 x 变量(RunTime)来自子集,两者长度不同,因此出现 "variable lengths differ" 错误。解决办法是向 lm 传入列名而不是列本身,这样公式中的所有变量都会从子集中取值。
由于两种情况下公式的右侧相同,最简单的解决方法是把整个公式拼成一个字符串传给 lm,这样在每次 lapply 迭代中只有 y(列名)会发生变化。
# Reproducible sample data (dput output): a RunTime column plus two O2
# columns, 41 rows, with row names 142:182.
a <- structure(list(RunTime = c(7.043, 7.093, 7.143, 7.193, 7.243,
7.293, 7.343, 7.393, 7.443, 7.493, 7.543, 7.593, 7.643, 7.693,
7.743, 7.793, 7.843, 7.893, 7.943, 7.993, 8.043, 8.093, 8.142,
8.193, 8.243, 8.293, 8.343, 8.393, 8.443, 8.493, 8.543, 8.593,
8.643, 8.693, 8.743, 8.793, 8.843, 8.893, 8.943, 8.993, 9.043
), O2_229 = c(216.272, 216.006, 216.164, 215.976, 215.937, 216.012,
216.224, 216.354, 216.584, 216.477, 216.347, 216.538, 216.315,
216.385, 216.297, 216.363, 216.353, 216.301, 216.379, 216.594,
216.192, 215.955, 216.134, 216.155, 216.254, 216.316, 216.313,
216.413, 216.477, 216.225, 216.347, 216.16, 215.976, 216.093,
216.224, 216.179, 216.239, 216.349, 216.085, 216.089, 216.213
), O2_35 = c(183.962, 184.032, 184.143, 184.134, 183.979, 184.028,
184.176, 184.055, 184.119, 184.137, 184.094, 184.114, 184.308,
184.097, 184.2, 184.287, 184.048, 184.263, 184.263, 184.346,
184.322, 184.256, 184.462, 184.155, 184.246, 184.328, 184.232,
184.418, 184.379, 184.331, 184.367, 184.298, 184.351, 184.375,
184.333, 184.351, 184.33, 184.247, 184.388, 184.404, 184.392)), class =
"data.frame", row.names = 142:182)
# Slope of the regression of one column of `a` on RunTime within two fixed
# RunTime windows.
#
# y: name (a single character string) of the response column in the global
#    data frame `a`.
# Returns a length-2 numeric vector whose elements are both named "RunTime":
# the slope for 7.043 <= RunTime <= 8.043 (10%) followed by the slope for
# 9.000 <= RunTime <= 9.043 (20%). A window that contains a single row yields
# NA, because a slope cannot be estimated from one point.
Curve <- function(y) {
  stopifnot(is.character(y), length(y) == 1L)
  # Build a real formula from the column name so that both the response and
  # RunTime are looked up in the subset handed to lm(); this keeps their
  # lengths equal.
  model <- reformulate("RunTime", response = y)
  window_slope <- function(lower, upper) {
    rows <- subset(a, RunTime >= lower & RunTime <= upper)
    # Select the slope by name rather than by position.
    coef(lm(model, data = rows))["RunTime"]
  }
  c(window_slope(7.043, 8.043), # 10%
    window_slope(9.000, 9.043)) # 20%
}
# Build the result table: one row per light-intensity window and one slope
# column per O2 column of `a`. Curve receives column names, and the list of
# results is named so the slope columns are called after their source columns
# (O2_229, O2_35).
Curve_9.20 <- data.frame(
  "LightIntensity" = c("10%", "20%"),
  setNames(lapply(names(a)[c(2, 3)], Curve), names(a)[c(2, 3)])
)
我想计算曲线上不同区间内回归线的斜率,并将该计算应用于数据框 a 中的多个列,以生成新的数据框 Curve_9.20。下面是一个经过大幅精简的数据框,以及我用来运行函数 Curve 的代码,但它产生了如下所示的错误。
# Reproducible sample data (dput output): a RunTime column plus two O2
# columns, 41 rows, with row names 142:182.
a <- structure(list(RunTime = c(7.043, 7.093, 7.143, 7.193, 7.243,
7.293, 7.343, 7.393, 7.443, 7.493, 7.543, 7.593, 7.643, 7.693,
7.743, 7.793, 7.843, 7.893, 7.943, 7.993, 8.043, 8.093, 8.142,
8.193, 8.243, 8.293, 8.343, 8.393, 8.443, 8.493, 8.543, 8.593,
8.643, 8.693, 8.743, 8.793, 8.843, 8.893, 8.943, 8.993, 9.043
), O2_229 = c(216.272, 216.006, 216.164, 215.976, 215.937, 216.012,
216.224, 216.354, 216.584, 216.477, 216.347, 216.538, 216.315,
216.385, 216.297, 216.363, 216.353, 216.301, 216.379, 216.594,
216.192, 215.955, 216.134, 216.155, 216.254, 216.316, 216.313,
216.413, 216.477, 216.225, 216.347, 216.16, 215.976, 216.093,
216.224, 216.179, 216.239, 216.349, 216.085, 216.089, 216.213
), O2_35 = c(183.962, 184.032, 184.143, 184.134, 183.979, 184.028,
184.176, 184.055, 184.119, 184.137, 184.094, 184.114, 184.308,
184.097, 184.2, 184.287, 184.048, 184.263, 184.263, 184.346,
184.322, 184.256, 184.462, 184.155, 184.246, 184.328, 184.232,
184.418, 184.379, 184.331, 184.367, 184.298, 184.351, 184.375,
184.333, 184.351, 184.33, 184.247, 184.388, 184.404, 184.392)), class =
"data.frame", row.names = 142:182)
# Regress `y` on RunTime within two fixed RunTime windows of the global data
# frame `a` and return the second coefficient (the RunTime slope) of each fit.
# NOTE: this version fails when `y` is a full-length column of `a`: `y` is
# resolved to the function argument (all rows) while `RunTime` is taken from
# the subset, so lm() stops with "variable lengths differ".
Curve <- function(y)
{
c(coef(lm(y ~ RunTime, subset(a, RunTime >= 7.043 & RunTime <=
8.043)))[2], # 10%
coef(lm(y ~ RunTime, subset(a, RunTime >= 9.000 & RunTime <=
9.043)))[2]) # 20%
}
# Apply Curve to columns 2 and 3 of `a` (each passed as a vector of values)
# and bind the results next to the light-intensity labels.
Curve_9.20 <- data.frame("LightIntensity" = c("10%", "20%"),
lapply(a[c(2,3)], Curve))
Error in model.frame.default(formula = y ~ RunTime, data =
subset(a, : variable lengths differ (found for 'RunTime')
您传给 lm 的 y 变量来自原始(完整)数据集,而 x 变量(RunTime)来自子集,两者长度不同,因此出现 "variable lengths differ" 错误。解决办法是向 lm 传入列名而不是列本身,这样公式中的所有变量都会从子集中取值。
由于两种情况下公式的右侧相同,最简单的解决方法是把整个公式拼成一个字符串传给 lm,这样在每次 lapply 迭代中只有 y(列名)会发生变化。
# Reproducible sample data (dput output): a RunTime column plus two O2
# columns, 41 rows, with row names 142:182.
a <- structure(list(RunTime = c(7.043, 7.093, 7.143, 7.193, 7.243,
7.293, 7.343, 7.393, 7.443, 7.493, 7.543, 7.593, 7.643, 7.693,
7.743, 7.793, 7.843, 7.893, 7.943, 7.993, 8.043, 8.093, 8.142,
8.193, 8.243, 8.293, 8.343, 8.393, 8.443, 8.493, 8.543, 8.593,
8.643, 8.693, 8.743, 8.793, 8.843, 8.893, 8.943, 8.993, 9.043
), O2_229 = c(216.272, 216.006, 216.164, 215.976, 215.937, 216.012,
216.224, 216.354, 216.584, 216.477, 216.347, 216.538, 216.315,
216.385, 216.297, 216.363, 216.353, 216.301, 216.379, 216.594,
216.192, 215.955, 216.134, 216.155, 216.254, 216.316, 216.313,
216.413, 216.477, 216.225, 216.347, 216.16, 215.976, 216.093,
216.224, 216.179, 216.239, 216.349, 216.085, 216.089, 216.213
), O2_35 = c(183.962, 184.032, 184.143, 184.134, 183.979, 184.028,
184.176, 184.055, 184.119, 184.137, 184.094, 184.114, 184.308,
184.097, 184.2, 184.287, 184.048, 184.263, 184.263, 184.346,
184.322, 184.256, 184.462, 184.155, 184.246, 184.328, 184.232,
184.418, 184.379, 184.331, 184.367, 184.298, 184.351, 184.375,
184.333, 184.351, 184.33, 184.247, 184.388, 184.404, 184.392)), class =
"data.frame", row.names = 142:182)
# Slope of the regression of one column of `a` on RunTime within two fixed
# RunTime windows.
#
# y: name (a single character string) of the response column in the global
#    data frame `a`.
# Returns a length-2 numeric vector whose elements are both named "RunTime":
# the slope for 7.043 <= RunTime <= 8.043 (10%) followed by the slope for
# 9.000 <= RunTime <= 9.043 (20%). A window that contains a single row yields
# NA, because a slope cannot be estimated from one point.
Curve <- function(y) {
  stopifnot(is.character(y), length(y) == 1L)
  # Build a real formula from the column name so that both the response and
  # RunTime are looked up in the subset handed to lm(); this keeps their
  # lengths equal.
  model <- reformulate("RunTime", response = y)
  window_slope <- function(lower, upper) {
    rows <- subset(a, RunTime >= lower & RunTime <= upper)
    # Select the slope by name rather than by position.
    coef(lm(model, data = rows))["RunTime"]
  }
  c(window_slope(7.043, 8.043), # 10%
    window_slope(9.000, 9.043)) # 20%
}
# Build the result table: one row per light-intensity window and one slope
# column per O2 column of `a`. Curve receives column names, and the list of
# results is named so the slope columns are called after their source columns
# (O2_229, O2_35).
Curve_9.20 <- data.frame(
  "LightIntensity" = c("10%", "20%"),
  setNames(lapply(names(a)[c(2, 3)], Curve), names(a)[c(2, 3)])
)