如何计算 R 中移动 window 内的平均斜率
How to calculate the average slope within a moving window in R
我的数据集 包含 2 个变量 y 和 t [05s]。 y 每 05 秒测量一次。
我正在尝试计算移动 20 秒-window 内的平均斜率,即在计算第一个 20 秒斜率值后 window 向前移动一个时间单位(05 秒)并计算下一个 20 秒 -window,以 05 秒增量 产生连续的 20 秒斜率值。
我认为 使用 rollapply(动物园包)计算滚动回归可以解决问题,但我得到每个 window 相同的截距和斜率值又一遍。我能做什么?
我的数据:
dput(DataExample)
structure(list(t = c(0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35,
0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95,
1, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.55,
1.6, 1.65, 1.7, 1.75, 1.8, 1.85, 1.9, 1.95, 2, 2.05, 2.1, 2.15,
2.2, 2.25, 2.3, 2.35, 2.4, 2.45, 2.5, 2.55, 2.6, 2.65, 2.7, 2.75,
2.8, 2.85, 2.9, 2.95, 3, 3.05, 3.1, 3.15, 3.2, 3.25, 3.3, 3.35,
3.4, 3.45, 3.5, 3.55, 3.6, 3.65, 3.7, 3.75, 3.8, 3.85, 3.9, 3.95,
4, 4.05, 4.1, 4.15, 4.2, 4.25, 4.3, 4.35, 4.4, 4.45, 4.5, 4.55,
4.6, 4.65, 4.7, 4.75, 4.8, 4.85, 4.9, 4.95, 5, 5.05, 5.1, 5.15,
5.2, 5.25, 5.3, 5.35, 5.4, 5.45, 5.5, 5.55, 5.6, 5.65, 5.7, 5.75,
5.8, 5.85, 5.9, 5.95, 6, 6.05, 6.1, 6.15, 6.2, 6.25, 6.3, 6.35,
6.4, 6.45, 6.5, 6.55, 6.6, 6.65, 6.7, 6.75, 6.8, 6.85, 6.9, 6.95,
7, 7.05, 7.1, 7.15, 7.2, 7.25, 7.3, 7.35, 7.4, 7.45, 7.5, 7.55,
7.6, 7.65, 7.7, 7.75, 7.8, 7.85, 7.9, 7.95, 8, 8.05, 8.1, 8.15,
8.2, 8.25, 8.3, 8.35, 8.4, 8.45, 8.5, 8.55, 8.6, 8.65, 8.7, 8.75,
8.8, 8.85, 8.9, 8.95, 9, 9.05, 9.1, 9.15, 9.2, 9.25, 9.3, 9.35,
9.4, 9.45, 9.5, 9.55, 9.6, 9.65, 9.7, 9.75, 9.8, 9.85, 9.9, 9.95,
10, 10.05, 10.1, 10.15, 10.2, 10.25, 10.3), y = c(3.05, 3.04,
3.02, 3.05, 3.01, 3.02, 3.02, 3.05, 3.02, 3.01, 3.04, 3.04, 3.03,
3.03, 3.03, 3.02, 3.02, 3.03, 3.03, 3.03, 3.04, 3.03, 3.03, 3.03,
3.03, 3.02, 3.02, 3.02, 3.01, 3.03, 3.03, 3.03, 3.03, 3.03, 3.02,
3.01, 3.02, 3.02, 3.01, 3.02, 3.02, 3.02, 3.03, 3.02, 3.02, 3.01,
3.01, 3.02, 3.01, 3.02, 3.02, 3.02, 3.02, 3.01, 3.01, 3.01, 3.01,
3.02, 3, 3.01, 3.02, 3.02, 3.02, 3.01, 3.01, 3.01, 3.01, 3.02,
3, 3.01, 3.01, 3.01, 3.01, 3.01, 3.01, 3, 3, 3.01, 3, 3, 3.01,
3.01, 3.01, 3.01, 3, 3, 3, 3.01, 3, 3, 3.01, 3.01, 3.01, 3.01,
3.01, 3.01, 3, 3.02, 3, 3.01, 3.02, 3.04, 3.05, 3.08, 3.04, 3.06,
3.08, 3.06, 3.08, 3.09, 3.04, 3.05, 3.07, 3.08, 3.06, 3.08, 3.08,
3.07, 3.08, 3.08, 3.05, 3.06, 3.07, 3.07, 3.06, 3.08, 3.08, 3.08,
3.08, 3.08, 3.05, 3.06, 3.08, 3.08, 3.06, 3.09, 3.07, 3.08, 3.08,
3.08, 3.06, 3.07, 3.07, 3.07, 3.06, 3.09, 3.07, 3.07, 3.08, 3.08,
3.06, 3.07, 3.07, 3.07, 3.06, 3.09, 3.07, 3.07, 3.07, 3.08, 3.07,
3.07, 3.07, 3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.08, 3.07, 3.07,
3.07, 3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.08, 3.06, 3.07, 3.06,
3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.07, 3.06, 3.07, 3.06, 3.07,
3.06, 3.07, 3.06, 3.06, 3.06, 3.07, 3.04, 3.04, 3.04, 3.06, 3.06,
3.04, 3.04)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-207L), .Names = c("t", "y"))
R-代码:
require(zoo)
library("zoo", lib.loc="~/R/win-library/3.3")
rollapply(zoo(DataExample),
width=5,
FUN = function(Z)
{
z = lm(formula=y~t, data = as.data.frame(DataExample));
return(z$coef)
}, by=1,
by.column=FALSE, align="right")
如果没有动物园库,我会这样做
## Modified version of your function that does not rely on accessing
## variables that is external to its environment.
slopes<-function(data) {
z = lm(formula=y~t, data=data );
z$coef ## Implicit return of last variable
}
## The number of frames to take the windowed slope of
windowsize<-4
do.call(rbind,lapply(seq(dim(data)[1]-windowsize),
function(x) slopes(data[x:(x+windowsize),])))
它遍历从 1 到 length data
的列表 - windowsize
子集 data
重叠 window 大小为 4。子集数据然后传递到您的斜坡在绑定到单个数组之前的函数。
我曾尝试将斜坡绘制为 geom_segment()
但我失败了。至少我得到了具有不同斜率值的 df:
slope <- function(dat){
return(data.frame(t = sprintf("[%f,%f]", min(dat$t), max(dat$t)),
slope = lm(y~t-1, data = dat)$coef,
row.names = NULL)
)
}
mw <- function(dtf, wdth = 0.2, incr = 0.05){
if(!nrow(dtf)){
return(data.frame())
}
return(rbind(slope(dtf[dtf$t <= min(dtf$t) + wdth,]),
mw(dtf[dtf$t >= min(dtf$t) + incr,])
)
)
}
slp <- mw(dtf)
head(slp)
tail(slp)
# t slope
# 1 [0.000000,0.200000] 20.180000
# 2 [0.050000,0.250000] 16.498182
# 3 [0.100000,0.300000] 13.433333
# 4 [0.200000,0.400000] 9.554737
# 5 [0.250000,0.450000] 8.299608
# 6 [0.300000,0.500000] 7.340606
# ...
#175 [9.900000,10.100000] 0.3049778
#176 [10.000000,10.200000] 0.3017733
#177 [10.050000,10.250000] 0.3002829
#178 [10.150000,10.300000] 0.2982748
#179 [10.250000,10.300000] 0.2958620
#180 [10.300000,10.300000] 0.2951456
评论好像被删了,但是有人指出题中代码中rollapply的函数没有使用传递给它的参数。修复该问题并进行其他一些小改进后,returns 分别在第 1 列和第 2 列中截距和斜率。
library(zoo)
Coef <- function(Z) coef(lm(y ~ t, as.data.frame(Z)))
rollapplyr(zoo(DataExample), 5, Coef, by.column = FALSE)
这里有一个完整的代码来说明我的意思是.lm.fit
和lm
的速度。
以及 data.table.
的用法
library(zoo)
library(data.table)
library(ggplot2)
theme_set(theme_bw())
library(microbenchmark)
# function for linear regression and find the slope coefficient
rollingSlope.lm <- function(vector) {
a <- coef(lm(vector ~ seq(vector)))[2]
return(a)
}
rollingSlope.lm.fit <- function(vector) {
a <- coef(.lm.fit(cbind(1, seq(vector)), vector))[2]
return(a)
}
# create data example
test <- data.table(x = seq(100), y = dnorm(seq(100), mean=75, sd=30))
ggplot(test, aes(x, y))+ geom_point()
# graphics about the slope calculated
test[, ':=' (Slope.lm.fit = rollapply(y, width=5, FUN=rollingSlope.lm.fit, fill=NA),
Slope.lm = rollapply(y, width=5, FUN=rollingSlope.lm, fill=NA))]
# change the width size
test[, ':=' (Slope.lm.fit.50 = rollapply(y, width=50, FUN=rollingSlope.lm.fit, fill=NA),
Slope.lm.50 = rollapply(y, width=50, FUN=rollingSlope.lm, fill=NA))]
# melt data for plotting
test2 <- melt.data.table(test, measure.vars=c("Slope.lm.fit", "Slope.lm", "Slope.lm.fit.50", "Slope.lm.50"))
ggplot(test2, aes(x, value))+ geom_point(aes(color=variable))
# efficiency of the 2 lm
mb <- microbenchmark(lm.fit = a <- rollapply(test$y, 5, rollingSlope.lm.fit, fill=NA),
lm = b <- rollapply(test$y, 5, rollingSlope.lm, fill=NA))
# check if they equal
all.equal(a, b, check.attributes=FALSE)
# TRUE
# plot results
boxplot(mb, unit="ms", notch=TRUE)
我的数据集 包含 2 个变量 y 和 t [05s]。 y 每 05 秒测量一次。
我正在尝试计算移动 20 秒-window 内的平均斜率,即在计算第一个 20 秒斜率值后 window 向前移动一个时间单位(05 秒)并计算下一个 20 秒 -window,以 05 秒增量 产生连续的 20 秒斜率值。
我认为 使用 rollapply(动物园包)计算滚动回归可以解决问题,但我得到每个 window 相同的截距和斜率值又一遍。我能做什么?
我的数据:
dput(DataExample)
structure(list(t = c(0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35,
0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95,
1, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.55,
1.6, 1.65, 1.7, 1.75, 1.8, 1.85, 1.9, 1.95, 2, 2.05, 2.1, 2.15,
2.2, 2.25, 2.3, 2.35, 2.4, 2.45, 2.5, 2.55, 2.6, 2.65, 2.7, 2.75,
2.8, 2.85, 2.9, 2.95, 3, 3.05, 3.1, 3.15, 3.2, 3.25, 3.3, 3.35,
3.4, 3.45, 3.5, 3.55, 3.6, 3.65, 3.7, 3.75, 3.8, 3.85, 3.9, 3.95,
4, 4.05, 4.1, 4.15, 4.2, 4.25, 4.3, 4.35, 4.4, 4.45, 4.5, 4.55,
4.6, 4.65, 4.7, 4.75, 4.8, 4.85, 4.9, 4.95, 5, 5.05, 5.1, 5.15,
5.2, 5.25, 5.3, 5.35, 5.4, 5.45, 5.5, 5.55, 5.6, 5.65, 5.7, 5.75,
5.8, 5.85, 5.9, 5.95, 6, 6.05, 6.1, 6.15, 6.2, 6.25, 6.3, 6.35,
6.4, 6.45, 6.5, 6.55, 6.6, 6.65, 6.7, 6.75, 6.8, 6.85, 6.9, 6.95,
7, 7.05, 7.1, 7.15, 7.2, 7.25, 7.3, 7.35, 7.4, 7.45, 7.5, 7.55,
7.6, 7.65, 7.7, 7.75, 7.8, 7.85, 7.9, 7.95, 8, 8.05, 8.1, 8.15,
8.2, 8.25, 8.3, 8.35, 8.4, 8.45, 8.5, 8.55, 8.6, 8.65, 8.7, 8.75,
8.8, 8.85, 8.9, 8.95, 9, 9.05, 9.1, 9.15, 9.2, 9.25, 9.3, 9.35,
9.4, 9.45, 9.5, 9.55, 9.6, 9.65, 9.7, 9.75, 9.8, 9.85, 9.9, 9.95,
10, 10.05, 10.1, 10.15, 10.2, 10.25, 10.3), y = c(3.05, 3.04,
3.02, 3.05, 3.01, 3.02, 3.02, 3.05, 3.02, 3.01, 3.04, 3.04, 3.03,
3.03, 3.03, 3.02, 3.02, 3.03, 3.03, 3.03, 3.04, 3.03, 3.03, 3.03,
3.03, 3.02, 3.02, 3.02, 3.01, 3.03, 3.03, 3.03, 3.03, 3.03, 3.02,
3.01, 3.02, 3.02, 3.01, 3.02, 3.02, 3.02, 3.03, 3.02, 3.02, 3.01,
3.01, 3.02, 3.01, 3.02, 3.02, 3.02, 3.02, 3.01, 3.01, 3.01, 3.01,
3.02, 3, 3.01, 3.02, 3.02, 3.02, 3.01, 3.01, 3.01, 3.01, 3.02,
3, 3.01, 3.01, 3.01, 3.01, 3.01, 3.01, 3, 3, 3.01, 3, 3, 3.01,
3.01, 3.01, 3.01, 3, 3, 3, 3.01, 3, 3, 3.01, 3.01, 3.01, 3.01,
3.01, 3.01, 3, 3.02, 3, 3.01, 3.02, 3.04, 3.05, 3.08, 3.04, 3.06,
3.08, 3.06, 3.08, 3.09, 3.04, 3.05, 3.07, 3.08, 3.06, 3.08, 3.08,
3.07, 3.08, 3.08, 3.05, 3.06, 3.07, 3.07, 3.06, 3.08, 3.08, 3.08,
3.08, 3.08, 3.05, 3.06, 3.08, 3.08, 3.06, 3.09, 3.07, 3.08, 3.08,
3.08, 3.06, 3.07, 3.07, 3.07, 3.06, 3.09, 3.07, 3.07, 3.08, 3.08,
3.06, 3.07, 3.07, 3.07, 3.06, 3.09, 3.07, 3.07, 3.07, 3.08, 3.07,
3.07, 3.07, 3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.08, 3.07, 3.07,
3.07, 3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.08, 3.06, 3.07, 3.06,
3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.07, 3.06, 3.07, 3.06, 3.07,
3.06, 3.07, 3.06, 3.06, 3.06, 3.07, 3.04, 3.04, 3.04, 3.06, 3.06,
3.04, 3.04)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-207L), .Names = c("t", "y"))
R-代码:
require(zoo)
library("zoo", lib.loc="~/R/win-library/3.3")
rollapply(zoo(DataExample),
width=5,
FUN = function(Z)
{
z = lm(formula=y~t, data = as.data.frame(DataExample));
return(z$coef)
}, by=1,
by.column=FALSE, align="right")
如果没有动物园库,我会这样做
## Modified version of your function that does not rely on accessing
## variables that is external to its environment.
slopes<-function(data) {
z = lm(formula=y~t, data=data );
z$coef ## Implicit return of last variable
}
## The number of frames to take the windowed slope of
windowsize<-4
do.call(rbind,lapply(seq(dim(data)[1]-windowsize),
function(x) slopes(data[x:(x+windowsize),])))
它遍历从 1 到 length data
的列表 - windowsize
子集 data
重叠 window 大小为 4。子集数据然后传递到您的斜坡在绑定到单个数组之前的函数。
我曾尝试将斜坡绘制为 geom_segment()
但我失败了。至少我得到了具有不同斜率值的 df:
slope <- function(dat){
return(data.frame(t = sprintf("[%f,%f]", min(dat$t), max(dat$t)),
slope = lm(y~t-1, data = dat)$coef,
row.names = NULL)
)
}
mw <- function(dtf, wdth = 0.2, incr = 0.05){
if(!nrow(dtf)){
return(data.frame())
}
return(rbind(slope(dtf[dtf$t <= min(dtf$t) + wdth,]),
mw(dtf[dtf$t >= min(dtf$t) + incr,])
)
)
}
slp <- mw(dtf)
head(slp)
tail(slp)
# t slope
# 1 [0.000000,0.200000] 20.180000
# 2 [0.050000,0.250000] 16.498182
# 3 [0.100000,0.300000] 13.433333
# 4 [0.200000,0.400000] 9.554737
# 5 [0.250000,0.450000] 8.299608
# 6 [0.300000,0.500000] 7.340606
# ...
#175 [9.900000,10.100000] 0.3049778
#176 [10.000000,10.200000] 0.3017733
#177 [10.050000,10.250000] 0.3002829
#178 [10.150000,10.300000] 0.2982748
#179 [10.250000,10.300000] 0.2958620
#180 [10.300000,10.300000] 0.2951456
评论好像被删了,但是有人指出题中代码中rollapply的函数没有使用传递给它的参数。修复该问题并进行其他一些小改进后,returns 分别在第 1 列和第 2 列中截距和斜率。
library(zoo)
Coef <- function(Z) coef(lm(y ~ t, as.data.frame(Z)))
rollapplyr(zoo(DataExample), 5, Coef, by.column = FALSE)
这里有一个完整的代码来说明我的意思是.lm.fit
和lm
的速度。
以及 data.table.
library(zoo)
library(data.table)
library(ggplot2)
theme_set(theme_bw())
library(microbenchmark)
# function for linear regression and find the slope coefficient
rollingSlope.lm <- function(vector) {
a <- coef(lm(vector ~ seq(vector)))[2]
return(a)
}
rollingSlope.lm.fit <- function(vector) {
a <- coef(.lm.fit(cbind(1, seq(vector)), vector))[2]
return(a)
}
# create data example
test <- data.table(x = seq(100), y = dnorm(seq(100), mean=75, sd=30))
ggplot(test, aes(x, y))+ geom_point()
# graphics about the slope calculated
test[, ':=' (Slope.lm.fit = rollapply(y, width=5, FUN=rollingSlope.lm.fit, fill=NA),
Slope.lm = rollapply(y, width=5, FUN=rollingSlope.lm, fill=NA))]
# change the width size
test[, ':=' (Slope.lm.fit.50 = rollapply(y, width=50, FUN=rollingSlope.lm.fit, fill=NA),
Slope.lm.50 = rollapply(y, width=50, FUN=rollingSlope.lm, fill=NA))]
# melt data for plotting
test2 <- melt.data.table(test, measure.vars=c("Slope.lm.fit", "Slope.lm", "Slope.lm.fit.50", "Slope.lm.50"))
ggplot(test2, aes(x, value))+ geom_point(aes(color=variable))
# efficiency of the 2 lm
mb <- microbenchmark(lm.fit = a <- rollapply(test$y, 5, rollingSlope.lm.fit, fill=NA),
lm = b <- rollapply(test$y, 5, rollingSlope.lm, fill=NA))
# check if they equal
all.equal(a, b, check.attributes=FALSE)
# TRUE
# plot results
boxplot(mb, unit="ms", notch=TRUE)