ggplot2中具有不连续时间序列的预测
forecast in ggplot2 with discontinuous time series
请参考这个示例文件。我想按时间预测体重。通常我会用下面的代码来做到这一点,但问题是我现在拥有的日期是不连续的:较早的数据是不定期记录的,而最近的数据是每天记录一次。我在某处读到,在这种情况下应该使用 xts 包而不是 ts。
我收到的错误信息是:
Warning message:
In window.default(x, ...) : 'end' value not changed
并且:
Error in window.default(x, ...) : 'start' cannot be after 'end'
我应该在哪里调整以下代码才能让预测运行起来?还是说我应该推算缺失的体重值,而不是对每日测量数据使用 ts?
require(ggplot2)
require(zoo) # as.yearmon() function
require(forecast) # for forecasting
require(xts) # extensible time series
# Fetch the example CSV.
# NOTE(review): the URL is not quoted and get.url() is not defined anywhere in
# this snippet, so this line will not parse as written — confirm the intended call.
x <- get.url(https://dl.dropboxusercontent.com/u/109495328/example.csv)
# Parse the CSV; the literal ".." in the file is read as NA.
app_df <- read.csv(x, header=T, sep = ",", quote = "", stringsAsFactors = FALSE, na.strings = "..")
colnames(app_df) <- c("Date", "Weight")
# Dates are day.month.year strings.
date <- as.Date(strptime(app_df$Date, "%d.%m.%Y"))
weight <- app_df$Weight
# Rows with a missing date or weight are dropped here, but df is not used by
# the rest of this snippet; the series below is built from the raw weight vector.
df <- na.omit(data.frame(date,weight))
w <- as.numeric(weight) # open question from the author: how to do this step with xts
# Attach a time axis with frequency 7.
# NOTE(review): start and end are both 2016, which presumably limits the series
# to a single time point and would explain the window() errors quoted above — confirm.
myts <- ts(w, start = c(2016), end = c(2016), freq = 7) # add time dimension
# tail(weight, n=1)
# Assemble observed, fitted and forecast values into one data frame for ggplot
# (function by Frank Davenport).
#
# dn:    the full time series; it is clipped with window() at the last
#        forecast time point.
# fcast: a forecast object; its $mean, $fitted and data-frame form are used.
# Returns the merged data frame holding the observed/fitted rows plus the
# forecast rows with their 80% and 95% interval bounds.
funggcast <- function(dn, fcast) {
  # Latest time point present in the forecast means
  horizon_end <- max(time(fcast$mean))

  # Observed values up to that point, together with their dates
  observed_df <- as.data.frame(window(dn, end = horizon_end))
  names(observed_df) <- 'observed'
  observed_df$date <- as.Date(time(window(dn, end = horizon_end)))

  # Fitted values of the model (interval bounds are not extracted here)
  fitted_df <- as.data.frame(fcast$fitted)
  fitted_df$date <- as.Date(time(fcast$fitted))
  names(fitted_df)[1] <- 'fitted'

  # Keep every observed row; attach fitted values where the dates match
  observed_df <- merge(observed_df, fitted_df, all.x = TRUE)

  # Forecast table: each row name gets "-01-01" appended to form a date
  forecast_df <- as.data.frame(fcast)
  forecast_df$date <- as.Date(paste(row.names(forecast_df), "01", "01", sep = "-"))
  names(forecast_df) <- c('forecast', 'lo80', 'hi80', 'lo95', 'hi95', 'date')

  # Full outer join: this is the data frame handed to ggplot
  merge(observed_df, forecast_df, all = TRUE)
}
# Training subset of the series.
# NOTE(review): end = 4360 is far beyond the 2016 time axis given to myts,
# which looks like the trigger for "'end' value not changed" — confirm.
yt <- window(myts, end = c(4360)) # extract training data until last year
yfit <- auto.arima(yt) # fit arima model
yfor <- forecast(yfit) # forecast
pd <- funggcast(myts, yfor) # extract the data for ggplot using function funggcast()
# Plot observed, fitted and forecast lines; the constant colour keys
# "1"/"2"/"3" are mapped to colours and legend labels by scale_colour_manual().
ggplot(data = pd, aes(x = date, y = observed)) +
geom_line(aes(color = "1")) +
geom_line(aes(y = fitted,color="2")) +
geom_line(aes(y = forecast,color="3")) +
scale_colour_manual(values=c("red", "blue","black"),labels = c("Observed", "Fitted", "Forecasted"),name="Data") +
# Shaded band between the lo95 and hi95 columns
geom_ribbon(aes(ymin = lo95, ymax = hi95), alpha = .25)
好吧,下面的结果应该很接近您想要的。funggcast 函数对日期所做的假设与实际数据相去甚远,因此我对其进行了修改以使其正常工作。我创建了一个 xts 对象,并去掉了所有与 window 相关的代码,因为它们对这份数据似乎没有任何意义。
# R Script
require(ggplot2)
require(zoo) # as.yearmon() function
require(forecast) # for forecasting
require(xts) # extensible time series
require(RCurl)
# Download the example CSV as text (getURL comes from RCurl) and parse it;
# the literal ".." in the file marks a missing value.
x <- getURL("https://dl.dropboxusercontent.com/u/109495328/example.csv")
app_df <- read.csv(text = x, header = TRUE, sep = ",", quote = "",
                   stringsAsFactors = FALSE, na.strings = "..")
colnames(app_df) <- c("Date", "Weight")

# Dates are day.month.year strings
date <- as.Date(strptime(app_df$Date, "%d.%m.%Y"))
weight <- app_df$Weight

# Keep only complete (date, weight) pairs
df <- na.omit(data.frame(date, weight))
w <- as.numeric(weight) # raw weights as numeric; kept for reference, not used below

# Build the irregular series from the cleaned rows: xts() rejects NA in its
# index, and missing weights should not be passed on to the model.
myts <- xts(as.numeric(df$weight), order.by = df$date)
# tail(weight, n=1)
# Assemble observed, fitted and forecast values into one data frame for ggplot.
#
# dn:    xts series the model was fitted on; only its first column is used.
# fcast: forecast object produced from that series with the default 80% and
#        95% levels (the output columns are named for exactly those levels).
# Returns a data frame with columns date, observed, fitted, forecast, lo80,
# hi80, lo95 and hi95. Observed rows have NA in the forecast columns and
# forecast rows have NA in observed/fitted.
funggcast_new <- function(dn, fcast) {
  # Observed values, one row per time stamp of the series
  ds <- as.data.frame(dn[, 1])
  names(ds) <- "observed"
  ds$date <- time(dn)
  last_obs_date <- ds$date[nrow(ds)]

  # Fitted values line up one-to-one with the observations, so reuse their dates
  dfit <- as.data.frame(fcast$fitted)
  dfit$date <- ds$date
  names(dfit)[1] <- "fitted"
  ds <- merge(ds, dfit, all.x = TRUE)

  # Forecast values and interval bounds. Number the steps from the forecast
  # table itself so the dates always match its row count, whatever horizon
  # was requested. Steps are placed on consecutive days after the last
  # observation (the forecast is assumed to continue the daily measurements).
  dfcastn <- as.data.frame(fcast)
  dfcastn$date <- last_obs_date + seq_len(nrow(dfcastn))
  names(dfcastn) <- c("forecast", "lo80", "hi80", "lo95", "hi95", "date")

  # Full outer join on date: the final data frame for ggplot
  merge(ds, dfcastn, all = TRUE)
}
# ggplot function by Frank Davenport
# The original window() call is disabled; the whole series is used for training:
# yt <- window(myts, end = c(4360))
yt <- myts
yfit <- auto.arima(yt) # fit arima model
yfor <- forecast(yfit) # forecast
pd <- funggcast_new(myts, yfor) # extract the data for ggplot using function funggcast_new()
# Plot observed, fitted and forecast lines; the constant colour keys
# "1"/"2"/"3" are mapped to colours and legend labels by scale_colour_manual().
ggplot(data = pd, aes(x = date, y = observed)) +
geom_line(aes(color = "1")) +
geom_line(aes(y = fitted, color = "2")) +
geom_line(aes(y = forecast, color = "3")) +
scale_colour_manual(values = c("red", "blue", "black"),
labels = c("Observed", "Fitted", "Forecasted"), name = "Data") +
# Shaded band between the lo95 and hi95 columns
geom_ribbon(aes(ymin = lo95, ymax = hi95), alpha = .25)
结果如下:
请参考这个示例文件。我想按时间预测体重。通常我会用下面的代码来做到这一点,但问题是我现在拥有的日期是不连续的:较早的数据是不定期记录的,而最近的数据是每天记录一次。我在某处读到,在这种情况下应该使用 xts 包而不是 ts。
我收到的错误信息是:
Warning message:
In window.default(x, ...) : 'end' value not changed
并且:
Error in window.default(x, ...) : 'start' cannot be after 'end'
我应该在哪里调整以下代码才能让预测运行起来?还是说我应该推算缺失的体重值,而不是对每日测量数据使用 ts?
require(ggplot2)
require(zoo) # as.yearmon() function
require(forecast) # for forecasting
require(xts) # extensible time series
# Fetch the example CSV.
# NOTE(review): the URL is not quoted and get.url() is not defined anywhere in
# this snippet, so this line will not parse as written — confirm the intended call.
x <- get.url(https://dl.dropboxusercontent.com/u/109495328/example.csv)
# Parse the CSV; the literal ".." in the file is read as NA.
app_df <- read.csv(x, header=T, sep = ",", quote = "", stringsAsFactors = FALSE, na.strings = "..")
colnames(app_df) <- c("Date", "Weight")
# Dates are day.month.year strings.
date <- as.Date(strptime(app_df$Date, "%d.%m.%Y"))
weight <- app_df$Weight
# Rows with a missing date or weight are dropped here, but df is not used by
# the rest of this snippet; the series below is built from the raw weight vector.
df <- na.omit(data.frame(date,weight))
w <- as.numeric(weight) # open question from the author: how to do this step with xts
# Attach a time axis with frequency 7.
# NOTE(review): start and end are both 2016, which presumably limits the series
# to a single time point and would explain the window() errors quoted above — confirm.
myts <- ts(w, start = c(2016), end = c(2016), freq = 7) # add time dimension
# tail(weight, n=1)
# Assemble observed, fitted and forecast values into one data frame for ggplot
# (function by Frank Davenport).
#
# dn:    the full time series; it is clipped with window() at the last
#        forecast time point.
# fcast: a forecast object; its $mean, $fitted and data-frame form are used.
# Returns the merged data frame holding the observed/fitted rows plus the
# forecast rows with their 80% and 95% interval bounds.
funggcast <- function(dn, fcast) {
  # Latest time point present in the forecast means
  horizon_end <- max(time(fcast$mean))

  # Observed values up to that point, together with their dates
  observed_df <- as.data.frame(window(dn, end = horizon_end))
  names(observed_df) <- 'observed'
  observed_df$date <- as.Date(time(window(dn, end = horizon_end)))

  # Fitted values of the model (interval bounds are not extracted here)
  fitted_df <- as.data.frame(fcast$fitted)
  fitted_df$date <- as.Date(time(fcast$fitted))
  names(fitted_df)[1] <- 'fitted'

  # Keep every observed row; attach fitted values where the dates match
  observed_df <- merge(observed_df, fitted_df, all.x = TRUE)

  # Forecast table: each row name gets "-01-01" appended to form a date
  forecast_df <- as.data.frame(fcast)
  forecast_df$date <- as.Date(paste(row.names(forecast_df), "01", "01", sep = "-"))
  names(forecast_df) <- c('forecast', 'lo80', 'hi80', 'lo95', 'hi95', 'date')

  # Full outer join: this is the data frame handed to ggplot
  merge(observed_df, forecast_df, all = TRUE)
}
# Training subset of the series.
# NOTE(review): end = 4360 is far beyond the 2016 time axis given to myts,
# which looks like the trigger for "'end' value not changed" — confirm.
yt <- window(myts, end = c(4360)) # extract training data until last year
yfit <- auto.arima(yt) # fit arima model
yfor <- forecast(yfit) # forecast
pd <- funggcast(myts, yfor) # extract the data for ggplot using function funggcast()
# Plot observed, fitted and forecast lines; the constant colour keys
# "1"/"2"/"3" are mapped to colours and legend labels by scale_colour_manual().
ggplot(data = pd, aes(x = date, y = observed)) +
geom_line(aes(color = "1")) +
geom_line(aes(y = fitted,color="2")) +
geom_line(aes(y = forecast,color="3")) +
scale_colour_manual(values=c("red", "blue","black"),labels = c("Observed", "Fitted", "Forecasted"),name="Data") +
# Shaded band between the lo95 and hi95 columns
geom_ribbon(aes(ymin = lo95, ymax = hi95), alpha = .25)
好吧,下面的结果应该很接近您想要的。funggcast 函数对日期所做的假设与实际数据相去甚远,因此我对其进行了修改以使其正常工作。我创建了一个 xts 对象,并去掉了所有与 window 相关的代码,因为它们对这份数据似乎没有任何意义。
# R Script
require(ggplot2)
require(zoo) # as.yearmon() function
require(forecast) # for forecasting
require(xts) # extensible time series
require(RCurl)
# Download the example CSV as text (getURL comes from RCurl) and parse it;
# the literal ".." in the file marks a missing value.
x <- getURL("https://dl.dropboxusercontent.com/u/109495328/example.csv")
app_df <- read.csv(text = x, header = TRUE, sep = ",", quote = "",
                   stringsAsFactors = FALSE, na.strings = "..")
colnames(app_df) <- c("Date", "Weight")

# Dates are day.month.year strings
date <- as.Date(strptime(app_df$Date, "%d.%m.%Y"))
weight <- app_df$Weight

# Keep only complete (date, weight) pairs
df <- na.omit(data.frame(date, weight))
w <- as.numeric(weight) # raw weights as numeric; kept for reference, not used below

# Build the irregular series from the cleaned rows: xts() rejects NA in its
# index, and missing weights should not be passed on to the model.
myts <- xts(as.numeric(df$weight), order.by = df$date)
# tail(weight, n=1)
# Assemble observed, fitted and forecast values into one data frame for ggplot.
#
# dn:    xts series the model was fitted on; only its first column is used.
# fcast: forecast object produced from that series with the default 80% and
#        95% levels (the output columns are named for exactly those levels).
# Returns a data frame with columns date, observed, fitted, forecast, lo80,
# hi80, lo95 and hi95. Observed rows have NA in the forecast columns and
# forecast rows have NA in observed/fitted.
funggcast_new <- function(dn, fcast) {
  # Observed values, one row per time stamp of the series
  ds <- as.data.frame(dn[, 1])
  names(ds) <- "observed"
  ds$date <- time(dn)
  last_obs_date <- ds$date[nrow(ds)]

  # Fitted values line up one-to-one with the observations, so reuse their dates
  dfit <- as.data.frame(fcast$fitted)
  dfit$date <- ds$date
  names(dfit)[1] <- "fitted"
  ds <- merge(ds, dfit, all.x = TRUE)

  # Forecast values and interval bounds. Number the steps from the forecast
  # table itself so the dates always match its row count, whatever horizon
  # was requested. Steps are placed on consecutive days after the last
  # observation (the forecast is assumed to continue the daily measurements).
  dfcastn <- as.data.frame(fcast)
  dfcastn$date <- last_obs_date + seq_len(nrow(dfcastn))
  names(dfcastn) <- c("forecast", "lo80", "hi80", "lo95", "hi95", "date")

  # Full outer join on date: the final data frame for ggplot
  merge(ds, dfcastn, all = TRUE)
}
# ggplot function by Frank Davenport
# The original window() call is disabled; the whole series is used for training:
# yt <- window(myts, end = c(4360))
yt <- myts
yfit <- auto.arima(yt) # fit arima model
yfor <- forecast(yfit) # forecast
pd <- funggcast_new(myts, yfor) # extract the data for ggplot using function funggcast_new()
# Plot observed, fitted and forecast lines; the constant colour keys
# "1"/"2"/"3" are mapped to colours and legend labels by scale_colour_manual().
ggplot(data = pd, aes(x = date, y = observed)) +
geom_line(aes(color = "1")) +
geom_line(aes(y = fitted, color = "2")) +
geom_line(aes(y = forecast, color = "3")) +
scale_colour_manual(values = c("red", "blue", "black"),
labels = c("Observed", "Fitted", "Forecasted"), name = "Data") +
# Shaded band between the lo95 and hi95 columns
geom_ribbon(aes(ymin = lo95, ymax = hi95), alpha = .25)
结果如下: