计算R中时间序列的每日模式
Calculate daily mode of time series in R
我正在尝试计算这个时间序列的每日模式。在下面的示例数据中,我想每天查看 windDir.c
列的模式。
不知道如何使用 apply.daily()
包装器,因为没有 "colMode" 参数。因此,我尝试在 period.apply()
中使用自定义函数,但无济于事。我尝试的代码以及 dput
如下。
ep <- endpoints(wind.d,'days')
modefunc <- function(x) {
tabresult <- tabulate(x)
themode <- which(tabresult == max(tabresult))
if (sum(tabresult == max(tabresult))>1)
themode <- NA
return(themode)
}
period.apply(wind.d$windDir.c, INDEX=ep, FUN=function(x) mode(x))
可重现的数据:
wind.d <- structure(list(date = structure(c(1280635200, 1280635200, 1280635200,
1280635200, 1280635200, 1280635200, 1280635200, 1280721600, 1280721600,
1280721600, 1280721600, 1280721600, 1280721600, 1280721600, 1280808000,
1280808000, 1280808000, 1280808000, 1280808000, 1280808000), class = c("POSIXct",
"POSIXt"), tzone = ""), windDir.c = structure(c(4L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 6L, 5L, 5L, 4L, 5L, 5L
), .Label = c("15", "45", "75", "105", "135", "165", "195", "225",
"255", "285", "315", "345"), class = "factor")), .Names = c("date",
"windDir.c"), class = "data.frame", row.names = c(NA, -20L))
我们可以使用 dplyr
轻松做到这一点:
library(dplyr)
wind.d %>% group_by(date, windDir.c) %>%
summarise(count = n()) %>%
summarise(mode = windDir.c[which.max(count)])
或基数 R:
calMode <- function(x) {
ux <- unique(x)
return(ux[which.max(tabulate(match(x, ux)))])
}
myModes <- tapply(as.character(windDir.c), INDEX = date, FUN = calMode)
请注意,您尝试的代码与您提供的 dput
的输出不一致。 dput
输出不是 xts 对象,您提供的代码仅适用于 xts 对象(endpoints
在您提供的 data.frame 上失败)。
假设 wind.d
确实是一个 xts 对象,您可以使用 xts 轻松地做到这一点:
wind.d <- structure(c(105, 75, 75, 105, 105, 105, 105, 105, 105, 105, 105,
105, 135, 135, 165, 135, 135, 105, 135, 135), .Dim = c(20L, 1L),
index = structure(c(1280635200, 1280635200, 1280635200, 1280635200,
1280635200, 1280635200, 1280635200, 1280721600, 1280721600, 1280721600,
1280721600, 1280721600, 1280721600, 1280721600, 1280808000, 1280808000,
1280808000, 1280808000, 1280808000, 1280808000), tzone = "",
tclass = c("POSIXct", "POSIXt")), .indexCLASS = c("POSIXct", "POSIXt"),
tclass = c("POSIXct", "POSIXt"), .indexTZ = "", tzone = "",
.Dimnames = list(NULL, "windDir.c"), class = c("xts", "zoo"))
apply.daily(x, function(x) which.max(tabulate(x)))
# windDir.c
# 2010-07-31 23:00:00 105
# 2010-08-01 23:00:00 105
# 2010-08-02 23:00:00 135
我们可以加载包 modeest
以使用函数 mfv
(最频繁值)
library(dplyr)
library(modeest)
wind.d %>% group_by(date) %>% summarise(mode = mfv(windDir.c))
输出:
date mode
1 2010-08-01 06:00:00 105
2 2010-08-02 06:00:00 105
3 2010-08-03 06:00:00 135
如果有多种模式,我们需要指定要检索的元素。否则会return报错。比如第一个元素:
mfv(iris[iris$Species=="setosa", 1])
[1] 5.0 5.1
# dplyr
iris %>% group_by(Species) %>% summarise(mode = mfv(Sepal.Length)[1])
Species mode
1 setosa 5.0
2 versicolor 5.5
3 virginica 6.3
sqldf
对于那些对 sqldf
感兴趣的人,使用 this approach:
library(sqldf)
sqldf("SELECT date,
(SELECT [windDir.c]
FROM [wind.d]
WHERE date = tbl.date
GROUP BY [windDir.c]
ORDER BY count(*) DESC
LIMIT 1) AS mode
FROM (SELECT DISTINCT date
FROM [wind.d]) AS tbl")
我正在尝试计算这个时间序列的每日模式。在下面的示例数据中,我想每天查看 windDir.c
列的模式。
不知道如何使用 apply.daily()
包装器,因为没有 "colMode" 参数。因此,我尝试在 period.apply()
中使用自定义函数,但无济于事。我尝试的代码以及 dput
如下。
ep <- endpoints(wind.d,'days')
modefunc <- function(x) {
tabresult <- tabulate(x)
themode <- which(tabresult == max(tabresult))
if (sum(tabresult == max(tabresult))>1)
themode <- NA
return(themode)
}
period.apply(wind.d$windDir.c, INDEX=ep, FUN=function(x) mode(x))
可重现的数据:
wind.d <- structure(list(date = structure(c(1280635200, 1280635200, 1280635200,
1280635200, 1280635200, 1280635200, 1280635200, 1280721600, 1280721600,
1280721600, 1280721600, 1280721600, 1280721600, 1280721600, 1280808000,
1280808000, 1280808000, 1280808000, 1280808000, 1280808000), class = c("POSIXct",
"POSIXt"), tzone = ""), windDir.c = structure(c(4L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 6L, 5L, 5L, 4L, 5L, 5L
), .Label = c("15", "45", "75", "105", "135", "165", "195", "225",
"255", "285", "315", "345"), class = "factor")), .Names = c("date",
"windDir.c"), class = "data.frame", row.names = c(NA, -20L))
我们可以使用 dplyr
轻松做到这一点:
library(dplyr)
wind.d %>% group_by(date, windDir.c) %>%
summarise(count = n()) %>%
summarise(mode = windDir.c[which.max(count)])
或基数 R:
calMode <- function(x) {
ux <- unique(x)
return(ux[which.max(tabulate(match(x, ux)))])
}
myModes <- tapply(as.character(windDir.c), INDEX = date, FUN = calMode)
请注意,您尝试的代码与您提供的 dput
的输出不一致。 dput
输出不是 xts 对象,您提供的代码仅适用于 xts 对象(endpoints
在您提供的 data.frame 上失败)。
假设 wind.d
确实是一个 xts 对象,您可以使用 xts 轻松地做到这一点:
wind.d <- structure(c(105, 75, 75, 105, 105, 105, 105, 105, 105, 105, 105,
105, 135, 135, 165, 135, 135, 105, 135, 135), .Dim = c(20L, 1L),
index = structure(c(1280635200, 1280635200, 1280635200, 1280635200,
1280635200, 1280635200, 1280635200, 1280721600, 1280721600, 1280721600,
1280721600, 1280721600, 1280721600, 1280721600, 1280808000, 1280808000,
1280808000, 1280808000, 1280808000, 1280808000), tzone = "",
tclass = c("POSIXct", "POSIXt")), .indexCLASS = c("POSIXct", "POSIXt"),
tclass = c("POSIXct", "POSIXt"), .indexTZ = "", tzone = "",
.Dimnames = list(NULL, "windDir.c"), class = c("xts", "zoo"))
apply.daily(x, function(x) which.max(tabulate(x)))
# windDir.c
# 2010-07-31 23:00:00 105
# 2010-08-01 23:00:00 105
# 2010-08-02 23:00:00 135
我们可以加载包 modeest
以使用函数 mfv
(最频繁值)
library(dplyr)
library(modeest)
wind.d %>% group_by(date) %>% summarise(mode = mfv(windDir.c))
输出:
date mode
1 2010-08-01 06:00:00 105
2 2010-08-02 06:00:00 105
3 2010-08-03 06:00:00 135
如果有多种模式,我们需要指定要检索的元素。否则会return报错。比如第一个元素:
mfv(iris[iris$Species=="setosa", 1])
[1] 5.0 5.1
# dplyr
iris %>% group_by(Species) %>% summarise(mode = mfv(Sepal.Length)[1])
Species mode
1 setosa 5.0
2 versicolor 5.5
3 virginica 6.3
sqldf
对于那些对 sqldf
感兴趣的人,使用 this approach:
library(sqldf)
sqldf("SELECT date,
(SELECT [windDir.c]
FROM [wind.d]
WHERE date = tbl.date
GROUP BY [windDir.c]
ORDER BY count(*) DESC
LIMIT 1) AS mode
FROM (SELECT DISTINCT date
FROM [wind.d]) AS tbl")