如何在一天中的某个时间引用一个值来创建二进制向量 (xts)

How to reference a value at time in day to create a binary vector (xts)

有一个包含 15 分钟间隔数据的 xts 对象。每天在 9:30,我对 9:00 和 9:15 两个时段的值取平均值;如果平均值 > 5,就在由 1 和 0 组成的向量 BinVect 中记录一个 1。下面的脚本实现了这一点:

library("xts")

# Sample data: ten observations spaced 900 seconds (15 minutes) apart,
# reproduced from dput() output of an xts object.
XTS1 <- structure(
  c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30),
  .indexCLASS = c("POSIXct", "POSIXt"),
  .indexTZ = "",
  tclass = c("POSIXct", "POSIXt"),
  tzone = "",
  class = c("xts", "zoo"),
  .CLASS = structure("double", class = "CLASS"),
  formattable = structure(
    list(
      formatter = "formatC",
      format = structure(
        list(format = "f", digits = 2),
        .Names = c("format", "digits")
      ),
      preproc = "percent_preproc",
      postproc = "percent_postproc"
    ),
    .Names = c("formatter", "format", "preproc", "postproc")
  ),
  index = structure(
    c(
      1413981900, 1413982800, 1413983700, 1413984600, 1413985500,
      1413986400, 1413987300, 1413988200, 1413989100, 1413990000
    ),
    tzone = "",
    tclass = c("POSIXct", "POSIXt")
  ),
  .Dim = c(10L, 1L)
)

# Average of the two observations preceding each row: first average each
# value with its predecessor, then shift that result down by one period.
AvgPrior2 <- (XTS1 + lag(XTS1)) / 2
AvgPrior2 <- lag(AvgPrior2)

# Flag only the rows stamped 09:30 whose prior-two average exceeds 5.
BinVect <- ifelse(AvgPrior2 > 5, 1, 0) &
  .indexhour(XTS1) == 9 &
  .indexmin(XTS1) == 30

# Combine everything; column names are taken from the variable names.
XTS1 <- merge.xts(XTS1, AvgPrior2, BinVect)

#Output:
                    XTS1 AvgPrior2 BinVect
2014-10-22 08:45:00   12        NA       0
2014-10-22 09:00:00    7        NA       0
2014-10-22 09:15:00    7       9.5       0
2014-10-22 09:30:00   22       7.0       1
2014-10-22 09:45:00   24      14.5       0
2014-10-22 10:00:00   30      23.0       0
2014-10-22 10:15:00   26      27.0       0
2014-10-22 10:30:00   23      28.0       0
2014-10-22 10:45:00   27      24.5       0
2014-10-22 11:00:00   30      25.0       0

现在是我无法弄清楚的部分:我想创建一个新向量,对于每一个在 09:30:00 时 BinVect==1 的日子,该向量在当天 09:30:00 到 10:15:00 之间的所有时间点上都为 1。我认为需要使用函数 apply.daily,问题是该如何使用。

#Desired Output:
                    XTS1 AvgPrior2 BinVect NewBinVect
2014-10-22 08:45:00   12        NA       0          0
2014-10-22 09:00:00    7        NA       0          0
2014-10-22 09:15:00    7       9.5       0          0
2014-10-22 09:30:00   22       7.0       1          1
2014-10-22 09:45:00   24      14.5       0          1
2014-10-22 10:00:00   30      23.0       0          1
2014-10-22 10:15:00   26      27.0       0          1
2014-10-22 10:30:00   23      28.0       0          0
2014-10-22 10:45:00   27      24.5       0          0
2014-10-22 11:00:00   30      25.0       0          0

# This is the closest I got to getting anywhere (out of several trials):
XTS1$NewBinVect <- apply.daily(BinVect, function(x) ifelse(x == 1, 1, 0))
# Intended meaning: if BinVect == 1 on a given day, then NewBinVect == 1 on
# that day. NOTE: the line above does not actually produce that result.

我想我一开始误解了你的问题。这段代码应该能生成一个新列:在每一天 9:30 到 10:15 之间的时间戳处,该列的值等于当天 9:30 的 BinVect 值。我把之前编写的代码保留在下面,以防它对其他用途有帮助。

library("xts")

# Sample data: ten observations spaced 900 seconds (15 minutes) apart,
# reproduced from dput() output of an xts object.
XTS1 <- structure(
  c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30),
  .indexCLASS = c("POSIXct", "POSIXt"),
  .indexTZ = "",
  tclass = c("POSIXct", "POSIXt"),
  tzone = "",
  class = c("xts", "zoo"),
  .CLASS = structure("double", class = "CLASS"),
  formattable = structure(
    list(
      formatter = "formatC",
      format = structure(
        list(format = "f", digits = 2),
        .Names = c("format", "digits")
      ),
      preproc = "percent_preproc",
      postproc = "percent_postproc"
    ),
    .Names = c("formatter", "format", "preproc", "postproc")
  ),
  index = structure(
    c(
      1413981900, 1413982800, 1413983700, 1413984600, 1413985500,
      1413986400, 1413987300, 1413988200, 1413989100, 1413990000
    ),
    tzone = "",
    tclass = c("POSIXct", "POSIXt")
  ),
  .Dim = c(10L, 1L)
)

# Name the single data column
colnames(XTS1) <- "XTS1"

# Placeholder column; it is overwritten with the per-day averages right below
XTS1$AvgPrior2 <- NA
# Day by day: average each observation with its predecessor, shift the result
# down one row, then flatten the per-day results back into a single vector
XTS1$AvgPrior2 <- as.vector(t(apply.daily(XTS1, function(day) {
  pair_mean <- (day$XTS1 + lag(day$XTS1)) / 2
  as.vector(lag(pair_mean))
})))

# BinVect starts out as zero everywhere
XTS1$BinVect <- 0
# On the 9:30 rows only, store 1 when AvgPrior2 exceeds 5 (0 otherwise)
XTS1["T09:30/T09:30:59", "BinVect"] <- ifelse(
  XTS1["T09:30/T09:30:59", "AvgPrior2"] > 5,
  1,
  0
)

# NewBinVect starts out as zero everywhere
XTS1$NewBinVect <- 0
# Day by day: copy that day's 9:30 BinVect value onto every row between
# 9:30 and 10:15, then flatten the per-day results back into a single vector
XTS1$NewBinVect <- as.vector(t(apply.daily(XTS1, function(day) {
  at_0930 <- .indexhour(day) == 9 & .indexmin(day) == 30
  day["T09:30/T10:15", "NewBinVect"] <- as.numeric(day[at_0930]$BinVect)

  as.vector(day$NewBinVect)
})))

之前的代码:

library('xts')

# Sample data: ten observations spaced 900 seconds (15 minutes) apart,
# reproduced from dput() output of an xts object.
XTS1 <- structure(c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "", tclass = c("POSIXct", "POSIXt"), tzone = "", class = c("xts", "zoo"), .CLASS = structure("double", class = "CLASS"), formattable = structure(list(formatter = "formatC", format = structure(list(format = "f", digits = 2), .Names = c("format", "digits")), preproc = "percent_preproc", postproc = "percent_postproc"), .Names = c("formatter", "format", "preproc", "postproc")), index = structure(c(1413981900, 1413982800, 1413983700, 1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100, 1413990000), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(10L, 1L))

# Name the single data column
colnames(XTS1) <- "XTS1"

# Average of the two preceding observations: mean of each value and its
# predecessor, lagged by one more row
XTS1$AvgPrior2 <- lag((XTS1 + lag(XTS1)) / 2)
# Flag rows stamped 9:30 where AvgPrior2 exceeds 5
XTS1$BinVect <- ifelse(XTS1$AvgPrior2 > 5, 1, 0) & .indexhour(XTS1) == 9 & .indexmin(XTS1) == 30

# NOTE(review): same expression as BinVect above; looks like a duplicated
# column -- confirm whether it is intentional
XTS1$BinVect2 <- ifelse(XTS1$AvgPrior2 > 5, 1, 0) & .indexhour(XTS1) == 9 & .indexmin(XTS1) == 30

# Hour/minute pairs to test: 9:00, 9:15, 9:30, 9:45, 10:00, 10:15, 10:30, 10:45
h.m.combos <- data.frame(hour = c(rep(9,4), rep(10,4)), min = c(rep(c(0,15,30,45),2)))

# For every hour/minute pair, compute per day a 0/1 vector that is 1 where
# AvgPrior2 is not NA, exceeds 5, and the row's timestamp matches that pair
BinVects <- mapply(function(h,m){
                    apply.daily(XTS1, function(x){
                                       val <- ifelse(!is.na(x$AvgPrior2) & x$AvgPrior2 > 5 & .indexhour(x) == h & .indexmin(x) == m, 1, 0)
                                       # Pull the AvgPrior2 column of the ifelse() result out as a plain vector
                                       val <- as.vector(val$AvgPrior2)
                                       # NOTE(review): assigns one name to the whole vector -- confirm intent
                                       names(val) <- paste0(h,m)
                                       return(val)
                                      })
                  },
                  h.m.combos$hour,
                  h.m.combos$min)

# Label each result column "hour:min"
# NOTE(review): presumably relies on mapply() simplifying to a matrix -- confirm
colnames(BinVects) <- paste(h.m.combos$hour, h.m.combos$min, sep = ":")

# Append the per-time indicator columns to the original object
XTS1 <- cbind(XTS1, BinVects)

您也可以使用 xts 的时间子集来执行此操作。首先,一些可重现示例的数据:

# Day 1: ten observations spaced 900 seconds (15 minutes) apart, with the
# index time zone set to America/New_York
x1 <- structure(
  c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30),
  .indexCLASS = c("POSIXct", "POSIXt"),
  .indexTZ = "America/New_York",
  tclass = c("POSIXct", "POSIXt"),
  tzone = "America/New_York",
  class = c("xts", "zoo"),
  index = structure(
    c(
      1413981900, 1413982800, 1413983700, 1413984600, 1413985500,
      1413986400, 1413987300, 1413988200, 1413989100, 1413990000
    ),
    tzone = "America/New_York",
    tclass = c("POSIXct", "POSIXt")
  ),
  .Dim = c(10L, 1L)
)

# Day 2: the same series lowered by 25 and moved one day (in seconds) later
x2 <- x1 - 25
.index(x2) <- .index(x2) + 24 * 60 * 60

# Stack both days into one object and name its single column
XTS1 <- rbind(x1, x2)
colnames(XTS1) <- "XTS1"

现在,您可以使用时间子集,只提取均值计算所需的时间段。(请注意,不足两位数的小时需要补前导零。)然后您可以使用 apply.daily 计算每天的平均值。

# Per day, average the 09:00 and 09:15 observations and test whether that mean
# exceeds 5; the one result per day lands on that day's 09:15 row
XTS1$BinVec <- apply.daily(XTS1["T0900/T0915"], mean) > 5

当您使用 $<- 函数向 xts 对象添加列时,它会隐式地将原始对象与在右侧创建的对象合并。默认情况下,merge.xts 填充 NA,这就是为什么 BinVec 中的大多数观察结果是 NA.

现在您有了 BinVec,您可以再次使用时间子集来提取要用 BinVec 的值填充的时间段。因此,您可以提取 09:15-10:15 之间的所有观察值,并用 09:15 处的观察值填充 NA。
# Within the 09:15-10:15 window, carry the last non-NA BinVec value forward
# over the NAs that follow it
XTS1$NewBinVec <- na.locf(XTS1["T0915/T1015", "BinVec"])

现在 XTS1 看起来像这样(同样,所有 NA 都来自隐式合并):

                    XTS1 BinVec NewBinVec
2014-10-22 08:45:00   12     NA        NA
2014-10-22 09:00:00    7     NA        NA
2014-10-22 09:15:00    7      1         1
2014-10-22 09:30:00   22     NA         1
2014-10-22 09:45:00   24     NA         1
2014-10-22 10:00:00   30     NA         1
2014-10-22 10:15:00   26     NA         1
2014-10-22 10:30:00   23     NA        NA
2014-10-22 10:45:00   27     NA        NA
2014-10-22 11:00:00   30     NA        NA
2014-10-23 08:45:00  -13     NA        NA
2014-10-23 09:00:00  -18     NA        NA
2014-10-23 09:15:00  -18      0         0
2014-10-23 09:30:00   -3     NA         0
2014-10-23 09:45:00   -1     NA         0
2014-10-23 10:00:00    5     NA         0
2014-10-23 10:15:00    1     NA         0
2014-10-23 10:30:00   -2     NA        NA
2014-10-23 10:45:00    2     NA        NA
2014-10-23 11:00:00    5     NA        NA

如果需要,您可以将所有 NA 填充为 0

# Replace every remaining NA in the object with 0, then print the result
XTS1[is.na(XTS1)] <- 0
XTS1
                    XTS1 BinVec NewBinVec
2014-10-22 08:45:00   12      0         0
2014-10-22 09:00:00    7      0         0
2014-10-22 09:15:00    7      1         1
2014-10-22 09:30:00   22      0         1
2014-10-22 09:45:00   24      0         1
2014-10-22 10:00:00   30      0         1
2014-10-22 10:15:00   26      0         1
2014-10-22 10:30:00   23      0         0
2014-10-22 10:45:00   27      0         0
2014-10-22 11:00:00   30      0         0
2014-10-23 08:45:00  -13      0         0
2014-10-23 09:00:00  -18      0         0
2014-10-23 09:15:00  -18      0         0
2014-10-23 09:30:00   -3      0         0
2014-10-23 09:45:00   -1      0         0
2014-10-23 10:00:00    5      0         0
2014-10-23 10:15:00    1      0         0
2014-10-23 10:30:00   -2      0         0
2014-10-23 10:45:00    2      0         0
2014-10-23 11:00:00    5      0         0