如何在一天中的某个时间引用一个值来创建二进制向量 (xts)
How to reference a value at time in day to create a binary vector (xts)
我有一个包含 15 分钟间隔数据的 xts 对象。每天 09:30 时,我对 09:00 和 09:15 两个周期的值取平均;如果平均值 > 5,就在由 1 和 0 组成的向量 BinVect 中记录 1。下面的脚本实现了这一点:
# Load xts for lag(), merge.xts() and the .indexhour()/.indexmin() helpers
library('xts')
# Sample data: ten observations spaced 900 seconds (15 minutes) apart
# (the literal looks like dput() output)
XTS1 <- structure(c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "", tclass = c("POSIXct", "POSIXt"), tzone = "", class = c("xts", "zoo"), .CLASS = structure("double", class = "CLASS"), formattable = structure(list(formatter = "formatC", format = structure(list(format = "f", digits = 2), .Names = c("format", "digits")), preproc = "percent_preproc", postproc = "percent_postproc"), .Names = c("formatter", "format", "preproc", "postproc")), index = structure(c(1413981900, 1413982800, 1413983700, 1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100, 1413990000), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(10L, 1L))
# Average each observation with the one before it, then shift the result by
# one more period, so every row holds the mean of the two preceding observations
AvgPrior2 <- lag((XTS1 + lag(XTS1)) / 2)
# Flag only the rows stamped 09:30 whose prior-two average exceeds 5
BinVect <- ifelse(AvgPrior2 > 5, 1, 0) & .indexhour(XTS1) == 9 & .indexmin(XTS1) == 30
# Combine the data, the average and the flag into one object
XTS1 <- merge.xts(XTS1,AvgPrior2,BinVect)
#Output:
XTS1 AvgPrior2 BinVect
2014-10-22 08:45:00 12 NA 0
2014-10-22 09:00:00 7 NA 0
2014-10-22 09:15:00 7 9.5 0
2014-10-22 09:30:00 22 7.0 1
2014-10-22 09:45:00 24 14.5 0
2014-10-22 10:00:00 30 23.0 0
2014-10-22 10:15:00 26 27.0 0
2014-10-22 10:30:00 23 28.0 0
2014-10-22 10:45:00 27 24.5 0
2014-10-22 11:00:00 30 25.0 0
现在是我无法弄清楚的部分:我想创建一个新向量,只要当天 09:30:00 时 BinVect==1,该向量在当天 09:30:00 到 10:15:00 之间的每个时间点都为 1。我认为需要使用函数 apply.daily,问题是如何使用。
#Desired Output:
XTS1 AvgPrior2 BinVect NewBinVect
2014-10-22 08:45:00 12 NA 0 0
2014-10-22 09:00:00 7 NA 0 0
2014-10-22 09:15:00 7 9.5 0 0
2014-10-22 09:30:00 22 7.0 1 1
2014-10-22 09:45:00 24 14.5 0 1
2014-10-22 10:00:00 30 23.0 0 1
2014-10-22 10:15:00 26 27.0 0 1
2014-10-22 10:30:00 23 28.0 0 0
2014-10-22 10:45:00 27 24.5 0 0
2014-10-22 11:00:00 30 25.0 0 0
# This is the closest I got (out of several attempts):
XTS1$NewBinVect <- apply.daily(BinVect, function(x) ifelse(x == 1, 1, 0))
# Intent: if BinVect == 1 on a given day, NewBinVect should be 1 for that day
# -- but this call does not produce that result
我想我一开始误解了你的问题。下面这段代码会生成一个新列:在每一天 9:30 到 10:15 之间的时间戳处,该列的值等于当天 9:30 的 BinVect 值。我会把之前编写的代码保留在下面,以防它对其他用途有用。
# Load xts for split(), lag(), time-of-day subsetting and the .index*() helpers
library('xts')
# Sample data: ten observations spaced 900 seconds (15 minutes) apart
XTS1 <- structure(c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "", tclass = c("POSIXct", "POSIXt"), tzone = "", class = c("xts", "zoo"), .CLASS = structure("double", class = "CLASS"), formattable = structure(list(formatter = "formatC", format = structure(list(format = "f", digits = 2), .Names = c("format", "digits")), preproc = "percent_preproc", postproc = "percent_postproc"), .Names = c("formatter", "format", "preproc", "postproc")), index = structure(c(1413981900, 1413982800, 1413983700, 1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100, 1413990000), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(10L, 1L))
# Set XTS1 column name
colnames(XTS1) <- "XTS1"
# Create empty AvgPrior2 column
XTS1$AvgPrior2 <- NA
# Mean of the two preceding observations, computed within each day so nothing
# carries over from the previous day. The per-day vectors are concatenated in
# day order; unlike transposing the matrix built by apply.daily(), this also
# works when days contain different numbers of observations.
XTS1$AvgPrior2 <- unlist(lapply(split(XTS1, f = "days"), function(day) {
  as.vector(lag((day$XTS1 + lag(day$XTS1)) / 2))
}), use.names = FALSE)
# Create BinVect column filled with zeros
XTS1$BinVect <- 0
# Assign value of 1 to BinVect rows at 9:30 time stamps if AvgPrior2 > 5
XTS1['T09:30/T09:30:59', "BinVect"] <- ifelse(XTS1['T09:30/T09:30:59', "AvgPrior2"] > 5, 1, 0)
# Create NewBinVect column filled with zeros
XTS1$NewBinVect <- 0
# Copy each day's 9:30 BinVect value to every timestamp between 9:30 and 10:15
XTS1$NewBinVect <- unlist(lapply(split(XTS1, f = "days"), function(day) {
  flag <- as.numeric(day[.indexhour(day) == 9 & .indexmin(day) == 30]$BinVect)
  # A day without a 9:30 observation keeps its zeros; assigning a zero-length
  # value into the window would otherwise raise an error.
  if (length(flag) > 0) {
    day['T09:30/T10:15', "NewBinVect"] <- flag[1]
  }
  as.vector(day$NewBinVect)
}), use.names = FALSE)
前一个代码
# Load xts for lag(), apply.daily() and the .indexhour()/.indexmin() helpers
library('xts')
# Sample data: ten observations spaced 900 seconds (15 minutes) apart
XTS1 <- structure(c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "", tclass = c("POSIXct", "POSIXt"), tzone = "", class = c("xts", "zoo"), .CLASS = structure("double", class = "CLASS"), formattable = structure(list(formatter = "formatC", format = structure(list(format = "f", digits = 2), .Names = c("format", "digits")), preproc = "percent_preproc", postproc = "percent_postproc"), .Names = c("formatter", "format", "preproc", "postproc")), index = structure(c(1413981900, 1413982800, 1413983700, 1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100, 1413990000), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(10L, 1L))
# Name the single data column
colnames(XTS1) <- "XTS1"
# Mean of the two preceding observations (lag is applied over the whole
# series here, not day by day)
XTS1$AvgPrior2 <- lag((XTS1 + lag(XTS1)) / 2)
# Flag the rows stamped 09:30 whose AvgPrior2 exceeds 5
XTS1$BinVect <- ifelse(XTS1$AvgPrior2 > 5, 1, 0) & .indexhour(XTS1) == 9 & .indexmin(XTS1) == 30
# NOTE(review): BinVect2 uses exactly the same expression as BinVect --
# confirm whether a different time of day was intended
XTS1$BinVect2 <- ifelse(XTS1$AvgPrior2 > 5, 1, 0) & .indexhour(XTS1) == 9 & .indexmin(XTS1) == 30
# Hour/minute pairs on the quarter hour from 9:00 through 10:45
h.m.combos <- data.frame(hour = c(rep(9,4), rep(10,4)), min = c(rep(c(0,15,30,45),2)))
# For every hour/minute pair, build a per-day 0/1 indicator that is 1 where
# AvgPrior2 is non-missing, above 5, and the row's timestamp matches the pair
BinVects <- mapply(function(h,m){
apply.daily(XTS1, function(x){
val <- ifelse(!is.na(x$AvgPrior2) & x$AvgPrior2 > 5 & .indexhour(x) == h & .indexmin(x) == m, 1, 0)
# Extract the AvgPrior2 column as a plain vector
val <- as.vector(val$AvgPrior2)
# NOTE(review): a single name is assigned to a vector that presumably has
# one element per row of the day -- confirm intent
names(val) <- paste0(h,m)
return(val)
})
},
h.m.combos$hour,
h.m.combos$min)
# Label each indicator column as "hour:minute"
colnames(BinVects) <- paste(h.m.combos$hour, h.m.combos$min, sep = ":")
# Append the indicator columns to the data
XTS1 <- cbind(XTS1, BinVects)
您也可以使用 xts 的时间子集来执行此操作。首先,一些可重现示例的数据:
# First day of sample data: ten observations spaced 900 seconds (15 minutes)
# apart, indexed in the America/New_York time zone
x1 <- structure(c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30),
.indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "America/New_York",
tclass = c("POSIXct", "POSIXt"), tzone = "America/New_York",
class = c("xts", "zoo"), index = structure(c(1413981900, 1413982800, 1413983700,
1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100,
1413990000), tzone = "America/New_York", tclass = c("POSIXct", "POSIXt")),
.Dim = c(10L, 1L))
# Second day: the same series with every value lowered by 25 ...
x2 <- x1 - 25
# ... and every timestamp moved forward by 86400 seconds (one day)
.index(x2) <- .index(x2) + 86400
# Stack both days into one series and name its single column
XTS1 <- rbind(x1, x2)
colnames(XTS1) <- "XTS1"
现在,您可以使用时间子集,仅提取均值计算所需的时间段。(请注意,不足两位数的小时需要加前导零,例如 T0900。)然后您可以使用 apply.daily
计算每天的平均值。
# Per-day mean of the 09:00 and 09:15 observations; the day is flagged when
# that mean exceeds 5
early_mean <- apply.daily(XTS1["T0900/T0915"], mean)
XTS1$BinVec <- early_mean > 5
当您使用 $<-
函数向 xts 对象添加列时,它会隐式地将原始对象与在右侧创建的对象合并。默认情况下,merge.xts
填充 NA
,这就是为什么 BinVec
中的大多数观察结果是 NA
.
现在您有了 BinVec
,您可以再次使用时间子集来提取要用 BinVec
的值填充的时间段。因此,您可以提取 09:15-10:15 之间的所有观察值,并用 09:15 处的观察值填充其中的 NA。
# Restrict BinVec to the 09:15-10:15 window, then carry each day's 09:15
# flag forward through the rest of that window
window_flags <- XTS1["T0915/T1015", "BinVec"]
XTS1$NewBinVec <- na.locf(window_flags)
现在 XTS1
看起来像这样(同样,所有 NA
都来自隐式合并):
XTS1 BinVec NewBinVec
2014-10-22 08:45:00 12 NA NA
2014-10-22 09:00:00 7 NA NA
2014-10-22 09:15:00 7 1 1
2014-10-22 09:30:00 22 NA 1
2014-10-22 09:45:00 24 NA 1
2014-10-22 10:00:00 30 NA 1
2014-10-22 10:15:00 26 NA 1
2014-10-22 10:30:00 23 NA NA
2014-10-22 10:45:00 27 NA NA
2014-10-22 11:00:00 30 NA NA
2014-10-23 08:45:00 -13 NA NA
2014-10-23 09:00:00 -18 NA NA
2014-10-23 09:15:00 -18 0 0
2014-10-23 09:30:00 -3 NA 0
2014-10-23 09:45:00 -1 NA 0
2014-10-23 10:00:00 5 NA 0
2014-10-23 10:15:00 1 NA 0
2014-10-23 10:30:00 -2 NA NA
2014-10-23 10:45:00 2 NA NA
2014-10-23 11:00:00 5 NA NA
如果需要,您可以将所有 NA
填充为 0
。
# Replace every NA left behind by the implicit merges with 0, then show the
# result
na_cells <- is.na(XTS1)
XTS1[na_cells] <- 0
XTS1
XTS1 BinVec NewBinVec
2014-10-22 08:45:00 12 0 0
2014-10-22 09:00:00 7 0 0
2014-10-22 09:15:00 7 1 1
2014-10-22 09:30:00 22 0 1
2014-10-22 09:45:00 24 0 1
2014-10-22 10:00:00 30 0 1
2014-10-22 10:15:00 26 0 1
2014-10-22 10:30:00 23 0 0
2014-10-22 10:45:00 27 0 0
2014-10-22 11:00:00 30 0 0
2014-10-23 08:45:00 -13 0 0
2014-10-23 09:00:00 -18 0 0
2014-10-23 09:15:00 -18 0 0
2014-10-23 09:30:00 -3 0 0
2014-10-23 09:45:00 -1 0 0
2014-10-23 10:00:00 5 0 0
2014-10-23 10:15:00 1 0 0
2014-10-23 10:30:00 -2 0 0
2014-10-23 10:45:00 2 0 0
2014-10-23 11:00:00 5 0 0
我有一个包含 15 分钟间隔数据的 xts 对象。每天 09:30 时,我对 09:00 和 09:15 两个周期的值取平均;如果平均值 > 5,就在由 1 和 0 组成的向量 BinVect 中记录 1。下面的脚本实现了这一点:
# Load xts for lag(), merge.xts() and the .indexhour()/.indexmin() helpers
library('xts')
# Sample data: ten observations spaced 900 seconds (15 minutes) apart
# (the literal looks like dput() output)
XTS1 <- structure(c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "", tclass = c("POSIXct", "POSIXt"), tzone = "", class = c("xts", "zoo"), .CLASS = structure("double", class = "CLASS"), formattable = structure(list(formatter = "formatC", format = structure(list(format = "f", digits = 2), .Names = c("format", "digits")), preproc = "percent_preproc", postproc = "percent_postproc"), .Names = c("formatter", "format", "preproc", "postproc")), index = structure(c(1413981900, 1413982800, 1413983700, 1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100, 1413990000), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(10L, 1L))
# Average each observation with the one before it, then shift the result by
# one more period, so every row holds the mean of the two preceding observations
AvgPrior2 <- lag((XTS1 + lag(XTS1)) / 2)
# Flag only the rows stamped 09:30 whose prior-two average exceeds 5
BinVect <- ifelse(AvgPrior2 > 5, 1, 0) & .indexhour(XTS1) == 9 & .indexmin(XTS1) == 30
# Combine the data, the average and the flag into one object
XTS1 <- merge.xts(XTS1,AvgPrior2,BinVect)
#Output:
XTS1 AvgPrior2 BinVect
2014-10-22 08:45:00 12 NA 0
2014-10-22 09:00:00 7 NA 0
2014-10-22 09:15:00 7 9.5 0
2014-10-22 09:30:00 22 7.0 1
2014-10-22 09:45:00 24 14.5 0
2014-10-22 10:00:00 30 23.0 0
2014-10-22 10:15:00 26 27.0 0
2014-10-22 10:30:00 23 28.0 0
2014-10-22 10:45:00 27 24.5 0
2014-10-22 11:00:00 30 25.0 0
现在是我无法弄清楚的部分:我想创建一个新向量,只要当天 09:30:00 时 BinVect==1,该向量在当天 09:30:00 到 10:15:00 之间的每个时间点都为 1。我认为需要使用函数 apply.daily,问题是如何使用。
#Desired Output:
XTS1 AvgPrior2 BinVect NewBinVect
2014-10-22 08:45:00 12 NA 0 0
2014-10-22 09:00:00 7 NA 0 0
2014-10-22 09:15:00 7 9.5 0 0
2014-10-22 09:30:00 22 7.0 1 1
2014-10-22 09:45:00 24 14.5 0 1
2014-10-22 10:00:00 30 23.0 0 1
2014-10-22 10:15:00 26 27.0 0 1
2014-10-22 10:30:00 23 28.0 0 0
2014-10-22 10:45:00 27 24.5 0 0
2014-10-22 11:00:00 30 25.0 0 0
# This is the closest I got (out of several attempts):
XTS1$NewBinVect <- apply.daily(BinVect, function(x) ifelse(x == 1, 1, 0))
# Intent: if BinVect == 1 on a given day, NewBinVect should be 1 for that day
# -- but this call does not produce that result
我想我一开始误解了你的问题。下面这段代码会生成一个新列:在每一天 9:30 到 10:15 之间的时间戳处,该列的值等于当天 9:30 的 BinVect 值。我会把之前编写的代码保留在下面,以防它对其他用途有用。
# Load xts for split(), lag(), time-of-day subsetting and the .index*() helpers
library('xts')
# Sample data: ten observations spaced 900 seconds (15 minutes) apart
XTS1 <- structure(c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "", tclass = c("POSIXct", "POSIXt"), tzone = "", class = c("xts", "zoo"), .CLASS = structure("double", class = "CLASS"), formattable = structure(list(formatter = "formatC", format = structure(list(format = "f", digits = 2), .Names = c("format", "digits")), preproc = "percent_preproc", postproc = "percent_postproc"), .Names = c("formatter", "format", "preproc", "postproc")), index = structure(c(1413981900, 1413982800, 1413983700, 1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100, 1413990000), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(10L, 1L))
# Set XTS1 column name
colnames(XTS1) <- "XTS1"
# Create empty AvgPrior2 column
XTS1$AvgPrior2 <- NA
# Mean of the two preceding observations, computed within each day so nothing
# carries over from the previous day. The per-day vectors are concatenated in
# day order; unlike transposing the matrix built by apply.daily(), this also
# works when days contain different numbers of observations.
XTS1$AvgPrior2 <- unlist(lapply(split(XTS1, f = "days"), function(day) {
  as.vector(lag((day$XTS1 + lag(day$XTS1)) / 2))
}), use.names = FALSE)
# Create BinVect column filled with zeros
XTS1$BinVect <- 0
# Assign value of 1 to BinVect rows at 9:30 time stamps if AvgPrior2 > 5
XTS1['T09:30/T09:30:59', "BinVect"] <- ifelse(XTS1['T09:30/T09:30:59', "AvgPrior2"] > 5, 1, 0)
# Create NewBinVect column filled with zeros
XTS1$NewBinVect <- 0
# Copy each day's 9:30 BinVect value to every timestamp between 9:30 and 10:15
XTS1$NewBinVect <- unlist(lapply(split(XTS1, f = "days"), function(day) {
  flag <- as.numeric(day[.indexhour(day) == 9 & .indexmin(day) == 30]$BinVect)
  # A day without a 9:30 observation keeps its zeros; assigning a zero-length
  # value into the window would otherwise raise an error.
  if (length(flag) > 0) {
    day['T09:30/T10:15', "NewBinVect"] <- flag[1]
  }
  as.vector(day$NewBinVect)
}), use.names = FALSE)
前一个代码
# Load xts for lag(), apply.daily() and the .indexhour()/.indexmin() helpers
library('xts')
# Sample data: ten observations spaced 900 seconds (15 minutes) apart
XTS1 <- structure(c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "", tclass = c("POSIXct", "POSIXt"), tzone = "", class = c("xts", "zoo"), .CLASS = structure("double", class = "CLASS"), formattable = structure(list(formatter = "formatC", format = structure(list(format = "f", digits = 2), .Names = c("format", "digits")), preproc = "percent_preproc", postproc = "percent_postproc"), .Names = c("formatter", "format", "preproc", "postproc")), index = structure(c(1413981900, 1413982800, 1413983700, 1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100, 1413990000), tzone = "", tclass = c("POSIXct", "POSIXt")), .Dim = c(10L, 1L))
# Name the single data column
colnames(XTS1) <- "XTS1"
# Mean of the two preceding observations (lag is applied over the whole
# series here, not day by day)
XTS1$AvgPrior2 <- lag((XTS1 + lag(XTS1)) / 2)
# Flag the rows stamped 09:30 whose AvgPrior2 exceeds 5
XTS1$BinVect <- ifelse(XTS1$AvgPrior2 > 5, 1, 0) & .indexhour(XTS1) == 9 & .indexmin(XTS1) == 30
# NOTE(review): BinVect2 uses exactly the same expression as BinVect --
# confirm whether a different time of day was intended
XTS1$BinVect2 <- ifelse(XTS1$AvgPrior2 > 5, 1, 0) & .indexhour(XTS1) == 9 & .indexmin(XTS1) == 30
# Hour/minute pairs on the quarter hour from 9:00 through 10:45
h.m.combos <- data.frame(hour = c(rep(9,4), rep(10,4)), min = c(rep(c(0,15,30,45),2)))
# For every hour/minute pair, build a per-day 0/1 indicator that is 1 where
# AvgPrior2 is non-missing, above 5, and the row's timestamp matches the pair
BinVects <- mapply(function(h,m){
apply.daily(XTS1, function(x){
val <- ifelse(!is.na(x$AvgPrior2) & x$AvgPrior2 > 5 & .indexhour(x) == h & .indexmin(x) == m, 1, 0)
# Extract the AvgPrior2 column as a plain vector
val <- as.vector(val$AvgPrior2)
# NOTE(review): a single name is assigned to a vector that presumably has
# one element per row of the day -- confirm intent
names(val) <- paste0(h,m)
return(val)
})
},
h.m.combos$hour,
h.m.combos$min)
# Label each indicator column as "hour:minute"
colnames(BinVects) <- paste(h.m.combos$hour, h.m.combos$min, sep = ":")
# Append the indicator columns to the data
XTS1 <- cbind(XTS1, BinVects)
您也可以使用 xts 的时间子集来执行此操作。首先,一些可重现示例的数据:
# First day of sample data: ten observations spaced 900 seconds (15 minutes)
# apart, indexed in the America/New_York time zone
x1 <- structure(c(12, 7, 7, 22, 24, 30, 26, 23, 27, 30),
.indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "America/New_York",
tclass = c("POSIXct", "POSIXt"), tzone = "America/New_York",
class = c("xts", "zoo"), index = structure(c(1413981900, 1413982800, 1413983700,
1413984600, 1413985500, 1413986400, 1413987300, 1413988200, 1413989100,
1413990000), tzone = "America/New_York", tclass = c("POSIXct", "POSIXt")),
.Dim = c(10L, 1L))
# Second day: the same series with every value lowered by 25 ...
x2 <- x1 - 25
# ... and every timestamp moved forward by 86400 seconds (one day)
.index(x2) <- .index(x2) + 86400
# Stack both days into one series and name its single column
XTS1 <- rbind(x1, x2)
colnames(XTS1) <- "XTS1"
现在,您可以使用时间子集,仅提取均值计算所需的时间段。(请注意,不足两位数的小时需要加前导零,例如 T0900。)然后您可以使用 apply.daily
计算每天的平均值。
# Per-day mean of the 09:00 and 09:15 observations; the day is flagged when
# that mean exceeds 5
early_mean <- apply.daily(XTS1["T0900/T0915"], mean)
XTS1$BinVec <- early_mean > 5
当您使用 $<-
函数向 xts 对象添加列时,它会隐式地将原始对象与在右侧创建的对象合并。默认情况下,merge.xts
填充 NA
,这就是为什么 BinVec
中的大多数观察结果是 NA
.
现在您有了 BinVec
,您可以再次使用时间子集来提取要用 BinVec
的值填充的时间段。因此,您可以提取 09:15-10:15 之间的所有观察值,并用 09:15 处的观察值填充其中的 NA。
# Restrict BinVec to the 09:15-10:15 window, then carry each day's 09:15
# flag forward through the rest of that window
window_flags <- XTS1["T0915/T1015", "BinVec"]
XTS1$NewBinVec <- na.locf(window_flags)
现在 XTS1
看起来像这样(同样,所有 NA
都来自隐式合并):
XTS1 BinVec NewBinVec
2014-10-22 08:45:00 12 NA NA
2014-10-22 09:00:00 7 NA NA
2014-10-22 09:15:00 7 1 1
2014-10-22 09:30:00 22 NA 1
2014-10-22 09:45:00 24 NA 1
2014-10-22 10:00:00 30 NA 1
2014-10-22 10:15:00 26 NA 1
2014-10-22 10:30:00 23 NA NA
2014-10-22 10:45:00 27 NA NA
2014-10-22 11:00:00 30 NA NA
2014-10-23 08:45:00 -13 NA NA
2014-10-23 09:00:00 -18 NA NA
2014-10-23 09:15:00 -18 0 0
2014-10-23 09:30:00 -3 NA 0
2014-10-23 09:45:00 -1 NA 0
2014-10-23 10:00:00 5 NA 0
2014-10-23 10:15:00 1 NA 0
2014-10-23 10:30:00 -2 NA NA
2014-10-23 10:45:00 2 NA NA
2014-10-23 11:00:00 5 NA NA
如果需要,您可以将所有 NA
填充为 0
。
# Replace every NA left behind by the implicit merges with 0, then show the
# result
na_cells <- is.na(XTS1)
XTS1[na_cells] <- 0
XTS1
XTS1 BinVec NewBinVec
2014-10-22 08:45:00 12 0 0
2014-10-22 09:00:00 7 0 0
2014-10-22 09:15:00 7 1 1
2014-10-22 09:30:00 22 0 1
2014-10-22 09:45:00 24 0 1
2014-10-22 10:00:00 30 0 1
2014-10-22 10:15:00 26 0 1
2014-10-22 10:30:00 23 0 0
2014-10-22 10:45:00 27 0 0
2014-10-22 11:00:00 30 0 0
2014-10-23 08:45:00 -13 0 0
2014-10-23 09:00:00 -18 0 0
2014-10-23 09:15:00 -18 0 0
2014-10-23 09:30:00 -3 0 0
2014-10-23 09:45:00 -1 0 0
2014-10-23 10:00:00 5 0 0
2014-10-23 10:15:00 1 0 0
2014-10-23 10:30:00 -2 0 0
2014-10-23 10:45:00 2 0 0
2014-10-23 11:00:00 5 0 0