按生成 NA 的日期合并数据帧
Merge data frames by date generating NA
我正在学习 R,目前正在尝试填充缺少日期和 NA 值的数据框。
数据样本:
Date <- c("23-01-19", "24-01-19", "25-01-19", "30-01-19", "31-01-19" )
Open <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
High <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
Low <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
Close <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
Adj_Close <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
Volume <- c("0", "0", "0", "0","0")
InvescoDf <- data.frame(Date, Open, High, Low, Close, Adj_Close, Volume)
我正在尝试:
library(tidyverse)
library(zoo)
df <- InvescoDf
df$Date <- as.Date(df$Date, "%d-%m-%y") # assign as Date
df$Date<-as.POSIXlt(df$Date,format="%Y-%m-%d") # assign as POSIXlt
df1.zoo<-zoo(df[,-1],df[,1]) # assign Date as index
df2.zoo<-zoo(,seq(start(df1.zoo),end(df1.zoo),by="day")) # create data sequence
df2 <- merge(df1.zoo,df2.zoo, all=TRUE) # merge
Error :
Warning message:
In merge.zoo(df1.zoo, df2.zoo, all = TRUE) :
Index vectors are of different classes: POSIXlt POSIXct
显然 seq()
创建了一个 POSIXct
,但我只需要天数而不是小时数。我不太了解 zoo 对象,可能是有错误。请帮助并告诉我您需要什么进一步的信息。
编辑:
现在我正在尝试遍历多个 dfs,有人可以帮忙吗?
OssiamDf <- InvescoDf
new_list <- list(InvescoDf, OssiamDf)
new_list <- lapply(new_list, function(dat) {
# change all to date
dat[[1]] <- as.Date(dat3[[1]], "%d-%m-%y")
# change the other variables to num
dat[-1] <- lapply(dat[-1], function(x) as.numeric(as.character(x)))
# complete the dates?
dat[[1]] <- lapply(dat[[1]], complete(dat[[1]],
Date = seq(min(dat[[1]]), max(dat[[1]]), by = "day")))
dat
})
我不知道如何把complete
状态变成lapply
请帮助。
您可以将 Date
转换为日期对象,然后使用 tidyr
中的 complete
来填充缺失的日期。
library(dplyr)
library(tidyr)
InvescoDf %>%
mutate(Date = as.Date(Date, "%d-%m-%y")) %>%
complete(Date = seq(min(Date), max(Date), by = "day"))
# Date Open High Low Close Adj_Close Volume
# <date> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#1 2019-01-23 69.8 69.8 69.8 69.8 69.8 0
#2 2019-01-24 69.4 69.4 69.4 69.4 69.4 0
#3 2019-01-25 69.5 69.5 69.5 69.5 69.5 0
#4 2019-01-26 NA NA NA NA NA NA
#5 2019-01-27 NA NA NA NA NA NA
#6 2019-01-28 NA NA NA NA NA NA
#7 2019-01-29 NA NA NA NA NA NA
#8 2019-01-30 70.3 70.3 70.3 70.3 70.3 0
#9 2019-01-31 69.6 69.6 69.6 69.6 69.6 0
要对列表中的多个数据帧执行此操作,我们可以这样做
new_list <- lapply(new_list, function(dat) {
dat[[1]] <- as.Date(dat[[1]], "%d-%m-%y")
# change the other variables to num
dat[-1] <- lapply(dat[-1], function(x) as.numeric(as.character(x)))
# complete the dates?
dat <- complete(dat, Date = seq(min(Date), max(Date), by = "day"))
#OR
#dat <- complete(dat, Date = seq(min(dat[[1]]), max(dat[[1]]), by = "day"))
dat
})
数据
InvescoDf <- type.convert(InvescoDf)
我正在学习 R,目前正在尝试填充缺少日期和 NA 值的数据框。
数据样本:
Date <- c("23-01-19", "24-01-19", "25-01-19", "30-01-19", "31-01-19" )
Open <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
High <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
Low <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
Close <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
Adj_Close <- c("69.849998", "69.440002", "69.540001", "70.32", "69.559998")
Volume <- c("0", "0", "0", "0","0")
InvescoDf <- data.frame(Date, Open, High, Low, Close, Adj_Close, Volume)
我正在尝试:
library(tidyverse)
library(zoo)
df <- InvescoDf
df$Date <- as.Date(df$Date, "%d-%m-%y") # assign as Date
df$Date<-as.POSIXlt(df$Date,format="%Y-%m-%d") # assign as POSIXlt
df1.zoo<-zoo(df[,-1],df[,1]) # assign Date as index
df2.zoo<-zoo(,seq(start(df1.zoo),end(df1.zoo),by="day")) # create data sequence
df2 <- merge(df1.zoo,df2.zoo, all=TRUE) # merge
Error : Warning message: In merge.zoo(df1.zoo, df2.zoo, all = TRUE) : Index vectors are of different classes: POSIXlt POSIXct
显然 seq()
创建了一个 POSIXct
,但我只需要天数而不是小时数。我不太了解 zoo 对象,可能是有错误。请帮助并告诉我您需要什么进一步的信息。
编辑:
现在我正在尝试遍历多个 dfs,有人可以帮忙吗?
OssiamDf <- InvescoDf
new_list <- list(InvescoDf, OssiamDf)
new_list <- lapply(new_list, function(dat) {
# change all to date
dat[[1]] <- as.Date(dat3[[1]], "%d-%m-%y")
# change the other variables to num
dat[-1] <- lapply(dat[-1], function(x) as.numeric(as.character(x)))
# complete the dates?
dat[[1]] <- lapply(dat[[1]], complete(dat[[1]],
Date = seq(min(dat[[1]]), max(dat[[1]]), by = "day")))
dat
})
我不知道如何把complete
状态变成lapply
请帮助。
您可以将 Date
转换为日期对象,然后使用 tidyr
中的 complete
来填充缺失的日期。
library(dplyr)
library(tidyr)
InvescoDf %>%
mutate(Date = as.Date(Date, "%d-%m-%y")) %>%
complete(Date = seq(min(Date), max(Date), by = "day"))
# Date Open High Low Close Adj_Close Volume
# <date> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#1 2019-01-23 69.8 69.8 69.8 69.8 69.8 0
#2 2019-01-24 69.4 69.4 69.4 69.4 69.4 0
#3 2019-01-25 69.5 69.5 69.5 69.5 69.5 0
#4 2019-01-26 NA NA NA NA NA NA
#5 2019-01-27 NA NA NA NA NA NA
#6 2019-01-28 NA NA NA NA NA NA
#7 2019-01-29 NA NA NA NA NA NA
#8 2019-01-30 70.3 70.3 70.3 70.3 70.3 0
#9 2019-01-31 69.6 69.6 69.6 69.6 69.6 0
要对列表中的多个数据帧执行此操作,我们可以这样做
new_list <- lapply(new_list, function(dat) {
dat[[1]] <- as.Date(dat[[1]], "%d-%m-%y")
# change the other variables to num
dat[-1] <- lapply(dat[-1], function(x) as.numeric(as.character(x)))
# complete the dates?
dat <- complete(dat, Date = seq(min(Date), max(Date), by = "day"))
#OR
#dat <- complete(dat, Date = seq(min(dat[[1]]), max(dat[[1]]), by = "day"))
dat
})
数据
InvescoDf <- type.convert(InvescoDf)