新列:在 date = date 时传递值
New column: transfer values when date = date
我有两个数据框。两者都有一个日期列。 DF A 大约有 14000 行,包含每日 returns。 DF B 大约有 600 行,包含相应月份的平均每日 CPI。所以在 DF B 中只有日期列的 %m-%Y 是决定性的。这是一个简短的示例,数据框的样子:
DF A:
Date Returns
<date> <dbl>
y-m-d
1965-01-06 1.0049728
1965-01-07 1.0081001
1965-01-08 1.0094975
1965-01-12 1.0016388
1965-01-13 0.9946541
1965-01-14 1.0060906
1965-01-18 0.9860844
1965-01-19 0.9989457
1965-01-20 0.9988501
1965-01-21 1.0012444
1965-01-22 0.9931840
1965-01-25 0.9903565
1965-01-26 0.9986629
1965-01-27 1.0081077
1965-01-28 0.9912110
1965-01-29 0.9960394
1965-02-01 1.0004506
1965-02-02 0.9967118
1965-02-03 1.0074635
1965-02-04 1.0108144
# … with 14,065 more rows
DF B
DATE CPI
y-m-d
1 1965-01-01 1.000275
2 1965-02-01 1.000271
3 1965-03-01 1.000278
4 1965-04-01 1.000318
5 1965-05-01 1.000274
6 1965-06-01 1.000274
7 1965-07-01 1.000276
8 1965-08-01 1.000252
9 1965-09-01 1.000257
10 1965-10-01 1.000236
11 1965-11-01 1.000229
# … with 676 more rows
我想相互计算两个 DF。为此,我想在 DF A 中创建一个新列,其中为 XY 月的所有日期输入来自 DF B 的相应值。如下:
输出
Date Returns OUTPUT = CPI from data frame B
<date> <dbl> <dbl>
y-m-d
1965-01-06 1.0049728 1.000275
1965-01-07 1.0081001 1.000275
1965-01-08 1.0094975 1.000275
1965-01-12 1.0016388 1.000275
1965-01-13 0.9946541 1.000275
1965-01-14 1.0060906 1.000275
1965-01-18 0.9860844 1.000275
1965-01-19 0.9989457 1.000275
1965-01-20 0.9988501 1.000275
1965-01-21 1.0012444 1.000275
.............................
1965-02-01 1.0004506 1.000271
1965-02-02 0.9967118 1.000271
1965-02-03 1.0074635 1.000271
1965-02-04 1.0108144 1.000271
.............................
1965-03-01 0.9945527 1.000278
1965-03-02 0.9967290 1.000278
1965-03-03 1.0043810 1.000278
1965-03-04 1.0043043 1.000278
# … with 14,065 more rows
我假设这是可行的,参考日期列的月份和年份。但是,我是R的新手,不知道如何实现。
到目前为止,这是我的代码。
#DF A
library(quantmod)
library(mosaic)
library(dplyr)
library(xts)
data.N225 <- getSymbols("^N225",from="1965-01-01", to="2022-02-30", auto.assign=FALSE, src='yahoo') # funktion getSymbols wenn wir Kapitalmarkt haben wollten
class(data.N225)
data.N225[c(1:3, nrow(data.N225)),]
#View(data.N225)
data.N225<- na.omit(data.N225)
N225 <- data.N225[,6]
N225$Returns<- diff(log(N225$N225.Adjusted))+1
N225[c(1:3,nrow(N225)),]
options(digits=5)
N225.stetig <- N225[-1,2]
N225.stetig[c(1:3,nrow(N225.stetig)),]
DF_A <- as_tibble(data.frame(Date = index(N225.stetig), coredata(N225.stetig)))
View(DF_A)
#################
#DF B
setwd("enter file path")
CPI<- read.csv("InflationJapan.csv")
#View(CPI)
CPI$percent <- CPI[,2]/100
CPI$CPI.umsk <- (CPI[,3]/250)
CPI$CPI <- CPI[,4]+1
class(CPI$DATE)
CPI$DATE <- as.Date.character(CPI$DATE, tryFormats = c("%Y-%m-%d"))
class(CPI$DATE)
DF<- as_tibble(data.frame(Date = index(CPI$DATE), coredata(CPI)))
DF_B <- DF[,c(2,6)]
options(digits=7)
View(DF_B)
CSV文件可以从以下link.
下载
https://drive.google.com/drive/folders/1GUlce4wR_lWiCbJivwDadgDYaz2ir0_L?usp=sharing
希望有人能帮助我。非常感谢!
下面我提供了一个示例,说明如何将 Y-M 的列添加到每个数据帧,然后在循环中将 CPI 输出从数据帧 B 匹配到数据帧 A。
希望对您有所帮助!
library(lubridate)
# read in a sample of dataframe A data
dfA <- read.csv("DFA.csv")
# add column with y-m, add empty column for Output
dfA <- dfA %>%
mutate(YearMonth = format(as.Date(dfA$Date, format = "%Y-%m-%d"), "%Y-%m")) %>%
mutate(Output = NA)
dfA
Date Returns YearMonth Output
1 1965-01-06 1.0049728 1965-01 NA
2 1965-01-07 1.0081001 1965-01 NA
3 1965-01-08 1.0094975 1965-01 NA
4 1965-01-12 1.0016388 1965-01 NA
5 1965-01-13 0.9946541 1965-01 NA
6 1965-01-14 1.0060906 1965-01 NA
7 1965-02-01 1.0004506 1965-02 NA
8 1965-02-02 0.9967118 1965-02 NA
9 1965-02-03 1.0074635 1965-02 NA
10 1965-02-04 1.0108144 1965-02 NA
11 1965-03-01 0.9945527 1965-03 NA
12 1965-03-02 0.9967290 1965-03 NA
dfB <- read.csv("InflationJapan.csv")
# add column with y-m
dfB <- dfB %>%
mutate(YearMonth = format(as.Date(dfB$Date, format = "%m/%d/%Y"), "%Y-%m"))
head(dfB)
Date CPI YearMonth
1 1/1/1965 6.870229 1965-01
2 2/1/1965 6.768559 1965-02
3 3/1/1965 6.956522 1965-03
4 4/1/1965 7.948443 1965-04
5 5/1/1965 6.844920 1965-05
6 6/1/1965 6.859593 1965-06
# Add CPI per month/ year to dataframe A
for (i in 1:nrow(dfA)) { # for each row in dataframe A
for (j in 1:nrow(dfB)) { # for each row in dataframe B
if (dfA$YearMonth[i] == dfB$YearMonth[j]) { # if year and month match
dfA$Output[i] <- dfB$CPI[j] # add CPI value from dataframe B to dataframe A
}
}
}
dfA
Date Returns YearMonth Output
1 1965-01-06 1.0049728 1965-01 6.870229
2 1965-01-07 1.0081001 1965-01 6.870229
3 1965-01-08 1.0094975 1965-01 6.870229
4 1965-01-12 1.0016388 1965-01 6.870229
5 1965-01-13 0.9946541 1965-01 6.870229
6 1965-01-14 1.0060906 1965-01 6.870229
7 1965-02-01 1.0004506 1965-02 6.768559
8 1965-02-02 0.9967118 1965-02 6.768559
9 1965-02-03 1.0074635 1965-02 6.768559
10 1965-02-04 1.0108144 1965-02 6.768559
11 1965-03-01 0.9945527 1965-03 6.956522
12 1965-03-02 0.9967290 1965-03 6.956522
我有两个数据框。两者都有一个日期列。 DF A 大约有 14000 行,包含每日 returns。 DF B 大约有 600 行,包含相应月份的平均每日 CPI。所以在 DF B 中只有日期列的 %m-%Y 是决定性的。这是一个简短的示例,数据框的样子:
DF A:
Date Returns
<date> <dbl>
y-m-d
1965-01-06 1.0049728
1965-01-07 1.0081001
1965-01-08 1.0094975
1965-01-12 1.0016388
1965-01-13 0.9946541
1965-01-14 1.0060906
1965-01-18 0.9860844
1965-01-19 0.9989457
1965-01-20 0.9988501
1965-01-21 1.0012444
1965-01-22 0.9931840
1965-01-25 0.9903565
1965-01-26 0.9986629
1965-01-27 1.0081077
1965-01-28 0.9912110
1965-01-29 0.9960394
1965-02-01 1.0004506
1965-02-02 0.9967118
1965-02-03 1.0074635
1965-02-04 1.0108144
# … with 14,065 more rows
DF B
DATE CPI
y-m-d
1 1965-01-01 1.000275
2 1965-02-01 1.000271
3 1965-03-01 1.000278
4 1965-04-01 1.000318
5 1965-05-01 1.000274
6 1965-06-01 1.000274
7 1965-07-01 1.000276
8 1965-08-01 1.000252
9 1965-09-01 1.000257
10 1965-10-01 1.000236
11 1965-11-01 1.000229
# … with 676 more rows
我想相互计算两个 DF。为此,我想在 DF A 中创建一个新列,其中为 XY 月的所有日期输入来自 DF B 的相应值。如下:
输出
Date Returns OUTPUT = CPI from data frame B
<date> <dbl> <dbl>
y-m-d
1965-01-06 1.0049728 1.000275
1965-01-07 1.0081001 1.000275
1965-01-08 1.0094975 1.000275
1965-01-12 1.0016388 1.000275
1965-01-13 0.9946541 1.000275
1965-01-14 1.0060906 1.000275
1965-01-18 0.9860844 1.000275
1965-01-19 0.9989457 1.000275
1965-01-20 0.9988501 1.000275
1965-01-21 1.0012444 1.000275
.............................
1965-02-01 1.0004506 1.000271
1965-02-02 0.9967118 1.000271
1965-02-03 1.0074635 1.000271
1965-02-04 1.0108144 1.000271
.............................
1965-03-01 0.9945527 1.000278
1965-03-02 0.9967290 1.000278
1965-03-03 1.0043810 1.000278
1965-03-04 1.0043043 1.000278
# … with 14,065 more rows
我假设这是可行的,参考日期列的月份和年份。但是,我是R的新手,不知道如何实现。
到目前为止,这是我的代码。
#DF A
library(quantmod)
library(mosaic)
library(dplyr)
library(xts)
data.N225 <- getSymbols("^N225",from="1965-01-01", to="2022-02-30", auto.assign=FALSE, src='yahoo') # funktion getSymbols wenn wir Kapitalmarkt haben wollten
class(data.N225)
data.N225[c(1:3, nrow(data.N225)),]
#View(data.N225)
data.N225<- na.omit(data.N225)
N225 <- data.N225[,6]
N225$Returns<- diff(log(N225$N225.Adjusted))+1
N225[c(1:3,nrow(N225)),]
options(digits=5)
N225.stetig <- N225[-1,2]
N225.stetig[c(1:3,nrow(N225.stetig)),]
DF_A <- as_tibble(data.frame(Date = index(N225.stetig), coredata(N225.stetig)))
View(DF_A)
#################
#DF B
setwd("enter file path")
CPI<- read.csv("InflationJapan.csv")
#View(CPI)
CPI$percent <- CPI[,2]/100
CPI$CPI.umsk <- (CPI[,3]/250)
CPI$CPI <- CPI[,4]+1
class(CPI$DATE)
CPI$DATE <- as.Date.character(CPI$DATE, tryFormats = c("%Y-%m-%d"))
class(CPI$DATE)
DF<- as_tibble(data.frame(Date = index(CPI$DATE), coredata(CPI)))
DF_B <- DF[,c(2,6)]
options(digits=7)
View(DF_B)
CSV文件可以从以下link.
下载https://drive.google.com/drive/folders/1GUlce4wR_lWiCbJivwDadgDYaz2ir0_L?usp=sharing
希望有人能帮助我。非常感谢!
下面我提供了一个示例,说明如何将 Y-M 的列添加到每个数据帧,然后在循环中将 CPI 输出从数据帧 B 匹配到数据帧 A。
希望对您有所帮助!
library(lubridate)
# read in a sample of dataframe A data
dfA <- read.csv("DFA.csv")
# add column with y-m, add empty column for Output
dfA <- dfA %>%
mutate(YearMonth = format(as.Date(dfA$Date, format = "%Y-%m-%d"), "%Y-%m")) %>%
mutate(Output = NA)
dfA
Date Returns YearMonth Output
1 1965-01-06 1.0049728 1965-01 NA
2 1965-01-07 1.0081001 1965-01 NA
3 1965-01-08 1.0094975 1965-01 NA
4 1965-01-12 1.0016388 1965-01 NA
5 1965-01-13 0.9946541 1965-01 NA
6 1965-01-14 1.0060906 1965-01 NA
7 1965-02-01 1.0004506 1965-02 NA
8 1965-02-02 0.9967118 1965-02 NA
9 1965-02-03 1.0074635 1965-02 NA
10 1965-02-04 1.0108144 1965-02 NA
11 1965-03-01 0.9945527 1965-03 NA
12 1965-03-02 0.9967290 1965-03 NA
dfB <- read.csv("InflationJapan.csv")
# add column with y-m
dfB <- dfB %>%
mutate(YearMonth = format(as.Date(dfB$Date, format = "%m/%d/%Y"), "%Y-%m"))
head(dfB)
Date CPI YearMonth
1 1/1/1965 6.870229 1965-01
2 2/1/1965 6.768559 1965-02
3 3/1/1965 6.956522 1965-03
4 4/1/1965 7.948443 1965-04
5 5/1/1965 6.844920 1965-05
6 6/1/1965 6.859593 1965-06
# Add CPI per month/ year to dataframe A
for (i in 1:nrow(dfA)) { # for each row in dataframe A
for (j in 1:nrow(dfB)) { # for each row in dataframe B
if (dfA$YearMonth[i] == dfB$YearMonth[j]) { # if year and month match
dfA$Output[i] <- dfB$CPI[j] # add CPI value from dataframe B to dataframe A
}
}
}
dfA
Date Returns YearMonth Output
1 1965-01-06 1.0049728 1965-01 6.870229
2 1965-01-07 1.0081001 1965-01 6.870229
3 1965-01-08 1.0094975 1965-01 6.870229
4 1965-01-12 1.0016388 1965-01 6.870229
5 1965-01-13 0.9946541 1965-01 6.870229
6 1965-01-14 1.0060906 1965-01 6.870229
7 1965-02-01 1.0004506 1965-02 6.768559
8 1965-02-02 0.9967118 1965-02 6.768559
9 1965-02-03 1.0074635 1965-02 6.768559
10 1965-02-04 1.0108144 1965-02 6.768559
11 1965-03-01 0.9945527 1965-03 6.956522
12 1965-03-02 0.9967290 1965-03 6.956522