计算数据框列中单元格之间的差异
Calculate the differences between cells in the columns of a dataframe
我有一个来自 from here:
的 STOXX 投资领域
head(df)
Date SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
1 1986-12-31 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56
2 1987-01-01 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56
3 1987-01-02 770.89 891.78 82.57 97.80 97.43 69.37 647.62 65.81
4 1987-01-05 771.89 898.33 82.82 98.60 98.19 69.16 649.94 65.82
5 1987-01-06 775.92 902.32 83.28 99.19 98.83 69.50 652.49 66.06
6 1987-01-07 781.21 899.15 83.78 98.96 98.62 70.59 651.97 66.20
理解动作分配的原则。我必须在每个月末确定分配,以便每一股对总投资组合的风险相同。
然后我跟着 this tutorial 使你成为 Python。
但是,一方面,我在计算每日 returns 时遇到了问题。事实上,我拥有所有数据,这要归功于:
url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
df$Date <- as.Date(sub('(.{2}).(.{2}).(.{4})', "\3-\2-\1", df$Date))
然后我必须计算它们。我已经看到 there is a function, Delt
说可以在两列之间进行。但我必须区分每个单元格。我不知道该怎么做:
new = df[2:9]
# How to calculate the returns ?
Delt(df.a_given_day,df.a_given_day_plus_1,k=0:2) #... Delt do it 0,1 y 2 periods between two columns.
之后,我可以用 cov_matrix_df <- cov(data.matrix(new, rownames.force = NA))
计算协方差并继续搜索以计算风险。
另一方面,我不知道如何修改它来确定,在每个月底风险决定在每个月底月,分配。
我的尝试:
来自 我试过这个答案:
dr_df = cbind(df[-1,1],apply(df[,-1],2,function(x) diff(x)/head(x,-1)))
哪个returns:
> head(dr_df)
SX5P SX5E SXXP SXXE SXXF SXXA
[1,] 6209 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
[2,] 6210 -0.005303226 -0.010035301 -0.002295795 -0.007912355 -0.006424638 0.004488850
[3,] 6213 0.001297202 0.007344861 0.003027734 0.008179959 0.007800472 -0.003027245
[4,] 6214 0.005220951 0.004441575 0.005554214 0.005983773 0.006517975 0.004916136
[5,] 6215 0.006817713 -0.003513166 0.006003842 -0.002318782 -0.002124861 0.015683453
[6,] 6216 -0.004595435 -0.013101262 -0.003103366 -0.011014551 -0.009531535 0.005949851
DK5F DKXF
[1,] 0.0000000000 0.0000000000
[2,] 0.0036574404 0.0038133008
[3,] 0.0035823477 0.0001519526
[4,] 0.0039234391 0.0036463081
[5,] -0.0007969471 0.0021192855
[6,] -0.0098164026 -0.0087613293
这看起来不错,但我不明白代码:/当我尝试创建协方差矩阵时遇到一些问题:
> cov(dr_df[2:8])
Error in cov(dr_df[2:8]) : supply both 'x' and 'y' or a matrix-like 'x'
> cov(dr_df)
SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
9886513 NA NA NA NA NA NA NA NA
SX5P NA NA NA NA NA NA NA NA NA
SX5E NA NA NA NA NA NA NA NA NA
SXXP NA NA NA NA NA NA NA NA NA
SXXE NA NA NA NA NA NA NA NA NA
SXXF NA NA NA NA NA NA NA NA NA
SXXA NA NA NA NA NA NA NA NA NA
DK5F NA NA NA NA NA NA NA NA NA
DKXF NA NA NA NA NA NA NA NA NA
索伦的尝试
似乎我对 SX5P - SX5P1d
的二元运算符有非数字参数:
> library(lubridate)
Attaching package: ‘lubridate’
The following objects are masked from ‘package:data.table’:
hour, mday, month, quarter, wday, week, yday, year
The following object is masked from ‘package:base’:
date
> library(data.table)
>
>
> url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
> df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
> names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
> df$Date <- dmy(df$Date)
> df$End_month_date <- ceiling_date(df$Date,unit="month") - days(1)
>
> dt <- as.data.table(df)
>
> #daily returns
> dt[, c("last_date",'SX5P1d','SX5E1d','SXXP1d','SXXE1d','SXXF1d','SXXA1d','DK5F1d','DKXF1d') := shift(.SD[,c("Date",'SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')], n=1, fill=NA, type=c("lag")),]
Warning messages:
1: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'last_date' (recycled leaving remainder of 5 items).
2: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SX5P1d' (recycled leaving remainder of 5 items).
3: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SX5E1d' (recycled leaving remainder of 5 items).
4: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXP1d' (recycled leaving remainder of 5 items).
5: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXE1d' (recycled leaving remainder of 5 items).
6: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXF1d' (recycled leaving remainder of 5 items).
7: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXA1d' (recycled leaving remainder of 5 items).
8: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'DK5F1d' (recycled leaving remainder of 5 items).
9: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'DKXF1d' (recycled leaving remainder of 5 items).
> dt[,`:=`(SX5P_r=SX5P-SX5P1d,
+ SX5E_r=SX5E-SX5E1d,
+ SXXP_r=SXXP-SXXP1d,
+ SXXE_r=SXXE-SXXE1d,
+ SXXF_r=SXXF-SXXF1d,
+ SXXA_r=SXXA-SXXA1d,
+ DK5F_r=DK5F-DK5F1d,
+ DKXF_r=DKXF-DKXF1d)]
Error in SX5P - SX5P1d : non-numeric argument to binary operator
> #monthly returns
> returns <- dt[,list(SX5P=sum(SX5P_r,na.rm=T),
+ SX5E=sum(SX5E_r,na.rm=T),
+ SXXP=sum(SXXP_r,na.rm=T),
+ SXXE=sum(SXXE_r,na.rm=T),
+ SXXF=sum(SXXF_r,na.rm=T),
+ SXXA=sum(SXXA_r,na.rm=T),
+ DK5F=sum(DK5F_r,na.rm=T),
+ DKXF=sum(DKXF_r,na.rm=T)),by="End_month_date"]
Error in `[.data.table`(dt, , list(SX5P = sum(SX5P_r, na.rm = T), SX5E = sum(SX5E_r, :
object 'SX5P_r' not found
这是在 shift
操作后生成警告消息的 dt
:
> head(dt)
Date SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF End_month_date
1: 1986-12-31 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56 1986-12-31
2: 1987-01-01 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56 1987-01-31
3: 1987-01-02 770.89 891.78 82.57 97.80 97.43 69.37 647.62 65.81 1987-01-31
4: 1987-01-05 771.89 898.33 82.82 98.60 98.19 69.16 649.94 65.82 1987-01-31
5: 1987-01-06 775.92 902.32 83.28 99.19 98.83 69.50 652.49 66.06 1987-01-31
6: 1987-01-07 781.21 899.15 83.78 98.96 98.62 70.59 651.97 66.20 1987-01-31
last_date SX5P1d SX5E1d SXXP1d SXXE1d SXXF1d SXXA1d DK5F1d DKXF1d
1: NA NA NA NA NA NA NA NA NA
2: Date Date Date Date Date Date Date Date Date
3: SX5P SX5P SX5P SX5P SX5P SX5P SX5P SX5P SX5P
4: SX5E SX5E SX5E SX5E SX5E SX5E SX5E SX5E SX5E
5: SXXP SXXP SXXP SXXP SXXP SXXP SXXP SXXP SXXP
6: SXXE SXXE SXXE SXXE SXXE SXXE SXXE SXXE SXXE
数据在日期 2016-03-25 和 2016-03-28 有一些不稳定的值。
library(dplyr)
df <- filter(df, SX5P>0) # drop erratic data points
percent_change <- function(x) (x - lag(x)) / lag(x) # function that calculates percentage change
daily_return <- df %>%
mutate_at(vars(-Date), percent_change) %>% # for each column excluding Date, apply percent_change function
filter(complete.cases(.)) %>% # filter out NAs
select(-Date) %>% # drop Date variable
as.matrix() # convert to matrix
head(daily_return, 5)
# SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
#[1,] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.0000000000 0.0000000000
#[2,] -0.005303226 -0.010035301 -0.002295795 -0.007912355 -0.006424638 0.004488850 0.0036574404 0.0038133008
#[3,] 0.001297202 0.007344861 0.003027734 0.008179959 0.007800472 -0.003027245 0.0035823477 0.0001519526
#[4,] 0.005220951 0.004441575 0.005554214 0.005983773 0.006517975 0.004916136 0.0039234391 0.0036463081
#[5,] 0.006817713 -0.003513166 0.006003842 -0.002318782 -0.002124861 0.015683453 -0.0007969471 0.0021192855
cov(daily_return)
# SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
#SX5P 0.0001458898 0.0001531675 0.0001339905 0.0001400356 0.0001335696 0.0001283412 0.0001355236 0.0001410957
#SX5E 0.0001531675 0.0001781671 0.0001431415 0.0001622366 0.0001519764 0.0001252829 0.0001497803 0.0001561299
#SXXP 0.0001339905 0.0001431415 0.0001267415 0.0001328073 0.0001265858 0.0001210988 0.0001314346 0.0001359420
#SXXE 0.0001400356 0.0001622366 0.0001328073 0.0001502001 0.0001410354 0.0001165071 0.0001412857 0.0001471070
#SXXF 0.0001335696 0.0001519764 0.0001265858 0.0001410354 0.0001343114 0.0001130397 0.0001380515 0.0001432671
#SXXA 0.0001283412 0.0001252829 0.0001210988 0.0001165071 0.0001130397 0.0001257977 0.0001221743 0.0001254364
#DK5F 0.0001355236 0.0001497803 0.0001314346 0.0001412857 0.0001380515 0.0001221743 0.0001914781 0.0001946354
#DKXF 0.0001410957 0.0001561299 0.0001359420 0.0001471070 0.0001432671 0.0001254364 0.0001946354 0.0002103559
每月return
library(lubridate)
percent_change2 <- function(x)last(x)/first(x) - 1
monthly_return <- df %>%
group_by(gr = floor_date(Date, unit = "month")) %>%
summarize_at(vars(-Date, -gr), percent_change2) %>%
ungroup() %>%
select(-gr) %>%
as.matrix()
head(monthly_return, 5)
SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
[1,] 0.00000000 0.000000000 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
[2,] -0.01089032 -0.046335561 0.005316578 -0.025867316 -0.025494595 0.04170287 -0.02977095 -0.01281269
[3,] 0.03167912 -0.009493186 0.032518367 -0.011141476 -0.011708861 0.07918740 0.05577361 0.04355828
[4,] 0.02633308 0.031731340 0.025284157 0.027359491 0.027197099 0.02322630 0.04121760 0.03157433
[5,] 0.02660200 -0.002816901 0.023347620 -0.003767437 -0.002362366 0.05061867 0.03758165 0.03917672
cov(monthly_return)
SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
SX5P 0.002068415 0.002243488 0.002011784 0.002160762 0.002076261 0.001867744 0.002282369 0.002381529
SX5E 0.002243488 0.002712719 0.002225923 0.002605715 0.002448324 0.001857319 0.002549326 0.002671546
SXXP 0.002011784 0.002225923 0.002025003 0.002182308 0.002095078 0.001873543 0.002321951 0.002407614
SXXE 0.002160762 0.002605715 0.002182308 0.002548197 0.002399266 0.001826243 0.002514475 0.002629281
SXXF 0.002076261 0.002448324 0.002095078 0.002399266 0.002291523 0.001797954 0.002458753 0.002558314
SXXA 0.001867744 0.001857319 0.001873543 0.001826243 0.001797954 0.001927949 0.002134767 0.002189677
DK5F 0.002282369 0.002549326 0.002321951 0.002514475 0.002458753 0.002134767 0.003414248 0.003523391
DKXF 0.002381529 0.002671546 0.002407614 0.002629281 0.002558314 0.002189677 0.003523391 0.003813587
data.table版本
library(data.table)
per_change <- function(x)x/shift(x) - 1
setDT(df)
df <- df[SX5P>0]
daily <- df[, lapply(.SD, per_change), .SDcols=-"Date"][-1, ]
daily
cov(daily)
monthly <- df[, lapply(.SD, percent_change2), by = .(gr=floor_date(Date, unit = "month")), .SDcols=-"Date"][-1, -"gr" ]
cov(monthly)
以下解决方案将值移动一个日期(这可能比一个文字日期日期更好,以适应缺失的日子、周末、假期等)。它找到 'today' 和 'yesterday' 的差异作为 return 并对整个月的总数求和,按月的最后一天汇总。
library(lubridate)
library(data.table)
url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
df$Date <- dmy(df$Date)
df$End_month_date <- ceiling_date(df$Date,unit="month") - days(1)
dt <- as.data.table(df)
#daily returns
dt[, c("last_date",'SX5P1d','SX5E1d','SXXP1d','SXXE1d','SXXF1d','SXXA1d','DK5F1d','DKXF1d') := shift(.SD[,c("Date",'SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')], n=1, fill=NA, type=c("lag")),]
dt[,`:=`(SX5P_r=SX5P-SX5P1d,
SX5E_r=SX5E-SX5E1d,
SXXP_r=SXXP-SXXP1d,
SXXE_r=SXXE-SXXE1d,
SXXF_r=SXXF-SXXF1d,
SXXA_r=SXXA-SXXA1d,
DK5F_r=DK5F-DK5F1d,
DKXF_r=DKXF-DKXF1d)]
#monthly returns
returns <- dt[,list(SX5P=sum(SX5P_r,na.rm=T),
SX5E=sum(SX5E_r,na.rm=T),
SXXP=sum(SXXP_r,na.rm=T),
SXXE=sum(SXXE_r,na.rm=T),
SXXF=sum(SXXF_r,na.rm=T),
SXXA=sum(SXXA_r,na.rm=T),
DK5F=sum(DK5F_r,na.rm=T),
DKXF=sum(DKXF_r,na.rm=T)),by="End_month_date"]
结果:
> returns
End_month_date SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
1: 1986-12-31 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
2: 1987-01-31 -8.44 -41.74 0.44 -2.55 -2.50 2.88 -19.21 -0.84
3: 1987-02-28 21.55 -18.11 2.53 -1.95 -1.87 6.15 41.03 3.32
4: 1987-03-31 24.13 28.47 2.67 2.80 2.62 2.53 31.40 2.53
5: 1987-04-30 25.96 12.02 2.33 0.96 0.82 3.44 22.66 2.64
---
355: 2016-06-30 -94.12 -198.74 -17.57 -20.95 -20.18 -13.80 -384.00 -22.60
356: 2016-07-31 64.29 126.02 12.01 15.55 16.22 8.23 219.13 18.83
357: 2016-08-31 -14.71 32.37 1.64 3.98 2.69 -0.67 -26.23 -4.42
358: 2016-09-30 -19.74 -20.89 -0.61 -0.45 -0.79 -0.76 -48.93 -0.71
359: 2016-10-31 27.89 27.26 3.18 2.42 3.14 3.83 96.24 5.45
我有一个来自 from here:
的 STOXX 投资领域 head(df)
Date SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
1 1986-12-31 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56
2 1987-01-01 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56
3 1987-01-02 770.89 891.78 82.57 97.80 97.43 69.37 647.62 65.81
4 1987-01-05 771.89 898.33 82.82 98.60 98.19 69.16 649.94 65.82
5 1987-01-06 775.92 902.32 83.28 99.19 98.83 69.50 652.49 66.06
6 1987-01-07 781.21 899.15 83.78 98.96 98.62 70.59 651.97 66.20
理解动作分配的原则。我必须在每个月末确定分配,以便每一股对总投资组合的风险相同。
然后我跟着 this tutorial 使你成为 Python。
但是,一方面,我在计算每日 returns 时遇到了问题。事实上,我拥有所有数据,这要归功于:
url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
df$Date <- as.Date(sub('(.{2}).(.{2}).(.{4})', "\3-\2-\1", df$Date))
然后我必须计算它们。我已经看到 there is a function, Delt
说可以在两列之间进行。但我必须区分每个单元格。我不知道该怎么做:
new = df[2:9]
# How to calculate the returns ?
Delt(df.a_given_day,df.a_given_day_plus_1,k=0:2) #... Delt do it 0,1 y 2 periods between two columns.
之后,我可以用 cov_matrix_df <- cov(data.matrix(new, rownames.force = NA))
计算协方差并继续搜索以计算风险。
另一方面,我不知道如何修改它来确定,在每个月底风险决定在每个月底月,分配。
我的尝试:
来自
dr_df = cbind(df[-1,1],apply(df[,-1],2,function(x) diff(x)/head(x,-1)))
哪个returns:
> head(dr_df)
SX5P SX5E SXXP SXXE SXXF SXXA
[1,] 6209 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
[2,] 6210 -0.005303226 -0.010035301 -0.002295795 -0.007912355 -0.006424638 0.004488850
[3,] 6213 0.001297202 0.007344861 0.003027734 0.008179959 0.007800472 -0.003027245
[4,] 6214 0.005220951 0.004441575 0.005554214 0.005983773 0.006517975 0.004916136
[5,] 6215 0.006817713 -0.003513166 0.006003842 -0.002318782 -0.002124861 0.015683453
[6,] 6216 -0.004595435 -0.013101262 -0.003103366 -0.011014551 -0.009531535 0.005949851
DK5F DKXF
[1,] 0.0000000000 0.0000000000
[2,] 0.0036574404 0.0038133008
[3,] 0.0035823477 0.0001519526
[4,] 0.0039234391 0.0036463081
[5,] -0.0007969471 0.0021192855
[6,] -0.0098164026 -0.0087613293
这看起来不错,但我不明白代码:/当我尝试创建协方差矩阵时遇到一些问题:
> cov(dr_df[2:8])
Error in cov(dr_df[2:8]) : supply both 'x' and 'y' or a matrix-like 'x'
> cov(dr_df)
SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
9886513 NA NA NA NA NA NA NA NA
SX5P NA NA NA NA NA NA NA NA NA
SX5E NA NA NA NA NA NA NA NA NA
SXXP NA NA NA NA NA NA NA NA NA
SXXE NA NA NA NA NA NA NA NA NA
SXXF NA NA NA NA NA NA NA NA NA
SXXA NA NA NA NA NA NA NA NA NA
DK5F NA NA NA NA NA NA NA NA NA
DKXF NA NA NA NA NA NA NA NA NA
索伦的尝试
似乎我对 SX5P - SX5P1d
的二元运算符有非数字参数:
> library(lubridate)
Attaching package: ‘lubridate’
The following objects are masked from ‘package:data.table’:
hour, mday, month, quarter, wday, week, yday, year
The following object is masked from ‘package:base’:
date
> library(data.table)
>
>
> url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
> df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
> names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
> df$Date <- dmy(df$Date)
> df$End_month_date <- ceiling_date(df$Date,unit="month") - days(1)
>
> dt <- as.data.table(df)
>
> #daily returns
> dt[, c("last_date",'SX5P1d','SX5E1d','SXXP1d','SXXE1d','SXXF1d','SXXA1d','DK5F1d','DKXF1d') := shift(.SD[,c("Date",'SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')], n=1, fill=NA, type=c("lag")),]
Warning messages:
1: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'last_date' (recycled leaving remainder of 5 items).
2: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SX5P1d' (recycled leaving remainder of 5 items).
3: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SX5E1d' (recycled leaving remainder of 5 items).
4: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXP1d' (recycled leaving remainder of 5 items).
5: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXE1d' (recycled leaving remainder of 5 items).
6: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXF1d' (recycled leaving remainder of 5 items).
7: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXA1d' (recycled leaving remainder of 5 items).
8: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'DK5F1d' (recycled leaving remainder of 5 items).
9: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'DKXF1d' (recycled leaving remainder of 5 items).
> dt[,`:=`(SX5P_r=SX5P-SX5P1d,
+ SX5E_r=SX5E-SX5E1d,
+ SXXP_r=SXXP-SXXP1d,
+ SXXE_r=SXXE-SXXE1d,
+ SXXF_r=SXXF-SXXF1d,
+ SXXA_r=SXXA-SXXA1d,
+ DK5F_r=DK5F-DK5F1d,
+ DKXF_r=DKXF-DKXF1d)]
Error in SX5P - SX5P1d : non-numeric argument to binary operator
> #monthly returns
> returns <- dt[,list(SX5P=sum(SX5P_r,na.rm=T),
+ SX5E=sum(SX5E_r,na.rm=T),
+ SXXP=sum(SXXP_r,na.rm=T),
+ SXXE=sum(SXXE_r,na.rm=T),
+ SXXF=sum(SXXF_r,na.rm=T),
+ SXXA=sum(SXXA_r,na.rm=T),
+ DK5F=sum(DK5F_r,na.rm=T),
+ DKXF=sum(DKXF_r,na.rm=T)),by="End_month_date"]
Error in `[.data.table`(dt, , list(SX5P = sum(SX5P_r, na.rm = T), SX5E = sum(SX5E_r, :
object 'SX5P_r' not found
这是在 shift
操作后生成警告消息的 dt
:
> head(dt)
Date SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF End_month_date
1: 1986-12-31 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56 1986-12-31
2: 1987-01-01 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56 1987-01-31
3: 1987-01-02 770.89 891.78 82.57 97.80 97.43 69.37 647.62 65.81 1987-01-31
4: 1987-01-05 771.89 898.33 82.82 98.60 98.19 69.16 649.94 65.82 1987-01-31
5: 1987-01-06 775.92 902.32 83.28 99.19 98.83 69.50 652.49 66.06 1987-01-31
6: 1987-01-07 781.21 899.15 83.78 98.96 98.62 70.59 651.97 66.20 1987-01-31
last_date SX5P1d SX5E1d SXXP1d SXXE1d SXXF1d SXXA1d DK5F1d DKXF1d
1: NA NA NA NA NA NA NA NA NA
2: Date Date Date Date Date Date Date Date Date
3: SX5P SX5P SX5P SX5P SX5P SX5P SX5P SX5P SX5P
4: SX5E SX5E SX5E SX5E SX5E SX5E SX5E SX5E SX5E
5: SXXP SXXP SXXP SXXP SXXP SXXP SXXP SXXP SXXP
6: SXXE SXXE SXXE SXXE SXXE SXXE SXXE SXXE SXXE
数据在日期 2016-03-25 和 2016-03-28 有一些不稳定的值。
library(dplyr)
df <- filter(df, SX5P>0) # drop erratic data points
percent_change <- function(x) (x - lag(x)) / lag(x) # function that calculates percentage change
daily_return <- df %>%
mutate_at(vars(-Date), percent_change) %>% # for each column excluding Date, apply percent_change function
filter(complete.cases(.)) %>% # filter out NAs
select(-Date) %>% # drop Date variable
as.matrix() # convert to matrix
head(daily_return, 5)
# SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
#[1,] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.0000000000 0.0000000000
#[2,] -0.005303226 -0.010035301 -0.002295795 -0.007912355 -0.006424638 0.004488850 0.0036574404 0.0038133008
#[3,] 0.001297202 0.007344861 0.003027734 0.008179959 0.007800472 -0.003027245 0.0035823477 0.0001519526
#[4,] 0.005220951 0.004441575 0.005554214 0.005983773 0.006517975 0.004916136 0.0039234391 0.0036463081
#[5,] 0.006817713 -0.003513166 0.006003842 -0.002318782 -0.002124861 0.015683453 -0.0007969471 0.0021192855
cov(daily_return)
# SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
#SX5P 0.0001458898 0.0001531675 0.0001339905 0.0001400356 0.0001335696 0.0001283412 0.0001355236 0.0001410957
#SX5E 0.0001531675 0.0001781671 0.0001431415 0.0001622366 0.0001519764 0.0001252829 0.0001497803 0.0001561299
#SXXP 0.0001339905 0.0001431415 0.0001267415 0.0001328073 0.0001265858 0.0001210988 0.0001314346 0.0001359420
#SXXE 0.0001400356 0.0001622366 0.0001328073 0.0001502001 0.0001410354 0.0001165071 0.0001412857 0.0001471070
#SXXF 0.0001335696 0.0001519764 0.0001265858 0.0001410354 0.0001343114 0.0001130397 0.0001380515 0.0001432671
#SXXA 0.0001283412 0.0001252829 0.0001210988 0.0001165071 0.0001130397 0.0001257977 0.0001221743 0.0001254364
#DK5F 0.0001355236 0.0001497803 0.0001314346 0.0001412857 0.0001380515 0.0001221743 0.0001914781 0.0001946354
#DKXF 0.0001410957 0.0001561299 0.0001359420 0.0001471070 0.0001432671 0.0001254364 0.0001946354 0.0002103559
每月return
library(lubridate)
percent_change2 <- function(x)last(x)/first(x) - 1
monthly_return <- df %>%
group_by(gr = floor_date(Date, unit = "month")) %>%
summarize_at(vars(-Date, -gr), percent_change2) %>%
ungroup() %>%
select(-gr) %>%
as.matrix()
head(monthly_return, 5)
SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
[1,] 0.00000000 0.000000000 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
[2,] -0.01089032 -0.046335561 0.005316578 -0.025867316 -0.025494595 0.04170287 -0.02977095 -0.01281269
[3,] 0.03167912 -0.009493186 0.032518367 -0.011141476 -0.011708861 0.07918740 0.05577361 0.04355828
[4,] 0.02633308 0.031731340 0.025284157 0.027359491 0.027197099 0.02322630 0.04121760 0.03157433
[5,] 0.02660200 -0.002816901 0.023347620 -0.003767437 -0.002362366 0.05061867 0.03758165 0.03917672
cov(monthly_return)
SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
SX5P 0.002068415 0.002243488 0.002011784 0.002160762 0.002076261 0.001867744 0.002282369 0.002381529
SX5E 0.002243488 0.002712719 0.002225923 0.002605715 0.002448324 0.001857319 0.002549326 0.002671546
SXXP 0.002011784 0.002225923 0.002025003 0.002182308 0.002095078 0.001873543 0.002321951 0.002407614
SXXE 0.002160762 0.002605715 0.002182308 0.002548197 0.002399266 0.001826243 0.002514475 0.002629281
SXXF 0.002076261 0.002448324 0.002095078 0.002399266 0.002291523 0.001797954 0.002458753 0.002558314
SXXA 0.001867744 0.001857319 0.001873543 0.001826243 0.001797954 0.001927949 0.002134767 0.002189677
DK5F 0.002282369 0.002549326 0.002321951 0.002514475 0.002458753 0.002134767 0.003414248 0.003523391
DKXF 0.002381529 0.002671546 0.002407614 0.002629281 0.002558314 0.002189677 0.003523391 0.003813587
data.table版本
library(data.table)
per_change <- function(x)x/shift(x) - 1
setDT(df)
df <- df[SX5P>0]
daily <- df[, lapply(.SD, per_change), .SDcols=-"Date"][-1, ]
daily
cov(daily)
monthly <- df[, lapply(.SD, percent_change2), by = .(gr=floor_date(Date, unit = "month")), .SDcols=-"Date"][-1, -"gr" ]
cov(monthly)
以下解决方案将值移动一个日期(这可能比一个文字日期日期更好,以适应缺失的日子、周末、假期等)。它找到 'today' 和 'yesterday' 的差异作为 return 并对整个月的总数求和,按月的最后一天汇总。
library(lubridate)
library(data.table)
url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
df$Date <- dmy(df$Date)
df$End_month_date <- ceiling_date(df$Date,unit="month") - days(1)
dt <- as.data.table(df)
#daily returns
dt[, c("last_date",'SX5P1d','SX5E1d','SXXP1d','SXXE1d','SXXF1d','SXXA1d','DK5F1d','DKXF1d') := shift(.SD[,c("Date",'SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')], n=1, fill=NA, type=c("lag")),]
dt[,`:=`(SX5P_r=SX5P-SX5P1d,
SX5E_r=SX5E-SX5E1d,
SXXP_r=SXXP-SXXP1d,
SXXE_r=SXXE-SXXE1d,
SXXF_r=SXXF-SXXF1d,
SXXA_r=SXXA-SXXA1d,
DK5F_r=DK5F-DK5F1d,
DKXF_r=DKXF-DKXF1d)]
#monthly returns
returns <- dt[,list(SX5P=sum(SX5P_r,na.rm=T),
SX5E=sum(SX5E_r,na.rm=T),
SXXP=sum(SXXP_r,na.rm=T),
SXXE=sum(SXXE_r,na.rm=T),
SXXF=sum(SXXF_r,na.rm=T),
SXXA=sum(SXXA_r,na.rm=T),
DK5F=sum(DK5F_r,na.rm=T),
DKXF=sum(DKXF_r,na.rm=T)),by="End_month_date"]
结果:
> returns
End_month_date SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
1: 1986-12-31 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
2: 1987-01-31 -8.44 -41.74 0.44 -2.55 -2.50 2.88 -19.21 -0.84
3: 1987-02-28 21.55 -18.11 2.53 -1.95 -1.87 6.15 41.03 3.32
4: 1987-03-31 24.13 28.47 2.67 2.80 2.62 2.53 31.40 2.53
5: 1987-04-30 25.96 12.02 2.33 0.96 0.82 3.44 22.66 2.64
---
355: 2016-06-30 -94.12 -198.74 -17.57 -20.95 -20.18 -13.80 -384.00 -22.60
356: 2016-07-31 64.29 126.02 12.01 15.55 16.22 8.23 219.13 18.83
357: 2016-08-31 -14.71 32.37 1.64 3.98 2.69 -0.67 -26.23 -4.42
358: 2016-09-30 -19.74 -20.89 -0.61 -0.45 -0.79 -0.76 -48.93 -0.71
359: 2016-10-31 27.89 27.26 3.18 2.42 3.14 3.83 96.24 5.45