计算最高进先出交易
Calculating Highest In, First Out on trades
我正在尝试对交易使用最高进先出会计方法。
最高进先出意味着当您卖出时,您首先卖出最贵的股票。
这是我的买卖(从 借来的例子 - 这是一个类似但不同的问题):
buy = data.frame(BuyTransactionID = c(1:10),
Ticker=c(rep('MSFT',4),rep('AMZN',3),rep('DOCU',3)),
Date=c(rep('01-01-2018',2),rep('01-14-2020',2),rep('01-01-2018',2),rep('01-14-2020',1),'01-01-2018','03-15-2020','04-06-2020'),
Price=c(100,102,102,107,2000,2010,2011,197,182,167),
Quantity=c(10,10,5,5,1,1,2,12,15,15))
sell = data.frame(SellTransactionID=c(1:7),
Ticker=c('MSFT','MSFT','AMZN','AMZN','DOCU','DOCU','DOCU'),
Date=c('01-07-2020','01-20-2020','01-01-2020','01-30-2020','01-15-2020','04-10-2020','04-20-2020'),
Price=c(97,110,2100,2050,210,205,225),
Quantity=c(15,12,1,3,10,5,3))
规则如下:
- 您首先卖出最贵(最高价)的股票。
- 您不能在购买之前出售股票
- 您不能多次卖出相同的股票
例题:
第一笔交易(SellTransactionID = 1)是 2020 年 1 月 7 日 15 股 MSFT。因此,在该日期之前购买的任何商品都可以出售。根据日期,符合条件的股票是来自 BuyTransactionID 1 和 2 的股票。BuyTransactionID 2 是最高价。因此,BuyTransactionID 2 的 10 股全部卖出,剩余 5 股来自 BuyTransactionID 1。
期望输出:
'Date Sold' = 售出日期(不言自明);
'Ticker' = 已售出的代码;
'Proceeds' = 销售总额;
'Cost basis' = 已售股票的加权平均值。
示例解决方案:
这是 SellTransactionID 1 的解决方案。正确的解决方案会自动执行此操作并计算所有 SellTransactionID。
result <- data.frame(SellDate = '01-07-2020', Ticker = "MSFT", Proceeds = 1455, CostBasis = 101.33)
成本基础示例:
成本基础计算为加权平均值。对于前面的示例,成本基础计算如下:(Quantity1 * Price1 + Quanity2 * Price2 + .....)/所有数量的总和
所以对于上面的例子:(10 * 102 + 5 * 100)/15
如果我正确理解了您的问题,这是一种可能的解决方案。在简历中,我结合了销售和购买数据并将其分组到销售块中(由销售 ID 给出)。这假设销售 ID 的顺序是根据日期列。然后我依次遍历这些销售块并将中间结果写入单个数据框。对于处理此结果数据框的每个销售块,都会针对同一代码的最后一个销售块结果进行过滤。这意味着根据时间表销售数量不得大于可用数量(因为你不能出售你没有的东西,无论如何我必须指出它是一个可能的限制)
建议的循环解决方案 1 不是在 R 中处理数据的最佳方法,因为它是一个循环,会增长 data.frame。由于您列出了 purrr
标签,我修改了答案第二部分的代码以使用 map() 函数。
在我们开始实际编码之前,让我们先准备数据(需要以相同的方式回答两个部分):
library(tidyverse) # need dplyr, using lubridate for date parsing and tidyr to fill data.frame
# include identifier variable, manipulate column name and union sells and buy
df <- dplyr::mutate(buy, io = "i") %>%
dplyr::rename(TID = BuyTransactionID) %>%
dplyr::union(dplyr::mutate(sell, io = "o") %>%
dplyr::rename(TID = SellTransactionID)) %>%
# sort the data
dplyr::arrange(Ticker, Date) %>%
# make Qtd negative for sales, remove buy IDs, parce Date and set up helper column
dplyr::mutate(Quantity = ifelse(io == "o", -1 * Quantity, Quantity),
TID = ifelse(io == "i", NA, TID),
Date = lubridate::mdy(Date),
hprice = NA_real_) %>%
# group data to fill backwards per group
dplyr::group_by(Ticker) %>%
tidyr::fill(TID, .direction = "up") %>%
# ungroup to prevent unwanted behaviour downstream
dplyr::ungroup()
1 个标准循环
dfo <- df[0, ] # empty copy of df
for (i in sort(unique(df$TID))) {
# which ticker are we working with at this sale
t <- unique(df[df$TID == i, ]$Ticker)
# bind data from last sale of this ticker to current sale
sdf <- rbind(dplyr::slice_max(dfo[dfo$Ticker == t, ], "Date"),
df[df$TID == i, ])
# current sales quantiy as positive value
o1 <- abs(sdf[sdf$io == "o", ]$Quantity)
# copy to use for greedy algo
o2 <- o1
# vectors of price and qtd of bought shares at this sale, having price in decreasing order
ip <- sdf[sdf$io == "i", ] %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Price)
iq <- sdf[sdf$io == "i", ] %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Quantity)
# total value of shares in greedy
v <- 0
# loop to run over bought prices and quantities do reduce from sold qtd per sales block
# check if shares block is larger then remaining sales qtd to break loop
for(l in 1:length(ip)){
if(o2 < iq[l]){
v <- v + ip[l] * o2
break
}else{
o2 <- o2 - iq[l]
v <- v + ip[l] * iq[l]
}
}
# fill sales block frame and bind to output df
dfo <- rbind(dfo,
data.frame(TID = i,
Ticker = t,
Date = max(sdf$Date),
Price = (sum(ip * iq) - v) / sum(sdf$Quantity),
Quantity = sum(sdf$Quantity),
io = "i",
hprice = v / o1))
}
# format, join original data and calculate result per Sales block
dplyr::select(dfo, Ticker, Date, Remain_Price = Price, Remain_Qtd = Quantity, Sales_Cost = hprice) %>%
dplyr::left_join(dplyr::select(df, Ticker, Date, Sales_Price = Price, Sales_Qtd = Quantity), by = c("Ticker", "Date")) %>%
dplyr::mutate(Result = abs(Sales_Qtd) * (Sales_Price - Sales_Cost))
Ticker Date Remain_Price Remain_Qtd Sales_Cost Sales_Price Sales_Qtd Result
1 MSFT 2020-01-07 100.0000 5 101.3333 97 -15 -65
2 MSFT 2020-01-20 100.0000 3 103.7500 110 -12 75
3 AMZN 2020-01-01 2000.0000 1 2010.0000 2100 -1 90
4 AMZN 2020-01-30 NaN 0 2007.3333 2050 -3 128
5 DOCU 2020-01-15 197.0000 2 197.0000 210 -10 130
6 DOCU 2020-04-10 173.6667 27 188.0000 205 -5 85
7 DOCU 2020-04-20 0.0000 -1 131.3333 225 -3 281
2 循环改写为 purrr 解决方案(注意在函数末尾分配 dfo 的全局赋值运算符(<<- 而不是 <-))
# rephrase loop as function
myfun <- function(i){
# which ticker are we working with at this sale
t <- unique(df[df$TID == i, ]$Ticker)
# bind data from last sale of this ticker to current sale
sdf <- rbind(dplyr::slice_max(dfo[dfo$Ticker == t, ], "Date"),
df[df$TID == i, ])
# current sales quantiy as positive value
o1 <- abs(sdf[sdf$io == "o", ]$Quantity)
# copy to use for greedy algo
o2 <- o1
# vectors of price and qtd of bought shares at this sale, having price in decreasing order
ip <- sdf[sdf$io == "i", ] %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Price)
iq <- sdf[sdf$io == "i", ] %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Quantity)
# total value of shares in greedy
v <- 0
# loop to run over bought prices and quantities do reduce from sold qtd per sales block
# check if shares block is larger then remaining sales qtd to break loop
for(l in 1:length(ip)){
if(o2 < iq[l]){
v <- v + ip[l] * o2
break
}else{
o2 <- o2 - iq[l]
v <- v + ip[l] * iq[l]
}
}
# fill sales block frame and bind to output df
dfo <<- rbind(dfo,
data.frame(TID = i,
Ticker = t,
Date = max(sdf$Date),
Price = (sum(ip * iq) - v) / sum(sdf$Quantity),
Quantity = sum(sdf$Quantity),
io = "i",
hprice = v / o1))
}
# empty copy of df
dfo <- df[0, ]
purrr::map(sort(unique(df$TID)), ~ myfun(.x))[[max(df$TID)]] %>%
dplyr::select(Ticker, Date, Remain_Price = Price, Remain_Qtd = Quantity, Sales_Cost = hprice) %>%
dplyr::left_join(dplyr::select(df, Ticker, Date, Sales_Price = Price, Sales_Qtd = Quantity), by = c("Ticker", "Date")) %>%
dplyr::mutate(Result = abs(Sales_Qtd) * (Sales_Price - Sales_Cost))
Ticker Date Remain_Price Remain_Qtd Sales_Cost Sales_Price Sales_Qtd Result
1 MSFT 2020-01-07 100.0000 5 101.3333 97 -15 -65
2 MSFT 2020-01-20 100.0000 3 103.7500 110 -12 75
3 AMZN 2020-01-01 2000.0000 1 2010.0000 2100 -1 90
4 AMZN 2020-01-30 NaN 0 2007.3333 2050 -3 128
5 DOCU 2020-01-15 197.0000 2 197.0000 210 -10 130
6 DOCU 2020-04-10 173.6667 27 188.0000 205 -5 85
7 DOCU 2020-04-20 0.0000 -1 131.3333 225 -3 281
编辑
为了跟踪剩余股票,我们需要第二个 df 来保存当前的投资组合数据。我没有优化代码,只编辑了循环,不过 purrr
改编应该很直接。
library(tidyverse) # need dplyr, using lubridate for date parsing and tidyr to fill data.frame
# include identifier variable, manipulate column name and union sells and buy
df <- dplyr::mutate(buy, io = "i") %>%
dplyr::rename(TID = BuyTransactionID) %>%
dplyr::union(dplyr::mutate(sell, io = "o") %>%
dplyr::rename(TID = SellTransactionID)) %>%
# sort the data
dplyr::arrange(Ticker, Date) %>%
# make Qtd negative for sales, remove buy IDs, parce Date and set up helper column
dplyr::mutate(Quantity = ifelse(io == "o", -1 * Quantity, Quantity),
TID = ifelse(io == "i", NA, TID),
Date = lubridate::mdy(Date),
hprice = NA_real_) %>%
# group data to fill backwards per group
dplyr::group_by(Ticker) %>%
tidyr::fill(TID, .direction = "up") %>%
# ungroup to prevent unwanted behaviour downstream
dplyr::ungroup()
dfo <- df[0, ] # empty copy of df
dfh <- df[0, c("Ticker", "Price", "Quantity", "io")] # to hold current stock aka portfolio
for (i in sort(unique(df$TID))) {
# which ticker are we working with at this sale
t <- unique(df[df$TID == i, ]$Ticker)
# bind data from last sale of this ticker to current sale
sdf <- rbind(dplyr::slice_max(dfo[dfo$Ticker == t, ], "Date"),
df[df$TID == i, ])
# bind data from current portfolio to buys between last and current sale (new port folio before sale)
sdfh <- rbind(dfh[dfh$Ticker == t, ],
df[df$TID == i & df$io == "i", c("Ticker", "Price", "Quantity", "io")])
# current sales quantiy as positive value
o1 <- abs(sdf[sdf$io == "o", ]$Quantity)
# copy to use for greedy algo
o2 <- o1
# vectors of price and qtd of bought shares at this sale, having price in decreasing order
ip <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Price)
iq <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Quantity)
ips <- ip
iqs <- iq
# total value of shares in greedy
v <- 0
# loop to run over bought prices and quantities do reduce from sold qtd per sales block
# check if shares block is larger then remaining sales qtd to break loop
for(l in 1:length(ip)){
if(o2 < iq[l]){
v <- v + ip[l] * o2
iqs[1] <- iqs[1] - o2
break
}else{
o2 <- o2 - iq[l]
v <- v + ip[l] * iq[l]
ips <- ips[-1]
iqs <- iqs[-1]
}
}
dfh <- rbind(dfh[dfh$Ticker != t, ],
data.frame(Ticker = t,
Price = ips,
Quantity = iqs,
io = "i"))
# fill sales block frame and bind to output df
dfo <- rbind(dfo,
data.frame(TID = i,
Ticker = t,
Date = max(sdf$Date),
Price = sum(ips * iqs) / sum(iqs),
Quantity = sum(iqs),
io = "i",
hprice = v/o1))
}
dfo
TID Ticker Date Price Quantity io hprice
1 1 MSFT 2020-01-07 106.5652 69 i 115
2 2 MSFT 2020-01-20 105.0000 57 i 114
3 3 MSFT 2020-01-21 104.8750 56 i 112
4 4 MSFT 2020-01-22 104.1765 51 i 112
@DPH 的回答非常好,但不幸的是不够准确。我会解释为什么。
这是一个新数据集,其中所有购买都先于销售:
buy = data.frame(BuyTransactionID = c(1:10),
Ticker=c(rep('MSFT',10)),
Date=c(rep('01-01-2020',10)),
Price=c(100,102,102,107,105,111,109,112,115,106),
Quantity=c(10,10,5,5,1,1,2,12,15,15))
sell = data.frame(SellTransactionID=c(1:4),
Ticker=c('MSFT','MSFT','MSFT', 'MSFT'),
Date=c('01-07-2020','01-20-2020','01-21-2020',
'01-22-2020'),
Price=c(120,119,117, 121),
Quantity=c(7,12,1, 5))
如果您应用@DPH 的解决方案,您将得到以下结果:
请注意,'Remain_Price' 没有变化,最后三个交易的 'Sales_Cost' 也没有变化。发生这种情况是因为该函数确定了首次销售后剩余的股票数量以及剩余股票的平均价格。首次出售前购买的股票不能再单独出售。他们现在被视为具有平均价格和剩余股份数量的单一实体。
例如,本例共买入76股。第一次销售卖出 7 股。现在,如 'Remain_Qtd' 中所示,仍有 69 股。计算剩余股票的平均价格 - 该价格为 106.5652 美元。现在,该过程认为所有 69 股的定价为 106.5652 美元,剩余销售量减少 'Remain_Qtd' 的数量,但不会改变 'Remain_Price'。剩余股份不能再按购买时的价格考虑,它们是剩余股份和平均剩余价格的总和。
出现这种情况是因为对象dfo
和对象sdf
中dfo
的回收。特别是,此行计算了一个平均剩余价格,然后通过 dfo
和 sdf
.
循环使用
Price = (sum(ip * iq) - v) / sum(sdf$Quantity)
和Quantity = sum(sdf$Quantity)
将所有剩余份额相加。
我认为@DPH 的回答很棒,但希望可以对其进行修改以单独处理每次购买,而不是汇总过去的购买。
这是我在@DPH 的帮助下得出的最终工作解决方案。我对@DPH 编辑的解决方案进行了一些更改。
- 当一只股票的所有股份因多种原因(包括
dfh
对象)被出售时,编辑的解决方案不起作用。更新解决方案适用于我提供的修改后的数据集,但不适用于原始数据集。我已经修改了答案,以便在所有股票售出时都能正常工作。
- 我修改了结果以包含购买日期。这对于确定出售是长期资本收益还是短期资本收益很重要。
- 我已从已购买但未出售的股票中删除代码,因为它们会破坏脚本
- 我已将 purrr 应用于更新的解决方案以避免循环。
- 我已将基本子集(即
df[]
)更改为 dplyr 子集(即 df %>% filter()
)。出于某种原因,基本子集导致行中的 NA 值我的实际数据集,即使它并没有在示例数据集中引起这种情况。NA 行导致解决方案不起作用。
数据框准备:
df <- buy %>% filter(Ticker %in% unique(sell$Ticker)) %>% dplyr::mutate(io = "i") %>%
dplyr::rename(TID = BuyTransactionID) %>%
dplyr::union(dplyr::mutate(sell, io = "o") %>%
dplyr::rename(TID = SellTransactionID)) %>%
# sort the data
dplyr::arrange(Ticker, Date) %>%
# make Qtd negative for sales, remove buy IDs, parce Date and set up helper column
dplyr::mutate(Quantity = ifelse(io == "o", -1 * Quantity, Quantity),
TID = ifelse(io == "i", NA, TID),
Date = lubridate::mdy(Date),
hprice = NA_real_) %>%
dplyr::arrange(Ticker, Date) %>%
# group data to fill backwards per group
dplyr::group_by(Ticker) %>%
tidyr::fill(TID, .direction = "up") %>%
# ungroup to prevent unwanted behaviour downstream
dplyr::ungroup()
df$Dates_bought <- NA
函数和purrr:
# rephrase loop as function
myfun <- function(i){
# which ticker are we working with at this sale
t <- unique(df[df$TID == i, ]$Ticker)
# bind data from last sale of this ticker to current sale
sdf <- rbind(dplyr::slice_max(dfo[dfo$Ticker == t, ], "Date"),
df %>% filter(TID == i))
sdfh <- rbind(dfh %>% filter(Ticker == t),
df %>% filter(TID == i & io == "i") %>% select(c("Ticker", "Date", "Price", "Quantity", "io")))
# current sales quantiy as positive value
o1 <- abs(sdf %>% filter(io == "o") %>% pull(Quantity))
# copy to use for greedy algo
o2 <- o1
# vectors of price and qtd of bought shares at this sale, having price in decreasing order
ip <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Price)
iq <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Quantity)
date <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Date)
ips <- ip
iqs <- iq
dates <- date
# total value of shares in greedy
v <- 0
# loop to run over bought prices and quantities do reduce from sold qtd per sales block
# check if shares block is larger then remaining sales qtd to break loop. Modifications
# to make dates work properly.
for(l in 1:length(ip)){
if(o2 < iq[l]){
v <- v + ip[l] * o2
iqs[1] <- iqs[1] - o2
dates2 <- if(o2 == 0) dates else dates[-1]
break
}else{
o2 <- o2 - iq[l]
v <- v + ip[l] * iq[l]
ips <- ips[-1]
iqs <- iqs[-1]
dates <- dates[-1]
dates2 <- dates
}
}
# Needs to have the if else statements because when all shares are sold, the length
# of dates, ips, and iqs is 0, whereas Ticker and io are length 1.
dfh <<- rbind(dfh[dfh$Ticker != t, ],
data.frame(Ticker = if(length(ips) == 0) numeric(length = 0L) else t,
Date = dates,
Price = ips,
Quantity = iqs,
io = if(length(ips) == 0) numeric(length = 0L) else "i"))
# fill sales block frame and bind to output df
dfo <<- rbind(dfo,
data.frame(TID = i,
Ticker = t,
Date = max(sdf$Date),
Dates_bought = paste(date[seq(length(date)-length(dates2))], collapse = ","),
Price = sum(ips * iqs) / sum(iqs),
Quantity = sum(iqs),
io = "i",
hprice = v / o1))
}
# empty copy of df
dfo <- df[0, ] # empty copy of df
dfh <- df[0, c("Ticker", "Date", "Price", "Quantity", "io")] # to hold current stock aka portfolio
hifo <- purrr::map(sort(unique(df$TID)), ~ myfun(.x))[[length(unique(df$TID))]] %>%
dplyr::select(Ticker, Date, Dates_bought, Remain_Price = Price, Remain_Qtd = Quantity, Sales_Cost = hprice) %>%
dplyr::left_join(dplyr::select(df, Ticker, Date, Sales_Price = Price, Sales_Qtd = Quantity), by = c("Ticker", "Date")) %>%
dplyr::mutate(Result = abs(Sales_Qtd) * (Sales_Price - Sales_Cost))
如果有人有任何问题,请告诉我。我想把它变成一个闪亮的应用程序,也许会进一步开发它。如果您有兴趣合作,请告诉我。
我正在尝试对交易使用最高进先出会计方法。 最高进先出意味着当您卖出时,您首先卖出最贵的股票。
这是我的买卖(从
buy = data.frame(BuyTransactionID = c(1:10),
Ticker=c(rep('MSFT',4),rep('AMZN',3),rep('DOCU',3)),
Date=c(rep('01-01-2018',2),rep('01-14-2020',2),rep('01-01-2018',2),rep('01-14-2020',1),'01-01-2018','03-15-2020','04-06-2020'),
Price=c(100,102,102,107,2000,2010,2011,197,182,167),
Quantity=c(10,10,5,5,1,1,2,12,15,15))
sell = data.frame(SellTransactionID=c(1:7),
Ticker=c('MSFT','MSFT','AMZN','AMZN','DOCU','DOCU','DOCU'),
Date=c('01-07-2020','01-20-2020','01-01-2020','01-30-2020','01-15-2020','04-10-2020','04-20-2020'),
Price=c(97,110,2100,2050,210,205,225),
Quantity=c(15,12,1,3,10,5,3))
规则如下:
- 您首先卖出最贵(最高价)的股票。
- 您不能在购买之前出售股票
- 您不能多次卖出相同的股票
例题:
第一笔交易(SellTransactionID = 1)是 2020 年 1 月 7 日 15 股 MSFT。因此,在该日期之前购买的任何商品都可以出售。根据日期,符合条件的股票是来自 BuyTransactionID 1 和 2 的股票。BuyTransactionID 2 是最高价。因此,BuyTransactionID 2 的 10 股全部卖出,剩余 5 股来自 BuyTransactionID 1。
期望输出:
'Date Sold' = 售出日期(不言自明);
'Ticker' = 已售出的代码;
'Proceeds' = 销售总额;
'Cost basis' = 已售股票的加权平均值。
示例解决方案:
这是 SellTransactionID 1 的解决方案。正确的解决方案会自动执行此操作并计算所有 SellTransactionID。
result <- data.frame(SellDate = '01-07-2020', Ticker = "MSFT", Proceeds = 1455, CostBasis = 101.33)
成本基础示例:
成本基础计算为加权平均值。对于前面的示例,成本基础计算如下:(Quantity1 * Price1 + Quanity2 * Price2 + .....)/所有数量的总和
所以对于上面的例子:(10 * 102 + 5 * 100)/15
如果我正确理解了您的问题,这是一种可能的解决方案。在简历中,我结合了销售和购买数据并将其分组到销售块中(由销售 ID 给出)。这假设销售 ID 的顺序是根据日期列。然后我依次遍历这些销售块并将中间结果写入单个数据框。对于处理此结果数据框的每个销售块,都会针对同一代码的最后一个销售块结果进行过滤。这意味着根据时间表销售数量不得大于可用数量(因为你不能出售你没有的东西,无论如何我必须指出它是一个可能的限制)
建议的循环解决方案 1 不是在 R 中处理数据的最佳方法,因为它是一个循环,会增长 data.frame。由于您列出了 purrr
标签,我修改了答案第二部分的代码以使用 map() 函数。
在我们开始实际编码之前,让我们先准备数据(需要以相同的方式回答两个部分):
library(tidyverse) # need dplyr, using lubridate for date parsing and tidyr to fill data.frame
# include identifier variable, manipulate column name and union sells and buy
df <- dplyr::mutate(buy, io = "i") %>%
dplyr::rename(TID = BuyTransactionID) %>%
dplyr::union(dplyr::mutate(sell, io = "o") %>%
dplyr::rename(TID = SellTransactionID)) %>%
# sort the data
dplyr::arrange(Ticker, Date) %>%
# make Qtd negative for sales, remove buy IDs, parce Date and set up helper column
dplyr::mutate(Quantity = ifelse(io == "o", -1 * Quantity, Quantity),
TID = ifelse(io == "i", NA, TID),
Date = lubridate::mdy(Date),
hprice = NA_real_) %>%
# group data to fill backwards per group
dplyr::group_by(Ticker) %>%
tidyr::fill(TID, .direction = "up") %>%
# ungroup to prevent unwanted behaviour downstream
dplyr::ungroup()
1 个标准循环
dfo <- df[0, ] # empty copy of df
for (i in sort(unique(df$TID))) {
# which ticker are we working with at this sale
t <- unique(df[df$TID == i, ]$Ticker)
# bind data from last sale of this ticker to current sale
sdf <- rbind(dplyr::slice_max(dfo[dfo$Ticker == t, ], "Date"),
df[df$TID == i, ])
# current sales quantiy as positive value
o1 <- abs(sdf[sdf$io == "o", ]$Quantity)
# copy to use for greedy algo
o2 <- o1
# vectors of price and qtd of bought shares at this sale, having price in decreasing order
ip <- sdf[sdf$io == "i", ] %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Price)
iq <- sdf[sdf$io == "i", ] %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Quantity)
# total value of shares in greedy
v <- 0
# loop to run over bought prices and quantities do reduce from sold qtd per sales block
# check if shares block is larger then remaining sales qtd to break loop
for(l in 1:length(ip)){
if(o2 < iq[l]){
v <- v + ip[l] * o2
break
}else{
o2 <- o2 - iq[l]
v <- v + ip[l] * iq[l]
}
}
# fill sales block frame and bind to output df
dfo <- rbind(dfo,
data.frame(TID = i,
Ticker = t,
Date = max(sdf$Date),
Price = (sum(ip * iq) - v) / sum(sdf$Quantity),
Quantity = sum(sdf$Quantity),
io = "i",
hprice = v / o1))
}
# format, join original data and calculate result per Sales block
dplyr::select(dfo, Ticker, Date, Remain_Price = Price, Remain_Qtd = Quantity, Sales_Cost = hprice) %>%
dplyr::left_join(dplyr::select(df, Ticker, Date, Sales_Price = Price, Sales_Qtd = Quantity), by = c("Ticker", "Date")) %>%
dplyr::mutate(Result = abs(Sales_Qtd) * (Sales_Price - Sales_Cost))
Ticker Date Remain_Price Remain_Qtd Sales_Cost Sales_Price Sales_Qtd Result
1 MSFT 2020-01-07 100.0000 5 101.3333 97 -15 -65
2 MSFT 2020-01-20 100.0000 3 103.7500 110 -12 75
3 AMZN 2020-01-01 2000.0000 1 2010.0000 2100 -1 90
4 AMZN 2020-01-30 NaN 0 2007.3333 2050 -3 128
5 DOCU 2020-01-15 197.0000 2 197.0000 210 -10 130
6 DOCU 2020-04-10 173.6667 27 188.0000 205 -5 85
7 DOCU 2020-04-20 0.0000 -1 131.3333 225 -3 281
2 循环改写为 purrr 解决方案(注意在函数末尾分配 dfo 的全局赋值运算符(<<- 而不是 <-))
# rephrase loop as function
myfun <- function(i){
# which ticker are we working with at this sale
t <- unique(df[df$TID == i, ]$Ticker)
# bind data from last sale of this ticker to current sale
sdf <- rbind(dplyr::slice_max(dfo[dfo$Ticker == t, ], "Date"),
df[df$TID == i, ])
# current sales quantiy as positive value
o1 <- abs(sdf[sdf$io == "o", ]$Quantity)
# copy to use for greedy algo
o2 <- o1
# vectors of price and qtd of bought shares at this sale, having price in decreasing order
ip <- sdf[sdf$io == "i", ] %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Price)
iq <- sdf[sdf$io == "i", ] %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Quantity)
# total value of shares in greedy
v <- 0
# loop to run over bought prices and quantities do reduce from sold qtd per sales block
# check if shares block is larger then remaining sales qtd to break loop
for(l in 1:length(ip)){
if(o2 < iq[l]){
v <- v + ip[l] * o2
break
}else{
o2 <- o2 - iq[l]
v <- v + ip[l] * iq[l]
}
}
# fill sales block frame and bind to output df
dfo <<- rbind(dfo,
data.frame(TID = i,
Ticker = t,
Date = max(sdf$Date),
Price = (sum(ip * iq) - v) / sum(sdf$Quantity),
Quantity = sum(sdf$Quantity),
io = "i",
hprice = v / o1))
}
# empty copy of df
dfo <- df[0, ]
purrr::map(sort(unique(df$TID)), ~ myfun(.x))[[max(df$TID)]] %>%
dplyr::select(Ticker, Date, Remain_Price = Price, Remain_Qtd = Quantity, Sales_Cost = hprice) %>%
dplyr::left_join(dplyr::select(df, Ticker, Date, Sales_Price = Price, Sales_Qtd = Quantity), by = c("Ticker", "Date")) %>%
dplyr::mutate(Result = abs(Sales_Qtd) * (Sales_Price - Sales_Cost))
Ticker Date Remain_Price Remain_Qtd Sales_Cost Sales_Price Sales_Qtd Result
1 MSFT 2020-01-07 100.0000 5 101.3333 97 -15 -65
2 MSFT 2020-01-20 100.0000 3 103.7500 110 -12 75
3 AMZN 2020-01-01 2000.0000 1 2010.0000 2100 -1 90
4 AMZN 2020-01-30 NaN 0 2007.3333 2050 -3 128
5 DOCU 2020-01-15 197.0000 2 197.0000 210 -10 130
6 DOCU 2020-04-10 173.6667 27 188.0000 205 -5 85
7 DOCU 2020-04-20 0.0000 -1 131.3333 225 -3 281
编辑
为了跟踪剩余股票,我们需要第二个 df 来保存当前的投资组合数据。我没有优化代码,只编辑了循环,不过 purrr
改编应该很直接。
library(tidyverse) # need dplyr, using lubridate for date parsing and tidyr to fill data.frame
# include identifier variable, manipulate column name and union sells and buy
df <- dplyr::mutate(buy, io = "i") %>%
dplyr::rename(TID = BuyTransactionID) %>%
dplyr::union(dplyr::mutate(sell, io = "o") %>%
dplyr::rename(TID = SellTransactionID)) %>%
# sort the data
dplyr::arrange(Ticker, Date) %>%
# make Qtd negative for sales, remove buy IDs, parce Date and set up helper column
dplyr::mutate(Quantity = ifelse(io == "o", -1 * Quantity, Quantity),
TID = ifelse(io == "i", NA, TID),
Date = lubridate::mdy(Date),
hprice = NA_real_) %>%
# group data to fill backwards per group
dplyr::group_by(Ticker) %>%
tidyr::fill(TID, .direction = "up") %>%
# ungroup to prevent unwanted behaviour downstream
dplyr::ungroup()
dfo <- df[0, ] # empty copy of df
dfh <- df[0, c("Ticker", "Price", "Quantity", "io")] # to hold current stock aka portfolio
for (i in sort(unique(df$TID))) {
# which ticker are we working with at this sale
t <- unique(df[df$TID == i, ]$Ticker)
# bind data from last sale of this ticker to current sale
sdf <- rbind(dplyr::slice_max(dfo[dfo$Ticker == t, ], "Date"),
df[df$TID == i, ])
# bind data from current portfolio to buys between last and current sale (new port folio before sale)
sdfh <- rbind(dfh[dfh$Ticker == t, ],
df[df$TID == i & df$io == "i", c("Ticker", "Price", "Quantity", "io")])
# current sales quantiy as positive value
o1 <- abs(sdf[sdf$io == "o", ]$Quantity)
# copy to use for greedy algo
o2 <- o1
# vectors of price and qtd of bought shares at this sale, having price in decreasing order
ip <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Price)
iq <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Quantity)
ips <- ip
iqs <- iq
# total value of shares in greedy
v <- 0
# loop to run over bought prices and quantities do reduce from sold qtd per sales block
# check if shares block is larger then remaining sales qtd to break loop
for(l in 1:length(ip)){
if(o2 < iq[l]){
v <- v + ip[l] * o2
iqs[1] <- iqs[1] - o2
break
}else{
o2 <- o2 - iq[l]
v <- v + ip[l] * iq[l]
ips <- ips[-1]
iqs <- iqs[-1]
}
}
dfh <- rbind(dfh[dfh$Ticker != t, ],
data.frame(Ticker = t,
Price = ips,
Quantity = iqs,
io = "i"))
# fill sales block frame and bind to output df
dfo <- rbind(dfo,
data.frame(TID = i,
Ticker = t,
Date = max(sdf$Date),
Price = sum(ips * iqs) / sum(iqs),
Quantity = sum(iqs),
io = "i",
hprice = v/o1))
}
dfo
TID Ticker Date Price Quantity io hprice
1 1 MSFT 2020-01-07 106.5652 69 i 115
2 2 MSFT 2020-01-20 105.0000 57 i 114
3 3 MSFT 2020-01-21 104.8750 56 i 112
4 4 MSFT 2020-01-22 104.1765 51 i 112
@DPH 的回答非常好,但不幸的是不够准确。我会解释为什么。
这是一个新数据集,其中所有购买都先于销售:
buy = data.frame(BuyTransactionID = c(1:10),
Ticker=c(rep('MSFT',10)),
Date=c(rep('01-01-2020',10)),
Price=c(100,102,102,107,105,111,109,112,115,106),
Quantity=c(10,10,5,5,1,1,2,12,15,15))
sell = data.frame(SellTransactionID=c(1:4),
Ticker=c('MSFT','MSFT','MSFT', 'MSFT'),
Date=c('01-07-2020','01-20-2020','01-21-2020',
'01-22-2020'),
Price=c(120,119,117, 121),
Quantity=c(7,12,1, 5))
如果您应用@DPH 的解决方案,您将得到以下结果:
请注意,'Remain_Price' 没有变化,最后三个交易的 'Sales_Cost' 也没有变化。发生这种情况是因为该函数确定了首次销售后剩余的股票数量以及剩余股票的平均价格。首次出售前购买的股票不能再单独出售。他们现在被视为具有平均价格和剩余股份数量的单一实体。
例如,本例共买入76股。第一次销售卖出 7 股。现在,如 'Remain_Qtd' 中所示,仍有 69 股。计算剩余股票的平均价格 - 该价格为 106.5652 美元。现在,该过程认为所有 69 股的定价为 106.5652 美元,剩余销售量减少 'Remain_Qtd' 的数量,但不会改变 'Remain_Price'。剩余股份不能再按购买时的价格考虑,它们是剩余股份和平均剩余价格的总和。
出现这种情况是因为对象dfo
和对象sdf
中dfo
的回收。特别是,此行计算了一个平均剩余价格,然后通过 dfo
和 sdf
.
Price = (sum(ip * iq) - v) / sum(sdf$Quantity)
和Quantity = sum(sdf$Quantity)
将所有剩余份额相加。
我认为@DPH 的回答很棒,但希望可以对其进行修改以单独处理每次购买,而不是汇总过去的购买。
这是我在@DPH 的帮助下得出的最终工作解决方案。我对@DPH 编辑的解决方案进行了一些更改。
- 当一只股票的所有股份因多种原因(包括
dfh
对象)被出售时,编辑的解决方案不起作用。更新解决方案适用于我提供的修改后的数据集,但不适用于原始数据集。我已经修改了答案,以便在所有股票售出时都能正常工作。 - 我修改了结果以包含购买日期。这对于确定出售是长期资本收益还是短期资本收益很重要。
- 我已从已购买但未出售的股票中删除代码,因为它们会破坏脚本
- 我已将 purrr 应用于更新的解决方案以避免循环。
- 我已将基本子集(即
df[]
)更改为 dplyr 子集(即df %>% filter()
)。出于某种原因,基本子集导致行中的 NA 值我的实际数据集,即使它并没有在示例数据集中引起这种情况。NA 行导致解决方案不起作用。
数据框准备:
df <- buy %>% filter(Ticker %in% unique(sell$Ticker)) %>% dplyr::mutate(io = "i") %>%
dplyr::rename(TID = BuyTransactionID) %>%
dplyr::union(dplyr::mutate(sell, io = "o") %>%
dplyr::rename(TID = SellTransactionID)) %>%
# sort the data
dplyr::arrange(Ticker, Date) %>%
# make Qtd negative for sales, remove buy IDs, parce Date and set up helper column
dplyr::mutate(Quantity = ifelse(io == "o", -1 * Quantity, Quantity),
TID = ifelse(io == "i", NA, TID),
Date = lubridate::mdy(Date),
hprice = NA_real_) %>%
dplyr::arrange(Ticker, Date) %>%
# group data to fill backwards per group
dplyr::group_by(Ticker) %>%
tidyr::fill(TID, .direction = "up") %>%
# ungroup to prevent unwanted behaviour downstream
dplyr::ungroup()
df$Dates_bought <- NA
函数和purrr:
# rephrase loop as function
myfun <- function(i){
# which ticker are we working with at this sale
t <- unique(df[df$TID == i, ]$Ticker)
# bind data from last sale of this ticker to current sale
sdf <- rbind(dplyr::slice_max(dfo[dfo$Ticker == t, ], "Date"),
df %>% filter(TID == i))
sdfh <- rbind(dfh %>% filter(Ticker == t),
df %>% filter(TID == i & io == "i") %>% select(c("Ticker", "Date", "Price", "Quantity", "io")))
# current sales quantiy as positive value
o1 <- abs(sdf %>% filter(io == "o") %>% pull(Quantity))
# copy to use for greedy algo
o2 <- o1
# vectors of price and qtd of bought shares at this sale, having price in decreasing order
ip <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Price)
iq <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Quantity)
date <- sdfh %>% dplyr::arrange(desc(Price)) %>% dplyr::pull(Date)
ips <- ip
iqs <- iq
dates <- date
# total value of shares in greedy
v <- 0
# loop to run over bought prices and quantities do reduce from sold qtd per sales block
# check if shares block is larger then remaining sales qtd to break loop. Modifications
# to make dates work properly.
for(l in 1:length(ip)){
if(o2 < iq[l]){
v <- v + ip[l] * o2
iqs[1] <- iqs[1] - o2
dates2 <- if(o2 == 0) dates else dates[-1]
break
}else{
o2 <- o2 - iq[l]
v <- v + ip[l] * iq[l]
ips <- ips[-1]
iqs <- iqs[-1]
dates <- dates[-1]
dates2 <- dates
}
}
# Needs to have the if else statements because when all shares are sold, the length
# of dates, ips, and iqs is 0, whereas Ticker and io are length 1.
dfh <<- rbind(dfh[dfh$Ticker != t, ],
data.frame(Ticker = if(length(ips) == 0) numeric(length = 0L) else t,
Date = dates,
Price = ips,
Quantity = iqs,
io = if(length(ips) == 0) numeric(length = 0L) else "i"))
# fill sales block frame and bind to output df
dfo <<- rbind(dfo,
data.frame(TID = i,
Ticker = t,
Date = max(sdf$Date),
Dates_bought = paste(date[seq(length(date)-length(dates2))], collapse = ","),
Price = sum(ips * iqs) / sum(iqs),
Quantity = sum(iqs),
io = "i",
hprice = v / o1))
}
# empty copy of df
dfo <- df[0, ] # empty copy of df
dfh <- df[0, c("Ticker", "Date", "Price", "Quantity", "io")] # to hold current stock aka portfolio
hifo <- purrr::map(sort(unique(df$TID)), ~ myfun(.x))[[length(unique(df$TID))]] %>%
dplyr::select(Ticker, Date, Dates_bought, Remain_Price = Price, Remain_Qtd = Quantity, Sales_Cost = hprice) %>%
dplyr::left_join(dplyr::select(df, Ticker, Date, Sales_Price = Price, Sales_Qtd = Quantity), by = c("Ticker", "Date")) %>%
dplyr::mutate(Result = abs(Sales_Qtd) * (Sales_Price - Sales_Cost))
如果有人有任何问题,请告诉我。我想把它变成一个闪亮的应用程序,也许会进一步开发它。如果您有兴趣合作,请告诉我。