结合两个财务数据集,交互式账户余额随时间变化
Combining two financial datasets, with interactive account balance variable over time
我有一个与金融交易数据集相关的问题。
我有两个数据集:
第一个包含带有时间戳的金融交易。
Account_from Account_to Value Timestamp
1 1 2 25 1
2 1 3 25 1
3 2 1 50 2
4 2 3 20 2
5 2 4 25 2
6 1 2 40 3
7 3 1 20 3
8 2 4 25 3
另一个数据集包含帐户信息:
Account_id initial deposit
1 1 200
2 2 100
3 3 150
4 4 200
现在我想创建一个数据集,其中包含金融交易和原始账户的余额。此外,我希望账户余额随着每次交易的变化而变化,这样:
Account_from Account_to Value Timestamp Initial_deposit Old_bal_org New_bal_org Old_bal_des New_bal_des
1 1 2 25 1 200 200 175 100 125
2 1 3 25 1 200 175 150 150 175
3 2 1 50 2 100 125 75 150 200
4 2 3 20 2 100 75 55 175 195
5 2 4 25 2 100 55 30 200 225
6 1 2 40 3 200 200 160 30 70
7 3 1 20 3 150 195 175 160 180
8 2 4 25 3 100 70 45 225 250
这该如何实现?
重现数据:
# Transactions: one row per transfer between two accounts.
dftrans <- data.frame(
  Account_from = c(1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L),
  Account_to = c(2L, 3L, 1L, 3L, 4L, 2L, 1L, 4L),
  Value = c(25, 25, 50, 20, 25, 40, 20, 25),
  Timestamp = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L)
)

# Accounts: starting balance for each account id.
dfacc <- data.frame(
  Account_id = 1:4,
  Initial__deposit = c(200, 100, 150, 200)
)
提前致谢
一种可行的方法:
# Build a per-transaction table carrying the running balance of both the
# origin ("from") and the destination ("to") account.
# Expects `dftrans` and `dfacc` as defined in the reproduction data.
library(dplyr)
library(tidyr)
library(tibble)

dftransFinal <- dftrans %>%
  # create a record id to keep track of each transaction
  rowid_to_column(var = "recordID") %>%
  # one row per (transaction, account) pair
  pivot_longer(
    cols = c(Account_to, Account_from),
    names_to = "accountType",
    values_to = "Account_id"
  ) %>%
  left_join(dfacc, by = "Account_id") %>%
  # a 'from' row debits the account, so its value is negated
  mutate(Value = if_else(accountType == "Account_from", -Value, Value)) %>%
  group_by(Account_id) %>%
  mutate(
    # net change of the account up to and including this transaction
    sum_changes = cumsum(Value),
    # net change before this transaction; dplyr::lag() takes the offset
    # as `n` (there is no `k` argument)
    sum_changes_lag = lag(sum_changes, n = 1, default = 0),
    Old_bal_org = Initial__deposit + sum_changes_lag,
    New_bal_org = Initial__deposit + sum_changes
  ) %>%
  # drop the grouping before reshaping so it cannot leak downstream
  ungroup() %>%
  # back to one row per transaction, with *_Account_from / *_Account_to columns
  pivot_wider(
    names_from = "accountType",
    values_from = c(
      "Account_id", "Old_bal_org", "Initial__deposit", "New_bal_org", "Value"
    ),
    id_cols = c("recordID", "Timestamp")
  ) %>%
  # select, rename, and order columns
  select(
    "Account_from" = "Account_id_Account_from",
    "Account_to" = "Account_id_Account_to",
    # the 'to' side holds the positive (un-negated) transaction value
    "Value" = "Value_Account_to",
    Timestamp,
    "Initial__deposit" = "Initial__deposit_Account_from",
    "Old_bal_org" = "Old_bal_org_Account_from",
    "New_bal_org" = "New_bal_org_Account_from",
    "Old_bal_des" = "Old_bal_org_Account_to",
    "New_bal_des" = "New_bal_org_Account_to"
  )
使用 data.table,并采用与 Amanda 类似的方法来维护一个滚动(running)分类帐:
# Running ledger: one row per (account, transaction) holding the account's
# cumulative net change (DebitCredit) and the resulting balance (Balance).
# Expects `dftrans` (a data.table with a row-number column `rn`) and `dfacc`
# (a data.table) to exist already.
ledger <- dftrans[, .(
  # each transaction contributes two rows: origin first, then destination
  rn = rep(rn, each = 2L),
  Account_id = c(rbind(Account_from, Account_to)),
  # debit (-Value) for the origin, credit (+Value) for the destination;
  # interleaved with rbind() so it is vectorised and type-stable
  Value = c(rbind(-Value, Value))
)][
  # cumulative net change per account, in transaction order
  , .(rn, DebitCredit = cumsum(Value)), by = .(Account_id)
][
  # update join: add the initial deposit to obtain the running balance
  dfacc, on = .(Account_id), Balance := Initial__deposit + DebitCredit
]
分类帐:
Account_id rn DebitCredit Balance
1: 1 1 -25 175
2: 1 2 -50 150
3: 1 3 0 200
4: 1 6 -40 160
5: 1 7 -20 180
6: 2 1 25 125
7: 2 3 -25 75
8: 2 4 -45 55
9: 2 5 -70 30
10: 2 6 -30 70
11: 2 8 -55 45
12: 3 2 25 175
13: 3 4 45 195
14: 3 7 25 175
15: 4 5 25 225
16: 4 8 50 250
然后使用滚动和非等连接来查找所需的列:
# Look up the four balance columns for every transaction from `ledger`.
# Uses `rn`, `init_from` and `init_to` on `dftrans`, which are created by the
# setup snippet of this document, and the `ledger` table built before this.

# Origin balance before the transaction: non-equi join on the same account
# with a ledger rn strictly smaller than the transaction's rn, keeping only
# the last match. fcoalesce() falls back to init_from where the lookup is NA.
dftrans[, Old_bal_org := fcoalesce(
ledger[.SD, on=.(Account_id=Account_from, rn<rn), mult="last", Balance],
init_from)]
# Origin balance after the transaction: join on account and rn (roll=Inf
# requests a rolling join on rn) and add the ledger's cumulative change to
# init_from.
dftrans[, New_bal_org :=
ledger[.SD, on=.(Account_id=Account_from, rn), roll=Inf, init_from + DebitCredit]
]
# Destination balance before the transaction: same lookup as Old_bal_org,
# keyed on Account_to and falling back to init_to.
dftrans[, Old_bal_des := fcoalesce(
ledger[.SD, on=.(Account_id=Account_to, rn<rn), mult="last", Balance],
init_to)]
# Destination balance after the transaction: same lookup as New_bal_org,
# keyed on Account_to and based on init_to.
dftrans[, New_bal_des :=
ledger[.SD, on=.(Account_id=Account_to, rn), roll=Inf, init_to + DebitCredit]
]
输出:
Account_from Account_to Value Timestamp rn init_from init_to Old_bal_org New_bal_org Old_bal_des New_bal_des
1: 1 2 25 1 1 200 100 200 175 100 125
2: 1 3 25 1 2 200 150 175 150 150 175
3: 2 1 50 2 3 100 200 125 75 150 200
4: 2 3 20 2 4 100 150 75 55 175 195
5: 2 4 25 2 5 100 200 55 30 200 225
6: 1 2 40 3 6 200 100 200 160 30 70
7: 3 1 20 3 7 150 200 195 175 160 180
8: 2 4 25 3 8 100 200 70 45 225 250
数据准备及初始存款的查找(需在上面的 data.table 代码之前运行,因为其中用到了 rn、init_from 和 init_to):
# Transactions: one row per transfer between two accounts.
dftrans <- data.frame(
  Account_from = c(1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L),
  Account_to = c(2L, 3L, 1L, 3L, 4L, 2L, 1L, 4L),
  Value = c(25, 25, 50, 20, 25, 40, 20, 25),
  Timestamp = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L)
)

# Accounts: starting balance for each account id.
dfacc <- data.frame(
  Account_id = 1:4,
  Initial__deposit = c(200, 100, 150, 200)
)
library(data.table)
# Convert both data frames to data.tables by reference.
setDT(dfacc)
setDT(dftrans)
# Row number acts as the transaction id and ordering key.
dftrans[, rn := .I]
# Update joins: attach the initial deposit of the origin account and of the
# destination account to every transaction.
dftrans[dfacc, on = .(Account_from = Account_id), init_from := i.Initial__deposit]
dftrans[dfacc, on = .(Account_to = Account_id), init_to := i.Initial__deposit]
我有一个与金融交易数据集相关的问题。 我有两个数据集:
第一个包含带有时间戳的金融交易。
Account_from Account_to Value Timestamp
1 1 2 25 1
2 1 3 25 1
3 2 1 50 2
4 2 3 20 2
5 2 4 25 2
6 1 2 40 3
7 3 1 20 3
8 2 4 25 3
另一个数据集包含帐户信息:
Account_id initial deposit
1 1 200
2 2 100
3 3 150
4 4 200
现在我想创建一个数据集,其中包含金融交易和原始账户的余额。此外,我希望账户余额随着每次交易的变化而变化,这样:
Account_from Account_to Value Timestamp Initial_deposit Old_bal_org New_bal_org Old_bal_des New_bal_des
1 1 2 25 1 200 200 175 100 125
2 1 3 25 1 200 175 150 150 175
3 2 1 50 2 100 125 75 150 200
4 2 3 20 2 100 75 55 175 195
5 2 4 25 2 100 55 30 200 225
6 1 2 40 3 200 200 160 30 70
7 3 1 20 3 150 195 175 160 180
8 2 4 25 3 100 70 45 225 250
这该如何实现?
重现数据:
# Transactions: one row per transfer between two accounts.
dftrans <- data.frame(
  Account_from = c(1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L),
  Account_to = c(2L, 3L, 1L, 3L, 4L, 2L, 1L, 4L),
  Value = c(25, 25, 50, 20, 25, 40, 20, 25),
  Timestamp = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L)
)

# Accounts: starting balance for each account id.
dfacc <- data.frame(
  Account_id = 1:4,
  Initial__deposit = c(200, 100, 150, 200)
)
提前致谢
一种可行的方法:
# Build a per-transaction table carrying the running balance of both the
# origin ("from") and the destination ("to") account.
# Expects `dftrans` and `dfacc` as defined in the reproduction data.
library(dplyr)
library(tidyr)
library(tibble)

dftransFinal <- dftrans %>%
  # create a record id to keep track of each transaction
  rowid_to_column(var = "recordID") %>%
  # one row per (transaction, account) pair
  pivot_longer(
    cols = c(Account_to, Account_from),
    names_to = "accountType",
    values_to = "Account_id"
  ) %>%
  left_join(dfacc, by = "Account_id") %>%
  # a 'from' row debits the account, so its value is negated
  mutate(Value = if_else(accountType == "Account_from", -Value, Value)) %>%
  group_by(Account_id) %>%
  mutate(
    # net change of the account up to and including this transaction
    sum_changes = cumsum(Value),
    # net change before this transaction; dplyr::lag() takes the offset
    # as `n` (there is no `k` argument)
    sum_changes_lag = lag(sum_changes, n = 1, default = 0),
    Old_bal_org = Initial__deposit + sum_changes_lag,
    New_bal_org = Initial__deposit + sum_changes
  ) %>%
  # drop the grouping before reshaping so it cannot leak downstream
  ungroup() %>%
  # back to one row per transaction, with *_Account_from / *_Account_to columns
  pivot_wider(
    names_from = "accountType",
    values_from = c(
      "Account_id", "Old_bal_org", "Initial__deposit", "New_bal_org", "Value"
    ),
    id_cols = c("recordID", "Timestamp")
  ) %>%
  # select, rename, and order columns
  select(
    "Account_from" = "Account_id_Account_from",
    "Account_to" = "Account_id_Account_to",
    # the 'to' side holds the positive (un-negated) transaction value
    "Value" = "Value_Account_to",
    Timestamp,
    "Initial__deposit" = "Initial__deposit_Account_from",
    "Old_bal_org" = "Old_bal_org_Account_from",
    "New_bal_org" = "New_bal_org_Account_from",
    "Old_bal_des" = "Old_bal_org_Account_to",
    "New_bal_des" = "New_bal_org_Account_to"
  )
使用 data.table,并采用与 Amanda 类似的方法来维护一个滚动(running)分类帐:
# Running ledger: one row per (account, transaction) holding the account's
# cumulative net change (DebitCredit) and the resulting balance (Balance).
# Expects `dftrans` (a data.table with a row-number column `rn`) and `dfacc`
# (a data.table) to exist already.
ledger <- dftrans[, .(
  # each transaction contributes two rows: origin first, then destination
  rn = rep(rn, each = 2L),
  Account_id = c(rbind(Account_from, Account_to)),
  # debit (-Value) for the origin, credit (+Value) for the destination;
  # interleaved with rbind() so it is vectorised and type-stable
  Value = c(rbind(-Value, Value))
)][
  # cumulative net change per account, in transaction order
  , .(rn, DebitCredit = cumsum(Value)), by = .(Account_id)
][
  # update join: add the initial deposit to obtain the running balance
  dfacc, on = .(Account_id), Balance := Initial__deposit + DebitCredit
]
分类帐:
Account_id rn DebitCredit Balance
1: 1 1 -25 175
2: 1 2 -50 150
3: 1 3 0 200
4: 1 6 -40 160
5: 1 7 -20 180
6: 2 1 25 125
7: 2 3 -25 75
8: 2 4 -45 55
9: 2 5 -70 30
10: 2 6 -30 70
11: 2 8 -55 45
12: 3 2 25 175
13: 3 4 45 195
14: 3 7 25 175
15: 4 5 25 225
16: 4 8 50 250
然后使用滚动和非等连接来查找所需的列:
# Look up the four balance columns for every transaction from `ledger`.
# Uses `rn`, `init_from` and `init_to` on `dftrans`, which are created by the
# setup snippet of this document, and the `ledger` table built before this.

# Origin balance before the transaction: non-equi join on the same account
# with a ledger rn strictly smaller than the transaction's rn, keeping only
# the last match. fcoalesce() falls back to init_from where the lookup is NA.
dftrans[, Old_bal_org := fcoalesce(
ledger[.SD, on=.(Account_id=Account_from, rn<rn), mult="last", Balance],
init_from)]
# Origin balance after the transaction: join on account and rn (roll=Inf
# requests a rolling join on rn) and add the ledger's cumulative change to
# init_from.
dftrans[, New_bal_org :=
ledger[.SD, on=.(Account_id=Account_from, rn), roll=Inf, init_from + DebitCredit]
]
# Destination balance before the transaction: same lookup as Old_bal_org,
# keyed on Account_to and falling back to init_to.
dftrans[, Old_bal_des := fcoalesce(
ledger[.SD, on=.(Account_id=Account_to, rn<rn), mult="last", Balance],
init_to)]
# Destination balance after the transaction: same lookup as New_bal_org,
# keyed on Account_to and based on init_to.
dftrans[, New_bal_des :=
ledger[.SD, on=.(Account_id=Account_to, rn), roll=Inf, init_to + DebitCredit]
]
输出:
Account_from Account_to Value Timestamp rn init_from init_to Old_bal_org New_bal_org Old_bal_des New_bal_des
1: 1 2 25 1 1 200 100 200 175 100 125
2: 1 3 25 1 2 200 150 175 150 150 175
3: 2 1 50 2 3 100 200 125 75 150 200
4: 2 3 20 2 4 100 150 75 55 175 195
5: 2 4 25 2 5 100 200 55 30 200 225
6: 1 2 40 3 6 200 100 200 160 30 70
7: 3 1 20 3 7 150 200 195 175 160 180
8: 2 4 25 3 8 100 200 70 45 225 250
数据准备及初始存款的查找(需在上面的 data.table 代码之前运行,因为其中用到了 rn、init_from 和 init_to):
# Transactions: one row per transfer between two accounts.
dftrans <- data.frame(
  Account_from = c(1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L),
  Account_to = c(2L, 3L, 1L, 3L, 4L, 2L, 1L, 4L),
  Value = c(25, 25, 50, 20, 25, 40, 20, 25),
  Timestamp = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L)
)

# Accounts: starting balance for each account id.
dfacc <- data.frame(
  Account_id = 1:4,
  Initial__deposit = c(200, 100, 150, 200)
)
library(data.table)
# Convert both data frames to data.tables by reference.
setDT(dfacc)
setDT(dftrans)
# Row number acts as the transaction id and ordering key.
dftrans[, rn := .I]
# Update joins: attach the initial deposit of the origin account and of the
# destination account to every transaction.
dftrans[dfacc, on = .(Account_from = Account_id), init_from := i.Initial__deposit]
dftrans[dfacc, on = .(Account_to = Account_id), init_to := i.Initial__deposit]