从列表中查找每个帐户的平均值和最大值
Finding average and max value per account from a list
所以我得到了 'list' 类型的数据(这只是它的一小部分来说明这一点)存储在一个名为 sorted_trans 的变量中(我已经预先对它进行了排序将 accountId 组合在一起):
Transaction(transactionId='T000664', accountId='A1', transactionDay=21, category='GG', transactionAmount=364.69), Transaction(transactionId='T000776', accountId='A1', transactionDay=24, category='GG', transactionAmount=329.63), Transaction(transactionId='T000313', accountId='A10', transactionDay=8, category='AA', transactionAmount=960.56), Transaction(transactionId='T000472', accountId='A10', transactionDay=12, category='AA', transactionAmount=707.74), Transaction(transactionId='T000596', accountId='A10', transactionDay=18, category='AA', transactionAmount=156.77), Transaction(transactionId='T000730', accountId='A10', transactionDay=23, category='AA', transactionAmount=577.76)
我的任务是取过去 5 天的数据(我使用的是滚动窗口),按 'accountId' 排序,并计算:每个 'accountId' 的 'transactionAmount' 总和、每个 'accountId' 的 'transactionAmount' 平均值,以及每个 'accountId' 的 'transactionAmount' 最大值。
为了获取过去五天(不包括当天)的数据,我使用的是普通的 for 循环:
for i in range(day-window_size, day):
我怀疑我需要使用某种推导式(comprehension)来分组并计算正确的值。输出应为每个账户 ID 每天一行,每一行应包含各项计算出的统计信息,例如:Output example。
除了平均值和最大值之外,其余的值我都已经算出来了。
例如,我使用以下代码计算了每个类别 AA(其他类别相同)的总价值:
# (accountId, transactionAmount) pairs for category "AA". The pairs sit inside
# a single inner list, so trans_AA is a one-element list of lists.
trans_AA = [
    [
        (txn.accountId, txn.transactionAmount)
        for txn in sorted_trans
        if txn.category == "AA"
    ]
]
计算每个类别总交易金额的通用方法
def totals_per_cat(transactions):
    """Sum the transaction amounts per account.

    Args:
        transactions: An iterable of iterables of
            ``(accountId, transactionAmount)`` pairs; the inner iterables
            are flattened before summing.

    Returns:
        A list with one total per distinct account, in the order each
        account first appears in the flattened input.
    """
    # Local imports keep this snippet usable on its own.
    from collections import defaultdict
    from itertools import chain

    # The accumulator is created per call so totals never carry over from a
    # previous invocation.
    totals = defaultdict(int)
    for accountId, transactionAmount in chain.from_iterable(transactions):
        totals[accountId] += transactionAmount
    return list(totals.values())
如何计算过去 5 天每个 accountId 的平均值和最大值?
这有望帮助您完成大部分工作:
from collections import defaultdict
from dataclasses import dataclass, fields
from csv import DictReader
from statistics import mean
from typing import Dict, List
@dataclass
class Transaction:
    """One transaction record, with one field per input column."""

    # NOTE: the annotations below are called as converters on the raw string
    # values when rows are loaded (``field.type(...)``), so they must remain
    # real callables such as ``str``/``int``/``float``, not string annotations.
    transactionId: str
    accountId: str
    transactionDay: int
    category: str
    transactionAmount: float
def load_transactions(file_path: str) -> List[Transaction]:
    """Load transactions from a CSV file that has a header row.

    Each row is turned into a ``Transaction`` by looking up every dataclass
    field by name in the row and converting the raw string with the field's
    annotated type.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        One ``Transaction`` per data row, in file order.

    Raises:
        KeyError: If a row lacks a column named after a ``Transaction`` field.
        ValueError: If a value cannot be converted by its field's type.
    """
    # The field list does not change between rows, so look it up once.
    columns = fields(Transaction)
    # newline="" lets the csv module handle line endings itself, as its
    # documentation recommends for files passed to a reader.
    with open(file_path, newline="") as csv_file:
        return [
            Transaction(**{
                column.name: column.type(row[column.name])
                for column in columns
            })
            for row in DictReader(csv_file)
        ]
# Print, for every day that has a full look-back window, each account's
# maximum and average transaction amount over the preceding `window_size` days.
transactions = load_transactions('transactions.txt')
min_day = min(xact.transactionDay for xact in transactions)
max_day = max(xact.transactionDay for xact in transactions)
window_size = 5

# Group the transactions by account once, so each window scan only looks at
# that account's own transactions.
xact_by_acct: Dict[str, List[Transaction]] = defaultdict(list)
for xact in transactions:
    xact_by_acct[xact.accountId].append(xact)

# Start at min_day + window_size so the first reported day already has a
# complete window behind it.
for day in range(min_day + window_size, max_day + 1):
    window_start = day - window_size
    for acct, xacts in xact_by_acct.items():
        # Amounts within the half-open window [window_start, day); the
        # current day itself is excluded.
        amts = [
            xact.transactionAmount for xact in xacts
            if window_start <= xact.transactionDay < day
        ]
        if not amts:
            # No transactions in the window for this account.
            continue
        print(
            f"Day: {day}\tAccount: {acct}\t"
            f"Max: {max(amts)}\tAverage: {mean(amts)}"
        )
所以我得到了 'list' 类型的数据(这只是它的一小部分来说明这一点)存储在一个名为 sorted_trans 的变量中(我已经预先对它进行了排序将 accountId 组合在一起):
Transaction(transactionId='T000664', accountId='A1', transactionDay=21, category='GG', transactionAmount=364.69), Transaction(transactionId='T000776', accountId='A1', transactionDay=24, category='GG', transactionAmount=329.63), Transaction(transactionId='T000313', accountId='A10', transactionDay=8, category='AA', transactionAmount=960.56), Transaction(transactionId='T000472', accountId='A10', transactionDay=12, category='AA', transactionAmount=707.74), Transaction(transactionId='T000596', accountId='A10', transactionDay=18, category='AA', transactionAmount=156.77), Transaction(transactionId='T000730', accountId='A10', transactionDay=23, category='AA', transactionAmount=577.76)
我的任务是取过去 5 天的数据(我使用的是滚动窗口),按 'accountId' 排序,并计算:每个 'accountId' 的 'transactionAmount' 总和、每个 'accountId' 的 'transactionAmount' 平均值,以及每个 'accountId' 的 'transactionAmount' 最大值。
为了获取过去五天(不包括当天)的数据,我使用的是普通的 for 循环:
for i in range(day-window_size, day):
我怀疑我需要使用某种推导式(comprehension)来分组并计算正确的值。输出应为每个账户 ID 每天一行,每一行应包含各项计算出的统计信息,例如:Output example。除了平均值和最大值之外,其余的值我都已经算出来了。
例如,我使用以下代码计算了每个类别 AA(其他类别相同)的总价值:
# (accountId, transactionAmount) pairs for category "AA". The pairs sit inside
# a single inner list, so trans_AA is a one-element list of lists.
trans_AA = [
    [
        (txn.accountId, txn.transactionAmount)
        for txn in sorted_trans
        if txn.category == "AA"
    ]
]
计算每个类别总交易金额的通用方法
def totals_per_cat(transactions):
    """Sum the transaction amounts per account.

    Args:
        transactions: An iterable of iterables of
            ``(accountId, transactionAmount)`` pairs; the inner iterables
            are flattened before summing.

    Returns:
        A list with one total per distinct account, in the order each
        account first appears in the flattened input.
    """
    # Local imports keep this snippet usable on its own.
    from collections import defaultdict
    from itertools import chain

    # The accumulator is created per call so totals never carry over from a
    # previous invocation.
    totals = defaultdict(int)
    for accountId, transactionAmount in chain.from_iterable(transactions):
        totals[accountId] += transactionAmount
    return list(totals.values())
如何计算过去 5 天每个 accountId 的平均值和最大值?
这有望帮助您完成大部分工作:
from collections import defaultdict
from dataclasses import dataclass, fields
from csv import DictReader
from statistics import mean
from typing import Dict, List
@dataclass
class Transaction:
    """One transaction record, with one field per input column."""

    # NOTE: the annotations below are called as converters on the raw string
    # values when rows are loaded (``field.type(...)``), so they must remain
    # real callables such as ``str``/``int``/``float``, not string annotations.
    transactionId: str
    accountId: str
    transactionDay: int
    category: str
    transactionAmount: float
def load_transactions(file_path: str) -> List[Transaction]:
    """Load transactions from a CSV file that has a header row.

    Each row is turned into a ``Transaction`` by looking up every dataclass
    field by name in the row and converting the raw string with the field's
    annotated type.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        One ``Transaction`` per data row, in file order.

    Raises:
        KeyError: If a row lacks a column named after a ``Transaction`` field.
        ValueError: If a value cannot be converted by its field's type.
    """
    # The field list does not change between rows, so look it up once.
    columns = fields(Transaction)
    # newline="" lets the csv module handle line endings itself, as its
    # documentation recommends for files passed to a reader.
    with open(file_path, newline="") as csv_file:
        return [
            Transaction(**{
                column.name: column.type(row[column.name])
                for column in columns
            })
            for row in DictReader(csv_file)
        ]
# Print, for every day that has a full look-back window, each account's
# maximum and average transaction amount over the preceding `window_size` days.
transactions = load_transactions('transactions.txt')
min_day = min(xact.transactionDay for xact in transactions)
max_day = max(xact.transactionDay for xact in transactions)
window_size = 5

# Group the transactions by account once, so each window scan only looks at
# that account's own transactions.
xact_by_acct: Dict[str, List[Transaction]] = defaultdict(list)
for xact in transactions:
    xact_by_acct[xact.accountId].append(xact)

# Start at min_day + window_size so the first reported day already has a
# complete window behind it.
for day in range(min_day + window_size, max_day + 1):
    window_start = day - window_size
    for acct, xacts in xact_by_acct.items():
        # Amounts within the half-open window [window_start, day); the
        # current day itself is excluded.
        amts = [
            xact.transactionAmount for xact in xacts
            if window_start <= xact.transactionDay < day
        ]
        if not amts:
            # No transactions in the window for this account.
            continue
        print(
            f"Day: {day}\tAccount: {acct}\t"
            f"Max: {max(amts)}\tAverage: {mean(amts)}"
        )