f# deedle 基于列键聚合行值
F# Deedle: aggregate row values based on column key
假设我有以下 Frame:
/// Record with a name, an age and two float `Comp` fields.
type Person =
    { Name: string
      Age: int
      Comp1: float
      Comp2: float }

/// Sample `Person` records that are later turned into a Deedle frame.
// The record body and list elements are indented so the snippet satisfies
// F#'s offside rule (they were flush with column 0 before).
let peopleRecds =
    [ { Name = "Joe"; Age = 51; Comp1 = 12.1; Comp2 = 20.3 }
      { Name = "Tomas"; Age = 28; Comp1 = 1.1; Comp2 = 29.3 }
      { Name = "Eve"; Age = 2; Comp1 = 2.1; Comp2 = 40.3 }
      { Name = "Suzanne"; Age = 15; Comp1 = 12.4; Comp2 = 26.3 } ]
// Build a Deedle frame from the sample records.
let peopleList = peopleRecds |> Frame.ofRecords
我想做的是把 Comp1 和 Comp2 两列相加,结果存入 peopleList 的一个新列。
不过 Comp(n) 中的 n 目前尚未确定,将来可能还会有 Comp3、Comp4,
所以不能只对已知的两列求和,而必须按列键来匹配(例如使用正则表达式,或判断键是否以 Comp 开头)。
看来我应该对每一行使用 mapRowValues:
// Pseudocode from the question: the parenthesised text below is a placeholder
// for the not-yet-known per-row summing logic, not valid F#.
peopleList?TotalComp <- peopleList |>Frame.mapRowValues(
fun row ->
(do something to sum up)
)
但是我不确定如何在这里进行行级操作。
如果打算增加 Comp 字段的数量,把它们放在一个数组中可能会更好:
/// Record variant that keeps all `Comp` values in a single float array.
type nPerson =
    { Name: string
      Age: int
      Comp: float[] }

/// Sample `nPerson` records; each one carries its `Comp` values as an array.
// List elements are indented so the snippet satisfies F#'s offside rule
// (they were flush with column 0 before).
let npeopleRecds =
    [ { Name = "Joe"; Age = 51; Comp = [| 12.1; 20.3 |] }
      { Name = "Tomas"; Age = 28; Comp = [| 1.1; 29.3 |] }
      { Name = "Eve"; Age = 2; Comp = [| 2.1; 40.3 |] }
      { Name = "Suzanne"; Age = 15; Comp = [| 12.4; 26.3 |] } ]
然后您可以轻松添加一列,其总和为:
// Build the frame and print it before the total column exists.
let npeopleList = npeopleRecds |> Frame.ofRecords
printfn "%s" (npeopleList.Format())

// Sum each row's `Comp` array, then attach the result as the `TotalComp` column.
let sumseries =
    npeopleList.GetColumn<float[]>("Comp")
    |> Series.mapValues (fun comps -> Array.sum comps)

npeopleList?TotalComp <- sumseries
printfn "%s" (npeopleList.Format())
打印:
Name Age Comp
0 -> Joe 51 System.Double[]
1 -> Tomas 28 System.Double[]
2 -> Eve 2 System.Double[]
3 -> Suzanne 15 System.Double[]
Name Age Comp TotalComp
0 -> Joe 51 System.Double[] 32,4
1 -> Tomas 28 System.Double[] 30,4
2 -> Eve 2 System.Double[] 42,4
3 -> Suzanne 15 System.Double[] 38,7
编辑:
如果不能更改 Person 记录的字段,可以改用过滤器按列键筛选:
// Row-wise sum of every column whose key starts with "Comp", computed without
// changing the Person record.
let allSum =
    peopleList.Columns
    // Ordinal comparison keeps the key match independent of the current culture.
    |> Series.filter (fun k _ -> k.StartsWith("Comp", System.StringComparison.Ordinal))
    |> Frame.ofColumns
    |> Frame.rows
    // Convert.ToDouble accepts any numeric cell (e.g. from an int column), where
    // the unboxing cast `:?> float` would throw InvalidCastException.
    |> Series.mapValues (Series.foldValues (fun acc (v: obj) -> acc + System.Convert.ToDouble(v)) 0.0)

// Attach the totals as a new column and print the resulting frame.
peopleList?TotalComp <- allSum
peopleList.Format() |> printfn "%s"
打印:
Name Age Comp1 Comp2 TotalComp
0 -> Joe 51 12,1 20,3 32,4
1 -> Tomas 28 1,1 29,3 30,4
2 -> Eve 2 2,1 40,3 42,4
3 -> Suzanne 15 12,4 26,3 38,7
假设我有以下 Frame:
/// Record with a name, an age and two float `Comp` fields.
type Person =
    { Name: string
      Age: int
      Comp1: float
      Comp2: float }

/// Sample `Person` records that are later turned into a Deedle frame.
// The record body and list elements are indented so the snippet satisfies
// F#'s offside rule (they were flush with column 0 before).
let peopleRecds =
    [ { Name = "Joe"; Age = 51; Comp1 = 12.1; Comp2 = 20.3 }
      { Name = "Tomas"; Age = 28; Comp1 = 1.1; Comp2 = 29.3 }
      { Name = "Eve"; Age = 2; Comp1 = 2.1; Comp2 = 40.3 }
      { Name = "Suzanne"; Age = 15; Comp1 = 12.4; Comp2 = 26.3 } ]
// Build a Deedle frame from the sample records.
let peopleList = peopleRecds |> Frame.ofRecords
我想做的是把 Comp1 和 Comp2 两列相加,结果存入 peopleList 的一个新列。
不过 Comp(n) 中的 n 目前尚未确定,将来可能还会有 Comp3、Comp4,
所以不能只对已知的两列求和,而必须按列键来匹配(例如使用正则表达式,或判断键是否以 Comp 开头)。
看来我应该对每一行使用 mapRowValues:
// Pseudocode from the question: the parenthesised text below is a placeholder
// for the not-yet-known per-row summing logic, not valid F#.
peopleList?TotalComp <- peopleList |>Frame.mapRowValues(
fun row ->
(do something to sum up)
)
但是我不确定如何在这里进行行级操作。
如果打算增加 Comp 字段的数量,把它们放在一个数组中可能会更好:
/// Record variant that keeps all `Comp` values in a single float array.
type nPerson =
    { Name: string
      Age: int
      Comp: float[] }

/// Sample `nPerson` records; each one carries its `Comp` values as an array.
// List elements are indented so the snippet satisfies F#'s offside rule
// (they were flush with column 0 before).
let npeopleRecds =
    [ { Name = "Joe"; Age = 51; Comp = [| 12.1; 20.3 |] }
      { Name = "Tomas"; Age = 28; Comp = [| 1.1; 29.3 |] }
      { Name = "Eve"; Age = 2; Comp = [| 2.1; 40.3 |] }
      { Name = "Suzanne"; Age = 15; Comp = [| 12.4; 26.3 |] } ]
然后您可以轻松添加一列,其总和为:
// Build the frame and print it before the total column exists.
let npeopleList = npeopleRecds |> Frame.ofRecords
printfn "%s" (npeopleList.Format())

// Sum each row's `Comp` array, then attach the result as the `TotalComp` column.
let sumseries =
    npeopleList.GetColumn<float[]>("Comp")
    |> Series.mapValues (fun comps -> Array.sum comps)

npeopleList?TotalComp <- sumseries
printfn "%s" (npeopleList.Format())
打印:
Name Age Comp
0 -> Joe 51 System.Double[]
1 -> Tomas 28 System.Double[]
2 -> Eve 2 System.Double[]
3 -> Suzanne 15 System.Double[]
Name Age Comp TotalComp
0 -> Joe 51 System.Double[] 32,4
1 -> Tomas 28 System.Double[] 30,4
2 -> Eve 2 System.Double[] 42,4
3 -> Suzanne 15 System.Double[] 38,7
编辑:
如果不能更改 Person 记录的字段,可以改用过滤器按列键筛选:
// Row-wise sum of every column whose key starts with "Comp", computed without
// changing the Person record.
let allSum =
    peopleList.Columns
    // Ordinal comparison keeps the key match independent of the current culture.
    |> Series.filter (fun k _ -> k.StartsWith("Comp", System.StringComparison.Ordinal))
    |> Frame.ofColumns
    |> Frame.rows
    // Convert.ToDouble accepts any numeric cell (e.g. from an int column), where
    // the unboxing cast `:?> float` would throw InvalidCastException.
    |> Series.mapValues (Series.foldValues (fun acc (v: obj) -> acc + System.Convert.ToDouble(v)) 0.0)

// Attach the totals as a new column and print the resulting frame.
peopleList?TotalComp <- allSum
peopleList.Format() |> printfn "%s"
打印:
Name Age Comp1 Comp2 TotalComp
0 -> Joe 51 12,1 20,3 32,4
1 -> Tomas 28 1,1 29,3 30,4
2 -> Eve 2 2,1 40,3 42,4
3 -> Suzanne 15 12,4 26,3 38,7