通过 julia 中的公共列值合并大量数组
Merge large number of arrays by common column values in julia
扩展我之前在这里提出的一个问题,假设我们有大量的数组(比如 500 个数组),比如下面的前 3 个
5.0 3.5
6.0 3.6
7.0 3.0
5.0 4.5
6.0 4.7
8.0 3.0
5.0 4.0
6.0 3.2
8.0 4.0
等等,存储在一个数组中,这样我们就有了一个包含500个上述类型数组的数组。我想将500个数组合并成一个数组,通过第一列的公共值,计算第二列对应元素的平均值。结果必须是以下数组:
5.0 mean of all 5's values
6.0 mean of all 6's values
7.0 mean of all 7's values
8.0 mean of all 8's values
我怎样才能做到这一点?谢谢!
同样返回
"""
    aggregate(m)

Merge a collection of matrices by the value in their first column.

All matrices in `m` are stacked vertically, the rows are sorted
lexicographically, and every run of rows sharing the same first-column value
(compared with `==`) is collapsed into a single row whose remaining columns
hold the arithmetic mean of the run.

# Arguments
- `m`: non-empty vector of numeric matrices that all have the same number of
  columns. The matrices themselves are not modified.

# Returns
A matrix with one row per distinct key, sorted by key. Its element type is the
type obtained by dividing two elements of the stacked input (so integer input
yields a floating-point result instead of raising `InexactError`).

# Throws
- `ArgumentError` if `m` contains no matrices.
"""
function aggregate(m::Array{<:Array{<:Number,2},1})
    isempty(m) && throw(ArgumentError("aggregate needs at least one matrix"))

    # `reduce(vcat, m)` stacks without splatting hundreds of arguments.
    stacked = reduce(vcat, m)
    T = eltype(stacked)
    S = typeof(one(T) / one(T))  # type able to hold a mean of T values

    # Sorting the rows makes equal keys adjacent; converting up front lets the
    # means be written in place even for integer input.
    result = convert(Matrix{S}, sortslices(stacked; dims=1))

    n = size(result, 1)
    n <= 1 && return result

    vals = 2:size(result, 2)  # columns that get averaged
    group = 1                 # row accumulating the current key's sum
    members = 1               # number of rows merged into `group` so far
    @views for i in 2:n
        if result[group, 1] == result[i, 1]
            result[group, vals] .+= result[i, vals]
            members += 1
        else
            # Close the finished group, then start the next one right below it.
            result[group, vals] ./= members
            members = 1
            group += 1
            result[group, :] .= result[i, :]
        end
    end
    @views result[group, vals] ./= members  # the last group is still a raw sum
    return result[1:group, :]
end
演示:
# Three sample 3×2 tables: column 1 is the key, column 2 the value to average.
x = [5.0 3.5; 6.0 3.6; 7.0 3.0]
y = [5.0 4.5; 6.0 4.7; 8.0 3.0]
z = [5.0 4.0; 6.0 3.2; 8.0 4.0]
# Bundle the tables into a single vector of matrices.
a = [x, y, z]
julia> a
3-element Array{Array{Float64,2},1}:
[5.0 3.5; 6.0 3.6; 7.0 3.0]
[5.0 4.5; 6.0 4.7; 8.0 3.0]
[5.0 4.0; 6.0 3.2; 8.0 4.0]
julia> aggregate(a)
4×2 Array{Float64,2}:
5.0 4.0
6.0 3.83333
7.0 3.0
8.0 3.5
这是一个比 @PicaudVincent 的答案快 6 倍的版本(基于他的输入数据),但它不对键进行排序,因此返回矩阵的行顺序是任意的:
"""
    accumarrays(A)

Average the second column of every matrix in `A`, grouped by first-column value.

A dictionary maps each key to a running `(sum, count)` pair, so the input is
traversed once and no sorting is performed. Keys are matched the way `Dict`
matches them (`isequal`/`hash`).

# Arguments
- `A`: vector of matrices with element type `T`; each matrix needs at least two
  columns. Only columns 1 and 2 are read.

# Returns
A `length(keys) × 2` matrix whose first column holds the distinct keys and
whose second column holds the corresponding means. Rows come out in the
dictionary's iteration order, i.e. in no particular order. An empty `A` yields
a `0 × 2` matrix.
"""
function accumarrays(A::Vector{Matrix{T}}) where {T}
    d = Dict{T, Tuple{T, Int}}()
    for a in A
        # `axes` replaces the `indices` function removed after Julia 0.6.
        for i in axes(a, 1)
            ai = a[i, 1]
            d[ai] = get(d, ai, (zero(T), 0)) .+ (a[i, 2], 1)
        end
    end
    # The element type is whatever dividing a `T` by an integer produces.
    Aout = Matrix{typeof(one(T) / 1)}(undef, length(d), 2)
    i = 0
    for (key, val) in d
        i += 1
        Aout[i, 1] = key
        Aout[i, 2] = val[1] / val[2]
    end
    return Aout
end
如果您需要对行进行排序,此方法可行,但速度仅快 4-5 倍:
"""
    accumarrays_(A)

Average the second column of every matrix in `A`, grouped by first-column
value, and return the groups sorted by key.

A dictionary maps each key to a running `(sum, count)` pair; the collected keys
are sorted afterwards to fix the row order. Keys are matched the way `Dict`
matches them (`isequal`/`hash`).

# Arguments
- `A`: vector of matrices with element type `T`; each matrix needs at least two
  columns. Only columns 1 and 2 are read.

# Returns
A `length(keys) × 2` matrix: column 1 holds the distinct keys in ascending
order, column 2 the corresponding means. An empty `A` yields a `0 × 2` matrix.
"""
function accumarrays_(A::Vector{Matrix{T}}) where {T}
    d = Dict{T, Tuple{T, Int}}()
    for a in A
        # `axes` replaces the `indices` function removed after Julia 0.6.
        for i in axes(a, 1)
            ai = a[i, 1]
            d[ai] = get(d, ai, (zero(T), 0)) .+ (a[i, 2], 1)
        end
    end
    dkeys = sort!(collect(keys(d)))
    # The element type is whatever dividing a `T` by an integer produces.
    Aout = Matrix{typeof(one(T) / 1)}(undef, length(dkeys), 2)
    for (i, key) in enumerate(dkeys)
        total, n = d[key]
        Aout[i, 1] = key
        Aout[i, 2] = total / n
    end
    return Aout
end
扩展我之前在这里提出的一个问题,假设我们有大量的数组(比如 500 个数组),比如下面的前 3 个
5.0 3.5
6.0 3.6
7.0 3.0
5.0 4.5
6.0 4.7
8.0 3.0
5.0 4.0
6.0 3.2
8.0 4.0
等等,存储在一个数组中,这样我们就有了一个包含500个上述类型数组的数组。我想将500个数组合并成一个数组,通过第一列的公共值,计算第二列对应元素的平均值。结果必须是以下数组:
5.0 mean of all 5's values
6.0 mean of all 6's values
7.0 mean of all 7's values
8.0 mean of all 8's values
我怎样才能做到这一点?谢谢!
同样返回
"""
    aggregate(m)

Merge a collection of matrices by the value in their first column.

All matrices in `m` are stacked vertically, the rows are sorted
lexicographically, and every run of rows sharing the same first-column value
(compared with `==`) is collapsed into a single row whose remaining columns
hold the arithmetic mean of the run.

# Arguments
- `m`: non-empty vector of numeric matrices that all have the same number of
  columns. The matrices themselves are not modified.

# Returns
A matrix with one row per distinct key, sorted by key. Its element type is the
type obtained by dividing two elements of the stacked input (so integer input
yields a floating-point result instead of raising `InexactError`).

# Throws
- `ArgumentError` if `m` contains no matrices.
"""
function aggregate(m::Array{<:Array{<:Number,2},1})
    isempty(m) && throw(ArgumentError("aggregate needs at least one matrix"))

    # `reduce(vcat, m)` stacks without splatting hundreds of arguments.
    stacked = reduce(vcat, m)
    T = eltype(stacked)
    S = typeof(one(T) / one(T))  # type able to hold a mean of T values

    # Sorting the rows makes equal keys adjacent; converting up front lets the
    # means be written in place even for integer input.
    result = convert(Matrix{S}, sortslices(stacked; dims=1))

    n = size(result, 1)
    n <= 1 && return result

    vals = 2:size(result, 2)  # columns that get averaged
    group = 1                 # row accumulating the current key's sum
    members = 1               # number of rows merged into `group` so far
    @views for i in 2:n
        if result[group, 1] == result[i, 1]
            result[group, vals] .+= result[i, vals]
            members += 1
        else
            # Close the finished group, then start the next one right below it.
            result[group, vals] ./= members
            members = 1
            group += 1
            result[group, :] .= result[i, :]
        end
    end
    @views result[group, vals] ./= members  # the last group is still a raw sum
    return result[1:group, :]
end
演示:
# Three sample 3×2 tables: column 1 is the key, column 2 the value to average.
x = [5.0 3.5; 6.0 3.6; 7.0 3.0]
y = [5.0 4.5; 6.0 4.7; 8.0 3.0]
z = [5.0 4.0; 6.0 3.2; 8.0 4.0]
# Bundle the tables into a single vector of matrices.
a = [x, y, z]
julia> a
3-element Array{Array{Float64,2},1}:
[5.0 3.5; 6.0 3.6; 7.0 3.0]
[5.0 4.5; 6.0 4.7; 8.0 3.0]
[5.0 4.0; 6.0 3.2; 8.0 4.0]
julia> aggregate(a)
4×2 Array{Float64,2}:
5.0 4.0
6.0 3.83333
7.0 3.0
8.0 3.5
这是一个比 @PicaudVincent 的答案快 6 倍的版本(基于他的输入数据),但它不对键进行排序,因此返回矩阵的行顺序是任意的:
"""
    accumarrays(A)

Average the second column of every matrix in `A`, grouped by first-column value.

A dictionary maps each key to a running `(sum, count)` pair, so the input is
traversed once and no sorting is performed. Keys are matched the way `Dict`
matches them (`isequal`/`hash`).

# Arguments
- `A`: vector of matrices with element type `T`; each matrix needs at least two
  columns. Only columns 1 and 2 are read.

# Returns
A `length(keys) × 2` matrix whose first column holds the distinct keys and
whose second column holds the corresponding means. Rows come out in the
dictionary's iteration order, i.e. in no particular order. An empty `A` yields
a `0 × 2` matrix.
"""
function accumarrays(A::Vector{Matrix{T}}) where {T}
    d = Dict{T, Tuple{T, Int}}()
    for a in A
        # `axes` replaces the `indices` function removed after Julia 0.6.
        for i in axes(a, 1)
            ai = a[i, 1]
            d[ai] = get(d, ai, (zero(T), 0)) .+ (a[i, 2], 1)
        end
    end
    # The element type is whatever dividing a `T` by an integer produces.
    Aout = Matrix{typeof(one(T) / 1)}(undef, length(d), 2)
    i = 0
    for (key, val) in d
        i += 1
        Aout[i, 1] = key
        Aout[i, 2] = val[1] / val[2]
    end
    return Aout
end
如果您需要对行进行排序,此方法可行,但速度仅快 4-5 倍:
"""
    accumarrays_(A)

Average the second column of every matrix in `A`, grouped by first-column
value, and return the groups sorted by key.

A dictionary maps each key to a running `(sum, count)` pair; the collected keys
are sorted afterwards to fix the row order. Keys are matched the way `Dict`
matches them (`isequal`/`hash`).

# Arguments
- `A`: vector of matrices with element type `T`; each matrix needs at least two
  columns. Only columns 1 and 2 are read.

# Returns
A `length(keys) × 2` matrix: column 1 holds the distinct keys in ascending
order, column 2 the corresponding means. An empty `A` yields a `0 × 2` matrix.
"""
function accumarrays_(A::Vector{Matrix{T}}) where {T}
    d = Dict{T, Tuple{T, Int}}()
    for a in A
        # `axes` replaces the `indices` function removed after Julia 0.6.
        for i in axes(a, 1)
            ai = a[i, 1]
            d[ai] = get(d, ai, (zero(T), 0)) .+ (a[i, 2], 1)
        end
    end
    dkeys = sort!(collect(keys(d)))
    # The element type is whatever dividing a `T` by an integer produces.
    Aout = Matrix{typeof(one(T) / 1)}(undef, length(dkeys), 2)
    for (i, key) in enumerate(dkeys)
        total, n = d[key]
        Aout[i, 1] = key
        Aout[i, 2] = total / n
    end
    return Aout
end