Julia 中的栅格边距改组 - 更好的实现
Raster Marginals Shuffling in Julia - Better implementation
我想为一个零矩阵和一个矩阵实现一个洗牌方法,它采用随机的 2x2 子矩阵并仅在它们的 colsums 和 rowsums 相等时翻转它们,即 [0 1; 1 0] 或 [1 0; 0 1].
编辑:仅供参考,这应该意味着两者
sum(matrix,1) == sum(shuffledmatrix,1) &&
sum(matrix,2) == sum(shuffledmatrix,2)
==> 真
下面的代码是正确的,但基本上速度不够快。任何人都可以在这里看到任何明显的错误吗? (我对 Julia 还很陌生!)
function rastershuffle!(shuffledmatrix::Array{Int32,2},minchanges::Int)
@inbounds begin
numchanges = 0
numcols = size(shuffledmatrix,2)
numrows = size(shuffledmatrix,1)
while numchanges < minchanges
a = findmargeflip!(shuffledmatrix,numcols::Int, numrows::Int)
numchanges = numchanges + sum(a)
end
end
return shuffledmatrix
end
function findmargeflip!(shuffledmatrix::Array{Int32,2},numcols::Int, numrows::Int)
change = false
cols = EPhys.random_generator(2,numcols)
rows = EPhys.random_generator(2,numrows)
vall = sub(shuffledmatrix, [rows[1]; rows[2]],[cols[1]; cols[2]])
if vall == [0 1; 1 0] || vall == [1 0; 0 1]
flipvall!(vall)
#numchanges += 1
change = true
end
change
end
function flipvall!(vall)
if vall[1] == 1
vall[:] = [0 1 1 0]
else
vall[:] = [1 0 0 1]
end
nothing
end
到目前为止我根据文档中的信息尝试过的内容:
使用 BitArrays 而不是 Int32 - 没有太大区别,尽管我可能会改变它,函数 flipvall!然后也可以用 flipbits 代替!
为编译器提供额外的类型信息
设置迭代次数而不是更改,然后尝试使用 @simd
进行矢量化
我认为主要瓶颈是每次迭代都重新生成 SubArray,这需要内存重新分配/垃圾收集,但我不完全确定如何解决这个问题。
额外信息:
shuffledspikematrix3 = copy(spikematrixnonoise)
@time rastershuffle!(shuffledspikematrix3, 100);
@profile rastershuffle!(shuffledspikematrix3, 100);
Profile.print()
===> 输出:
8.776213 seconds (153.35 M allocations: 7.835 GB, 15.94% gc time)
1 abstractarray.jl; ==; line: 1060
1 abstractarray.jl; hvcat; line: 974
2 abstractarray.jl; vcat; line: 733
2 array.jl; getindex; line: 282
2 multidimensional.jl; start; line: 99
800 task.jl; anonymous; line: 447
800 .../IJulia/src/IJulia.jl; eventloop; line: 143
800 ...rc/execute_request.jl; execute_request_0x535c5df2; line: 183
800 loading.jl; include_string; line: 266
800 profile.jl; anonymous; line: 16
800 In[174]; rastershuffle!; line: 7
1 ...devel/src/helper.jl; random_generator; line: 52
1 In[174]; findmargeflip!; line: 15
77 In[174]; findmargeflip!; line: 16
13 ....devel/src/helper.jl; random_generator; line: 44
7 random.jl; rand; line: 255
5 random.jl; gen_rand; line: 88
1 dSFMT.jl; dsfmt_fill_array_close1_open2!; line: 66
4 dSFMT.jl; dsfmt_fill_array_close1_open2!; line: 67
2 random.jl; rand; line: 256
47 ....devel/src/helper.jl; random_generator; line: 47
1 ....devel/src/helper.jl; random_generator; line: 48
13 ....devel/src/helper.jl; random_generator; line: 49
1 ....devel/src/helper.jl; random_generator; line: 52
86 In[174]; findmargeflip!; line: 17
9 ....devel/src/helper.jl; random_generator; line: 44
5 random.jl; rand; line: 255
4 random.jl; gen_rand; line: 88
4 dSFMT.jl; dsfmt_fill_array_close1_open2!; line: 67
1 random.jl; rand; line: 256
53 ....devel/src/helper.jl; random_generator; line: 47
1 ....devel/src/helper.jl; random_generator; line: 48
13 ....devel/src/helper.jl; random_generator; line: 49
2 ....devel/src/helper.jl; random_generator; line: 52
211 In[174]; findmargeflip!; line: 19
87 abstractarray.jl; vcat; line: 733
9 subarray.jl; _sub; line: 90
35 subarray.jl; _sub; line: 91
1 subarray.jl; _sub_unsafe; line: 96
21 subarray.jl; _sub_unsafe; line: 125
1 subarray.jl; _sub_unsafe; line: 437
1 subarray.jl; _sub_unsafe; line: 440
411 In[174]; findmargeflip!; line: 20
5 abstractarray.jl; ==; line: 1060
4 abstractarray.jl; ==; line: 1066
258 abstractarray.jl; ==; line: 1067
4 abstractarray.jl; ==; line: 1068
2 abstractarray.jl; hvcat; line: 957
87 abstractarray.jl; hvcat; line: 960
1 abstractarray.jl; hvcat; line: 961
2 abstractarray.jl; hvcat; line: 969
3 abstractarray.jl; hvcat; line: 970
11 abstractarray.jl; hvcat; line: 971
1 abstractarray.jl; hvcat; line: 974
4 In[174]; findmargeflip!; line: 25
1 abstractarray.jl; ==; line: 1060
2 abstractarray.jl; hvcat; line: 960
1 abstractarray.jl; vcat; line: 733
1 tuple.jl; ==; line: 95
3 tuple.jl; ==; line: 96
分析清楚地告诉你大部分时间花在
211 In[174]; findmargeflip!; line: 19
411 In[174]; findmargeflip!; line: 20
这是
vall = sub(shuffledmatrix, [rows[1]; rows[2]],[cols[1]; cols[2]])
if vall == [0 1; 1 0] || vall == [1 0; 0 1]
您正在到处分配新数组。
尝试将 vall == [0 1; 1 0]
替换为
size(val1) == (2,2) && val1[1,1] == 0 &&
val1[1,2] == 1 && val1[2,1] == 1 && val1[2,2] == 0
顺便问一下,为什么要混合使用 Int32
和 Int64
?要在矩阵上节省内存?
这是同一功能的另一种实现方式(如果我理解正确的话)。它很有可能会工作得更快,但它不使用与 OP 相同的随机源。看看吧,或许能给点优化建议。
希望对您有所帮助。
function flipit!(m, flipcount)
zeroinds = map(x->ind2sub(m,x),find(m .== 0)) # find 0 locations
zerorows = Set{Int}(map(first,zeroinds)) # find rows with 0s
zerocols = Set{Int}(map(last,zeroinds)) # find cols with 0s
oneinds = map(x->ind2sub(m,x),find(m .== 1)) # find 1 locations
filter!(x->x[1] in zerorows && x[2] in zerocols,oneinds) # must satisfy trivially
n = length(oneinds)
numflips = 0
badcount = 0
badcorners = Set{Tuple{Int,Int}}() # track bad rectangles
maxbad = binomial(length(oneinds),2) # num candidate rectangles
maxbad == 0 && error("Can't find candidate rectangle")
randbuf = rand(1:n,2*flipcount) # make some rands to use later
while numflips < flipcount
if length(randbuf)==0
randbuf = rand(1:n,2*flipcount) # refresh rands
end
cornersinds = minmax(pop!(randbuf),pop!(randbuf))
if first(cornersinds)==last(cornersinds) continue ; end
if cornersinds in badcorners
continue # bad candidate
end
corners = (oneinds[cornersinds[1]],oneinds[cornersinds[2]])
if m[corners[1][1],corners[2][2]] == 0 && # check 0s
m[corners[2][1],corners[1][2]] == 0
m[corners[1]...] = 0 # flip
m[corners[2]...] = 0
m[corners[1][1],corners[2][2]] = 1
m[corners[2][1],corners[1][2]] = 1
oneinds[cornersinds[1]] = (corners[1][1],corners[2][2]) # flip corner list
oneinds[cornersinds[2]] = (corners[2][1],corners[1][2])
numflips += 1
if badcount>0
badcount = 0
empty!(badcorners)
end
else
push!(badcorners,cornersinds) # remember bad candidate
badcount += 1
if badcount == maxbad # if candidates exhausted
error("No flippable rectangle")
end
end
end
end
与 flipit!(M,n)
一起使用,其中 M
是矩阵,n
是所需的翻转次数。这不是最干净的代码,试图更喜欢清晰而不是紧凑。
我想为一个零矩阵和一个矩阵实现一个洗牌方法,它采用随机的 2x2 子矩阵并仅在它们的 colsums 和 rowsums 相等时翻转它们,即 [0 1; 1 0] 或 [1 0; 0 1].
编辑:仅供参考,这应该意味着两者
sum(matrix,1) == sum(shuffledmatrix,1) &&
sum(matrix,2) == sum(shuffledmatrix,2)
==> 真
下面的代码是正确的,但基本上速度不够快。任何人都可以在这里看到任何明显的错误吗? (我对 Julia 还很陌生!)
function rastershuffle!(shuffledmatrix::Array{Int32,2},minchanges::Int)
@inbounds begin
numchanges = 0
numcols = size(shuffledmatrix,2)
numrows = size(shuffledmatrix,1)
while numchanges < minchanges
a = findmargeflip!(shuffledmatrix,numcols::Int, numrows::Int)
numchanges = numchanges + sum(a)
end
end
return shuffledmatrix
end
function findmargeflip!(shuffledmatrix::Array{Int32,2},numcols::Int, numrows::Int)
change = false
cols = EPhys.random_generator(2,numcols)
rows = EPhys.random_generator(2,numrows)
vall = sub(shuffledmatrix, [rows[1]; rows[2]],[cols[1]; cols[2]])
if vall == [0 1; 1 0] || vall == [1 0; 0 1]
flipvall!(vall)
#numchanges += 1
change = true
end
change
end
function flipvall!(vall)
if vall[1] == 1
vall[:] = [0 1 1 0]
else
vall[:] = [1 0 0 1]
end
nothing
end
到目前为止我根据文档中的信息尝试过的内容:
使用 BitArrays 而不是 Int32 - 没有太大区别,尽管我可能会改变它,函数 flipvall!然后也可以用 flipbits 代替!
为编译器提供额外的类型信息
设置迭代次数而不是更改,然后尝试使用 @simd
进行矢量化
我认为主要瓶颈是每次迭代都重新生成 SubArray,这需要内存重新分配/垃圾收集,但我不完全确定如何解决这个问题。
额外信息:
shuffledspikematrix3 = copy(spikematrixnonoise)
@time rastershuffle!(shuffledspikematrix3, 100);
@profile rastershuffle!(shuffledspikematrix3, 100);
Profile.print()
===> 输出:
8.776213 seconds (153.35 M allocations: 7.835 GB, 15.94% gc time)
1 abstractarray.jl; ==; line: 1060
1 abstractarray.jl; hvcat; line: 974
2 abstractarray.jl; vcat; line: 733
2 array.jl; getindex; line: 282
2 multidimensional.jl; start; line: 99
800 task.jl; anonymous; line: 447
800 .../IJulia/src/IJulia.jl; eventloop; line: 143
800 ...rc/execute_request.jl; execute_request_0x535c5df2; line: 183
800 loading.jl; include_string; line: 266
800 profile.jl; anonymous; line: 16
800 In[174]; rastershuffle!; line: 7
1 ...devel/src/helper.jl; random_generator; line: 52
1 In[174]; findmargeflip!; line: 15
77 In[174]; findmargeflip!; line: 16
13 ....devel/src/helper.jl; random_generator; line: 44
7 random.jl; rand; line: 255
5 random.jl; gen_rand; line: 88
1 dSFMT.jl; dsfmt_fill_array_close1_open2!; line: 66
4 dSFMT.jl; dsfmt_fill_array_close1_open2!; line: 67
2 random.jl; rand; line: 256
47 ....devel/src/helper.jl; random_generator; line: 47
1 ....devel/src/helper.jl; random_generator; line: 48
13 ....devel/src/helper.jl; random_generator; line: 49
1 ....devel/src/helper.jl; random_generator; line: 52
86 In[174]; findmargeflip!; line: 17
9 ....devel/src/helper.jl; random_generator; line: 44
5 random.jl; rand; line: 255
4 random.jl; gen_rand; line: 88
4 dSFMT.jl; dsfmt_fill_array_close1_open2!; line: 67
1 random.jl; rand; line: 256
53 ....devel/src/helper.jl; random_generator; line: 47
1 ....devel/src/helper.jl; random_generator; line: 48
13 ....devel/src/helper.jl; random_generator; line: 49
2 ....devel/src/helper.jl; random_generator; line: 52
211 In[174]; findmargeflip!; line: 19
87 abstractarray.jl; vcat; line: 733
9 subarray.jl; _sub; line: 90
35 subarray.jl; _sub; line: 91
1 subarray.jl; _sub_unsafe; line: 96
21 subarray.jl; _sub_unsafe; line: 125
1 subarray.jl; _sub_unsafe; line: 437
1 subarray.jl; _sub_unsafe; line: 440
411 In[174]; findmargeflip!; line: 20
5 abstractarray.jl; ==; line: 1060
4 abstractarray.jl; ==; line: 1066
258 abstractarray.jl; ==; line: 1067
4 abstractarray.jl; ==; line: 1068
2 abstractarray.jl; hvcat; line: 957
87 abstractarray.jl; hvcat; line: 960
1 abstractarray.jl; hvcat; line: 961
2 abstractarray.jl; hvcat; line: 969
3 abstractarray.jl; hvcat; line: 970
11 abstractarray.jl; hvcat; line: 971
1 abstractarray.jl; hvcat; line: 974
4 In[174]; findmargeflip!; line: 25
1 abstractarray.jl; ==; line: 1060
2 abstractarray.jl; hvcat; line: 960
1 abstractarray.jl; vcat; line: 733
1 tuple.jl; ==; line: 95
3 tuple.jl; ==; line: 96
分析清楚地告诉你大部分时间花在
211 In[174]; findmargeflip!; line: 19
411 In[174]; findmargeflip!; line: 20
这是
vall = sub(shuffledmatrix, [rows[1]; rows[2]],[cols[1]; cols[2]])
if vall == [0 1; 1 0] || vall == [1 0; 0 1]
您正在到处分配新数组。
尝试将 vall == [0 1; 1 0]
替换为
size(val1) == (2,2) && val1[1,1] == 0 &&
val1[1,2] == 1 && val1[2,1] == 1 && val1[2,2] == 0
顺便问一下,为什么要混合使用 Int32
和 Int64
?要在矩阵上节省内存?
这是同一功能的另一种实现方式(如果我理解正确的话)。它很有可能会工作得更快,但它不使用与 OP 相同的随机源。看看吧,或许能给点优化建议。
希望对您有所帮助。
function flipit!(m, flipcount)
zeroinds = map(x->ind2sub(m,x),find(m .== 0)) # find 0 locations
zerorows = Set{Int}(map(first,zeroinds)) # find rows with 0s
zerocols = Set{Int}(map(last,zeroinds)) # find cols with 0s
oneinds = map(x->ind2sub(m,x),find(m .== 1)) # find 1 locations
filter!(x->x[1] in zerorows && x[2] in zerocols,oneinds) # must satisfy trivially
n = length(oneinds)
numflips = 0
badcount = 0
badcorners = Set{Tuple{Int,Int}}() # track bad rectangles
maxbad = binomial(length(oneinds),2) # num candidate rectangles
maxbad == 0 && error("Can't find candidate rectangle")
randbuf = rand(1:n,2*flipcount) # make some rands to use later
while numflips < flipcount
if length(randbuf)==0
randbuf = rand(1:n,2*flipcount) # refresh rands
end
cornersinds = minmax(pop!(randbuf),pop!(randbuf))
if first(cornersinds)==last(cornersinds) continue ; end
if cornersinds in badcorners
continue # bad candidate
end
corners = (oneinds[cornersinds[1]],oneinds[cornersinds[2]])
if m[corners[1][1],corners[2][2]] == 0 && # check 0s
m[corners[2][1],corners[1][2]] == 0
m[corners[1]...] = 0 # flip
m[corners[2]...] = 0
m[corners[1][1],corners[2][2]] = 1
m[corners[2][1],corners[1][2]] = 1
oneinds[cornersinds[1]] = (corners[1][1],corners[2][2]) # flip corner list
oneinds[cornersinds[2]] = (corners[2][1],corners[1][2])
numflips += 1
if badcount>0
badcount = 0
empty!(badcorners)
end
else
push!(badcorners,cornersinds) # remember bad candidate
badcount += 1
if badcount == maxbad # if candidates exhausted
error("No flippable rectangle")
end
end
end
end
与 flipit!(M,n)
一起使用,其中 M
是矩阵,n
是所需的翻转次数。这不是最干净的代码,试图更喜欢清晰而不是紧凑。