Julia 中的栅格边距改组 - 更好的实现

Raster Marginals Shuffling in Julia - Better implementation

我想为一个零矩阵和一个矩阵实现一个洗牌方法,它采用随机的 2x2 子矩阵并仅在它们的 colsums 和 rowsums 相等时翻转它们,即 [0 1; 1 0] 或 [1 0; 0 1].

编辑:仅供参考,这应该意味着两者

sum(matrix,1) == sum(shuffledmatrix,1) && 
sum(matrix,2) == sum(shuffledmatrix,2)

==> 真

下面的代码是正确的,但基本上速度不够快。任何人都可以在这里看到任何明显的错误吗? (我对 Julia 还很陌生!)

function rastershuffle!(shuffledmatrix::Array{Int32,2},minchanges::Int)
    @inbounds begin
        numchanges = 0
        numcols = size(shuffledmatrix,2)
        numrows = size(shuffledmatrix,1)
        while numchanges < minchanges
            a = findmargeflip!(shuffledmatrix,numcols::Int, numrows::Int)
            numchanges = numchanges + sum(a) 
        end
    end
    return shuffledmatrix
end

function findmargeflip!(shuffledmatrix::Array{Int32,2},numcols::Int, numrows::Int)
    change = false
    cols = EPhys.random_generator(2,numcols)
    rows = EPhys.random_generator(2,numrows)
    vall = sub(shuffledmatrix, [rows[1]; rows[2]],[cols[1]; cols[2]])
    if vall == [0 1; 1 0] || vall == [1 0; 0 1]
        flipvall!(vall)
        #numchanges += 1
        change = true
    end
    change
end

function flipvall!(vall)
    if vall[1] == 1
        vall[:] = [0 1 1 0]    
    else
        vall[:] = [1 0 0 1]
    end
    nothing
end

到目前为止我根据文档中的信息尝试过的内容:

我认为主要瓶颈是每次迭代都重新生成 SubArray,这需要内存重新分配/垃圾收集,但我不完全确定如何解决这个问题。

额外信息:

shuffledspikematrix3 = copy(spikematrixnonoise)
@time rastershuffle!(shuffledspikematrix3, 100);
@profile rastershuffle!(shuffledspikematrix3, 100);
Profile.print()

===> 输出:

 8.776213 seconds (153.35 M allocations: 7.835 GB, 15.94% gc time)
    1   abstractarray.jl; ==; line: 1060
    1   abstractarray.jl; hvcat; line: 974
    2   abstractarray.jl; vcat; line: 733
    2   array.jl; getindex; line: 282
    2   multidimensional.jl; start; line: 99
    800 task.jl; anonymous; line: 447
     800 .../IJulia/src/IJulia.jl; eventloop; line: 143
      800 ...rc/execute_request.jl; execute_request_0x535c5df2; line: 183
       800 loading.jl; include_string; line: 266
        800 profile.jl; anonymous; line: 16
         800 In[174]; rastershuffle!; line: 7
          1   ...devel/src/helper.jl; random_generator; line: 52
          1   In[174]; findmargeflip!; line: 15
          77  In[174]; findmargeflip!; line: 16
           13 ....devel/src/helper.jl; random_generator; line: 44
            7 random.jl; rand; line: 255
             5 random.jl; gen_rand; line: 88
              1 dSFMT.jl; dsfmt_fill_array_close1_open2!; line: 66
              4 dSFMT.jl; dsfmt_fill_array_close1_open2!; line: 67
            2 random.jl; rand; line: 256
           47 ....devel/src/helper.jl; random_generator; line: 47
           1  ....devel/src/helper.jl; random_generator; line: 48
           13 ....devel/src/helper.jl; random_generator; line: 49
           1  ....devel/src/helper.jl; random_generator; line: 52
          86  In[174]; findmargeflip!; line: 17
           9  ....devel/src/helper.jl; random_generator; line: 44
            5 random.jl; rand; line: 255
             4 random.jl; gen_rand; line: 88
              4 dSFMT.jl; dsfmt_fill_array_close1_open2!; line: 67
            1 random.jl; rand; line: 256
           53 ....devel/src/helper.jl; random_generator; line: 47
           1  ....devel/src/helper.jl; random_generator; line: 48
           13 ....devel/src/helper.jl; random_generator; line: 49
           2  ....devel/src/helper.jl; random_generator; line: 52
          211 In[174]; findmargeflip!; line: 19
           87 abstractarray.jl; vcat; line: 733
           9  subarray.jl; _sub; line: 90
           35 subarray.jl; _sub; line: 91
            1  subarray.jl; _sub_unsafe; line: 96
            21 subarray.jl; _sub_unsafe; line: 125
            1  subarray.jl; _sub_unsafe; line: 437
            1  subarray.jl; _sub_unsafe; line: 440
          411 In[174]; findmargeflip!; line: 20
           5   abstractarray.jl; ==; line: 1060
           4   abstractarray.jl; ==; line: 1066
           258 abstractarray.jl; ==; line: 1067
           4   abstractarray.jl; ==; line: 1068
           2   abstractarray.jl; hvcat; line: 957
           87  abstractarray.jl; hvcat; line: 960
           1   abstractarray.jl; hvcat; line: 961
           2   abstractarray.jl; hvcat; line: 969
           3   abstractarray.jl; hvcat; line: 970
           11  abstractarray.jl; hvcat; line: 971
           1   abstractarray.jl; hvcat; line: 974
          4   In[174]; findmargeflip!; line: 25
          1   abstractarray.jl; ==; line: 1060
          2   abstractarray.jl; hvcat; line: 960
          1   abstractarray.jl; vcat; line: 733
    1   tuple.jl; ==; line: 95
    3   tuple.jl; ==; line: 96

分析清楚地告诉你大部分时间花在

211 In[174]; findmargeflip!; line: 19
411 In[174]; findmargeflip!; line: 20

这是

  vall = sub(shuffledmatrix, [rows[1]; rows[2]],[cols[1]; cols[2]])
  if vall == [0 1; 1 0] || vall == [1 0; 0 1]

您正在到处分配新数组。

尝试将 vall == [0 1; 1 0] 替换为

 size(val1) == (2,2) && val1[1,1] == 0 &&
     val1[1,2] == 1 && val1[2,1] == 1 && val1[2,2] == 0

顺便问一下,为什么要混合使用 Int32Int64?要在矩阵上节省内存?

这是同一功能的另一种实现方式(如果我理解正确的话)。它很有可能会工作得更快,但它不使用与 OP 相同的随机源。看看吧,或许能给点优化建议。

希望对您有所帮助。

function flipit!(m, flipcount)
  zeroinds = map(x->ind2sub(m,x),find(m .== 0))  # find 0 locations
  zerorows = Set{Int}(map(first,zeroinds))       # find rows with 0s
  zerocols = Set{Int}(map(last,zeroinds))        # find cols with 0s
  oneinds = map(x->ind2sub(m,x),find(m .== 1))   # find 1 locations
  filter!(x->x[1] in zerorows && x[2] in zerocols,oneinds) # must satisfy trivially
  n = length(oneinds)
  numflips = 0
  badcount = 0                                   
  badcorners = Set{Tuple{Int,Int}}()       # track bad rectangles
  maxbad = binomial(length(oneinds),2)  # num candidate rectangles
  maxbad == 0 && error("Can't find candidate rectangle")
  randbuf = rand(1:n,2*flipcount)       # make some rands to use later
  while numflips < flipcount
    if length(randbuf)==0
      randbuf = rand(1:n,2*flipcount)  # refresh rands
    end
    cornersinds = minmax(pop!(randbuf),pop!(randbuf))
    if first(cornersinds)==last(cornersinds) continue ; end
    if cornersinds in badcorners                             
      continue                                    # bad candidate
    end
    corners = (oneinds[cornersinds[1]],oneinds[cornersinds[2]])
    if m[corners[1][1],corners[2][2]] == 0 &&     # check 0s
      m[corners[2][1],corners[1][2]] == 0        
      m[corners[1]...] = 0                        # flip
      m[corners[2]...] = 0
      m[corners[1][1],corners[2][2]] = 1
      m[corners[2][1],corners[1][2]] = 1
      oneinds[cornersinds[1]] = (corners[1][1],corners[2][2]) # flip corner list
      oneinds[cornersinds[2]] = (corners[2][1],corners[1][2])
      numflips += 1
      if badcount>0
        badcount = 0
        empty!(badcorners)
      end
    else
      push!(badcorners,cornersinds)     # remember bad candidate
      badcount += 1
      if badcount == maxbad             # if candidates exhausted
        error("No flippable rectangle")
      end
    end
  end
end

flipit!(M,n) 一起使用,其中 M 是矩阵,n 是所需的翻转次数。这不是最干净的代码,试图更喜欢清晰而不是紧凑。