F# 中旅行商的性能

Performance of Travelling Salesman in F#

我的问题不是算法本身,而是 F# 达到极限时的性能。在这个例子中,我尝试通过蛮力解决 25 个城市的问题(动态编程)

(该算法对于一个小例子来说似乎工作正常,我相信它能完成它的工作)

我正在控制台输出中打印一个计数变量 window 以监控进度。 正如预期的那样,最多 m=12 次迭代,运行时间呈指数增长。

m=2
1887ms;
m=3
1870ms;
m=4
1902ms;
m=5
2074ms;
m=6
2954ms;
m=7
6261ms;
m=8
16746ms;
m=9
38442ms;
m=10
80396ms;
m=11
140985ms;
m=12
207950ms;

因此,虽然第 13 次迭代之前的性能并不出色,但至少看起来 "manageable"。实际上,如果我的理解是正确的,迭代 12 和 13 是最 CPU 重的。不过,根据执行模式,我预计该迭代的执行时间约为 300000 毫秒,但事实并非如此。

在 MacOS X 10.11.3 El Capitan 上放大我的(新)iMAC Retina 运行 的控制显示器,配备 3.3Ghz i7 四核、16 GB RAM 和 SSD,F# 运行 里面 Xamarin.Studio 6.0。我看到程序的内存使用量高达 3 GB。完全没有优化,但应该在机器的能力范围内,应该不会有负担吧?

m=13 进度非常非常非常慢,按照这个速度,好像要计算好几个小时。在此阶段,在监视器的 CPU 侧,它表示进程使用了​​ CPU 的 105%(左侧第一列)。 1 小时后(完成迭代的 2/3)它崩溃并显示以下消息:

Error: Garbage collector could not allocate 16384 bytes of memory for major heap section.

我有点惊讶我需要进行垃圾收集,因为内存看起来不是主要问题?

我正在定义一个巨大的 Array2D,其中包含 2^24 个条目和 24 列 = 17M * 24 个 (float32*sbyte) 条目,每个条目使用 32+8=40 位 = 5 个字节,因此 2 GB

也许这就是问题所在,我做了一个 for S in Subset_of_size_m 循环?

这必须在第 13 次迭代(停止在 1,860,000 次)时完成 2,700,000 次,在这个循环中我对列表进行了一些分配。也许那里需要垃圾收集?

我以前从未在 F# 中这样做过(仅在 R 中时不时地编写命令 GC()remove(object) 就足够了。在控制监视器中监视,内存似乎从来没有与可用 RAM 相比成为一个问题?发生了什么?

我知道我也许应该找到一个更好的算法,它占用的内存更少,但无论如何这是了解这个问题的好机会,与其他解决问题的人相比,如果一切按计划进行,执行时间,对于第一次(残酷的)尝试来说并不完全荒谬。

这是源代码

//////// Travelling Salesman problem ////////


open System
open System.Collections
open System.Collections.Generic
open System.IO

open System.Windows

open FSharp.Charting

exception InnerError of string


let stopWatch = System.Diagnostics.Stopwatch.StartNew()

///////////////// preparing the data /////////////////

// format of the files
//[number_of_cities]
//[x_1] [y_1] // coordinate

let x = File.ReadAllLines "/Users/francois-guillaume.rideau/Documents/MOOC/TSP.txt"

let split (text:string)=
    text.Split [|'\t';' '|]

let splitInto2Values (A: string []) =  
    (float A.[0],float A.[1])

let parseLine (line:string) = 
    line
    |> split 
    |> splitInto2Values



let num_cities = int x.[0] // 25

let cities = x |> Array.tail |> Array.map parseLine //  [x_1][y_1] 

let dist_matrix = Array2D.create num_cities num_cities 0.0f
for i in 0..(num_cities-1)do
    for j in 0..(num_cities-1) do
        dist_matrix.[i,j]<- float32 (sqrt  ( (fst cities.[i] - fst cities.[j])*(fst cities.[i] - fst cities.[j]) + (snd cities.[i] - snd cities.[j])*(snd cities.[i] - snd cities.[j]) ))

let arrayOfArrays = [| [| 0.0f; 2.0f;1.0f;4.0f |]; [|2.0f;0.0f; 3.0f;5.0f |]; [|1.0f;3.0f; 0.0f;6.0f |]; [|4.0f;5.0f; 6.0f;0.0f |] |]
let example = Array2D.init 4 4 (fun i j -> arrayOfArrays.[i].[j]) 

// Dynamic programming algorithm

// Subproblems: 
// For every destination j in {1,2,......n} every subset S in {1,2,....n} that contains 1 and j, let
// A(S,j) = minimum length of a path of a path from 1 to j that visits precisely the vertices of S [exactly once each]

// create A = Array2D indexed by subsets S that contain 1 and destinations j
// Base Case A[S,1] = 0 if S = {1} , +infinity otherwise
// for m = 2,3,4,.... n (m = subproblem size)
//     for each Set S in {1,2,...n} of size m that contains 1
//         for each j in S, j different from 1:
//             A[S,j] = min (k in S, k <> j) {A[S-{j},k]+Ckj}
// Return min (j=2 to n) A[{1,2,3,....,n},j]+Cj1

let limit = 100000000.0f

//// the first city is city 0 in array D. we put it apart, 
//// then we represents S as integers. 
//// we take the binary representation of integer, and the pth bit indicates whether city p+1 belongs to S
//// for example S =11 = 1+2+8 contains cities 2,3 and 9 (members 11 will return [(0, 1); (1, 2); (3, 8)])

/////////////////////////////// with path ///////////////////////////////////////////

let TSP_all_c_Dynamic_Programming_with_path_main(D:float32 [,]) = // solves the TSP problem when ALL cities are connected together with an exhaustive search in exponential time O(n^2 2^n)
                                                   // the input is a distance matrix in float32
                                                   // memory usage is not optimized at all....   

    let num_cities = Array2D.length1 D
    let l2 = Array2D.length2 D
    if (l2<>num_cities) then failwith "Distance matrix is not a squared matrix"
    let powers_of_2 = [|1;2;4;8;16;32;64;128;256;512;1024;2048;4096;8192;16384;32768;65536;131072;262144;524288;1048576;2097152;4194304;8388608;16777216|]
    let power2 k =    
       if ((k >= 25) || (k<0)) then raise (InnerError("power exponent not allowed"))
           else powers_of_2.[k]  

    let num_subsets = power2 (num_cities-1)
    let S_full = num_subsets-1 

    let A = Array2D.create num_subsets (num_cities-1)  (limit,-1y)

    A.[0,0]<-(-0.0f,-2y)

    let rec sumbits (n:int):int=
      let rec helper acc m =
         match m with
            | 0 -> acc
            | 1 -> acc+1 // remove this ?
            | _ -> let r = m%2
                   helper (acc+r) (m>>>1)
      helper 0 n

    let hashtable = Array2D.create (num_cities-1) num_subsets false // hashtable.[i-1,n]=true if (sumbits n) = i
    for k in 1..(num_subsets-1) do hashtable.[(sumbits k)-1,k]<-true
    // member returns [(p,2^p);....] if the p+1th city is in S
    let members S = [for j in 0..(num_cities-2) do let a= powers_of_2.[j] &&& S
                                                   if (a<>0) then yield (j,a)] // list length = num_cities-1


    for m in 2..num_cities do // S size m
        printfn "m=%A" m
        let stopWatch = System.Diagnostics.Stopwatch.StartNew()

        let mutable count = 1

        let Subset_of_size_m = hashtable.[m-2,0..] |> Seq.mapi (fun i x -> (i,x)) |> Seq.filter (fun (a,b)-> (b=true)) |> Seq.map fst |> Seq.toList
        for S in Subset_of_size_m do         
            if m = 2 then let (i,S') = (members S).Head
                          A.[S',i]<- (D.[0,i+1],-1y) // distance from 0 to city i+1
                     else
                          let S'_list = List.fold (fun acc x -> let a = (((snd x)^^^S_full)&&&S)             // list of subsets of size m-1
                                                                if a = S then acc else (fst x,a)::acc ) [] (members S)
                          for (j,S') in S'_list do
                              A.[S,j] <- ([for (k,expk) in (members S') do
                                                yield (fst A.[S',k]+D.[j+1,k+1],k) ]|> List.min |> fun (a,b)->(a,sbyte b))// can do faster than that if we remember from above ?
                          count <- count + 1 // to check progress in the console
                          if count%10000 =0 then printfn "count=%A" count

        printfn "%f" stopWatch.Elapsed.TotalMilliseconds

    // printfn "%A" A
    // A.[num_subsets-1,0..]
    A

let calculate_path_TSP_all_c_Dynamic_Programming_with_path (D:float32 [,]) =
    // calls the core subroutine defined above
    let A' = TSP_all_c_Dynamic_Programming_with_path_main D
                                                    // memory usage is not optimized at all....  

    // from here its smooth sailing, just analyzing the results.

    let num_cities = Array2D.length1 D
    let l2 = Array2D.length2 D
    if (l2<>num_cities) then failwith "Distance matrix is not a squared matrix"

    let powers_of_2 = [|1;2;4;8;16;32;64;128;256;512;1024;2048;4096;8192;16384;32768;65536;131072;262144;524288;1048576;2097152;4194304;8388608;16777216|]
    let power2 k =    
       if ((k >= 25) || (k<0)) then raise (InnerError("power exponent not allowed"))
           else powers_of_2.[k]  

    let num_subsets = power2 (num_cities-1)
    let S_full = num_subsets-1 

    let A'' = A'.[S_full,0..]

    let res' = [for i in 0..num_cities-2 do yield (fst A''.[i]+ example.[0,i+1]) ]  |> Seq.mapi (fun i x -> (i, x)) |> Seq.minBy snd
    printfn "TSP answer = %A" res'

    let path = Array.create (num_cities+1) -1y

    let mutable current_city = sbyte (fst res')
    path.[num_cities-1]<- current_city// the last city 

    let mutable current_S = S_full
    let mutable next_S = -2
    let mutable next_city = -2y

    for k in num_cities-2..-1..1 do 
        next_city <- snd A'.[current_S,int current_city]
        next_S <- (S_full^^^powers_of_2.[int current_city]) &&& current_S
        //printfn "k=%A current_S=%A next_city=%A" k current_S next_city
        path.[k]<-next_city
        current_S<-next_S
        current_city<-next_city

    for i in 0..num_cities do path.[i]<-path.[i]+1y
    printfn "path=%A" path  


////// main program /////

calculate_path_TSP_all_c_Dynamic_Programming_with_path dist_matrix

我根本没有分析你的代码,但由于你的目标是 Debug/x86 配置,这意味着你应用:

  • 没有为了提供更多调试信息而进行一定的优化(可能会增加内存占用)
  • 仅使用 x86 处理器指令集的 32 位指令,因此它不能使用超过 4GB 的内存(或更少,depending on your OS

尝试将其更改为类似 Release/x64 (*) 的内容。您还可以调整 Mono 内置 garbage collector.

的参数

您可以预期,当您的程序使用的数据占用进程可用的大部分内存时,您的算法处理的元素数量将急剧减少。在这种情况下,您的程序花费大部分时间进行垃圾收集(每个 GC 只能释放少量内存,因为大多数对象都处于活动状态)并且只有一小部分时间进行 "real" 计算(并且计算需要内存触发 GC 的分配)。

当您调查内存使用情况时,另一件可能有用的事情是内存分析器。

您提到在 R 中您可以强制进行垃圾回收;在 .NET 中,您也可以通过调用 GC.Collect() 来做到这一点,但是,在大多数情况下不鼓励使用它(必要时会自动执行垃圾收集)。

*您需要一个生成 64 位程序集的编译器和一个 64 位公共语言运行时来 运行 您的代码。 Mono 2.10 on MacOS X doesn't support 64-bits unless it is built from sources:

Support for 64-bit VMs as of Mono 2.10 is only available if you build Mono from source code and install your own copy of the VM. In the future we will ship both mono and mono64 binaries for our users.

较新的版本有通用安装程序,我猜它们支持 64 位。