R:重现旅行商问题

R: Recreating the Travelling Salesman Problem

我正在使用 R 编程语言。

我正在尝试重现旅行商问题。旅行商问题是一个推销员必须恰好访问“n”个城市一次的问题,在这样一个使他的总距离最小化的方式。

针对这个问题,我首先创建了一个由n = 6个城市(经度,纬度)组成的数据集:

set.seed(123)

data_1 = data.frame(id = c(1,2,3), long = rnorm(3, -74, 1 ), lat = rnorm(3, 40, 1 ))

data_2 = data.frame(id = c(4,5,6), long = rnorm(3, -78, 1 ), lat = rnorm(3, 42, 1 ))

final_data = rbind(data_1, data_2) 

 final_data
  id      long      lat
1  1 -74.56048 40.07051
2  2 -74.23018 40.12929
3  3 -72.44129 41.71506
4  4 -77.53908 41.55434
5  5 -79.26506 43.22408
6  6 -78.68685 42.35981

对于给定的城市顺序(例如 1、2、3、4、5、6),我创建了一个函数来确定每对连续城市之间的距离(基于欧氏距离),然后计算总行进距离:

distance <- function( long1, lat1, long2, lat2, long3, lat3, long4, lat4, long5, lat5, long6, lat6) {

d1_2 = sqrt( (long1 - lat1)^2 + (long2 - lat2)^2 ) 

d2_3 = sqrt( (long2 - lat2)^2 + (lat3 - long3)^2 ) 

d3_4 = sqrt( (long3 - lat3)^2 + (long4 - lat4)^2 ) 

d4_5 = sqrt( (long4 - lat4)^2 + (long5 - lat5)^2 ) 

d5_6 = sqrt( (long5 - lat5)^2 + (long6 - lat6)^2 ) 

return( d1_2 + d2_3 + d3_4 + d4_5 + d5_6 )
}

distance(final_data[1,2], final_data[1,3], final_data[2,2], final_data[2,3], final_data[3,2], final_data[3,3], final_data[4,2], final_data[4,3], final_data[5,2], final_data[5,3], final_data[6,2], final_data[6,3]) 

然后,我可以随机化行的顺序以获得不同的路线并计算每条路线的距离:

#first route 
rows <- sample(nrow(final_data))
route_1 <- final_data[rows, ]

> route_1
  id      long      lat
1  1 -74.56048 40.07051
3  3 -72.44129 41.71506
4  4 -77.53908 41.55434
2  2 -74.23018 40.12929
6  6 -78.68685 42.35981
5  5 -79.26506 43.22408
    
distance(route_1[1,2], route_1[1,3], route_1[2,2], route_1[2,3], route_1[3,2], route_1[3,3], route_1[4,2], route_1[4,3], route_1[5,2], route_1[5,3], route_1[6,2], route_1[6,3]) 

[1] 830.5902

下一路线:

#second route

rows <- sample(nrow(final_data))
route_2 <- final_data[rows, ]

> route_2
  id      long      lat
5  5 -79.26506 43.22408
4  4 -77.53908 41.55434
3  3 -72.44129 41.71506
2  2 -74.23018 40.12929
1  1 -74.56048 40.07051
6  6 -78.68685 42.35981

distance(route_2[1,2], route_2[1,3], route_2[2,2], route_2[2,3], route_2[3,2], route_2[3,3], route_2[4,2], route_2[4,3], route_2[5,2], route_2[5,3], route_2[6,2], route_2[6,3]) 

[1] 826.028
#etc

我的问题: 本着旅行商问题的精神,我试图(讽刺地)表明我所做的事情效率极低,而且不会工作超过10 个城市(即 运行 花费的时间太长)。在 6 个城市的情况下,有人可以告诉我如何计算每条可能路线的距离(6!= 720 条路线)并计算计算所有这些距离所需的时间吗?

这是我目前知道的方法:

第 1 部分:生成所有可能的路线

library(combinat)
 all_routes = permn(c(1,2,3,4,5,6))

> head(all_routes)
[[1]]
[1] 1 2 3 4 5 6

[[2]]
[1] 1 2 3 4 6 5

[[3]]
[1] 1 2 3 6 4 5

[[4]]
[1] 1 2 6 3 4 5

[[5]]
[1] 1 6 2 3 4 5

[[6]]
[1] 6 1 2 3 4 5

第 2 部分:记录计算单个路线所需的时间

start.time <- Sys.time()
distance(route_1[1,2], route_1[1,3], route_1[2,2], route_1[2,3], route_1[3,2], route_1[3,3], route_1[4,2], route_1[4,3], route_1[5,2], route_1[5,3], route_1[6,2], route_1[6,3]) 
end.time <- Sys.time()
time.taken <- end.time - start.time

time.taken

Time difference of 0.003665924 secs

有人可以告诉我如何将这些放在一起吗?

谢谢!

要计算给定 final_data 的所有 6! 路线的累积距离,可以这样完成:

set.seed(123)
data_1 = data.frame(id = c(1,2,3), long = rnorm(3, -74, 1 ), lat = rnorm(3, 40, 1 ))
data_2 = data.frame(id = c(4,5,6), long = rnorm(3, -78, 1 ), lat = rnorm(3, 42, 1 ))
final_data = rbind(data_1, data_2)
N <- nrow(final_data) # just for repeated convenience
final_data
#   id      long      lat
# 1  1 -74.56048 40.07051
# 2  2 -74.23018 40.12929
# 3  3 -72.44129 41.71506
# 4  4 -77.53908 41.55434
# 5  5 -79.26506 43.22408
# 6  6 -78.68685 42.35981

逐对计算每个城市之间的距离。我正在使用 distHaversine,因为你列出了 lat/lon,看到笛卡尔距离计算应用于它,我感到有些畏缩:-)

dists <- outer(seq_len(N), seq_len(N), function(a,b) {
  geosphere::distHaversine(final_data[a,2:3], final_data[b,2:3]) # Notes 1, 2
})
dists
#           [,1]      [,2]     [,3]     [,4]     [,5]     [,6]
# [1,]      0.00  28876.24 255554.4 300408.5 525566.9 429264.3
# [2,]  28876.24      0.00 231942.7 320616.0 541980.9 448013.6
# [3,] 255554.43 231942.67      0.0 424449.9 584761.5 521210.7
# [4,] 300408.47 320616.03 424449.9      0.0 233840.9 130640.9
# [5,] 525566.87 541980.93 584761.5 233840.9      0.0 107178.2
# [6,] 429264.34 448013.57 521210.7 130640.9 107178.2      0.0

(单位为米。)

计算每条路线的累计距离:

perms <- gtools::permutations(N, N)
nrow(perms)
# [1] 720
perms[c(1:4, 719:720),]
#      [,1] [,2] [,3] [,4] [,5] [,6]
# [1,]    1    2    3    4    5    6
# [2,]    1    2    3    4    6    5
# [3,]    1    2    3    5    4    6
# [4,]    1    2    3    5    6    4
# [5,]    6    5    4    3    1    2
# [6,]    6    5    4    3    2    1

allroutes5 <- t(apply(perms, 1, function(route) {
  dists[cbind(route[-N], route[-1])]
}))
head(allroutes5)
#          [,1]     [,2]     [,3]     [,4]     [,5]
# [1,] 28876.24 231942.7 424449.9 233840.9 107178.2
# [2,] 28876.24 231942.7 424449.9 130640.9 107178.2
# [3,] 28876.24 231942.7 584761.5 233840.9 130640.9
# [4,] 28876.24 231942.7 584761.5 107178.2 130640.9
# [5,] 28876.24 231942.7 521210.7 130640.9 233840.9
# [6,] 28876.24 231942.7 521210.7 107178.2 233840.9

allroutes_total <- rowSums(allroutes5)
head(allroutes_total)
# [1] 1026287.9  923087.9 1210062.2 1083399.4 1146511.4 1123048.7

作为证实,allroutes5的第一行是城市1、2、3、4、5、6的顺序。回顾上面的dists,从1-2是28876; 2-3为231942; 3-4为424449;等等。将这些加起来,我们就得到了这条路线上所有城市的总行驶距离。 allroutes_total 保存所有 720 种可能路由(排列)的距离。

min(allroutes_total)
# [1] 799046.4
which.min(allroutes_total)
# [1] 266
perms[which.min(allroutes_total),]
# [1] 3 2 1 4 6 5

备注:

  1. 使用你的公式,我能够复制你的距离:

    dists <- outer(seq_len(N), seq_len(N), function(a,b) {
      sqrt((final_data[a,"long"] - final_data[a,"lat"])^2 + (final_data[b,"long"] - final_data[b,"lat"])^2)
    })
    dists
    #          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]
    # [1,] 162.1127 161.9208 161.7774 165.2982 167.7613 166.7110
    # [2,] 161.9208 161.7287 161.5852 165.1101 167.5759 166.5244
    # [3,] 161.7774 161.5852 161.4415 164.9694 167.4373 166.3850
    # [4,] 165.2982 165.1101 164.9694 168.4235 170.8415 169.8103
    # [5,] 167.7613 167.5759 167.4373 170.8415 173.2258 172.2088
    # [6,] 166.7110 166.5244 166.3850 169.8103 172.2088 171.1858
    
    ### first route
    which(apply(perms, 1, identical, c(1L, 3L, 4L, 2L, 6L, 5L)))
    # [1] 32
    allroutes_total[32]
    # [1] 830.5902
    
    ### second route
    which(apply(perms, 1, identical, c(5L, 4L, 3L, 2L, 1L, 6L)))
    # [1] 567
    allroutes_total[567]
    # [1] 826.028
    

    如果你很好奇,你的第二条路线并列第五:

    min(allroutes_total)
    # [1] 826.0252
    which.min(allroutes_total)
    # [1] 561
    perms[which.min(allroutes_total),]
    # [1] 5 4 2 3 1 6
    rank(allroutes_total)[567]
    # [1] 5.5
    
  2. 不过,我不确定这是正确的距离计算。我认为欧氏距离应该是:

    dists <- outer(seq_len(N), seq_len(N), function(a,b) {
      sqrt((final_data[a,"long"] - final_data[b,"long"])^2 + (final_data[a,"lat"] - final_data[b,"lat"])^2)
    })
    dists
    #           [,1]      [,2]     [,3]     [,4]     [,5]     [,6]
    # [1,] 0.0000000 0.3354875 2.682444 3.327741 5.663758 4.718888
    # [2,] 0.3354875 0.0000000 2.390565 3.602725 5.909975 4.983694
    # [3,] 2.6824442 2.3905652 0.000000 5.100325 6.988631 6.278753
    # [4,] 3.3277405 3.6027253 5.100325 0.000000 2.401467 1.402200
    # [5,] 5.6637577 5.9099750 6.988631 2.401467 0.000000 1.039848
    # [6,] 4.7188885 4.9836936 6.278753 1.402200 1.039848 0.000000