从矩阵和距离计算中随机抽样的循环

Loops with random sampling from a matrix and distance calculation

我得到了一个节点列表,我需要将 'p' 个集线器随机分配给 'n' 个客户端。

我得到了以下数据,其中第一行显示:

以下几行显示:

下面我将显示原始数据,添加 colnames() 它看起来像这样:

total_nodes hubs_required  total_capacity
    50           5              120

node number x_coordinate y_coordinate  node_demand
   1            2           62            3
   2           80           25           14
   3           36           88            1
   4           57           23           14
   .            .            .            .
   .            .            .            .
   .            .            .            .
   50           1           58            2

提供了 x 和 y 值,因此我们可以计算欧氏距离。

nodes:

 50 5 120
 1 2 62 3
 2 80 25 14
 3 36 88 1
 4 57 23 14
 5 33 17 19
 6 76 43 2
 7 77 85 14
 8 94 6 6
 9 89 11 7
 10 59 72 6
 11 39 82 10
 12 87 24 18
 13 44 76 3
 14 2 83 6
 15 19 43 20
 16 5 27 4
 17 58 72 14
 18 14 50 11
 19 43 18 19
 20 87 7 15
 21 11 56 15
 22 31 16 4
 23 51 94 13
 24 55 13 13
 25 84 57 5
 26 12 2 16
 27 53 33 3
 28 53 10 7
 29 33 32 14
 30 69 67 17
 31 43 5 3
 32 10 75 3
 33 8 26 12
 34 3 1 14
 35 96 22 20
 36 6 48 13
 37 59 22 10
 38 66 69 9
 39 22 50 6
 40 75 21 18
 41 4 81 7
 42 41 97 20
 43 92 34 9
 44 12 64 1
 45 60 84 8
 46 35 100 5
 47 38 2 1
 48 9 9 7
 49 54 59 9
 50 1 58 2

我提取了第一行的信息。

nodes <- as.matrix(read.table(data))
header<-colnames(nodes)
clean_header <-gsub('X','',header)
requested_hubs <- as.numeric(clean_header[2])
max_supply_capacity <- as.numeric(clean_header[3])

我需要随机 select 5个节点,它们将充当集线器

set.seed(37)
node_to_hub <-nodes[sample(nrow(nodes),requested_hubs,replace = FALSE),]

然后随机我需要为每个集线器分配节点计算集线器和每个节点之间的距离以及何时超过max_supply_capacity(120) select 以下枢纽并重复该过程。

最后一次迭代后,我需要 return 所有集线器的累计距离总和。

我需要重复此过程 100 次,并且 return 距离累积和的 min() 值。

这是我完全卡住的地方,因为我不确定如何遍历矩阵,更不用说当我必须随机 select 个元素时。

我得到了以下元素:

capacity <- c(numeric()) # needs to be <= to 120
distance_sum <- c(numeric())
global_hub_distance <- c(numeric())

欧氏距离(四舍五入)的公式如下,但我不确定在分配节点时如何反映随机 selection。

distance <-round(sqrt(((node_to_hub[i,2]-nodes[i,2]))^2+(node_to_hub[random,3]-nodes[random,3])^2))

我认为我需要的循环想法如下,但正如我之前提到的,我不知道如何处理示例客户端 selection,以及随机客户端的距离计算。

    for(i in 1:100){
    node_to_hub
    for(i in 1:nrow(node_to_hub){
#Should I randomly sample the clients here???
    while(capacity < 120){ 
    node_demand <- nodes[**random**,3] 
    distance <-round(sqrt(((node_to_hub[i,2]-nodes[i,2]))^2+(node_to_hub[**random**,3]-nodes[**random**,3])^2))
    capacity <-c(capacity, node_demand)
    distance_sum <- c(distance_sum,distance)
}
global_hub_distance <- c(global_hub_distance,distance_sum)
capacity <- 0 
distance_sum <- 0 
}
min(global_hub_distance)
}

不能完全确定您要寻找的是什么,但此代码可能会为您提供帮助。它不是非常快,因为它不是在点击 total_capacity 后使用一段时间停止,而是在完整节点列表上执行 cumsum 并找到超过 120 的位置。

nodes <- structure(list(node_number = 1:50, 
               x = c(2L, 80L, 36L, 57L, 33L, 76L, 77L, 94L, 
                     89L, 59L, 39L, 87L, 44L, 2L, 19L, 5L, 
                     58L, 14L, 43L, 87L, 11L, 31L, 51L, 55L, 
                     84L, 12L, 53L, 53L, 33L, 69L, 43L, 10L, 
                     8L, 3L, 96L, 6L, 59L, 66L, 22L, 75L, 4L, 
                     41L, 92L, 12L, 60L, 35L, 38L, 9L, 54L, 1L), 
               y = c(62L, 25L, 88L, 23L, 17L, 43L, 85L, 6L, 11L, 
                     72L, 82L, 24L, 76L, 83L, 43L, 27L, 72L, 50L, 
                     18L, 7L, 56L, 16L, 94L, 13L, 57L, 2L, 33L, 10L, 
                     32L, 67L, 5L, 75L, 26L, 1L, 22L, 48L, 22L, 69L,
                     50L, 21L, 81L, 97L, 34L, 64L, 84L, 100L, 2L, 9L, 59L, 58L), 
               node_demand = c(3L, 14L, 1L, 14L, 19L, 2L, 14L, 6L, 
                               7L, 6L, 10L, 18L, 3L, 6L, 20L, 4L, 
                               14L, 11L, 19L,  15L, 15L, 4L, 13L, 
                               13L, 5L, 16L, 3L, 7L, 14L, 17L, 
                               3L, 3L, 12L, 14L, 20L, 13L, 10L, 
                               9L, 6L, 18L, 7L, 20L, 9L, 1L, 8L, 
                               5L, 1L, 7L, 9L, 2L)), 
          .Names = c("node_number", "x", "y", "node_demand"), 
          class = "data.frame", row.names = c(NA, -50L))

total_nodes = nrow(nodes)
hubs_required = 5
total_capacity = 120
iterations <- 100
track_sums <- matrix(NA, nrow = iterations, ncol = hubs_required)
colnames(track_sums) <- paste0("demand_at_hub",1:hubs_required)

然后我更喜欢使用距离函数,在这种情况下,A 和 B 是 2 个独立的向量,分别为 c(x,y) 和 c(x,y)。

euc.dist <- function(A, B) round(sqrt(sum((A - B) ^ 2))) # distances

循环:

for(i in 1:iterations){
  # random hub selection
  hubs <- nodes[sample(1:total_nodes, hubs_required, replace = FALSE),]
  for(h in 1:hubs_required){
    # sample the nodes into a random order
    random_nodes <- nodes[sample(1:nrow(nodes), size = nrow(nodes), replace = FALSE),]
    # cumulative sum their demand, and get which number passes 120, 
    # and subtract 1 to get the node before that
    last <- which(cumsum(random_nodes$node_demand) > total_capacity) [1] - 1
    # get sum of all distances to those nodes (1 though the last)
     all_distances <- apply(random_nodes[1:last,], 1, function(rn) {
      euc.dist(A = hubs[h,c("x","y")], 
               B = rn[c("x","y")]) 
    })
    track_sums[i,h] <- sum(all_distances)
  }
}

min(rowSums(track_sums))

编辑

作为函数:

hubnode <- function(nodes, hubs_required = 5, total_capacity = 120, iterations = 10){
  # initialize results matrices
  track_sums <- node_count <- matrix(NA, nrow = iterations, ncol = hubs_required)
  colnames(track_sums) <- paste0("demand_at_hub",1:hubs_required)
  colnames(node_count) <- paste0("nodes_at_hub",1:hubs_required)
  # user defined distance function (only exists wihtin hubnode() function)
  euc.dist <- function(A, B) round(sqrt(sum((A - B) ^ 2))) 

  for(i in 1:iterations){
    # random hub selection
    assigned_hubs <- sample(1:nrow(nodes), hubs_required, replace = FALSE)
    hubs <- nodes[assigned_hubs,]
    assigned_nodes <- NULL
    for(h in 1:hubs_required){
      # sample the nodes into a random order
      assigned_nodes <- sample((1:nrow(nodes))[-assigned_hubs], replace = FALSE)
      random_nodes <- nodes[assigned_nodes,]
      # cumulative sum their demand, and get which number passes 120, 
      # and subtract 1 to get the node before that
      last <- which(cumsum(random_nodes$node_demand) > total_capacity) [1] - 1
      # if there are none
      if(is.na(last)) last = nrow(random_nodes)
      node_count[i,h] <- last
      # get sum of all distances to those nodes (1 though the last)
      all_distances <- apply(random_nodes[1:last,], 1, function(rn) {
        euc.dist(A = hubs[h,c("x","y")], 
                 B = rn[c("x","y")]) 
      })
      track_sums[i,h] <- sum(all_distances)
    }
  }
  return(list(track_sums = track_sums, node_count = node_count))
}

output <- hubnode(nodes, iterations = 100)

node_count <- output$node_count
track_sums <- output$track_sums

plot(rowSums(node_count),  
     rowSums(track_sums), xlab = "Node Count", ylab = "Total Demand", main = paste("Result of", 100, "iterations"))

min(rowSums(track_sums))