在 R 中使用 Rglpk 的约束最大值,不一致

Constrained max using Rglpk in R, not conformable

我对使用 R 中打包的 Rglpk 进行约束最大化有疑问。

在下面的代码中,我们生成了一个数据帧 'df'。我正在尝试最大化 z 列的总和,但须满足:

不确定我哪里出错了...但是,我现在可能离题太远了。在两天的大部分时间里,我一直在敲击键盘。

    library(random)
    library(Rglpk)
    library(data.table)
    w<-c( "F","G","G","F","F","G","F","G",
      "G","F","G","G","F","G","G","F",
      "G","F","F","F","G","G","F","G",
      "F","G","F","F","G","F","F","F",
      "G","F","G","G","F","G","F","G")
    x<-randomStrings(n=40, len=3, digits=F, upperalpha=F,loweralpha=T, unique=T)
    y<-c("8100",  "8000",  "7900",  "7800", "7700", "7400", "7300", "7200", "7100", "6700",
     "6500",    "6100", "6000", "5800", "5800", "5600", "5400", "5200", "5000", "4900",
     "4800",    "4200", "4100", "4100", "3900", "3800", "3700", "3400", "3300", "3200",
     "3000",    "3000", "3000", "3000", "3000", "3000", "3000", "3000", "3000", "3000")
    z<-c( "27.85","25.057", "24.588",   "23.893",   "23.284",   "24.071",   "24.864",   "22.525",   "23.15",    "22.023",
      "24.803", "18.284",   "19.675",   "20.138",   "16.179",   "20.6", "17.821",   "16.333",   "16.659",   "16.013",   
      "14.947", "10.262",   "15.425",   "10.989",   "11.556",   "11.429",   "11.3", "10.682",   "9.542",    "4.727",    
      "7.162",  "5.053",    "3.706",    "8.604",    "10.868",   "8.638",    "7.167",    "3.333",    "2.833",    "7.662")
    df <- as.data.frame(cbind(w,x,y,z))
    setnames(df, old = c('w','V1','y','z'), new = c('w','x','y','z'))
    rm(w,x,y,z)
    num.x <- length(df$x)
        obj <- df$z
    var.types <- rep("B", num.x)
    matrix <- rbind(as.numeric(df$w == "G"), # num G
                as.numeric(df$w == "F"), # num F
            as.numeric(df$w %in% c("G", "F")),  # Num G/F
                df$y)  
    direction <- c(">=",
           "<=",
           ">=",
           "<=",
           "==",
           "<=")
    rhs <- c(3, 
     5, 
     3, 
     5, 
     8, 
     50000)                

    sol <- Rglpk_solve_LP(obj = obj, mat = matrix, dir = direction, rhs = rhs,
                  types = var.types, max = TRUE)

有 20 个 F 和 20 个 G,所以可能的解总数是

C(20,3)*C(20,5) + C(20,4)*C(20,4) + C(20,5)*C(20,3) = 58821345

R 会将其计算为 sum(choose(20, 3:5)*choose(20, 5:3))。)这对于在所有可能性中进行强力搜索来说已经足够小了。记录 1 7 9 11 14 16 17 23 的输出为 174.651。这是它们的 y 和 z 值,由 w:

标记

这是找到这个答案的程序(总时间约 10 秒)。

w<-c( "F","G","G","F","F","G","F","G",
      "G","F","G","G","F","G","G","F",
      "G","F","F","F","G","G","F","G",
      "F","G","F","F","G","F","F","F",
      "G","F","G","G","F","G","F","G")
y<-c(8100,  8000,  7900,  7800, 7700, 7400, 7300, 7200, 7100, 6700,
     6500,    6100, 6000, 5800, 5800, 5600, 5400, 5200, 5000, 4900,
     4800,    4200, 4100, 4100, 3900, 3800, 3700, 3400, 3300, 3200,
     3000,    3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000)
z<-c( 27.85,25.057, 24.588,   23.893,   23.284,   24.071,   24.864,   22.525,   23.15,    22.023,
      24.803, 18.284,   19.675,   20.138,   16.179,   20.6, 17.821,   16.333,   16.659,   16.013,   
      14.947, 10.262,   15.425,   10.989,   11.556,   11.429,   11.3, 10.682,   9.542,    4.727,    
      7.162,  5.053,    3.706,    8.604,    10.868,   8.638,    7.167,    3.333,    2.833,    7.662)
df <- data.frame(y=y, z=z)

system.time({
  f.index <- which(w=="F")
  g.index <- which(w=="G")

  threshold <- 50000
  temp <- matrix(NA, length(w), 8)
  z.max <- rep(NA, length(w))
  for (k in 3:5) {
    f <- apply(f.subsets <- combn(f.index, k), 2, function(i) colSums(df[i, ]))
    g <- apply(g.subsets <- combn(g.index, 8-k), 2, function(i) colSums(df[i, ]))
    y.sum <- as.vector(outer(f["y", ], g["y", ], "+"))
    z.sum <- as.vector(outer(f["z", ], g["z", ], "+"))
    z.sum[y.sum > threshold] <- NA
    n <- which.max(z.sum)
    i <- (n-1) %% dim(f.subsets)[2] + 1
    j <- floor((n-1) / dim(f.subsets)[2]) + 1
    temp[k, ] <- c(f.subsets[, i], g.subsets[, j])
    z.max[k] <- f["z", i] + g["z", j]
  }

  solution <- temp[which.max(z.max), ]
})

sort(solution)
sum(z[solution])

plot(y, z)
points(y[solution], z[solution], pch=16, col=ifelse(w[solution]=="F", "Blue", "Red"))
text(y[solution], z[solution], w[solution], pos=1)

我讨厌这样做,但我回答了我自己的问题。我没有为矩阵中的约束包含足够的参考。道歉。正如 whuber 指出的那样,最佳值为 174.651。
编辑如下:

library(random)
library(Rglpk)
library(data.table)
w<-c( "F","G","G","F","F","G","F","G",
  "G","F","G","G","F","G","G","F",
  "G","F","F","F","G","G","F","G",
  "F","G","F","F","G","F","F","F",
  "G","F","G","G","F","G","F","G")
x<-randomStrings(n=40, len=3, digits=F, upperalpha=F,loweralpha=T, unique=T)
y<-list(8100,  8000,  7900,  7800, 7700, 7400, 7300, 7200, 7100, 6700,
 6500,    6100, 6000, 5800, 5800, 5600, 5400, 5200, 5000, 4900,
 4800,    4200, 4100, 4100, 3900, 3800, 3700, 3400, 3300, 3200,
 3000,    3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000)
z<-list( 27.85,25.057, 24.588,   23.893,   23.284,   24.071,   24.864, 22.525,   23.15,    22.023,
  24.803, 18.284,   19.675,   20.138,   16.179,   20.6, 17.821,   16.333,   16.659,   16.013,   
  14.947, 10.262,   15.425,   10.989,   11.556,   11.429,   11.3, 10.682,   9.542,    4.727,    
  7.162,  5.053,    3.706,    8.604,    10.868,   8.638,    7.167,    3.333,    2.833,    7.662)

df <- as.data.frame(cbind(w,x,y,z))
df$w <- as.character(df$w)
df$y <- as.integer(df$y)
setnames(df, old = c('w','V1','y','z'), new = c('w','x','y','z'))
rm(w,x,y,z)

num.x <- length(df$x)
# objective:
obj <- df$z
# the vars are represented as booleans
var.types <- rep("B", num.x)
# the constraints
matrix <- rbind(as.numeric(df$w == "G"),
            as.numeric(df$w == "G"),
            as.numeric(df$w == "F"),
            as.numeric(df$w == "F"),
            as.numeric(df$w %in% c("G", "F")),  
            df$y)                       
direction <- c(">=",
           "<=",
           ">=",
           "<=",
           "==",
           "<=")
rhs <- c(3, # G Min
     5, # G Max
     3, # F Min
     5, # F Max
     8, # G/F total

     50000)               

sol <- Rglpk_solve_LP(obj = obj, mat = matrix, dir = direction, rhs = rhs,
                  types = var.types, max = TRUE)
sol