连接 4 的 Minimax 算法产生失败的移动

Minimax algorithm for connect 4 producing a losing move

当深度设置为 4 时,该算法似乎产生了正确的移动,但当我将其增加到 5 时,它出乎意料地变得更糟。在这种特殊情况下,当我认为第三个是时,它建议第 0 列是下一个最佳移动。我很可能不完全理解 minimax 算法,所以我请求你的帮助来解决这个问题,因为我已经尝试了好几天都没有成功。也将不胜感激任何提高代码可读性的建议。

这是一个 link 游戏:http://connect4.getforge.io/ - 原谅可怜的 UI (wip)。默认4层深,请注意增加AI_DEPTH.

时的游戏差异

这是网格,现在轮到 AI 扮演 G(最大化玩家)了。

 ''  '' ''   ''  ''  '' ''
 ''  '' ''   ''  ''  '' ''
 ''  '' ''   ''  ''  '' ''
 ''  '' 'G'  'R' 'G' '' 'G'
 'G' '' 'R'  'R' 'R' '' 'R'
 'G' '' 'R'  'R' 'G' '' 'G'

这是从我的项目中提取的代码:

const GRID_ROW_COUNT = 6;
const GRID_COL_COUNT = 7;
const GRID_ROW_MID = 3;
const GRID_COL_MID = 3;

const WIN_SCORE = 1000;

const rotateGrid = grid => grid.reduce((newGrid, gridRow) => {
  return newGrid.map((column, i) => column.concat(gridRow[i]));
}, [...Array(grid[0].length)].map(_ => []));

function* getValidMoves(grid, player) {
  for(let col = 0; col < grid[0].length; col++){
    for(let row = grid.length; row > 0; row--){
      if(!grid[row - 1][col]){
        const tempGrid = JSON.parse(JSON.stringify(grid));
        tempGrid[row - 1][col] = player;
        yield [tempGrid, col];
        break;
      }
    }
  }
}

const isDrawn = function(grid){
  for(let row = GRID_ROW_COUNT; row > 0; row--){
    if(grid[row - 1].filter(Boolean).length < GRID_COL_COUNT){
      return false;
    }
  }
  return true;
}

const countInRow = (target, row, index, count) => {
  if(count == 0 || !row[index] || row[index] != target){
    return index;
  }
  return countInRow(target, row, index - 1, count - 1);
}

const countInDiagonal = (target, grid, row, col, count) => {
  const colModulus = Math.abs(col);
  if(count == 0 || row < 0 || !grid[row][colModulus] || grid[row][colModulus] != target){
    return row;
  }
  return countInDiagonal( target, grid, row - 1, col - 1, count - 1 );
};

const countInCounterDiagonal = (target, grid, row, col, count) => countInDiagonal(target, grid, row, -col, count); 

function scoreGridPosition(grid, player, count = 4){
  let score = 0; 
  
  function checkWinOnHorizontals(grid, count){
    const GRID_ROW_COUNT = grid.length;
    const GRID_COL_COUNT = grid[0].length;    
    const GRID_COL_MID = player ? 0 : Math.floor(GRID_COL_COUNT/2);

    for(let row = GRID_ROW_COUNT - 1; row >= 0; row--){
      for(let col = GRID_COL_COUNT - 1; col >= GRID_COL_MID; col--){
        const cell = grid[row][col];
        if(!cell){ continue; }

        const colIndex = countInRow(cell, grid[row], col - 1, count - 1);
        
        if(col - colIndex == count){ return WIN_SCORE; }

        if(player){
          const weight = col - 1 - colIndex;
          if(cell == player){
            score += weight;
          } else {
            score -= weight * 2;         
          }
        } 

        col = colIndex + 1;
      }
    }
    return 0;
  }

  const checkWinOnVerticals = (grid, count) => checkWinOnHorizontals(rotateGrid(grid), count);

  function checkWinOnDiagonals(grid, count){
    const _GRID_ROW_MID = player ? 0 : GRID_ROW_MID;

    for(let row = GRID_ROW_COUNT - 1; row >= _GRID_ROW_MID; row--){
      for(let col = GRID_COL_COUNT - 1; col >= 0; col--){
        const cell = grid[row][col];
        if(!cell){ continue; }
        
        let rowIndexL = row, rowIndexR = row;
        
        if(col >= GRID_COL_MID){
          rowIndexL = countInDiagonal(cell, grid, row - 1, col - 1, count - 1);
        }
        if(col <= GRID_COL_MID){
          rowIndexR = countInCounterDiagonal(cell, grid, row - 1, col + 1, count - 1);
        }
        
        if(row - rowIndexL == count || row - rowIndexR == count){ return WIN_SCORE; }

        if(player){
          const weight = (row - rowIndexL) + (row - rowIndexR);
          if(cell == player){
            score += weight
          } else {
            score -= weight;
          }
        }
      }
    }
    return 0;
  }
    
  return [
    checkWinOnHorizontals(grid, count) ||
    checkWinOnVerticals(grid, count) ||
    checkWinOnDiagonals(grid, count),
    score
  ];
}

const alphaBetaAI = (grid, depth = 5, alpha = -Infinity, beta = Infinity, isMaxPlayer = true) => {
  let value = isMaxPlayer ? -Infinity : Infinity;
  let move = null;

  if(isDrawn(grid)){ return [0, move]; }

  const player = isMaxPlayer ? 'G' : 'R';
  const [terminalScore, score] = scoreGridPosition(grid, player);
  
  if(terminalScore){
                       // -1000            1000  
    return [isMaxPlayer ? -terminalScore : terminalScore, move]
  }
  if(depth == 0){ return [score, move]; }

  if(isMaxPlayer){
    for(let [newGrid, column] of getValidMoves(grid, player)){
      let [tempVal] = alphaBetaAI(newGrid, depth - 1, alpha, beta, !isMaxPlayer);
      if(tempVal > value){
        value = tempVal;
        move = column;
      }
      alpha = Math.max(value, alpha);
      if(beta <= alpha){ break; }
    }
  } else {
    for(let [newGrid, column] of getValidMoves(grid, player)){
      let [tempVal] = alphaBetaAI(newGrid, depth - 1, alpha, beta, !isMaxPlayer);
      if(tempVal < value){
        value = tempVal;
        move = column;
      }
      beta = Math.min(value, beta);
      if(beta <= alpha){ break; }
    }
  }
  return [value, move];
}

// here is the grid
let g = [
  ['', '', '', '', '', '', ''],
  ['', '', '', '', '', '', ''],
  ['', '', '', '', '', '', ''],
  ['', '', 'G', 'R', 'G', '', 'G'],
  ['G', '', 'R', 'R', 'R', '', 'R'],
  ['G', '', 'R', 'R', 'G', '', 'G']
];

console.log('Move: ', alphaBetaAI(g)[1]); // 0 - I was expecting 3

正如 Ouroborus 指出的那样,在深度 5 处,它发现无论下什么棋都会输。所以现在它选择了你的可能动作列表中的第一步,因为所有结果 return -1000.

如果你想让它总是找到最长的路线输,那么你需要 return -1000 + depth 如果你输了, 1000 - depth 如果你赢了。那么你的 AI 将始终选择最长的失败路线(如果有不止一种获胜方式,则选择最快的获胜路线)。