极小极大算法错误
Minimax algorithm bug
我一直在尝试学习 minimax 算法,但偶然发现了一个我无法解决的错误。
代码:
private List<Integer> generatemoves(int[] evalFields) {
List<Integer> nextMoves = new ArrayList<Integer>();
for (int i = 0; i < evalFields.length; i++) {
if (evalFields[i] == 0) {
nextMoves.add(i);
}
}
return nextMoves;
}
private int evaluateLine(int p1, int p2, int p3, int[] evalFields) {
int score = 0;
if (evalFields[p1] == 1) {
score = 1;
} else if (evalFields[p1] == 10) {
score = -1;
}
if (evalFields[p2] == 1) {
if (score == 1) {
score = 10;
} else if (score == -1) {
return 0;
} else {
score = 1;
}
} else if (evalFields[p2] == 10) {
if (score == -1) {
score = -10;
} else if (score == 1) {
return 0;
} else {
score = -1;
}
}
if (evalFields[p3] == 1) {
if (score > 0) {
score *= 10;
} else if (score < 0) {
return 0;
} else {
score = 1;
}
} else if (evalFields[p3] == 10) {
if (score < 0) {
score *= 10;
} else if (score > 1) {
return 0;
} else {
score = -1;
}
}
return score;
}
private int evaluateBoard(int [] evalFields) {
int score = 0;
score += evaluateLine(0, 1, 2, evalFields);
score += evaluateLine(3, 4, 5, evalFields);
score += evaluateLine(6, 7, 8, evalFields);
score += evaluateLine(0, 3, 6, evalFields);
score += evaluateLine(1, 4, 7, evalFields);
score += evaluateLine(2, 5, 8, evalFields);
score += evaluateLine(0, 4, 8, evalFields);
score += evaluateLine(2, 4, 6, evalFields);
return score;
}
private int bestMove(int currentTurn, int[] board) {
int move;
int bestScore;
if (currentTurn == 1) {
bestScore = Integer.MIN_VALUE;
} else {
bestScore = Integer.MAX_VALUE;
}
List<Integer> nextMoves = generatemoves(board);
List<Integer> bestScores = new ArrayList<Integer>();
for (int i = 0; i < nextMoves.size(); i++) {
int[] newBoards = new int[9];
for (int j = 0; j < board.length; j++) {
newBoards[j] = board[j];
}
newBoards[nextMoves.get(i)] = turn;
bestScores.add(evaluateBoard(newBoards));
}
for (int scores : bestScores) {
if (currentTurn == 1) {
if (scores > bestScore) bestScore = scores;
} else {
if (scores < bestScore) bestScore = scores;
}
}
move = nextMoves.get(bestScores.indexOf(bestScore));
return move;
}
这是代码中最相关的部分。它所做的或者我认为它所做的是它从称为字段的板上生成每一个可能的移动。然后它计算每一步的分数。然后它继续进行导致最高或最低分数的移动,x(1) 试图获得最高分,O(10) 试图获得最低分。发生的错误是,当玩家开始并在中间占据场地时,ai 正常运行,但在玩家第二回合后,ai 开始运行奇怪:
[ ][ ][ ] [O][ ][ ] [O][ ][O]
[ ][x][ ] => [ ][x][ ] => [x][x][ ]
[ ][ ][ ] [ ][ ][ ] [ ][ ][ ]
如果玩家选择这个:
[O][ ][ ] [O][ ][ ]
[ ][x][x] => [O][x][x]
[ ][ ][ ] [ ][ ][ ]
然后ai就正常了。
我不知道出了什么问题,或者即使我已经正确理解了 minimax 算法。
****编辑****
添加了这段代码还是有同样的问题
private int[] evaluateMove(int [] board, int currentTurn) {
int bestScore;
int currentScore;
int bestMove = -1;
if (currentTurn == 1) {
bestScore = Integer.MIN_VALUE;
} else {
bestScore = Integer.MAX_VALUE;
}
List<Integer> nextMoves = generatemoves(board);
if (nextMoves.isEmpty()) {
bestScore = evaluateTheBoard(board);
} else {
for (int move : nextMoves) {
int[] nextBoard = new int[9];
for (int i = 0; i < nextBoard.length; i ++) {
nextBoard[i] = board[i];
}
nextBoard[move] = currentTurn;
currentScore = evaluateMove(nextBoard, nextTurn())[0];
if (currentTurn == 1) {
if (currentScore > bestScore) {
bestScore = currentScore;
bestMove = move;
}
} else {
if (currentScore < bestScore) {
bestScore = currentScore;
bestMove = move;
}
}
}
}
return new int[] {bestScore, bestMove};
}
我认为你误解了如何在这样的游戏中展望未来。不要 'total' return 由 evaluateLine
编辑的值。
这是井字棋盘的极小值最大分数的伪代码(evaluateBoard
应该 return)。请注意,evaluateBoard
需要有一个概念 currentTurn
。
function evaluateBoard(board, currentTurn)
// check if the game has already ended:
if WhiteHasWon then return -10
if BlackHasWon then return +10
// WhiteHasWon returns true if there exists one or more winning 3-in-a-row line for white.
// (You will have to scan for all 8 possible 3-in-a-row lines of white pieces)
// BlackHasWon returns true if there exists one or more winning 3-in-a-row line for black
if no legal moves, return 0 // draw
// The game isn't over yet, so look ahead:
bestMove = notset
resultScore = notset
for each legal move i for currentTurn,
nextBoard = board
Apply move i to nextBoard
score = evaluateBoard(nextBoard, NOT currentTurn).score
if score is <better for currentTurn> than resultScore, then
resultScore = score
bestMove = move i
return (resultScore, bestMove)
这个版本与你的版本和我的版本之间的一个非常关键的区别是我的版本是递归。你的只深入一层。我的从 evaluateBoard
内部调用 evaluateBoard
,如果我们不小心,这将是一个无限循环(一旦棋盘填满,它就不能再深入,所以它实际上不是无限的)
另一个区别是你的总计不应该。 tic-tac-toe 的结果分数是 -10,0,或者只有当您查看到游戏结束时才为 10。您应该选择当时该玩家可用的最佳着法,并完全忽略所有其他可能性,因为您只关心 "best" 行。游戏得分等于最佳发挥的结果。
展开 <better for currentTurn>
在 minimax 中很混乱,这就是 negamax 更干净的原因。白色喜欢低分,黑色喜欢高分,所以你需要一些 if 语句让它选择合适的首选分数。你已经有了这部分(在你最好的移动代码的末尾),但它需要在递归内部而不是在末尾进行评估。
我一直在尝试学习 minimax 算法,但偶然发现了一个我无法解决的错误。 代码:
private List<Integer> generatemoves(int[] evalFields) {
List<Integer> nextMoves = new ArrayList<Integer>();
for (int i = 0; i < evalFields.length; i++) {
if (evalFields[i] == 0) {
nextMoves.add(i);
}
}
return nextMoves;
}
private int evaluateLine(int p1, int p2, int p3, int[] evalFields) {
int score = 0;
if (evalFields[p1] == 1) {
score = 1;
} else if (evalFields[p1] == 10) {
score = -1;
}
if (evalFields[p2] == 1) {
if (score == 1) {
score = 10;
} else if (score == -1) {
return 0;
} else {
score = 1;
}
} else if (evalFields[p2] == 10) {
if (score == -1) {
score = -10;
} else if (score == 1) {
return 0;
} else {
score = -1;
}
}
if (evalFields[p3] == 1) {
if (score > 0) {
score *= 10;
} else if (score < 0) {
return 0;
} else {
score = 1;
}
} else if (evalFields[p3] == 10) {
if (score < 0) {
score *= 10;
} else if (score > 1) {
return 0;
} else {
score = -1;
}
}
return score;
}
private int evaluateBoard(int [] evalFields) {
int score = 0;
score += evaluateLine(0, 1, 2, evalFields);
score += evaluateLine(3, 4, 5, evalFields);
score += evaluateLine(6, 7, 8, evalFields);
score += evaluateLine(0, 3, 6, evalFields);
score += evaluateLine(1, 4, 7, evalFields);
score += evaluateLine(2, 5, 8, evalFields);
score += evaluateLine(0, 4, 8, evalFields);
score += evaluateLine(2, 4, 6, evalFields);
return score;
}
private int bestMove(int currentTurn, int[] board) {
int move;
int bestScore;
if (currentTurn == 1) {
bestScore = Integer.MIN_VALUE;
} else {
bestScore = Integer.MAX_VALUE;
}
List<Integer> nextMoves = generatemoves(board);
List<Integer> bestScores = new ArrayList<Integer>();
for (int i = 0; i < nextMoves.size(); i++) {
int[] newBoards = new int[9];
for (int j = 0; j < board.length; j++) {
newBoards[j] = board[j];
}
newBoards[nextMoves.get(i)] = turn;
bestScores.add(evaluateBoard(newBoards));
}
for (int scores : bestScores) {
if (currentTurn == 1) {
if (scores > bestScore) bestScore = scores;
} else {
if (scores < bestScore) bestScore = scores;
}
}
move = nextMoves.get(bestScores.indexOf(bestScore));
return move;
}
这是代码中最相关的部分。它所做的或者我认为它所做的是它从称为字段的板上生成每一个可能的移动。然后它计算每一步的分数。然后它继续进行导致最高或最低分数的移动,x(1) 试图获得最高分,O(10) 试图获得最低分。发生的错误是,当玩家开始并在中间占据场地时,ai 正常运行,但在玩家第二回合后,ai 开始运行奇怪:
[ ][ ][ ] [O][ ][ ] [O][ ][O]
[ ][x][ ] => [ ][x][ ] => [x][x][ ]
[ ][ ][ ] [ ][ ][ ] [ ][ ][ ]
如果玩家选择这个:
[O][ ][ ] [O][ ][ ]
[ ][x][x] => [O][x][x]
[ ][ ][ ] [ ][ ][ ]
然后ai就正常了。 我不知道出了什么问题,或者即使我已经正确理解了 minimax 算法。
****编辑**** 添加了这段代码还是有同样的问题
private int[] evaluateMove(int [] board, int currentTurn) {
int bestScore;
int currentScore;
int bestMove = -1;
if (currentTurn == 1) {
bestScore = Integer.MIN_VALUE;
} else {
bestScore = Integer.MAX_VALUE;
}
List<Integer> nextMoves = generatemoves(board);
if (nextMoves.isEmpty()) {
bestScore = evaluateTheBoard(board);
} else {
for (int move : nextMoves) {
int[] nextBoard = new int[9];
for (int i = 0; i < nextBoard.length; i ++) {
nextBoard[i] = board[i];
}
nextBoard[move] = currentTurn;
currentScore = evaluateMove(nextBoard, nextTurn())[0];
if (currentTurn == 1) {
if (currentScore > bestScore) {
bestScore = currentScore;
bestMove = move;
}
} else {
if (currentScore < bestScore) {
bestScore = currentScore;
bestMove = move;
}
}
}
}
return new int[] {bestScore, bestMove};
}
我认为你误解了如何在这样的游戏中展望未来。不要 'total' return 由 evaluateLine
编辑的值。
这是井字棋盘的极小值最大分数的伪代码(evaluateBoard
应该 return)。请注意,evaluateBoard
需要有一个概念 currentTurn
。
function evaluateBoard(board, currentTurn)
// check if the game has already ended:
if WhiteHasWon then return -10
if BlackHasWon then return +10
// WhiteHasWon returns true if there exists one or more winning 3-in-a-row line for white.
// (You will have to scan for all 8 possible 3-in-a-row lines of white pieces)
// BlackHasWon returns true if there exists one or more winning 3-in-a-row line for black
if no legal moves, return 0 // draw
// The game isn't over yet, so look ahead:
bestMove = notset
resultScore = notset
for each legal move i for currentTurn,
nextBoard = board
Apply move i to nextBoard
score = evaluateBoard(nextBoard, NOT currentTurn).score
if score is <better for currentTurn> than resultScore, then
resultScore = score
bestMove = move i
return (resultScore, bestMove)
这个版本与你的版本和我的版本之间的一个非常关键的区别是我的版本是递归。你的只深入一层。我的从 evaluateBoard
内部调用 evaluateBoard
,如果我们不小心,这将是一个无限循环(一旦棋盘填满,它就不能再深入,所以它实际上不是无限的)
另一个区别是你的总计不应该。 tic-tac-toe 的结果分数是 -10,0,或者只有当您查看到游戏结束时才为 10。您应该选择当时该玩家可用的最佳着法,并完全忽略所有其他可能性,因为您只关心 "best" 行。游戏得分等于最佳发挥的结果。
展开 <better for currentTurn>
在 minimax 中很混乱,这就是 negamax 更干净的原因。白色喜欢低分,黑色喜欢高分,所以你需要一些 if 语句让它选择合适的首选分数。你已经有了这部分(在你最好的移动代码的末尾),但它需要在递归内部而不是在末尾进行评估。