使迭代加深失效的启发式函数
Heuristic function invalidating iterative deepening
对于宝石棋游戏,我正在编写一个迭代加深算法。这是代码:
/**
 * Chooses a move for the player whose turn it is on {@code currentBoard}.
 *
 * <p>Every candidate move returned by {@code getMoves} is applied to a copy of the board
 * (made with the {@code GameState} copy constructor) and scored with
 * {@code iterativeDeepSearch}. A fixed budget of 5000 ms is divided evenly between the
 * candidates. A move whose score reaches {@code MAX_CUTOFF} is returned immediately.
 *
 * @param currentBoard the position to choose a move for
 * @return the highest-scoring candidate move, or {@code -1} when there are no candidates
 */
public int optimize(GameState currentBoard) {
    List<Integer> aiMove = this.getMoves(currentBoard.getNextPlayer());
    if (aiMove.isEmpty()) {
        // Nothing to search; also avoids dividing the time budget by zero below.
        return -1;
    }

    final long maxTimeMillis = 5000;
    long timeForEach = maxTimeMillis / aiMove.size();

    // Start below every possible score. The previous floor of 1e-30 was a tiny POSITIVE
    // number, so when all moves scored <= 0 no move was ever selected and -1 was returned.
    double maxScore = Double.NEGATIVE_INFINITY;
    int bestMove = -1;

    for (var moveForPit : aiMove) {
        GameState gameClone = new GameState(currentBoard);
        // NOTE(review): the boolean result of makeMove is ignored here, while
        // alphaBetaPruning skips moves for which it returns false - confirm that
        // getMoves only yields moves that makeMove accepts.
        gameClone.makeMove(moveForPit);
        double score = iterativeDeepSearch(gameClone, timeForEach);
        if (score >= MAX_CUTOFF) {
            // Good enough to stop searching the remaining candidates.
            return moveForPit;
        }
        if (score > maxScore) {
            maxScore = score;
            bestMove = moveForPit;
        }
    }
    return bestMove;
}
/**
 * Scores a position by running {@code alphaBetaPruning} at depth 1, 2, 3, ... until the
 * time budget is used up.
 *
 * <p>Resets the {@code searchCutoff} field before searching and reads it after every
 * iteration to tell whether that iteration ran out of time.
 *
 * @param gameClone       the position to score
 * @param timeForEachMove time budget in milliseconds, measured from the moment of the call
 * @return the result of the deepest iteration that finished within the budget, or that
 *         iteration's result immediately if it reaches {@code MAX_CUTOFF}; {@code 0} if
 *         not even the first iteration finished in time
 */
private double iterativeDeepSearch(GameState gameClone, long timeForEachMove) {
    long endTime = System.currentTimeMillis() + timeForEachMove;
    double score = 0;
    searchCutoff = false;

    // NOTE(review): nothing detects that the game tree has been searched exhaustively,
    // so for short or finished games this keeps deepening until the deadline passes.
    for (int depth = 1; ; depth++) {
        long cTime = System.currentTimeMillis();
        if (cTime > endTime) {
            break;
        }

        // The window must start fully open. Double.MIN_VALUE is the smallest POSITIVE
        // double (about 4.9e-324), not the most negative value, so it must not be used
        // as the lower bound.
        double searchResults = this.alphaBetaPruning(gameClone, depth,
                Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, cTime, endTime - cTime);

        if (searchCutoff) {
            // This iteration was interrupted: its value comes from a partial tree, so
            // discard it (even if it looks like a win) and keep the last complete result.
            break;
        }
        if (searchResults >= MAX_CUTOFF) {
            return searchResults;
        }
        score = searchResults;
    }
    return score;
}
/**
 * Depth-limited minimax search with alpha-beta pruning, scored by
 * {@code GameState.scoreHeuristic()}.
 *
 * <p>The node is a maximising node when {@code gameClone.getNextPlayer() == 2} and a
 * minimising node otherwise. The static heuristic is returned without searching further
 * when the game has ended, the time budget is exhausted, {@code depth} is 0, or the
 * heuristic is already at or beyond {@code MAX_CUTOFF} / {@code MIN_CUTOFF}.
 *
 * <p>Side effect: every call overwrites the {@code searchCutoff} field with whether the
 * time budget was exhausted when that call started.
 *
 * @param gameClone position to search; each child move is tried on a fresh copy
 * @param depth     remaining search depth in plies
 * @param alpha     lower bound of the search window (best value found so far for the maximiser)
 * @param beta      upper bound of the search window (best value found so far for the minimiser)
 * @param startTime {@code System.currentTimeMillis()} value the budget is measured from
 * @param timeLimit budget in milliseconds, counted from {@code startTime}
 * @return the minimax value of the position within the window, or the static heuristic
 *         at a leaf, on timeout, or when no move could be applied
 */
private double alphaBetaPruning(GameState gameClone, int depth, double alpha, double beta, long startTime, long timeLimit) {
    // NOTE(review): the literal 2 is presumably Players.AI.id - confirm and replace.
    boolean isAi = gameClone.getNextPlayer() == 2;
    double score = gameClone.scoreHeuristic();
    long elapsedTime = System.currentTimeMillis() - startTime;
    boolean won = gameClone.gameEnded();
    searchCutoff = elapsedTime >= timeLimit;
    if (won || searchCutoff || depth == 0 || score >= MAX_CUTOFF || score <= MIN_CUTOFF) {
        return score;
    }

    // NOTE(review): moves are generated by this.getMoves(...) rather than from gameClone;
    // confirm that getMoves does not depend on the state of some other board.
    boolean anyMoveApplied = false;

    if (isAi) {
        List<Integer> moveList = this.getMoves(Players.AI.id);
        // Must start below every real score; Double.MIN_VALUE is a tiny positive number.
        double value = Double.NEGATIVE_INFINITY;
        for (var m : moveList) {
            GameState childClone = new GameState(gameClone);
            if (!childClone.makeMove(m)) {
                continue;
            }
            anyMoveApplied = true;
            // Keep the best child, not merely the most recently searched one.
            value = Math.max(value,
                    alphaBetaPruning(childClone, depth - 1, alpha, beta, startTime, timeLimit));
            alpha = Math.max(alpha, value);
            if (alpha >= beta) {
                break; // the minimiser above will never allow this line
            }
        }
        // With no applicable move, fall back to the static evaluation instead of leaking
        // the sentinel value to the caller.
        return anyMoveApplied ? value : score;
    } else {
        List<Integer> moveList = this.getMoves(Players.HUMAN.id);
        double value = Double.POSITIVE_INFINITY;
        for (var m : moveList) {
            GameState childClone = new GameState(gameClone);
            if (!childClone.makeMove(m)) {
                continue;
            }
            anyMoveApplied = true;
            value = Math.min(value,
                    alphaBetaPruning(childClone, depth - 1, alpha, beta, startTime, timeLimit));
            beta = Math.min(beta, value);
            // Prune when the window has closed (beta <= alpha). The previous test,
            // beta >= alpha, was true almost always and cut off after the first child.
            if (beta <= alpha) {
                break;
            }
        }
        return anyMoveApplied ? value : score;
    }
}
我还提供了一个启发式函数,由 board.scoreHeuristic() 给出。它本质上是双方玩家的得分差除以理论上可赢得的棋子总数。在这个版本的宝石棋中,这个总数是 72(12 个坑,每个坑中有 6 个 ambos)。我知道实际上不可能赢得全部 72 个,但作为近似已经足够。
我的代码偶尔会陷入无限循环,我认为问题出在启发式函数上,它有时会返回 0。我不明白为什么会出现这种无限循环。
事实证明,正如许多 Java 程序员所知,在拷贝构造函数中直接给数组赋值(只复制了引用)
与调用 clone() 复制数组并不相同……
对于其他不知道的程序员(比如我自己..):
这个
// Copy constructor, buggy version (kept as the counter-example): builds a GameState from an existing one.
GameState(GameState parent) {
this.board = parent.board; // BAD! Copies only the reference: this state and parent share one board object, so a change made through either is visible in both.
}
应该是这样的:
// Copy constructor, fixed version: the new GameState gets its own copy of the parent's board.
GameState(GameState parent) {
this.board = parent.board.clone(); // or System.arraycopy. NOTE(review): clone() on an array is shallow - if board is multi-dimensional the inner arrays are still shared; confirm board is one-dimensional.
}
对于宝石棋游戏,我正在编写一个迭代加深算法。这是代码:
/**
 * Chooses a move for the player whose turn it is on {@code currentBoard}.
 *
 * <p>Every candidate move returned by {@code getMoves} is applied to a copy of the board
 * (made with the {@code GameState} copy constructor) and scored with
 * {@code iterativeDeepSearch}. A fixed budget of 5000 ms is divided evenly between the
 * candidates. A move whose score reaches {@code MAX_CUTOFF} is returned immediately.
 *
 * @param currentBoard the position to choose a move for
 * @return the highest-scoring candidate move, or {@code -1} when there are no candidates
 */
public int optimize(GameState currentBoard) {
    List<Integer> aiMove = this.getMoves(currentBoard.getNextPlayer());
    if (aiMove.isEmpty()) {
        // Nothing to search; also avoids dividing the time budget by zero below.
        return -1;
    }

    final long maxTimeMillis = 5000;
    long timeForEach = maxTimeMillis / aiMove.size();

    // Start below every possible score. The previous floor of 1e-30 was a tiny POSITIVE
    // number, so when all moves scored <= 0 no move was ever selected and -1 was returned.
    double maxScore = Double.NEGATIVE_INFINITY;
    int bestMove = -1;

    for (var moveForPit : aiMove) {
        GameState gameClone = new GameState(currentBoard);
        // NOTE(review): the boolean result of makeMove is ignored here, while
        // alphaBetaPruning skips moves for which it returns false - confirm that
        // getMoves only yields moves that makeMove accepts.
        gameClone.makeMove(moveForPit);
        double score = iterativeDeepSearch(gameClone, timeForEach);
        if (score >= MAX_CUTOFF) {
            // Good enough to stop searching the remaining candidates.
            return moveForPit;
        }
        if (score > maxScore) {
            maxScore = score;
            bestMove = moveForPit;
        }
    }
    return bestMove;
}
/**
 * Scores a position by running {@code alphaBetaPruning} at depth 1, 2, 3, ... until the
 * time budget is used up.
 *
 * <p>Resets the {@code searchCutoff} field before searching and reads it after every
 * iteration to tell whether that iteration ran out of time.
 *
 * @param gameClone       the position to score
 * @param timeForEachMove time budget in milliseconds, measured from the moment of the call
 * @return the result of the deepest iteration that finished within the budget, or that
 *         iteration's result immediately if it reaches {@code MAX_CUTOFF}; {@code 0} if
 *         not even the first iteration finished in time
 */
private double iterativeDeepSearch(GameState gameClone, long timeForEachMove) {
    long endTime = System.currentTimeMillis() + timeForEachMove;
    double score = 0;
    searchCutoff = false;

    // NOTE(review): nothing detects that the game tree has been searched exhaustively,
    // so for short or finished games this keeps deepening until the deadline passes.
    for (int depth = 1; ; depth++) {
        long cTime = System.currentTimeMillis();
        if (cTime > endTime) {
            break;
        }

        // The window must start fully open. Double.MIN_VALUE is the smallest POSITIVE
        // double (about 4.9e-324), not the most negative value, so it must not be used
        // as the lower bound.
        double searchResults = this.alphaBetaPruning(gameClone, depth,
                Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, cTime, endTime - cTime);

        if (searchCutoff) {
            // This iteration was interrupted: its value comes from a partial tree, so
            // discard it (even if it looks like a win) and keep the last complete result.
            break;
        }
        if (searchResults >= MAX_CUTOFF) {
            return searchResults;
        }
        score = searchResults;
    }
    return score;
}
/**
 * Depth-limited minimax search with alpha-beta pruning, scored by
 * {@code GameState.scoreHeuristic()}.
 *
 * <p>The node is a maximising node when {@code gameClone.getNextPlayer() == 2} and a
 * minimising node otherwise. The static heuristic is returned without searching further
 * when the game has ended, the time budget is exhausted, {@code depth} is 0, or the
 * heuristic is already at or beyond {@code MAX_CUTOFF} / {@code MIN_CUTOFF}.
 *
 * <p>Side effect: every call overwrites the {@code searchCutoff} field with whether the
 * time budget was exhausted when that call started.
 *
 * @param gameClone position to search; each child move is tried on a fresh copy
 * @param depth     remaining search depth in plies
 * @param alpha     lower bound of the search window (best value found so far for the maximiser)
 * @param beta      upper bound of the search window (best value found so far for the minimiser)
 * @param startTime {@code System.currentTimeMillis()} value the budget is measured from
 * @param timeLimit budget in milliseconds, counted from {@code startTime}
 * @return the minimax value of the position within the window, or the static heuristic
 *         at a leaf, on timeout, or when no move could be applied
 */
private double alphaBetaPruning(GameState gameClone, int depth, double alpha, double beta, long startTime, long timeLimit) {
    // NOTE(review): the literal 2 is presumably Players.AI.id - confirm and replace.
    boolean isAi = gameClone.getNextPlayer() == 2;
    double score = gameClone.scoreHeuristic();
    long elapsedTime = System.currentTimeMillis() - startTime;
    boolean won = gameClone.gameEnded();
    searchCutoff = elapsedTime >= timeLimit;
    if (won || searchCutoff || depth == 0 || score >= MAX_CUTOFF || score <= MIN_CUTOFF) {
        return score;
    }

    // NOTE(review): moves are generated by this.getMoves(...) rather than from gameClone;
    // confirm that getMoves does not depend on the state of some other board.
    boolean anyMoveApplied = false;

    if (isAi) {
        List<Integer> moveList = this.getMoves(Players.AI.id);
        // Must start below every real score; Double.MIN_VALUE is a tiny positive number.
        double value = Double.NEGATIVE_INFINITY;
        for (var m : moveList) {
            GameState childClone = new GameState(gameClone);
            if (!childClone.makeMove(m)) {
                continue;
            }
            anyMoveApplied = true;
            // Keep the best child, not merely the most recently searched one.
            value = Math.max(value,
                    alphaBetaPruning(childClone, depth - 1, alpha, beta, startTime, timeLimit));
            alpha = Math.max(alpha, value);
            if (alpha >= beta) {
                break; // the minimiser above will never allow this line
            }
        }
        // With no applicable move, fall back to the static evaluation instead of leaking
        // the sentinel value to the caller.
        return anyMoveApplied ? value : score;
    } else {
        List<Integer> moveList = this.getMoves(Players.HUMAN.id);
        double value = Double.POSITIVE_INFINITY;
        for (var m : moveList) {
            GameState childClone = new GameState(gameClone);
            if (!childClone.makeMove(m)) {
                continue;
            }
            anyMoveApplied = true;
            value = Math.min(value,
                    alphaBetaPruning(childClone, depth - 1, alpha, beta, startTime, timeLimit));
            beta = Math.min(beta, value);
            // Prune when the window has closed (beta <= alpha). The previous test,
            // beta >= alpha, was true almost always and cut off after the first child.
            if (beta <= alpha) {
                break;
            }
        }
        return anyMoveApplied ? value : score;
    }
}
我还提供了一个启发式函数,由 board.scoreHeuristic() 给出。它本质上是双方玩家的得分差除以理论上可赢得的棋子总数。在这个版本的宝石棋中,这个总数是 72(12 个坑,每个坑中有 6 个 ambos)。我知道实际上不可能赢得全部 72 个,但作为近似已经足够。
我的代码偶尔会陷入无限循环,我认为问题出在启发式函数上,它有时会返回 0。我不明白为什么会出现这种无限循环。
事实证明,正如许多 Java 程序员所知,在拷贝构造函数中直接给数组赋值(只复制了引用)
与调用 clone() 复制数组并不相同……
对于其他不知道的程序员(比如我自己..):
这个
// Copy constructor, buggy version (kept as the counter-example): builds a GameState from an existing one.
GameState(GameState parent) {
this.board = parent.board; // BAD! Copies only the reference: this state and parent share one board object, so a change made through either is visible in both.
}
应该是这样的:
// Copy constructor, fixed version: the new GameState gets its own copy of the parent's board.
GameState(GameState parent) {
this.board = parent.board.clone(); // or System.arraycopy. NOTE(review): clone() on an array is shallow - if board is multi-dimensional the inner arrays are still shared; confirm board is one-dimensional.
}