minimax 函数不生成所有游戏状态

minimax function not generating all game states

谁能告诉我为什么我的代码打印 1 而不是 8?它似乎没有经历非常单一的状态。这是为什么?

使用 minimax 算法根据游戏状态(一个可能的井字棋盘)找到可能的最佳着法。通常,它会分支成一个大的游戏状态树,当游戏没有在结束状态结束时调用每个新分支,重复,然后通过递归地沿着树向下寻找每个的最佳动作来找到最佳可能的动作播放器。

我在 http://giocc.com/concise-implementation-of-minimax-through-higher-order-functions.html 关注 "tutorial"。

我的代码:

#!/usr/bin/env python3
'''Minimax finds the best possible moves by applying a set of rules.
A win = 1, tie = 0, loss = -1 (for us). Assuming that each player chooses the best move
(we choose 1 if possible, opponent chooses -1). Starting at the top of a 'game tree',
generate the possible moves we can make. If It reaches a terminal state, stop. Otherwise keep searching in depth.
We find max.
'''
#[0,1,2,3,4,5,6,7,8]
class GameState: #a game state is a certain state of the board
    #
    x_went_first = True
    def __init__(self,board):
        self.board = board
        self.winning_combos = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]
    def is_gameover(self):
        if self.board.count('X') + self.board.count('O') == 9:
            return True
        for combo in self.winning_combos:
            if (self.board[combo[0]] == 'X' and self.board[combo[1]] == 'X' and self.board[combo[2]] == 'X') or (self.board[combo[0]] == 'O' and self.board[combo[1]] == 'O' and self.board[combo[2]] == 'O'):
                return True
        return False
    def get_possible_moves(self):
        squares = []
        for square in self.board:
            if square != 'X' and square != 'O':
                squares.append(int(square))
        return squares
    def get_next_state(self, move):
        copy = self.board
        num_of_x = copy.count('X')
        num_of_o = copy.count('O')
        #x starts, o's turn 1 > 0 o's turn
        #o starts, x's turn 1 < 0 x's turn
        #x starts, x's turn 1 > 1 
        #o starts, o's turn 1 < 1
        if (self.x_went_first and num_of_x > num_of_o) or (self.x_went_first is not True and num_of_o == num_of_x):
            copy[move] = 'O'
        else:
            copy[move] = 'X'
        return GameState(copy)

def evals(game_state):
    for combo in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]:
        if game_state.board[0] == 'X' and game_state.board[1] == 'X' and game_state.board[2] == 'X':
            return 1    
        elif game_state.board[0] == 'O' and game_state.board[1] == 'O' and game_state.board[2] == 'O':
            return -1
        else:
            return 0



def min_play(game_state):
    if game_state.is_gameover():
        return evals(game_state)
    moves = game_state.get_possible_moves()
    best_move = moves[0]
    best_score = 2 #not possible, best score is -1
    for move in moves:
        clone = game_state.get_next_state(move)
        score = max_play(clone)
        if score < best_score:
            best_move = move
            best_score = score
    return best_score

def max_play(game_state):
    if game_state.is_gameover():
        return evals(game_state)
    moves = game_state.get_possible_moves()
    best_score = -2 #not possible, best score is 1
    for move in moves:
        clone = game_state.get_next_state(move)
        score = min_play(clone)
        if score > best_score:
            best_move = move
            best_score = score
    return best_score

def minimax(game_state):
    moves = game_state.get_possible_moves()
    best_move = moves[0]
    best_score = -2
    for move in moves:
        clone = game_state.get_next_state(move)
        score = min_play(clone)
        if score > best_score:
            best_move = move
            best_score = score
    return best_move


game = GameState(['X',1,2,
                    3,'O',5,
                    6,7,8])
print(minimax(game))

我的 evals 总是返回 0,其中一个获胜组合被打乱了。新评估:

def evals:
    for combo in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,6]]:
        if game_state.board[0] == 'X' and game_state.board[1] == 'X' and game_state.board[2] == 'X':
            return 1    
        elif game_state.board[0] == 'O' and game_state.board[1] == 'O' and game_state.board[2] == 'O':
            return -1
     return 0

我还对其进行了修改,使索引不在每个空槽中。在 https://github.com/retep-mathwizard/pyai/blob/master/minimax_ttt

查看完整代码