我正在尝试实施极小极大算法来创建井字游戏机器人,但出现递归错误

I'm trying to implement a minimax algorithm to create a tic-tac-toe bot, but i'm getting a recursion error

我正在尝试实施极小极大算法来创建井字游戏机器人,但我收到 RecursionError: maximum recursion depth exceeded in comparison 错误。我有下面的代码。我添加了注释,提到函数应该做什么。 我最后 你能看看下面的代码吗? 谢谢

X = "X"
O = "O"
EMPTY = None

def initial_state():
    """
    Returns starting state of the board.
    """
    return [[EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY],
            [EMPTY, EMPTY, EMPTY]]


def player(board):
    """
    Returns player who has the next turn on a board.
    """
    o_counter = 0
    x_counter = 0
    for i in board:
        for j in i:
            if j == 'X':
                x_counter += 1
            elif j == 'O':
                o_counter += 1
    if x_counter == 0 and o_counter == 0:
        return 'O'
    elif x_counter > o_counter:
        return 'O'
    elif o_counter > x_counter:
        return 'X'



def actions(board):
    """
    Returns set of all possible actions (i, j) available on the board.
    """
    action = []
    for i in range(3):
        for j in range(3):
            if board[i][j] is None:
                action.append([i, j])
    return action


def result(board, action):
    """
    Returns the board that results from making move (i, j) on the board.
    """
    p = player(board)
    i, j = action
    board[i][j] = p
    return board


def winner(board):
    """
    Returns the winner of the game, if there is one.
    """
    i = 1
    if board[0][0] == board[1][1] == board[2][2] and (board[0][0] == 'X' or board[0][0] == 'O'):
        return board[0][0]
    elif board[0][2] == board[1][1] == board[2][0] and (board[0][2] == 'X' or board[0][2] == 'O'):
        return  board[0][2]
    else:
        if board[0][0] == board[0][1] == board[0][2] and (board[0][0] == 'X' or board[0][0] == 'O'):
            return board[0][0]
        elif board[i][0] == board[i][1] == board[i][2] and (board[i][0] == 'X' or board[i][0] == 'O'):
            return board[i][0]
        elif board[2][0] == board[2][1] == board[2][2] and (board[2][0] == 'X' or board[2][0] == 'O'):
            return board[2][0]

        elif board[0][0] == board[1][0] == board[2][0] and (board[0][0] == 'X' or board[0][0] == 'O'):
            return board[0][0]
        elif board[0][i] == board[1][i] == board[2][i] and (board[0][i] == 'X' or board[0][i] == 'O'):
            return board[0][i]
        elif board[0][2] == board[1][2] == board[2][2] and (board[0][2] == 'X' or board[0][2] == 'O'):
            return board[0][2]

def terminal(board):
    """
    Returns True if game is over, False otherwise.
    """
    check = True
    if winner(board) == 'X' or winner(board) == 'O':
        return True
    elif check:
        for i in board:
            for j in i:
                if j is None:
                    check = False
                    return False
        if check:
            return True
    else:
        return False


def utility(board):
    """
    Returns 1 if X has won the game, -1 if O has won, 0 otherwise.
    """
    if winner(board) == 'X':
        return 1
    elif winner(board) == 'O':
        return -1
    else:
        return 0


def maximum(board):
    if terminal(board):
        return utility(board)
    v = -9999999999999999999999
    for action in actions(board):
        m = minimum(result(board, action))
        if m > v:
            v = m
    return v


def minimum(board):
    if terminal(board):
        return utility(board)
    v = 9999999999999999999999
    for action in actions(board):
        m = maximum(result(board, action))
        if m < v:
            v = m
    return v


def minimax(board):
    """
    Returns the optimal action for the current player on the board.
    """
    return_action = None
    curr_player = player(board)
    states = actions(board)
    temp_board = board.copy()
    score = 0
    temp_score = 0
    for state in states:
        i, j = state
        if curr_player == 'X':
            temp_board[i][j] = curr_player
            temp_score = maximum(temp_board)
        elif curr_player == 'O':
            temp_board[i][j] = curr_player
            temp_score = minimum(temp_board)
        if curr_player == 'X':
            if temp_score > score:
                score = temp_score
                return_action = state
        elif curr_player == 'O':
            if temp_score < score:
                score = temp_score
                return_action = state

    return  return_action

你的问题是你陷入了无限状态,这意味着你一直递归调用函数直到达到递归限制。您的问题出在您的播放器功能以及您如何决定下一个轮到谁。在 O 在位置 0,0 和 X 在位置 0,1 播放之后,您然后尝试决定谁是下一个播放

所以你数一数,O 和 X 都各放置了 1 个令牌。然而,你决定谁是下一个的逻辑不适合这个董事会状态。

    if x_counter == 0 and o_counter == 0:
        return 'O'
    elif x_counter > o_counter:
        return 'O'
    elif o_counter > x_counter:
        return 'X'

所以当 x_counter 和 y_counter 相等但不为 0 时,你不会 return 任何东西。这导致 None 被函数 return 编辑所以你卡住了,永远不会在位置 0,2 放置一个标记。如果 O 总是先走,那么任何时候 x_counter == o_counter 你应该 return '0' 所以把它改成

    if x_counter == o_counter:
        return 'O'
    elif x_counter > o_counter:
        return 'O'
    elif o_counter > x_counter:
        return 'X'