游戏 AI 在 Tic-Tac-Toe 中一方面发挥强大作用,另一方面变得愚蠢

Game AI works powerfully on one side and becomes dumb on the other in Tic-Tac-Toe

我正在尝试使用 PyGame 和 MiniMax 算法在 Python 中制作 Tic-Tac-Toe 游戏。 AI 在第一次机会时表现得非常好(扮演 'X'),但变得愚蠢到 帮助 用户在没有第一次机会时获胜(扮演 'O').我想我知道问题出在哪里,但改变它会扰乱整个程序,并且不符合给定的文档字符串。
我制作了两个 python 文件 - 一个用于 GUI (runner.py),另一个用于游戏和 AI 背后的逻辑 (tictactoe.py)。

这是游戏背后的逻辑:

# Import module `copy` for function `deepcopy` to deeply copy an
# original (mutable) object to save the object from mutations
import copy


X = 'X'
O = 'O'
EMPTY = None


def initial_state():
    """Returns starting state of the board
    """
    return [
        [EMPTY, EMPTY, EMPTY],
        [EMPTY, EMPTY, EMPTY],
        [EMPTY, EMPTY, EMPTY]
    ]


def display(board, autoprint=False):
    """Displays the board nested list as
a 3x3 matrix for board visualization
"""
    vis_board = ''

    for row in board:
        for playr in row:
            if playr is None:
                playr = ' '

            playr += ' '
            vis_board += playr

        vis_board += '\n'

    if autoprint:
        print(vis_board)

    return vis_board


def player(board):
    """Returns player who has the next turn on a board
    """

    global X, O

    # Initial values for every call of the function
    X_count = 0
    O_count = 0

    for row in board:
        for playr in row:
            if playr == X:
                X_count += 1

            elif playr == O:
                O_count += 1

    # `X` always starts first
    if O_count < X_count:
        return O

    return X


def actions(board):
    """Returns set of all possible actions
(i, j) available on the board
    """

    global EMPTY

    action_set = set()

    for i, row in enumerate(board):
        for j, playr in enumerate(row):
            if playr is EMPTY:
                action_set.add((i, j))

    return action_set


def result(board, action):
    """Returns the board that results from
making move (i, j) on the board.
    """

    global EMPTY

    if type(action) is not tuple or len(action) != 2:
        raise Exception('invalid action taken')

    # Using `deepcopy` to make a deepcopy of *board*
    # as duplication by slicing entire list and by
    # type conversion is not working poperly
    dup_board = copy.deepcopy(board)

    # Unpack the coordinates as `I` and `J`
    I, J = action

    # Check if place has not already been used
    if dup_board[I][J] is EMPTY:
        dup_board[I][J] = player(dup_board)

    else:
        raise Exception('invalid action taken')

    return dup_board


def is_full(board):
    """Returns True if all places have been occupied, else returns False
"""

    global EMPTY

    for row in board:
        for playr in row:
            if playr is EMPTY:
                return False

    return True


def winner(board):
    """Returns the winner of the game, if there is one.
    """

    winr = None # Initial declaration to avoid errors if no winner found

    # Check diagonally
    if (board[1][1] == board[0][0] and board[0][0] == board[2][2])\
         or (board[1][1] == board[0][2] and board[0][2] == board[2][0]):
            winr = board[1][1]
            return winr

    for i in range(3):
        # Check each row for three-in-a-row
        if board[i][0] == board[i][1] and board[i][1] == board[i][2]:
            winr = board[i][1]
            break

        # Check each column for three-in-a-column
        elif board[0][i] == board[1][i] and board[1][i] == board[2][i]:
            winr = board[1][i]
            break

    return winr


def terminal(board):
    """Returns True if game is over, False otherwise.
    """

    if winner(board) is None and not is_full(board):
        return False

    return True


def utility(board):
    """Returns 1 if X has won the game, -1 if O has won, 0 otherwise.
    """
    global X, O

    if terminal(board):
        winr = winner(board)

        if winr == X:
            util = 1

        elif winr == O:
            util = -1

        else:
            util = 0

        return util

    return None


def get_best_score(board, is_max_turn):
    """Returns the best value of values of all possible moves
"""
    if utility(board) is not None:
        return utility(board)

    scores = []

    # Recursively help `minimax` choose the best action
    # in `actions` of *board* by returning the best value
    for action in actions(board):
        rslt = result(board, action)
        scores.append(get_best_score(rslt, not is_max_turn))

    return max(scores) if is_max_turn else min(scores)


def minimax(board):
    """Returns the optimal action for the current player on the board.
    """

    if terminal(board):
        return None

    best_score = -float('inf')  # Least possible score
    best_action = None

    for action in actions(board):
        rslt = result(board, action)
        score = get_best_score(rslt, False)

        if score > best_score:
            best_score = score
            best_action = action

    return best_action

GUI代码文件:

# Import module `PyGame` for a GUI
import pygame

import sys
import time

# Import module `tictactoe` (from the same folder as
# this file `__file__`) for the logic of the game's AI
import tictactoe as ttt

pygame.init()
size = width, height = 600, 400

# Colors
black = (0, 0, 0)
white = (255, 255, 255)

screen = pygame.display.set_mode(size)

mediumFont = pygame.font.Font('OpenSans-Regular.ttf', 24)
largeFont = pygame.font.Font('OpenSans-Regular.ttf', 40)
moveFont = pygame.font.Font('OpenSans-Regular.ttf', 60)

user = None
board = ttt.initial_state()
ai_turn = False

while True:

    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            sys.exit()

    screen.fill(black)

    # Let user choose a player.
    if user is None:

        # Draw title
        title = largeFont.render('Play Tic-Tac-Toe', True, white)
        titleRect = title.get_rect()
        titleRect.center = (round(width/2), 50)
        screen.blit(title, titleRect)

        # Draw buttons
        playXButton = pygame.Rect(round(width/8), round(height/2), round(width/4), 50)
        playX = mediumFont.render('Play as X', True, black)
        playXRect = playX.get_rect()
        playXRect.center = playXButton.center
        pygame.draw.rect(screen, white, playXButton)
        screen.blit(playX, playXRect)

        playOButton = pygame.Rect(5*round(width/8), round(height/2), round(width/4), 50)
        playO = mediumFont.render('Play as O', True, black)
        playORect = playO.get_rect()
        playORect.center = playOButton.center
        pygame.draw.rect(screen, white, playOButton)
        screen.blit(playO, playORect)

        # Check if button is clicked
        click, _, _ = pygame.mouse.get_pressed()
        if click == 1:
            mouse = pygame.mouse.get_pos()
            time.sleep(0.5)
            if playXButton.collidepoint(mouse):
                user = ttt.X
            elif playOButton.collidepoint(mouse):
                user = ttt.O

    else:

        # Draw game board
        tile_size = 80
        tile_origin = (width / 2 - (1.5 * tile_size),
                       height / 2 - (1.5 * tile_size))
        tiles = []
        for i in range(3):
            row = []
            for j in range(3):
                rect = pygame.Rect(
                    round(tile_origin[0]+j*tile_size),
                    round(tile_origin[1]+i*tile_size),
                    round(tile_size), round(tile_size)
                )
                pygame.draw.rect(screen, white, rect, 3)

                if board[i][j] != ttt.EMPTY:
                    move = moveFont.render(board[i][j], True, white)
                    moveRect = move.get_rect()
                    moveRect.center = rect.center
                    screen.blit(move, moveRect)
                row.append(rect)
            tiles.append(row)

        game_over = ttt.terminal(board)
        player = ttt.player(board)

        # Show title
        if game_over:
            winner = ttt.winner(board)
            if winner is None:
                title = f'Game Over: Tie.'
            else:
                title = f'Game Over: {winner} wins.'
        elif user == player:
            title = f'Play as {user}'
        else:
            title = f'AI thinking...'
        title = largeFont.render(title, True, white)
        titleRect = title.get_rect()
        titleRect.center = (round(width/2), 30)
        screen.blit(title, titleRect)

        # Check for AI move
        if user != player and not game_over:
            if ai_turn:
                time.sleep(0.5)
                move = ttt.minimax(board)
                board = ttt.result(board, move)
                ai_turn = False
            else:
                ai_turn = True

        # Check for a user move
        click, _, _ = pygame.mouse.get_pressed()
        if click == 1 and user == player and not game_over:
            mouse = pygame.mouse.get_pos()
            for i in range(3):
                for j in range(3):
                    if (board[i][j] == ttt.EMPTY and tiles[i][j].collidepoint(mouse)):
                        board = ttt.result(board, (i, j))

        if game_over:
            againButton = pygame.Rect(round(width/3), round(height-65), round(width/3), 50)
            again = mediumFont.render('Play Again', True, black)
            againRect = again.get_rect()
            againRect.center = againButton.center
            pygame.draw.rect(screen, white, againButton)
            screen.blit(again, againRect)
            click, _, _ = pygame.mouse.get_pressed()
            if click == 1:
                mouse = pygame.mouse.get_pos()
                if againButton.collidepoint(mouse):
                    time.sleep(0.2)
                    user = None
                    board = ttt.initial_state()
                    ai_turn = False

    pygame.display.flip()

这些是提出这些问题的组织给出的答案的旁注:

如果有任何 bugs/errors 导致 AI 在玩 'O' 时变得愚蠢,请告诉我。我相信错误在 utility 中,但我无法更改代码,因为它是不允许的(写在文档字符串中)。

谢谢!

编辑:问题几乎已经解决,但 AI 有时会变得愚蠢,比如不试图用相反的符号阻止用户的移动等。

best_score = -float('inf')  # Least possible score

您需要根据您计算移动的玩家来改变它。我认为正因为如此,负面玩家正在选择 random/first 合理的举动。

我已经实施了 minimax 和相关的启发式方法 2 次,并且总是发现使用 "negamax" 方法效果最好,因为您无需担心何时应用 max 以及何时应用 min 基于播放器。