游戏 AI 在 Tic-Tac-Toe 中一方面发挥强大作用,另一方面变得愚蠢
Game AI works powerfully on one side and becomes dumb on the other in Tic-Tac-Toe
我正在尝试使用 PyGame 和 MiniMax 算法在 Python 中制作 Tic-Tac-Toe 游戏。 AI 在第一次机会时表现得非常好(扮演 'X'),但变得愚蠢到 帮助 用户在没有第一次机会时获胜(扮演 'O').我想我知道问题出在哪里,但改变它会扰乱整个程序,并且不符合给定的文档字符串。
我制作了两个 python 文件 - 一个用于 GUI (runner.py),另一个用于游戏和 AI 背后的逻辑 (tictactoe.py)。
这是游戏背后的逻辑:
# Import module `copy` for function `deepcopy` to deeply copy an
# original (mutable) object to save the object from mutations
import copy
X = 'X'
O = 'O'
EMPTY = None
def initial_state():
"""Returns starting state of the board
"""
return [
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY]
]
def display(board, autoprint=False):
"""Displays the board nested list as
a 3x3 matrix for board visualization
"""
vis_board = ''
for row in board:
for playr in row:
if playr is None:
playr = ' '
playr += ' '
vis_board += playr
vis_board += '\n'
if autoprint:
print(vis_board)
return vis_board
def player(board):
"""Returns player who has the next turn on a board
"""
global X, O
# Initial values for every call of the function
X_count = 0
O_count = 0
for row in board:
for playr in row:
if playr == X:
X_count += 1
elif playr == O:
O_count += 1
# `X` always starts first
if O_count < X_count:
return O
return X
def actions(board):
"""Returns set of all possible actions
(i, j) available on the board
"""
global EMPTY
action_set = set()
for i, row in enumerate(board):
for j, playr in enumerate(row):
if playr is EMPTY:
action_set.add((i, j))
return action_set
def result(board, action):
"""Returns the board that results from
making move (i, j) on the board.
"""
global EMPTY
if type(action) is not tuple or len(action) != 2:
raise Exception('invalid action taken')
# Using `deepcopy` to make a deepcopy of *board*
# as duplication by slicing entire list and by
# type conversion is not working poperly
dup_board = copy.deepcopy(board)
# Unpack the coordinates as `I` and `J`
I, J = action
# Check if place has not already been used
if dup_board[I][J] is EMPTY:
dup_board[I][J] = player(dup_board)
else:
raise Exception('invalid action taken')
return dup_board
def is_full(board):
"""Returns True if all places have been occupied, else returns False
"""
global EMPTY
for row in board:
for playr in row:
if playr is EMPTY:
return False
return True
def winner(board):
"""Returns the winner of the game, if there is one.
"""
winr = None # Initial declaration to avoid errors if no winner found
# Check diagonally
if (board[1][1] == board[0][0] and board[0][0] == board[2][2])\
or (board[1][1] == board[0][2] and board[0][2] == board[2][0]):
winr = board[1][1]
return winr
for i in range(3):
# Check each row for three-in-a-row
if board[i][0] == board[i][1] and board[i][1] == board[i][2]:
winr = board[i][1]
break
# Check each column for three-in-a-column
elif board[0][i] == board[1][i] and board[1][i] == board[2][i]:
winr = board[1][i]
break
return winr
def terminal(board):
"""Returns True if game is over, False otherwise.
"""
if winner(board) is None and not is_full(board):
return False
return True
def utility(board):
"""Returns 1 if X has won the game, -1 if O has won, 0 otherwise.
"""
global X, O
if terminal(board):
winr = winner(board)
if winr == X:
util = 1
elif winr == O:
util = -1
else:
util = 0
return util
return None
def get_best_score(board, is_max_turn):
"""Returns the best value of values of all possible moves
"""
if utility(board) is not None:
return utility(board)
scores = []
# Recursively help `minimax` choose the best action
# in `actions` of *board* by returning the best value
for action in actions(board):
rslt = result(board, action)
scores.append(get_best_score(rslt, not is_max_turn))
return max(scores) if is_max_turn else min(scores)
def minimax(board):
"""Returns the optimal action for the current player on the board.
"""
if terminal(board):
return None
best_score = -float('inf') # Least possible score
best_action = None
for action in actions(board):
rslt = result(board, action)
score = get_best_score(rslt, False)
if score > best_score:
best_score = score
best_action = action
return best_action
GUI代码文件:
# Import module `PyGame` for a GUI
import pygame
import sys
import time
# Import module `tictactoe` (from the same folder as
# this file `__file__`) for the logic of the game's AI
import tictactoe as ttt
pygame.init()
size = width, height = 600, 400
# Colors
black = (0, 0, 0)
white = (255, 255, 255)
screen = pygame.display.set_mode(size)
mediumFont = pygame.font.Font('OpenSans-Regular.ttf', 24)
largeFont = pygame.font.Font('OpenSans-Regular.ttf', 40)
moveFont = pygame.font.Font('OpenSans-Regular.ttf', 60)
user = None
board = ttt.initial_state()
ai_turn = False
while True:
for event in pygame.event.get():
if event.type == pygame.QUIT:
sys.exit()
screen.fill(black)
# Let user choose a player.
if user is None:
# Draw title
title = largeFont.render('Play Tic-Tac-Toe', True, white)
titleRect = title.get_rect()
titleRect.center = (round(width/2), 50)
screen.blit(title, titleRect)
# Draw buttons
playXButton = pygame.Rect(round(width/8), round(height/2), round(width/4), 50)
playX = mediumFont.render('Play as X', True, black)
playXRect = playX.get_rect()
playXRect.center = playXButton.center
pygame.draw.rect(screen, white, playXButton)
screen.blit(playX, playXRect)
playOButton = pygame.Rect(5*round(width/8), round(height/2), round(width/4), 50)
playO = mediumFont.render('Play as O', True, black)
playORect = playO.get_rect()
playORect.center = playOButton.center
pygame.draw.rect(screen, white, playOButton)
screen.blit(playO, playORect)
# Check if button is clicked
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
time.sleep(0.5)
if playXButton.collidepoint(mouse):
user = ttt.X
elif playOButton.collidepoint(mouse):
user = ttt.O
else:
# Draw game board
tile_size = 80
tile_origin = (width / 2 - (1.5 * tile_size),
height / 2 - (1.5 * tile_size))
tiles = []
for i in range(3):
row = []
for j in range(3):
rect = pygame.Rect(
round(tile_origin[0]+j*tile_size),
round(tile_origin[1]+i*tile_size),
round(tile_size), round(tile_size)
)
pygame.draw.rect(screen, white, rect, 3)
if board[i][j] != ttt.EMPTY:
move = moveFont.render(board[i][j], True, white)
moveRect = move.get_rect()
moveRect.center = rect.center
screen.blit(move, moveRect)
row.append(rect)
tiles.append(row)
game_over = ttt.terminal(board)
player = ttt.player(board)
# Show title
if game_over:
winner = ttt.winner(board)
if winner is None:
title = f'Game Over: Tie.'
else:
title = f'Game Over: {winner} wins.'
elif user == player:
title = f'Play as {user}'
else:
title = f'AI thinking...'
title = largeFont.render(title, True, white)
titleRect = title.get_rect()
titleRect.center = (round(width/2), 30)
screen.blit(title, titleRect)
# Check for AI move
if user != player and not game_over:
if ai_turn:
time.sleep(0.5)
move = ttt.minimax(board)
board = ttt.result(board, move)
ai_turn = False
else:
ai_turn = True
# Check for a user move
click, _, _ = pygame.mouse.get_pressed()
if click == 1 and user == player and not game_over:
mouse = pygame.mouse.get_pos()
for i in range(3):
for j in range(3):
if (board[i][j] == ttt.EMPTY and tiles[i][j].collidepoint(mouse)):
board = ttt.result(board, (i, j))
if game_over:
againButton = pygame.Rect(round(width/3), round(height-65), round(width/3), 50)
again = mediumFont.render('Play Again', True, black)
againRect = again.get_rect()
againRect.center = againButton.center
pygame.draw.rect(screen, white, againButton)
screen.blit(again, againRect)
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
if againButton.collidepoint(mouse):
time.sleep(0.2)
user = None
board = ttt.initial_state()
ai_turn = False
pygame.display.flip()
这些是提出这些问题的组织给出的答案的旁注:
- 不改号。任何函数中的参数或参数本身。
- 遵循所有函数中编写的文档字符串
- 新功能可根据需要定义
如果有任何 bugs/errors 导致 AI 在玩 'O' 时变得愚蠢,请告诉我。我相信错误在 utility
中,但我无法更改代码,因为它是不允许的(写在文档字符串中)。
谢谢!
编辑:问题几乎已经解决,但 AI 有时会变得愚蠢,比如不试图用相反的符号阻止用户的移动等。
best_score = -float('inf') # Least possible score
您需要根据您计算移动的玩家来改变它。我认为正因为如此,负面玩家正在选择 random/first 合理的举动。
我已经实施了 minimax 和相关的启发式方法 2 次,并且总是发现使用 "negamax" 方法效果最好,因为您无需担心何时应用 max 以及何时应用 min 基于播放器。
我正在尝试使用 PyGame 和 MiniMax 算法在 Python 中制作 Tic-Tac-Toe 游戏。 AI 在第一次机会时表现得非常好(扮演 'X'),但变得愚蠢到 帮助 用户在没有第一次机会时获胜(扮演 'O').我想我知道问题出在哪里,但改变它会扰乱整个程序,并且不符合给定的文档字符串。
我制作了两个 python 文件 - 一个用于 GUI (runner.py),另一个用于游戏和 AI 背后的逻辑 (tictactoe.py)。
这是游戏背后的逻辑:
# Import module `copy` for function `deepcopy` to deeply copy an
# original (mutable) object to save the object from mutations
import copy
X = 'X'
O = 'O'
EMPTY = None
def initial_state():
"""Returns starting state of the board
"""
return [
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY],
[EMPTY, EMPTY, EMPTY]
]
def display(board, autoprint=False):
"""Displays the board nested list as
a 3x3 matrix for board visualization
"""
vis_board = ''
for row in board:
for playr in row:
if playr is None:
playr = ' '
playr += ' '
vis_board += playr
vis_board += '\n'
if autoprint:
print(vis_board)
return vis_board
def player(board):
"""Returns player who has the next turn on a board
"""
global X, O
# Initial values for every call of the function
X_count = 0
O_count = 0
for row in board:
for playr in row:
if playr == X:
X_count += 1
elif playr == O:
O_count += 1
# `X` always starts first
if O_count < X_count:
return O
return X
def actions(board):
"""Returns set of all possible actions
(i, j) available on the board
"""
global EMPTY
action_set = set()
for i, row in enumerate(board):
for j, playr in enumerate(row):
if playr is EMPTY:
action_set.add((i, j))
return action_set
def result(board, action):
"""Returns the board that results from
making move (i, j) on the board.
"""
global EMPTY
if type(action) is not tuple or len(action) != 2:
raise Exception('invalid action taken')
# Using `deepcopy` to make a deepcopy of *board*
# as duplication by slicing entire list and by
# type conversion is not working poperly
dup_board = copy.deepcopy(board)
# Unpack the coordinates as `I` and `J`
I, J = action
# Check if place has not already been used
if dup_board[I][J] is EMPTY:
dup_board[I][J] = player(dup_board)
else:
raise Exception('invalid action taken')
return dup_board
def is_full(board):
"""Returns True if all places have been occupied, else returns False
"""
global EMPTY
for row in board:
for playr in row:
if playr is EMPTY:
return False
return True
def winner(board):
"""Returns the winner of the game, if there is one.
"""
winr = None # Initial declaration to avoid errors if no winner found
# Check diagonally
if (board[1][1] == board[0][0] and board[0][0] == board[2][2])\
or (board[1][1] == board[0][2] and board[0][2] == board[2][0]):
winr = board[1][1]
return winr
for i in range(3):
# Check each row for three-in-a-row
if board[i][0] == board[i][1] and board[i][1] == board[i][2]:
winr = board[i][1]
break
# Check each column for three-in-a-column
elif board[0][i] == board[1][i] and board[1][i] == board[2][i]:
winr = board[1][i]
break
return winr
def terminal(board):
"""Returns True if game is over, False otherwise.
"""
if winner(board) is None and not is_full(board):
return False
return True
def utility(board):
"""Returns 1 if X has won the game, -1 if O has won, 0 otherwise.
"""
global X, O
if terminal(board):
winr = winner(board)
if winr == X:
util = 1
elif winr == O:
util = -1
else:
util = 0
return util
return None
def get_best_score(board, is_max_turn):
"""Returns the best value of values of all possible moves
"""
if utility(board) is not None:
return utility(board)
scores = []
# Recursively help `minimax` choose the best action
# in `actions` of *board* by returning the best value
for action in actions(board):
rslt = result(board, action)
scores.append(get_best_score(rslt, not is_max_turn))
return max(scores) if is_max_turn else min(scores)
def minimax(board):
"""Returns the optimal action for the current player on the board.
"""
if terminal(board):
return None
best_score = -float('inf') # Least possible score
best_action = None
for action in actions(board):
rslt = result(board, action)
score = get_best_score(rslt, False)
if score > best_score:
best_score = score
best_action = action
return best_action
GUI代码文件:
# Import module `PyGame` for a GUI
import pygame
import sys
import time
# Import module `tictactoe` (from the same folder as
# this file `__file__`) for the logic of the game's AI
import tictactoe as ttt
pygame.init()
size = width, height = 600, 400
# Colors
black = (0, 0, 0)
white = (255, 255, 255)
screen = pygame.display.set_mode(size)
mediumFont = pygame.font.Font('OpenSans-Regular.ttf', 24)
largeFont = pygame.font.Font('OpenSans-Regular.ttf', 40)
moveFont = pygame.font.Font('OpenSans-Regular.ttf', 60)
user = None
board = ttt.initial_state()
ai_turn = False
while True:
for event in pygame.event.get():
if event.type == pygame.QUIT:
sys.exit()
screen.fill(black)
# Let user choose a player.
if user is None:
# Draw title
title = largeFont.render('Play Tic-Tac-Toe', True, white)
titleRect = title.get_rect()
titleRect.center = (round(width/2), 50)
screen.blit(title, titleRect)
# Draw buttons
playXButton = pygame.Rect(round(width/8), round(height/2), round(width/4), 50)
playX = mediumFont.render('Play as X', True, black)
playXRect = playX.get_rect()
playXRect.center = playXButton.center
pygame.draw.rect(screen, white, playXButton)
screen.blit(playX, playXRect)
playOButton = pygame.Rect(5*round(width/8), round(height/2), round(width/4), 50)
playO = mediumFont.render('Play as O', True, black)
playORect = playO.get_rect()
playORect.center = playOButton.center
pygame.draw.rect(screen, white, playOButton)
screen.blit(playO, playORect)
# Check if button is clicked
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
time.sleep(0.5)
if playXButton.collidepoint(mouse):
user = ttt.X
elif playOButton.collidepoint(mouse):
user = ttt.O
else:
# Draw game board
tile_size = 80
tile_origin = (width / 2 - (1.5 * tile_size),
height / 2 - (1.5 * tile_size))
tiles = []
for i in range(3):
row = []
for j in range(3):
rect = pygame.Rect(
round(tile_origin[0]+j*tile_size),
round(tile_origin[1]+i*tile_size),
round(tile_size), round(tile_size)
)
pygame.draw.rect(screen, white, rect, 3)
if board[i][j] != ttt.EMPTY:
move = moveFont.render(board[i][j], True, white)
moveRect = move.get_rect()
moveRect.center = rect.center
screen.blit(move, moveRect)
row.append(rect)
tiles.append(row)
game_over = ttt.terminal(board)
player = ttt.player(board)
# Show title
if game_over:
winner = ttt.winner(board)
if winner is None:
title = f'Game Over: Tie.'
else:
title = f'Game Over: {winner} wins.'
elif user == player:
title = f'Play as {user}'
else:
title = f'AI thinking...'
title = largeFont.render(title, True, white)
titleRect = title.get_rect()
titleRect.center = (round(width/2), 30)
screen.blit(title, titleRect)
# Check for AI move
if user != player and not game_over:
if ai_turn:
time.sleep(0.5)
move = ttt.minimax(board)
board = ttt.result(board, move)
ai_turn = False
else:
ai_turn = True
# Check for a user move
click, _, _ = pygame.mouse.get_pressed()
if click == 1 and user == player and not game_over:
mouse = pygame.mouse.get_pos()
for i in range(3):
for j in range(3):
if (board[i][j] == ttt.EMPTY and tiles[i][j].collidepoint(mouse)):
board = ttt.result(board, (i, j))
if game_over:
againButton = pygame.Rect(round(width/3), round(height-65), round(width/3), 50)
again = mediumFont.render('Play Again', True, black)
againRect = again.get_rect()
againRect.center = againButton.center
pygame.draw.rect(screen, white, againButton)
screen.blit(again, againRect)
click, _, _ = pygame.mouse.get_pressed()
if click == 1:
mouse = pygame.mouse.get_pos()
if againButton.collidepoint(mouse):
time.sleep(0.2)
user = None
board = ttt.initial_state()
ai_turn = False
pygame.display.flip()
这些是提出这些问题的组织给出的答案的旁注:
- 不改号。任何函数中的参数或参数本身。
- 遵循所有函数中编写的文档字符串
- 新功能可根据需要定义
如果有任何 bugs/errors 导致 AI 在玩 'O' 时变得愚蠢,请告诉我。我相信错误在 utility
中,但我无法更改代码,因为它是不允许的(写在文档字符串中)。
谢谢!
编辑:问题几乎已经解决,但 AI 有时会变得愚蠢,比如不试图用相反的符号阻止用户的移动等。
best_score = -float('inf') # Least possible score
您需要根据您计算移动的玩家来改变它。我认为正因为如此,负面玩家正在选择 random/first 合理的举动。
我已经实施了 minimax 和相关的启发式方法 2 次,并且总是发现使用 "negamax" 方法效果最好,因为您无需担心何时应用 max 以及何时应用 min 基于播放器。