如何让我的 AI 算法玩 9 板井字游戏?
How do I make my AI algorithm play 9-board tic-tac-toe?
为了方便别人帮助我
我把所有代码都放在这里https://pastebin.com/WENzM41k
它将在 2 个代理相互竞争时开始。
我正在尝试实施 Monte Carlo 树搜索以在 Python 中玩 9 板井字棋。游戏规则类似于常规井字游戏,但有 9 个 3x3 子板。最后一块的放置位置决定了放置您的块的子板。这有点像终极井字游戏,但如果其中一个子棋盘获胜,游戏就结束了。
我正在尝试学习 MCTS,我在这里找到了一些代码:
http://mcts.ai/code/python.html
我在网站上使用了节点 class 和 UCT class 并添加了我的 9 棋盘井字游戏状态 class 和一些其他代码。所有代码都在这里:
from math import log, sqrt
import random
import numpy as np
from copy import deepcopy
class BigGameState:
    """State of a 9-board tic-tac-toe game.

    board[b][c] holds the owner (0 empty, 1 or 2) of cell c of sub-board b;
    row/column 0 of the 10x10 array are unused padding so indices run 1-9.
    `curr` is the sub-board the next move must be played on (it is set to
    the cell index of the previous move).
    """

    # The eight winning triples of a 3x3 sub-board (1-based cell indices).
    WIN_TRIPLES = [(1, 2, 3), (4, 5, 6), (7, 8, 9),
                   (1, 4, 7), (2, 5, 8), (3, 6, 9),
                   (1, 5, 9), (3, 5, 7)]

    def __init__(self):
        self.board = np.zeros((10, 10), dtype="int8")
        self.curr = 1
        # At the root pretend player 2 just moved, so player 1 moves first.
        self.playerJustMoved = 2
        # BUG FIX: the game must stop as soon as one sub-board is won;
        # the original kept playing until the board was full.
        self.ended = False

    def Clone(self):
        """Create a deep clone of this game state."""
        st = BigGameState()
        st.playerJustMoved = self.playerJustMoved
        st.curr = self.curr
        st.board = deepcopy(self.board)
        st.ended = self.ended
        return st

    def DoMove(self, move):
        """Update the state by carrying out `move` (1-9) on the current
        sub-board.  Must update playerJustMoved.  Illegal moves are
        silently ignored (original behavior preserved)."""
        self.playerJustMoved = 3 - self.playerJustMoved
        if 1 <= move <= 9 and move == int(move) and self.board[self.curr][move] == 0:
            self.board[self.curr][move] = self.playerJustMoved
            # End the game as soon as this sub-board has a completed line.
            bo = self.board[self.curr]
            for x, y, z in self.WIN_TRIPLES:
                if bo[x] == bo[y] == bo[z] != 0:
                    self.ended = True
                    break
            self.curr = move

    def GetMoves(self):
        """Return the free cells of the current sub-board, [] once the game ended."""
        if self.ended:
            return []
        return [i for i in range(1, 10) if self.board[self.curr][i] == 0]

    def GetResult(self, playerjm):
        """Return the result from playerjm's viewpoint: 1.0 win, 0.0 loss,
        0.5 draw; None while the game is still running."""
        for bo in self.board:
            for x, y, z in self.WIN_TRIPLES:
                # BUG FIX: the original tested `bo[x] == [y] == bo[z]`
                # (a one-element list!), which is never true, so wins were
                # never detected.  Also skip all-empty lines so the unused
                # zero rows do not count as a result.
                if bo[x] == bo[y] == bo[z]:
                    if bo[x] == playerjm:
                        return 1.0
                    elif bo[x] != 0:
                        return 0.0
        if self.GetMoves() == []:
            return 0.5  # draw

    def drawboard(self):
        """Print the full 9-board position (uses the module-level print_board_row)."""
        print_board_row(self.board, 1, 2, 3, 1, 2, 3)
        print_board_row(self.board, 1, 2, 3, 4, 5, 6)
        print_board_row(self.board, 1, 2, 3, 7, 8, 9)
        print(" ------+-------+------")
        print_board_row(self.board, 4, 5, 6, 1, 2, 3)
        print_board_row(self.board, 4, 5, 6, 4, 5, 6)
        print_board_row(self.board, 4, 5, 6, 7, 8, 9)
        print(" ------+-------+------")
        print_board_row(self.board, 7, 8, 9, 1, 2, 3)
        print_board_row(self.board, 7, 8, 9, 4, 5, 6)
        print_board_row(self.board, 7, 8, 9, 7, 8, 9)
        print()
def print_board_row(board, a, b, c, i, j, k):
    """Print one visual row: cells (i, j, k) of the three sub-boards a, b, c."""
    # The marking script doesn't seem to like this either, so just take it out to submit
    left = (board[a][i], board[a][j], board[a][k])
    mid = (board[b][i], board[b][j], board[b][k])
    right = (board[c][i], board[c][j], board[c][k])
    print("", *left, end=" | ")
    print(*mid, end=" | ")
    print(*right)
class Node:
    """One node of the MCTS tree.

    `wins` is always counted from the viewpoint of `playerJustMoved`.
    A state must be supplied on construction.
    """

    def __init__(self, move=None, parent=None, state=None):
        self.move = move                # move that led here; None at the root
        self.parentNode = parent        # None at the root
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        self.untriedMoves = state.GetMoves()          # moves not yet expanded
        self.playerJustMoved = state.playerJustMoved  # all the node keeps of the state

    def UCTSelectChild(self):
        """Return the child maximising the UCB1 score (exploration weight 0.2)."""
        def ucb1(child):
            return child.wins / child.visits + 0.2 * sqrt(2 * log(self.visits) / child.visits)
        best = self.childNodes[0]
        for child in self.childNodes[1:]:
            if ucb1(child) >= ucb1(best):  # >= keeps the last maximum, like sorted()[-1]
                best = child
        return best

    def AddChild(self, m, s):
        """Create a child for move m reaching state s; drop m from untriedMoves."""
        child = Node(move=m, parent=self, state=s)
        self.untriedMoves.remove(m)
        self.childNodes.append(child)
        return child

    def Update(self, result):
        """Record one visit with `result` (from playerJustMoved's viewpoint)."""
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return f"[M:{self.move} W/V:{self.wins}/{self.visits} U:{self.untriedMoves}]"

    def TreeToString(self, indent):
        """Recursive, indented dump of the whole subtree."""
        text = self.IndentString(indent) + str(self)
        for child in self.childNodes:
            text += child.TreeToString(indent + 1)
        return text

    def IndentString(self, indent):
        return "\n" + "| " * indent

    def ChildrenToString(self):
        """One line per direct child."""
        return "".join(str(child) + "\n" for child in self.childNodes)
def UCT(rootstate, itermax, verbose=False):
    """Conduct a UCT search for itermax iterations starting from rootstate.

    Returns the most-visited move from the root.  Assumes two alternating
    players (player 1 starts) and results in the range [0.0, 1.0].
    """
    rootnode = Node(state=rootstate)
    for _ in range(itermax):
        node, state = rootnode, rootstate.Clone()

        # Selection: descend while fully expanded and non-terminal.
        while not node.untriedMoves and node.childNodes:
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expansion: add one random untried child, if any remain.
        if node.untriedMoves:
            move = random.choice(node.untriedMoves)
            state.DoMove(move)
            node = node.AddChild(move, state)

        # Simulation: random playout to a terminal state.
        moves = state.GetMoves()
        while moves:
            state.DoMove(random.choice(moves))
            moves = state.GetMoves()

        # Backpropagation: update every node on the path back to the root.
        while node is not None:
            node.Update(state.GetResult(node.playerJustMoved))
            node = node.parentNode

    # Diagnostic dump of the search tree (printed either way, as before).
    if verbose:
        print(rootnode.TreeToString(0))
    else:
        print(rootnode.ChildrenToString())

    # The most-visited child is the most robust choice.
    best = rootnode.childNodes[0]
    for child in rootnode.childNodes[1:]:
        if child.visits >= best.visits:  # >= mimics sorted()[-1] on ties
            best = child
    return best.move
def UCTPlayGame():
    """Play one full self-play game, both sides chosen by UCT, printing the
    board and the selected move at every turn."""
    state = BigGameState()  # swap in another state class to play a different game
    while state.GetMoves():
        state.drawboard()
        best = UCT(rootstate=state, itermax=1000, verbose=False)
        print("Best Move: " + str(best) + "\n")
        state.DoMove(best)
    # Report the outcome from the viewpoint of the player who moved last.
    result = state.GetResult(state.playerJustMoved)
    if result == 1.0:
        print("Player " + str(state.playerJustMoved) + " wins!")
    elif result == 0.0:
        print("Player " + str(3 - state.playerJustMoved) + " wins!")
    else:
        print("Nobody wins!")
# Script entry point: run one self-play game between two UCT players.
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players.
    """
    UCTPlayGame()
运行 它将在 2 个代理相互竞争时启动的代码。
但是,代理不能很好地玩游戏。糟糕的比赛是不可接受的。例如,如果 ai 在子棋盘中连续获得 2,并且又轮到他了,他就不会下获胜棋步。我应该从哪里开始改进以及如何改进?我试图更改 Node class 和 UCT class 的代码,但没有任何效果。
更新:如果棋盘处于以下状态,则轮到我的AI(打X)打棋盘8(第三排中间子棋盘)。我应该编写特定代码让 AI 不玩 1 或 5(因为那样对手会赢)还是应该让 AI 来决定。我尝试编写代码告诉 AI,但那样我必须遍历所有子板。
--O|---|---
-O-|---|---
---|---|---
-----------
---|-O-|---
---|-O-|---
---|---|---
-----------
---|---|---
---|---|---
---|---|---
我花了一些时间阅读有关 MCTS 的内容,并花了更多时间来捕捉其余的错误:
- 我添加了OXOState (tic-tac-toe),这样我就可以调试熟悉的简单游戏了。来自 http://mcts.ai/code/python.html 的原始源代码只有一个问题:它会在有人赢得游戏后继续播放。所以,我解决了这个问题。
- 为了调试和乐趣添加了 HumanPlayer。
- 为评价游戏水平增加了RandomPlayer和NegamaxPlayer(negamax算法https://en.wikipedia.org/wiki/Negamax)
NegamaxPlayer 与 UCT(Monte Carlo 树搜索)
itermax= won lost draw total_time
1 964 0 36 172.8
10 923 0 77 173.4
100 577 0 423 182.1
1000 48 0 952 328.9
10000 0 0 1000 1950.3
UCT 对完美玩家的表现相当令人印象深刻(minimax 进行了完整的树搜索):itermax=1000 时得分和比赛时间几乎相等 - 1000 场比赛中只有 48 场输掉比赛。
- 修复了 BigGameState 的其余(我认为)问题。现在打的很熟练,赢不了
我在 NegamaxPlayer 中添加了一个深度限制来玩 9 棋盘井字游戏,因为在这个游戏中可能需要一段时间才能找到最佳着法。
NegamaxPlayer(深度)与 UCT(itermax)
depth itermax won lost draw total_time
4 1 9 1 0 18.4
4 10 9 1 0 20.7
4 100 5 5 0 36.2
4 1000 2 8 0 188.8
5 10000 2 8 0 318.0
6 10000 0 10 0 996.5
现在 UCT(itermax=100)与 NegamaxPlayer(depth 4)打成同一水平,并以 8 比 2 赢下更高一级,惊艳! ;-)
我在该级别 (itermax=100) 玩过的第一场比赛赢了,但在 1000 级别输了第二场比赛:
Game 1, Move 40:
┏━━━━━━━┳━━━━━━━┳━━━━━━━┓
┃ X X . ┃*O O O ┃ O . . ┃
┃ . O O ┃ . . X ┃ . X O ┃
┃ O X X ┃ X . . ┃ . X . ┃
┣━━━━━━━╋━━━━━━━╋━━━━━━━┫
┃ X . . ┃ . X . ┃ O . . ┃
┃ . X . ┃ O O X ┃ O X . ┃
┃ . O . ┃ O . . ┃ X . . ┃
┣━━━━━━━╋━━━━━━━╋━━━━━━━┫
┃ X X O ┃ O . X ┃ . O X ┃
┃ X . . ┃ . . . ┃ . . . ┃
┃ . . O ┃ O . X ┃ . O . ┃
┗━━━━━━━┻━━━━━━━┻━━━━━━━┛
Player 2 wins!
won 0 lost 1 draw 0
完整代码如下:
from math import *
import random
import time
from copy import deepcopy
class BigGameState:
    """State of a 9-board tic-tac-toe game.

    board[b][c] is the owner (0 empty, 1 or 2) of cell c of sub-board b;
    index 0 of both dimensions is unused padding so boards/cells run 1-9.
    The cell of the previous move selects the next sub-board (`curr`);
    the game ends as soon as any sub-board is won (`ended`).
    """

    # Cell layout of a sub-board:
    #  1 2 3
    #  4 5 6
    #  7 8 9
    # WINNING_PAIRS[m] lists the cell pairs that complete a line with m.
    WINNING_PAIRS = [[],                                # 0 (unused)
                     [[2, 3], [5, 9], [4, 7]],          # for 1
                     [[1, 3], [5, 8]],                  # for 2
                     [[1, 2], [5, 7], [6, 9]],          # for 3
                     [[1, 7], [5, 6]],                  # for 4
                     [[1, 9], [2, 8], [3, 7], [4, 6]],  # for 5
                     [[3, 9], [4, 5]],                  # for 6
                     [[1, 4], [5, 3], [8, 9]],          # for 7
                     [[7, 9], [2, 5]],                  # for 8
                     [[7, 8], [1, 5], [3, 6]],          # for 9
                     ]

    def __init__(self):
        self.board = [[0 for i in range(10)] for j in range(10)]
        self.curr = 1
        # At the root pretend the player just moved is player 2,
        # so player 1 will have the first move.
        self.playerJustMoved = 2
        self.ended = False
        # Last move and its sub-board, used to place '*' in __repr__.
        self.last_move = None
        self.last_curr = None

    def Clone(self):
        """Return a deep copy of this state."""
        return deepcopy(self)

    def DoMove(self, move):
        """Play `move` (int 1-9) on the current sub-board.

        Raises ValueError for a non-int, out-of-range or occupied cell.
        Updates playerJustMoved, `ended`, and the current sub-board.
        """
        # BUG FIX: the original test `1 < move > 9` only rejected moves > 9,
        # so 0 and negative ints slipped through (negative values even
        # indexed from the end of the row).
        if not isinstance(move, int) or not 1 <= move <= 9 or \
                self.board[self.curr][move] != 0:
            raise ValueError
        self.playerJustMoved = 3 - self.playerJustMoved
        self.board[self.curr][move] = self.playerJustMoved
        for index1, index2 in self.WINNING_PAIRS[move]:
            if self.playerJustMoved == self.board[self.curr][index1] == \
                    self.board[self.curr][index2]:
                self.ended = True
        self.last_move = move
        self.last_curr = self.curr
        self.curr = move

    def GetMoves(self):
        """Return the free cells of the current sub-board, [] once the game ended."""
        if self.ended:
            return []
        return [i for i in range(1, 10) if self.board[self.curr][i] == 0]

    def GetResult(self, playerjm):
        """Return the result from playerjm's viewpoint: 1.0 win, 0.0 loss,
        0.5 draw.  Raises ValueError if the game is not over yet."""
        for bo in self.board:
            for x, y, z in [(1, 2, 3), (4, 5, 6), (7, 8, 9),
                            (1, 4, 7), (2, 5, 8), (3, 6, 9),
                            (1, 5, 9), (3, 5, 7)]:
                if bo[x] == bo[y] == bo[z]:
                    if bo[x] == playerjm:
                        return 1.0
                    elif bo[x] != 0:
                        return 0.0
        if not self.GetMoves():
            return 0.5  # draw
        raise ValueError

    def _one_board_string(self, a, row):
        # Three cells of sub-board `a` starting at cell index `row`.
        return ''.join([' ' + '.XO'[self.board[a][i+row]] for i in range(3)])

    def _three_board_line(self, index, row):
        # One printed line spanning three horizontally adjacent sub-boards.
        return '┃' + ''.join([self._one_board_string(i + index, row) + ' ┃' for i in range(3)])

    def __repr__(self):
        # Box-drawing rendering of the whole position; '*' marks the last move.
        s = '┏━━━━━━━┳━━━━━━━┳━━━━━━━┓\n'
        for i in [1, 4, 7]:
            for j in [1, 4, 7]:
                s += self._three_board_line(i, j) + '\n'
            if i != 7:
                s += '┣━━━━━━━╋━━━━━━━╋━━━━━━━┫\n'
            else:
                s += '┗━━━━━━━┻━━━━━━━┻━━━━━━━┛\n'
        # BUG FIX: guard against repr() before any move has been played
        # (the original crashed on `None - 1`).
        if self.last_move is None:
            return s
        # Hack: from the rows/columns of the last move and its sub-board,
        # compute the character offset where '*' is placed (each printed
        # line is 26 characters including the newline).
        c = self.last_curr - 1
        c_c = c % 3
        c_r = c // 3
        m_c = (self.last_move - 1) % 3
        m_r = (self.last_move - 1) // 3
        p = 26 + c_r * (26 * 4) + c_c * 8 + m_r * 26 + m_c * 2 + 1
        s = s[:p] + '*' + s[p+1:]
        return s
class OXOState:
    """Plain 3x3 tic-tac-toe, cells numbered 0-8 row-major."""

    # Cell layout:
    #  0 1 2
    #  3 4 5
    #  6 7 8
    # WINNING_PAIRS[m] lists the cell pairs that complete a line with m.
    WINNING_PAIRS = [[[1, 2], [4, 8], [3, 6]],          # for 0
                     [[0, 2], [4, 7]],                  # for 1
                     [[0, 1], [4, 6], [5, 8]],          # for 2
                     [[0, 6], [4, 5]],                  # for 3
                     [[0, 8], [1, 7], [2, 6], [3, 5]],  # for 4
                     [[2, 8], [3, 4]],                  # for 5
                     [[0, 3], [4, 2], [7, 8]],          # for 6
                     [[6, 8], [1, 4]],                  # for 7
                     [[6, 7], [0, 4], [2, 5]],          # for 8 (comment fixed: was "for 6")
                     ]

    def __init__(self):
        # Player 2 "just moved" at the root, so player 1 starts.
        self.playerJustMoved = 2
        self.ended = False
        self.board = [0, 0, 0, 0, 0, 0, 0, 0, 0]

    def Clone(self):
        """Return a deep copy of this state."""
        return deepcopy(self)

    def DoMove(self, move):
        """Play `move` (int 0-8) for the player to move.

        Raises ValueError for a non-int, out-of-range or occupied cell.
        BUG FIX: the original test `0 < move > 8` never rejected negative
        moves, which silently indexed from the end of the board.
        """
        if not isinstance(move, int) or not 0 <= move <= 8 or \
                self.board[move] != 0:
            raise ValueError
        self.playerJustMoved = 3 - self.playerJustMoved
        self.board[move] = self.playerJustMoved
        for index1, index2 in self.WINNING_PAIRS[move]:
            if self.playerJustMoved == self.board[index1] == self.board[index2]:
                self.ended = True

    def GetMoves(self):
        """Return the free cells, or [] once the game is over."""
        return [] if self.ended else [i for i in range(9) if self.board[i] == 0]

    def GetResult(self, playerjm):
        """Return 1.0/0.0/0.5 from playerjm's viewpoint; ValueError if not over."""
        for (x, y, z) in [(0, 1, 2), (3, 4, 5), (6, 7, 8), (0, 3, 6), (1, 4, 7),
                          (2, 5, 8), (0, 4, 8), (2, 4, 6)]:
            if self.board[x] == self.board[y] == self.board[z]:
                if self.board[x] == playerjm:
                    return 1.0
                elif self.board[x] != 0:
                    return 0.0
        if self.GetMoves() == []:
            return 0.5  # draw
        raise ValueError

    def __repr__(self):
        s = ""
        for i in range(9):
            s += '.XO'[self.board[i]]
            if i % 3 == 2:
                s += "\n"
        return s
class Node:
    """One node of the MCTS tree.

    `wins` is always counted from the viewpoint of `playerJustMoved`.
    A state must be supplied on construction.
    """

    def __init__(self, move=None, parent=None, state=None):
        self.move = move                # move that led here; None at the root
        self.parentNode = parent        # None at the root
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        self.untriedMoves = state.GetMoves()          # moves not yet expanded
        self.playerJustMoved = state.playerJustMoved  # all the node keeps of the state

    def UCTSelectChild(self):
        """Return the child maximising the UCB1 score (exploration weight 1)."""
        def ucb1(child):
            return child.wins / child.visits + sqrt(2 * log(self.visits) / child.visits)
        best = self.childNodes[0]
        for child in self.childNodes[1:]:
            if ucb1(child) >= ucb1(best):  # >= keeps the last maximum, like sorted()[-1]
                best = child
        return best

    def AddChild(self, m, s):
        """Create a child for move m reaching state s; drop m from untriedMoves."""
        child = Node(move=m, parent=self, state=s)
        self.untriedMoves.remove(m)
        self.childNodes.append(child)
        return child

    def Update(self, result):
        """Record one visit with `result` (from playerJustMoved's viewpoint)."""
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return f"[M:{self.move} W/V:{self.wins}/{self.visits} U:{self.untriedMoves}]"

    def TreeToString(self, indent):
        """Recursive, indented dump of the whole subtree."""
        text = self.IndentString(indent) + str(self)
        for child in self.childNodes:
            text += child.TreeToString(indent + 1)
        return text

    def IndentString(self, indent):
        return "\n" + "| " * indent

    def ChildrenToString(self):
        """One line per direct child."""
        return "".join(str(child) + "\n" for child in self.childNodes)
def UCT(rootstate, itermax, verbose=False):
    """Conduct a UCT search for itermax iterations starting from rootstate.

    Returns the most-visited move from the root.  Assumes two alternating
    players (player 1 starts) and results in the range [0.0, 1.0].
    `verbose` is accepted for interface compatibility; the tree dumps are
    currently disabled, as in the original.
    """
    rootnode = Node(state=rootstate)
    for _ in range(itermax):
        node, state = rootnode, rootstate.Clone()

        # Selection: descend while fully expanded and non-terminal.
        while not node.untriedMoves and node.childNodes:
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expansion: add one random untried child, if any remain.
        if node.untriedMoves:
            move = random.choice(node.untriedMoves)
            state.DoMove(move)
            node = node.AddChild(move, state)

        # Simulation: random playout to a terminal state.
        moves = state.GetMoves()
        while moves:
            state.DoMove(random.choice(moves))
            moves = state.GetMoves()

        # Backpropagation: update every node on the path back to the root.
        while node is not None:
            node.Update(state.GetResult(node.playerJustMoved))
            node = node.parentNode

    # The most-visited child is the most robust choice.
    best = rootnode.childNodes[0]
    for child in rootnode.childNodes[1:]:
        if child.visits >= best.visits:  # >= mimics sorted()[-1] on ties
            best = child
    return best.move
def HumanPlayer(state):
    """Prompt the console user until a legal move for `state` is entered."""
    legal = state.GetMoves()
    while True:
        try:
            choice = int(input("Your move " + str(legal) + " : "))
        except ValueError:
            continue  # not an integer; prompt again
        if choice in legal:
            return choice
def RandomPlayer(state):
    """Pick a uniformly random legal move from `state`."""
    legal = state.GetMoves()
    return random.choice(legal)
def negamax(board, color, depth): # ##################################################
    """Depth-limited negamax search.

    Returns (value, best_moves): `value` is +1.0 / -1.0 / 0.0 for
    win / loss / draw-or-unknown, and `best_moves` is the list of
    equally-best moves (None at terminal or cut-off nodes).

    NOTE(review): `color` (1/-1) and `board.playerJustMoved` (1/2) are
    separate conventions; the terminal scoring below maps between them.
    Presumably the search is always entered with color=1 for the side
    about to move (see NegamaxPlayer) — verify before reusing elsewhere.
    """
    moves = board.GetMoves()
    if not moves:
        # Terminal position: score from the mover's own viewpoint.
        x = board.GetResult(board.playerJustMoved)
        # GetResult(playerJustMoved) can only be a win (1.0) or a draw
        # (0.5) here — 0.0 would mean an inconsistent state, so dump
        # debug info and abort.
        if x == 0.0:
            print('negamax ERROR:')
            print(board)
            print(board.playerJustMoved)
            print(board.curr, board.ended)
            print(board.GetMoves())
            raise ValueError
        if x == 0.5:
            return 0.0, None
        if color == 1 and board.playerJustMoved == 1:
            return 1.0, None
        else:
            return -1.0, None
    if depth == 0:
        # Depth cut-off: unknown positions are scored as draws.
        return 0.0, None
    v = float("-inf")
    best_move = []
    for m in moves:
        new_board = board.Clone()
        new_board.DoMove(m)
        # Negamax recursion: the opponent's best is our worst.
        x, _ = negamax(new_board, -color, depth - 1)
        x = - x
        if x >= v:
            if x > v:
                # Strictly better: restart the list of best moves.
                best_move = []
            v = x
            best_move.append(m)
    if depth >=8:
        # Debug trace for deep (near-root) searches.
        print(depth, v, best_move)
    return v, best_move
def NegamaxPlayer(game):
    """Pick a move with a depth-4 negamax search (random choice among ties).

    The opening move of a fresh 3x3 board (all 9 cells free) is played at
    random, skipping a pointless full search.
    """
    candidates = game.GetMoves()
    if len(candidates) == 9:
        return random.choice(candidates)
    _, candidates = negamax(game, 1, 4)
    print(candidates)
    return random.choice(candidates)
if __name__ == "__main__":
    def main():
        """Play a series of 10 games and print a win/loss/draw tally.

        In the current configuration the console human moves first as
        player 1 and UCT (itermax=100) plays as player 2; alternative
        opponents are kept commented out for quick swapping.
        """
        random.seed(123456789)  # fixed seed so runs are reproducible
        won = 0
        lost = 0
        draw = 0
        for i in range(10):
            # state = OXOState() # uncomment to play OXO
            state = BigGameState()
            move = 0
            while (state.GetMoves() != []):
                # playerJustMoved == 1 means it is player 2's turn.
                if state.playerJustMoved == 1:
                    # m = RandomPlayer(state)
                    m = UCT(rootstate=state, itermax=100, verbose=False)
                else:
                    # m = UCT(rootstate=state, itermax=100, verbose=False)
                    # m = NegamaxPlayer(state)
                    m = HumanPlayer(state)
                    # m = RandomPlayer(state)
                state.DoMove(m)
                move += 1
            print('Game ', i + 1, ', Move ', move, ':\n', state, sep='')
            # The tally is reported from player 1's viewpoint.
            if state.GetResult(1) == 1.0:
                won += 1
                print("Player 1 wins!")
            elif state.GetResult(1) == 0.0:
                lost += 1
                print("Player 2 wins!")
            else:
                draw += 1
                print("Nobody wins!")
        print('won', won, 'lost', lost, 'draw', draw)
    start_time = time.perf_counter()
    main()
    total_time = time.perf_counter() - start_time
    print('total_time', total_time)
一般来说,是的,alpha-beta 修剪 (a-b) 会提高回合制搜索的性能。然而,MCTS 有不同的方法。 a-b 通常取决于在应用时进行合理准确的电路板评估。 MCTS将一个个随机选择的分支推送到它的结论,然后根据每个节点的决策权重向上传播结果。
如果您想在做出 向下 决定时应用修剪,您可以这样做,但不是 添加 a-b,你混合了这两种方法。我不知道这种改进是否值得付出额外的编写和调试努力。
如 MCTS 站点上的性能警告中所述,如果您添加特定领域的知识,您将获得 巨大 的改进。对于九个棋盘中的每一个,只考虑已知最佳的移动。普通井字游戏的优先级是
- 如果可能的话,采取制胜法宝。
- 阻止对手获胜(如果可用)。
- 取中心方块。
- 取一个角方块(随机选择)。
- 取一个边格(随机选择)。
这套启发式绝不会输掉一场正常的比赛。它可以作为您的起点。添加代码以正确识别获胜后,您的程序将加速很多。
感谢 Alexander Lopatin 记住了 "never" 声明中的致命缺陷。我脸都红了,因为我已经教过这个案例至少三门课程:
- - - - - - -
亚历山大写道:
由于格式限制,我无法将其放在评论中。这是该策略输给 minimax 的一场比赛:
0 1 2 3 4 5 6 7
--- --- --- --- X-- X-- X-X X-X
--- --- -X- -XO -XO -XO -XO -XO
--- -O- -O- -O- -O- -OO -OO OOO
X 在第 4 步中随机取了一个错误的角方块。该算法应该在这一步中对抗可能的叉子。
- - - - - - -
再次修剪:
更糟糕的是,如果你知道 AI 正在使用我给出的算法进行游戏,你可以强制 获胜,而不是依赖于随机选择角球:
0 1 2 3 4 5
--- --- --- --O X-O X-O
--- --- -X- -X- -X- -X-
--- O-- O-- O-- O-- O-O ... and 'X' cannot block both winning moves.
在第 4 步,AI 需要看到即将到来的叉子,并通过任何一边移动(任一角都输)避开它,这将导致平局。
为了方便别人帮助我 我把所有代码都放在这里https://pastebin.com/WENzM41k 它将在 2 个代理相互竞争时开始。
我正在尝试实施 Monte Carlo 树搜索以在 Python 中玩 9 板井字棋。游戏规则类似于常规井字游戏,但有 9 个 3x3 子板。最后一块的放置位置决定了放置您的块的子板。这有点像终极井字游戏,但如果其中一个子棋盘获胜,游戏就结束了。
我正在尝试学习 MCTS,我在这里找到了一些代码: http://mcts.ai/code/python.html
我在网站上使用了节点 class 和 UCT class 并添加了我的 9 棋盘井字游戏状态 class 和一些其他代码。所有代码都在这里:
from math import log, sqrt
import random
import numpy as np
from copy import deepcopy
class BigGameState:
    """State of a 9-board tic-tac-toe game.

    board[b][c] holds the owner (0 empty, 1 or 2) of cell c of sub-board b;
    row/column 0 of the 10x10 array are unused padding so indices run 1-9.
    `curr` is the sub-board the next move must be played on (it is set to
    the cell index of the previous move).
    """

    # The eight winning triples of a 3x3 sub-board (1-based cell indices).
    WIN_TRIPLES = [(1, 2, 3), (4, 5, 6), (7, 8, 9),
                   (1, 4, 7), (2, 5, 8), (3, 6, 9),
                   (1, 5, 9), (3, 5, 7)]

    def __init__(self):
        self.board = np.zeros((10, 10), dtype="int8")
        self.curr = 1
        # At the root pretend player 2 just moved, so player 1 moves first.
        self.playerJustMoved = 2
        # BUG FIX: the game must stop as soon as one sub-board is won;
        # the original kept playing until the board was full.
        self.ended = False

    def Clone(self):
        """Create a deep clone of this game state."""
        st = BigGameState()
        st.playerJustMoved = self.playerJustMoved
        st.curr = self.curr
        st.board = deepcopy(self.board)
        st.ended = self.ended
        return st

    def DoMove(self, move):
        """Update the state by carrying out `move` (1-9) on the current
        sub-board.  Must update playerJustMoved.  Illegal moves are
        silently ignored (original behavior preserved)."""
        self.playerJustMoved = 3 - self.playerJustMoved
        if 1 <= move <= 9 and move == int(move) and self.board[self.curr][move] == 0:
            self.board[self.curr][move] = self.playerJustMoved
            # End the game as soon as this sub-board has a completed line.
            bo = self.board[self.curr]
            for x, y, z in self.WIN_TRIPLES:
                if bo[x] == bo[y] == bo[z] != 0:
                    self.ended = True
                    break
            self.curr = move

    def GetMoves(self):
        """Return the free cells of the current sub-board, [] once the game ended."""
        if self.ended:
            return []
        return [i for i in range(1, 10) if self.board[self.curr][i] == 0]

    def GetResult(self, playerjm):
        """Return the result from playerjm's viewpoint: 1.0 win, 0.0 loss,
        0.5 draw; None while the game is still running."""
        for bo in self.board:
            for x, y, z in self.WIN_TRIPLES:
                # BUG FIX: the original tested `bo[x] == [y] == bo[z]`
                # (a one-element list!), which is never true, so wins were
                # never detected.  Also skip all-empty lines so the unused
                # zero rows do not count as a result.
                if bo[x] == bo[y] == bo[z]:
                    if bo[x] == playerjm:
                        return 1.0
                    elif bo[x] != 0:
                        return 0.0
        if self.GetMoves() == []:
            return 0.5  # draw

    def drawboard(self):
        """Print the full 9-board position (uses the module-level print_board_row)."""
        print_board_row(self.board, 1, 2, 3, 1, 2, 3)
        print_board_row(self.board, 1, 2, 3, 4, 5, 6)
        print_board_row(self.board, 1, 2, 3, 7, 8, 9)
        print(" ------+-------+------")
        print_board_row(self.board, 4, 5, 6, 1, 2, 3)
        print_board_row(self.board, 4, 5, 6, 4, 5, 6)
        print_board_row(self.board, 4, 5, 6, 7, 8, 9)
        print(" ------+-------+------")
        print_board_row(self.board, 7, 8, 9, 1, 2, 3)
        print_board_row(self.board, 7, 8, 9, 4, 5, 6)
        print_board_row(self.board, 7, 8, 9, 7, 8, 9)
        print()
def print_board_row(board, a, b, c, i, j, k):
    """Print one visual row: cells (i, j, k) of the three sub-boards a, b, c."""
    # The marking script doesn't seem to like this either, so just take it out to submit
    left = (board[a][i], board[a][j], board[a][k])
    mid = (board[b][i], board[b][j], board[b][k])
    right = (board[c][i], board[c][j], board[c][k])
    print("", *left, end=" | ")
    print(*mid, end=" | ")
    print(*right)
class Node:
    """One node of the MCTS tree.

    `wins` is always counted from the viewpoint of `playerJustMoved`.
    A state must be supplied on construction.
    """

    def __init__(self, move=None, parent=None, state=None):
        self.move = move                # move that led here; None at the root
        self.parentNode = parent        # None at the root
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        self.untriedMoves = state.GetMoves()          # moves not yet expanded
        self.playerJustMoved = state.playerJustMoved  # all the node keeps of the state

    def UCTSelectChild(self):
        """Return the child maximising the UCB1 score (exploration weight 0.2)."""
        def ucb1(child):
            return child.wins / child.visits + 0.2 * sqrt(2 * log(self.visits) / child.visits)
        best = self.childNodes[0]
        for child in self.childNodes[1:]:
            if ucb1(child) >= ucb1(best):  # >= keeps the last maximum, like sorted()[-1]
                best = child
        return best

    def AddChild(self, m, s):
        """Create a child for move m reaching state s; drop m from untriedMoves."""
        child = Node(move=m, parent=self, state=s)
        self.untriedMoves.remove(m)
        self.childNodes.append(child)
        return child

    def Update(self, result):
        """Record one visit with `result` (from playerJustMoved's viewpoint)."""
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return f"[M:{self.move} W/V:{self.wins}/{self.visits} U:{self.untriedMoves}]"

    def TreeToString(self, indent):
        """Recursive, indented dump of the whole subtree."""
        text = self.IndentString(indent) + str(self)
        for child in self.childNodes:
            text += child.TreeToString(indent + 1)
        return text

    def IndentString(self, indent):
        return "\n" + "| " * indent

    def ChildrenToString(self):
        """One line per direct child."""
        return "".join(str(child) + "\n" for child in self.childNodes)
def UCT(rootstate, itermax, verbose=False):
    """Conduct a UCT search for itermax iterations starting from rootstate.

    Returns the most-visited move from the root.  Assumes two alternating
    players (player 1 starts) and results in the range [0.0, 1.0].
    """
    rootnode = Node(state=rootstate)
    for _ in range(itermax):
        node, state = rootnode, rootstate.Clone()

        # Selection: descend while fully expanded and non-terminal.
        while not node.untriedMoves and node.childNodes:
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expansion: add one random untried child, if any remain.
        if node.untriedMoves:
            move = random.choice(node.untriedMoves)
            state.DoMove(move)
            node = node.AddChild(move, state)

        # Simulation: random playout to a terminal state.
        moves = state.GetMoves()
        while moves:
            state.DoMove(random.choice(moves))
            moves = state.GetMoves()

        # Backpropagation: update every node on the path back to the root.
        while node is not None:
            node.Update(state.GetResult(node.playerJustMoved))
            node = node.parentNode

    # Diagnostic dump of the search tree (printed either way, as before).
    if verbose:
        print(rootnode.TreeToString(0))
    else:
        print(rootnode.ChildrenToString())

    # The most-visited child is the most robust choice.
    best = rootnode.childNodes[0]
    for child in rootnode.childNodes[1:]:
        if child.visits >= best.visits:  # >= mimics sorted()[-1] on ties
            best = child
    return best.move
def UCTPlayGame():
    """Play one full self-play game, both sides chosen by UCT, printing the
    board and the selected move at every turn."""
    state = BigGameState()  # swap in another state class to play a different game
    while state.GetMoves():
        state.drawboard()
        best = UCT(rootstate=state, itermax=1000, verbose=False)
        print("Best Move: " + str(best) + "\n")
        state.DoMove(best)
    # Report the outcome from the viewpoint of the player who moved last.
    result = state.GetResult(state.playerJustMoved)
    if result == 1.0:
        print("Player " + str(state.playerJustMoved) + " wins!")
    elif result == 0.0:
        print("Player " + str(3 - state.playerJustMoved) + " wins!")
    else:
        print("Nobody wins!")
# Script entry point: run one self-play game between two UCT players.
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players.
    """
    UCTPlayGame()
运行 它将在 2 个代理相互竞争时启动的代码。 但是,代理不能很好地玩游戏。糟糕的比赛是不可接受的。例如,如果 ai 在子棋盘中连续获得 2,并且又轮到他了,他就不会下获胜棋步。我应该从哪里开始改进以及如何改进?我试图更改 Node class 和 UCT class 的代码,但没有任何效果。
更新:如果棋盘处于以下状态,则轮到我的AI(打X)打棋盘8(第三排中间子棋盘)。我应该编写特定代码让 AI 不玩 1 或 5(因为那样对手会赢)还是应该让 AI 来决定。我尝试编写代码告诉 AI,但那样我必须遍历所有子板。
--O|---|---
-O-|---|---
---|---|---
-----------
---|-O-|---
---|-O-|---
---|---|---
-----------
---|---|---
---|---|---
---|---|---
我花了一些时间阅读有关 MCTS 的内容,并花了更多时间来捕捉其余的错误:
- 我添加了OXOState (tic-tac-toe),这样我就可以调试熟悉的简单游戏了。来自 http://mcts.ai/code/python.html 的原始源代码只有一个问题:它会在有人赢得游戏后继续播放。所以,我解决了这个问题。
- 为了调试和乐趣添加了 HumanPlayer。
- 为评价游戏水平增加了RandomPlayer和NegamaxPlayer(negamax算法https://en.wikipedia.org/wiki/Negamax)
NegamaxPlayer 与 UCT(Monte Carlo 树搜索)
itermax= won lost draw total_time
1 964 0 36 172.8
10 923 0 77 173.4
100 577 0 423 182.1
1000 48 0 952 328.9
10000 0 0 1000 1950.3
UCT 对完美玩家的表现相当令人印象深刻(minimax 进行了完整的树搜索):itermax=1000 时得分和比赛时间几乎相等 - 1000 场比赛中只有 48 场输掉比赛。
- 修复了 BigGameState 的其余(我认为)问题。现在打的很熟练,赢不了
我在 NegamaxPlayer 中添加了一个深度限制来玩 9 棋盘井字游戏,因为在这个游戏中可能需要一段时间才能找到最佳着法。
NegamaxPlayer(深度)与 UCT(itermax)
depth itermax won lost draw total_time
4 1 9 1 0 18.4
4 10 9 1 0 20.7
4 100 5 5 0 36.2
4 1000 2 8 0 188.8
5 10000 2 8 0 318.0
6 10000 0 10 0 996.5
现在 UCT(itermax=100)与 NegamaxPlayer(depth 4)打成同一水平,并以 8 比 2 赢下更高一级,惊艳! ;-)
我在该级别 (itermax=100) 玩过的第一场比赛赢了,但在 1000 级别输了第二场比赛:
Game 1, Move 40:
┏━━━━━━━┳━━━━━━━┳━━━━━━━┓
┃ X X . ┃*O O O ┃ O . . ┃
┃ . O O ┃ . . X ┃ . X O ┃
┃ O X X ┃ X . . ┃ . X . ┃
┣━━━━━━━╋━━━━━━━╋━━━━━━━┫
┃ X . . ┃ . X . ┃ O . . ┃
┃ . X . ┃ O O X ┃ O X . ┃
┃ . O . ┃ O . . ┃ X . . ┃
┣━━━━━━━╋━━━━━━━╋━━━━━━━┫
┃ X X O ┃ O . X ┃ . O X ┃
┃ X . . ┃ . . . ┃ . . . ┃
┃ . . O ┃ O . X ┃ . O . ┃
┗━━━━━━━┻━━━━━━━┻━━━━━━━┛
Player 2 wins!
won 0 lost 1 draw 0
完整代码如下:
from math import *
import random
import time
from copy import deepcopy
class BigGameState:
    """State of a 9-board tic-tac-toe game.

    board[b][c] is the owner (0 empty, 1 or 2) of cell c of sub-board b;
    index 0 of both dimensions is unused padding so boards/cells run 1-9.
    The cell of the previous move selects the next sub-board (`curr`);
    the game ends as soon as any sub-board is won (`ended`).
    """

    # Cell layout of a sub-board:
    #  1 2 3
    #  4 5 6
    #  7 8 9
    # WINNING_PAIRS[m] lists the cell pairs that complete a line with m.
    WINNING_PAIRS = [[],                                # 0 (unused)
                     [[2, 3], [5, 9], [4, 7]],          # for 1
                     [[1, 3], [5, 8]],                  # for 2
                     [[1, 2], [5, 7], [6, 9]],          # for 3
                     [[1, 7], [5, 6]],                  # for 4
                     [[1, 9], [2, 8], [3, 7], [4, 6]],  # for 5
                     [[3, 9], [4, 5]],                  # for 6
                     [[1, 4], [5, 3], [8, 9]],          # for 7
                     [[7, 9], [2, 5]],                  # for 8
                     [[7, 8], [1, 5], [3, 6]],          # for 9
                     ]

    def __init__(self):
        self.board = [[0 for i in range(10)] for j in range(10)]
        self.curr = 1
        # At the root pretend the player just moved is player 2,
        # so player 1 will have the first move.
        self.playerJustMoved = 2
        self.ended = False
        # Last move and its sub-board, used to place '*' in __repr__.
        self.last_move = None
        self.last_curr = None

    def Clone(self):
        """Return a deep copy of this state."""
        return deepcopy(self)

    def DoMove(self, move):
        """Play `move` (int 1-9) on the current sub-board.

        Raises ValueError for a non-int, out-of-range or occupied cell.
        Updates playerJustMoved, `ended`, and the current sub-board.
        """
        # BUG FIX: the original test `1 < move > 9` only rejected moves > 9,
        # so 0 and negative ints slipped through (negative values even
        # indexed from the end of the row).
        if not isinstance(move, int) or not 1 <= move <= 9 or \
                self.board[self.curr][move] != 0:
            raise ValueError
        self.playerJustMoved = 3 - self.playerJustMoved
        self.board[self.curr][move] = self.playerJustMoved
        for index1, index2 in self.WINNING_PAIRS[move]:
            if self.playerJustMoved == self.board[self.curr][index1] == \
                    self.board[self.curr][index2]:
                self.ended = True
        self.last_move = move
        self.last_curr = self.curr
        self.curr = move

    def GetMoves(self):
        """Return the free cells of the current sub-board, [] once the game ended."""
        if self.ended:
            return []
        return [i for i in range(1, 10) if self.board[self.curr][i] == 0]

    def GetResult(self, playerjm):
        """Return the result from playerjm's viewpoint: 1.0 win, 0.0 loss,
        0.5 draw.  Raises ValueError if the game is not over yet."""
        for bo in self.board:
            for x, y, z in [(1, 2, 3), (4, 5, 6), (7, 8, 9),
                            (1, 4, 7), (2, 5, 8), (3, 6, 9),
                            (1, 5, 9), (3, 5, 7)]:
                if bo[x] == bo[y] == bo[z]:
                    if bo[x] == playerjm:
                        return 1.0
                    elif bo[x] != 0:
                        return 0.0
        if not self.GetMoves():
            return 0.5  # draw
        raise ValueError

    def _one_board_string(self, a, row):
        # Three cells of sub-board `a` starting at cell index `row`.
        return ''.join([' ' + '.XO'[self.board[a][i+row]] for i in range(3)])

    def _three_board_line(self, index, row):
        # One printed line spanning three horizontally adjacent sub-boards.
        return '┃' + ''.join([self._one_board_string(i + index, row) + ' ┃' for i in range(3)])

    def __repr__(self):
        # Box-drawing rendering of the whole position; '*' marks the last move.
        s = '┏━━━━━━━┳━━━━━━━┳━━━━━━━┓\n'
        for i in [1, 4, 7]:
            for j in [1, 4, 7]:
                s += self._three_board_line(i, j) + '\n'
            if i != 7:
                s += '┣━━━━━━━╋━━━━━━━╋━━━━━━━┫\n'
            else:
                s += '┗━━━━━━━┻━━━━━━━┻━━━━━━━┛\n'
        # BUG FIX: guard against repr() before any move has been played
        # (the original crashed on `None - 1`).
        if self.last_move is None:
            return s
        # Hack: from the rows/columns of the last move and its sub-board,
        # compute the character offset where '*' is placed (each printed
        # line is 26 characters including the newline).
        c = self.last_curr - 1
        c_c = c % 3
        c_r = c // 3
        m_c = (self.last_move - 1) % 3
        m_r = (self.last_move - 1) // 3
        p = 26 + c_r * (26 * 4) + c_c * 8 + m_r * 26 + m_c * 2 + 1
        s = s[:p] + '*' + s[p+1:]
        return s
class OXOState:
    """Plain 3x3 tic-tac-toe, cells numbered 0-8 row-major."""

    # Cell layout:
    #  0 1 2
    #  3 4 5
    #  6 7 8
    # WINNING_PAIRS[m] lists the cell pairs that complete a line with m.
    WINNING_PAIRS = [[[1, 2], [4, 8], [3, 6]],          # for 0
                     [[0, 2], [4, 7]],                  # for 1
                     [[0, 1], [4, 6], [5, 8]],          # for 2
                     [[0, 6], [4, 5]],                  # for 3
                     [[0, 8], [1, 7], [2, 6], [3, 5]],  # for 4
                     [[2, 8], [3, 4]],                  # for 5
                     [[0, 3], [4, 2], [7, 8]],          # for 6
                     [[6, 8], [1, 4]],                  # for 7
                     [[6, 7], [0, 4], [2, 5]],          # for 8 (comment fixed: was "for 6")
                     ]

    def __init__(self):
        # Player 2 "just moved" at the root, so player 1 starts.
        self.playerJustMoved = 2
        self.ended = False
        self.board = [0, 0, 0, 0, 0, 0, 0, 0, 0]

    def Clone(self):
        """Return a deep copy of this state."""
        return deepcopy(self)

    def DoMove(self, move):
        """Play `move` (int 0-8) for the player to move.

        Raises ValueError for a non-int, out-of-range or occupied cell.
        BUG FIX: the original test `0 < move > 8` never rejected negative
        moves, which silently indexed from the end of the board.
        """
        if not isinstance(move, int) or not 0 <= move <= 8 or \
                self.board[move] != 0:
            raise ValueError
        self.playerJustMoved = 3 - self.playerJustMoved
        self.board[move] = self.playerJustMoved
        for index1, index2 in self.WINNING_PAIRS[move]:
            if self.playerJustMoved == self.board[index1] == self.board[index2]:
                self.ended = True

    def GetMoves(self):
        """Return the free cells, or [] once the game is over."""
        return [] if self.ended else [i for i in range(9) if self.board[i] == 0]

    def GetResult(self, playerjm):
        """Return 1.0/0.0/0.5 from playerjm's viewpoint; ValueError if not over."""
        for (x, y, z) in [(0, 1, 2), (3, 4, 5), (6, 7, 8), (0, 3, 6), (1, 4, 7),
                          (2, 5, 8), (0, 4, 8), (2, 4, 6)]:
            if self.board[x] == self.board[y] == self.board[z]:
                if self.board[x] == playerjm:
                    return 1.0
                elif self.board[x] != 0:
                    return 0.0
        if self.GetMoves() == []:
            return 0.5  # draw
        raise ValueError

    def __repr__(self):
        s = ""
        for i in range(9):
            s += '.XO'[self.board[i]]
            if i % 3 == 2:
                s += "\n"
        return s
class Node:
    """One node of the MCTS tree.

    `wins` is always counted from the viewpoint of `playerJustMoved`.
    A state must be supplied on construction.
    """

    def __init__(self, move=None, parent=None, state=None):
        self.move = move                # move that led here; None at the root
        self.parentNode = parent        # None at the root
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        self.untriedMoves = state.GetMoves()          # moves not yet expanded
        self.playerJustMoved = state.playerJustMoved  # all the node keeps of the state

    def UCTSelectChild(self):
        """Return the child maximising the UCB1 score (exploration weight 1)."""
        def ucb1(child):
            return child.wins / child.visits + sqrt(2 * log(self.visits) / child.visits)
        best = self.childNodes[0]
        for child in self.childNodes[1:]:
            if ucb1(child) >= ucb1(best):  # >= keeps the last maximum, like sorted()[-1]
                best = child
        return best

    def AddChild(self, m, s):
        """Create a child for move m reaching state s; drop m from untriedMoves."""
        child = Node(move=m, parent=self, state=s)
        self.untriedMoves.remove(m)
        self.childNodes.append(child)
        return child

    def Update(self, result):
        """Record one visit with `result` (from playerJustMoved's viewpoint)."""
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return f"[M:{self.move} W/V:{self.wins}/{self.visits} U:{self.untriedMoves}]"

    def TreeToString(self, indent):
        """Recursive, indented dump of the whole subtree."""
        text = self.IndentString(indent) + str(self)
        for child in self.childNodes:
            text += child.TreeToString(indent + 1)
        return text

    def IndentString(self, indent):
        return "\n" + "| " * indent

    def ChildrenToString(self):
        """One line per direct child."""
        return "".join(str(child) + "\n" for child in self.childNodes)
def UCT(rootstate, itermax, verbose=False):
    """Run a UCT search for *itermax* iterations starting from *rootstate*.

    Returns the most-visited move at the root.  Assumes two alternating
    players (player 1 starts) and game results in the range [0.0, 1.0].
    """
    root = Node(state=rootstate)

    for _ in range(itermax):
        current = root
        state = rootstate.Clone()

        # Selection: descend while the node is fully expanded and non-terminal.
        while current.untriedMoves == [] and current.childNodes != []:
            current = current.UCTSelectChild()
            state.DoMove(current.move)

        # Expansion: add one random untried child, if the node is non-terminal.
        if current.untriedMoves != []:
            move = random.choice(current.untriedMoves)
            state.DoMove(move)
            current = current.AddChild(move, state)

        # Simulation: play uniformly random moves until the game ends.
        # (Often much faster with a dedicated state.GetRandomMove().)
        moves = state.GetMoves()
        while moves != []:
            state.DoMove(random.choice(moves))
            moves = state.GetMoves()

        # Backpropagation: push the terminal result up to the root, scoring
        # each node from the viewpoint of its own playerJustMoved.
        while current is not None:
            current.Update(state.GetResult(current.playerJustMoved))
            current = current.parentNode

    # Most-visited root child wins; ties go to the last child, matching the
    # original sorted()[-1] behaviour.
    best = root.childNodes[0]
    for child in root.childNodes[1:]:
        if child.visits >= best.visits:
            best = child
    return best.move
def HumanPlayer(state):
    """Prompt on stdin until a legal move for *state* is entered; return it."""
    legal = state.GetMoves()
    prompt = "Your move " + str(legal) + " : "
    while True:
        try:
            choice = int(input(prompt))
        except ValueError:
            continue  # non-numeric input: ask again
        if choice in legal:
            return choice
def RandomPlayer(state):
    """Return a uniformly random legal move for *state*."""
    legal = state.GetMoves()
    return random.choice(legal)
def negamax(board, color, depth):  # ##################################################
    """Fixed-depth negamax search over *board*.

    *color* is +1/-1 and flips on each recursion; *depth* bounds the lookahead.
    Returns (value, best_moves) where best_moves is the list of equally-best
    moves at this node (None at terminal / depth-zero leaves).

    NOTE(review): in conventional negamax a decided terminal position is
    always a loss (-1.0) for the side to move, independent of *color*.  The
    extra `color == 1 and board.playerJustMoved == 1` case below returns +1.0
    instead — confirm this asymmetry is intended and not a viewpoint bug.
    """
    moves = board.GetMoves()
    if not moves:
        # Terminal position: no legal moves remain.  GetResult is queried
        # from the viewpoint of the player who just moved (the winner, if any).
        x = board.GetResult(board.playerJustMoved)
        if x == 0.0:
            # From the winner's own viewpoint the result should never be a
            # loss; dump the position for debugging before bailing out.
            print('negamax ERROR:')
            print(board)
            print(board.playerJustMoved)
            print(board.curr, board.ended)
            print(board.GetMoves())
            raise ValueError
        if x == 0.5:
            return 0.0, None  # draw scores as neutral
        if color == 1 and board.playerJustMoved == 1:
            return 1.0, None
        else:
            return -1.0, None
    if depth == 0:
        return 0.0, None  # depth cutoff: unresolved positions score neutral
    v = float("-inf")
    best_move = []
    for m in moves:
        new_board = board.Clone()
        new_board.DoMove(m)
        x, _ = negamax(new_board, -color, depth - 1)
        x = - x  # negamax recurrence: negate the opponent's value
        if x >= v:
            if x > v:
                best_move = []  # strictly better: discard the old tie list
            v = x
            best_move.append(m)
    if depth >=8:
        print(depth, v, best_move)  # debug trace for nodes near the root only
    return v, best_move
def NegamaxPlayer(game):
    """Choose a move via depth-4 negamax; the opening move is purely random."""
    candidates = game.GetMoves()
    if len(candidates) != 9:
        # Not the opening move: let negamax narrow the candidate list.
        _, candidates = negamax(game, 1, 4)
        print(candidates)
    return random.choice(candidates)
if __name__ == "__main__":
    import time  # FIX: time.perf_counter() is used below but was never imported

    def main():
        """Play 10 games of 9-board tic-tac-toe and tally the results.

        Player 1 is the UCT agent; player 2 is a human on stdin (swap the
        commented lines to pit other agents against each other).
        """
        random.seed(123456789)  # reproducible move sequences
        won = 0
        lost = 0
        draw = 0
        for i in range(10):
            # state = OXOState() # uncomment to play OXO
            state = BigGameState()
            move = 0
            while state.GetMoves() != []:
                if state.playerJustMoved == 1:
                    # m = RandomPlayer(state)
                    m = UCT(rootstate=state, itermax=100, verbose=False)
                else:
                    # m = UCT(rootstate=state, itermax=100, verbose=False)
                    # m = NegamaxPlayer(state)
                    m = HumanPlayer(state)
                    # m = RandomPlayer(state)
                state.DoMove(m)
                move += 1
                print('Game ', i + 1, ', Move ', move, ':\n', state, sep='')
            # Score the finished game from player 1's point of view.
            if state.GetResult(1) == 1.0:
                won += 1
                print("Player 1 wins!")
            elif state.GetResult(1) == 0.0:
                lost += 1
                print("Player 2 wins!")
            else:
                draw += 1
                print("Nobody wins!")
        print('won', won, 'lost', lost, 'draw', draw)

    start_time = time.perf_counter()
    main()
    total_time = time.perf_counter() - start_time
    print('total_time', total_time)
一般来说,是的,alpha-beta 剪枝 (a-b) 会提高回合制博弈搜索的性能。然而,MCTS 采用的是不同的思路:a-b 通常依赖于在剪枝时能对棋盘局面做出相当准确的评估,而 MCTS 则是把随机选出的分支一路模拟到终局,再把结果沿每个节点的决策权重向上回传。
如果你想在沿树向下选择走法时应用剪枝,当然可以这样做,但那并不是在 MCTS 上"添加" a-b,而是把两种方法混合在一起。我不确定这种改进是否值得额外的编写和调试成本。
如 MCTS 站点上的性能警告中所述,如果您添加特定领域的知识,您将获得 巨大 的改进。对于九个棋盘中的每一个,只考虑已知最佳的移动。普通井字游戏的优先级是
- 如果存在制胜的一步,就走它。
- 阻止对手获胜(如果可用)。
- 取中心方块。
- 取一个角方块(随机选择)。
- 取一个边格(随机选择)。
这套启发式绝不会输掉一场正常的比赛。它可以作为您的起点。添加代码以正确识别获胜后,您的程序将加速很多。
感谢 Alexander Lopatin 指出了上面"绝不会输"这一断言中的致命缺陷。我脸都红了,因为这个反例我已经在至少三门课程里讲过:
- - - - - - -
亚历山大写道:
由于格式限制,我无法将其放在评论中。这是该策略输给 minimax 的一场比赛:
0 1 2 3 4 5 6 7
--- --- --- --- X-- X-- X-X X-X
--- --- -X- -XO -XO -XO -XO -XO
--- -O- -O- -O- -O- -OO -OO OOO
X 在第 4 步随机选了一个错误的角格。算法本应在这一步防守对手可能形成的双重威胁(fork)。
- - - - - - -
再次修剪:
更糟糕的是,如果你知道 AI 正在使用我给出的算法,你可以强制获胜,而不必依赖它随机选错角格:
0 1 2 3 4 5
--- --- --- --O X-O X-O
--- --- -X- -X- -X- -X-
--- O-- O-- O-- O-- O-O ... and 'X' cannot block both winning moves.
在第 4 步,AI 需要预见即将形成的双重威胁(fork),并通过走任意一个边格来避开它(走任一角格都会输),这样最终会是平局。