Tic Tac Toe 的 Minimax 算法 Python
Minimax algorithm for Tic Tac Toe Python
我有点理解 minimax 算法如何用于 Tic Tac Toe python 但我不知道如何在 Python 中实际编码...这就是我目前所知道的:
from copy import deepcopy
class TicTacToeBrain :
def __init__(self, player = "x") :
self._squares = {}
self._copySquares = {}
self._winningCombos = (
[0, 1, 2], [3, 4, 5], [6, 7, 8],
[0, 3, 6], [1, 4, 7], [2, 5, 8],
[0, 4, 8], [2, 4, 6])
def createBoard(self) :
for i in range(9) :
self._squares[i] = None
print(self._squares)
def showBoard(self) :
print(self._squares[0], self._squares[1], self._squares[2])
print(self._squares[3], self._squares[4], self._squares[5])
print(self._squares[6], self._squares[7], self._squares[8])
def getAvailableMoves(self) :
self._availableMoves = []
for i in range(9) :
if self._squares[i] == None :
self._availableMoves.append(i)
return self._availableMoves
def makeMove(self, position, player) :
self._squares[position] = player
self.showBoard()
def complete(self) :
if None not in self._squares.values() :
return True
if self.getWinner() != None :
return True
return False
def getWinner(self) :
for player in ("x", "o") :
for combos in self._winningCombos :
if self._squares[combos[0]] == player and self._squares[combos[1]] == player and self._squares[combos[2]] == player :
return player
if None not in self._squares.values() :
return "tie"
return None
def getEnemyPlayer(self, player) :
if player == "x" :
return "o"
return "x"
def minimax(self, node, player, depth = 0, first = True) :
if first :
best = 0
self._copySquares = deepcopy(self._squares)
if node.complete() :
if node.getWinner() == "x" :
self._squares = self._copySquares
return -1 - depth
elif node.getWinner() == "tie" :
self._squares = self._copySquares
return 0
elif node.getWinner() == "o" :
self._squares = self._copySquares
return 1 + depth
best = None
for move in node.getAvailableMoves() :
depth += 1
node.makeMove(move, player)
print()
val = self.minimax(node, node.getEnemyPlayer(player), depth, first = False)
print(val)
if player == "o" :
if val > best :
best = val
else :
if val < best :
best = val
return best
print()
print()
def printCopy(self) :
print(self._copySquares)
然而,它从来没有打印出所有的场景....请有人帮忙!!!这是周一的一个项目..
一些问题:
执行在第一次迭代时以 return
跳出 for
循环:这是不成熟的,因为您永远无法测试任何其他可用的移动。 return
应该发生在 之后 循环。
在for
循环的每次迭代中递增深度值是错误的。相反,将 depth+1
传递给递归调用,这样当你从那里 return 时,你会在相同的深度继续。
在递归调用之前完成的移动,必须在它之后立即收回,否则 for
循环的下一次迭代将不会从相同的位置开始。
best
的值需要在 每次 调用 minimax 方法时初始化,而不仅仅是在递归树的顶部。这个初始值不应该是0,因为当前用户的最佳值可能比0还差。所以你需要将它初始化为一个极差的值。
minimax方法不return最好的着法,只有评估值。由于该方法的全部目的是告诉您应该下哪一步,因此您需要两者。因此,让方法 return 具有两个值的元组:评估值和生成该值的移动。
一些非关键问题:
因为你想延迟不可避免的损失,或者加速强制获胜,所以计算玩家获胜时的价值的公式应该越远越接近0,而不是更近了。因此需要对该公式进行更改。
既然你应该通过撤退来恢复棋盘,就没有必要使用重复的棋盘和重复的方块。如果一切都很好地编码,在 minimax 方法的顶部调用完成后,板应该处于与调用之前完全相同的状态。
当您不使用 None
表示空方块,而是使用单个字符(如“.”)时,电路板的打印效果会更好。因此,在您引用空方块值的任何地方,都放置该字符。
为了分隔输出,这里和那里有 print()
。将一个放在方法 showBoard
中,您的其余代码可以没有它们。
鉴于以上几点,您不需要 node
也不需要 minimax
方法的 first
参数。
这是一个经过注释、更正的版本。我保留了您的原始台词,但在需要的地方将其注释掉。
# *** not needed:
# from copy import deepcopy
class TicTacToeBrain :
def __init__(self, player = "x") :
self._squares = {}
self._copySquares = {}
self._winningCombos = (
[0, 1, 2], [3, 4, 5], [6, 7, 8],
[0, 3, 6], [1, 4, 7], [2, 5, 8],
[0, 4, 8], [2, 4, 6])
def createBoard(self) :
for i in range(9) :
# *** use a single character, ... easier to print
self._squares[i] = "."
print(self._squares)
def showBoard(self) :
# *** add empty line here, instead of in minimax
print ()
print(self._squares[0], self._squares[1], self._squares[2])
print(self._squares[3], self._squares[4], self._squares[5])
print(self._squares[6], self._squares[7], self._squares[8])
def getAvailableMoves(self) :
self._availableMoves = []
for i in range(9) :
# *** see above
if self._squares[i] == "." :
self._availableMoves.append(i)
return self._availableMoves
def makeMove(self, position, player) :
self._squares[position] = player
self.showBoard()
def complete(self) :
# *** see above
if "." not in self._squares.values() :
return True
if self.getWinner() != None :
return True
return False
def getWinner(self) :
for player in ("x", "o") :
for combos in self._winningCombos :
if self._squares[combos[0]] == player and self._squares[combos[1]] == player and self._squares[combos[2]] == player :
return player
# *** see above
if "." not in self._squares.values() :
return "tie"
return None
def getEnemyPlayer(self, player) :
if player == "x" :
return "o"
return "x"
# *** no need for `node` argument, nor `first`
# *** use `self` instead of `node` in all this method
def minimax(self, player, depth = 0) :
# *** not needed
# if first :
# best = 0
# *** not needed
# self._copySquares = deepcopy(self._squares)
# *** always start with initilisation of `best`, but with worst possible value
# for this player
if player == "o":
best = -10
else:
best = 10
if self.complete() :
if self.getWinner() == "x" :
# *** don't do this, you may still need the position to try other moves
# self._squares = self._copySquares
# *** value should be closer to zero for greater depth!
# *** expect tuple return value
return -10 + depth, None
elif self.getWinner() == "tie" :
# self._squares = self._copySquares
# *** expect tuple return value
return 0, None
elif self.getWinner() == "o" :
# self._squares = self._copySquares
# *** value should be closer to zero for greater depth!
# *** expect tuple return value
return 10 - depth, None
# *** Execution can never get here
# best = None
for move in self.getAvailableMoves() :
# *** don't increase depth in each iteration, instead pass depth+1 to
# the recursive call
# depth += 1
self.makeMove(move, player)
# *** pass depth+1, no need for passing `node` nor `first`.
# *** expect tuple return value
val, _ = self.minimax(self.getEnemyPlayer(player), depth+1)
print(val)
# *** undo last move
self.makeMove(move, ".")
if player == "o" :
if val > best :
# *** Also keep track of the actual move
best, bestMove = val, move
else :
if val < best :
# *** Also keep track of the actual move
best, bestMove = val, move
# *** don't interrupt the loop here!
# return best
# *** this is dead code:
# print()
# print()
# *** Also keep track of the actual move
return best, bestMove
def printCopy(self) :
print(self._copySquares)
这是一个关于如何使用 class:
的例子
game = TicTacToeBrain()
game.createBoard()
game.makeMove(4, "o")
game.makeMove(3, "x")
val, bestMove = game.minimax("o")
print('best move', bestMove) # --> 0 is a winning move.
在eval.in上看到运行...等待它。
有些事情您仍然可以改进
我不会为此提供代码,但您可以:
跟踪 self.player
轮到谁了。这样你就不必将玩家作为参数传递给 minimax,这样可以避免错误。它还使构造函数参数变得有用——目前你不需要对它做任何事情。
添加一个方法bestMove
,它将简单地调用minimax
,但只会 return最好的一步,不是价值。这样会更容易管理。
使用 alpha-beta p运行ing,这样你就可以停止评估其他动作,当很明显你无法提高递归树中已经达到的值时。
我有点理解 minimax 算法如何用于 Tic Tac Toe python 但我不知道如何在 Python 中实际编码...这就是我目前所知道的:
from copy import deepcopy
class TicTacToeBrain :
def __init__(self, player = "x") :
self._squares = {}
self._copySquares = {}
self._winningCombos = (
[0, 1, 2], [3, 4, 5], [6, 7, 8],
[0, 3, 6], [1, 4, 7], [2, 5, 8],
[0, 4, 8], [2, 4, 6])
def createBoard(self) :
for i in range(9) :
self._squares[i] = None
print(self._squares)
def showBoard(self) :
print(self._squares[0], self._squares[1], self._squares[2])
print(self._squares[3], self._squares[4], self._squares[5])
print(self._squares[6], self._squares[7], self._squares[8])
def getAvailableMoves(self) :
self._availableMoves = []
for i in range(9) :
if self._squares[i] == None :
self._availableMoves.append(i)
return self._availableMoves
def makeMove(self, position, player) :
self._squares[position] = player
self.showBoard()
def complete(self) :
if None not in self._squares.values() :
return True
if self.getWinner() != None :
return True
return False
def getWinner(self) :
for player in ("x", "o") :
for combos in self._winningCombos :
if self._squares[combos[0]] == player and self._squares[combos[1]] == player and self._squares[combos[2]] == player :
return player
if None not in self._squares.values() :
return "tie"
return None
def getEnemyPlayer(self, player) :
if player == "x" :
return "o"
return "x"
def minimax(self, node, player, depth = 0, first = True) :
if first :
best = 0
self._copySquares = deepcopy(self._squares)
if node.complete() :
if node.getWinner() == "x" :
self._squares = self._copySquares
return -1 - depth
elif node.getWinner() == "tie" :
self._squares = self._copySquares
return 0
elif node.getWinner() == "o" :
self._squares = self._copySquares
return 1 + depth
best = None
for move in node.getAvailableMoves() :
depth += 1
node.makeMove(move, player)
print()
val = self.minimax(node, node.getEnemyPlayer(player), depth, first = False)
print(val)
if player == "o" :
if val > best :
best = val
else :
if val < best :
best = val
return best
print()
print()
def printCopy(self) :
print(self._copySquares)
然而,它从来没有打印出所有的场景....请有人帮忙!!!这是周一的一个项目..
一些问题:
执行在第一次迭代时以
return
跳出for
循环:这是不成熟的,因为您永远无法测试任何其他可用的移动。return
应该发生在 之后 循环。在
for
循环的每次迭代中递增深度值是错误的。相反,将depth+1
传递给递归调用,这样当你从那里 return 时,你会在相同的深度继续。在递归调用之前完成的移动,必须在它之后立即收回,否则
for
循环的下一次迭代将不会从相同的位置开始。best
的值需要在 每次 调用 minimax 方法时初始化,而不仅仅是在递归树的顶部。这个初始值不应该是0,因为当前用户的最佳值可能比0还差。所以你需要将它初始化为一个极差的值。minimax方法不return最好的着法,只有评估值。由于该方法的全部目的是告诉您应该下哪一步,因此您需要两者。因此,让方法 return 具有两个值的元组:评估值和生成该值的移动。
一些非关键问题:
因为你想延迟不可避免的损失,或者加速强制获胜,所以计算玩家获胜时的价值的公式应该越远越接近0,而不是更近了。因此需要对该公式进行更改。
既然你应该通过撤退来恢复棋盘,就没有必要使用重复的棋盘和重复的方块。如果一切都很好地编码,在 minimax 方法的顶部调用完成后,板应该处于与调用之前完全相同的状态。
当您不使用
None
表示空方块,而是使用单个字符(如“.”)时,电路板的打印效果会更好。因此,在您引用空方块值的任何地方,都放置该字符。为了分隔输出,这里和那里有
print()
。将一个放在方法showBoard
中,您的其余代码可以没有它们。鉴于以上几点,您不需要
node
也不需要minimax
方法的first
参数。
这是一个经过注释、更正的版本。我保留了您的原始台词,但在需要的地方将其注释掉。
# *** not needed:
# from copy import deepcopy
class TicTacToeBrain :
def __init__(self, player = "x") :
self._squares = {}
self._copySquares = {}
self._winningCombos = (
[0, 1, 2], [3, 4, 5], [6, 7, 8],
[0, 3, 6], [1, 4, 7], [2, 5, 8],
[0, 4, 8], [2, 4, 6])
def createBoard(self) :
for i in range(9) :
# *** use a single character, ... easier to print
self._squares[i] = "."
print(self._squares)
def showBoard(self) :
# *** add empty line here, instead of in minimax
print ()
print(self._squares[0], self._squares[1], self._squares[2])
print(self._squares[3], self._squares[4], self._squares[5])
print(self._squares[6], self._squares[7], self._squares[8])
def getAvailableMoves(self) :
self._availableMoves = []
for i in range(9) :
# *** see above
if self._squares[i] == "." :
self._availableMoves.append(i)
return self._availableMoves
def makeMove(self, position, player) :
self._squares[position] = player
self.showBoard()
def complete(self) :
# *** see above
if "." not in self._squares.values() :
return True
if self.getWinner() != None :
return True
return False
def getWinner(self) :
for player in ("x", "o") :
for combos in self._winningCombos :
if self._squares[combos[0]] == player and self._squares[combos[1]] == player and self._squares[combos[2]] == player :
return player
# *** see above
if "." not in self._squares.values() :
return "tie"
return None
def getEnemyPlayer(self, player) :
if player == "x" :
return "o"
return "x"
# *** no need for `node` argument, nor `first`
# *** use `self` instead of `node` in all this method
def minimax(self, player, depth = 0) :
# *** not needed
# if first :
# best = 0
# *** not needed
# self._copySquares = deepcopy(self._squares)
# *** always start with initilisation of `best`, but with worst possible value
# for this player
if player == "o":
best = -10
else:
best = 10
if self.complete() :
if self.getWinner() == "x" :
# *** don't do this, you may still need the position to try other moves
# self._squares = self._copySquares
# *** value should be closer to zero for greater depth!
# *** expect tuple return value
return -10 + depth, None
elif self.getWinner() == "tie" :
# self._squares = self._copySquares
# *** expect tuple return value
return 0, None
elif self.getWinner() == "o" :
# self._squares = self._copySquares
# *** value should be closer to zero for greater depth!
# *** expect tuple return value
return 10 - depth, None
# *** Execution can never get here
# best = None
for move in self.getAvailableMoves() :
# *** don't increase depth in each iteration, instead pass depth+1 to
# the recursive call
# depth += 1
self.makeMove(move, player)
# *** pass depth+1, no need for passing `node` nor `first`.
# *** expect tuple return value
val, _ = self.minimax(self.getEnemyPlayer(player), depth+1)
print(val)
# *** undo last move
self.makeMove(move, ".")
if player == "o" :
if val > best :
# *** Also keep track of the actual move
best, bestMove = val, move
else :
if val < best :
# *** Also keep track of the actual move
best, bestMove = val, move
# *** don't interrupt the loop here!
# return best
# *** this is dead code:
# print()
# print()
# *** Also keep track of the actual move
return best, bestMove
def printCopy(self) :
print(self._copySquares)
这是一个关于如何使用 class:
的例子game = TicTacToeBrain()
game.createBoard()
game.makeMove(4, "o")
game.makeMove(3, "x")
val, bestMove = game.minimax("o")
print('best move', bestMove) # --> 0 is a winning move.
在eval.in上看到运行...等待它。
有些事情您仍然可以改进
我不会为此提供代码,但您可以:
跟踪
self.player
轮到谁了。这样你就不必将玩家作为参数传递给 minimax,这样可以避免错误。它还使构造函数参数变得有用——目前你不需要对它做任何事情。添加一个方法
bestMove
,它将简单地调用minimax
,但只会 return最好的一步,不是价值。这样会更容易管理。使用 alpha-beta p运行ing,这样你就可以停止评估其他动作,当很明显你无法提高递归树中已经达到的值时。