Connect 4 Alpha-beta 修剪可能失败:(
Connect 4 Alpha-beta pruning possibly fails :(
我已经在人工智能问题上工作了一段时间,这周我尝试编写 AI 代码以将 4 与 python 连接起来。首先我在复制板时遇到了问题,但我发现在 python 中你需要使用 deepcopy 来避免复制错误。
最后我设法创建了 alpha-beta 剪枝算法并且它工作正常但后来我测试了深度 8 中的算法与深度 6 中的在线 alpha-beta 剪枝算法,令人惊讶的是我的算法丢失了。我与哈佛的导师一起创建了评估函数,并从 msavenski 的代码中修改了 alpha-beta(链接在代码中)
一些一直在处理这些问题的人能否再检查一下我的算法和评估函数是否按预期工作,因为我很确定其中有一些错误。我知道我可以使用换位表、深度迭代等来使代码更快、更有效,但我的另一个目标是保持简单。
这是我的代码:
# -*- coding: utf-8 -*-
import copy
class ConnectFour:
def __init__(self):
self.moves = 0 #The count of moves, 42 moves is equal than board is full
self.turn = 0 #Use this variable to recognize which one player turn is it
def PrintGameBoard(self, board):
print(' 0 1 2 3 4 5 6') # This function just raws a board
for i in range(5, -1, -1):
print('|---|---|---|---|---|---|---|')
print("| ",end="")
for j in range(7):
print(board[i][j],end="")
if j != 6:
print(" | ",end="")
else:
print(" |")
print('`---------------------------´')
def LegalRow(self, col, board):
stacks = [[x[i] for x in board] for i in range(len(board[0]))] # This function checks stack of given column and return the row where you can draw mark. If the stack is full return -1
countofitems = stacks[col].count("x") + stacks[col].count("o") # count of items in stack
if (countofitems) < 6:
return (countofitems)
else:
return -1
def LegalMoves(self, board):
legalmoves = []
stacks = [[x[i] for x in board] for i in range(len(board[0]))]
order = [3,2,4,1,5,0,6]
for i in order:
if self.LegalRow(i, board)!=-1:
legalmoves.append(i)
return legalmoves
def MakeMove(self, board, col, player, row):
board[row][col] = player # This function make a move and increases count of moves
self.moves += 1
return board
def UnmakeMove(self, board, col, row):
board[row][col] = " " # This function make a move and increases count of moves
self.moves -= 1
return board
def IsWinning(self, currentplayer, board):
for i in range(6): # This function returns True or False depending on if current player have four "tila" in a row (win)
for j in range(4):
if board[i][j] == currentplayer and board[i][j+1] == currentplayer and board[i][j+2] == currentplayer and board[i][j+3] == currentplayer:
return True
for i in range(3):
for j in range(7):
if board[i][j] == currentplayer and board[i+1][j] == currentplayer and board[i+2][j] == currentplayer and board[i+3][j] == currentplayer:
return True
for i in range(3):
for j in range(4):
if board[i][j] == currentplayer and board[i+1][j+1] == currentplayer and board[i+2][j+2] == currentplayer and board[i+3][j+3] == currentplayer:
return True
for i in range(3,6):
for j in range(4):
if board[i][j] == currentplayer and board[i-1][j+1] == currentplayer and board[i-2][j+2] == currentplayer and board[i-3][j+3] == currentplayer:
return True
return False
def PlayerMove(self, board, player):
allowedmove = False # This function ask players move when its his turn and returns board after making move.
while not allowedmove:
try:
print("Choose a column where you want to make your move (0-6): ",end="")
col =input()
col=int(col)
row = self.LegalRow(col, board)
except (NameError, ValueError, IndexError, TypeError, SyntaxError) as e:
print("Give a number as an integer between 0-6!")
else:
if row != -1 and (col<=6 and col>=0):
board[row][int(col)] = player
self.moves += 1
allowedmove = True
elif col>6 or col<0:
print("The range was 0-6!!!")
else:
print("This column is full")
return board
def SwitchPlayer(self, player): # This function gives opponent player's mark
if player=="x":
nextplayer="o"
else:
nextplayer="x"
return nextplayer
def evaluation(self, board): # This function evaluate gameboard (heuristic). The rules of evaluation are in site: http://isites.harvard.edu/fs/docs/icb.topic788088.files/Assignment%203/asst3c.pdf
list = []
player = "x"
opponent = "o"
if self.IsWinning(player, board):
score = -512
elif self.IsWinning(opponent, board):
score = +512
elif self.moves==42:
score=0
else:
score = 0
for i in range(6): #append to list horizontal segments
for j in range(4):
list.append([str(board[i][j]),str(board[i][j+1]),str(board[i][j+2]),str(board[i][j+3])])
for i in range(3): #append to list vertical segments
for j in range(7):
list.append([str(board[i][j]),str(board[i+1][j]),str(board[i+2][j]),str(board[i+3][j])])
for i in range(3): #append to list diagonal segments
for j in range(4):
list.append([str(board[i][j]),str(board[i+1][j+2]),str(board[i+2][j+2]),str(board[i+3][j+3])])
for i in range(3, 6): #append to list diagonal segments
for j in range(4):
list.append([str(board[i][j]),str(board[i-1][j+2]),str(board[i-2][j+2]),str(board[i-3][j+3])])
for k in range(len(list)): #add to score with rules of site above
if ((list[k].count(str("x"))>0) and (list[k].count(str("o"))>0)) or list[k].count(" ")==4:
score += 0
if list[k].count(player)==1 and list[k].count(opponent)==0:
score -= 1
if list[k].count(player)==2 and list[k].count(opponent)==0:
score -= 10
if list[k].count(player)==3 and list[k].count(opponent)==0:
score -= 50
if list[k].count(opponent)==1 and list[k].count(player)==0:
score += 1
if list[k].count(opponent)==2 and list[k].count(player)==0:
score += 10
if list[k].count(opponent)==3 and list[k].count(player)==0:
score += 50
if self.turn==player:
score -= 16
else:
score += 16
return score
def maxfunction(self, board, depth, player, alpha, beta):
opponent = self.SwitchPlayer(player)
self.turn = opponent
legalmoves = self.LegalMoves(board)
if (depth==0) or self.moves==42:
return self.evaluation(board)
value=-1000000000
for col in legalmoves:
row = self.LegalRow(col, board)
newboard = self.MakeMove(board, col, opponent, row)
value = max(value, self.minfunction(board, depth-1, opponent,alpha, beta))
newboard = self.UnmakeMove(board, col, row)
if value >= beta:
return value
alpha = max(alpha, value)
return value
def minfunction(self, board, depth, opponent, alpha, beta):
player = self.SwitchPlayer(opponent)
self.turn = player
legalmoves = self.LegalMoves(board)
if (depth==0) or self.moves==42:
return evaluation(board)
value=1000000000
for col in legalmoves:
row = self.LegalRow(col, board)
newboard = self.MakeMove(board, col, player, row)
value = min(value, self.maxfunction(board, depth-1, player ,alpha, beta))
newboard = self.UnmakeMove(board, col, row)
if value <= alpha:
return value
beta = min(beta, value)
return value
def alphabetapruning(self, board, depth, opponent, alpha, beta): #This is the alphabeta-function modified from: https://github.com/msaveski/connect-four
values = []
cols = []
value = -1000000000
for col in self.LegalMoves(board):
row = self.LegalRow(col, board)
board = self.MakeMove(board, col, opponent, row)
value = max(value, self.minfunction(board, depth-1, opponent, alpha, beta))
values.append(value)
cols.append(col)
board = self.UnmakeMove(board, col, row)
largestvalue= max(values)
print(cols)
print(values)
for i in range(len(values)):
if largestvalue==values[i]:
position = cols[i]
return largestvalue, position
def searchingfunction(self, board, depth, opponent):
#This function update turn to opponent and calls alphabeta (main algorithm) and after that update new board (add alphabeta position to old board) and returns new board.
newboard = copy.deepcopy(board)
value, position=self.alphabetapruning(newboard, depth, opponent, -10000000000, 10000000000)
board = self.MakeMove(board, position, opponent, self.LegalRow(position, board))
return board
def PlayerGoesFirst():
print("Player is X and AI is O") #This function just ask who goes first
player = 'x'
opponent = 'o'
print('Do you want to play first? (y/n) : ',end="")
return input().lower().startswith('y')
def PlayAgain():
print('Do you want to play again? (y/n) :',end="") #This function ask if player want to play new game
return input().lower().startswith('y')
def main():
print("Connect4") #The main function. This ask player mark, initialize gameboard (table), print board after each turn, ask players move, make AI's move and checks after each move is game is tie/win or lose.
print("-"*34)
while True:
connectfour = ConnectFour()
gameisgoing = True
table = [[],[],[],[],[],[]]
for i in range(7):
for j in range(6):
table[j].append(" ")
player = "x"
opponent = "o"
if PlayerGoesFirst():
turn = "x"
else:
turn = "o"
while gameisgoing:
connectfour.PrintGameBoard(table)
if turn=="x":
table = connectfour.PlayerMove(table, player)
if connectfour.IsWinning(player, table):
connectfour.PrintGameBoard(table)
print('You won the game!')
gameisgoing = False
else:
if connectfour.moves==42:
connectfour.PrintGameBoard(table)
print('Game is tie')
gameisgoing=False
else:
turn = "o"
else:
table = connectfour.searchingfunction(table, 6, opponent) #Here is AI's move. Takes as input current table (board), depth and opponents mark. Output should be new gameboard with AI's move.
if connectfour.IsWinning(opponent, table):
connectfour.PrintGameBoard(table)
print('Computer won the game')
gameisgoing = False
else:
if connectfour.moves==42:
connectfour.PrintGameBoard(table)
print('Game is tie')
gameisgoing=False
else:
turn = "x"
if not PlayAgain():
print("Game ended")
print("-"*34)
break
if __name__ == '__main__':
main()
该实现看起来不错,而且它的着法也很合理。
很难仅从一场比赛中对比赛强度做出假设。您需要 运行 更多游戏才能获得可靠的统计数据。例如,通过改变开始位置并让两个引擎分别播放一次 'X' 和一次 '0'.
其中也有一定的运气因素。当搜索深度增加时,Alpha-beta 通常会带来更好的游戏体验。然而,在某些情况下,它可能最终会下一个在更浅的搜索中不会下的坏棋。
不同的评价函数也有这个作用。即使一个评估函数更差,但在某些位置,糟糕的评估函数会导致更好的移动。
只有当你玩了足够多的游戏或者你的搜索非常深入以至于它可以看到所有变化到最后时,运气因素才会消失。 Connect 4 是一款解谜游戏。如果算法是完美的,那么第一个移动的玩家将永远获胜。这是指出搜索错误的唯一 objective 方法。启发式算法,就像您的评估函数一样,总会有弱点。
我已经在人工智能问题上工作了一段时间,这周我尝试编写 AI 代码以将 4 与 python 连接起来。首先我在复制板时遇到了问题,但我发现在 python 中你需要使用 deepcopy 来避免复制错误。
最后我设法创建了 alpha-beta 剪枝算法并且它工作正常但后来我测试了深度 8 中的算法与深度 6 中的在线 alpha-beta 剪枝算法,令人惊讶的是我的算法丢失了。我与哈佛的导师一起创建了评估函数,并从 msavenski 的代码中修改了 alpha-beta(链接在代码中)
一些一直在处理这些问题的人能否再检查一下我的算法和评估函数是否按预期工作,因为我很确定其中有一些错误。我知道我可以使用换位表、深度迭代等来使代码更快、更有效,但我的另一个目标是保持简单。
这是我的代码:
# -*- coding: utf-8 -*-
import copy
class ConnectFour:
def __init__(self):
self.moves = 0 #The count of moves, 42 moves is equal than board is full
self.turn = 0 #Use this variable to recognize which one player turn is it
def PrintGameBoard(self, board):
print(' 0 1 2 3 4 5 6') # This function just raws a board
for i in range(5, -1, -1):
print('|---|---|---|---|---|---|---|')
print("| ",end="")
for j in range(7):
print(board[i][j],end="")
if j != 6:
print(" | ",end="")
else:
print(" |")
print('`---------------------------´')
def LegalRow(self, col, board):
stacks = [[x[i] for x in board] for i in range(len(board[0]))] # This function checks stack of given column and return the row where you can draw mark. If the stack is full return -1
countofitems = stacks[col].count("x") + stacks[col].count("o") # count of items in stack
if (countofitems) < 6:
return (countofitems)
else:
return -1
def LegalMoves(self, board):
legalmoves = []
stacks = [[x[i] for x in board] for i in range(len(board[0]))]
order = [3,2,4,1,5,0,6]
for i in order:
if self.LegalRow(i, board)!=-1:
legalmoves.append(i)
return legalmoves
def MakeMove(self, board, col, player, row):
board[row][col] = player # This function make a move and increases count of moves
self.moves += 1
return board
def UnmakeMove(self, board, col, row):
board[row][col] = " " # This function make a move and increases count of moves
self.moves -= 1
return board
def IsWinning(self, currentplayer, board):
for i in range(6): # This function returns True or False depending on if current player have four "tila" in a row (win)
for j in range(4):
if board[i][j] == currentplayer and board[i][j+1] == currentplayer and board[i][j+2] == currentplayer and board[i][j+3] == currentplayer:
return True
for i in range(3):
for j in range(7):
if board[i][j] == currentplayer and board[i+1][j] == currentplayer and board[i+2][j] == currentplayer and board[i+3][j] == currentplayer:
return True
for i in range(3):
for j in range(4):
if board[i][j] == currentplayer and board[i+1][j+1] == currentplayer and board[i+2][j+2] == currentplayer and board[i+3][j+3] == currentplayer:
return True
for i in range(3,6):
for j in range(4):
if board[i][j] == currentplayer and board[i-1][j+1] == currentplayer and board[i-2][j+2] == currentplayer and board[i-3][j+3] == currentplayer:
return True
return False
def PlayerMove(self, board, player):
allowedmove = False # This function ask players move when its his turn and returns board after making move.
while not allowedmove:
try:
print("Choose a column where you want to make your move (0-6): ",end="")
col =input()
col=int(col)
row = self.LegalRow(col, board)
except (NameError, ValueError, IndexError, TypeError, SyntaxError) as e:
print("Give a number as an integer between 0-6!")
else:
if row != -1 and (col<=6 and col>=0):
board[row][int(col)] = player
self.moves += 1
allowedmove = True
elif col>6 or col<0:
print("The range was 0-6!!!")
else:
print("This column is full")
return board
def SwitchPlayer(self, player): # This function gives opponent player's mark
if player=="x":
nextplayer="o"
else:
nextplayer="x"
return nextplayer
def evaluation(self, board): # This function evaluate gameboard (heuristic). The rules of evaluation are in site: http://isites.harvard.edu/fs/docs/icb.topic788088.files/Assignment%203/asst3c.pdf
list = []
player = "x"
opponent = "o"
if self.IsWinning(player, board):
score = -512
elif self.IsWinning(opponent, board):
score = +512
elif self.moves==42:
score=0
else:
score = 0
for i in range(6): #append to list horizontal segments
for j in range(4):
list.append([str(board[i][j]),str(board[i][j+1]),str(board[i][j+2]),str(board[i][j+3])])
for i in range(3): #append to list vertical segments
for j in range(7):
list.append([str(board[i][j]),str(board[i+1][j]),str(board[i+2][j]),str(board[i+3][j])])
for i in range(3): #append to list diagonal segments
for j in range(4):
list.append([str(board[i][j]),str(board[i+1][j+2]),str(board[i+2][j+2]),str(board[i+3][j+3])])
for i in range(3, 6): #append to list diagonal segments
for j in range(4):
list.append([str(board[i][j]),str(board[i-1][j+2]),str(board[i-2][j+2]),str(board[i-3][j+3])])
for k in range(len(list)): #add to score with rules of site above
if ((list[k].count(str("x"))>0) and (list[k].count(str("o"))>0)) or list[k].count(" ")==4:
score += 0
if list[k].count(player)==1 and list[k].count(opponent)==0:
score -= 1
if list[k].count(player)==2 and list[k].count(opponent)==0:
score -= 10
if list[k].count(player)==3 and list[k].count(opponent)==0:
score -= 50
if list[k].count(opponent)==1 and list[k].count(player)==0:
score += 1
if list[k].count(opponent)==2 and list[k].count(player)==0:
score += 10
if list[k].count(opponent)==3 and list[k].count(player)==0:
score += 50
if self.turn==player:
score -= 16
else:
score += 16
return score
def maxfunction(self, board, depth, player, alpha, beta):
opponent = self.SwitchPlayer(player)
self.turn = opponent
legalmoves = self.LegalMoves(board)
if (depth==0) or self.moves==42:
return self.evaluation(board)
value=-1000000000
for col in legalmoves:
row = self.LegalRow(col, board)
newboard = self.MakeMove(board, col, opponent, row)
value = max(value, self.minfunction(board, depth-1, opponent,alpha, beta))
newboard = self.UnmakeMove(board, col, row)
if value >= beta:
return value
alpha = max(alpha, value)
return value
def minfunction(self, board, depth, opponent, alpha, beta):
player = self.SwitchPlayer(opponent)
self.turn = player
legalmoves = self.LegalMoves(board)
if (depth==0) or self.moves==42:
return evaluation(board)
value=1000000000
for col in legalmoves:
row = self.LegalRow(col, board)
newboard = self.MakeMove(board, col, player, row)
value = min(value, self.maxfunction(board, depth-1, player ,alpha, beta))
newboard = self.UnmakeMove(board, col, row)
if value <= alpha:
return value
beta = min(beta, value)
return value
def alphabetapruning(self, board, depth, opponent, alpha, beta): #This is the alphabeta-function modified from: https://github.com/msaveski/connect-four
values = []
cols = []
value = -1000000000
for col in self.LegalMoves(board):
row = self.LegalRow(col, board)
board = self.MakeMove(board, col, opponent, row)
value = max(value, self.minfunction(board, depth-1, opponent, alpha, beta))
values.append(value)
cols.append(col)
board = self.UnmakeMove(board, col, row)
largestvalue= max(values)
print(cols)
print(values)
for i in range(len(values)):
if largestvalue==values[i]:
position = cols[i]
return largestvalue, position
def searchingfunction(self, board, depth, opponent):
#This function update turn to opponent and calls alphabeta (main algorithm) and after that update new board (add alphabeta position to old board) and returns new board.
newboard = copy.deepcopy(board)
value, position=self.alphabetapruning(newboard, depth, opponent, -10000000000, 10000000000)
board = self.MakeMove(board, position, opponent, self.LegalRow(position, board))
return board
def PlayerGoesFirst():
print("Player is X and AI is O") #This function just ask who goes first
player = 'x'
opponent = 'o'
print('Do you want to play first? (y/n) : ',end="")
return input().lower().startswith('y')
def PlayAgain():
print('Do you want to play again? (y/n) :',end="") #This function ask if player want to play new game
return input().lower().startswith('y')
def main():
print("Connect4") #The main function. This ask player mark, initialize gameboard (table), print board after each turn, ask players move, make AI's move and checks after each move is game is tie/win or lose.
print("-"*34)
while True:
connectfour = ConnectFour()
gameisgoing = True
table = [[],[],[],[],[],[]]
for i in range(7):
for j in range(6):
table[j].append(" ")
player = "x"
opponent = "o"
if PlayerGoesFirst():
turn = "x"
else:
turn = "o"
while gameisgoing:
connectfour.PrintGameBoard(table)
if turn=="x":
table = connectfour.PlayerMove(table, player)
if connectfour.IsWinning(player, table):
connectfour.PrintGameBoard(table)
print('You won the game!')
gameisgoing = False
else:
if connectfour.moves==42:
connectfour.PrintGameBoard(table)
print('Game is tie')
gameisgoing=False
else:
turn = "o"
else:
table = connectfour.searchingfunction(table, 6, opponent) #Here is AI's move. Takes as input current table (board), depth and opponents mark. Output should be new gameboard with AI's move.
if connectfour.IsWinning(opponent, table):
connectfour.PrintGameBoard(table)
print('Computer won the game')
gameisgoing = False
else:
if connectfour.moves==42:
connectfour.PrintGameBoard(table)
print('Game is tie')
gameisgoing=False
else:
turn = "x"
if not PlayAgain():
print("Game ended")
print("-"*34)
break
if __name__ == '__main__':
main()
该实现看起来不错,而且它的着法也很合理。
很难仅从一场比赛中对比赛强度做出假设。您需要 运行 更多游戏才能获得可靠的统计数据。例如,通过改变开始位置并让两个引擎分别播放一次 'X' 和一次 '0'.
其中也有一定的运气因素。当搜索深度增加时,Alpha-beta 通常会带来更好的游戏体验。然而,在某些情况下,它可能最终会下一个在更浅的搜索中不会下的坏棋。
不同的评价函数也有这个作用。即使一个评估函数更差,但在某些位置,糟糕的评估函数会导致更好的移动。
只有当你玩了足够多的游戏或者你的搜索非常深入以至于它可以看到所有变化到最后时,运气因素才会消失。 Connect 4 是一款解谜游戏。如果算法是完美的,那么第一个移动的玩家将永远获胜。这是指出搜索错误的唯一 objective 方法。启发式算法,就像您的评估函数一样,总会有弱点。