为什么我的 tic tac toe minimax 算法不起作用?

Why does my tic tac toe minimax algorithm not work?

很抱歉像这样转储我的代码,但过去几个小时我一直在努力找出我在 python 中的 minimax 算法哪里出了问题。非常感谢任何帮助!

# Sentinel whose magnitude exceeds every score minimax returns (-1, 0 or 1).
inf = 1000
# Position under test: x and o have three marks each.
bo = [["x", "o", "o"],
      [" ", "o", " "],
      [" ", "x", "x"]]    

def bestMove(board):
    """Try every empty cell and remember the one with the highest minimax score.

    NOTE(review): getTurn and win are called but not defined in this snippet.
    """
    bestScore = -inf
    bestMove = None
    for i in range(3):
       for j in range(3):
          if(board[i][j]==" "):
            # Play the candidate move, score the position, then undo the move.
            board[i][j]=getTurn(board)
            score = minimax(board, searchdepth, True)
            board[i][j]=" "
            # Strict comparison: the first cell reaching the top score is kept.
            if score > bestScore:
                bestScore = score
                bestMove = [i, j]
# NOTE(review): as pasted, the next two lines are at column 0 and therefore
# outside the function body; presumably they were meant to be indented.
print("\n\n\n")
return bestMove

# Depth limit that bestMove passes to minimax.
searchdepth = 10
def minimax(node, depth, maxP):
    """Score a position: 1 when win() reports "x", -1 for "o", 0 for "tie".

    Also returns 0 once depth reaches 0.
    """
    resultat = win(node)
    if resultat=="x": return 1
    if resultat=="o": return -1
    if resultat=="tie": return 0
    if depth == 0: return 0

# NOTE(review): as pasted, both branches below are at column 0 and therefore
# outside minimax; presumably they were meant to be part of its body.
if maxP==True:
    # Maximizing ply: try each empty cell and keep the largest child score.
    value = -inf
    for i in range(3):
        for j in range(3):
            if node[i][j] == " ":
                node[i][j] = getTurn(node)
                newval = minimax(node, depth - 1, False)
                node[i][j] = " "
                value = max(newval, value)
    return value
if maxP==False:
    # Minimizing ply: try each empty cell and keep the smallest child score.
    value = inf
    for i in range(3):
        for j in range(3):
            if node[i][j] == " ":
                node[i][j] = getTurn(node)
                newval = minimax(node, depth - 1, True)
                node[i][j] = " "
                value = min(newval, value)
    return value
# Print the move chosen for the position stored in bo.
print(bestMove(bo))

输出:[1, 0] 预期输出:[2, 0]

由于你没有提供 getTurn 和 win,我自己写了这两个函数的版本,并修复了下面代码中的缩进问题,它打印出 [2, 0]。

# Sentinel whose magnitude exceeds every score minimax can return.
inf = 1000
# Empty 3x3 board; each row is its own list so cells can be set independently.
bo = [[" "] * 3 for _ in range(3)]

def getTurn(board):
    """Return the mark ('x' or 'o') of the side to move on ``board``.

    It is x's turn when both marks appear equally often, and o's turn
    otherwise.  The marks are counted on the board passed in, not on the
    module-level ``bo``.
    """
    x_count = sum(cell == 'x' for row in board for cell in row)
    o_count = sum(cell == 'o' for row in board for cell in row)
    return 'x' if x_count == o_count else 'o'

def win(board):
    """Report the state of a 3x3 board.

    Returns the mark that fills a complete row, column or diagonal,
    'tie' when no line is complete and no cell is empty, and None while
    the game is still open.
    """
    # Lines are checked in a fixed order: row k then column k for each k,
    # followed by the main diagonal and the anti-diagonal.
    triples = []
    for k in range(3):
        triples.append(((k, 0), (k, 1), (k, 2)))
        triples.append(((0, k), (1, k), (2, k)))
    triples.append(((0, 0), (1, 1), (2, 2)))
    triples.append(((0, 2), (1, 1), (2, 0)))

    for (r0, c0), (r1, c1), (r2, c2) in triples:
        mark = board[r0][c0]
        if mark != ' ' and mark == board[r1][c1] == board[r2][c2]:
            return mark

    # No completed line: the game is open as long as any cell is empty.
    for row in board:
        if ' ' in row:
            return None
    return 'tie'

def bestMove(board):
    """Return the best move for the side to play as ``[row, col]``.

    Every empty cell is tried in row-major order; the move is played,
    scored with minimax and then undone, so ``board`` is unchanged when
    the function returns.  Returns None when the board has no empty cell.

    minimax scores are absolute (1 = x wins, -1 = o wins, 0 = tie or depth
    cutoff), so the side to move decides how they are read: x keeps the
    highest score, o keeps the lowest.
    """
    player = getTurn(board)
    maximizing = player == "x"
    best_score = -inf if maximizing else inf
    best_move = None
    for i in range(3):
        for j in range(3):
            if board[i][j] != " ":
                continue
            board[i][j] = player
            # After this move the opponent is to play, so the next ply is a
            # maximizing one only when that opponent is x.
            score = minimax(board, searchdepth, not maximizing)
            board[i][j] = " "
            if maximizing:
                improved = score > best_score
            else:
                improved = score < best_score
            # Strict comparison keeps the first of several equally good moves.
            if improved:
                best_score = score
                best_move = [i, j]
    return best_move

# Depth limit that bestMove passes to minimax; minimax returns 0 when it runs out.
searchdepth = 10

def minimax(node, depth, maxP):
    """Score the position ``node`` by exhaustive game-tree search.

    Returns 1 when x has won, -1 when o has won, and 0 for a tie or when
    ``depth`` reaches 0.  Otherwise each empty cell is filled in turn with
    the mark of the side to move, scored recursively, and cleared again.
    The result is the largest child score when ``maxP`` is true and the
    smallest otherwise.
    """
    outcome = win(node)
    terminal_scores = {"x": 1, "o": -1, "tie": 0}
    if outcome in terminal_scores:
        return terminal_scores[outcome]
    if depth == 0:
        return 0

    # Choose the starting bound and the reducer for this ply.
    if maxP:
        best, pick = -inf, max
    else:
        best, pick = inf, min

    for r in range(3):
        for c in range(3):
            if node[r][c] != " ":
                continue
            node[r][c] = getTurn(node)
            child = minimax(node, depth - 1, not maxP)
            node[r][c] = " "  # undo the trial move
            best = pick(child, best)

    return best

# Self-play: both sides take the move bestMove picks until win() reports
# a winner or a tie.
while True:
    if win(bo):
        break
    mark = getTurn(bo)
    row, col = bestMove(bo)
    bo[row][col] = mark

print(bo)
print("Result:",win(bo))

只要 'X' 获胜,你就总是返回 1,这是不正确的。这意味着轮到 O 走棋时,它会把 X 获胜当成一件好事。最简单的做法是根据轮到谁来给出不同的分值:自己获胜得 1 分,对手获胜得 -1 分,平局得 0 分。