2048 场比赛 - AI 平均得分不能超过 256
2048 game - AI can't score more that 256 average
我正在尝试基于蛇策略(参见 this 论文)使用 MiniMax 和 Alpha-Beta 修剪实现 2048 年的 AI,这似乎是最好的单一启发式算法。
不幸的是,AI 在大多数游戏中的得分为 256,这并不比空单元格启发式算法好多少。我已经在这里阅读了相关主题,但我自己找不到解决方案。
代码如下:
import math
from BaseAI_3 import BaseAI
INF_P = math.inf
class PlayerAI(BaseAI):
move_str = {
0: "UP",
1: "DOWN",
2: "LEFT",
3: "RIGHT"
}
def __init__(self):
super().__init__()
self.depth_max = 4
def getMove(self, grid):
move_direction, state, utility = self.decision(grid)
act_move = moves.index(move_direction)
return moves[act_move] if moves else None
def get_children(self, grid):
grid.children = []
for move_direction in grid.getAvailableMoves():
gridCopy = grid.clone()
gridCopy.path = grid.path[:]
gridCopy.path.append(PlayerAI.move_str[move_direction])
gridCopy.move(move_direction)
gridCopy.depth_current = grid.depth_current + 1
grid.children.append((move_direction, gridCopy))
return grid.children
def utility(self, state):
def snake():
poses = [
[
[2 ** 15, 2 ** 14, 2 ** 13, 2 ** 12],
[2 ** 8, 2 ** 9, 2 ** 10, 2 ** 11],
[2 ** 7, 2 ** 6, 2 ** 5, 2 ** 4],
[2 ** 0, 2 ** 1, 2 ** 2, 2 ** 3]
]
,
[
[2 ** 15, 2 ** 8, 2 ** 7, 2 ** 0],
[2 ** 14, 2 ** 9, 2 ** 6, 2 ** 1],
[2 ** 13, 2 ** 10, 2 ** 5, 2 ** 2],
[2 ** 12, 2 ** 11, 2 ** 4, 2 ** 3]
]
]
poses.append([item for item in reversed(poses[0])])
poses.append([list(reversed(item)) for item in reversed(poses[0])])
poses.append([list(reversed(item)) for item in poses[0]])
poses.append([item for item in reversed(poses[1])])
poses.append([list(reversed(item)) for item in reversed(poses[1])])
poses.append([list(reversed(item)) for item in poses[1]])
max_value = -INF_P
for pos in poses:
value = 0
for i in range(state.size):
for j in range(state.size):
value += state.map[i][j] * pos[i][j]
if value > max_value:
max_value = value
return max_value
weight_snake = 1 / (2 ** 13)
value = (
weight_snake * snake(),
)
return value
def decision(self, state):
state.depth_current = 1
state.path = []
return self.maximize(state, -INF_P, INF_P)
def terminal_state(self, state):
return state.depth_current >= self.depth_max
def maximize(self, state, alpha, beta):
# terminal-state check
if self.terminal_state(state):
return (None, state, self.utility(state))
max_move_direction, max_child, max_utility = None, None, (-INF_P, )
for move_direction, child in self.get_children(state):
_, state2, utility = self.minimize(child, alpha, beta)
child.utility = utility
if sum(utility) > sum(max_utility):
max_move_direction, max_child, max_utility = move_direction, child, utility
if sum(max_utility) >= beta:
break
if sum(max_utility) > alpha:
alpha = sum(max_utility)
state.utility = max_utility
state.alpha = alpha
state.beta = beta
return max_move_direction, max_child, max_utility
def minimize(self, state, alpha, beta):
# terminal-state check
if self.terminal_state(state):
return (None, state, self.utility(state))
min_move_direction, min_child, min_utility = None, None, (INF_P, )
for move_direction, child in self.get_children(state):
_, state2, utility = self.maximize(child, alpha, beta)
child.utility = utility
if sum(utility) < sum(min_utility):
min_move_direction, min_child, min_utility = move_direction, child, utility
if sum(min_utility) <= alpha:
break
if sum(min_utility) < beta:
beta = sum(min_utility)
state.utility = min_utility
state.alpha = alpha
state.beta = beta
return min_move_direction, min_child, min_utility
grid
是一个对象,grid.map
是一个二维数组(list of lists)。
我有什么错误吗?我怎样才能改进代码?
在过去的周末,我意识到算法没有正确实现。 minimize()
函数中有一个错误,我以错误的方式搜索 children - 它应该是这样的:
def get_opponent_children(self, grid):
grid.children = []
for x in range(grid.size):
for y in range(grid.size):
if grid.map[x][y] == 0:
for c in (2, 4):
gridCopy = grid.clone()
gridCopy.path = grid.path[:]
gridCopy.deep_current = grid.deep_current + 1
gridCopy.map[x][y] = c
grid.children.append((None, gridCopy))
return grid.children
和相应的变化:
for move_direction, child in self.get_opponent_children(state):
现在大部分时间打1024和2048就可以了
我正在尝试基于蛇策略(参见 this 论文)使用 MiniMax 和 Alpha-Beta 修剪实现 2048 年的 AI,这似乎是最好的单一启发式算法。
不幸的是,AI 在大多数游戏中的得分为 256,这并不比空单元格启发式算法好多少。我已经在这里阅读了相关主题,但我自己找不到解决方案。
代码如下:
import math
from BaseAI_3 import BaseAI
INF_P = math.inf
class PlayerAI(BaseAI):
move_str = {
0: "UP",
1: "DOWN",
2: "LEFT",
3: "RIGHT"
}
def __init__(self):
super().__init__()
self.depth_max = 4
def getMove(self, grid):
move_direction, state, utility = self.decision(grid)
act_move = moves.index(move_direction)
return moves[act_move] if moves else None
def get_children(self, grid):
grid.children = []
for move_direction in grid.getAvailableMoves():
gridCopy = grid.clone()
gridCopy.path = grid.path[:]
gridCopy.path.append(PlayerAI.move_str[move_direction])
gridCopy.move(move_direction)
gridCopy.depth_current = grid.depth_current + 1
grid.children.append((move_direction, gridCopy))
return grid.children
def utility(self, state):
def snake():
poses = [
[
[2 ** 15, 2 ** 14, 2 ** 13, 2 ** 12],
[2 ** 8, 2 ** 9, 2 ** 10, 2 ** 11],
[2 ** 7, 2 ** 6, 2 ** 5, 2 ** 4],
[2 ** 0, 2 ** 1, 2 ** 2, 2 ** 3]
]
,
[
[2 ** 15, 2 ** 8, 2 ** 7, 2 ** 0],
[2 ** 14, 2 ** 9, 2 ** 6, 2 ** 1],
[2 ** 13, 2 ** 10, 2 ** 5, 2 ** 2],
[2 ** 12, 2 ** 11, 2 ** 4, 2 ** 3]
]
]
poses.append([item for item in reversed(poses[0])])
poses.append([list(reversed(item)) for item in reversed(poses[0])])
poses.append([list(reversed(item)) for item in poses[0]])
poses.append([item for item in reversed(poses[1])])
poses.append([list(reversed(item)) for item in reversed(poses[1])])
poses.append([list(reversed(item)) for item in poses[1]])
max_value = -INF_P
for pos in poses:
value = 0
for i in range(state.size):
for j in range(state.size):
value += state.map[i][j] * pos[i][j]
if value > max_value:
max_value = value
return max_value
weight_snake = 1 / (2 ** 13)
value = (
weight_snake * snake(),
)
return value
def decision(self, state):
state.depth_current = 1
state.path = []
return self.maximize(state, -INF_P, INF_P)
def terminal_state(self, state):
return state.depth_current >= self.depth_max
def maximize(self, state, alpha, beta):
# terminal-state check
if self.terminal_state(state):
return (None, state, self.utility(state))
max_move_direction, max_child, max_utility = None, None, (-INF_P, )
for move_direction, child in self.get_children(state):
_, state2, utility = self.minimize(child, alpha, beta)
child.utility = utility
if sum(utility) > sum(max_utility):
max_move_direction, max_child, max_utility = move_direction, child, utility
if sum(max_utility) >= beta:
break
if sum(max_utility) > alpha:
alpha = sum(max_utility)
state.utility = max_utility
state.alpha = alpha
state.beta = beta
return max_move_direction, max_child, max_utility
def minimize(self, state, alpha, beta):
# terminal-state check
if self.terminal_state(state):
return (None, state, self.utility(state))
min_move_direction, min_child, min_utility = None, None, (INF_P, )
for move_direction, child in self.get_children(state):
_, state2, utility = self.maximize(child, alpha, beta)
child.utility = utility
if sum(utility) < sum(min_utility):
min_move_direction, min_child, min_utility = move_direction, child, utility
if sum(min_utility) <= alpha:
break
if sum(min_utility) < beta:
beta = sum(min_utility)
state.utility = min_utility
state.alpha = alpha
state.beta = beta
return min_move_direction, min_child, min_utility
grid
是一个对象,grid.map
是一个二维数组(list of lists)。
我有什么错误吗?我怎样才能改进代码?
在过去的周末,我意识到算法没有正确实现。 minimize()
函数中有一个错误,我以错误的方式搜索 children - 它应该是这样的:
def get_opponent_children(self, grid):
grid.children = []
for x in range(grid.size):
for y in range(grid.size):
if grid.map[x][y] == 0:
for c in (2, 4):
gridCopy = grid.clone()
gridCopy.path = grid.path[:]
gridCopy.deep_current = grid.deep_current + 1
gridCopy.map[x][y] = c
grid.children.append((None, gridCopy))
return grid.children
和相应的变化:
for move_direction, child in self.get_opponent_children(state):
现在大部分时间打1024和2048就可以了