Errors when trying to use DQN algorithm for FrozenLake Openai game
I am trying to get a very simple DQN algorithm to work with the FrozenLake-v0 game from OpenAI Gym, but I am running into errors. I know that using a DQN instead of a Q-table may be overkill here, but I would still like it to work. Here is the code:
import gym
import numpy as np
import tensorflow as tf
env = gym.make("FrozenLake-v0")
n_actions = env.action_space.n
input_dim = env.observation_space.n
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(64, input_dim = input_dim , activation = 'relu'))
model.add(tf.keras.layers.Dense(32, activation = 'relu'))
model.add(tf.keras.layers.Dense(n_actions, activation = 'linear'))
model.compile(optimizer=tf.keras.optimizers.Adam(), loss = 'mse')
def replay(replay_memory, minibatch_size=32):
    minibatch = np.random.choice(replay_memory, minibatch_size, replace=True)
    s_l = np.array(list(map(lambda x: x['s'], minibatch)))
    a_l = np.array(list(map(lambda x: x['a'], minibatch)))
    r_l = np.array(list(map(lambda x: x['r'], minibatch)))
    sprime_l = np.array(list(map(lambda x: x['sprime'], minibatch)))
    done_l = np.array(list(map(lambda x: x['done'], minibatch)))
    qvals_sprime_l = model.predict(sprime_l)
    target_f = model.predict(s_l)
    for i, (s, a, r, qvals_sprime, done) in enumerate(zip(s_l, a_l, r_l, qvals_sprime_l, done_l)):
        if not done:
            target = r + gamma * np.max(qvals_sprime)
        else:
            target = r
        target_f[i][a] = target
    model.fit(s_l, target_f, epochs=1, verbose=0)
    return model
n_episodes = 500
gamma = 0.99
epsilon = 0.9
minibatch_size = 32
r_sums = []
replay_memory = []
mem_max_size = 100000
for n in range(n_episodes):
    s = env.reset()
    done = False
    r_sum = 0
    print(s)
    while not done:
        qvals_s = model.predict(s.reshape(16))
        if np.random.random() < epsilon:
            a = env.action_space.sample()
        else:
            a = np.argmax(qvals_s)
        sprime, r, done, info = env.step(a)
        r_sum += r
        if len(replay_memory) > mem_max_size:
            replay_memory.pop(0)
        replay_memory.append({"s": s, "a": a, "r": r, "sprime": sprime, "done": done})
        s = sprime
        model = replay(replay_memory, minibatch_size=minibatch_size)
        if epsilon > 0.1:
            epsilon -= 0.001
    r_sums.append(r_sum)
    if n % 100 == 0:
        print(n)
The error I get is:
Traceback (most recent call last):
File "froz_versuch.py", line 48, in <module>
qvals_s = model.predict(s.reshape(16))
ValueError: cannot reshape array of size 1 into shape (16,)
When I then change qvals_s = model.predict(s.reshape(16)) to qvals_s = model.predict(s.reshape(1)), I get the error message:
ValueError: Input 0 of layer sequential is incompatible with the layer: expected axis -1 of input shape to have value 16 but received input with shape [None, 1]
Any help would be greatly appreciated!
The problem is related to one-hot encoding. I had to encode s and sprime so that they have dimension 16: the network's first layer is built with input_dim = env.observation_space.n (which is 16), but FrozenLake returns the state as a single integer, so it has to be turned into a 16-dimensional one-hot vector before it is passed to model.predict. This change inside the for loop makes it work. The encode() function could be moved outside the loop (see the sketch after the code below), but for now I am just testing and will optimize later. Here is the solution:
for n in range(n_episodes):
    ss = env.reset()
    states_total = 16
    data = [[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]]
    def encode(data, states_total):
        # one-hot encode the state indices: row i of np.eye(states_total) encodes state i
        targets = np.array(data).reshape(-1)
        return np.eye(states_total)[targets]
    m = encode(data, states_total)
    s = m[ss]
    #print(s)
    #print(len(s))
    done = False
    r_sum = 0
    while not done:
        #env.render()
        qvals_s = model.predict(s.reshape(1, -1))
        if np.random.random() < epsilon:
            a = env.action_space.sample()
        else:
            a = np.argmax(qvals_s)
        sprime, r, done, info = env.step(a)
        r_sum += r
        q = encode(data, states_total)
        sprime = q[sprime]  # encode the next state the same way
        if len(replay_memory) > mem_max_size:
            replay_memory.pop(0)
        replay_memory.append({"s": s, "a": a, "r": r, "sprime": sprime, "done": done})
        #s = n[sprime]
        s = sprime
        model = replay(replay_memory, minibatch_size=minibatch_size)
        if epsilon > 0.001:
            epsilon -= 0.001
    r_sums.append(r_sum)
    print(r_sum)
    print(epsilon)
    if n % 100 == 0:
        print(n)
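As a side note on the optimization mentioned above: since the one-hot lookup never changes between episodes, the encode() call can be replaced by a table built once before the training loop. A minimal sketch of that idea (the names one_hot and s_idx are mine, not from the original code):

import numpy as np

# Build the lookup table once, outside the training loop:
# row i of the identity matrix is the 16-dimensional one-hot encoding of state i.
states_total = 16
one_hot = np.eye(states_total)

# Encoding a state is then just an index into the table, e.g. for the
# integer state returned by env.reset() or env.step():
s_idx = 5
s = one_hot[s_idx]   # shape (16,), ready for model.predict(s.reshape(1, -1))
print(s)             # [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]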