Getting error: ValueError: too many values to unpack (expected 5)
Getting error: ValueError: too many values to unpack (expected 5)
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import os
env = gym.make('CartPole-v0')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
batch_size = 32
n_episodes = 1000
output_dir = 'model_output/cartpole'
if not os.path.exists(output_dir):
os.makedirs(output_dir)
class DQNAgent:
def __init__(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size
self.memory = deque(maxlen=2000)
self.gamma = 0.9
self.epsilon = 1.0
self.epsilon_decay = 0.995
self.epsilon_min = 0.05
self._learning_rate = 0.01
self.model = self._build_model()
def _build_model(self):
model = Sequential()
model.add(Dense(24, input_dim = self.state_size, activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(50,activation='relu'))
model.add(Dense(self.action_size, activation='sigmoid'))
model.compile(loss='mse', optimizer=Adam(lr=self._learning_rate))
return model
def remember(self, state, action, reward, next_state, done):
self.memory.append((self, state, action, reward, next_state, done))
def act(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_size)
act_values = self.model.predict(state)
return np.argmax(act_values[0])
def replay(self, batch_size):
minibatch = random.sample(self.memory, batch_size)
print(len(minibatch))
for state, action, reward, next_state, done in minibatch:
target = reward
if not done:
target = (reward + self.gamma*np.amax(self.model.predict(next_state)[0]))
target_f = self.model.predict(state)
target_f[0][action] = target
self.model.fit(state, target_f, epochs=1, verboss=0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
def load(self,name):
self.model.load_weights(name)
def save(self, name):
self.model.save_weights(name)
agent = DQNAgent(state_size, action_size)
done = False
for e in range(n_episodes):
state = env.reset()
state = np.reshape(state, [1, state_size])
if agent.epsilon > agent.epsilon_min:
agent.epsilon *= agent.epsilon_decay
for time in range(5000):
# env.render()
action = agent.act(state)
next_state, reward, done, _ = env.step(action)
reward = reward if not done else -10
next_state = np.reshape(next_state, [1, state_size])
agent.remember(state, action, reward, next_state, done)
state = next_state
if done:
print("episode: {}/{}, score: {}, e: {:.2}".format(e, n_episodes, time, agent.epsilon))
break
if len(agent.memory) > batch_size:
agent.replay(batch_size)
if e % 50 == 0:
agent.save(output_dir + "weights_" + '{:04d}'.format(e) + ".hdf5")
我正在为 openai gym 中的 cartpole 环境创建算法,但我收到此错误:
回溯(最近调用最后):
文件 "C:/Users/ardao/Desktop/Ardaficial Intelligence/DQNs/CartPole.py",第 145 行,位于
agent.replay(batch_size)
文件 "C:/Users/ardao/Desktop/Ardaficial Intelligence/DQNs/CartPole.py",第 93 行,重放中
对于状态、动作、奖励,next_state,以小批量完成:
ValueError:要解压的值太多(预期为 5)
我正在学习本教程:https://www.youtube.com/watch?v=OYhFoMySoVs&t=2444s
谢谢
阿尔达
您刚刚添加了一个额外的自己。这应该解决它。如果你仔细想想,这个错误是很容易解释的。
要解压的值太多(预计 5 个)
在这一行中你可以看到你有 6 个。在 youtube 中验证代码显示了同样的事情。但是当你刚开始的时候,这些很容易被忽略。祝你好运,我鼓励你花点时间喘口气,下次慢慢地再看一遍。也许你可以自己解决。
self.memory.append((state, action, reward, next_state, done))
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import os
env = gym.make('CartPole-v0')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
batch_size = 32
n_episodes = 1000
output_dir = 'model_output/cartpole'
if not os.path.exists(output_dir):
os.makedirs(output_dir)
class DQNAgent:
def __init__(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size
self.memory = deque(maxlen=2000)
self.gamma = 0.9
self.epsilon = 1.0
self.epsilon_decay = 0.995
self.epsilon_min = 0.05
self._learning_rate = 0.01
self.model = self._build_model()
def _build_model(self):
model = Sequential()
model.add(Dense(24, input_dim = self.state_size, activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(24,activation='relu'))
model.add(Dense(50,activation='relu'))
model.add(Dense(self.action_size, activation='sigmoid'))
model.compile(loss='mse', optimizer=Adam(lr=self._learning_rate))
return model
def remember(self, state, action, reward, next_state, done):
self.memory.append((self, state, action, reward, next_state, done))
def act(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_size)
act_values = self.model.predict(state)
return np.argmax(act_values[0])
def replay(self, batch_size):
minibatch = random.sample(self.memory, batch_size)
print(len(minibatch))
for state, action, reward, next_state, done in minibatch:
target = reward
if not done:
target = (reward + self.gamma*np.amax(self.model.predict(next_state)[0]))
target_f = self.model.predict(state)
target_f[0][action] = target
self.model.fit(state, target_f, epochs=1, verboss=0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
def load(self,name):
self.model.load_weights(name)
def save(self, name):
self.model.save_weights(name)
agent = DQNAgent(state_size, action_size)
done = False
for e in range(n_episodes):
state = env.reset()
state = np.reshape(state, [1, state_size])
if agent.epsilon > agent.epsilon_min:
agent.epsilon *= agent.epsilon_decay
for time in range(5000):
# env.render()
action = agent.act(state)
next_state, reward, done, _ = env.step(action)
reward = reward if not done else -10
next_state = np.reshape(next_state, [1, state_size])
agent.remember(state, action, reward, next_state, done)
state = next_state
if done:
print("episode: {}/{}, score: {}, e: {:.2}".format(e, n_episodes, time, agent.epsilon))
break
if len(agent.memory) > batch_size:
agent.replay(batch_size)
if e % 50 == 0:
agent.save(output_dir + "weights_" + '{:04d}'.format(e) + ".hdf5")
我正在为 openai gym 中的 cartpole 环境创建算法,但我收到此错误:
回溯(最近调用最后): 文件 "C:/Users/ardao/Desktop/Ardaficial Intelligence/DQNs/CartPole.py",第 145 行,位于 agent.replay(batch_size) 文件 "C:/Users/ardao/Desktop/Ardaficial Intelligence/DQNs/CartPole.py",第 93 行,重放中 对于状态、动作、奖励,next_state,以小批量完成: ValueError:要解压的值太多(预期为 5)
我正在学习本教程:https://www.youtube.com/watch?v=OYhFoMySoVs&t=2444s
谢谢
阿尔达
您刚刚添加了一个额外的自己。这应该解决它。如果你仔细想想,这个错误是很容易解释的。
要解压的值太多(预计 5 个)
在这一行中你可以看到你有 6 个。在 youtube 中验证代码显示了同样的事情。但是当你刚开始的时候,这些很容易被忽略。祝你好运,我鼓励你花点时间喘口气,下次慢慢地再看一遍。也许你可以自己解决。
self.memory.append((state, action, reward, next_state, done))