Keras fit takes so much time
I've recently been learning deep reinforcement learning, and I wanted to apply what I've learned to a problem from OpenAI Gym using Keras.
During training I noticed it was far too slow, and after checking why, I saw that the "fit" function was what was taking so much time.
Each episode takes 3-4 minutes to run.
Am I doing something wrong, or can you suggest improvements?
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.optimizers import Adam
from collections import deque
import random
import gym
import datetime

class DQN():
    def __init__(self, env):
        self.env = env
        self.memory = deque(maxlen=2000)
        self.gamma = 0.98
        self.epsilon = 1
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.998
        self.learning_rate = 0.001
        self.model = self.create_model()
        self.target_model = self.create_model()

    def create_model(self):
        model = keras.Sequential()
        state_shape = self.env.observation_space.shape
        model.add(keras.layers.Dense(48, activation="relu", input_dim=state_shape[0]))
        model.add(keras.layers.Dense(24, activation="relu"))
        model.add(keras.layers.Dense(self.env.action_space.n, activation="relu"))
        model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, new_state, done):
        self.memory.append([state, action, reward, new_state, done])

    def replay(self):
        batch_size = 32
        if len(self.memory) < batch_size:
            return
        samples = random.sample(self.memory, batch_size)
        # states, actions, rewards, states_, dones = samples
        # targets = self.target_model.predict(states)
        # _states = [i for i in range(len(samples))]
        # targets = [[0 for j in range(self.env.action_space.n)] for i in range(len(samples))]
        _states = np.zeros((len(samples), 8))
        targets = np.zeros((len(samples), self.env.action_space.n))
        for i, sample in enumerate(samples):
            state, action, reward, new_state, done = sample
            _states[i] = state
            # target = self.target_model.predict(state)
            if done:
                targets[i][action] = reward
            else:
                Q_future = max(self.target_model.predict(new_state)[0])
                targets[i][action] = reward + Q_future*self.gamma
        self.model.fit(_states, targets, epochs=1, verbose=0)
        # for sample in samples:
        #     state, action, reward, new_state, done = sample
        #     target = self.target_model.predict(state)
        #     if done:
        #         target[0][action] = reward
        #     else:
        #         Q_future = max(self.target_model.predict(new_state)[0])
        #         target[0][action] = reward + Q_future*self.gamma
        #     start_time = datetime.datetime.now()
        #     self.model.fit(state, target, epochs=1, verbose=0)
        #     end_time = datetime.datetime.now()
        #     print("--fit--")
        #     print(end_time-start_time)

    def target_train(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i]
        self.target_model.set_weights(target_weights)

    def act(self, state):
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        if np.random.random() < self.epsilon:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state)[0])

    def save_model(self, fn):
        self.model.save(fn)

    def act_eval(self, state):
        return np.argmax(self.model.predict(state)[0])

    def evaluation(self, n_eval=10):
        total_reward = 0
        for _ in range(n_eval):
            self.env.reset()
            cur_state = self.env.reset().reshape(1,8)
            done = False
            while not done:
                action = self.act_eval(cur_state)
                new_state, reward, done, _ = self.env.step(action)
                total_reward += reward
                cur_state = new_state.reshape(1,8)
        return total_reward / n_eval

def main():
    save_path = "policies/"
    env = gym.make("LunarLander-v2")
    trials = 2000
    trial_len = 500
    update_target_network = 500
    agent = DQN(env=env)
    for trial in range(trials):
        cur_state = env.reset().reshape(1,8)
        time_step_cntr = 0
        # check execution durations
        dur_replay = 0
        dur_step = 0
        dur_act = 0
        for step in range(trial_len):
            print("Trial {0}, step {1}".format(trial, step))
            action = agent.act(cur_state)
            new_state, reward, done, _ = env.step(action)
            new_state = new_state.reshape(1,8)
            agent.remember(cur_state, action, reward, new_state, done)
            # learn from experience
            agent.replay()
            # after "update_target_network" steps, update target network
            if time_step_cntr % update_target_network == 0:
                agent.target_train()
            time_step_cntr += 1
            cur_state = new_state
            if done:
                break
        # print("Duration replay {0}, duration act {1}, duration step {2}".format(dur_replay, dur_act, dur_step))
        # at each N steps, evaluate
        print("Evaluation over 10 episodes", agent.evaluation())
        print("Trial #{0} completed.".format(trial))
        # # print the progress
        # if trial % 100 == 0:
        #     print("Trial #{0} completed.".format(trial))
        # save the model
        # if trial % 20 == 0:
        agent.save_model(save_path + str(trial) + "__.model")
    agent.save_model(save_path + "_final" + "__.model")

if __name__ == "__main__":
    main()
Your problem is not in the fit call, but in the loop inside your replay() method. In cases like this, always try to replace Python loops with NumPy operations, even when the loop version feels more natural to write: one vectorized call is far cheaper than many small ones.
Replace your replay method with the one below and let me know if it is faster for you:
def replay(self):
    batch_size = 32
    if len(self.memory) >= batch_size:
        # Draw a sample
        samples = random.sample(self.memory, batch_size)
        # Prepare the batch
        state, action, reward, new_state, done = zip(*samples)
        next_state = np.concatenate(new_state)
        done = np.array(done)[:, None]
        state = np.concatenate(state)
        reward = np.array(reward)[:, None]
        q_future = self.target_model.predict(next_state)
        targets = reward + self.gamma*np.max(q_future, axis=1, keepdims=True)*(1-done)
        # Fit the model
        self.model.fit(state, targets, epochs=1, verbose=0)
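To see why this helps: each model.predict call in Keras carries a fixed per-call overhead (Python dispatch, graph setup), so the original loop pays that overhead once per sampled transition, 32 times per replay step, while the batched version pays it once. A minimal timing sketch of the effect, using a hypothetical stand-in network with the same 8-input/4-action shape as above (not part of the original post):

import time
import numpy as np
import tensorflow.keras as keras

# Hypothetical stand-in for the DQN network above: 8 state inputs, 4 actions.
model = keras.Sequential([
    keras.layers.Dense(48, activation="relu", input_dim=8),
    keras.layers.Dense(24, activation="relu"),
    keras.layers.Dense(4),
])
model.compile(loss="mse", optimizer="adam")

batch = np.random.rand(32, 8).astype("float32")

# One predict call per transition, as in the original replay() loop.
start = time.time()
for row in batch:
    model.predict(row[None, :], verbose=0)
print("32 single-sample predicts:", time.time() - start)

# A single batched call over the same 32 transitions.
start = time.time()
model.predict(batch, verbose=0)
print("1 batched predict:", time.time() - start)

One design note on the batched replay above: it fits a (32, 1) target array against a network with n_actions outputs, which appears to rely on broadcasting in the MSE loss, so every action output is pulled toward the same target rather than only the taken action as in standard DQN. The speedup comes from the batching either way.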