Why does my model not learn? Very high loss
I built a simulation in which trucks collect garbage containers depending on how full they are. I created my deep reinforcement learning model with OpenAI Gym and TensorFlow/Keras... but my training shows a very high loss... Where am I going wrong? Thanks in advance.
Here is the environment:
class Marltf(Env):
    def __init__(self):
        self.i = 0
        self.containers1 = Container(3, 3)
        self.containers2 = Container(1, 3)
        self.containers3 = Container(3, 1)
        self.containers4 = Container(5, 6)
        self.containers5 = Container(8, 6)
        self.containers6 = Container(10, 10)
        self.containers7 = Container(11, 11)
        self.containers8 = Container(7, 12)
        self.passo = 0
        self.containers2.lv = 2
        self.containers3.lv = 4
        self.containers5.lv = 4
        self.containers6.lv = 1
        self.containers8.lv = 2
        self.shower_length = 300
        self.containers = [self.containers1, self.containers2, self.containers3, self.containers4,
                           self.containers5, self.containers6, self.containers7, self.containers8]
        self.positions = {}
        self.capacities = {}
        self.camions = []
        b = 0
        for cont in self.containers:
            b += cont.lv
        reward = 0
        nCamionFloat = 0
        while b > 6:
            b += -10
            nCamionFloat += 1
        nCamionInt = int(nCamionFloat)
        for ic in range(nCamionInt):
            self.camions.append(Camion(1, 1, None, ic))
        for cam in self.camions:
            self.positions[cam.name] = cam.position
            self.capacities[cam.name] = 10
        self.frames = []
        self.cnt = 0
        self.mapp = Map(15, 15, self.camions, self.containers)
        self.state = (15 * 15) / 5
        self.action_space = gym.spaces.Discrete(4)
        self.observation_space = Box(low=np.array([0]), high=np.array([51]))

    def step(self, action):
        moves = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
        done = False
        ic = 0
        for cam in self.camions:
            cam.position = (self.positions[ic][0], self.positions[ic][1])
            cam.capacity = self.capacities[ic]
        self.state += -5
        mossa = moves[action]
        x = self.camions[self.i].position
        reward = 0
        nuovaposizione = [mossa[0] + x[0], mossa[1] + x[1]]
        self.shower_length -= 1
        if self.mapp.mapp[nuovaposizione[0], nuovaposizione[1]] == -1:
            reward += -5
            self.state += -5
        else:
            self.mapp.mapp[x[0], x[1]] = 0
            self.camions[self.i].position = nuovaposizione
            self.mapp.mapp[nuovaposizione[0], nuovaposizione[1]] = 9
            self.positions.update({self.camions[self.i].name: nuovaposizione})
            reward += -1
            self.state = -2
        for contain in self.containers:
            if (self.camions[self.i].position[0] == contain.position[0]
                    and self.camions[self.i].position[1] == contain.position[1]):
                if contain.lv == 3 and self.camions[self.i].capacity >= 3:
                    self.camions[self.i].reward += 100
                    self.camions[self.i].capacity += -3
                    self.capacities.update({self.camions[self.i].name: self.camions[self.i].capacity})
                    reward += 20
                    self.state += 20
                    contain.lv = 0
                elif contain.lv == 2 and self.camions[self.i].capacity >= 2:
                    self.camions[self.i].reward += 50
                    self.camions[self.i].capacity += -2
                    self.capacities.update({self.camions[self.i].name: self.camions[self.i].capacity})
                    self.state += 10
                    reward += 50
                    contain.lv = 0
                elif contain.lv == 1 and self.camions[self.i].capacity >= 1:
                    reward += 10
                    self.camions[self.i].reward += 5
                    self.camions[self.i].capacity += -1
                    self.capacities.update({self.camions[self.i].name: self.camions[self.i].capacity})
                    contain.lv = 0
                    self.state += 1
                elif contain.lv == 4 and self.camions[self.i].capacity >= 4:
                    reward += 50
                    self.camions[self.i].reward += 50
                    self.camions[self.i].capacity += -4
                    self.capacities.update({self.camions[self.i].name: self.camions[self.i].capacity})
                    self.state += 50
                    contain.lv = 0
                elif contain.lv == 0 and self.camions[self.i].capacity >= 4:
                    reward += -20
                    self.camions[self.i].reward += -20
                    self.camions[self.i].capacity += 0
                    self.state += -20
                    contain.lv = 0
        if self.camions[self.i].capacity <= 2:
            self.camions[self.i].positions = (1, 1)
            self.positions.update({self.camions[self.i].name: (1, 1)})
            self.camions[self.i].capacity = 10
            self.capacities.update({self.camions[self.i].name: self.camions[self.i].capacity})
        if self.i == 1:
            self.i = 0
        elif self.i == 0:
            self.i = 1
        if self.shower_length <= 0:
            done = True
        else:
            done = False
        self.passo += 1
        info = {}
        return self.state, reward, done, info
def render(self, mode="human"):
BLACK = (0, 0, 0)
WHITE = (200, 200, 200)
WINDOW_HEIGHT = len(self.mapp.mapp[0]) *50
WINDOW_WIDTH = len(self.mapp.mapp[0]) *50
whiteC=pygame.image.load('white.jpg')
whiteC=pygame.transform.scale(whiteC,(50, 50))
greenC=pygame.image.load('green.jpg')
greenC=pygame.transform.scale(greenC,(50, 50))
yellowC=pygame.image.load('yellow.jpg')
yellowC=pygame.transform.scale(yellowC,(50, 50))
orangeC=pygame.image.load('orange.jpg')
orangeC=pygame.transform.scale(orangeC,(50, 50))
redC=pygame.image.load('red.jpg')
redC=pygame.transform.scale(redC,(50, 50))
gT=pygame.image.load('greenCamion.jpg')
gT=pygame.transform.scale(gT,(50, 50))
yT=pygame.image.load('yellowCamion.jpg')
yT=pygame.transform.scale(yT,(50, 50))
rT=pygame.image.load('redCamion.jpg')
rT=pygame.transform.scale(rT,(50, 50))
global SCREEN, CLOCK
pygame.init()
SCREEN = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
CLOCK = pygame.time.Clock()
SCREEN.fill(BLACK)
pygame.draw.rect(SCREEN, WHITE, pygame.Rect( 10, 0, 50, 50))
blockSize = 50 #Set the size of the grid block
for i in range(0,len(self.mapp.mapp[0])):
for j in range(0,len(self.mapp.mapp[0])):
a=i*50
b=j*50
if self.mapp.mapp[i][j] == -1:
pygame.draw.rect(SCREEN, WHITE, pygame.Rect( a, b, 50, 50))
for c in self.camions :
if c.capacity > 6:
SCREEN.blit(gT, (c.position[0]*50, c.position[1]*50))
if c.capacity > 3 and c.capacity <= 6:
SCREEN.blit(yT, (c.position[0]*50, c.position[1]*50))
if c.capacity <= 3:
SCREEN.blit(rT, (c.position[0]*50, c.position[1]*50))
for contain in self.containers :
if contain.lv == 0:
SCREEN.blit(whiteC,(contain.position[0]*50 , contain.position[1]*50))
elif contain.lv == 1:
SCREEN.blit(greenC,(contain.position[0]*50 , contain.position[1]*50))
elif contain.lv == 2:
SCREEN.blit(yellowC,(contain.position[0]*50 , contain.position[1]*50))
elif contain.lv == 3:
SCREEN.blit(orangeC,(contain.position[0]*50 , contain.position[1]*50))
if contain.lv == 4:
SCREEN.blit(redC,(contain.position[0]*50 , contain.position[1]*50))
for x in range(0, WINDOW_WIDTH, blockSize):
for y in range(0, WINDOW_HEIGHT, blockSize):
rect = pygame.Rect(x, y, blockSize, blockSize)
pygame.draw.rect(SCREEN, WHITE, rect, 1)
pygame.display.flip()
view = pygame.surfarray.array3d(SCREEN)
view = view.transpose([1, 0, 2])
img_bgr = cv2.cvtColor(view, cv2.COLOR_RGB2BGR)
pygame.image.save(SCREEN, f"screenshot{self.cnt}.png")
self.cnt +=1
pygame.event.get()

    def reset(self):
        self.state = (15 * 15) / 4
        self.shower_length = 300
        self.containers1.lv = 3
        self.containers2.lv = 1
        self.containers7.lv = 2
        self.containers3.lv = 4
        self.containers5.lv = 4
        self.containers6.lv = 1
        self.containers8.lv = 2
        self.passo = 0
        self.positions = {}
        self.capacities = {}
        self.camions = []
        b = 0
        for cont in self.containers:
            b += cont.lv
        reward = 0
        nCamionFloat = 0
        while b > 6:
            b += -10
            nCamionFloat += 1
        nCamionInt = int(nCamionFloat)
        for ic in range(nCamionInt):
            self.camions.append(Camion(1, 1, None, ic))
        for cam in self.camions:
            self.positions[cam.name] = cam.position
            self.capacities[cam.name] = 10
        self.shower_length = 60
        self.cnt = 0
        self.i = 0


containers = [containers1, containers2, containers3, containers4]
containers.append(containers1)

states = env.observation_space.shape
actions = env.action_space.n
b = env.action_space.sample()
My model:
def build_model(states, actions):
    model = tf.keras.Sequential([
        keras.layers.Dense(64, input_shape=states),
        keras.layers.LeakyReLU(0.24),
        keras.layers.Dense(64),
        keras.layers.LeakyReLU(0.24),
        keras.layers.Dense(32),
        keras.layers.LeakyReLU(0.24),
        keras.layers.Dense(16),
        keras.layers.LeakyReLU(0.24),
        keras.layers.Dense(8),
        keras.layers.LeakyReLU(0.24),
        keras.layers.Dense(actions, activation='linear'),
    ])
    return model

model = build_model(states, actions)
model.compile(loss='mse', metrics=['accuracy'])

def build_agent(model, actions):
    policy = GreedyQPolicy()
    memory = SequentialMemory(limit=10000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions,
                   nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

dqn = build_agent(model, actions)
dqn.compile(tf.keras.optimizers.Adadelta(learning_rate=0.1, rho=0.95, epsilon=1e-07, name='Adadelta'),
            metrics=["accuracy"])

a = dqn.fit(env, nb_steps=5000, visualize=True, verbose=2)
The loss starts around 50 and goes up to 200.
The loss is not that meaningful in RL; a very high loss is actually quite normal. In RL what we mostly care about is the reward (one simple way to track it is sketched below).
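A minimal sketch of one way to watch the reward instead of the loss (this is not part of the question's code, and the wrapper name is mine): a thin gym.Wrapper that accumulates the reward of the running episode and records it when the episode ends, using the same obs/reward/done/info step signature as the environment above.

import gym

class EpisodeReturnLogger(gym.Wrapper):
    """Accumulates the reward of the current episode and stores it when done."""
    def __init__(self, env):
        super().__init__(env)
        self.episode_return = 0.0
        self.episode_returns = []          # one entry per finished episode

    def reset(self, **kwargs):
        self.episode_return = 0.0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.episode_return += reward      # track the return, not the loss
        if done:
            self.episode_returns.append(self.episode_return)
        return obs, reward, done, info

# env = EpisodeReturnLogger(Marltf())     # then train on the wrapped env as before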
In reinforcement learning you usually do not care about the loss but about the return. Judging from the class name, this also looks like a multi-agent RL problem, which is generally harder to handle than the single-agent case.
The first thing I would try changing is the number of steps: 5000 is very low.
Try to define an episode, if you have not already, then plot the cumulative reward obtained at the end of each episode and check whether it increases over episodes (see the sketch below).
This is the cleanest way to check whether the reward is actually increasing and whether the agent is learning.
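As a minimal sketch of that check, assuming the keras-rl setup from the question: if I remember correctly, DQNAgent.fit() returns a Keras History object whose history dict holds one 'episode_reward' entry per finished episode, so the per-episode return can be plotted directly.

import matplotlib.pyplot as plt

# train much longer than 5000 steps, then read the per-episode rewards back
history = dqn.fit(env, nb_steps=100000, visualize=False, verbose=1)

episode_rewards = history.history.get('episode_reward', [])
plt.plot(range(1, len(episode_rewards) + 1), episode_rewards)
plt.xlabel('episode')
plt.ylabel('cumulative reward per episode')
plt.title('Does the return go up as training progresses?')
plt.show()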