Getting the following error in Python: index 100 is out of bounds for axis 0 with size 100. I went through other solutions but found them hard to understand.
My agent's objective is to control the speed of a motor. All the state values here are the motor's speed, and the actions are defined as 0 (decrease the speed by 1), 1 (no change), and 2 (increase the speed by 1). I am using Q-learning.
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random

class SpeedControlEnv(Env):  # subclassing Env gives us the gym interface
    def __init__(self):  # initialize actions, observations, spaces
        # Actions we can take: decrease speed, no change, increase speed
        self.action_space = Discrete(3)
        # Observation space holds the current speed so our agent can take the necessary action
        #self.observation_space = Box(low=np.array([0]), high=np.array([100]))  # Box is used for a continuous state space
        self.observation_space = Discrete(100)  # discrete observation space
        # set start speed
        self.state = 40 + random.randint(-30, 40)  # this is the start state for my agent
        # set the time my agent has to complete the task before the motor blows
        self.control_length = 60  # in seconds: my agent has 'n' seconds to bring the speed back to normal

    def step(self, action):  # what the agent does at each step
        # Map the action (0, 1, 2) to a speed change:
        # 0 - 1 = -1  # decrease speed by 1
        # 1 - 1 =  0  # no change
        # 2 - 1 =  1  # increase speed by 1
        self.state += action - 1
        # with each action, reduce the time my agent has left by 1
        self.control_length -= 1
        # assign reward
        if self.state >= 40 and self.state <= 45:
            reward = 1
        else:
            reward = -1
        # check whether the episode is done
        if self.control_length <= 0:
            done = True
        else:
            done = False
        # apply random noise
        #self.state += random.randint(-3, 3)
        # placeholder for extra information, required by the OpenAI Gym API
        info = {}
        # return step information
        return self.state, reward, done, info

    def render(self):  # visualization
        pass

    def reset(self):  # reset after an episode
        # Reset the speed, i.e. the state
        self.state = 40 + random.randint(-30, 40)
        # Reset the control time
        self.control_length = 60
        return self.state
My hyperparameters are defined as follows:
#initialize all hyperparameters
num_episodes = 50000 #number of episodes the agent plays
#max_steps_per_episode = 60 #max steps agent can take in one episode
learning_rate = 0.1 #alpha
discount_rate = 0.99 #gamma
exploration_rate = 1 #epsilon
max_exploration_rate = 1 #max epsilon
min_exploration_rate = 0.01 #min epsilon
exploration_decay_rate = 0.01 #decaying rate of exploration
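The training loop below also relies on q_table, reward_from_all_episodes, and each_state, which are not shown in the post. A minimal setup consistent with the environment above would be (a sketch; the zero initialization of the Q-table is my assumption):

env = SpeedControlEnv()
# one row per state, one column per action -> shape (100, 3)
q_table = np.zeros((env.observation_space.n, env.action_space.n))
reward_from_all_episodes = []  # total reward collected in each episode
each_state = []                # final state reached in each episode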
My Q-learning code is as follows:
for episode in range(num_episodes):
    state = env.reset()
    done = False
    reward_current_episode = 0
    for step in range(env.control_length):
        exploration_rate_threshold = random.uniform(0, 1)
        if exploration_rate_threshold > exploration_rate:
            action = np.argmax(q_table[state, :])  # exploit: best known action
        else:
            action = env.action_space.sample()  # explore: random action
        new_state, reward, done, info = env.step(action)
        # Update Q table
        q_table[state, action] = q_table[state, action]*(1 - learning_rate) + learning_rate*(reward + discount_rate*np.max(q_table[new_state, :]))
        state = new_state
        reward_current_episode += reward
        if done:
            break
    # decay epsilon after each episode
    exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate)*np.exp(-exploration_decay_rate*episode)
    # append the reward from the current episode to the list of rewards from all episodes
    reward_from_all_episodes.append(reward_current_episode)
    each_state.append(state)
The error:
IndexError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_6020/777037272.py in <module>
14
15 #Update Q table
---> 16 q_table[state,action] = q_table[state, action]*(1-learning_rate) + learning_rate*(reward + discount_rate*np.max(q_table[new_state,:]))
17
18 state = new_state
IndexError: index 100 is out of bounds for axis 0 with size 100
It would be great if someone could explain why this error occurs. I am new to programming and machine learning.
You seem to be indexing a numpy array. Arrays, like almost everything else in Python and in programming generally, are zero-indexed: their indices start at 0, not 1, so the largest valid index in an array with 100 items is 99. Your q_table has one row per state of Discrete(100), i.e. rows 0 through 99, but nothing in step() keeps self.state inside that range: the starting speed can be as high as 80 (40 + randint(-30, 40)), and the agent can then increase it by 1 on every step, so self.state can reach 100 and beyond. The first time it does, q_table[new_state, :] raises exactly the IndexError you are seeing.
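One way to avoid the crash, assuming your q_table has shape (100, 3), is to clamp self.state inside step() so it always stays within the 0-99 range of your Discrete(100) observation space. A minimal sketch of the changed method (the clip bounds 0 and 99 are my assumption based on your observation space, not something stated in your post):

import numpy as np  # already imported at the top of your file; repeated for completeness

def step(self, action):
    self.state += action - 1  # map action 0/1/2 to speed change -1/0/+1
    # keep the speed inside the valid Q-table row range; the 0..99
    # bounds are assumed from the Discrete(100) observation space
    self.state = int(np.clip(self.state, 0, 99))
    self.control_length -= 1
    reward = 1 if 40 <= self.state <= 45 else -1
    done = self.control_length <= 0
    return self.state, reward, done, {}

Whether clamping, terminating the episode, or giving a large negative reward for leaving the valid range is the right choice depends on what you want the motor to learn; clipping is simply the smallest change that keeps the index valid.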