强化学习中的多维动作空间(Action Space)
Multidimensional Action Space in Reinforcement Learning
我目前的目标是训练一个每一步同时执行两个动作的智能体(一艘船):1. 选择航向角(下一步往哪里走);2. 选择加速度(是否改变速度)。
然而,我始终没弄明白应该如何正确地构建动作空间(action space)和状态空间(state space)。我不断收到同一个错误,却不知道如何修复。我一直在尝试用 Space 包装器来实现。
我使用的代码如下。
#Packages used
using ReinforcementLearning
using Flux #Needed for all the Neural Networks functionalities
using Plots
using DelimitedFiles #Needed to read all the txt files
using PolygonOps
using Random
using Intervals #not being used
#GeoBoundariesManipulation
include(joinpath(pwd(),"GeoBoundariesManipulation.jl"));
using .GeoBoundariesManipulation
#My problem's parameters
"""
    ShippingEnvParams

Immutable settings of the shipping grid world: grid size, the discrete velocity,
acceleration and heading options, the two step penalties, the start and goal cells, and
the boundary polygons.
"""
struct ShippingEnvParams
    # Gridworld dimensions.
    gridworld_dims::Tuple{Int64,Int64}
    # Available velocities (6 to 20 knots).
    velocities::Vector{Int64}
    # Available acceleration per step: -2, 0, 2.
    acceleration::Vector{Int64}
    # All heading manoeuvres, expressed as grid offsets.
    heading::Vector{CartesianIndex{2}}
    # Punishment for an ordinary step.
    punishment::Int64
    # Punishment for heading towards an island or out of the grid bounds.
    out_of_grid_punishment::Int64
    # Cell the ship starts from.
    StartingPoint::CartesianIndex{2}
    # Cell the ship has to reach.
    GoalPoint::CartesianIndex{2}
    # All the boundaries, one vector of (x, y) vertices per polygon.
    all_polygons::Vector{Vector{Tuple{Float64,Float64}}}
end
# Keyword constructor for ShippingEnvParams. Every setting has a default; unless they are
# supplied, the start cell, the goal cell and the boundary polygons are obtained from
# GeoBoundariesManipulation (the two cells from coordinate pairs scaled to the grid size,
# the polygons from "finalboundaries").
function ShippingEnvParams(;
    gridworld_dims = (50, 50),
    velocities = collect(6:2:20),
    acceleration = collect(-2:2:2),
    heading = [
        CartesianIndex(0, 1),
        CartesianIndex(0, -1),
        CartesianIndex(-1, 0),
        CartesianIndex(-1, 1),
        CartesianIndex(-1, -1),
        CartesianIndex(1, -1),
        CartesianIndex(1, 1),
        CartesianIndex(1, 0),
    ],
    punishment = -5,
    out_of_grid_punishment = -100,
    StartingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex(
        (-6.733535, 61.997345), gridworld_dims[1], gridworld_dims[2]
    ),
    EndingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex(
        (-6.691500, 61.535580), gridworld_dims[1], gridworld_dims[2]
    ),
    AllPolygons = GeoBoundariesManipulation.load_files("finalboundaries"),
)
    # The struct is filled positionally, so the order below must follow the field order.
    return ShippingEnvParams(
        gridworld_dims,
        velocities,
        acceleration,
        heading,
        punishment,
        out_of_grid_punishment,
        StartingPoint,
        EndingPoint,
        AllPolygons,
    )
end
###ENVIRONMENT CONSTRUCTION
#Instance
"""
    ShippingEnv <: AbstractEnv

Mutable environment for the shipping problem. It holds the fixed parameters, the action
and observation spaces, and the quantities that change while the ship moves.
"""
mutable struct ShippingEnv <: AbstractEnv
    # Fixed problem settings.
    params::ShippingEnvParams
    # Pair of index ranges an action is drawn from.
    action_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}}
    # Pair of index ranges describing the state space.
    observation_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}}
    # Current state: (position, velocity).
    state::Space{Tuple{Int64,Int64}}
    # Current action: (heading_angle, acceleration).
    action::Space{Tuple{Int64,Int64}}
    # Whether the agent has reached its goal.
    done::Bool
    # Current grid cell of the ship.
    position::CartesianIndex{2}
    time::Float64
    # Current velocity, taken from `params.velocities`.
    velocity::Int64
    distance::Float64
    reward::Union{Nothing,Float64}
end
# Zero-argument constructor: builds the default parameters, derives both spaces from
# them, assembles the environment and returns it after a reset.
function ShippingEnv()
    params1 = ShippingEnvParams()
    n_rows, n_cols = params1.gridworld_dims

    # Action space: (1..number of heading options, 1..number of acceleration options).
    actions = Space((1:length(params1.heading), 1:length(params1.acceleration)))
    # Observation space: (1..number of grid tiles, 1..number of velocity options).
    observations = Space((1:(n_rows * n_cols), 1:length(params1.velocities)))
    # Initial state: linear index of the starting tile paired with a velocity of 6.
    start_tile = LinearIndices((n_rows, n_cols))[params1.StartingPoint]

    env = ShippingEnv(
        params1,
        actions,
        observations,
        Space((start_tile, 6)),
        Space((1, 1)),            # placeholder action
        false,                    # done
        params1.StartingPoint,    # position
        0.0,                      # time
        params1.velocities[1],    # velocity
        0.0,                      # distance
        0.0,                      # reward
    )
    reset!(env)
    return env
end
# Minimal RLBase interface for ShippingEnv.
RLBase.action_space(m::ShippingEnv) = m.action_space
RLBase.state_space(m::ShippingEnv) = m.observation_space
# Report the reward that `nextstep` stores in `m.reward` (step punishment or out-of-grid
# punishment, minus the travel time) instead of a constant, so the penalties configured
# in the parameters actually reach the agent. The field may hold `nothing`; that case is
# reported as 0.0.
RLBase.reward(m::ShippingEnv) = something(m.reward, 0.0)
RLBase.is_terminated(m::ShippingEnv) = m.done
RLBase.state(m::ShippingEnv) = m.state
#Random.seed!(m::ShippingEnv,seed) = Random.seed!(m.rng,seed)
# Put the environment back at the start of an episode.
#
# Besides position, velocity, the done flag, time and distance, this also restores the
# reported state and the stored reward. Otherwise `RLBase.state` would keep returning
# the last state of the previous episode after a reset.
function RLBase.reset!(m::ShippingEnv)
    start = m.params.StartingPoint
    m.position = start
    m.velocity = m.params.velocities[1]
    m.done = false
    m.time = 0.0
    m.distance = 0.0
    m.reward = 0.0
    # Same layout as the state built by the constructor: (linear tile index, velocity).
    m.state = Space((LinearIndices(m.params.gridworld_dims)[start], m.velocity))
    return nothing
end
# Step function, run every time an action is made. The action is a vector of integers;
# its first entry is forwarded to `nextstep` as the heading action and its second entry
# as the acceleration.
# NOTE(review): `action_space` is built from a tuple of ranges, and the stack trace
# quoted further down shows the environment being called with a `Tuple{Int64, Int64}`.
# This method only accepts `Vector{Int64}`, so that call never reaches it.
function (m::ShippingEnv)(a::Vector{Int64})
    head_action, acceleration = a[1], a[2]
    return nextstep(m, head_action, acceleration)
end
# Advance the environment by one step.
#
# Arguments
# - `head_action`: index into `m.params.heading` selecting the grid offset to move by.
# - `acceleration`: index into `m.params.acceleration` selecting the velocity change.
#
# Effects
# - Moves the ship unless the target cell is outside the grid or inside a polygon; a
#   rejected move keeps the position and costs `out_of_grid_punishment`.
# - Applies the velocity change when the result stays within the available velocities
#   (both ends included).
# - Stores the step reward (punishment minus travel time), the done flag and the state.
#
# Returns the velocity after the step.
function nextstep(m::ShippingEnv, head_action, acceleration)
    dims = m.params.gridworld_dims
    heading = m.params.heading[head_action]
    r = m.params.punishment  # punishment when nothing goes wrong
    dist_covered = sqrt(heading[1]^2 + heading[2]^2)

    # Evaluate the target cell before committing to it, so a rejected move needs no undo.
    candidate = m.position + heading
    in_grid = 1 <= candidate[1] <= dims[1] && 1 <= candidate[2] <= dims[2]
    next_state_norm = (candidate[1] / dims[1], candidate[2] / dims[2])
    if in_grid && !inanypolygon(next_state_norm, m.params.all_polygons)
        m.position = candidate
        m.distance += dist_covered
    else
        r = m.params.out_of_grid_punishment
    end

    # The action carries an index into the acceleration table, not the change itself.
    new_velocity = m.velocity + m.params.acceleration[acceleration]
    # Inclusive bounds: the slowest and fastest velocities must stay reachable.
    if minimum(m.params.velocities) <= new_velocity <= maximum(m.params.velocities)
        m.velocity = new_velocity
    end

    # NOTE(review): a rejected move is still charged the travel time of the attempted
    # step - confirm this is intended.
    m.time = dist_covered / m.velocity
    m.reward = r - m.time
    m.done = m.position == m.params.GoalPoint

    # The state wraps an immutable tuple, so it is rebuilt rather than written in place.
    # NOTE(review): the second entry is the velocity value, while the observation space's
    # second range runs over velocity indices - confirm which one is wanted.
    m.state = Space((LinearIndices(dims)[m.position], m.velocity))
    return m.velocity
end
# Build the environment with its default parameters and run RLBase's runnable check on it.
env = ShippingEnv()
RLBase.test_runnable!(env)
这是我运行 test_runnable!(env) 之后得到的堆栈跟踪。
Error During Test at C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266
Got exception outside of a @test
method not implemented
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:33
[2] (::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer) (repeats 2 times)
@ ReinforcementLearningBase .\none:0
[3] macro expansion
@ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:281 [inlined]
[4] macro expansion
@ C:\Users\kwstas\AppData\Local\Programs\Julia-1.7.1\share\julia\stdlib\v1.7\Test\src\Test.jl:1283 [inlined]
[5] test_runnable!(env::ShippingEnv, n::Int64; rng::Random._GLOBAL_RNG)
@ ReinforcementLearningBase C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:267
[6] test_runnable! (repeats 2 times)
@ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266 [inlined]
[7] top-level scope
@ c:\Users\kwstas\Desktop\ThesisDir\RL-New-Env.jl:138
[8] eval
@ .\boot.jl:373 [inlined]
[9] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base .\loading.jl:1196
[10] invokelatest(::Any, ::Any, ::Vararg{Any}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base .\essentials.jl:716
[11] invokelatest(::Any, ::Any, ::Vararg{Any})
@ Base .\essentials.jl:714
[12] inlineeval(m::Module, code::String, code_line::Int64, code_column::Int64, file::String; softscope::Bool)
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:211
[13] (::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:155
[14] withpath(f::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams}, path::String)
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:184
[15] (::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:153
[16] hideprompt(f::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:36
[17] (::VSCodeServer.var"#63#67"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:124
[18] with_logstate(f::Function, logstate::Any)
@ Base.CoreLogging .\logging.jl:511
[19] with_logger
@ .\logging.jl:623 [inlined]
[20] (::VSCodeServer.var"#62#66"{VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:201
[21] #invokelatest#2
@ .\essentials.jl:716 [inlined]
[22] invokelatest(::Any)
@ Base .\essentials.jl:714
[23] macro expansion
@ c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:34 [inlined]
[24] (::VSCodeServer.var"#60#61")()
@ VSCodeServer .\task.jl:423
Test Summary: | Pass Error Total
random policy with ShippingEnv | 2 1 3
ERROR: Some tests did not pass: 2 passed, 0 failed, 1 errored, 0 broken.
我觉得错误信息已经解释得很清楚了:
(::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer)
这意味着没有找到对应的方法。它是 (::ShippingEnv)(action::Tuple{Int, Int}) 的回退(fallback),而您实现的是 (m::ShippingEnv)(a::Vector{Int64})。
所以这里有两个选择:要么改为定义 (m::ShippingEnv)(a::Tuple{Int64, Int64}),要么把动作空间定义为 Space([1:length(params1.heading),1:length(params1.acceleration)])。
请注意 tuple 和 vector 之间的区别。
我目前的目标是训练一个每一步同时执行两个动作的智能体(一艘船):1. 选择航向角(下一步往哪里走);2. 选择加速度(是否改变速度)。
然而,我始终没弄明白应该如何正确地构建动作空间(action space)和状态空间(state space)。我不断收到同一个错误,却不知道如何修复。我一直在尝试用 Space 包装器来实现。
我使用的代码如下。
#Packages used
using ReinforcementLearning
using Flux #Needed for all the Neural Networks functionalities
using Plots
using DelimitedFiles #Needed to read all the txt files
using PolygonOps
using Random
using Intervals #not being used
#GeoBoundariesManipulation
include(joinpath(pwd(),"GeoBoundariesManipulation.jl"));
using .GeoBoundariesManipulation
#My problem's parameters
"""
    ShippingEnvParams

Immutable settings of the shipping grid world: grid size, the discrete velocity,
acceleration and heading options, the two step penalties, the start and goal cells, and
the boundary polygons.
"""
struct ShippingEnvParams
    # Gridworld dimensions.
    gridworld_dims::Tuple{Int64,Int64}
    # Available velocities (6 to 20 knots).
    velocities::Vector{Int64}
    # Available acceleration per step: -2, 0, 2.
    acceleration::Vector{Int64}
    # All heading manoeuvres, expressed as grid offsets.
    heading::Vector{CartesianIndex{2}}
    # Punishment for an ordinary step.
    punishment::Int64
    # Punishment for heading towards an island or out of the grid bounds.
    out_of_grid_punishment::Int64
    # Cell the ship starts from.
    StartingPoint::CartesianIndex{2}
    # Cell the ship has to reach.
    GoalPoint::CartesianIndex{2}
    # All the boundaries, one vector of (x, y) vertices per polygon.
    all_polygons::Vector{Vector{Tuple{Float64,Float64}}}
end
# Keyword constructor for ShippingEnvParams. Every setting has a default; unless they are
# supplied, the start cell, the goal cell and the boundary polygons are obtained from
# GeoBoundariesManipulation (the two cells from coordinate pairs scaled to the grid size,
# the polygons from "finalboundaries").
function ShippingEnvParams(;
    gridworld_dims = (50, 50),
    velocities = collect(6:2:20),
    acceleration = collect(-2:2:2),
    heading = [
        CartesianIndex(0, 1),
        CartesianIndex(0, -1),
        CartesianIndex(-1, 0),
        CartesianIndex(-1, 1),
        CartesianIndex(-1, -1),
        CartesianIndex(1, -1),
        CartesianIndex(1, 1),
        CartesianIndex(1, 0),
    ],
    punishment = -5,
    out_of_grid_punishment = -100,
    StartingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex(
        (-6.733535, 61.997345), gridworld_dims[1], gridworld_dims[2]
    ),
    EndingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex(
        (-6.691500, 61.535580), gridworld_dims[1], gridworld_dims[2]
    ),
    AllPolygons = GeoBoundariesManipulation.load_files("finalboundaries"),
)
    # The struct is filled positionally, so the order below must follow the field order.
    return ShippingEnvParams(
        gridworld_dims,
        velocities,
        acceleration,
        heading,
        punishment,
        out_of_grid_punishment,
        StartingPoint,
        EndingPoint,
        AllPolygons,
    )
end
###ENVIRONMENT CONSTRUCTION
#Instance
"""
    ShippingEnv <: AbstractEnv

Mutable environment for the shipping problem. It holds the fixed parameters, the action
and observation spaces, and the quantities that change while the ship moves.
"""
mutable struct ShippingEnv <: AbstractEnv
    # Fixed problem settings.
    params::ShippingEnvParams
    # Pair of index ranges an action is drawn from.
    action_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}}
    # Pair of index ranges describing the state space.
    observation_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}}
    # Current state: (position, velocity).
    state::Space{Tuple{Int64,Int64}}
    # Current action: (heading_angle, acceleration).
    action::Space{Tuple{Int64,Int64}}
    # Whether the agent has reached its goal.
    done::Bool
    # Current grid cell of the ship.
    position::CartesianIndex{2}
    time::Float64
    # Current velocity, taken from `params.velocities`.
    velocity::Int64
    distance::Float64
    reward::Union{Nothing,Float64}
end
# Zero-argument constructor: builds the default parameters, derives both spaces from
# them, assembles the environment and returns it after a reset.
function ShippingEnv()
    params1 = ShippingEnvParams()
    n_rows, n_cols = params1.gridworld_dims

    # Action space: (1..number of heading options, 1..number of acceleration options).
    actions = Space((1:length(params1.heading), 1:length(params1.acceleration)))
    # Observation space: (1..number of grid tiles, 1..number of velocity options).
    observations = Space((1:(n_rows * n_cols), 1:length(params1.velocities)))
    # Initial state: linear index of the starting tile paired with a velocity of 6.
    start_tile = LinearIndices((n_rows, n_cols))[params1.StartingPoint]

    env = ShippingEnv(
        params1,
        actions,
        observations,
        Space((start_tile, 6)),
        Space((1, 1)),            # placeholder action
        false,                    # done
        params1.StartingPoint,    # position
        0.0,                      # time
        params1.velocities[1],    # velocity
        0.0,                      # distance
        0.0,                      # reward
    )
    reset!(env)
    return env
end
# Minimal RLBase interface for ShippingEnv.
RLBase.action_space(m::ShippingEnv) = m.action_space
RLBase.state_space(m::ShippingEnv) = m.observation_space
# Report the reward that `nextstep` stores in `m.reward` (step punishment or out-of-grid
# punishment, minus the travel time) instead of a constant, so the penalties configured
# in the parameters actually reach the agent. The field may hold `nothing`; that case is
# reported as 0.0.
RLBase.reward(m::ShippingEnv) = something(m.reward, 0.0)
RLBase.is_terminated(m::ShippingEnv) = m.done
RLBase.state(m::ShippingEnv) = m.state
#Random.seed!(m::ShippingEnv,seed) = Random.seed!(m.rng,seed)
# Put the environment back at the start of an episode.
#
# Besides position, velocity, the done flag, time and distance, this also restores the
# reported state and the stored reward. Otherwise `RLBase.state` would keep returning
# the last state of the previous episode after a reset.
function RLBase.reset!(m::ShippingEnv)
    start = m.params.StartingPoint
    m.position = start
    m.velocity = m.params.velocities[1]
    m.done = false
    m.time = 0.0
    m.distance = 0.0
    m.reward = 0.0
    # Same layout as the state built by the constructor: (linear tile index, velocity).
    m.state = Space((LinearIndices(m.params.gridworld_dims)[start], m.velocity))
    return nothing
end
# Step function, run every time an action is made. The action is a vector of integers;
# its first entry is forwarded to `nextstep` as the heading action and its second entry
# as the acceleration.
# NOTE(review): `action_space` is built from a tuple of ranges, and the stack trace
# quoted further down shows the environment being called with a `Tuple{Int64, Int64}`.
# This method only accepts `Vector{Int64}`, so that call never reaches it.
function (m::ShippingEnv)(a::Vector{Int64})
    head_action, acceleration = a[1], a[2]
    return nextstep(m, head_action, acceleration)
end
# Advance the environment by one step.
#
# Arguments
# - `head_action`: index into `m.params.heading` selecting the grid offset to move by.
# - `acceleration`: index into `m.params.acceleration` selecting the velocity change.
#
# Effects
# - Moves the ship unless the target cell is outside the grid or inside a polygon; a
#   rejected move keeps the position and costs `out_of_grid_punishment`.
# - Applies the velocity change when the result stays within the available velocities
#   (both ends included).
# - Stores the step reward (punishment minus travel time), the done flag and the state.
#
# Returns the velocity after the step.
function nextstep(m::ShippingEnv, head_action, acceleration)
    dims = m.params.gridworld_dims
    heading = m.params.heading[head_action]
    r = m.params.punishment  # punishment when nothing goes wrong
    dist_covered = sqrt(heading[1]^2 + heading[2]^2)

    # Evaluate the target cell before committing to it, so a rejected move needs no undo.
    candidate = m.position + heading
    in_grid = 1 <= candidate[1] <= dims[1] && 1 <= candidate[2] <= dims[2]
    next_state_norm = (candidate[1] / dims[1], candidate[2] / dims[2])
    if in_grid && !inanypolygon(next_state_norm, m.params.all_polygons)
        m.position = candidate
        m.distance += dist_covered
    else
        r = m.params.out_of_grid_punishment
    end

    # The action carries an index into the acceleration table, not the change itself.
    new_velocity = m.velocity + m.params.acceleration[acceleration]
    # Inclusive bounds: the slowest and fastest velocities must stay reachable.
    if minimum(m.params.velocities) <= new_velocity <= maximum(m.params.velocities)
        m.velocity = new_velocity
    end

    # NOTE(review): a rejected move is still charged the travel time of the attempted
    # step - confirm this is intended.
    m.time = dist_covered / m.velocity
    m.reward = r - m.time
    m.done = m.position == m.params.GoalPoint

    # The state wraps an immutable tuple, so it is rebuilt rather than written in place.
    # NOTE(review): the second entry is the velocity value, while the observation space's
    # second range runs over velocity indices - confirm which one is wanted.
    m.state = Space((LinearIndices(dims)[m.position], m.velocity))
    return m.velocity
end
# Build the environment with its default parameters and run RLBase's runnable check on it.
env = ShippingEnv()
RLBase.test_runnable!(env)
这是我运行 test_runnable!(env) 之后得到的堆栈跟踪。
Error During Test at C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266
Got exception outside of a @test
method not implemented
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:33
[2] (::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer) (repeats 2 times)
@ ReinforcementLearningBase .\none:0
[3] macro expansion
@ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:281 [inlined]
[4] macro expansion
@ C:\Users\kwstas\AppData\Local\Programs\Julia-1.7.1\share\julia\stdlib\v1.7\Test\src\Test.jl:1283 [inlined]
[5] test_runnable!(env::ShippingEnv, n::Int64; rng::Random._GLOBAL_RNG)
@ ReinforcementLearningBase C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:267
[6] test_runnable! (repeats 2 times)
@ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266 [inlined]
[7] top-level scope
@ c:\Users\kwstas\Desktop\ThesisDir\RL-New-Env.jl:138
[8] eval
@ .\boot.jl:373 [inlined]
[9] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base .\loading.jl:1196
[10] invokelatest(::Any, ::Any, ::Vararg{Any}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base .\essentials.jl:716
[11] invokelatest(::Any, ::Any, ::Vararg{Any})
@ Base .\essentials.jl:714
[12] inlineeval(m::Module, code::String, code_line::Int64, code_column::Int64, file::String; softscope::Bool)
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:211
[13] (::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:155
[14] withpath(f::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams}, path::String)
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:184
[15] (::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:153
[16] hideprompt(f::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:36
[17] (::VSCodeServer.var"#63#67"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:124
[18] with_logstate(f::Function, logstate::Any)
@ Base.CoreLogging .\logging.jl:511
[19] with_logger
@ .\logging.jl:623 [inlined]
[20] (::VSCodeServer.var"#62#66"{VSCodeServer.ReplRunCodeRequestParams})()
@ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:201
[21] #invokelatest#2
@ .\essentials.jl:716 [inlined]
[22] invokelatest(::Any)
@ Base .\essentials.jl:714
[23] macro expansion
@ c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:34 [inlined]
[24] (::VSCodeServer.var"#60#61")()
@ VSCodeServer .\task.jl:423
Test Summary: | Pass Error Total
random policy with ShippingEnv | 2 1 3
ERROR: Some tests did not pass: 2 passed, 0 failed, 1 errored, 0 broken.
我觉得错误信息已经解释得很清楚了:
(::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer)
这意味着没有找到对应的方法。它是 (::ShippingEnv)(action::Tuple{Int, Int}) 的回退(fallback),而您实现的是 (m::ShippingEnv)(a::Vector{Int64})。
所以这里有两个选择:要么改为定义 (m::ShippingEnv)(a::Tuple{Int64, Int64}),要么把动作空间定义为 Space([1:length(params1.heading),1:length(params1.acceleration)])。
请注意 tuple 和 vector 之间的区别。