强化学习中的多维动作 Space

Multidimensional Action Space in Reinforcement Learning

我的目标是暂时训练一个执行两个动作的代理(船)。 1. 选择它的航向角(下一步去哪里)和 2. 选择它的加速度(它是否会改变速度)。

然而,我似乎无法理解如何正确构建我的动作 space 和状态 space。我不断收到一个错误,我不知道如何修复。我一直在尝试使用 Space 包装器使其工作。

我使用下面的代码。

#Packages used
using ReinforcementLearning
using Flux #Needed for all the Neural Networks functionalities
using Plots
using DelimitedFiles #Needed to read all the txt files
using PolygonOps
using Random
using Intervals #not being used

#GeoBoundariesManipulation
include(joinpath(pwd(),"GeoBoundariesManipulation.jl"));
using .GeoBoundariesManipulation

#My problem's parameters
struct ShippingEnvParams
    gridworld_dims::Tuple{Int64,Int64} #Gridworld dimensions
    velocities::Vector{Int64} #available velocities from 6 knots to 20 knots
    acceleration::Vector{Int64} #available acceleration per step: -2, 0, 2
    heading::Vector{CartesianIndex{2}} #all heading manoeuvers
    punishment::Int64 #punishment per ordinary step
    out_of_grid_punishment::Int64 #punishment for going towards an island or out of grid bounds
    StartingPoint::CartesianIndex{2}
    GoalPoint::CartesianIndex{2}
    all_polygons::Vector{Vector{Tuple{Float64,Float64}}} #all the boundaries
end

function ShippingEnvParams(;
    gridworld_dims = (50,50),
    velocities = Vector((6:2:20)), 
    acceleration = Vector((-2:2:2)), 
    heading = [CartesianIndex(0,1);CartesianIndex(0,-1);CartesianIndex(-1,0);CartesianIndex(-1,1);CartesianIndex(-1,-1);CartesianIndex(1,-1);CartesianIndex(1,1);CartesianIndex(1,0)], 
    punishment = -5, 
    out_of_grid_punishment = -100, 
    StartingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex((-6.733535,61.997345),gridworld_dims[1],gridworld_dims[2]),
    EndingPoint = GeoBoundariesManipulation.GoalPointToCartesianIndex((-6.691500,61.535580),gridworld_dims[1],gridworld_dims[2]),
    AllPolygons = GeoBoundariesManipulation.load_files("finalboundaries") 
    )
    ShippingEnvParams(
        gridworld_dims,
        velocities,
        acceleration,
        heading,
        punishment,
        out_of_grid_punishment,
        StartingPoint,
        EndingPoint,
        AllPolygons
    )
end

###ENVIRONMENT CONSTRUCTION
#Instance
mutable struct ShippingEnv <: AbstractEnv
    params::ShippingEnvParams
    action_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}}
    observation_space::Space{Tuple{UnitRange{Int64},UnitRange{Int64}}} #state_space
    state::Space{Tuple{Int64,Int64}} #state: (position,velocity)
    action::Space{Tuple{Int64,Int64}} #action: (heading_angle,acceleration)
    done::Bool #checks if agent has reached its goal
    position::CartesianIndex{2}
    time::Float64
    velocity::Int64
    distance::Float64
    reward::Union{Nothing,Float64} 
end

function ShippingEnv()
    params1 = ShippingEnvParams()
    env = ShippingEnv(
        params1,
        #Base.OneTo(length(params.heading)*length(params.velocities)),
        Space((1:length(params1.heading),1:length(params1.acceleration))), #Space: (1-number of heading options, 1-number of acceleration options)
        #Space([1..params.gridworld_dims[1]*params.gridworld_dims[2],minimum(params.velocities)..maximum(params.velocities)]),
        Space((1:(params1.gridworld_dims[1]*params1.gridworld_dims[2]),(1:length(params1.velocities)))), #(1-number of grid tiles, 1-number of velocity options)
        Space((LinearIndices((params1.gridworld_dims[1],params1.gridworld_dims[2]))[params1.StartingPoint],6)),
        Space((1,1)),
        false,
        params1.StartingPoint,
        0.0,
        params1.velocities[1],
        0.0,
        0.0
    )
    reset!(env)
    env
end


#Minimal interfaces implemented
RLBase.action_space(m::ShippingEnv) = m.action_space
RLBase.state_space(m::ShippingEnv) = m.observation_space
RLBase.reward(m::ShippingEnv) = m.done ? 0.0 : -1.0
RLBase.is_terminated(m::ShippingEnv) = m.done
RLBase.state(m::ShippingEnv) = m.state
#Random.seed!(m::ShippingEnv,seed) = Random.seed!(m.rng,seed)

function RLBase.reset!(m::ShippingEnv)
    m.position = m.params.StartingPoint
    m.velocity = m.params.velocities[1]
    m.done = false
    m.time = 0
    m.distance = 0
    nothing
end

#Function defining what happens every time an action is made
function (m::ShippingEnv)(a::Vector{Int64})
    nextstep(m,a[1],a[2])
end

function nextstep(m::ShippingEnv, head_action, acceleration)
    heading = m.params.heading[head_action]
    r = m.params.punishment #initialized punishment if everything's okay
    m.position += heading
    dist_covered = sqrt(heading[1]^2 + heading[2]^2)
    m.distance += dist_covered
    next_state_norm = (m.position[1]/m.params.gridworld_dims[1],m.position[2]/m.params.gridworld_dims[2])
    #Check if next state is out of bounds and assign appropriate punishment
    if m.position[1]<1 || m.position[1]>m.params.gridworld_dims[1] || m.position[2]<1 || m.position[2]>m.params.gridworld_dims[2] || inanypolygon(next_state_norm, m.params.all_polygons)
        r = m.params.out_of_grid_punishment #replace punishment
        m.position -= heading
        m.distance -= dist_covered
    end

    #Checking if velocity+acceleration is out of velocities' bounds
    if (m.velocity + acceleration > minimum(m.params.velocities)) && (m.velocity + acceleration < maximum(m.params.velocities))
        m.velocity += acceleration
    end
    
    m.time = dist_covered/m.velocity
    m.reward = r -m.time

    m.state[1] = LinearIndices((m.params.gridworld_dims[1],m.params.gridworld_dims[2]))[m.position]
    m.state[2] = m.velocity
end

env = ShippingEnv()
RLBase.test_runnable!(env)

这是我在 运行 test_runnable!(env) 之后得到的堆栈跟踪。

Error During Test at C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266
  Got exception outside of a @test
  method not implemented
  Stacktrace:
    [1] error(s::String)
      @ Base .\error.jl:33
    [2] (::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer) (repeats 2 times)
      @ ReinforcementLearningBase .\none:0
    [3] macro expansion
      @ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:281 [inlined]
    [4] macro expansion
      @ C:\Users\kwstas\AppData\Local\Programs\Julia-1.7.1\share\julia\stdlib\v1.7\Test\src\Test.jl:1283 [inlined]
    [5] test_runnable!(env::ShippingEnv, n::Int64; rng::Random._GLOBAL_RNG)
      @ ReinforcementLearningBase C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:267
    [6] test_runnable! (repeats 2 times)
      @ C:\Users\kwstas\.julia\packages\ReinforcementLearningBase\E7jI5\src\base.jl:266 [inlined]
    [7] top-level scope
      @ c:\Users\kwstas\Desktop\ThesisDir\RL-New-Env.jl:138
    [8] eval
      @ .\boot.jl:373 [inlined]
    [9] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
      @ Base .\loading.jl:1196
   [10] invokelatest(::Any, ::Any, ::Vararg{Any}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
      @ Base .\essentials.jl:716
   [11] invokelatest(::Any, ::Any, ::Vararg{Any})
      @ Base .\essentials.jl:714
   [12] inlineeval(m::Module, code::String, code_line::Int64, code_column::Int64, file::String; softscope::Bool)
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:211
   [13] (::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:155
   [14] withpath(f::VSCodeServer.var"#65#69"{Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams}, path::String)
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:184
   [15] (::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:153
   [16] hideprompt(f::VSCodeServer.var"#64#68"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\repl.jl:36
   [17] (::VSCodeServer.var"#63#67"{Bool, Bool, Bool, Module, String, Int64, Int64, String, VSCodeServer.ReplRunCodeRequestParams})()
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:124
   [18] with_logstate(f::Function, logstate::Any)
      @ Base.CoreLogging .\logging.jl:511
   [19] with_logger
      @ .\logging.jl:623 [inlined]
   [20] (::VSCodeServer.var"#62#66"{VSCodeServer.ReplRunCodeRequestParams})()
      @ VSCodeServer c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:201
   [21] #invokelatest#2
      @ .\essentials.jl:716 [inlined]
   [22] invokelatest(::Any)
      @ Base .\essentials.jl:714
   [23] macro expansion
      @ c:\Users\kwstas\.vscode\extensions\julialang.language-julia-1.6.17\scripts\packages\VSCodeServer\src\eval.jl:34 [inlined]
   [24] (::VSCodeServer.var"#60#61")()
      @ VSCodeServer .\task.jl:423
Test Summary:                  | Pass  Error  Total
random policy with ShippingEnv |    2      1      3
ERROR: Some tests did not pass: 2 passed, 0 failed, 1 errored, 0 broken.

我觉得错误信息已经解释的很清楚了

(::ShippingEnv)(action::Tuple{Int64, Int64}, player::DefaultPlayer)

这意味着没有找到这个方法。它是 (::ShippingEnv)(action::Tuple{Int, Int})

的回退

而您实施的是 (m::ShippingEnv)(a::Vector{Int64})。所以这里有两个选择,要么改为定义 (m::ShippingEnv)(a::Tuple{Int64, Int64}),要么定义类型为 Space([1:length(params1.heading),1:length(params1.acceleration)]) 的操作 space。注意 tuplevector 之间的区别。