具有障碍物覆盖算法的网格
Grid with obstacles coverage algorithm
我必须找到一个机器人代理的算法来执行以下操作(对不起,我真的不知道怎么称呼它):
机器人在有障碍物的10x10网格上(每个方格要么是障碍物要么是可穿越的)
机器人有碰撞传感器:当机器人撞到障碍物时激活。
网格上有胡萝卜不断生长。有快速增长的方块和缓慢增长的方块。
每走一步,机器人可以:前进或向右或向左转90°或原地不动
事先不知道胡萝卜和障碍物的位置
机器人移动时胡萝卜继续生长(即使在收获后)
大多数不是障碍物的方格都长出胡萝卜
机器人不知道方块长得快还是慢
每个方格中可以有 0 到 20 个胡萝卜。在每个时间实例中,正方形的胡萝卜数量增加
的概率为 p = 0.01(对于快速增长的正方形,p = 0.02)
您可以测量收获的胡萝卜量。
目标是在 2000 步中获得最多的胡萝卜。
有 lazy/easy 的方法吗?
到目前为止,我有点迷茫,因为它不是解决迷宫的问题。它会是一种洪水填充算法吗?有没有更简单的?
我不一定要搜索 "solve" 问题,而是尽可能地寻找一个简单的近似值
要找到一个具有完美策略的机器人实现确实有点工作,因为它不知道食物来源的位置和数量。
机器人的任何给定策略可能不会在每个 运行 中产生最大可能的收获。所以问题是,哪种策略在多次模拟中最成功 运行s.
要为正方形类型(P(fastFood)、P(slowFood)、P(obstacle))的给定统计分布找到合适的策略,可能会想到以下想法:
让 Bot(npatch) 成为一个寻找 npatch 食物点的机器人。其策略是先吃掉在第一个食物块中找到的东西,然后再搜索第二个食物块,依此类推。当它访问 npatch 食物来源(或没有找到更多的食物补丁)时,它 returns 找到第一个并重新收获。
这个 class 的机器人 (Bot(npatch)) 现在可以在统计上相关的模拟次数 运行 中相互竞争。最佳机器人是比赛的获胜者。
这种方法可以被认为是受遗传算法的启发,但没有混合任何基因,而是简单地迭代所有基因(1..npatch)。也许有人有想法如何将这个想法转化为完全遗传算法。这可能涉及转向 Bot(npatch,searchStrategy),然后使用多个基因来应用遗传算法。
每当模拟参数发生变化时,就必须重复比赛,显然这取决于世界上食物块的数量,如果某些食物可能会或可能不会再去寻找另一个食物块。补丁已经知道了。
下面的代码是用 F# 编写的,是该问题的模拟器(如果我的所有要求都正确,那就是...)。写一个新的机器人就像写一个函数一样简单,然后传递给模拟器。
对于那些想尝试自己的机器人的人来说,这是我的彩蛋。
我写的 2 个机器人被称为 "marvinRobot",它做 Marvin 会做的事情,"lazyRobot" 一个机器人在它找到的第一个食物来源上扎营。
type Square =
| Empty
| Obstacle
| Food of float * (float -> float) // available * growth
| Unknown
let rnd = new System.Random()
let grow p a =
let r = rnd.NextDouble()
if r < p then a + 1.0
else a
let slowGrowth a = grow 0.01 a
let fastGrowth a = grow 0.02 a
let eatPerTick = 1.0
let maxFoodPerSquare = 20.0
let randomPick values =
let count = List.length values
let r = rnd.Next(0,count-1)
values.Item(r)
type World = Square[,]
let randomSquare pobstacle pfood =
let r = rnd.NextDouble()
match r with
| x1 when x1 < pobstacle -> Obstacle
| x2 when x2 < (pobstacle + pfood) && x2 >= pobstacle ->
Food(rnd.NextDouble() * maxFoodPerSquare, randomPick [slowGrowth; fastGrowth])
| _ -> Empty
let createRandomWorld n pobstacle pfood =
Array2D.init n n (fun col row -> randomSquare pobstacle pfood)
let createUnknownWorld n =
Array2D.create n n Unknown
type Position = { Column : int; Row : int }
type RoboState = { Memory : Square[,]; Pos : Position; Heading : Position }
type RoboAction =
| TurnRight
| TurnLeft
| MoveOne
| Eat
| Idle
type RoboActor = World -> RoboState -> RoboAction
let right heading : Position =
match heading with
| { Column = 0; Row = 1 } -> { Column = -1; Row = 0 }
| { Column = -1; Row = 0 } -> { Column = 0; Row = -1 }
| { Column = 0; Row = -1 } -> { Column = 1; Row = 0 }
| { Column = 1; Row = 0 } -> { Column = 0; Row = 1 }
| _ -> failwith "Invalid heading!"
let left heading : Position =
match heading with
| { Column = -1; Row = 0 } -> { Column = 0; Row = 1 }
| { Column = 0; Row = -1 } -> { Column = -1; Row = 0 }
| { Column = 1; Row = 0 } -> { Column = 0; Row = -1 }
| { Column = 0; Row = 1 } -> { Column = 1; Row = 0 }
| _ -> failwith "Invalid heading!"
let checkAccess n position =
let inRange v = v >= 0 && v < n
(inRange position.Column) && (inRange position.Row)
let tickWorld world =
world
|> Array2D.map
(fun sq ->
match sq with
| Empty -> Empty
| Obstacle -> Obstacle
| Food(a,r) -> Food(min (r a) maxFoodPerSquare, r)
| Unknown -> Unknown
)
let rec step robot world roboState i imax acc =
if i < imax then
let action = robot world roboState
match action with
| TurnRight ->
let rs1 = { roboState with Heading = right roboState.Heading }
let wrld1 = tickWorld world
step robot wrld1 rs1 (i+1) imax acc
| TurnLeft ->
let rs1 = { roboState with Heading = left roboState.Heading }
let wrld1 = tickWorld world
step robot wrld1 rs1 (i+1) imax acc
| MoveOne ->
let rs1 =
let c =
{ Column = roboState.Pos.Column + roboState.Heading.Column
Row = roboState.Pos.Row + roboState.Heading.Row
}
if checkAccess (Array2D.length1 world) c
then
match world.[c.Column,c.Row] with
| Obstacle ->
roboState.Memory.[c.Column,c.Row] <- Obstacle
roboState
| _ -> { roboState with Pos = c }
else
roboState
let wrld1 = tickWorld world
step robot wrld1 rs1 (i+1) imax acc
| Eat ->
let eat,acc1 =
match world.[roboState.Pos.Column,roboState.Pos.Row] with
| Empty -> Empty,acc
| Obstacle -> Obstacle,acc
| Food(a,r) ->
let eaten = if a >= eatPerTick then eatPerTick else 0.0
printfn "eating %f carrots" eaten
Food(a - eaten, r),eaten + acc
| Unknown -> Unknown,acc
world.[roboState.Pos.Column,roboState.Pos.Row] <- eat
let wrld1 = tickWorld world
step robot wrld1 roboState (i+1) imax acc1
| Idle ->
step robot (tickWorld world) roboState (i+1) imax acc
else
acc
let initRoboState n =
{ Memory = createUnknownWorld n;
Pos = { Column = 0; Row = 0;};
Heading = {Column = 1; Row = 0}
}
let simulate n pobstacle pfood imax robot =
let w0 = createRandomWorld n pobstacle pfood
let r0 = initRoboState n
printfn "World: %A" w0
printfn "Initial Robo State: %A" r0
let result = step robot w0 r0 0 imax 0.0
printfn "Final Robo State: %A" r0
result
// Not that Marvin would care, but the rule for this simulator is that the
// bot may only inspect the square in the world at the current position.
// This means, IT CANNOT SEE the neighboring squares.
// This means, that if there is a obstacle next to current square,
// it costs a simulation tick to find out, trying to bump against it.
// Any access to other squares in world is considered cheating!
// world is passed in spite of all said above to allow for alternate rules.
let marvinRobot world roboState =
Idle
// Tries to find a square with food, then stays there, eating when there is something to eat.
let lazyRobot (world : World) (roboState : RoboState) =
let search() =
let status action : RoboAction =
match action with
| TurnLeft -> printfn "%A TurnLeft at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
| TurnRight -> printfn "%ATurnRight at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
| MoveOne -> printfn "%A MoveOne at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
| Idle -> printfn "%A Idle at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
| Eat -> printfn "%A Eat at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
action
let neighbors =
[ roboState.Heading, MoveOne;
(roboState.Heading |> right),TurnRight;
(roboState.Heading |> left),TurnLeft;
(roboState.Heading |> right |> right),TurnRight
]
|> List.map (fun (p,a) -> (p.Column,p.Row),a)
|> List.map (fun ((c,r),a) -> (roboState.Pos.Column + c,roboState.Pos.Row + r),a)
|> List.filter (fun ((c,r),a) -> checkAccess (Array2D.length1 world){Position.Column = c; Row = r})
|> List.sortBy (fun ((c,r),a) -> match roboState.Memory.[c,r] with | Food(_,_) -> 0 | Unknown -> 1 | Empty -> 2 | Obstacle -> 3)
|> List.map (fun ((c,r),a) -> { Column = c; Row = r},a)
if neighbors.IsEmpty then failwith "It's a trap!" // can happen if bot is surrounded by obstacles, e.g. in a corner
else
let p,a = neighbors.Head
status a
roboState.Memory.[roboState.Pos.Column, roboState.Pos.Row] <-
world.[roboState.Pos.Column,roboState.Pos.Row]
match world.[roboState.Pos.Column,roboState.Pos.Row] with
| Food(a,_) ->
printfn "Found food at %A" roboState.Pos
Eat
| _ ->
search()
//simulate 10 0.1 0.05 2000 marvinRobot
simulate 10 0.1 0.1 2000 lazyRobot
最后一个提示:如果您使用 0.0 个食物块进行模拟,您的机器人应该已经访问了地图上的所有方块。如果它做不到这一点,那它肯定不是一个好的机器人 ;)
我必须找到一个机器人代理的算法来执行以下操作(对不起,我真的不知道怎么称呼它):
机器人在有障碍物的10x10网格上(每个方格要么是障碍物要么是可穿越的)
机器人有碰撞传感器:当机器人撞到障碍物时激活。
网格上有胡萝卜不断生长。有快速增长的方块和缓慢增长的方块。
每走一步,机器人可以:前进或向右或向左转90°或原地不动
事先不知道胡萝卜和障碍物的位置
机器人移动时胡萝卜继续生长(即使在收获后)
大多数不是障碍物的方格都长出胡萝卜
机器人不知道方块长得快还是慢
每个方格中可以有 0 到 20 个胡萝卜。在每个时间实例中,正方形的胡萝卜数量增加
的概率为 p = 0.01(对于快速增长的正方形,p = 0.02)
您可以测量收获的胡萝卜量。
目标是在 2000 步中获得最多的胡萝卜。
有 lazy/easy 的方法吗?
到目前为止,我有点迷茫,因为它不是解决迷宫的问题。它会是一种洪水填充算法吗?有没有更简单的?
我不一定要搜索 "solve" 问题,而是尽可能地寻找一个简单的近似值
要找到一个具有完美策略的机器人实现确实有点工作,因为它不知道食物来源的位置和数量。
机器人的任何给定策略可能不会在每个 运行 中产生最大可能的收获。所以问题是,哪种策略在多次模拟中最成功 运行s.
要为正方形类型(P(fastFood)、P(slowFood)、P(obstacle))的给定统计分布找到合适的策略,可能会想到以下想法:
让 Bot(npatch) 成为一个寻找 npatch 食物点的机器人。其策略是先吃掉在第一个食物块中找到的东西,然后再搜索第二个食物块,依此类推。当它访问 npatch 食物来源(或没有找到更多的食物补丁)时,它 returns 找到第一个并重新收获。
这个 class 的机器人 (Bot(npatch)) 现在可以在统计上相关的模拟次数 运行 中相互竞争。最佳机器人是比赛的获胜者。
这种方法可以被认为是受遗传算法的启发,但没有混合任何基因,而是简单地迭代所有基因(1..npatch)。也许有人有想法如何将这个想法转化为完全遗传算法。这可能涉及转向 Bot(npatch,searchStrategy),然后使用多个基因来应用遗传算法。
每当模拟参数发生变化时,就必须重复比赛,显然这取决于世界上食物块的数量,如果某些食物可能会或可能不会再去寻找另一个食物块。补丁已经知道了。
下面的代码是用 F# 编写的,是该问题的模拟器(如果我的所有要求都正确,那就是...)。写一个新的机器人就像写一个函数一样简单,然后传递给模拟器。
对于那些想尝试自己的机器人的人来说,这是我的彩蛋。
我写的 2 个机器人被称为 "marvinRobot",它做 Marvin 会做的事情,"lazyRobot" 一个机器人在它找到的第一个食物来源上扎营。
type Square =
| Empty
| Obstacle
| Food of float * (float -> float) // available * growth
| Unknown
let rnd = new System.Random()
let grow p a =
let r = rnd.NextDouble()
if r < p then a + 1.0
else a
let slowGrowth a = grow 0.01 a
let fastGrowth a = grow 0.02 a
let eatPerTick = 1.0
let maxFoodPerSquare = 20.0
let randomPick values =
let count = List.length values
let r = rnd.Next(0,count-1)
values.Item(r)
type World = Square[,]
let randomSquare pobstacle pfood =
let r = rnd.NextDouble()
match r with
| x1 when x1 < pobstacle -> Obstacle
| x2 when x2 < (pobstacle + pfood) && x2 >= pobstacle ->
Food(rnd.NextDouble() * maxFoodPerSquare, randomPick [slowGrowth; fastGrowth])
| _ -> Empty
let createRandomWorld n pobstacle pfood =
Array2D.init n n (fun col row -> randomSquare pobstacle pfood)
let createUnknownWorld n =
Array2D.create n n Unknown
type Position = { Column : int; Row : int }
type RoboState = { Memory : Square[,]; Pos : Position; Heading : Position }
type RoboAction =
| TurnRight
| TurnLeft
| MoveOne
| Eat
| Idle
type RoboActor = World -> RoboState -> RoboAction
let right heading : Position =
match heading with
| { Column = 0; Row = 1 } -> { Column = -1; Row = 0 }
| { Column = -1; Row = 0 } -> { Column = 0; Row = -1 }
| { Column = 0; Row = -1 } -> { Column = 1; Row = 0 }
| { Column = 1; Row = 0 } -> { Column = 0; Row = 1 }
| _ -> failwith "Invalid heading!"
let left heading : Position =
match heading with
| { Column = -1; Row = 0 } -> { Column = 0; Row = 1 }
| { Column = 0; Row = -1 } -> { Column = -1; Row = 0 }
| { Column = 1; Row = 0 } -> { Column = 0; Row = -1 }
| { Column = 0; Row = 1 } -> { Column = 1; Row = 0 }
| _ -> failwith "Invalid heading!"
let checkAccess n position =
let inRange v = v >= 0 && v < n
(inRange position.Column) && (inRange position.Row)
let tickWorld world =
world
|> Array2D.map
(fun sq ->
match sq with
| Empty -> Empty
| Obstacle -> Obstacle
| Food(a,r) -> Food(min (r a) maxFoodPerSquare, r)
| Unknown -> Unknown
)
let rec step robot world roboState i imax acc =
if i < imax then
let action = robot world roboState
match action with
| TurnRight ->
let rs1 = { roboState with Heading = right roboState.Heading }
let wrld1 = tickWorld world
step robot wrld1 rs1 (i+1) imax acc
| TurnLeft ->
let rs1 = { roboState with Heading = left roboState.Heading }
let wrld1 = tickWorld world
step robot wrld1 rs1 (i+1) imax acc
| MoveOne ->
let rs1 =
let c =
{ Column = roboState.Pos.Column + roboState.Heading.Column
Row = roboState.Pos.Row + roboState.Heading.Row
}
if checkAccess (Array2D.length1 world) c
then
match world.[c.Column,c.Row] with
| Obstacle ->
roboState.Memory.[c.Column,c.Row] <- Obstacle
roboState
| _ -> { roboState with Pos = c }
else
roboState
let wrld1 = tickWorld world
step robot wrld1 rs1 (i+1) imax acc
| Eat ->
let eat,acc1 =
match world.[roboState.Pos.Column,roboState.Pos.Row] with
| Empty -> Empty,acc
| Obstacle -> Obstacle,acc
| Food(a,r) ->
let eaten = if a >= eatPerTick then eatPerTick else 0.0
printfn "eating %f carrots" eaten
Food(a - eaten, r),eaten + acc
| Unknown -> Unknown,acc
world.[roboState.Pos.Column,roboState.Pos.Row] <- eat
let wrld1 = tickWorld world
step robot wrld1 roboState (i+1) imax acc1
| Idle ->
step robot (tickWorld world) roboState (i+1) imax acc
else
acc
let initRoboState n =
{ Memory = createUnknownWorld n;
Pos = { Column = 0; Row = 0;};
Heading = {Column = 1; Row = 0}
}
let simulate n pobstacle pfood imax robot =
let w0 = createRandomWorld n pobstacle pfood
let r0 = initRoboState n
printfn "World: %A" w0
printfn "Initial Robo State: %A" r0
let result = step robot w0 r0 0 imax 0.0
printfn "Final Robo State: %A" r0
result
// Not that Marvin would care, but the rule for this simulator is that the
// bot may only inspect the square in the world at the current position.
// This means, IT CANNOT SEE the neighboring squares.
// This means, that if there is a obstacle next to current square,
// it costs a simulation tick to find out, trying to bump against it.
// Any access to other squares in world is considered cheating!
// world is passed in spite of all said above to allow for alternate rules.
let marvinRobot world roboState =
Idle
// Tries to find a square with food, then stays there, eating when there is something to eat.
let lazyRobot (world : World) (roboState : RoboState) =
let search() =
let status action : RoboAction =
match action with
| TurnLeft -> printfn "%A TurnLeft at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
| TurnRight -> printfn "%ATurnRight at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
| MoveOne -> printfn "%A MoveOne at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
| Idle -> printfn "%A Idle at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
| Eat -> printfn "%A Eat at %A (heading: %A)" world.[roboState.Pos.Column,roboState.Pos.Row] roboState.Pos roboState.Heading
action
let neighbors =
[ roboState.Heading, MoveOne;
(roboState.Heading |> right),TurnRight;
(roboState.Heading |> left),TurnLeft;
(roboState.Heading |> right |> right),TurnRight
]
|> List.map (fun (p,a) -> (p.Column,p.Row),a)
|> List.map (fun ((c,r),a) -> (roboState.Pos.Column + c,roboState.Pos.Row + r),a)
|> List.filter (fun ((c,r),a) -> checkAccess (Array2D.length1 world){Position.Column = c; Row = r})
|> List.sortBy (fun ((c,r),a) -> match roboState.Memory.[c,r] with | Food(_,_) -> 0 | Unknown -> 1 | Empty -> 2 | Obstacle -> 3)
|> List.map (fun ((c,r),a) -> { Column = c; Row = r},a)
if neighbors.IsEmpty then failwith "It's a trap!" // can happen if bot is surrounded by obstacles, e.g. in a corner
else
let p,a = neighbors.Head
status a
roboState.Memory.[roboState.Pos.Column, roboState.Pos.Row] <-
world.[roboState.Pos.Column,roboState.Pos.Row]
match world.[roboState.Pos.Column,roboState.Pos.Row] with
| Food(a,_) ->
printfn "Found food at %A" roboState.Pos
Eat
| _ ->
search()
//simulate 10 0.1 0.05 2000 marvinRobot
simulate 10 0.1 0.1 2000 lazyRobot
最后一个提示:如果您使用 0.0 个食物块进行模拟,您的机器人应该已经访问了地图上的所有方块。如果它做不到这一点,那它肯定不是一个好的机器人 ;)