Python:基于数据框中的经度和纬度进行 A* 路径规划
Python: A* routing from dataframe with longitude and latitude
我有一个包含 30,000 条记录的数据框,格式如下:
ID | Name | Latitude | Longitude | Country |
1 | Hull | 53.744 | -0.3456 | GB |
我想选择一条记录作为起始位置,另一条记录作为目的地,并返回一条路径(列表)作为两者之间的最短路径。
我正在使用 Geopy 查找以公里为单位的点之间的距离
import geopy.distance

# Two (latitude, longitude) pairs in decimal degrees.
coords_1 = (52.2296756, 21.0122287)
coords_2 = (52.406374, 16.9251681)

# ``vincenty`` was deprecated in geopy 1.13 and removed in geopy 2.0;
# ``geodesic`` is the replacement and exposes the same ``.km`` attribute.
print(geopy.distance.geodesic(coords_1, coords_2).km)
我已经从以下教程中阅读了如何在 python 中执行 A*:
https://www.redblobgames.com/pathfinding/a-star/implementation.html
但是他们创建了一个网格系统来导航。
这是数据框中记录的可视化表示:
这是我目前的代码,但它找不到路径:
def calcH(start, end):
    """Return the geodesic distance in kilometres between two rows of ``df``.

    The search loop uses this both as the step cost between two stations and
    as the heuristic estimate from a station to the destination.

    Args:
        start: Index label of the first station in ``df``.
        end: Index label of the second station in ``df``.

    Returns:
        The distance between the two stations in kilometres.
    """
    coords_1 = (df['latitude'][start], df['longitude'][start])
    coords_2 = (df['latitude'][end], df['longitude'][end])
    # ``vincenty`` was deprecated in geopy 1.13 and removed in geopy 2.0;
    # ``geodesic`` is the replacement.
    return geopy.distance.geodesic(coords_1, coords_2).km
^计算点之间的距离
def getneighbors(startlocation, n=4):
    """Return the ``n`` stations of ``df`` that are closest to ``startlocation``.

    Args:
        startlocation: Index label of the station whose neighbours are wanted.
        n: Number of neighbours to return (default 4, the original behaviour).

    Returns:
        A DataFrame with the columns ``ID`` (index label in ``df``) and
        ``Distance`` (kilometres, rounded to two decimals), sorted by
        ascending distance and holding at most ``n`` rows.
    """
    origin = (df['latitude'][startlocation], df['longitude'][startlocation])
    # Gather every row first and build the frame once: appending with
    # ``.loc[len(...)]`` inside the loop copies the frame on each iteration.
    # The start station is skipped by label instead of relying on it sorting
    # first, which is not guaranteed when another station is at distance 0.0.
    rows = [
        (index, round(geopy.distance.geodesic(origin, (lat, lon)).km, 2))
        for index, lat, lon in zip(df.index, df['latitude'], df['longitude'])
        if index != startlocation
    ]
    neighborDF = pd.DataFrame(rows, columns=['ID', 'Distance'])
    neighborDF = neighborDF.sort_values(by=['Distance'])
    neighborDF = neighborDF.reset_index(drop=True)
    return neighborDF[:n]
^ 返回最近的 4 个位置(忽略自身)
# A* search over the station dataframe. The open and closed lists are
# DataFrames with one row per node: ID, F (= G + H), G (distance travelled so
# far), H (calcH estimate to the destination) and parentID.
# NOTE(review): the nesting below is reconstructed from the statements
# themselves; confirm it against the author's original formatting.
openlist = pd.DataFrame(columns=['ID', 'F', 'G', 'H', 'parentID'])
closedlist = pd.DataFrame(columns=['ID', 'F', 'G', 'H', 'parentID'])
startIndex = 25479 # Hessle
endIndex = 8262 # Leeds
h = calcH(startIndex, endIndex)
# Seed the open list with the start node; it is recorded as its own parent.
openlist.loc[len(openlist)] = [startIndex,h, 0, h, startIndex]
while True:
    # Sort the open list by F score so that row 0 is the cheapest candidate.
    openlist = openlist.sort_values(by=['F'])
    openlist = openlist.reset_index(drop=True)
    # Move the cheapest node from the open list to the closed list.
    currentLocation = openlist.loc[0]
    closedlist.loc[len(closedlist)] = currentLocation
    # NOTE(review): this filter leaves the remaining index labels untouched,
    # so a later ``openlist.loc[len(openlist)] = ...`` in this iteration can
    # hit a label that already exists and overwrite that row instead of
    # appending a new one -- verify.
    openlist = openlist[openlist.ID != currentLocation.ID]
    if currentLocation.ID == endIndex:
        print("Complete")
        break
    adjacentLocations = getneighbors(currentLocation.ID)
    if(len(adjacentLocations) < 1):
        print("No Neighbors: " + str(currentLocation.ID))
    else:
        print(str(len(adjacentLocations)))
    for index, row in adjacentLocations.iterrows():
        # NOTE(review): ``in <frame>.values`` tests against every cell of the
        # frame (F, G, H and parentID as well), not only the ID column.
        if adjacentLocations['ID'][index] in closedlist.values:
            continue
        if (adjacentLocations['ID'][index] in openlist.values) == False:
            # Neighbour not queued yet: cost so far plus the step to it, and
            # the estimate from it to the destination.
            g = currentLocation.G + calcH(currentLocation.ID, adjacentLocations['ID'][index])
            h = calcH(adjacentLocations['ID'][index], endIndex)
            f = g + h
            openlist.loc[len(openlist)] = [adjacentLocations['ID'][index], f, g, h, currentLocation.ID]
        else:
            # Neighbour already queued: re-queue it only if this route is cheaper.
            adjacentLocationInDF = openlist.loc[openlist['ID'] == adjacentLocations['ID'][index]] #Get location from openlist
            g = currentLocation.G + calcH(currentLocation.ID, adjacentLocations['ID'][index])
            # NOTE(review): ``adjacentLocationInDF`` comes from a boolean-mask
            # selection, so ``.H`` / ``.F`` are Series rather than scalars.
            f = g + adjacentLocationInDF.H
            if float(f) < float(adjacentLocationInDF.F):
                # NOTE(review): this drops rows matching the *current* node,
                # which was already removed above; presumably the neighbour's
                # stale entry was meant to be dropped -- confirm.
                openlist = openlist[openlist.ID != currentLocation.ID]
                openlist.loc[len(openlist)] = [adjacentLocations['ID'][index], f, g, adjacentLocationInDF.H, currentLocation.ID]
    # Nothing left to expand and the destination was never reached.
    if (len(openlist)< 1):
        print("No Path")
        break
从封闭列表中查找路径:
# Collect the station details of every node on the found route, walking the
# closed list backwards from the node that was closed last.
pathdf = pd.DataFrame(columns=['name', 'latitude', 'longitude', 'country'])


def getParent(index):
    """Append the route from ``index`` back to ``startIndex`` to ``pathdf``.

    Starting at ``index``, the matching closed-list row is looked up, the
    station's name, latitude, longitude and country are appended to the
    module-level ``pathdf``, and the walk continues with that row's
    ``parentID`` until ``startIndex`` has been appended. Rows therefore end
    up in destination-to-start order.

    Args:
        index: ID of the closed-list node to start from.
    """
    node = index
    while True:
        entry = closedlist.loc[closedlist['ID'] == node]
        station = entry.ID.values[0]
        pathdf.loc[len(pathdf)] = [
            df['name'][station],
            df['latitude'][station],
            df['longitude'][station],
            df['country'][station],
        ]
        if node != startIndex:
            node = entry.parentID.values[0]
        else:
            break


# The row appended to the closed list last is where the walk begins.
getParent(closedlist['ID'][len(closedlist)-1])
目前 A* 的这个实现没有找到完整的路径。有什么建议吗?
编辑:
我尝试将考虑的邻居数量从 4 个增加到 10 个,我得到了一条路径,但不是最佳路径:
我们正在尝试从 Hessle 前往利兹。
^ 可用节点
原始数据:
Link
我仍然不确定你的方法有什么问题,尽管肯定有一些问题,正如评论中已经提到的那样。
- 仅考虑最近的四个(或就此而言,任何固定数量的)邻居可能会导致死胡同或图表的某些部分被完全切断,例如不在任何邻居"closest X"范围内的孤立城市
- 您以 `x in dataframe.values` 形式进行的检查,会检查 x 是否等于 `values` 返回的 numpy 数组中的任意一个值,而不一定是 ID 字段的值
- 用数据框来充当开放列表和封闭列表,而不是分别使用合适的堆和哈希集合,会使搜索变得不必要地缓慢,因为您必须不断地搜索和排序整个列表(不确定 Pandas 能否通过索引加快查找速度,但排序肯定是需要时间的)
总之,我发现这是一个有趣的问题并尝试了一下。然而,事实证明,使用数据框作为某种伪堆确实非常慢,而且我发现数据框的索引方式非常令人困惑(也可能容易出错?),所以我修改了代码:对 openlist 使用堆(heapq),对 closedlist 使用把节点映射到其父节点的 dict。此外,这里的检查比您代码中的少(例如,不再检查节点是否已在 openlist 中),但这些应该无关紧要。
import csv, geopy.distance, collections, heapq
# One station record; ``latitude`` and ``longitude`` keep the raw CSV text.
Location = collections.namedtuple("Location", "ID name latitude longitude country".split())

# Station ID -> Location, restricted to stations whose country is "GB".
data = {}
# The csv module requires files to be opened with newline="" so that line
# breaks embedded in quoted fields are handled by the reader itself.
with open("stations.csv", newline="") as f:
    for d in csv.DictReader(f):
        i, n, x, y, c = int(d["id"]), d["name"], d["latitude"], d["longitude"], d["country"]
        if c == "GB":
            data[i] = Location(i, n, x, y, c)
def calcH(start, end):
    """Return the geodesic distance in kilometres between two stations.

    The search loop uses this both as the step cost between two stations and
    as the heuristic estimate from a station to the destination.

    Args:
        start: ID of the first station (a key of ``data``).
        end: ID of the second station (a key of ``data``).

    Returns:
        The distance between the two stations in kilometres.

    Raises:
        KeyError: If either ID is not present in ``data``.
    """
    # Coordinates are stored as the raw CSV strings, so convert explicitly
    # instead of relying on geopy to coerce them.
    coords_1 = (float(data[start].latitude), float(data[start].longitude))
    coords_2 = (float(data[end].latitude), float(data[end].longitude))
    # ``vincenty`` was deprecated in geopy 1.13 and removed in geopy 2.0;
    # ``geodesic`` is the replacement.
    return geopy.distance.geodesic(coords_1, coords_2).km
def getneighbors(startlocation, n=10):
    """Return the ``n`` stations closest to ``startlocation``, nearest first.

    Args:
        startlocation: ID of the station whose neighbours are wanted.
        n: Maximum number of neighbours to return.

    Returns:
        A list of at most ``n`` ``Location`` records, ordered by ascending
        distance from ``startlocation``; the start station is never included.
    """
    # Exclude the start station by ID rather than by dropping the first
    # sorted element, which is only correct while nothing else is at
    # distance zero.
    candidates = (loc for loc in data.values() if loc.ID != startlocation)
    # nsmallest keeps only n items instead of sorting every station.
    return heapq.nsmallest(n, candidates, key=lambda loc: calcH(startlocation, loc.ID))
def getParent(closedlist, index):
    """Rebuild the route that ends at ``index`` by following parent links.

    Args:
        closedlist: Mapping of node ID to the ID of its parent; a node whose
            parent is ``None`` (or that is missing) ends the walk.
        index: ID of the last node of the route.

    Returns:
        The ``data`` records along the route, ordered from the first node
        to ``index``.
    """
    trail = []
    node = index
    while node is not None:
        trail.append(node)
        node = closedlist.get(node)
    # The walk ran from the end backwards; flip it into travel order.
    trail.reverse()
    return [data[step] for step in trail]
startIndex = 25479  # Hessle
endIndex = 8262  # Leeds

# One open-list entry: F = G + H, where G is the distance travelled from the
# start and H the calcH estimate from the node to the destination.
Node = collections.namedtuple("Node", "ID F G H parentID".split())

h = calcH(startIndex, endIndex)
openlist = [(h, Node(startIndex, h, 0, h, None))]  # heap ordered by F
closedlist = {}  # map visited nodes to parent

while openlist:
    _, currentLocation = heapq.heappop(openlist)
    print(currentLocation)
    if currentLocation.ID in closedlist:
        # A cheaper entry for this node was already expanded.
        continue
    closedlist[currentLocation.ID] = currentLocation.parentID
    if currentLocation.ID == endIndex:
        print("Complete")
        for p in getParent(closedlist, currentLocation.ID):
            print(p)
        break
    for other in getneighbors(currentLocation.ID):
        if other.ID in closedlist:
            # Already expanded; pushing it again would only be discarded
            # when it is popped.
            continue
        g = currentLocation.G + calcH(currentLocation.ID, other.ID)
        h = calcH(other.ID, endIndex)
        f = g + h
        heapq.heappush(openlist, (f, Node(other.ID, f, g, h, currentLocation.ID)))
else:
    # The heap ran empty without reaching the destination.
    print("No Path")
这给了我从 Hessle 到 Leeds 的路径,这似乎更合理:
Location(ID=25479, name='Hessle', latitude='53.717567', longitude='-0.442169', country='GB')
Location(ID=8166, name='Brough', latitude='53.726452', longitude='-0.578255', country='GB')
Location(ID=25208, name='Eastrington', latitude='53.75481', longitude='-0.786612', country='GB')
Location(ID=25525, name='Howden', latitude='53.764526', longitude='-0.86068', country='GB')
Location(ID=7780, name='Selby', latitude='53.78336', longitude='-1.06355', country='GB')
Location(ID=26157, name='Sherburn-In-Elmet', latitude='53.797142', longitude='-1.23176', country='GB')
Location(ID=25308, name='Garforth Station', latitude='53.796211', longitude='-1.382083', country='GB')
Location(ID=8262, name='Leeds', latitude='53.795158', longitude='-1.549089', country='GB')
即使因为不得不使用Pandas(?)而无法使用它,也许这可以帮助您最终发现实际错误。
我有一个包含 30,000 条记录的数据框,格式如下:
ID | Name | Latitude | Longitude | Country |
1 | Hull | 53.744 | -0.3456 | GB |
我想选择一条记录作为起始位置,另一条记录作为目的地,并返回一条路径(列表)作为两者之间的最短路径。
我正在使用 Geopy 查找以公里为单位的点之间的距离
import geopy.distance

# Two (latitude, longitude) pairs in decimal degrees.
coords_1 = (52.2296756, 21.0122287)
coords_2 = (52.406374, 16.9251681)

# ``vincenty`` was deprecated in geopy 1.13 and removed in geopy 2.0;
# ``geodesic`` is the replacement and exposes the same ``.km`` attribute.
print(geopy.distance.geodesic(coords_1, coords_2).km)
我已经从以下教程中阅读了如何在 python 中执行 A*: https://www.redblobgames.com/pathfinding/a-star/implementation.html
但是他们创建了一个网格系统来导航。
这是数据框中记录的可视化表示:
这是我目前的代码,但它找不到路径:
def calcH(start, end):
    """Return the geodesic distance in kilometres between two rows of ``df``.

    The search loop uses this both as the step cost between two stations and
    as the heuristic estimate from a station to the destination.

    Args:
        start: Index label of the first station in ``df``.
        end: Index label of the second station in ``df``.

    Returns:
        The distance between the two stations in kilometres.
    """
    coords_1 = (df['latitude'][start], df['longitude'][start])
    coords_2 = (df['latitude'][end], df['longitude'][end])
    # ``vincenty`` was deprecated in geopy 1.13 and removed in geopy 2.0;
    # ``geodesic`` is the replacement.
    return geopy.distance.geodesic(coords_1, coords_2).km
^计算点之间的距离
def getneighbors(startlocation, n=4):
    """Return the ``n`` stations of ``df`` that are closest to ``startlocation``.

    Args:
        startlocation: Index label of the station whose neighbours are wanted.
        n: Number of neighbours to return (default 4, the original behaviour).

    Returns:
        A DataFrame with the columns ``ID`` (index label in ``df``) and
        ``Distance`` (kilometres, rounded to two decimals), sorted by
        ascending distance and holding at most ``n`` rows.
    """
    origin = (df['latitude'][startlocation], df['longitude'][startlocation])
    # Gather every row first and build the frame once: appending with
    # ``.loc[len(...)]`` inside the loop copies the frame on each iteration.
    # The start station is skipped by label instead of relying on it sorting
    # first, which is not guaranteed when another station is at distance 0.0.
    rows = [
        (index, round(geopy.distance.geodesic(origin, (lat, lon)).km, 2))
        for index, lat, lon in zip(df.index, df['latitude'], df['longitude'])
        if index != startlocation
    ]
    neighborDF = pd.DataFrame(rows, columns=['ID', 'Distance'])
    neighborDF = neighborDF.sort_values(by=['Distance'])
    neighborDF = neighborDF.reset_index(drop=True)
    return neighborDF[:n]
^ 返回最近的 4 个位置(忽略自身)
# A* search over the station dataframe. The open and closed lists are
# DataFrames with one row per node: ID, F (= G + H), G (distance travelled so
# far), H (calcH estimate to the destination) and parentID.
# NOTE(review): the nesting below is reconstructed from the statements
# themselves; confirm it against the author's original formatting.
openlist = pd.DataFrame(columns=['ID', 'F', 'G', 'H', 'parentID'])
closedlist = pd.DataFrame(columns=['ID', 'F', 'G', 'H', 'parentID'])
startIndex = 25479 # Hessle
endIndex = 8262 # Leeds
h = calcH(startIndex, endIndex)
# Seed the open list with the start node; it is recorded as its own parent.
openlist.loc[len(openlist)] = [startIndex,h, 0, h, startIndex]
while True:
    # Sort the open list by F score so that row 0 is the cheapest candidate.
    openlist = openlist.sort_values(by=['F'])
    openlist = openlist.reset_index(drop=True)
    # Move the cheapest node from the open list to the closed list.
    currentLocation = openlist.loc[0]
    closedlist.loc[len(closedlist)] = currentLocation
    # NOTE(review): this filter leaves the remaining index labels untouched,
    # so a later ``openlist.loc[len(openlist)] = ...`` in this iteration can
    # hit a label that already exists and overwrite that row instead of
    # appending a new one -- verify.
    openlist = openlist[openlist.ID != currentLocation.ID]
    if currentLocation.ID == endIndex:
        print("Complete")
        break
    adjacentLocations = getneighbors(currentLocation.ID)
    if(len(adjacentLocations) < 1):
        print("No Neighbors: " + str(currentLocation.ID))
    else:
        print(str(len(adjacentLocations)))
    for index, row in adjacentLocations.iterrows():
        # NOTE(review): ``in <frame>.values`` tests against every cell of the
        # frame (F, G, H and parentID as well), not only the ID column.
        if adjacentLocations['ID'][index] in closedlist.values:
            continue
        if (adjacentLocations['ID'][index] in openlist.values) == False:
            # Neighbour not queued yet: cost so far plus the step to it, and
            # the estimate from it to the destination.
            g = currentLocation.G + calcH(currentLocation.ID, adjacentLocations['ID'][index])
            h = calcH(adjacentLocations['ID'][index], endIndex)
            f = g + h
            openlist.loc[len(openlist)] = [adjacentLocations['ID'][index], f, g, h, currentLocation.ID]
        else:
            # Neighbour already queued: re-queue it only if this route is cheaper.
            adjacentLocationInDF = openlist.loc[openlist['ID'] == adjacentLocations['ID'][index]] #Get location from openlist
            g = currentLocation.G + calcH(currentLocation.ID, adjacentLocations['ID'][index])
            # NOTE(review): ``adjacentLocationInDF`` comes from a boolean-mask
            # selection, so ``.H`` / ``.F`` are Series rather than scalars.
            f = g + adjacentLocationInDF.H
            if float(f) < float(adjacentLocationInDF.F):
                # NOTE(review): this drops rows matching the *current* node,
                # which was already removed above; presumably the neighbour's
                # stale entry was meant to be dropped -- confirm.
                openlist = openlist[openlist.ID != currentLocation.ID]
                openlist.loc[len(openlist)] = [adjacentLocations['ID'][index], f, g, adjacentLocationInDF.H, currentLocation.ID]
    # Nothing left to expand and the destination was never reached.
    if (len(openlist)< 1):
        print("No Path")
        break
从封闭列表中查找路径:
# Collect the station details of every node on the found route, walking the
# closed list backwards from the node that was closed last.
pathdf = pd.DataFrame(columns=['name', 'latitude', 'longitude', 'country'])


def getParent(index):
    """Append the route from ``index`` back to ``startIndex`` to ``pathdf``.

    Starting at ``index``, the matching closed-list row is looked up, the
    station's name, latitude, longitude and country are appended to the
    module-level ``pathdf``, and the walk continues with that row's
    ``parentID`` until ``startIndex`` has been appended. Rows therefore end
    up in destination-to-start order.

    Args:
        index: ID of the closed-list node to start from.
    """
    node = index
    while True:
        entry = closedlist.loc[closedlist['ID'] == node]
        station = entry.ID.values[0]
        pathdf.loc[len(pathdf)] = [
            df['name'][station],
            df['latitude'][station],
            df['longitude'][station],
            df['country'][station],
        ]
        if node != startIndex:
            node = entry.parentID.values[0]
        else:
            break


# The row appended to the closed list last is where the walk begins.
getParent(closedlist['ID'][len(closedlist)-1])
目前 A* 的这个实现没有找到完整的路径。有什么建议吗?
编辑: 我尝试将考虑的邻居数量从 4 个增加到 10 个,我得到了一条路径,但不是最佳路径:
我们正在尝试从 Hessle 前往利兹。
原始数据: Link
我仍然不确定你的方法有什么问题,尽管肯定有一些问题,正如评论中已经提到的那样。
- 仅考虑最近的四个(或就此而言,任何固定数量的)邻居可能会导致死胡同或图表的某些部分被完全切断,例如不在任何邻居"closest X"范围内的孤立城市
- 您以 `x in dataframe.values` 形式进行的检查,会检查 x 是否等于 `values` 返回的 numpy 数组中的任意一个值,而不一定是 ID 字段的值
- 用数据框来充当开放列表和封闭列表,而不是分别使用合适的堆和哈希集合,会使搜索变得不必要地缓慢,因为您必须不断地搜索和排序整个列表(不确定 Pandas 能否通过索引加快查找速度,但排序肯定是需要时间的)
总之,我发现这是一个有趣的问题并尝试了一下。然而,事实证明,使用数据框作为某种伪堆确实非常慢,而且我发现数据框的索引方式非常令人困惑(也可能容易出错?),所以我修改了代码:对 openlist 使用堆(heapq),对 closedlist 使用把节点映射到其父节点的 dict。此外,这里的检查比您代码中的少(例如,不再检查节点是否已在 openlist 中),但这些应该无关紧要。
import csv, geopy.distance, collections, heapq
# One station record; ``latitude`` and ``longitude`` keep the raw CSV text.
Location = collections.namedtuple("Location", "ID name latitude longitude country".split())

# Station ID -> Location, restricted to stations whose country is "GB".
data = {}
# The csv module requires files to be opened with newline="" so that line
# breaks embedded in quoted fields are handled by the reader itself.
with open("stations.csv", newline="") as f:
    for d in csv.DictReader(f):
        i, n, x, y, c = int(d["id"]), d["name"], d["latitude"], d["longitude"], d["country"]
        if c == "GB":
            data[i] = Location(i, n, x, y, c)
def calcH(start, end):
    """Return the geodesic distance in kilometres between two stations.

    The search loop uses this both as the step cost between two stations and
    as the heuristic estimate from a station to the destination.

    Args:
        start: ID of the first station (a key of ``data``).
        end: ID of the second station (a key of ``data``).

    Returns:
        The distance between the two stations in kilometres.

    Raises:
        KeyError: If either ID is not present in ``data``.
    """
    # Coordinates are stored as the raw CSV strings, so convert explicitly
    # instead of relying on geopy to coerce them.
    coords_1 = (float(data[start].latitude), float(data[start].longitude))
    coords_2 = (float(data[end].latitude), float(data[end].longitude))
    # ``vincenty`` was deprecated in geopy 1.13 and removed in geopy 2.0;
    # ``geodesic`` is the replacement.
    return geopy.distance.geodesic(coords_1, coords_2).km
def getneighbors(startlocation, n=10):
    """Return the ``n`` stations closest to ``startlocation``, nearest first.

    Args:
        startlocation: ID of the station whose neighbours are wanted.
        n: Maximum number of neighbours to return.

    Returns:
        A list of at most ``n`` ``Location`` records, ordered by ascending
        distance from ``startlocation``; the start station is never included.
    """
    # Exclude the start station by ID rather than by dropping the first
    # sorted element, which is only correct while nothing else is at
    # distance zero.
    candidates = (loc for loc in data.values() if loc.ID != startlocation)
    # nsmallest keeps only n items instead of sorting every station.
    return heapq.nsmallest(n, candidates, key=lambda loc: calcH(startlocation, loc.ID))
def getParent(closedlist, index):
    """Rebuild the route that ends at ``index`` by following parent links.

    Args:
        closedlist: Mapping of node ID to the ID of its parent; a node whose
            parent is ``None`` (or that is missing) ends the walk.
        index: ID of the last node of the route.

    Returns:
        The ``data`` records along the route, ordered from the first node
        to ``index``.
    """
    trail = []
    node = index
    while node is not None:
        trail.append(node)
        node = closedlist.get(node)
    # The walk ran from the end backwards; flip it into travel order.
    trail.reverse()
    return [data[step] for step in trail]
startIndex = 25479  # Hessle
endIndex = 8262  # Leeds

# One open-list entry: F = G + H, where G is the distance travelled from the
# start and H the calcH estimate from the node to the destination.
Node = collections.namedtuple("Node", "ID F G H parentID".split())

h = calcH(startIndex, endIndex)
openlist = [(h, Node(startIndex, h, 0, h, None))]  # heap ordered by F
closedlist = {}  # map visited nodes to parent

while openlist:
    _, currentLocation = heapq.heappop(openlist)
    print(currentLocation)
    if currentLocation.ID in closedlist:
        # A cheaper entry for this node was already expanded.
        continue
    closedlist[currentLocation.ID] = currentLocation.parentID
    if currentLocation.ID == endIndex:
        print("Complete")
        for p in getParent(closedlist, currentLocation.ID):
            print(p)
        break
    for other in getneighbors(currentLocation.ID):
        if other.ID in closedlist:
            # Already expanded; pushing it again would only be discarded
            # when it is popped.
            continue
        g = currentLocation.G + calcH(currentLocation.ID, other.ID)
        h = calcH(other.ID, endIndex)
        f = g + h
        heapq.heappush(openlist, (f, Node(other.ID, f, g, h, currentLocation.ID)))
else:
    # The heap ran empty without reaching the destination.
    print("No Path")
这给了我从 Hessle 到 Leeds 的路径,这似乎更合理:
Location(ID=25479, name='Hessle', latitude='53.717567', longitude='-0.442169', country='GB')
Location(ID=8166, name='Brough', latitude='53.726452', longitude='-0.578255', country='GB')
Location(ID=25208, name='Eastrington', latitude='53.75481', longitude='-0.786612', country='GB')
Location(ID=25525, name='Howden', latitude='53.764526', longitude='-0.86068', country='GB')
Location(ID=7780, name='Selby', latitude='53.78336', longitude='-1.06355', country='GB')
Location(ID=26157, name='Sherburn-In-Elmet', latitude='53.797142', longitude='-1.23176', country='GB')
Location(ID=25308, name='Garforth Station', latitude='53.796211', longitude='-1.382083', country='GB')
Location(ID=8262, name='Leeds', latitude='53.795158', longitude='-1.549089', country='GB')
即使因为不得不使用Pandas(?)而无法使用它,也许这可以帮助您最终发现实际错误。