MultiPoint(df['geometry']) raises a KeyError on a filtered GeoDataFrame even though the column exists (KeyError: 13, geopandas)
# Load the subway entrance/exit CSV and build one point per row from the
# 'Station Longitude' / 'Station Latitude' columns (x = longitude, y = latitude).
df_subway = pd.read_csv('/content/drive/MyDrive/Despliegue_de_modelos/NYC_Transit_Subway_Entrance_And_Exit_Data.csv')
geometry = [Point(xy) for xy in zip(df_subway['Station Longitude'], df_subway['Station Latitude'])]
# Coordinate reference system assigned to the raw longitude/latitude points.
crs = {'init': 'EPSG:4326'}
# Create the GeoDataFrame, then reproject it to EPSG:5234.
gdf_subway_entrance_geometry = gpd.GeoDataFrame(df_subway, crs=crs, geometry=geometry).to_crs('EPSG:5234')
# Keep only the rows whose Entry column equals 'YES'.
# NOTE(review): the filtered frame presumably keeps its original index labels,
# so some labels (such as 13) are no longer present - confirm.
df_yes_entry = gdf_subway_entrance_geometry[gdf_subway_entrance_geometry.Entry=='YES']
from shapely.geometry import Point, MultiPoint
from shapely.ops import nearest_points
# Per the traceback, MultiPoint reads ob[i] for i in range(m); on this Series
# that lookup goes through the index (get_loc), which is where KeyError: 13 is raised.
pts = MultiPoint(df_yes_entry['geometry'])  # raises KeyError: 13
# gpdPoint is not defined in this snippet; it must already exist in the session.
pt = Point(gpdPoint.x, gpdPoint.y)
#[o.wkt for o in nearest_points(pt, pts)]
for o in nearest_points(pt, pts):
问题是:如果改用未经过滤的 gdf_subway_entrance_geometry 而不是 df_yes_entry,同样的代码可以正常运行,但我需要先对数据进行过滤!
This is the error:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/ in get_loc(self, key, method, tolerance)
2897 try:
-> 2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 13
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
7 frames
<ipython-input-480-b6733def46e0> in <module>()
1 from shapely.geometry import Point, MultiPoint
2 from shapely.ops import nearest_points
----> 3 pts = MultiPoint(df_yes_entry['geometry'])
4 pt = Point(gpdPoint.x, gpdPoint.y)
5 #[o.wkt for o in nearest_points(pt, pts)]
/usr/local/lib/python3.7/dist-packages/shapely/geometry/ in __init__(self, points)
56 pass
57 else:
---> 58 self._geom, self._ndim = geos_multipoint_from_py(points)
60 def shape_factory(self, *args):
/usr/local/lib/python3.7/dist-packages/shapely/geometry/ in geos_multipoint_from_py(ob)
169 # add to coordinate sequence
170 for i in range(m):
--> 171 coords = ob[i]
172 geom, ndims = point.geos_point_from_py(coords)
/usr/local/lib/python3.7/dist-packages/geopandas/ in __getitem__(self, key)
607 def __getitem__(self, key):
--> 608 return self._wrapped_pandas_method("__getitem__", key)
610 @doc(pd.Series)
/usr/local/lib/python3.7/dist-packages/geopandas/ in _wrapped_pandas_method(self, mtd, *args, **kwargs)
599 def _wrapped_pandas_method(self, mtd, *args, **kwargs):
600 """Wrap a generic pandas method to ensure it returns a GeoSeries"""
--> 601 val = getattr(super(), mtd)(*args, **kwargs)
602 if type(val) == Series:
603 val.__class__ = GeoSeries
/usr/local/lib/python3.7/dist-packages/pandas/core/ in __getitem__(self, key)
881 elif key_is_scalar:
--> 882 return self._get_value(key)
884 if is_hashable(key):
/usr/local/lib/python3.7/dist-packages/pandas/core/ in _get_value(self, label, takeable)
989 # Similar to Index.get_value, but we do not fall back to positional
--> 990 loc = self.index.get_loc(label)
991 return self.index._get_values_for_loc(self, loc, label)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/ in get_loc(self, key, method, tolerance)
2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
-> 2900 raise KeyError(key) from err
2902 if tolerance is not None:
KeyError: 13
我正在使用 colab,这是我的软件包:
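- 注意到你的数据在 Kaggle 上,所以先从 Kaggle 获取数据
- 实际上只有一个问题:
`MultiPoint()` 构造函数不适用于过滤后的 Series(GeoSeries):它在内部按 `ob[i]`(i 从 0 到 m-1)取值,而 Series 会把 `i` 当作索引标签来查找,过滤后部分标签已不存在,于是抛出 KeyError。传给它一个 numpy 数组(`.values`)就可以了。
- 完整代码如下,其中随机选取了一个点作为 `gpdPoint`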
# Read the Kaggle dataset archive, build a GeoDataFrame of subway entrances,
# and assemble the MultiPoint that nearest_points() is queried against.
import sys
import urllib.parse  # imported explicitly: a bare `import urllib` does not guarantee `urllib.parse` is loaded
from pathlib import Path
from zipfile import ZipFile

import geopandas as gpd
import kaggle.cli
import pandas as pd
import requests
from shapely.geometry import Point, MultiPoint
from shapely.ops import nearest_points

# fmt: off
# download data set
url = ""  # NOTE(review): the URL is empty here - presumably the Kaggle dataset page URL; confirm
dataset_path = urllib.parse.urlparse(url).path
# Command-line arguments for the Kaggle CLI: the URL path without its leading "/"
# is passed as the dataset to download.
sys.argv = [sys.argv[0]] + f"datasets download {dataset_path[1:]}".split(" ")
# NOTE(review): nothing visible here invokes the Kaggle CLI after sys.argv is
# set - presumably a call such as kaggle.cli.main() belongs here; confirm.
# Read every CSV member of the downloaded archive into a DataFrame keyed by
# member name; the archive is closed as soon as all members have been parsed.
with ZipFile(f'{dataset_path.split("/")[-1]}.zip') as zfile:
    dfs = {f.filename: pd.read_csv(zfile.open(f)) for f in zfile.infolist() if Path(f.filename).suffix in [".csv"]}
# fmt: on
df_subway = dfs['nyc-transit-subway-entrance-and-exit-data.csv']

# One point per row; x is the longitude column and y is the latitude column.
geometry = [Point(xy) for xy in zip(df_subway['Station Longitude'], df_subway['Station Latitude'])]
# Coordinate reference system of the raw coordinates. The plain authority
# string replaces the deprecated {'init': 'EPSG:4326'} dictionary form.
crs = 'EPSG:4326'
# Create the GeoDataFrame, then reproject it to EPSG:5234.
gdf_subway_entrance_geometry = gpd.GeoDataFrame(df_subway, crs=crs, geometry=geometry).to_crs('EPSG:5234')
# Keep only the rows whose Entry column equals 'YES'.
df_yes_entry = gdf_subway_entrance_geometry[gdf_subway_entrance_geometry.Entry == 'YES']

# Randomly select a point from the unfiltered data to use as the query point.
gpdPoint = gdf_subway_entrance_geometry.sample(1).geometry.iloc[0]
# MultiPoint reads its input as ob[0] .. ob[m-1]. On a filtered Series those
# integers are looked up as index labels and raise KeyError, so pass the
# underlying array (.values), which is indexed by position instead.
pts = MultiPoint(df_yes_entry['geometry'].values)
pt = Point(gpdPoint.x, gpdPoint.y)
# e.g. [o.wkt for o in nearest_points(pt, pts)]
for o in nearest_points(pt, pts):
