How is this different behaviour possible? TypeError: unhashable type: 'Point'
How is this different behaviour possible? TypeError: unhashable type: 'Point'
我尝试查找并过滤 GeoDataFrame (df1) 中与第二个 GDF (df2) 中的点接近的点,反之亦然。
我为此使用了这段代码:
# Collect the points of each frame that lie closer than 100 (in the units of
# haversine's earth_radius) to some point of the other frame.
ps1 = []
ps2 = []
for p1 in df1.geometry:
    for p2 in df2.geometry:
        # haversine() takes (lat, lon) pairs: y is passed as latitude, x as longitude.
        dist = haversine(p1.y, p1.x, p2.y, p2.x)
        if dist < 100:
            ps1.append(p1)
            ps2.append(p2)
# Keep only the rows whose geometry was collected above.
df1 = df1[df1.geometry.isin(ps1)]
df2 = df2[df2.geometry.isin(ps2)]
但是,我在最后一行收到错误消息:
TypeError: unhashable type: 'Point'
但是它上面的那一行却运行得非常顺利,而这两行中变量(df1/df2 和 ps1/ps2)的数据类型完全相同。
这怎么可能?以及如何解决?
编辑:
变量类型:
df1 : <class 'geopandas.geodataframe.GeoDataFrame'>
df1.geometry: <class 'geopandas.geoseries.GeoSeries'>
ps1 : <class 'list'>
val1 : <class 'pandas.core.series.Series'>
df2 : <class 'geopandas.geodataframe.GeoDataFrame'>
df2.geometry: <class 'geopandas.geoseries.GeoSeries'>
ps2 : <class 'list'>
编辑 2:
df1.dtypes
Out[301]:
lat float64
lon float64
time datetime64[ns, UTC]
geometry geometry
dtype: object
df2.dtypes
Out[302]:
lat float64
lon float64
time datetime64[ns, UTC]
geometry geometry
dtype: object
MWE:
import pandas as pd
from pandas import Timestamp
import geopandas as gpd
import numpy as np
def haversine(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371000):
    """Return the great-circle distance between two points on a sphere.

    Slightly modified version of the standard haversine formula. The
    coordinates may be scalars or array-likes; array-like (lat, lon)
    inputs must have numeric dtypes and be of equal length.

    Args:
        lat1, lon1: Latitude and longitude of the first point.
        lat2, lon2: Latitude and longitude of the second point.
        to_radians: If true, the coordinates are given in decimal degrees
            and are converted to radians first; if false, they are taken
            to be in radians already.
        earth_radius: Sphere radius; the result is expressed in the same
            unit as this value.

    Returns:
        The distance along the sphere's surface, ``earth_radius`` times the
        central angle between the two points.
    """
    if to_radians:
        lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2])

    a = (np.sin((lat2 - lat1) / 2.0) ** 2
         + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2.0) ** 2)
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # antipodal points), which would make arcsin return NaN; clip it back.
    a = np.clip(a, 0.0, 1.0)
    return earth_radius * 2 * np.arcsin(np.sqrt(a))
# Two small sample tracks. Each column is given as an {row_label: value}
# mapping with row labels 0..3; the timestamps are timezone-aware (UTC).
df1 = pd.DataFrame.from_dict({
    'lat': dict(enumerate([
        52.378851603519905,
        52.37896949048437,
        52.378654032960824,
        52.37818902922923,
    ])),
    'lon': dict(enumerate([
        4.88585622453752,
        4.886671616078047,
        4.886413945242339,
        4.885995520636016,
    ])),
    'time': dict(enumerate(
        Timestamp(stamp, tz='UTC') for stamp in (
            '2019-11-05 11:31:42+0000',
            '2019-11-05 11:32:22+0000',
            '2019-11-05 11:32:49+0000',
            '2019-11-05 11:33:31+0000',
        )
    )),
})
df2 = pd.DataFrame.from_dict({
    'lat': dict(enumerate([
        52.378851603519905,
        52.369466977365214,
        52.36923115238693,
        52.36898222465506,
    ])),
    'lon': dict(enumerate([
        4.88585622453752,
        4.9121331184582,
        4.912723204441477,
        4.913505393878495,
    ])),
    'time': dict(enumerate(
        Timestamp(stamp, tz='UTC') for stamp in (
            '2019-11-05 08:54:32+0000',
            '2019-11-05 08:55:06+0000',
            '2019-11-05 08:55:40+0000',
            '2019-11-05 08:56:22+0000',
        )
    )),
})
# points_from_xy takes x first and y second. Pass longitude as x and latitude
# as y so that Point.y is the latitude and Point.x the longitude, which is the
# order in which the haversine(p.y, p.x, ...) calls in this script read them.
df1 = gpd.GeoDataFrame(df1, geometry=gpd.points_from_xy(df1.lon, df1.lat))
df2 = gpd.GeoDataFrame(df2, geometry=gpd.points_from_xy(df2.lon, df2.lat))
# Compare every point of df1 with every point of df2 and record both members
# of each pair whose haversine distance is below 100.
ps1 = []
ps2 = []
for p1 in df1.geometry:
    for p2 in df2.geometry:
        # haversine() takes (lat, lon) pairs: y is passed as latitude, x as longitude.
        dist = haversine(p1.y, p1.x, p2.y, p2.x)
        if dist < 100:
            ps1.append(p1)
            ps2.append(p2)
# Print the runtime type of every object taking part in the two membership
# tests, and of each test's result.
print('df1 : ', type(df1))
print('df1.geometry: ', type(df1.geometry))
print('ps1 : ', type(ps1))
val1 = df1.geometry.isin(ps1)
print('val1 : ', type(val1))
print('df2 : ', type(df2))
print('df2.geometry: ', type(df2.geometry))
print('ps2 : ', type(ps2))
# NOTE(review): according to the accompanying report, this is the call that
# fails with "TypeError: unhashable type: 'Point'".
val2 = df2.geometry.isin(ps2)
print('val2 : ', type(val2))
# Intended filtering step, left disabled while the isin calls are inspected:
# df1 = df1[df1.geometry.isin(ps1)]
# df2 = df2[df2.geometry.isin(ps2)]
如错误信息所述,Point 对象不可哈希(unhashable;原因可能见原文链接)。
出于我尚不清楚的原因,pandas.Series.isin
函数似乎要求数据是可哈希的。参见我刚刚就此另行提出的问题。
对于您的问题,解决方法是使用列表,然后再次将其转换为系列,例如:
# Build the membership mask with a plain list comprehension instead of
# Series.isin. Reuse df2's index so the mask aligns with df2 when it is used
# as a boolean indexer (df2[val2]), whatever df2's row labels are.
val2 = pd.Series([v in ps2 for v in df2.geometry], index=df2.index)
我尝试查找并过滤 GeoDataFrame (df1) 中与第二个 GDF (df2) 中的点接近的点,反之亦然。 我为此使用了这段代码:
# Collect the points of each frame that lie closer than 100 (in the units of
# haversine's earth_radius) to some point of the other frame.
ps1 = []
ps2 = []
for p1 in df1.geometry:
    for p2 in df2.geometry:
        # haversine() takes (lat, lon) pairs: y is passed as latitude, x as longitude.
        dist = haversine(p1.y, p1.x, p2.y, p2.x)
        if dist < 100:
            ps1.append(p1)
            ps2.append(p2)
# Keep only the rows whose geometry was collected above.
df1 = df1[df1.geometry.isin(ps1)]
df2 = df2[df2.geometry.isin(ps2)]
但是,我在最后一行收到错误消息:
TypeError: unhashable type: 'Point'
但是它上面的那一行却运行得非常顺利,而这两行中变量(df1/df2 和 ps1/ps2)的数据类型完全相同。
这怎么可能?以及如何解决?
编辑:
变量类型:
df1 : <class 'geopandas.geodataframe.GeoDataFrame'>
df1.geometry: <class 'geopandas.geoseries.GeoSeries'>
ps1 : <class 'list'>
val1 : <class 'pandas.core.series.Series'>
df2 : <class 'geopandas.geodataframe.GeoDataFrame'>
df2.geometry: <class 'geopandas.geoseries.GeoSeries'>
ps2 : <class 'list'>
编辑 2:
df1.dtypes
Out[301]:
lat float64
lon float64
time datetime64[ns, UTC]
geometry geometry
dtype: object
df2.dtypes
Out[302]:
lat float64
lon float64
time datetime64[ns, UTC]
geometry geometry
dtype: object
MWE:
import pandas as pd
from pandas import Timestamp
import geopandas as gpd
import numpy as np
def haversine(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371000):
    """Return the great-circle distance between two points on a sphere.

    Slightly modified version of the standard haversine formula. The
    coordinates may be scalars or array-likes; array-like (lat, lon)
    inputs must have numeric dtypes and be of equal length.

    Args:
        lat1, lon1: Latitude and longitude of the first point.
        lat2, lon2: Latitude and longitude of the second point.
        to_radians: If true, the coordinates are given in decimal degrees
            and are converted to radians first; if false, they are taken
            to be in radians already.
        earth_radius: Sphere radius; the result is expressed in the same
            unit as this value.

    Returns:
        The distance along the sphere's surface, ``earth_radius`` times the
        central angle between the two points.
    """
    if to_radians:
        lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2])

    a = (np.sin((lat2 - lat1) / 2.0) ** 2
         + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2.0) ** 2)
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # antipodal points), which would make arcsin return NaN; clip it back.
    a = np.clip(a, 0.0, 1.0)
    return earth_radius * 2 * np.arcsin(np.sqrt(a))
# Two small sample tracks. Each column is given as an {row_label: value}
# mapping with row labels 0..3; the timestamps are timezone-aware (UTC).
df1 = pd.DataFrame.from_dict({
    'lat': dict(enumerate([
        52.378851603519905,
        52.37896949048437,
        52.378654032960824,
        52.37818902922923,
    ])),
    'lon': dict(enumerate([
        4.88585622453752,
        4.886671616078047,
        4.886413945242339,
        4.885995520636016,
    ])),
    'time': dict(enumerate(
        Timestamp(stamp, tz='UTC') for stamp in (
            '2019-11-05 11:31:42+0000',
            '2019-11-05 11:32:22+0000',
            '2019-11-05 11:32:49+0000',
            '2019-11-05 11:33:31+0000',
        )
    )),
})
df2 = pd.DataFrame.from_dict({
    'lat': dict(enumerate([
        52.378851603519905,
        52.369466977365214,
        52.36923115238693,
        52.36898222465506,
    ])),
    'lon': dict(enumerate([
        4.88585622453752,
        4.9121331184582,
        4.912723204441477,
        4.913505393878495,
    ])),
    'time': dict(enumerate(
        Timestamp(stamp, tz='UTC') for stamp in (
            '2019-11-05 08:54:32+0000',
            '2019-11-05 08:55:06+0000',
            '2019-11-05 08:55:40+0000',
            '2019-11-05 08:56:22+0000',
        )
    )),
})
# points_from_xy takes x first and y second. Pass longitude as x and latitude
# as y so that Point.y is the latitude and Point.x the longitude, which is the
# order in which the haversine(p.y, p.x, ...) calls in this script read them.
df1 = gpd.GeoDataFrame(df1, geometry=gpd.points_from_xy(df1.lon, df1.lat))
df2 = gpd.GeoDataFrame(df2, geometry=gpd.points_from_xy(df2.lon, df2.lat))
# Compare every point of df1 with every point of df2 and record both members
# of each pair whose haversine distance is below 100.
ps1 = []
ps2 = []
for p1 in df1.geometry:
    for p2 in df2.geometry:
        # haversine() takes (lat, lon) pairs: y is passed as latitude, x as longitude.
        dist = haversine(p1.y, p1.x, p2.y, p2.x)
        if dist < 100:
            ps1.append(p1)
            ps2.append(p2)
# Print the runtime type of every object taking part in the two membership
# tests, and of each test's result.
print('df1 : ', type(df1))
print('df1.geometry: ', type(df1.geometry))
print('ps1 : ', type(ps1))
val1 = df1.geometry.isin(ps1)
print('val1 : ', type(val1))
print('df2 : ', type(df2))
print('df2.geometry: ', type(df2.geometry))
print('ps2 : ', type(ps2))
# NOTE(review): according to the accompanying report, this is the call that
# fails with "TypeError: unhashable type: 'Point'".
val2 = df2.geometry.isin(ps2)
print('val2 : ', type(val2))
# Intended filtering step, left disabled while the isin calls are inspected:
# df1 = df1[df1.geometry.isin(ps1)]
# df2 = df2[df2.geometry.isin(ps2)]
如错误信息所述,Point 对象不可哈希(unhashable;原因可能见原文链接)。
出于我尚不清楚的原因,pandas.Series.isin
函数似乎要求数据是可哈希的。参见我刚刚就此另行提出的问题。
对于您的问题,解决方法是使用列表,然后再次将其转换为系列,例如:
# Build the membership mask with a plain list comprehension instead of
# Series.isin. Reuse df2's index so the mask aligns with df2 when it is used
# as a boolean indexer (df2[val2]), whatever df2's row labels are.
val2 = pd.Series([v in ps2 for v in df2.geometry], index=df2.index)