Google 使用 python 数据帧计算位置距离
Google Location distance calculation using python dataframes
我正在尝试使用 Google 位置数据和 Python Pandas 数据框来梳理我在某个区域(一英里左右)的日期。
首先从latitudeE7转换为纬度:
# Load the Google Takeout location-history export and flatten its
# 'locations' records into a DataFrame.
with open(Takeout_google_location_history) as f:
    data = json.load(f)
df = json_normalize(data['locations'])

# Dividing the E7 values by 10**7 turns them into latitude/longitude values.
# The column names are kept, so they still end in "E7" after the conversion.
df['latitudeE7'] = df['latitudeE7'].div(10000000.0)
df['longitudeE7'] = df['longitudeE7'].div(10000000.0)
df.head()
然后计算距离:
import haversine as hs
from haversine import Unit
# Reference point as a (latitude, longitude) pair.
loc1 = (31.393300,-99.070050)
# NOTE: this call raises "TypeError: cannot convert the series to <class 'float'>".
# Whole Series are passed as the second point, but haversine() applies
# radians() to each coordinate, which needs a single scalar value.
df['diff'] = hs.haversine(loc1,(df['latitudeE7'],df['longitudeE7']),unit=Unit.MILES)
df.head()
出现此错误:
~\Anaconda2\envs\notebook\lib\site-packages\haversine\haversine.py in
haversine(point1, point2, unit)
92 lat1 = radians(lat1)
93 lng1 = radians(lng1)
---> 94 lat2 = radians(lat2)
95 lng2 = radians(lng2)
96
~\Anaconda2\envs\notebook\lib\site-packages\pandas\core\series.py in wrapper(self)
183 if len(self) == 1:
184 return converter(self.iloc[0])
--> 185 raise TypeError(f"cannot convert the series to {converter}")
186
187 wrapper.__name__ = f"__{converter.__name__}__"
TypeError: cannot convert the series to <class 'float'>
我不确定如何处理数据才能使其成为浮点数。
我试过:
df['latitudeE7'] = df['latitudeE7'].div(10000000.0).astype(float)
以及使用手写距离:
import math
def distance(origin, destination, radius=6371):
    """Return the great-circle (haversine) distance between two points.

    Args:
        origin: ``(latitude, longitude)`` pair in decimal degrees.
        destination: ``(latitude, longitude)`` pair in decimal degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to 6371 (kilometres).

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.

    Raises:
        TypeError: If a coordinate cannot be converted with ``float()``,
            for example a multi-element ``pandas.Series``. The function
            works on one scalar point at a time, so call it once per row.
    """
    lat1, lon1 = (float(value) for value in origin)
    lat2, lon2 = (float(value) for value in destination)

    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)

    sin_half_dlat = math.sin(dlat / 2)
    sin_half_dlon = math.sin(dlon / 2)
    a = (sin_half_dlat * sin_half_dlat +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         sin_half_dlon * sin_half_dlon)
    # Rounding can push ``a`` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, max(0.0, a))

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius * c
仍然出现同样的错误:
~\AppData\Local\Temp/ipykernel_22916/3664391511.py in distance(origin, destination)
26 radius = 6371 # km
27
---> 28 dlat = math.radians(float(lat2) - lat1)
29 dlon = math.radians(float(lon2) - lon1)
30 a = (math.sin(dlat / 2) * math.sin(dlat / 2) +
~\Anaconda2\envs\notebook\lib\site-packages\pandas\core\series.py in wrapper(self)
183 if len(self) == 1:
184 return converter(self.iloc[0])
--> 185 raise TypeError(f"cannot convert the series to {converter}")
186
187 wrapper.__name__ = f"__{converter.__name__}__"
TypeError: cannot convert the series to <class 'float'>
您不能直接将 pd.Series 传递给 haversine 函数:该函数一次只接受一对标量坐标,因此需要逐行调用。
代码:
from haversine import haversine, Unit
import pandas as pd
# Reference point as a (latitude, longitude) pair.
loc1 = (31.393300, -99.070050)

# Sample dataframe: two rows, both at latitude 0 / longitude 0.
df = pd.DataFrame({'latitudeE7': [0, 0], 'longitudeE7': [0, 0]})

# Passing whole columns does not work, haversine() needs one point per call:
# df['diff'] = haversine(loc1, (df['latitudeE7'], df['longitudeE7']), unit=Unit.MILES)


def _miles_from_loc1(row):
    """Return the haversine distance in miles from loc1 to the row's point."""
    point = (row['latitudeE7'], row['longitudeE7'])
    return haversine(loc1, point, unit=Unit.MILES)


# Calculation: evaluate the distance row by row.
df['diff'] = df.apply(_miles_from_loc1, axis=1)
输出:
latitudeE7 | longitudeE7 | diff |
---|---|---|
0 | 0 | 6752.74 |
0 | 0 | 6752.74 |
参考:
您遇到的问题似乎与这篇帖子有关:understanding math errors in pandas dataframes
[编辑]
如果行数较多,使用 haversine_vector 速度更快。
代码
#准备工作:
from haversine import haversine, haversine_vector, Unit
import pandas as pd
import numpy as np
# Reference point as a (latitude, longitude) pair.
loc1 = (31.393300, -99.070050)

# Sample dataframe: n random points, latitude in [-90, 90) and
# longitude in [-180, 180).
n = 1000000
df = pd.DataFrame({'latitudeE7': np.random.rand(n) * 180 - 90, 'longitudeE7': np.random.rand(n) * 360 - 180})

# Speed test 1 (using haversine): one haversine() call per row.
df['diff'] = df.apply(lambda row: haversine(loc1, (row['latitudeE7'], row['longitudeE7']), unit=Unit.MILES), axis=1)
# 9.9 s ± 172 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

# Speed test 2 (using haversine_vector): a single call for all rows.
# Only the two coordinate columns are passed, in (latitude, longitude) order,
# so the 'diff' column created above is not handed to haversine_vector.
df['diff'] = haversine_vector(loc1, df[['latitudeE7', 'longitudeE7']], unit=Unit.MILES, comb=True)
# 105 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
参考:
haversine_vector:document
haversine_vector:implementation
我正在尝试使用 Google 位置数据和 Python Pandas 数据框来梳理我在某个区域(一英里左右)的日期。
首先从latitudeE7转换为纬度:
# Load the Google Takeout location-history export and flatten its
# 'locations' records into a DataFrame.
with open(Takeout_google_location_history) as f:
    data = json.load(f)
df = json_normalize(data['locations'])

# Dividing the E7 values by 10**7 turns them into latitude/longitude values.
# The column names are kept, so they still end in "E7" after the conversion.
df['latitudeE7'] = df['latitudeE7'].div(10000000.0)
df['longitudeE7'] = df['longitudeE7'].div(10000000.0)
df.head()
然后计算距离:
import haversine as hs
from haversine import Unit
# Reference point as a (latitude, longitude) pair.
loc1 = (31.393300,-99.070050)
# NOTE: this call raises "TypeError: cannot convert the series to <class 'float'>".
# Whole Series are passed as the second point, but haversine() applies
# radians() to each coordinate, which needs a single scalar value.
df['diff'] = hs.haversine(loc1,(df['latitudeE7'],df['longitudeE7']),unit=Unit.MILES)
df.head()
出现此错误:
~\Anaconda2\envs\notebook\lib\site-packages\haversine\haversine.py in
haversine(point1, point2, unit)
92 lat1 = radians(lat1)
93 lng1 = radians(lng1)
---> 94 lat2 = radians(lat2)
95 lng2 = radians(lng2)
96
~\Anaconda2\envs\notebook\lib\site-packages\pandas\core\series.py in wrapper(self)
183 if len(self) == 1:
184 return converter(self.iloc[0])
--> 185 raise TypeError(f"cannot convert the series to {converter}")
186
187 wrapper.__name__ = f"__{converter.__name__}__"
TypeError: cannot convert the series to <class 'float'>
我不确定如何处理数据才能使其成为浮点数。
我试过:
df['latitudeE7'] = df['latitudeE7'].div(10000000.0).astype(float)
以及使用手写距离:
import math
def distance(origin, destination, radius=6371):
    """Return the great-circle (haversine) distance between two points.

    Args:
        origin: ``(latitude, longitude)`` pair in decimal degrees.
        destination: ``(latitude, longitude)`` pair in decimal degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to 6371 (kilometres).

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.

    Raises:
        TypeError: If a coordinate cannot be converted with ``float()``,
            for example a multi-element ``pandas.Series``. The function
            works on one scalar point at a time, so call it once per row.
    """
    lat1, lon1 = (float(value) for value in origin)
    lat2, lon2 = (float(value) for value in destination)

    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)

    sin_half_dlat = math.sin(dlat / 2)
    sin_half_dlon = math.sin(dlon / 2)
    a = (sin_half_dlat * sin_half_dlat +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         sin_half_dlon * sin_half_dlon)
    # Rounding can push ``a`` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, max(0.0, a))

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius * c
仍然出现同样的错误:
~\AppData\Local\Temp/ipykernel_22916/3664391511.py in distance(origin, destination)
26 radius = 6371 # km
27
---> 28 dlat = math.radians(float(lat2) - lat1)
29 dlon = math.radians(float(lon2) - lon1)
30 a = (math.sin(dlat / 2) * math.sin(dlat / 2) +
~\Anaconda2\envs\notebook\lib\site-packages\pandas\core\series.py in wrapper(self)
183 if len(self) == 1:
184 return converter(self.iloc[0])
--> 185 raise TypeError(f"cannot convert the series to {converter}")
186
187 wrapper.__name__ = f"__{converter.__name__}__"
TypeError: cannot convert the series to <class 'float'>
您不能直接将 pd.Series 传递给 haversine 函数:该函数一次只接受一对标量坐标,因此需要逐行调用。
代码:
from haversine import haversine, Unit
import pandas as pd
# Reference point as a (latitude, longitude) pair.
loc1 = (31.393300, -99.070050)

# Sample dataframe: two rows, both at latitude 0 / longitude 0.
df = pd.DataFrame({'latitudeE7': [0, 0], 'longitudeE7': [0, 0]})

# Passing whole columns does not work, haversine() needs one point per call:
# df['diff'] = haversine(loc1, (df['latitudeE7'], df['longitudeE7']), unit=Unit.MILES)


def _miles_from_loc1(row):
    """Return the haversine distance in miles from loc1 to the row's point."""
    point = (row['latitudeE7'], row['longitudeE7'])
    return haversine(loc1, point, unit=Unit.MILES)


# Calculation: evaluate the distance row by row.
df['diff'] = df.apply(_miles_from_loc1, axis=1)
输出:
latitudeE7 | longitudeE7 | diff |
---|---|---|
0 | 0 | 6752.74 |
0 | 0 | 6752.74 |
参考:
您遇到的问题似乎与这篇帖子有关:understanding math errors in pandas dataframes
[编辑]
如果行数较多,使用 haversine_vector 速度更快。
代码
#准备工作:
from haversine import haversine, haversine_vector, Unit
import pandas as pd
import numpy as np
# Reference point as a (latitude, longitude) pair.
loc1 = (31.393300, -99.070050)

# Sample dataframe: n random points, latitude in [-90, 90) and
# longitude in [-180, 180).
n = 1000000
df = pd.DataFrame({'latitudeE7': np.random.rand(n) * 180 - 90, 'longitudeE7': np.random.rand(n) * 360 - 180})

# Speed test 1 (using haversine): one haversine() call per row.
df['diff'] = df.apply(lambda row: haversine(loc1, (row['latitudeE7'], row['longitudeE7']), unit=Unit.MILES), axis=1)
# 9.9 s ± 172 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

# Speed test 2 (using haversine_vector): a single call for all rows.
# Only the two coordinate columns are passed, in (latitude, longitude) order,
# so the 'diff' column created above is not handed to haversine_vector.
df['diff'] = haversine_vector(loc1, df[['latitudeE7', 'longitudeE7']], unit=Unit.MILES, comb=True)
# 105 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
参考:
haversine_vector:document
haversine_vector:implementation