Geopy error: GeocoderServiceError: HTTP Error 500: Internal Server Error using pandas apply function with str concat
Geopy error: GeocoderServiceError: HTTP Error 500: Internal Server Error using pandas apply function with str concat
工作函数(参见代码)已停止工作。唯一的区别是我将字符串连接传递给它。
# Get geocode, return LAT and LON
def locate(x):
geolocator = Nominatim()
print("'" + x + "'")
location = geolocator.geocode(x) # Get geocode
print(location)
lat = location.latitude
lon = location.longitude
try:
#Get geocode
location = geolocator.geocode(x, timeout=8, exactly_one=True)
lat = location.latitude
lon = location.longitude
except:
#didn't work for some reason that I really don't care about
lat = np.nan
lon = np.nan
print(lat,lon)
return pd.Series([lat, lon])
这个有效
In[4] locate('MOSCOW 123098 RUSSIA')
'MOSCOW 123098 RUSSIA'
Москва, Центральный административный округ, Москва, ЦФО, Россия
Out[4]:
0 55.751633
1 37.618704
dtype: float64
但这不是:
df_addr[['LAT','LON']] = df_addr['COUNTRY'].apply(locate(df_addr['CITY'] + ' ' + \
df_addr['PROVINCE'] + ' ' + \
df_addr['STATE'] + ' ' + \
df_addr['ZIP_CODE'] + ' ' + \
df_addr['COUNTRY'])) # Geocode it!
我看到函数回显了正确的输入字符串:
0 'INNSBRUCK AUSTRIA'
1 'BERN CH-3001 SWITZERLAND'
2 'INNSBRUCK AUSTRIA'
3 'MOSCOW 123098 RUSSIA'
4 'MOSCOW 123098 RUSSIA'
5 'FREDERICK MD 21702 USA'
删除 try/except 我得到以下异常信息
.
.
99 'GLASGOW LANARK G20 9NB SCOTLAND'
dtype: object
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
131 try:
--> 132 page = requester(url, timeout=(timeout or self.timeout), **kwargs)
133 except Exception as error: # pylint: disable=W0703
C:\Users\gn\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault)
152 opener = _opener
--> 153 return opener.open(url, data, timeout)
154
C:\Users\gn\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
460 meth = getattr(processor, meth_name)
--> 461 response = meth(req, response)
462
C:\Users\gn\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
570 response = self.parent.error(
--> 571 'http', request, response, code, msg, hdrs)
572
C:\Users\gn\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
498 args = (dict, 'default', 'http_error_default') + orig_args
--> 499 return self._call_chain(*args)
500
C:\Users\gn\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
432 func = getattr(handler, meth_name)
--> 433 result = func(*args)
434 if result is not None:
C:\Users\gn\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
578 def http_error_default(self, req, fp, code, msg, hdrs):
--> 579 raise HTTPError(req.full_url, code, msg, hdrs, fp)
580
HTTPError: HTTP Error 500: Internal Server Error
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
146 try:
--> 147 raise ERROR_CODE_MAP[code](message)
148 except KeyError:
KeyError: 500
During handling of the above exception, another exception occurred:
GeocoderServiceError Traceback (most recent call last)
<ipython-input-6-7412c2e27dd8> in <module>()
----> 1 df_addr[['LAT','LON']] = df_addr['COUNTRY'].apply(locate(df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' + df_addr['STATE'] + ' ' + df_addr['ZIP_CODE'] + ' ' + df_addr['COUNTRY'])) # Geocode it!
2 df_addr.head()
<ipython-input-3-d957ac2e2e2e> in locate(x)
3 geolocator = Nominatim()
4 print("'" + x + "'")
----> 5 location = geolocator.geocode(x,timeout=20) # Get geocode
6 print(location)
7 lat = location.latitude
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\osm.py in geocode(self, query, exactly_one, timeout, addressdetails, language, geometry)
190 logger.debug("%s.geocode: %s", self.__class__.__name__, url)
191 return self._parse_json(
--> 192 self._call_geocoder(url, timeout=timeout), exactly_one
193 )
194
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
147 raise ERROR_CODE_MAP[code](message)
148 except KeyError:
--> 149 raise GeocoderServiceError(message)
150 elif isinstance(error, URLError):
151 if "timed out" in message:
GeocoderServiceError: HTTP Error 500: Internal Server Error
让我头疼。更新了所有库,但问题没有改变。
提前致谢
老实说,您的做法有点反常,您在一个系列上调用 apply
,然后尝试从许多列中构建一个 str,这是错误的做法这样,您可以在 df 上调用 apply 并传递 axis=1
以便传递行并访问 lambda func 中的每一列并将它们传递给 locate
或在 locate
中提取每一列值,或者只是从所有列的串联中创建一个系列,然后调用应用:
df_addr[['LAT','LON']] = (df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' + df_addr['STATE'] + ' ' + df_addr['ZIP_CODE'] + ' ' + df_addr['COUNTRY']).apply(locate)
我相信以上应该有效。
因此,根据 Ed Chum 的见解,我编写了以下有效的错误代码:
#Create a summary address field in a new geo dataframe
df_geo = pd.DataFrame(columns = ['BIG_ADDR', 'LAT', 'LON'])
df_geo['BIG_ADDR'] = df = df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' + df_addr['STATE'] + ' ' + \
df_addr['ZIP_CODE'] + ' ' + df_addr['COUNTRY']
# Eliminate dups
df_geo = df_geo['BIG_ADDR'].drop_duplicates().reset_index()
# Geocode ALL THINGS in GEO frame!
df_geo[['LAT','LON']] = df_geo['BIG_ADDR'].apply(locate)
# Create the same index in the address dataframe
df_addr['BIG_ADDR'] = df = df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' + df_addr['STATE'] + ' ' + \
df_addr['ZIP_CODE'] + ' ' + df_addr['COUNTRY']
# Combine the address and geo frames
df_addr = pd.merge(df_addr, df_geo, on=['BIG_ADDR'], how='left')
df_addr.rename(columns={'LAT_y': 'LAT', 'LON_y': 'LON'}, inplace=True) #cleanup
df_addr.rename(columns={'LAT_y': 'LAT', 'LON_y': 'LON'}, inplace=True)
del df_geo['index']
工作函数(参见代码
# Get geocode, return LAT and LON
def locate(x):
geolocator = Nominatim()
print("'" + x + "'")
location = geolocator.geocode(x) # Get geocode
print(location)
lat = location.latitude
lon = location.longitude
try:
#Get geocode
location = geolocator.geocode(x, timeout=8, exactly_one=True)
lat = location.latitude
lon = location.longitude
except:
#didn't work for some reason that I really don't care about
lat = np.nan
lon = np.nan
print(lat,lon)
return pd.Series([lat, lon])
这个有效
In[4] locate('MOSCOW 123098 RUSSIA')
'MOSCOW 123098 RUSSIA'
Москва, Центральный административный округ, Москва, ЦФО, Россия
Out[4]:
0 55.751633
1 37.618704
dtype: float64
但这不是:
df_addr[['LAT','LON']] = df_addr['COUNTRY'].apply(locate(df_addr['CITY'] + ' ' + \
df_addr['PROVINCE'] + ' ' + \
df_addr['STATE'] + ' ' + \
df_addr['ZIP_CODE'] + ' ' + \
df_addr['COUNTRY'])) # Geocode it!
我看到函数回显了正确的输入字符串:
0 'INNSBRUCK AUSTRIA'
1 'BERN CH-3001 SWITZERLAND'
2 'INNSBRUCK AUSTRIA'
3 'MOSCOW 123098 RUSSIA'
4 'MOSCOW 123098 RUSSIA'
5 'FREDERICK MD 21702 USA'
删除 try/except 我得到以下异常信息
.
.
99 'GLASGOW LANARK G20 9NB SCOTLAND'
dtype: object
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
131 try:
--> 132 page = requester(url, timeout=(timeout or self.timeout), **kwargs)
133 except Exception as error: # pylint: disable=W0703
C:\Users\gn\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault)
152 opener = _opener
--> 153 return opener.open(url, data, timeout)
154
C:\Users\gn\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
460 meth = getattr(processor, meth_name)
--> 461 response = meth(req, response)
462
C:\Users\gn\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
570 response = self.parent.error(
--> 571 'http', request, response, code, msg, hdrs)
572
C:\Users\gn\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
498 args = (dict, 'default', 'http_error_default') + orig_args
--> 499 return self._call_chain(*args)
500
C:\Users\gn\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
432 func = getattr(handler, meth_name)
--> 433 result = func(*args)
434 if result is not None:
C:\Users\gn\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
578 def http_error_default(self, req, fp, code, msg, hdrs):
--> 579 raise HTTPError(req.full_url, code, msg, hdrs, fp)
580
HTTPError: HTTP Error 500: Internal Server Error
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
146 try:
--> 147 raise ERROR_CODE_MAP[code](message)
148 except KeyError:
KeyError: 500
During handling of the above exception, another exception occurred:
GeocoderServiceError Traceback (most recent call last)
<ipython-input-6-7412c2e27dd8> in <module>()
----> 1 df_addr[['LAT','LON']] = df_addr['COUNTRY'].apply(locate(df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' + df_addr['STATE'] + ' ' + df_addr['ZIP_CODE'] + ' ' + df_addr['COUNTRY'])) # Geocode it!
2 df_addr.head()
<ipython-input-3-d957ac2e2e2e> in locate(x)
3 geolocator = Nominatim()
4 print("'" + x + "'")
----> 5 location = geolocator.geocode(x,timeout=20) # Get geocode
6 print(location)
7 lat = location.latitude
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\osm.py in geocode(self, query, exactly_one, timeout, addressdetails, language, geometry)
190 logger.debug("%s.geocode: %s", self.__class__.__name__, url)
191 return self._parse_json(
--> 192 self._call_geocoder(url, timeout=timeout), exactly_one
193 )
194
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
147 raise ERROR_CODE_MAP[code](message)
148 except KeyError:
--> 149 raise GeocoderServiceError(message)
150 elif isinstance(error, URLError):
151 if "timed out" in message:
GeocoderServiceError: HTTP Error 500: Internal Server Error
让我头疼。更新了所有库,但问题没有改变。
提前致谢
老实说,您的做法有点反常,您在一个系列上调用 apply
,然后尝试从许多列中构建一个 str,这是错误的做法这样,您可以在 df 上调用 apply 并传递 axis=1
以便传递行并访问 lambda func 中的每一列并将它们传递给 locate
或在 locate
中提取每一列值,或者只是从所有列的串联中创建一个系列,然后调用应用:
df_addr[['LAT','LON']] = (df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' + df_addr['STATE'] + ' ' + df_addr['ZIP_CODE'] + ' ' + df_addr['COUNTRY']).apply(locate)
我相信以上应该有效。
因此,根据 Ed Chum 的见解,我编写了以下有效的错误代码:
#Create a summary address field in a new geo dataframe
df_geo = pd.DataFrame(columns = ['BIG_ADDR', 'LAT', 'LON'])
df_geo['BIG_ADDR'] = df = df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' + df_addr['STATE'] + ' ' + \
df_addr['ZIP_CODE'] + ' ' + df_addr['COUNTRY']
# Eliminate dups
df_geo = df_geo['BIG_ADDR'].drop_duplicates().reset_index()
# Geocode ALL THINGS in GEO frame!
df_geo[['LAT','LON']] = df_geo['BIG_ADDR'].apply(locate)
# Create the same index in the address dataframe
df_addr['BIG_ADDR'] = df = df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' + df_addr['STATE'] + ' ' + \
df_addr['ZIP_CODE'] + ' ' + df_addr['COUNTRY']
# Combine the address and geo frames
df_addr = pd.merge(df_addr, df_geo, on=['BIG_ADDR'], how='left')
df_addr.rename(columns={'LAT_y': 'LAT', 'LON_y': 'LON'}, inplace=True) #cleanup
df_addr.rename(columns={'LAT_y': 'LAT', 'LON_y': 'LON'}, inplace=True)
del df_geo['index']