GeocoderUnavailable:使用 geopy 和 Nominatim 对某些地址进行地理编码时出现 HTTPSConnectionPool 错误
GeocoderUnavailable: HTTPSConnectionPool error for some addresses using geopy and Nominatim
使用 geopy(基于 Nominatim)进行地理编码时,某些地址会出错。我实在看不出哪些地址会出错、哪些地址不会出错的规律,例如仅仅更改门牌号,结果就会不同。
当我通过 urllib3 直接发出错误消息中提到的 API 请求时,请求成功了,所以我认为错误是由 geopy 引起的,但我不确定。
最小的可重现示例
# Minimal reproducer: build a Nominatim geocoder and run a structured query.
# No explicit timeout is passed here; the traceback further down reports
# "read timeout=1" for the failing request.
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="my-test-app")
# Structured query for house number 6; this call returned a Location.
geolocator.geocode({'country': 'DE', 'city': 'Erlangen', 'postalcode': '91052',
'street': 'Nürnberger Straße 6'}) # working
>>> Location(Nürnberger Straße, Sebaldussiedlung, Erlangen, Bayern, 91052, Deutschland, (49.5772384, 11.015895, 0.0))
# Same structured query with only the house number changed to 7; this call
# raised GeocoderUnavailable (see the traceback that follows).
geolocator.geocode({'country': 'DE', 'city': 'Erlangen', 'postalcode': '91052',
'street': 'Nürnberger Straße 7'}) # error
错误信息
---------------------------------------------------------------------------
timeout Traceback (most recent call last)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
425 # Otherwise it looks like a bug in the code.
--> 426 six.raise_from(e, None)
427 except (SocketTimeout, BaseSSLError, SocketError) as e:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\packages\six.py in raise_from(value, from_value)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
420 try:
--> 421 httplib_response = conn.getresponse()
422 except BaseException as e:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\http\client.py in getresponse(self)
1353 try:
-> 1354 response.begin()
1355 except ConnectionError:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\http\client.py in begin(self)
305 while True:
--> 306 version, status, reason = self._read_status()
307 if status != CONTINUE:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\http\client.py in _read_status(self)
266 def _read_status(self):
--> 267 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
268 if len(line) > _MAXLINE:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\socket.py in readinto(self, b)
588 try:
--> 589 return self._sock.recv_into(b)
590 except timeout:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\ssl.py in recv_into(self, buffer, nbytes, flags)
1070 self.__class__)
-> 1071 return self.read(nbytes, buffer)
1072 else:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\ssl.py in read(self, len, buffer)
928 if buffer is not None:
--> 929 return self._sslobj.read(len, buffer)
930 else:
timeout: The read operation timed out
During handling of the above exception, another exception occurred:
ReadTimeoutError Traceback (most recent call last)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
676 headers=headers,
--> 677 chunked=chunked,
678 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
427 except (SocketTimeout, BaseSSLError, SocketError) as e:
--> 428 self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
429 raise
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in _raise_timeout(self, err, url, timeout_value)
335 raise ReadTimeoutError(
--> 336 self, url, "Read timed out. (read timeout=%s)" % timeout_value
337 )
ReadTimeoutError: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
766 body_pos=body_pos,
--> 767 **response_kw
768 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
766 body_pos=body_pos,
--> 767 **response_kw
768 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
726 retries = retries.increment(
--> 727 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
728 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
445 if new_retry.is_exhausted():
--> 446 raise MaxRetryError(_pool, url, error or ResponseError(cause))
447
MaxRetryError: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?country=DE&city=Erlangen&postalcode=91052&street=N%C3%BCrnberger+Stra%C3%9Fe+7&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\adapters.py in _request(self, url, timeout, headers)
382 try:
--> 383 resp = self.session.get(url, timeout=timeout, headers=headers)
384 except Exception as error:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\sessions.py in get(self, url, **kwargs)
554 kwargs.setdefault('allow_redirects', True)
--> 555 return self.request('GET', url, **kwargs)
556
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
541 send_kwargs.update(settings)
--> 542 resp = self.send(prep, **send_kwargs)
543
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
654 # Send the request
--> 655 r = adapter.send(request, **kwargs)
656
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
515
--> 516 raise ConnectionError(e, request=request)
517
ConnectionError: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?country=DE&city=Erlangen&postalcode=91052&street=N%C3%BCrnberger+Stra%C3%9Fe+7&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
During handling of the above exception, another exception occurred:
GeocoderUnavailable Traceback (most recent call last)
<ipython-input-4-aa66519ee9b9> in <module>()
----> 1 geolocator.geocode({'country': 'DE', 'city': 'Erlangen', 'postalcode': '91052', 'street': 'Nürnberger Straße 7'})
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\geocoders\nominatim.py in geocode(self, query, exactly_one, timeout, limit, addressdetails, language, geometry, extratags, country_codes, viewbox, bounded, featuretype, namedetails)
292 logger.debug("%s.geocode: %s", self.__class__.__name__, url)
293 callback = partial(self._parse_json, exactly_one=exactly_one)
--> 294 return self._call_geocoder(url, callback, timeout=timeout)
295
296 def reverse(
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, callback, timeout, is_json, headers)
358 try:
359 if is_json:
--> 360 result = self.adapter.get_json(url, timeout=timeout, headers=req_headers)
361 else:
362 result = self.adapter.get_text(url, timeout=timeout, headers=req_headers)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\adapters.py in get_json(self, url, timeout, headers)
371
372 def get_json(self, url, *, timeout, headers):
--> 373 resp = self._request(url, timeout=timeout, headers=headers)
374 try:
375 return resp.json()
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\adapters.py in _request(self, url, timeout, headers)
393 raise GeocoderServiceError(message)
394 else:
--> 395 raise GeocoderUnavailable(message)
396 elif isinstance(error, requests.Timeout):
397 raise GeocoderTimedOut("Service timed out")
GeocoderUnavailable: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?country=DE&city=Erlangen&postalcode=91052&street=N%C3%BCrnberger+Stra%C3%9Fe+7&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
使用 urllib3 的工作示例
# Issue the same Nominatim search request directly through urllib3 and
# decode the JSON body. `json` must be imported for json.loads below.
import json

import urllib3

http = urllib3.PoolManager(1, headers={'user-agent': 'my-test-app'})
url = 'https://nominatim.openstreetmap.org/search?country=DE&city=Erlangen&postalcode=91052&street=N%C3%BCrnberger+Stra%C3%9Fe+7&format=json&limit=1'
resp = http.request('GET', url)
json.loads(resp.data.decode())
>>> [{'place_id': 17025708,
>>> 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
>>> 'osm_type': 'node',
>>> 'osm_id': 1641967158,
>>> 'boundingbox': ['49.5924431', '49.5925431', '11.0043901', '11.0044901'],
>>> 'lat': '49.5924931',
>>> 'lon': '11.0044401',
>>> 'display_name': 'Postbank, 7, Nürnberger Straße, Am Anger, Erlangen, Bayern, 91052, Deutschland',
>>> 'class': 'amenity',
>>> 'type': 'bank',
>>> 'importance': 0.6309999999999999,
>>> 'icon': 'https://nominatim.openstreetmap.org/ui/mapicons//money_bank2.p.20.png'}]
这些服务似乎托管在捐赠的服务器上,因此 Nominatim 建议避免大量使用。我只能认为这就是失败的原因——之前我只提取了几百个位置,后来开始一次提取 1000 个位置。参见 Nominatim 使用政策(Nominatim Usage Policy)。
我采用了一个替代方案来处理此错误,因为直接解决这个错误非常困难。我在替代方案中使用了 Requests。在我的脚本中,我从 Excel 读取纬度和经度,并向“反向地理编码”(reverse)REST API 发出 HTTP 请求。这样每次运行可以发出 1000 个请求,提取到的信息会写入 Excel 输出文件。
这是我的代码片段 -
import json
from geopy import Nominatim
from openpyxl import load_workbook
import time
from geopy.extra.rate_limiter import RateLimiter
import urllib3
import requests
import xml.etree.ElementTree as ET
# Reverse-geocode the coordinates stored in an Excel sheet through the public
# Nominatim /reverse endpoint and write a '|'-joined address into column 20.
# NOTE(review): the indentation of the original snippet was lost; the nesting
# used here (and saving the workbook once, after the loop) is assumed.
filepath = "/xxx/100KV-BusLoc.xlsx"
wb = load_workbook(filepath)
sheet = wb["Envision"]
# Alternative sheet: wb["Elec_Sub"]

reverse_url = 'https://nominatim.openstreetmap.org/reverse'
# The Nominatim usage policy asks for an identifying User-Agent and at most
# one request per second on the public servers.
headers = {'User-Agent': 'my-test-app'}
# Address elements copied into the output, in output order.
address_tags = ('road', 'municipality', 'county', 'state', 'postcode', 'country')

for i1 in range(5999, 7000):
    # Skip rows whose first column is empty.
    if str(sheet.cell(row=i1, column=1).value) == "None":
        continue
    print(i1)
    cell = sheet.cell(i1, 20)
    latt = sheet.cell(row=i1, column=8).value
    lon = sheet.cell(row=i1, column=9).value
    if latt is None or lon is None:
        # No coordinates to look up; do not send the literal "None".
        continue

    time.sleep(1)  # throttle to one request per second
    try:
        resp = requests.get(
            reverse_url,
            # XML is requested explicitly because the body is parsed with
            # ElementTree below.
            params={'lat': str(latt), 'lon': str(lon), 'format': 'xml'},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException as error:
        print(f"row {i1}: request failed: {error}")
        continue

    # Only a successful response is expected to carry a parseable XML body.
    if resp.status_code != 200:
        print(f"row {i1}: HTTP {resp.status_code}")
        continue
    try:
        root = ET.fromstring(resp.text)
    except ET.ParseError as error:
        print(f"row {i1}: unparseable response: {error}")
        continue

    for child in root.findall('addressparts'):
        # findtext() yields None for a missing element and '' for an empty
        # one; both are dropped so the result has no stray separators.
        parts = (child.findtext(tag) for tag in address_tags)
        locDet = '|'.join(part for part in parts if part)
        if locDet:
            print(locDet)
            cell.value = locDet

wb.save(filepath)
我的最小可重现示例不再引发该错误,无论是在使用旧版本 geopy (1.22) 的旧环境中,还是在使用最新版本 geopy (2.1.0) 的环境中,所以我不太确定到底是什么发生了变化。
使用 geopy(基于 Nominatim)进行地理编码时,某些地址会出错。我实在看不出哪些地址会出错、哪些地址不会出错的规律,例如仅仅更改门牌号,结果就会不同。
当我通过 urllib3 直接发出错误消息中提到的 API 请求时,请求成功了,所以我认为错误是由 geopy 引起的,但我不确定。
最小的可重现示例
# Minimal reproducer: build a Nominatim geocoder and run a structured query.
# No explicit timeout is passed here; the traceback further down reports
# "read timeout=1" for the failing request.
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="my-test-app")
# Structured query for house number 6; this call returned a Location.
geolocator.geocode({'country': 'DE', 'city': 'Erlangen', 'postalcode': '91052',
'street': 'Nürnberger Straße 6'}) # working
>>> Location(Nürnberger Straße, Sebaldussiedlung, Erlangen, Bayern, 91052, Deutschland, (49.5772384, 11.015895, 0.0))
# Same structured query with only the house number changed to 7; this call
# raised GeocoderUnavailable (see the traceback that follows).
geolocator.geocode({'country': 'DE', 'city': 'Erlangen', 'postalcode': '91052',
'street': 'Nürnberger Straße 7'}) # error
错误信息
---------------------------------------------------------------------------
timeout Traceback (most recent call last)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
425 # Otherwise it looks like a bug in the code.
--> 426 six.raise_from(e, None)
427 except (SocketTimeout, BaseSSLError, SocketError) as e:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\packages\six.py in raise_from(value, from_value)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
420 try:
--> 421 httplib_response = conn.getresponse()
422 except BaseException as e:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\http\client.py in getresponse(self)
1353 try:
-> 1354 response.begin()
1355 except ConnectionError:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\http\client.py in begin(self)
305 while True:
--> 306 version, status, reason = self._read_status()
307 if status != CONTINUE:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\http\client.py in _read_status(self)
266 def _read_status(self):
--> 267 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
268 if len(line) > _MAXLINE:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\socket.py in readinto(self, b)
588 try:
--> 589 return self._sock.recv_into(b)
590 except timeout:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\ssl.py in recv_into(self, buffer, nbytes, flags)
1070 self.__class__)
-> 1071 return self.read(nbytes, buffer)
1072 else:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\ssl.py in read(self, len, buffer)
928 if buffer is not None:
--> 929 return self._sslobj.read(len, buffer)
930 else:
timeout: The read operation timed out
During handling of the above exception, another exception occurred:
ReadTimeoutError Traceback (most recent call last)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
676 headers=headers,
--> 677 chunked=chunked,
678 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
427 except (SocketTimeout, BaseSSLError, SocketError) as e:
--> 428 self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
429 raise
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in _raise_timeout(self, err, url, timeout_value)
335 raise ReadTimeoutError(
--> 336 self, url, "Read timed out. (read timeout=%s)" % timeout_value
337 )
ReadTimeoutError: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
766 body_pos=body_pos,
--> 767 **response_kw
768 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
766 body_pos=body_pos,
--> 767 **response_kw
768 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
726 retries = retries.increment(
--> 727 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
728 )
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
445 if new_retry.is_exhausted():
--> 446 raise MaxRetryError(_pool, url, error or ResponseError(cause))
447
MaxRetryError: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?country=DE&city=Erlangen&postalcode=91052&street=N%C3%BCrnberger+Stra%C3%9Fe+7&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\adapters.py in _request(self, url, timeout, headers)
382 try:
--> 383 resp = self.session.get(url, timeout=timeout, headers=headers)
384 except Exception as error:
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\sessions.py in get(self, url, **kwargs)
554 kwargs.setdefault('allow_redirects', True)
--> 555 return self.request('GET', url, **kwargs)
556
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
541 send_kwargs.update(settings)
--> 542 resp = self.send(prep, **send_kwargs)
543
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
654 # Send the request
--> 655 r = adapter.send(request, **kwargs)
656
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
515
--> 516 raise ConnectionError(e, request=request)
517
ConnectionError: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?country=DE&city=Erlangen&postalcode=91052&street=N%C3%BCrnberger+Stra%C3%9Fe+7&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
During handling of the above exception, another exception occurred:
GeocoderUnavailable Traceback (most recent call last)
<ipython-input-4-aa66519ee9b9> in <module>()
----> 1 geolocator.geocode({'country': 'DE', 'city': 'Erlangen', 'postalcode': '91052', 'street': 'Nürnberger Straße 7'})
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\geocoders\nominatim.py in geocode(self, query, exactly_one, timeout, limit, addressdetails, language, geometry, extratags, country_codes, viewbox, bounded, featuretype, namedetails)
292 logger.debug("%s.geocode: %s", self.__class__.__name__, url)
293 callback = partial(self._parse_json, exactly_one=exactly_one)
--> 294 return self._call_geocoder(url, callback, timeout=timeout)
295
296 def reverse(
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, callback, timeout, is_json, headers)
358 try:
359 if is_json:
--> 360 result = self.adapter.get_json(url, timeout=timeout, headers=req_headers)
361 else:
362 result = self.adapter.get_text(url, timeout=timeout, headers=req_headers)
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\adapters.py in get_json(self, url, timeout, headers)
371
372 def get_json(self, url, *, timeout, headers):
--> 373 resp = self._request(url, timeout=timeout, headers=headers)
374 try:
375 return resp.json()
C:\Users\USERNAME\Anaconda3\envs\crm_templates\lib\site-packages\geopy\adapters.py in _request(self, url, timeout, headers)
393 raise GeocoderServiceError(message)
394 else:
--> 395 raise GeocoderUnavailable(message)
396 elif isinstance(error, requests.Timeout):
397 raise GeocoderTimedOut("Service timed out")
GeocoderUnavailable: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?country=DE&city=Erlangen&postalcode=91052&street=N%C3%BCrnberger+Stra%C3%9Fe+7&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
使用 urllib3 的工作示例
# Issue the same Nominatim search request directly through urllib3 and
# decode the JSON body. `json` must be imported for json.loads below.
import json

import urllib3

http = urllib3.PoolManager(1, headers={'user-agent': 'my-test-app'})
url = 'https://nominatim.openstreetmap.org/search?country=DE&city=Erlangen&postalcode=91052&street=N%C3%BCrnberger+Stra%C3%9Fe+7&format=json&limit=1'
resp = http.request('GET', url)
json.loads(resp.data.decode())
>>> [{'place_id': 17025708,
>>> 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
>>> 'osm_type': 'node',
>>> 'osm_id': 1641967158,
>>> 'boundingbox': ['49.5924431', '49.5925431', '11.0043901', '11.0044901'],
>>> 'lat': '49.5924931',
>>> 'lon': '11.0044401',
>>> 'display_name': 'Postbank, 7, Nürnberger Straße, Am Anger, Erlangen, Bayern, 91052, Deutschland',
>>> 'class': 'amenity',
>>> 'type': 'bank',
>>> 'importance': 0.6309999999999999,
>>> 'icon': 'https://nominatim.openstreetmap.org/ui/mapicons//money_bank2.p.20.png'}]
这些服务似乎托管在捐赠的服务器上,因此 Nominatim 建议避免大量使用。我只能认为这就是失败的原因——之前我只提取了几百个位置,后来开始一次提取 1000 个位置。参见 Nominatim 使用政策(Nominatim Usage Policy)。
我采用了一个替代方案来处理此错误,因为直接解决这个错误非常困难。我在替代方案中使用了 Requests。在我的脚本中,我从 Excel 读取纬度和经度,并向“反向地理编码”(reverse)REST API 发出 HTTP 请求。这样每次运行可以发出 1000 个请求,提取到的信息会写入 Excel 输出文件。 这是我的代码片段 -
import json
from geopy import Nominatim
from openpyxl import load_workbook
import time
from geopy.extra.rate_limiter import RateLimiter
import urllib3
import requests
import xml.etree.ElementTree as ET
# Reverse-geocode the coordinates stored in an Excel sheet through the public
# Nominatim /reverse endpoint and write a '|'-joined address into column 20.
# NOTE(review): the indentation of the original snippet was lost; the nesting
# used here (and saving the workbook once, after the loop) is assumed.
filepath = "/xxx/100KV-BusLoc.xlsx"
wb = load_workbook(filepath)
sheet = wb["Envision"]
# Alternative sheet: wb["Elec_Sub"]

reverse_url = 'https://nominatim.openstreetmap.org/reverse'
# The Nominatim usage policy asks for an identifying User-Agent and at most
# one request per second on the public servers.
headers = {'User-Agent': 'my-test-app'}
# Address elements copied into the output, in output order.
address_tags = ('road', 'municipality', 'county', 'state', 'postcode', 'country')

for i1 in range(5999, 7000):
    # Skip rows whose first column is empty.
    if str(sheet.cell(row=i1, column=1).value) == "None":
        continue
    print(i1)
    cell = sheet.cell(i1, 20)
    latt = sheet.cell(row=i1, column=8).value
    lon = sheet.cell(row=i1, column=9).value
    if latt is None or lon is None:
        # No coordinates to look up; do not send the literal "None".
        continue

    time.sleep(1)  # throttle to one request per second
    try:
        resp = requests.get(
            reverse_url,
            # XML is requested explicitly because the body is parsed with
            # ElementTree below.
            params={'lat': str(latt), 'lon': str(lon), 'format': 'xml'},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException as error:
        print(f"row {i1}: request failed: {error}")
        continue

    # Only a successful response is expected to carry a parseable XML body.
    if resp.status_code != 200:
        print(f"row {i1}: HTTP {resp.status_code}")
        continue
    try:
        root = ET.fromstring(resp.text)
    except ET.ParseError as error:
        print(f"row {i1}: unparseable response: {error}")
        continue

    for child in root.findall('addressparts'):
        # findtext() yields None for a missing element and '' for an empty
        # one; both are dropped so the result has no stray separators.
        parts = (child.findtext(tag) for tag in address_tags)
        locDet = '|'.join(part for part in parts if part)
        if locDet:
            print(locDet)
            cell.value = locDet

wb.save(filepath)
我的最小可重现示例不再引发该错误,无论是在使用旧版本 geopy (1.22) 的旧环境中,还是在使用最新版本 geopy (2.1.0) 的环境中,所以我不太确定到底是什么发生了变化。