为什么我与 Google Analytics 的连接在 AWS-Databricks 中失败,但在其他环境中却没有?
Why does my connection to Google Analytics fail in AWS-Databricks, but not in other environments?
我正在尝试使用 Google 的 google-analytics-data 包。我使用 Google 提供的示例脚本从 Databricks 中提取 Google Analytics 数据,该脚本在本地和其他远程环境中运行正常,但在 Databricks 中运行失败,回溯信息如下:
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in _conn_request(self, conn, request_uri, method, body, headers)
1345 if conn.sock is None:
-> 1346 conn.connect()
1347 conn.request(method, request_uri, body, headers)
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in connect(self)
1121
-> 1122 address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
1123 for family, socktype, proto, canonname, sockaddr in address_info:
/usr/lib/python3.7/socket.py in getaddrinfo(host, port, family, type, proto, flags)
747 addrlist = []
--> 748 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
749 af, socktype, proto, canonname, sa = res
gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
ServerNotFoundError Traceback (most recent call last)
<command-4478422> in <module>
76
77 if __name__ == '__main__':
---> 78 main()
<command-4478422> in main()
72 def main():
73 analytics = initialize_analyticsreporting()
---> 74 response = get_report(analytics)
75 print_response(response)
76
<command-4478422> in get_report(analytics)
40 'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
41 'metrics': [{'expression': 'ga:sessions'}],
---> 42 'dimensions': [{'name': 'ga:country'}]
43 }]
44 }
/databricks/python/lib/python3.7/site-packages/googleapiclient/_helpers.py in positional_wrapper(*args, **kwargs)
132 elif positional_parameters_enforcement == POSITIONAL_WARNING:
133 logger.warning(message)
--> 134 return wrapped(*args, **kwargs)
135
136 return positional_wrapper
/databricks/python/lib/python3.7/site-packages/googleapiclient/http.py in execute(self, http, num_retries)
912 method=str(self.method),
913 body=self.body,
--> 914 headers=self.headers,
915 )
916
/databricks/python/lib/python3.7/site-packages/googleapiclient/http.py in _retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs)
205 if exception:
206 if retry_num == num_retries:
--> 207 raise exception
208 else:
209 continue
/databricks/python/lib/python3.7/site-packages/googleapiclient/http.py in _retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs)
174 try:
175 exception = None
--> 176 resp, content = http.request(uri, method, *args, **kwargs)
177 # Retry on SSL errors and socket timeout errors.
178 except _ssl_SSLError as ssl_error:
/databricks/python/lib/python3.7/site-packages/oauth2client/transport.py in new_request(uri, method, body, headers, redirections, connection_type)
157 _LOGGER.info('Attempting refresh to obtain '
158 'initial access_token')
--> 159 credentials._refresh(orig_request_method)
160
161 # Clone and modify the request headers to add the appropriate
/databricks/python/lib/python3.7/site-packages/oauth2client/client.py in _refresh(self, http)
747 """
748 if not self.store:
--> 749 self._do_refresh_request(http)
750 else:
751 self.store.acquire_lock()
/databricks/python/lib/python3.7/site-packages/oauth2client/client.py in _do_refresh_request(self, http)
778 resp, content = transport.request(
779 http, self.token_uri, method='POST',
--> 780 body=body, headers=headers)
781 content = _helpers._from_bytes(content)
782 if resp.status == http_client.OK:
/databricks/python/lib/python3.7/site-packages/oauth2client/transport.py in request(http, uri, method, body, headers, redirections, connection_type)
280 return http_callable(uri, method=method, body=body, headers=headers,
281 redirections=redirections,
--> 282 connection_type=connection_type)
283
284
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in request(self, uri, method, body, headers, redirections, connection_type)
1707 else:
1708 (response, content) = self._request(
-> 1709 conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
1710 )
1711 except Exception as e:
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey)
1422 auth.request(method, request_uri, headers, body)
1423
-> 1424 (response, content) = self._conn_request(conn, request_uri, method, body, headers)
1425
1426 if auth:
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in _conn_request(self, conn, request_uri, method, body, headers)
1351 except socket.gaierror:
1352 conn.close()
-> 1353 raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
1354 except socket.error as e:
1355 errno_ = e.args[0].errno if isinstance(e.args[0], socket.error) else e.errno
ServerNotFoundError: Unable to find the server at oauth2.googleapis.com
我最初认为这是一个代理问题,但在我当前的网络配置下,我可以在 Databricks 中对回溯信息里的 URL 执行 dig,没有任何问题:
%sh dig oauth2.googleapis.com
; <<>> DiG 9.11.3-1ubuntu1.13-Ubuntu <<>> oauth2.googleapis.com
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 51610
;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 1
;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 4096
; COOKIE: cdebff11af82f9f35ad9acb36064bf0e97dc635743627347 (good)
;; QUESTION SECTION:
;oauth2.googleapis.com. IN A
;; ANSWER SECTION:
oauth2.googleapis.com. 226 IN A 216.58.192.234
;; Query time: 125 msec
;; SERVER: X.X.X.X#X
;; WHEN: Wed Mar 31 18:27:26 UTC 2021
;; MSG SIZE rcvd: 94
Databricks 执行 python 代码的方式是否会导致像这样的连接错误?
这确实是一个代理问题……即使已经在 shell 中设置了 http_proxy 和 https_proxy,我仍然需要在 Python 中定义它们:
import os
os.environ['http_proxy'] = "some.fake.proxy:port"
os.environ['https_proxy'] = "some.fake.proxy:port"
我正在尝试使用 Google 的 google-analytics-data 包。我使用 Google 提供的示例脚本从 Databricks 中提取 Google Analytics 数据,该脚本在本地和其他远程环境中运行正常,但在 Databricks 中运行失败,回溯信息如下:
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in _conn_request(self, conn, request_uri, method, body, headers)
1345 if conn.sock is None:
-> 1346 conn.connect()
1347 conn.request(method, request_uri, body, headers)
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in connect(self)
1121
-> 1122 address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
1123 for family, socktype, proto, canonname, sockaddr in address_info:
/usr/lib/python3.7/socket.py in getaddrinfo(host, port, family, type, proto, flags)
747 addrlist = []
--> 748 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
749 af, socktype, proto, canonname, sa = res
gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
ServerNotFoundError Traceback (most recent call last)
<command-4478422> in <module>
76
77 if __name__ == '__main__':
---> 78 main()
<command-4478422> in main()
72 def main():
73 analytics = initialize_analyticsreporting()
---> 74 response = get_report(analytics)
75 print_response(response)
76
<command-4478422> in get_report(analytics)
40 'dateRanges': [{'startDate': '7daysAgo', 'endDate': 'today'}],
41 'metrics': [{'expression': 'ga:sessions'}],
---> 42 'dimensions': [{'name': 'ga:country'}]
43 }]
44 }
/databricks/python/lib/python3.7/site-packages/googleapiclient/_helpers.py in positional_wrapper(*args, **kwargs)
132 elif positional_parameters_enforcement == POSITIONAL_WARNING:
133 logger.warning(message)
--> 134 return wrapped(*args, **kwargs)
135
136 return positional_wrapper
/databricks/python/lib/python3.7/site-packages/googleapiclient/http.py in execute(self, http, num_retries)
912 method=str(self.method),
913 body=self.body,
--> 914 headers=self.headers,
915 )
916
/databricks/python/lib/python3.7/site-packages/googleapiclient/http.py in _retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs)
205 if exception:
206 if retry_num == num_retries:
--> 207 raise exception
208 else:
209 continue
/databricks/python/lib/python3.7/site-packages/googleapiclient/http.py in _retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs)
174 try:
175 exception = None
--> 176 resp, content = http.request(uri, method, *args, **kwargs)
177 # Retry on SSL errors and socket timeout errors.
178 except _ssl_SSLError as ssl_error:
/databricks/python/lib/python3.7/site-packages/oauth2client/transport.py in new_request(uri, method, body, headers, redirections, connection_type)
157 _LOGGER.info('Attempting refresh to obtain '
158 'initial access_token')
--> 159 credentials._refresh(orig_request_method)
160
161 # Clone and modify the request headers to add the appropriate
/databricks/python/lib/python3.7/site-packages/oauth2client/client.py in _refresh(self, http)
747 """
748 if not self.store:
--> 749 self._do_refresh_request(http)
750 else:
751 self.store.acquire_lock()
/databricks/python/lib/python3.7/site-packages/oauth2client/client.py in _do_refresh_request(self, http)
778 resp, content = transport.request(
779 http, self.token_uri, method='POST',
--> 780 body=body, headers=headers)
781 content = _helpers._from_bytes(content)
782 if resp.status == http_client.OK:
/databricks/python/lib/python3.7/site-packages/oauth2client/transport.py in request(http, uri, method, body, headers, redirections, connection_type)
280 return http_callable(uri, method=method, body=body, headers=headers,
281 redirections=redirections,
--> 282 connection_type=connection_type)
283
284
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in request(self, uri, method, body, headers, redirections, connection_type)
1707 else:
1708 (response, content) = self._request(
-> 1709 conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
1710 )
1711 except Exception as e:
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey)
1422 auth.request(method, request_uri, headers, body)
1423
-> 1424 (response, content) = self._conn_request(conn, request_uri, method, body, headers)
1425
1426 if auth:
/databricks/python/lib/python3.7/site-packages/httplib2/__init__.py in _conn_request(self, conn, request_uri, method, body, headers)
1351 except socket.gaierror:
1352 conn.close()
-> 1353 raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
1354 except socket.error as e:
1355 errno_ = e.args[0].errno if isinstance(e.args[0], socket.error) else e.errno
ServerNotFoundError: Unable to find the server at oauth2.googleapis.com
我最初认为这是一个代理问题,但在我当前的网络配置下,我可以在 Databricks 中对回溯信息里的 URL 执行 dig,没有任何问题:
%sh dig oauth2.googleapis.com
; <<>> DiG 9.11.3-1ubuntu1.13-Ubuntu <<>> oauth2.googleapis.com
;; global options: +cmd
;; Got answer:
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 51610
;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 1
;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 4096
; COOKIE: cdebff11af82f9f35ad9acb36064bf0e97dc635743627347 (good)
;; QUESTION SECTION:
;oauth2.googleapis.com. IN A
;; ANSWER SECTION:
oauth2.googleapis.com. 226 IN A 216.58.192.234
;; Query time: 125 msec
;; SERVER: X.X.X.X#X
;; WHEN: Wed Mar 31 18:27:26 UTC 2021
;; MSG SIZE rcvd: 94
Databricks 执行 python 代码的方式是否会导致像这样的连接错误?
这确实是一个代理问题……即使已经在 shell 中设置了 http_proxy 和 https_proxy,我仍然需要在 Python 中定义它们:
import os
os.environ['http_proxy'] = "some.fake.proxy:port"
os.environ['https_proxy'] = "some.fake.proxy:port"