py4j.protocol.Py4JNetworkError: Answer from Java side is empty while trying to execute df.show(5)
I'm new to PySpark and got stuck at one point. I'm trying to analyze a Twitter data dump stored as Parquet files using PySpark on Google Colab. Reading the Parquet file works fine, but as soon as I run df.show(5) it fails. I suspect the problem is with driver and executor memory, but I'm not sure, and I don't know what values I should change them to. I'm on Google Colab Pro+. This happens right after I build my SparkSession and call spark.read.parquet; when I then run df.show(5), it throws the error included in full below.
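For reference, the setup looked roughly like this (a minimal sketch, not my exact code; the app name and file path are placeholders, and the Spark home path is taken from the traceback below):

    import findspark
    findspark.init("/content/spark-3.2.0-bin-hadoop3.2")

    from pyspark.sql import SparkSession

    # Build a SparkSession with default settings (no explicit memory config)
    spark = SparkSession.builder \
        .appName("twitter-analysis") \
        .getOrCreate()

    # Reading the Parquet dump succeeds (path is a placeholder)
    df = spark.read.parquet("/content/twitter_dump.parquet")

    # This is the call that kills the session
    df.show(5)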
The error, with all the exceptions:
ERROR:root:Exception while sending command.
Traceback (most recent call last):
File "/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/clientserver.py", line 480, in send_command
raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/java_gateway.py", line 1038, in send_command
response = connection.send_command(command)
File "/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/clientserver.py", line 504, in send_command
"Error while sending or receiving", e, proto.ERROR_ON_RECEIVE)
py4j.protocol.Py4JNetworkError: Error while sending or receiving
ERROR:root:Exception while sending command.
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-14-eb589bae8d4b>", line 1, in <module>
df.show(5)
File "/content/spark-3.2.0-bin-hadoop3.2/python/pyspark/sql/dataframe.py", line 494, in show
print(self._jdf.showString(n, 20, vertical))
File "/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/java_gateway.py", line 1310, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/content/spark-3.2.0-bin-hadoop3.2/python/pyspark/sql/utils.py", line 111, in deco
return f(*a, **kw)
File "/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/protocol.py", line 328, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 1823, in showtraceback
stb = value._render_traceback_()
AttributeError: 'Py4JJavaError' object has no attribute '_render_traceback_'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/clientserver.py", line 480, in send_command
raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/java_gateway.py", line 1038, in send_command
response = connection.send_command(command)
File "/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/clientserver.py", line 504, in send_command
"Error while sending or receiving", e, proto.ERROR_ON_RECEIVE)
py4j.protocol.Py4JNetworkError: Error while sending or receiving
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py in run_code(self, code_obj, result)
2881 #rprint('Running code', repr(code_obj)) # dbg
-> 2882 exec(code_obj, self.user_global_ns, self.user_ns)
2883 finally:
13 frames
<ipython-input-14-eb589bae8d4b> in <module>()
----> 1 df.show(5)
/content/spark-3.2.0-bin-hadoop3.2/python/pyspark/sql/dataframe.py in show(self, n, truncate, vertical)
493 if isinstance(truncate, bool) and truncate:
--> 494 print(self._jdf.showString(n, 20, vertical))
495 else:
/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/java_gateway.py in __call__(self, *args)
1309 return_value = get_return_value(
-> 1310 answer, self.gateway_client, self.target_id, self.name)
1311
/content/spark-3.2.0-bin-hadoop3.2/python/pyspark/sql/utils.py in deco(*a, **kw)
110 try:
--> 111 return f(*a, **kw)
112 except py4j.protocol.Py4JJavaError as e:
/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
<class 'str'>: (<class 'ConnectionRefusedError'>, ConnectionRefusedError(111, 'Connection refused'))
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py in run_code(self, code_obj, result)
2897 if result is not None:
2898 result.error_in_exec = sys.exc_info()[1]
-> 2899 self.showtraceback()
2900 else:
2901 outflag = 0
/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py in showtraceback(self, exc_tuple, filename, tb_offset, exception_only)
1826 value, tb, tb_offset=tb_offset)
1827
-> 1828 self._showtraceback(etype, value, stb)
1829 if self.call_pdb:
1830 # drop into debugger
/usr/local/lib/python3.7/dist-packages/google/colab/_shell.py in _showtraceback(self, etype, evalue, stb)
131 'traceback': stb,
132 'ename': py3compat.unicode_type(etype.__name__),
--> 133 'evalue': py3compat.safe_unicode(evalue),
134 }
135
/usr/local/lib/python3.7/dist-packages/ipython_genutils/py3compat.py in safe_unicode(e)
63 """
64 try:
---> 65 return unicode_type(e)
66 except UnicodeError:
67 pass
/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/protocol.py in __str__(self)
469 def __str__(self):
470 gateway_client = self.java_exception._gateway_client
--> 471 answer = gateway_client.send_command(self.exception_cmd)
472 return_value = get_return_value(answer, gateway_client, None, None)
473 # Note: technically this should return a bytestring 'str' rather than
/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/java_gateway.py in send_command(self, command, retry, binary)
1034 if `binary` is `True`.
1035 """
-> 1036 connection = self._get_connection()
1037 try:
1038 response = connection.send_command(command)
/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/clientserver.py in _get_connection(self)
279
280 if connection is None or connection.socket is None:
--> 281 connection = self._create_new_connection()
282 return connection
283
/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/clientserver.py in _create_new_connection(self)
286 self.java_parameters, self.python_parameters,
287 self.gateway_property, self)
--> 288 connection.connect_to_java_server()
289 self.set_thread_connection(connection)
290 return connection
/content/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/clientserver.py in connect_to_java_server(self)
400 self.socket = self.ssl_context.wrap_socket(
401 self.socket, server_hostname=self.java_address)
--> 402 self.socket.connect((self.java_address, self.java_port))
403 self.stream = self.socket.makefile("rb")
404 self.is_connected = True
ConnectionRefusedError: [Errno 111] Connection refused
I found the answer. I simply configured the driver memory to 12g, and it worked. I believe it was failing because the driver was receiving a large amount of data, and the default driver memory of 2-4 GB couldn't handle it.
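A minimal sketch of the fix (using the same placeholder app name and path as above; note that spark.driver.memory only takes effect when the driver JVM is launched, so set it before the first session is created, or stop any existing session first):

    from pyspark.sql import SparkSession

    # spark.driver.memory must be set before the driver JVM starts;
    # it has no effect on an already-running session.
    spark = SparkSession.builder \
        .appName("twitter-analysis") \
        .config("spark.driver.memory", "12g") \
        .getOrCreate()

    df = spark.read.parquet("/content/twitter_dump.parquet")
    df.show(5)  # no longer crashes with 12g of driver memory

Since Colab runs Spark in local mode, the executors live inside the driver JVM, so this one setting effectively sizes the whole Spark process.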