DATAFRAME TO BIGQUERY - Error: FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmp1yeitxcu_job_4b7daa39.parquet'
I am uploading a dataframe to a BigQuery table:
df.to_gbq('Deduplic.DailyReport', project_id=BQ_PROJECT_ID, credentials=credentials, if_exists='append')
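For context, the credentials and BQ_PROJECT_ID in that call come from a setup along these lines; the key-file path, project ID, and sample dataframe below are placeholders, not my actual values:

import pandas as pd
from google.oauth2 import service_account

# Placeholder project ID and path to a service-account key file.
BQ_PROJECT_ID = "my-gcp-project"
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service_account_key.json"
)

# Small stand-in for the real daily-report dataframe.
df = pd.DataFrame({"report_date": ["2022-01-01"], "rows_deduplicated": [123]})

df.to_gbq(
    "Deduplic.DailyReport",
    project_id=BQ_PROJECT_ID,
    credentials=credentials,
    if_exists="append",
)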
I get the following error:
OSError Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/google/cloud/bigquery/client.py in load_table_from_dataframe(self, dataframe, destination, num_retries, job_id, job_id_prefix, location, project, job_config, parquet_compression, timeout)
2624
-> 2625 _pandas_helpers.dataframe_to_parquet(
2626 dataframe,
~/.local/lib/python3.8/site-packages/google/cloud/bigquery/_pandas_helpers.py in dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression, parquet_use_compliant_nested_type)
672 arrow_table = dataframe_to_arrow(dataframe, bq_schema)
--> 673 pyarrow.parquet.write_table(
674 arrow_table,
~/.local/lib/python3.8/site-packages/pyarrow/parquet.py in write_table(table, where, row_group_size, version, use_dictionary, compression, write_statistics, use_deprecated_int96_timestamps, coerce_timestamps, allow_truncated_timestamps, data_page_size, flavor, filesystem, compression_level, use_byte_stream_split, column_encoding, data_page_version, use_compliant_nested_type, **kwargs)
2091 **kwargs) as writer:
-> 2092 writer.write_table(table, row_group_size=row_group_size)
2093 except Exception:
~/.local/lib/python3.8/site-packages/pyarrow/parquet.py in write_table(self, table, row_group_size)
753
--> 754 self.writer.write_table(table, row_group_size=row_group_size)
755
~/.local/lib/python3.8/site-packages/pyarrow/_parquet.pyx in pyarrow._parquet.ParquetWriter.write_table()
~/.local/lib/python3.8/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()
OSError: [Errno 28] Error writing bytes to file. Detail: [errno 28] No space left on device
During handling of the above exception, another exception occurred:
FileNotFoundError Traceback (most recent call last)
<ipython-input-8-f7137c1f7ee8> in <module>
62 )
63
---> 64 df.to_gbq('Deduplic.DailyReport', project_id=BQ_PROJECT_ID, credentials=credentials, if_exists='append')
~/.local/lib/python3.8/site-packages/pandas/core/frame.py in to_gbq(self, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials)
2052 from pandas.io import gbq
2053
-> 2054 gbq.to_gbq(
2055 self,
2056 destination_table,
~/.local/lib/python3.8/site-packages/pandas/io/gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials)
210 ) -> None:
211 pandas_gbq = _try_import()
--> 212 pandas_gbq.to_gbq(
213 dataframe,
214 destination_table,
~/.local/lib/python3.8/site-packages/pandas_gbq/gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials, api_method, verbose, private_key)
1191 return
1192
-> 1193 connector.load_data(
1194 dataframe,
1195 destination_table_ref,
~/.local/lib/python3.8/site-packages/pandas_gbq/gbq.py in load_data(self, dataframe, destination_table_ref, chunksize, schema, progress_bar, api_method, billing_project)
584
585 try:
--> 586 chunks = load.load_chunks(
587 self.client,
588 dataframe,
~/.local/lib/python3.8/site-packages/pandas_gbq/load.py in load_chunks(client, dataframe, destination_table_ref, chunksize, schema, location, api_method, billing_project)
235 ):
236 if api_method == "load_parquet":
--> 237 load_parquet(
238 client,
239 dataframe,
~/.local/lib/python3.8/site-packages/pandas_gbq/load.py in load_parquet(client, dataframe, destination_table_ref, location, schema, billing_project)
127
128 try:
--> 129 client.load_table_from_dataframe(
130 dataframe,
131 destination_table_ref,
~/.local/lib/python3.8/site-packages/google/cloud/bigquery/client.py in load_table_from_dataframe(self, dataframe, destination, num_retries, job_id, job_id_prefix, location, project, job_config, parquet_compression, timeout)
2670
2671 finally:
-> 2672 os.remove(tmppath)
2673
2674 def load_table_from_json(
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmp1yeitxcu_job_4b7daa39.parquet'
I'm looking for a solution to this.
As Ricco D mentioned, when writing a dataframe to a table, the BigQuery client creates a temporary file on the host system and deletes it once the dataframe has been loaded. See the client's source code for reference. The linked code block does the following (a minimal sketch of the same pattern is shown after this list):
- creates a temporary file
- loads the temporary file into the table
- deletes the file after loading
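The sketch below mimics that create/load/delete pattern with the public google-cloud-bigquery API; it is an illustration only, assuming a parquet-based load like the one the client performs internally, with the client, dataframe, and table ID supplied by the caller:

import os
import tempfile

from google.cloud import bigquery


def load_dataframe_via_tempfile(client, dataframe, table_id):
    # 1. Create a temporary parquet file on the host system. This is the step
    #    that needs free disk space in the temp directory (usually /tmp).
    handle, tmppath = tempfile.mkstemp(suffix=".parquet")
    os.close(handle)
    try:
        dataframe.to_parquet(tmppath)
        # 2. Load the temporary file into the destination table.
        with open(tmppath, "rb") as source_file:
            job = client.load_table_from_file(
                source_file,
                table_id,
                job_config=bigquery.LoadJobConfig(
                    source_format=bigquery.SourceFormat.PARQUET
                ),
            )
        job.result()  # wait for the load job to finish
    finally:
        # 3. Delete the file after loading; in the real client this is the
        #    os.remove(tmppath) call that appears at the end of the traceback.
        os.remove(tmppath)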
The error you are seeing comes from the first of those steps: the device ran out of space ([Errno 28] No space left on device), so the BigQuery client could not finish writing the temporary parquet file, and the FileNotFoundError afterwards is just the cleanup step failing because the temporary file is no longer there. Consider deleting unused files from the host system (or otherwise freeing space in the temp directory) so that the client can create its temporary file.
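To confirm the diagnosis before re-running the upload, you can check how much space is left in the directory Python uses for temporary files. This is only a diagnostic sketch, not part of pandas-gbq; the TMPDIR workaround assumes the client keeps using Python's tempfile module, which the /tmp path in the traceback suggests:

import shutil
import tempfile

tmp_dir = tempfile.gettempdir()  # usually /tmp on Linux
usage = shutil.disk_usage(tmp_dir)
print(f"{tmp_dir}: {usage.free / 1e9:.2f} GB free of {usage.total / 1e9:.2f} GB")

# If the filesystem is (nearly) full, free some space there, or point the
# temp directory at a larger volume before calling df.to_gbq again:
#   import os
#   os.environ["TMPDIR"] = "/path/with/more/space"
#   tempfile.tempdir = None  # force tempfile to re-read TMPDIR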