内存错误,即使 RAM 空闲
Memory error even though RAM is free
我正在将 4 个文件夹中的文件合并在一起。在这 4 个文件夹中,我将 80 个 .dbf
文件合并在一起,每个文件大小为 35 兆字节。我正在使用以下代码:
import os
import pandas as pd
from simpledbf import Dbf5
list1=[]
folders=r'F:\dbf_tables'
out=r'F:\merged'
if not os.path.isdir(out):
os.mkdir(out)
for folder in os.listdir(folders):
if not os.path.isdir(os.path.join(out,folder)):
os.mkdir(os.path.join(out,folder))
for f in os.listdir(os.path.join(folders,folder)):
if '.xml' not in f:
if '.cpg' not in f:
table=Dbf5(os.path.join(folders,folder,f))
df=table.to_dataframe()
list1.append(df)
dfs = reduce(lambda left,right: pd.merge(left,right,on=['POINTID'],how='outer',),list1)
dfs.to_csv(os.path.join(out,folder,'combined.csv'), index=False)
几乎在运行输入代码后我收到了这个错误:
Traceback (most recent call last):
File "<ipython-input-1-77eb6fd0cda7>", line 1, in <module>
runfile('F:/python codes/prelim_codes/raster_to_point.py', wdir='F:/python codes/prelim_codes')
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 74, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "F:/python codes/prelim_codes/raster_to_point.py", line 66, in <module>
dfs = reduce(lambda left,right: pd.merge(left,right,on=['POINTID'],how='outer',),list1)
File "F:/python codes/prelim_codes/raster_to_point.py", line 66, in <lambda>
dfs = reduce(lambda left,right: pd.merge(left,right,on=['POINTID'],how='outer',),list1)
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\pandas\tools\merge.py", line 39, in merge
return op.get_result()
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\pandas\tools\merge.py", line 217, in get_result
join_index, left_indexer, right_indexer = self._get_join_info()
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\pandas\tools\merge.py", line 353, in _get_join_info
sort=self.sort, how=self.how)
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\pandas\tools\merge.py", line 559, in _get_join_indexers
return join_func(lkey, rkey, count, **kwargs)
File "pandas\src\join.pyx", line 160, in pandas.algos.full_outer_join (pandas\algos.c:61256)
MemoryError
但只有 30% 的内存被使用,这几乎是基准。
编辑:
我只挑选了 2 个文件并尝试使用以下方法进行合并:
merge=pd.merge(df1,df2, on=['POINTID'], how='outer')
仍然出现内存错误,发生了一些奇怪的事情。
当我在 32 位 Anaconda 中 运行 同样的事情时,我得到 ValueError: negative dimensions are not allowed
编辑:
整个问题源于这里给出的解决方案:
已编辑 根据评论:
试试这个(只使用一个 if
逻辑条件 and
语句就足够了):
import os
import pandas as pd
from simpledbf import Dbf5
folders = r'F:\dbf_tables'
out = r'F:\merged'
if not os.path.isdir(out):
os.mkdir(out)
for folder in os.listdir(folders):
if not os.path.isdir(os.path.join(out, folder)):
os.mkdir(os.path.join(out, folder))
# Initialize empty dataframe by folders
dfs = pd.DataFrame(columns=['POINTID'])
for f in os.listdir(os.path.join(folders, folder)):
if ('.xml' not in f) and ('.cpg' not in f):
table = Dbf5(os.path.join(folders, folder, f))
df = table.to_dataframe()
# Merge actual dataframe to result dataframe
dfs = dfs.merge(df, on=['POINTID'], how='outer')
# Save results by folder
dfs.to_csv(os.path.join(out, folder, 'combined.csv'), index=False)
我正在将 4 个文件夹中的文件合并在一起。在这 4 个文件夹中,我将 80 个 .dbf
文件合并在一起,每个文件大小为 35 兆字节。我正在使用以下代码:
import os
import pandas as pd
from simpledbf import Dbf5
list1=[]
folders=r'F:\dbf_tables'
out=r'F:\merged'
if not os.path.isdir(out):
os.mkdir(out)
for folder in os.listdir(folders):
if not os.path.isdir(os.path.join(out,folder)):
os.mkdir(os.path.join(out,folder))
for f in os.listdir(os.path.join(folders,folder)):
if '.xml' not in f:
if '.cpg' not in f:
table=Dbf5(os.path.join(folders,folder,f))
df=table.to_dataframe()
list1.append(df)
dfs = reduce(lambda left,right: pd.merge(left,right,on=['POINTID'],how='outer',),list1)
dfs.to_csv(os.path.join(out,folder,'combined.csv'), index=False)
几乎在运行输入代码后我收到了这个错误:
Traceback (most recent call last):
File "<ipython-input-1-77eb6fd0cda7>", line 1, in <module>
runfile('F:/python codes/prelim_codes/raster_to_point.py', wdir='F:/python codes/prelim_codes')
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 74, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "F:/python codes/prelim_codes/raster_to_point.py", line 66, in <module>
dfs = reduce(lambda left,right: pd.merge(left,right,on=['POINTID'],how='outer',),list1)
File "F:/python codes/prelim_codes/raster_to_point.py", line 66, in <lambda>
dfs = reduce(lambda left,right: pd.merge(left,right,on=['POINTID'],how='outer',),list1)
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\pandas\tools\merge.py", line 39, in merge
return op.get_result()
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\pandas\tools\merge.py", line 217, in get_result
join_index, left_indexer, right_indexer = self._get_join_info()
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\pandas\tools\merge.py", line 353, in _get_join_info
sort=self.sort, how=self.how)
File "C:\Users\spotter\AppData\Local\Continuum\Anaconda_64\lib\site-packages\pandas\tools\merge.py", line 559, in _get_join_indexers
return join_func(lkey, rkey, count, **kwargs)
File "pandas\src\join.pyx", line 160, in pandas.algos.full_outer_join (pandas\algos.c:61256)
MemoryError
但只有 30% 的内存被使用,这几乎是基准。
编辑:
我只挑选了 2 个文件并尝试使用以下方法进行合并:
merge=pd.merge(df1,df2, on=['POINTID'], how='outer')
仍然出现内存错误,发生了一些奇怪的事情。
当我在 32 位 Anaconda 中 运行 同样的事情时,我得到 ValueError: negative dimensions are not allowed
编辑:
整个问题源于这里给出的解决方案:
已编辑 根据评论:
试试这个(只使用一个 if
逻辑条件 and
语句就足够了):
import os
import pandas as pd
from simpledbf import Dbf5
folders = r'F:\dbf_tables'
out = r'F:\merged'
if not os.path.isdir(out):
os.mkdir(out)
for folder in os.listdir(folders):
if not os.path.isdir(os.path.join(out, folder)):
os.mkdir(os.path.join(out, folder))
# Initialize empty dataframe by folders
dfs = pd.DataFrame(columns=['POINTID'])
for f in os.listdir(os.path.join(folders, folder)):
if ('.xml' not in f) and ('.cpg' not in f):
table = Dbf5(os.path.join(folders, folder, f))
df = table.to_dataframe()
# Merge actual dataframe to result dataframe
dfs = dfs.merge(df, on=['POINTID'], how='outer')
# Save results by folder
dfs.to_csv(os.path.join(out, folder, 'combined.csv'), index=False)