使用 Pandas 从 HDFStore 读取特殊字符时出现 UnicodeDecodeError
UnicodeDecodeError when reading special characters from HDFStore with Pandas
我需要在 HDFStore 中存储大量消息,其中一些包含表情符号或特殊字符,如 éěščřžýáí。写入时一切似乎都正常,但当我尝试读取数据时,程序就会因以下错误而崩溃。下面是会触发该错误的示例代码:
# Minimal reproduction: round-trip a non-ASCII string through an HDFStore
# table. On the versions reported in the question (pandas 0.16.2 with
# PyTables 3.2.2) the read-back step raises UnicodeDecodeError.
import pandas as pd

# Build the one-row frame directly. DataFrame.append was removed in
# pandas 2.0, and constructing the frame this way yields the same content.
df = pd.DataFrame({"A": ["é"]})
df["A"] = df["A"].astype(str)

# The context manager closes the HDF5 file even when the read below raises;
# a trailing store.close() would never run in that case and would leave the
# file handle open.
with pd.HDFStore(r'thiswillcrash.h5') as store:
    store.put("df", df, format="table", encoding="utf-8")
    d = store["df"]  # the failing call in the traceback
    print(d)
这里是错误
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in _unconvert_string_array(data, nan_rep, encoding)
4407 dtype = "S{0}".format(itemsize)
-> 4408 data = data.astype(dtype, copy=False).astype(object, copy=False)
4409 except (Exception) as e:
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
During handling of the above exception, another exception occurred:
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-8-f2a5372d5498> in <module>()
8 store = pd.HDFStore(r'iwillcrash18.h5')
9 store.put('df', df, format='table', encoding="utf-8")
---> 10 d = store["df"]
11 print(d)
12
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in __getitem__(self, key)
416
417 def __getitem__(self, key):
--> 418 return self.get(key)
419
420 def __setitem__(self, key, value):
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in get(self, key)
626 if group is None:
627 raise KeyError('No object named %s in the file' % key)
--> 628 return self._read_group(group)
629
630 def select(self, key, where=None, start=None, stop=None, columns=None,
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in _read_group(self, group, **kwargs)
1274 s = self._create_storer(group)
1275 s.infer_axes()
-> 1276 return s.read(**kwargs)
1277
1278
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in read(self, where, columns, **kwargs)
4006 def read(self, where=None, columns=None, **kwargs):
4007
-> 4008 if not self.read_axes(where=where, **kwargs):
4009 return None
4010
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in read_axes(self, where, **kwargs)
3218 for a in self.axes:
3219 a.set_info(self.info)
-> 3220 a.convert(values, nan_rep=self.nan_rep, encoding=self.encoding)
3221
3222 return True
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in convert(self, values, nan_rep, encoding)
2071 if _ensure_decoded(self.kind) == u('string'):
2072 self.data = _unconvert_string_array(
-> 2073 self.data, nan_rep=nan_rep, encoding=encoding)
2074
2075 return self
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in _unconvert_string_array(data, nan_rep, encoding)
4409 except (Exception) as e:
4410 f = np.vectorize(lambda x: x.decode(encoding), otypes=[np.object])
-> 4411 data = f(data)
4412
4413 if nan_rep is None:
C:\Users\Filip\Anaconda3\lib\site-packages\numpy\lib\function_base.py in __call__(self, *args, **kwargs)
1698 vargs.extend([kwargs[_n] for _n in names])
1699
-> 1700 return self._vectorize_call(func=func, args=vargs)
1701
1702 def _get_ufunc_and_otypes(self, func, args):
C:\Users\Filip\Anaconda3\lib\site-packages\numpy\lib\function_base.py in _vectorize_call(self, func, args)
1767 for _a in args]
1768
-> 1769 outputs = ufunc(*inputs)
1770
1771 if ufunc.nout == 1:
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in <lambda>(x)
4408 data = data.astype(dtype, copy=False).astype(object, copy=False)
4409 except (Exception) as e:
-> 4410 f = np.vectorize(lambda x: x.decode(encoding), otypes=[np.object])
4411 data = f(data)
4412
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 0: unexpected end of data
我使用的是 Pandas 0.16.2 和 PyTables 3.2.2。
这是一个 bug,现在应该已经修复;更多详细信息请参阅此链接。
我需要在 HDFStore 中存储大量消息,其中一些包含表情符号或特殊字符,如 éěščřžýáí。写入时一切似乎都正常,但当我尝试读取数据时,程序就会因以下错误而崩溃。下面是会触发该错误的示例代码:
# Minimal reproduction: round-trip a non-ASCII string through an HDFStore
# table. On the versions reported in the question (pandas 0.16.2 with
# PyTables 3.2.2) the read-back step raises UnicodeDecodeError.
import pandas as pd

# Build the one-row frame directly. DataFrame.append was removed in
# pandas 2.0, and constructing the frame this way yields the same content.
df = pd.DataFrame({"A": ["é"]})
df["A"] = df["A"].astype(str)

# The context manager closes the HDF5 file even when the read below raises;
# a trailing store.close() would never run in that case and would leave the
# file handle open.
with pd.HDFStore(r'thiswillcrash.h5') as store:
    store.put("df", df, format="table", encoding="utf-8")
    d = store["df"]  # the failing call in the traceback
    print(d)
这里是错误
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in _unconvert_string_array(data, nan_rep, encoding)
4407 dtype = "S{0}".format(itemsize)
-> 4408 data = data.astype(dtype, copy=False).astype(object, copy=False)
4409 except (Exception) as e:
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
During handling of the above exception, another exception occurred:
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-8-f2a5372d5498> in <module>()
8 store = pd.HDFStore(r'iwillcrash18.h5')
9 store.put('df', df, format='table', encoding="utf-8")
---> 10 d = store["df"]
11 print(d)
12
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in __getitem__(self, key)
416
417 def __getitem__(self, key):
--> 418 return self.get(key)
419
420 def __setitem__(self, key, value):
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in get(self, key)
626 if group is None:
627 raise KeyError('No object named %s in the file' % key)
--> 628 return self._read_group(group)
629
630 def select(self, key, where=None, start=None, stop=None, columns=None,
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in _read_group(self, group, **kwargs)
1274 s = self._create_storer(group)
1275 s.infer_axes()
-> 1276 return s.read(**kwargs)
1277
1278
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in read(self, where, columns, **kwargs)
4006 def read(self, where=None, columns=None, **kwargs):
4007
-> 4008 if not self.read_axes(where=where, **kwargs):
4009 return None
4010
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in read_axes(self, where, **kwargs)
3218 for a in self.axes:
3219 a.set_info(self.info)
-> 3220 a.convert(values, nan_rep=self.nan_rep, encoding=self.encoding)
3221
3222 return True
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in convert(self, values, nan_rep, encoding)
2071 if _ensure_decoded(self.kind) == u('string'):
2072 self.data = _unconvert_string_array(
-> 2073 self.data, nan_rep=nan_rep, encoding=encoding)
2074
2075 return self
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in _unconvert_string_array(data, nan_rep, encoding)
4409 except (Exception) as e:
4410 f = np.vectorize(lambda x: x.decode(encoding), otypes=[np.object])
-> 4411 data = f(data)
4412
4413 if nan_rep is None:
C:\Users\Filip\Anaconda3\lib\site-packages\numpy\lib\function_base.py in __call__(self, *args, **kwargs)
1698 vargs.extend([kwargs[_n] for _n in names])
1699
-> 1700 return self._vectorize_call(func=func, args=vargs)
1701
1702 def _get_ufunc_and_otypes(self, func, args):
C:\Users\Filip\Anaconda3\lib\site-packages\numpy\lib\function_base.py in _vectorize_call(self, func, args)
1767 for _a in args]
1768
-> 1769 outputs = ufunc(*inputs)
1770
1771 if ufunc.nout == 1:
C:\Users\Filip\Anaconda3\lib\site-packages\pandas\io\pytables.py in <lambda>(x)
4408 data = data.astype(dtype, copy=False).astype(object, copy=False)
4409 except (Exception) as e:
-> 4410 f = np.vectorize(lambda x: x.decode(encoding), otypes=[np.object])
4411 data = f(data)
4412
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 0: unexpected end of data
我使用的是 Pandas 0.16.2 和 PyTables 3.2.2。
这是一个 bug,现在应该已经修复;更多详细信息请参阅此链接。