Pandas 哈希表 KeyError
Pandas Hashtable KeyError
我在 Kaggle 中找到了以下代码。
import re
from nltk.corpus import stopwords # Import the stop word list
def description_to_words(review_text):
    """Reduce raw text to its lowercase tokens with English stop words removed.

    Every character that is not an ASCII letter is replaced by a space, the
    result is lowercased and split on whitespace, tokens found in NLTK's
    English stop-word list are dropped, and the remaining tokens are joined
    back into one space-separated string.

    ``review_text`` is handed straight to ``re.sub``, so it has to be a
    single string; passing something else (for example a slice of a pandas
    object) makes ``re.sub`` raise ``TypeError``.
    """
    # Blank out everything except letters, then normalise case and tokenise.
    scrubbed = re.sub("[^a-zA-Z]", " ", review_text)
    tokens = scrubbed.lower().split()

    # A set gives fast membership tests for the filter below.
    english_stops = set(stopwords.words("english"))

    kept = []
    for token in tokens:
        if token not in english_stops:
            kept.append(token)

    return " ".join(kept)
以上代码与以下函数调用配合使用效果很好
clean_review = description_to_words(df['MaterialDescription'][3] )
print(clean_review)
但是当我尝试上述相同的操作时,例如将 DataFrame 分配给另一个变量,如下所示,
X = df['MaterialDescription']
clean_review = description_to_words(X[3] )
print(clean_review)
我收到了以下非常奇怪的错误。我想我对 Pandas 的某些概念还需要进一步弄清楚。
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
2133 try:
-> 2134 return self._engine.get_loc(key)
2135 except KeyError:
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4433)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4279)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13742)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13696)()
KeyError: 3
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-15-5c63f93c009a> in <module>()
----> 1 clean_review = description_to_words(X[3] )
2 print(clean_review)
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2057 return self._getitem_multilevel(key)
2058 else:
-> 2059 return self._getitem_column(key)
2060
2061 def _getitem_column(self, key):
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
2064 # get column
2065 if self.columns.is_unique:
-> 2066 return self._get_item_cache(key)
2067
2068 # duplicate columns & possible reduce dimensionality
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1384 res = cache.get(item)
1385 if res is None:
-> 1386 values = self._data.get(item)
1387 res = self._box_item_values(item, values)
1388 cache[item] = res
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3541
3542 if not isnull(item):
-> 3543 loc = self.items.get_loc(item)
3544 else:
3545 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
2134 return self._engine.get_loc(key)
2135 except KeyError:
-> 2136 return self._engine.get_loc(self._maybe_cast_indexer(key))
2137
2138 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4433)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4279)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13742)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13696)()
KeyError: 3
我也试过对 pandas 对象使用切片,结果得到了另一个错误:
clean_review = description_to_words(X[:3] )
print(clean_review)
以下是上述两行代码的堆栈跟踪
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-18-f8b01af18e4b> in <module>()
----> 1 clean_review = description_to_words(X[:3] )
2 print(clean_review)
<ipython-input-6-70647cd7caba> in description_to_words(review_text)
6
7 # 2. Remove non-letters
----> 8 letters_only = re.sub("[^a-zA-Z]", " ", review_text)
9 # 3. Convert to lower case, split into individual words
10 words = letters_only.lower().split()
C:\Anaconda\envs\tensorflow\lib\re.py in sub(pattern, repl, string, count, flags)
180 a callable, it's passed the match object and must return
181 a replacement string to be used."""
--> 182 return _compile(pattern, flags).sub(repl, string, count)
183
184 def subn(pattern, repl, string, count=0, flags=0):
TypeError: expected string or bytes-like object
如果有人帮助我理解这里到底发生了什么,那将是一个很大的帮助。
下面几行
X = df['MaterialDescription']
clean_review = description_to_words(X[3] )
等价于 description_to_words(df['MaterialDescription'][3] )
您必须使用 iloc 按位置定位所需的行,如下所示:
clean_review = description_to_words(df.iloc[3]['MaterialDescription'] )
我在自己的一段代码中也遇到过类似的错误。原因是我删除了空行,导致数据框中缺少了那个特定的索引值。
如果是这种情况,您可以执行 df.reset_index(inplace=True) 并且应该解决错误。
我在 Kaggle 中找到了以下代码。
import re
from nltk.corpus import stopwords # Import the stop word list
def description_to_words(review_text):
    """Reduce raw text to its lowercase tokens with English stop words removed.

    Every character that is not an ASCII letter is replaced by a space, the
    result is lowercased and split on whitespace, tokens found in NLTK's
    English stop-word list are dropped, and the remaining tokens are joined
    back into one space-separated string.

    ``review_text`` is handed straight to ``re.sub``, so it has to be a
    single string; passing something else (for example a slice of a pandas
    object) makes ``re.sub`` raise ``TypeError``.
    """
    # Blank out everything except letters, then normalise case and tokenise.
    scrubbed = re.sub("[^a-zA-Z]", " ", review_text)
    tokens = scrubbed.lower().split()

    # A set gives fast membership tests for the filter below.
    english_stops = set(stopwords.words("english"))

    kept = []
    for token in tokens:
        if token not in english_stops:
            kept.append(token)

    return " ".join(kept)
以上代码与以下函数调用配合使用效果很好
clean_review = description_to_words(df['MaterialDescription'][3] )
print(clean_review)
但是当我尝试上述相同的操作时,例如将 DataFrame 分配给另一个变量,如下所示,
X = df['MaterialDescription']
clean_review = description_to_words(X[3] )
print(clean_review)
我收到了以下非常奇怪的错误。我想我对 Pandas 的某些概念还需要进一步弄清楚。
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
2133 try:
-> 2134 return self._engine.get_loc(key)
2135 except KeyError:
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4433)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4279)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13742)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13696)()
KeyError: 3
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-15-5c63f93c009a> in <module>()
----> 1 clean_review = description_to_words(X[3] )
2 print(clean_review)
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2057 return self._getitem_multilevel(key)
2058 else:
-> 2059 return self._getitem_column(key)
2060
2061 def _getitem_column(self, key):
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
2064 # get column
2065 if self.columns.is_unique:
-> 2066 return self._get_item_cache(key)
2067
2068 # duplicate columns & possible reduce dimensionality
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1384 res = cache.get(item)
1385 if res is None:
-> 1386 values = self._data.get(item)
1387 res = self._box_item_values(item, values)
1388 cache[item] = res
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3541
3542 if not isnull(item):
-> 3543 loc = self.items.get_loc(item)
3544 else:
3545 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\Anaconda\envs\tensorflow\lib\site-packages\pandas\indexes\base.py in get_loc(self, key, method, tolerance)
2134 return self._engine.get_loc(key)
2135 except KeyError:
-> 2136 return self._engine.get_loc(self._maybe_cast_indexer(key))
2137
2138 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4433)()
pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4279)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13742)()
pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13696)()
KeyError: 3
我也试过对 pandas 对象使用切片,结果得到了另一个错误:
clean_review = description_to_words(X[:3] )
print(clean_review)
以下是上述两行代码的堆栈跟踪
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-18-f8b01af18e4b> in <module>()
----> 1 clean_review = description_to_words(X[:3] )
2 print(clean_review)
<ipython-input-6-70647cd7caba> in description_to_words(review_text)
6
7 # 2. Remove non-letters
----> 8 letters_only = re.sub("[^a-zA-Z]", " ", review_text)
9 # 3. Convert to lower case, split into individual words
10 words = letters_only.lower().split()
C:\Anaconda\envs\tensorflow\lib\re.py in sub(pattern, repl, string, count, flags)
180 a callable, it's passed the match object and must return
181 a replacement string to be used."""
--> 182 return _compile(pattern, flags).sub(repl, string, count)
183
184 def subn(pattern, repl, string, count=0, flags=0):
TypeError: expected string or bytes-like object
如果有人帮助我理解这里到底发生了什么,那将是一个很大的帮助。
下面几行
X = df['MaterialDescription']
clean_review = description_to_words(X[3] )
等价于 description_to_words(df['MaterialDescription'][3] )
您必须使用 iloc 按位置定位所需的行,如下所示:
clean_review = description_to_words(df.iloc[3]['MaterialDescription'] )
我在自己的一段代码中也遇到过类似的错误。原因是我删除了空行,导致数据框中缺少了那个特定的索引值。如果您也是这种情况,可以执行 df.reset_index(inplace=True),这样应该就能解决该错误。