Unable to instantiate a python class - AttributeError: class object has no attribute 'language'
Unable to instantiate a python class - AttributeError: class object has no attribute 'language'
我从这个 book 中复制了一个 TextNormalizer
class
import nltk
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.base import BaseEstimator, TransformerMixin
class TextNormalizer(BaseEstimator, TransformerMixin):
def __init__(self, language='english'):
self.stopwords = set(nltk.corpus.stopwords.words(language))
self.lemmatizer = WordNetLemmatizer()
def remove_concat(self, narrative):
chars_to_remove = ['-', '_', '+']
reg_ex = '[' + re.escape (''. join (chars_to_remove)) + ']'
return re.sub(reg_ex, ' ', narrative)
def process_narrative(self, narrative):
cleaned_narrative = self.remove_concat(narrative)
tokens = nltk.word_tokenize(cleaned_narrative)
return [token.lower() for token in tokens if token.lower() not in self.stopwords]
我想通过这样的测试一步步学习代码
tn = TextNormalizer()
tn
出现以下错误
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\Anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
462 def _repr_mimebundle_(self, **kwargs):
463 """Mime bundle used by jupyter kernels to display estimator"""
--> 464 output = {"text/plain": repr(self)}
465 if get_config()["display"] == 'diagram':
466 output["text/html"] = estimator_html_repr(self)
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'TextNormalizer' object has no attribute 'language'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'TextNormalizer' object has no attribute 'language'
虽然 class TextNormalizer
如果我尝试实例化它会抛出上述错误,但如果应用于这样的文本它会起作用
df = pd.DataFrame({'description': ['My order_number is A-08', 'It cost me +.00']})
tn = TextNormalizer()
df['description'].apply(tn.process_narrative)
产生了这个输出
0 [order, number, 08]
1 [cost, $, 80.00]
Name: description, dtype: object
有人可以解释一下发生了什么吗?我的意思是它有效,虽然它看起来是错误的。出现这种“现象”的原因是什么?
我还必须下载停用词,但是在代码中设置 self.language = language
然后使用 language
或 self.language
检索正确的列表可以解决错误。
删除 language
参数和所有使用它的行也 运行 没有问题,所以它只是不喜欢指定但未使用的参数。
import nltk
import pandas as pd
nltk.download('stopwords')
nltk.download('punkt')
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.base import BaseEstimator, TransformerMixin
class TextNormalizer(BaseEstimator, TransformerMixin):
def __init__(self, language='english'):
self.language = language
self.stopwords = set(nltk.corpus.stopwords.words(self.language))
self.lemmatizer = WordNetLemmatizer()
test = TextNormalizer()
test
附录
原因
tn
来自 OP returns 的错误是因为 TextNormalizer
对象实例化时未设置 language
,并且因为该值用于对象自身的表示.
单行时:
tn
是运行,这不会实例化对象tn
,它必须已经存在。它评估 returns __repr__
的值(在 nltk
某处定义)。 class
本身的创建不会产生错误,因为 __repr__
方法在您 运行 tn
自己或 repr(tn)
.[=31 之前不会被评估=]
df['description'].apply(tn.process_narrative)
不会抛出错误,因为 tn
已经存在,并且其 __repr__
方法未被使用。
您可以通过如上所述分配 self.language
或在创建项目后执行此操作来解决此问题:
tn = TextNormalizer()
tn.language = 'english'
tn
我从这个 book 中复制了一个 TextNormalizer
class
import nltk
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.base import BaseEstimator, TransformerMixin
class TextNormalizer(BaseEstimator, TransformerMixin):
def __init__(self, language='english'):
self.stopwords = set(nltk.corpus.stopwords.words(language))
self.lemmatizer = WordNetLemmatizer()
def remove_concat(self, narrative):
chars_to_remove = ['-', '_', '+']
reg_ex = '[' + re.escape (''. join (chars_to_remove)) + ']'
return re.sub(reg_ex, ' ', narrative)
def process_narrative(self, narrative):
cleaned_narrative = self.remove_concat(narrative)
tokens = nltk.word_tokenize(cleaned_narrative)
return [token.lower() for token in tokens if token.lower() not in self.stopwords]
我想通过这样的测试一步步学习代码
tn = TextNormalizer()
tn
出现以下错误
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\Anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
462 def _repr_mimebundle_(self, **kwargs):
463 """Mime bundle used by jupyter kernels to display estimator"""
--> 464 output = {"text/plain": repr(self)}
465 if get_config()["display"] == 'diagram':
466 output["text/html"] = estimator_html_repr(self)
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'TextNormalizer' object has no attribute 'language'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'TextNormalizer' object has no attribute 'language'
虽然 class TextNormalizer
如果我尝试实例化它会抛出上述错误,但如果应用于这样的文本它会起作用
df = pd.DataFrame({'description': ['My order_number is A-08', 'It cost me +.00']})
tn = TextNormalizer()
df['description'].apply(tn.process_narrative)
产生了这个输出
0 [order, number, 08]
1 [cost, $, 80.00]
Name: description, dtype: object
有人可以解释一下发生了什么吗?我的意思是它有效,虽然它看起来是错误的。出现这种“现象”的原因是什么?
我还必须下载停用词,但是在代码中设置 self.language = language
然后使用 language
或 self.language
检索正确的列表可以解决错误。
删除 language
参数和所有使用它的行也 运行 没有问题,所以它只是不喜欢指定但未使用的参数。
import nltk
import pandas as pd
nltk.download('stopwords')
nltk.download('punkt')
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.base import BaseEstimator, TransformerMixin
class TextNormalizer(BaseEstimator, TransformerMixin):
def __init__(self, language='english'):
self.language = language
self.stopwords = set(nltk.corpus.stopwords.words(self.language))
self.lemmatizer = WordNetLemmatizer()
test = TextNormalizer()
test
附录
原因
tn
来自 OP returns 的错误是因为 TextNormalizer
对象实例化时未设置 language
,并且因为该值用于对象自身的表示.
单行时:
tn
是运行,这不会实例化对象tn
,它必须已经存在。它评估 returns __repr__
的值(在 nltk
某处定义)。 class
本身的创建不会产生错误,因为 __repr__
方法在您 运行 tn
自己或 repr(tn)
.[=31 之前不会被评估=]
df['description'].apply(tn.process_narrative)
不会抛出错误,因为 tn
已经存在,并且其 __repr__
方法未被使用。
您可以通过如上所述分配 self.language
或在创建项目后执行此操作来解决此问题:
tn = TextNormalizer()
tn.language = 'english'
tn