Unable to instantiate a python class - AttributeError: class object has no attribute 'language'

Unable to instantiate a python class - AttributeError: class object has no attribute 'language'

我从这个 book 中复制了一个 TextNormalizer class

import nltk
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.base import BaseEstimator, TransformerMixin
class TextNormalizer(BaseEstimator, TransformerMixin):
    """Split narratives into lower-cased tokens with stopwords removed.

    NOTE: ``language`` is only consumed inside ``__init__`` and is not kept
    as ``self.language``. ``BaseEstimator.get_params()`` looks up every
    ``__init__`` parameter with ``getattr(self, name)``, so ``repr()`` of an
    instance raises ``AttributeError``, while the text-processing methods
    below keep working because they never call ``get_params()``.
    """

    def __init__(self, language='english'):
        """Load the stopword list for ``language`` and create a lemmatizer."""
        stopword_list = nltk.corpus.stopwords.words(language)
        self.stopwords = set(stopword_list)
        # Created here but not used by the methods shown in this class.
        self.lemmatizer = WordNetLemmatizer()

    def remove_concat(self, narrative):
        """Return ``narrative`` with every '-', '_' and '+' replaced by a space."""
        joiners = ['-', '_', '+']
        pattern = '[{}]'.format(re.escape(''.join(joiners)))
        return re.sub(pattern, ' ', narrative)

    def process_narrative(self, narrative):
        """Return the lower-cased, non-stopword tokens of ``narrative``."""
        text = self.remove_concat(narrative)
        kept = []
        for token in nltk.word_tokenize(text):
            lowered = token.lower()
            if lowered not in self.stopwords:
                kept.append(lowered)
        return kept

我想通过这样的测试一步步学习代码

# Build a normalizer with the default language.
tn = TextNormalizer()
# Echoing the bare name makes the notebook call repr(tn) to display it.
tn

出现以下错误

AttributeError                            Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include, exclude)
    968 
    969             if method is not None:
--> 970                 return method(include=include, exclude=exclude)
    971             return None
    972         else:

~\Anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
    462     def _repr_mimebundle_(self, **kwargs):
    463         """Mime bundle used by jupyter kernels to display estimator"""
--> 464         output = {"text/plain": repr(self)}
    465         if get_config()["display"] == 'diagram':
    466             output["text/html"] = estimator_html_repr(self)

~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
    258             n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
    259 
--> 260         repr_ = pp.pformat(self)
    261 
    262         # Use bruteforce ellipsis when there are a lot of non-blank characters

~\Anaconda3\lib\pprint.py in pformat(self, object)
    151     def pformat(self, object):
    152         sio = _StringIO()
--> 153         self._format(object, sio, 0, 0, {}, 0)
    154         return sio.getvalue()
    155 

~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
    168             self._readable = False
    169             return
--> 170         rep = self._repr(object, context, level)
    171         max_width = self._width - indent - allowance
    172         if len(rep) > max_width:

~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
    402 
    403     def _repr(self, object, context, level):
--> 404         repr, readable, recursive = self.format(object, context.copy(),
    405                                                 self._depth, level)
    406         if not readable:

~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
    178 
    179     def format(self, object, context, maxlevels, level):
--> 180         return _safe_repr(object, context, maxlevels, level,
    181                           changed_only=self._changed_only)
    182 

~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
    423         recursive = False
    424         if changed_only:
--> 425             params = _changed_params(object)
    426         else:
    427             params = object.get_params(deep=False)

~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
     89     estimator with non-default values."""
     90 
---> 91     params = estimator.get_params(deep=False)
     92     init_func = getattr(estimator.__init__, 'deprecated_original',
     93                         estimator.__init__)

~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
    193         out = dict()
    194         for key in self._get_param_names():
--> 195             value = getattr(self, key)
    196             if deep and hasattr(value, 'get_params'):
    197                 deep_items = value.get_params().items()

AttributeError: 'TextNormalizer' object has no attribute 'language'
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
    700                 type_pprinters=self.type_printers,
    701                 deferred_pprinters=self.deferred_printers)
--> 702             printer.pretty(obj)
    703             printer.flush()
    704             return stream.getvalue()

~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
    392                         if cls is not object \
    393                                 and callable(cls.__dict__.get('__repr__')):
--> 394                             return _repr_pprint(obj, self, cycle)
    395 
    396             return _default_pprint(obj, self, cycle)

~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
    698     """A pprint that just redirects to the normal repr function."""
    699     # Find newlines and replace them with p.break_()
--> 700     output = repr(obj)
    701     lines = output.splitlines()
    702     with p.group():

~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
    258             n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
    259 
--> 260         repr_ = pp.pformat(self)
    261 
    262         # Use bruteforce ellipsis when there are a lot of non-blank characters

~\Anaconda3\lib\pprint.py in pformat(self, object)
    151     def pformat(self, object):
    152         sio = _StringIO()
--> 153         self._format(object, sio, 0, 0, {}, 0)
    154         return sio.getvalue()
    155 

~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context, level)
    168             self._readable = False
    169             return
--> 170         rep = self._repr(object, context, level)
    171         max_width = self._width - indent - allowance
    172         if len(rep) > max_width:

~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
    402 
    403     def _repr(self, object, context, level):
--> 404         repr, readable, recursive = self.format(object, context.copy(),
    405                                                 self._depth, level)
    406         if not readable:

~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context, maxlevels, level)
    178 
    179     def format(self, object, context, maxlevels, level):
--> 180         return _safe_repr(object, context, maxlevels, level,
    181                           changed_only=self._changed_only)
    182 

~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
    423         recursive = False
    424         if changed_only:
--> 425             params = _changed_params(object)
    426         else:
    427             params = object.get_params(deep=False)

~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
     89     estimator with non-default values."""
     90 
---> 91     params = estimator.get_params(deep=False)
     92     init_func = getattr(estimator.__init__, 'deprecated_original',
     93                         estimator.__init__)

~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
    193         out = dict()
    194         for key in self._get_param_names():
--> 195             value = getattr(self, key)
    196             if deep and hasattr(value, 'get_params'):
    197                 deep_items = value.get_params().items()

AttributeError: 'TextNormalizer' object has no attribute 'language'

虽然 class TextNormalizer 如果我尝试实例化它会抛出上述错误,但如果应用于这样的文本它会起作用

# Two sample descriptions containing the joiner characters ('_', '-', '+')
# that remove_concat() replaces with spaces. The second one carries "$80.00"
# so that the tokens "$" and "80.00" appear in the result.
df = pd.DataFrame({'description': ['My order_number is A-08', 'It cost me +$80.00']})
tn = TextNormalizer()
# Only process_narrative() is called here; the instance is never displayed,
# so repr(tn) is not triggered.
df['description'].apply(tn.process_narrative)

产生了这个输出

0    [order, number, 08]
1       [cost, $, 80.00]
Name: description, dtype: object

有人可以解释一下发生了什么吗?我的意思是它有效,虽然它看起来是错误的。出现这种“现象”的原因是什么?

我还必须先下载停用词;除此之外,在代码中设置 self.language = language,然后用 self.language(而不是 language)来检索对应的停用词列表,就可以解决这个错误。

删除 language 参数以及所有用到它的行之后,代码运行也没有问题,所以它只是不接受在 __init__ 中声明、却没有保存为同名实例属性的参数。

import nltk
import pandas as pd
nltk.download('stopwords')
nltk.download('punkt')
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.base import BaseEstimator, TransformerMixin
class TextNormalizer(BaseEstimator, TransformerMixin):
    """Estimator holding a stopword set and a lemmatizer for one language."""

    def __init__(self, language='english'):
        """Store ``language`` and load the matching stopword list."""
        # Keep the constructor argument under its own name so that it can be
        # read back from the instance as ``self.language``.
        self.language = language
        stopword_list = nltk.corpus.stopwords.words(self.language)
        self.stopwords = set(stopword_list)
        self.lemmatizer = WordNetLemmatizer()

# Instantiate with the default language, then echo the instance so the
# notebook displays its repr.
test = TextNormalizer()
test

附录

原因

tn

在 OP 中会报错,是因为 TextNormalizer 对象实例化时没有设置 language 属性,而该值会被用于生成对象自身的表示(repr)。

单行时:

tn

被运行时,并不会实例化对象 tn——它必须已经存在。这一行只是求值并返回 __repr__ 的结果(从上面的回溯可以看到,__repr__ 定义在 sklearn 的 base.py 中)。创建实例本身不会产生错误,因为在您单独运行 tn 或调用 repr(tn) 之前,__repr__ 方法不会被执行。

df['description'].apply(tn.process_narrative)

不会抛出错误,因为 tn 已经存在,并且其 __repr__ 方法未被使用。

您可以像上面那样在 __init__ 中给 self.language 赋值,或者在创建对象之后再设置该属性,来解决此问题:

tn = TextNormalizer()
# Set the attribute after construction so that getattr(tn, 'language')
# succeeds when the instance is displayed.
tn.language = 'english'
tn