RecursionError: maximum recursion depth exceeded
RecursionError: maximum recursion depth exceeded
# NOTE: indentation was lost in this listing and has been reconstructed.
from __future__ import print_function
import os, codecs, nltk.stem

# Snowball stemmer used by the custom vectorizer's analyzer below.
english_stemmer = nltk.stem.SnowballStemmer('english')

# Walk the corpus directory and open every ".txt" file with the "utf-8-sig" codec.
# NOTE(review): with the indentation as reconstructed here, `posts` is rebound
# for each matching file, so only the last opened handle reaches
# fit_transform() below and earlier handles are never closed -- confirm intent.
for root, dirs, files in os.walk("/Users/Documents/corpus/source-document/test1"):
    for file in files:
        if file.endswith(".txt"):
            posts = codecs.open(os.path.join(root,file),"r", "utf-8-sig")

from sklearn.feature_extraction.text import CountVectorizer


class StemmedCountVectorizer(CountVectorizer):
    """CountVectorizer subclass intended to stem every token with english_stemmer."""

    def build_analyzer(self):
        # This line is the cause of the reported RecursionError:
        # self.build_analyzer() is evaluated as an argument to super(), so the
        # method calls itself before super() is ever invoked. The intended call
        # is super(StemmedCountVectorizer, self).build_analyzer().
        analyzer = super(StemmedCountVectorizer, self.build_analyzer())
        # Wrap the parent analyzer so each token it yields is stemmed lazily.
        return lambda doc: (english_stemmer.stem(w) for w in analyzer(doc))


# Build the vocabulary and the document-term matrix from `posts`.
vectorizer = StemmedCountVectorizer(min_df = 1, stop_words = 'english')
X_train = vectorizer.fit_transform(posts)
# X_train.shape unpacks as (number of samples, number of features).
num_samples, num_features = X_train.shape
print("#samples: %d, #features: %d" % (num_samples, num_features)) #samples: 5, #features: 25
print(vectorizer.get_feature_names())
当我对目录中包含的所有文本文件运行以上代码时,它抛出以下错误:
RecursionError: maximum recursion depth exceeded(超出最大递归深度)。
我尝试用 sys.setrecursionlimit 提高递归上限来解决问题,但没有任何效果。当我把上限设置为 20000 这样的大值时,内核直接崩溃了。
你的错误在这一行:analyzer = super(StemmedCountVectorizer, self.build_analyzer())
这里 self.build_analyzer() 被写进了 super() 的参数中,因此在 super() 被调用之前,build_analyzer 就先调用了它自己,
这导致了无限递归(所以提高递归上限也无济于事)。将其更改为 analyzer = super(StemmedCountVectorizer, self).build_analyzer()
# NOTE: indentation was lost in this listing and has been reconstructed.
from __future__ import print_function
import os, codecs, nltk.stem

# Snowball stemmer used by the custom vectorizer's analyzer below.
english_stemmer = nltk.stem.SnowballStemmer('english')

# Walk the corpus directory and open every ".txt" file with the "utf-8-sig" codec.
# NOTE(review): with the indentation as reconstructed here, `posts` is rebound
# for each matching file, so only the last opened handle reaches
# fit_transform() below and earlier handles are never closed -- confirm intent.
for root, dirs, files in os.walk("/Users/Documents/corpus/source-document/test1"):
    for file in files:
        if file.endswith(".txt"):
            posts = codecs.open(os.path.join(root,file),"r", "utf-8-sig")

from sklearn.feature_extraction.text import CountVectorizer


class StemmedCountVectorizer(CountVectorizer):
    """CountVectorizer subclass intended to stem every token with english_stemmer."""

    def build_analyzer(self):
        # This line is the cause of the reported RecursionError:
        # self.build_analyzer() is evaluated as an argument to super(), so the
        # method calls itself before super() is ever invoked. The intended call
        # is super(StemmedCountVectorizer, self).build_analyzer().
        analyzer = super(StemmedCountVectorizer, self.build_analyzer())
        # Wrap the parent analyzer so each token it yields is stemmed lazily.
        return lambda doc: (english_stemmer.stem(w) for w in analyzer(doc))


# Build the vocabulary and the document-term matrix from `posts`.
vectorizer = StemmedCountVectorizer(min_df = 1, stop_words = 'english')
X_train = vectorizer.fit_transform(posts)
# X_train.shape unpacks as (number of samples, number of features).
num_samples, num_features = X_train.shape
print("#samples: %d, #features: %d" % (num_samples, num_features)) #samples: 5, #features: 25
print(vectorizer.get_feature_names())
当我对目录中包含的所有文本文件运行以上代码时,它抛出以下错误: RecursionError: maximum recursion depth exceeded(超出最大递归深度)。
我尝试用 sys.setrecursionlimit 提高递归上限来解决问题,但没有任何效果。当我把上限设置为 20000 这样的大值时,内核直接崩溃了。
你的错误在这一行:analyzer = super(StemmedCountVectorizer, self.build_analyzer())
这里 self.build_analyzer() 被写进了 super() 的参数中,因此在 super() 被调用之前,build_analyzer 就先调用了它自己,
这导致了无限递归(所以提高递归上限也无济于事)。将其更改为 analyzer = super(StemmedCountVectorizer, self).build_analyzer()