Python 2: AttributeError: 'list' object has no attribute 'split'
This is my LSA program. In this function I want to tokenize all of my text and then reduce the words to their stems. I am trying to integrate their stemmer, and I get this:

for word in titles.split(" "):
AttributeError: 'list' object has no attribute 'split'
Here is the LSA code:
# -*- coding: utf-8 -*-
from numpy import zeros
from scipy.linalg import svd
from math import log
from numpy import asarray, sum
#from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
#from nltk.stem import PorterStemmer
#from nltk.stem.isri import ISRIStemmer
import nltk
#from matplotlib import pyplot as plt
from snowballstemmer import stemmer

titles = [" ذهبت الاخت الى المدرسة","تقع المدرسة في الجبال",
          "ذهب الام لزيارة ابنتها في المدرسة ","تحضر الام الكعكة" ]
ar_stemmer = stemmer("arabic")
stopwords = ['ثم','و','حتى','الى','على','في']
ignorechars = ''',:'!'''

class LSA(object):
    def __init__(self, stopwords, ignorechars):
        self.stopwords = stopwords
        self.ignorechars = ignorechars
        self.wdict = {}
        self.dcount = 0

    def parse(self, doc):
        for word in titles.split(" "):
            stem = ar_stemmer.stemWord(word)
            if stem in self.stopwords:
                pass
            elif stem in self.wdict:
                self.wdict[stem].append(self.dcount)
            else:
                self.wdict[stem] = [self.dcount]
        self.dcount += 1
And this is what I am trying to integrate:
from snowballstemmer import stemmer

ar_stemmer = stemmer("arabic")
sentence = u" ذهبت الاخت الى المدرسة, تقع المدرسة في الجبال"
for word in sentence.split(" "):
    stem = ar_stemmer.stemWord(word)
    print stem
titles is already a list; do this instead:
for sentence in titles:
    for word in sentence.split(" "):
        ...
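For example, here is a minimal, self-contained sketch of that loop, assuming the same titles list and Arabic snowball stemmer as in the question (shortened to two titles for brevity):

# -*- coding: utf-8 -*-
from snowballstemmer import stemmer

ar_stemmer = stemmer("arabic")
titles = [" ذهبت الاخت الى المدرسة", "تقع المدرسة في الجبال"]

for sentence in titles:               # iterate over the list of documents
    for word in sentence.split(" "):  # split each string, not the list itself
        stem = ar_stemmer.stemWord(word)
        print stem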
List objects don't have a split method the way strings do. If you want to split each string in the titles list, you can nest a loop and do something like this:
def parse(self, doc):
    for title in titles:
        for word in title.split():
            stem = ar_stemmer.stemWord(word)
            if stem in self.stopwords:
                pass
            ...
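Alternatively, since parse already takes a doc argument that the question's code never uses, here is a hedged sketch of a per-document variant: it splits the document passed in rather than the titles list, which also keeps dcount meaningful (one increment per document). The names match the question's code, but this restructuring is only a suggestion:

def parse(self, doc):
    # split the single document passed in, instead of the titles list
    for word in doc.split():
        stem = ar_stemmer.stemWord(word)
        if stem in self.stopwords:
            pass
        elif stem in self.wdict:
            self.wdict[stem].append(self.dcount)
        else:
            self.wdict[stem] = [self.dcount]
    self.dcount += 1  # count one document per parse() call

# usage: feed each title to parse() as its own document
lsa = LSA(stopwords, ignorechars)
for title in titles:
    lsa.parse(title)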