从 python 中的列表元素中删除后缀
Remove Suffixes from list elements in python
我必须创建一个程序来读取代码行直到一个“.”。输入后,我必须删除标点符号,全部更改为小写,删除停用词和后缀。除了能够删除后缀之外,我已经管理了所有这些,我已经尝试了 .strip,如您所见,但它只接受一个参数,实际上并没有从列表元素中删除后缀。有 advice/pointers/help 吗?谢谢
stopWords = [ "a", "i", "it", "am", "at", "on", "in", "to", "too", "very", \
"of", "from", "here", "even", "the", "but", "and", "is", "my", \
"them", "then", "this", "that", "than", "though", "so", "are" ]
noStemWords = [ "feed", "sages", "yearling", "mass", "make", "sly", "ring" ]
# -------- Replace with your code - e.g. delete line, add your code here ------------
Text = raw_input("Indexer: Type in lines, that finish with a . at start of line only: ").lower()
while Text != ".":
LineNo = 0
x=0
y=0
i= 0
#creates new string, cycles through strint Text and removes puctutaiton
PuncRemover = ""
for c in Text:
if c in ".,:;!?&'":
c=""
PuncRemover += c
SplitWords = PuncRemover.split()
#loops through SplitWords list, removes value at x if found in StopWords list
while x < len(SplitWords)-1:
if SplitWords[x] in stopWords:
del SplitWords[x]
else:
x=x+1
while y < len(SplitWords)-1:
if SplitWords[y] in noStemWords:
y=y+1
else:
SplitWords[y].strip("ed")
y=y+1
Text = raw_input().lower()
print "lines with stopwords removed:" + str(SplitWords)
print Text
print LineNo
print x
print y
print PuncRemover
以下函数应删除任何给定字符串的后缀。
from itertools import groupby
def removeSuffixs(sentence):
suffixList = ["ing", "ation"] #add more as nessecary
for item in suffixList:
if item in sentence:
sentence = sentence.replace(item, "")
repeatLetters = next((True for char, group in groupby(sentence)
if sum(1 for _ in group) >= 2), False)
if repeatLetters:
sentence = sentence[:-1]
return sentence
示例:
print(removeSuffixs("climbing running")) # 'climb run'
print(removeSuffixs("summation")) # 'sum'
在您的代码中,替换 SplitWords[y].strip("ed")
有,
SplitWords[y] = removeSuffixs(SplitWords[y])
我必须创建一个程序来读取代码行直到一个“.”。输入后,我必须删除标点符号,全部更改为小写,删除停用词和后缀。除了能够删除后缀之外,我已经管理了所有这些,我已经尝试了 .strip,如您所见,但它只接受一个参数,实际上并没有从列表元素中删除后缀。有 advice/pointers/help 吗?谢谢
stopWords = [ "a", "i", "it", "am", "at", "on", "in", "to", "too", "very", \
"of", "from", "here", "even", "the", "but", "and", "is", "my", \
"them", "then", "this", "that", "than", "though", "so", "are" ]
noStemWords = [ "feed", "sages", "yearling", "mass", "make", "sly", "ring" ]
# -------- Replace with your code - e.g. delete line, add your code here ------------
Text = raw_input("Indexer: Type in lines, that finish with a . at start of line only: ").lower()
while Text != ".":
LineNo = 0
x=0
y=0
i= 0
#creates new string, cycles through strint Text and removes puctutaiton
PuncRemover = ""
for c in Text:
if c in ".,:;!?&'":
c=""
PuncRemover += c
SplitWords = PuncRemover.split()
#loops through SplitWords list, removes value at x if found in StopWords list
while x < len(SplitWords)-1:
if SplitWords[x] in stopWords:
del SplitWords[x]
else:
x=x+1
while y < len(SplitWords)-1:
if SplitWords[y] in noStemWords:
y=y+1
else:
SplitWords[y].strip("ed")
y=y+1
Text = raw_input().lower()
print "lines with stopwords removed:" + str(SplitWords)
print Text
print LineNo
print x
print y
print PuncRemover
以下函数应删除任何给定字符串的后缀。
from itertools import groupby
def removeSuffixs(sentence):
suffixList = ["ing", "ation"] #add more as nessecary
for item in suffixList:
if item in sentence:
sentence = sentence.replace(item, "")
repeatLetters = next((True for char, group in groupby(sentence)
if sum(1 for _ in group) >= 2), False)
if repeatLetters:
sentence = sentence[:-1]
return sentence
示例:
print(removeSuffixs("climbing running")) # 'climb run'
print(removeSuffixs("summation")) # 'sum'
在您的代码中,替换 SplitWords[y].strip("ed")
有,
SplitWords[y] = removeSuffixs(SplitWords[y])