如何获取字符串列表中只有一个字符串的同义词集?
How to get the synsets for only one string in a list of strings?
我正在尝试在列表中查找字符串的同义词集。这是我的代码:
from nltk.corpus import wordnet as wn
from nltk import pos_tag
word1 = ['orange','man','bench']
def getSynonyms(word1):
    """Return the POS-tagged WordNet synonyms for every word in ``word1``.

    Args:
        word1: An iterable of words, e.g. a list of strings. A bare string
            is iterated character by character, so wrap a single word in
            a list: ``getSynonyms(['orange'])``.

    Returns:
        A list with one entry per input word. Each entry is a list of the
        distinct ``pos_tag`` results for that word's lemma names, in
        first-seen order.
    """
    synonymList1 = []
    for data1 in word1:
        tempList1 = []
        for synset1 in wn.synsets(data1):
            # Iterate the lemma names directly instead of indexing with
            # xrange, which does not exist on Python 3.
            for lemma in synset1.lemma_names():
                # Lemma names separate words with '_'; turn them into
                # spaces so each word is tagged as its own token.
                tagged = pos_tag(lemma.replace('_', ' ').split())
                # Tag once and reuse the result for both the duplicate
                # check and the append.
                if tagged not in tempList1:
                    tempList1.append(tagged)
        synonymList1.append(tempList1)
    return synonymList1
print getSynonyms(word1)
print
结果如下:
[[[(u'orange', 'NN')], [(u'orangeness', 'NN')], [(u'orange', 'NN'),
(u'tree', 'NN')], [(u'Orange', 'NN')], [(u'Orange', 'NNP'), (u'River',
'NNP')], [(u'orangish', 'JJ')]], [[(u'man', 'NN')], [(u'adult', 'NN'),
(u'male', 'NN')], [(u'serviceman', 'NN')], [(u'military', 'JJ'), (u'man',
'NN')], [(u'military', 'JJ'), (u'personnel', 'NNS')], [(u'homo', 'NN')],
[(u'human', 'JJ'), (u'being', 'VBG')], [(u'human', 'NN')], [(u'valet',
'NN')], [(u'valet', 'NN'), (u'de', 'IN'), (u'chambre', 'NN')],
[(u'gentleman', 'NN')], [(u"gentleman's", 'NN'), (u'gentleman', 'NN')],
[(u'Man', 'NN')], [(u'Isle', 'NNP'), (u'of', 'IN'), (u'Man', 'NNP')],
[(u'piece', 'NN')], [(u'world', 'NN')], [(u'human', 'JJ'), (u'race', 'NN')],
[(u'humanity', 'NN')], [(u'humankind', 'NN')], [(u'human', 'JJ'),
(u'beings', 'NNS')], [(u'humans', 'NNS')], [(u'mankind', 'NN')]],
[[(u'bench', 'NN')], [(u'terrace', 'NN')], [(u'judiciary', 'NN')],
[(u'workbench', 'NN')], [(u'work', 'NN'), (u'bench', 'NN')], [(u'Bench',
'NN')]]]
但是如果我只想获得一个字符串的同义词集怎么办?
例如,如果我得到 'orange'
的同义词集,我只会得到并打印 :
[(u'orange', 'NN')], [(u'orangeness', 'NN')], [(u'orange', 'NN'),
(u'tree', 'NN')], [(u'Orange', 'NN')], [(u'Orange', 'NNP'), (u'River',
'NNP')], [(u'orangish', 'JJ')]
如果我得到 'man'
的同义词集,我只会得到并打印 :
[(u'man', 'NN')], [(u'adult', 'NN'),
(u'male', 'NN')], [(u'serviceman', 'NN')], [(u'military', 'JJ'), (u'man',
'NN')], [(u'military', 'JJ'), (u'personnel', 'NNS')], [(u'homo', 'NN')],
[(u'human', 'JJ'), (u'being', 'VBG')], [(u'human', 'NN')], [(u'valet',
'NN')], [(u'valet', 'NN'), (u'de', 'IN'), (u'chambre', 'NN')],
[(u'gentleman', 'NN')], [(u"gentleman's", 'NN'), (u'gentleman', 'NN')],
[(u'Man', 'NN')], [(u'Isle', 'NNP'), (u'of', 'IN'), (u'Man', 'NNP')],
[(u'piece', 'NN')], [(u'world', 'NN')], [(u'human', 'JJ'), (u'race', 'NN')],
[(u'humanity', 'NN')], [(u'humankind', 'NN')], [(u'human', 'JJ'),
(u'beings', 'NNS')], [(u'humans', 'NNS')], [(u'mankind', 'NN')]
'bench'
也是如此
我试过了 `print getSynonyms(word1[0])`,
但是结果真的很奇怪
有人可以帮忙吗?谢谢
那个函数需要一个数组,这就是为什么你传递一个字符串会得到奇怪的结果。如果你给它传递一个单词,它会为单词的每个字母寻找同义词集。你可以传递一个只有一个值的数组:
print getSynonyms([word1[0]])
您还可以重写该函数,去掉外层循环,这样就可以直接传入单个单词,类似如下:
def getSynonyms(word):
    """Return the POS-tagged WordNet synonyms for a single word.

    Args:
        word: The word to look up, passed as a plain string.

    Returns:
        A one-element list whose only item is the list of distinct
        ``pos_tag`` results for the word's lemma names, in first-seen
        order. The outer list is kept so the nesting matches what the
        list-based version returns for a one-word list.
    """
    tempList1 = []
    for synset1 in wn.synsets(word):
        # Iterate the lemma names directly instead of indexing with
        # xrange, which does not exist on Python 3. A separate loop
        # variable also avoids rebinding the ``word`` parameter.
        for lemma in synset1.lemma_names():
            # Lemma names separate words with '_'; turn them into spaces
            # so each word is tagged as its own token.
            tagged = pos_tag(lemma.replace('_', ' ').split())
            # Tag once and reuse the result for both the duplicate check
            # and the append.
            if tagged not in tempList1:
                tempList1.append(tagged)
    synonymList1 = [tempList1]
    return synonymList1
我正在尝试在列表中查找字符串的同义词集。这是我的代码:
from nltk.corpus import wordnet as wn
from nltk import pos_tag
word1 = ['orange','man','bench']
def getSynonyms(word1):
    """Return the POS-tagged WordNet synonyms for every word in ``word1``.

    Args:
        word1: An iterable of words, e.g. a list of strings. A bare string
            is iterated character by character, so wrap a single word in
            a list: ``getSynonyms(['orange'])``.

    Returns:
        A list with one entry per input word. Each entry is a list of the
        distinct ``pos_tag`` results for that word's lemma names, in
        first-seen order.
    """
    synonymList1 = []
    for data1 in word1:
        tempList1 = []
        for synset1 in wn.synsets(data1):
            # Iterate the lemma names directly instead of indexing with
            # xrange, which does not exist on Python 3.
            for lemma in synset1.lemma_names():
                # Lemma names separate words with '_'; turn them into
                # spaces so each word is tagged as its own token.
                tagged = pos_tag(lemma.replace('_', ' ').split())
                # Tag once and reuse the result for both the duplicate
                # check and the append.
                if tagged not in tempList1:
                    tempList1.append(tagged)
        synonymList1.append(tempList1)
    return synonymList1
print getSynonyms(word1)
print
结果如下:
[[[(u'orange', 'NN')], [(u'orangeness', 'NN')], [(u'orange', 'NN'),
(u'tree', 'NN')], [(u'Orange', 'NN')], [(u'Orange', 'NNP'), (u'River',
'NNP')], [(u'orangish', 'JJ')]], [[(u'man', 'NN')], [(u'adult', 'NN'),
(u'male', 'NN')], [(u'serviceman', 'NN')], [(u'military', 'JJ'), (u'man',
'NN')], [(u'military', 'JJ'), (u'personnel', 'NNS')], [(u'homo', 'NN')],
[(u'human', 'JJ'), (u'being', 'VBG')], [(u'human', 'NN')], [(u'valet',
'NN')], [(u'valet', 'NN'), (u'de', 'IN'), (u'chambre', 'NN')],
[(u'gentleman', 'NN')], [(u"gentleman's", 'NN'), (u'gentleman', 'NN')],
[(u'Man', 'NN')], [(u'Isle', 'NNP'), (u'of', 'IN'), (u'Man', 'NNP')],
[(u'piece', 'NN')], [(u'world', 'NN')], [(u'human', 'JJ'), (u'race', 'NN')],
[(u'humanity', 'NN')], [(u'humankind', 'NN')], [(u'human', 'JJ'),
(u'beings', 'NNS')], [(u'humans', 'NNS')], [(u'mankind', 'NN')]],
[[(u'bench', 'NN')], [(u'terrace', 'NN')], [(u'judiciary', 'NN')],
[(u'workbench', 'NN')], [(u'work', 'NN'), (u'bench', 'NN')], [(u'Bench',
'NN')]]]
但是如果我只想获得一个字符串的同义词集怎么办?
例如,如果我得到 'orange'
的同义词集,我只会得到并打印 :
[(u'orange', 'NN')], [(u'orangeness', 'NN')], [(u'orange', 'NN'),
(u'tree', 'NN')], [(u'Orange', 'NN')], [(u'Orange', 'NNP'), (u'River',
'NNP')], [(u'orangish', 'JJ')]
如果我得到 'man'
的同义词集,我只会得到并打印 :
[(u'man', 'NN')], [(u'adult', 'NN'),
(u'male', 'NN')], [(u'serviceman', 'NN')], [(u'military', 'JJ'), (u'man',
'NN')], [(u'military', 'JJ'), (u'personnel', 'NNS')], [(u'homo', 'NN')],
[(u'human', 'JJ'), (u'being', 'VBG')], [(u'human', 'NN')], [(u'valet',
'NN')], [(u'valet', 'NN'), (u'de', 'IN'), (u'chambre', 'NN')],
[(u'gentleman', 'NN')], [(u"gentleman's", 'NN'), (u'gentleman', 'NN')],
[(u'Man', 'NN')], [(u'Isle', 'NNP'), (u'of', 'IN'), (u'Man', 'NNP')],
[(u'piece', 'NN')], [(u'world', 'NN')], [(u'human', 'JJ'), (u'race', 'NN')],
[(u'humanity', 'NN')], [(u'humankind', 'NN')], [(u'human', 'JJ'),
(u'beings', 'NNS')], [(u'humans', 'NNS')], [(u'mankind', 'NN')]
'bench' 也是如此
我试过了 `print getSynonyms(word1[0])`,
但是结果真的很奇怪
有人可以帮忙吗?谢谢
那个函数需要一个数组,这就是为什么你传递一个字符串会得到奇怪的结果。如果你给它传递一个单词,它会为单词的每个字母寻找同义词集。你可以传递一个只有一个值的数组:
print getSynonyms([word1[0]])
您还可以重写该函数,去掉外层循环,这样就可以直接传入单个单词,类似如下:
def getSynonyms(word):
    """Return the POS-tagged WordNet synonyms for a single word.

    Args:
        word: The word to look up, passed as a plain string.

    Returns:
        A one-element list whose only item is the list of distinct
        ``pos_tag`` results for the word's lemma names, in first-seen
        order. The outer list is kept so the nesting matches what the
        list-based version returns for a one-word list.
    """
    tempList1 = []
    for synset1 in wn.synsets(word):
        # Iterate the lemma names directly instead of indexing with
        # xrange, which does not exist on Python 3. A separate loop
        # variable also avoids rebinding the ``word`` parameter.
        for lemma in synset1.lemma_names():
            # Lemma names separate words with '_'; turn them into spaces
            # so each word is tagged as its own token.
            tagged = pos_tag(lemma.replace('_', ' ').split())
            # Tag once and reuse the result for both the duplicate check
            # and the append.
            if tagged not in tempList1:
                tempList1.append(tagged)
    synonymList1 = [tempList1]
    return synonymList1