使用 Python 将句子中的单词替换为同义词
Replace words in a sentence with synonyms using Python
我有一个名为 news_collection.csv 的新闻数据集,我想把每条新闻文本中的单词替换为 syno.txt 中对应的同义词,但一直没有成功。
def similarity():
    """Load the news CSV and return it with synonyms substituted in the text.

    Reads ``news_collection.csv``, keeps the ``created_at`` and ``text``
    columns, parses ``created_at`` as datetimes, converts every ``text``
    value to ``str`` and passes each one through ``replacesynonyms``.

    Returns:
        The resulting two-column DataFrame.
    """
    raw = pd.read_csv(r'news_collection.csv')
    frame = pd.DataFrame(raw, columns=['created_at', 'text'])
    frame['created_at'] = pd.to_datetime(frame['created_at'])
    # Stringify first so replacesynonyms always receives a str.
    frame['text'] = frame['text'].apply(str).apply(replacesynonyms)
    return frame
# Unfinished stub from the question: presumably meant to return `text` with
# its words replaced by synonyms read from syno.txt -- the body is still missing.
def replacesynonyms(text):
# Opens the synonym file for reading as UTF-8; nothing below reads or closes it yet.
file = open('syno.txt', 'r', encoding="utf8")
//code to be added
有人可以帮忙解决这个算法吗?
试试这个(假设 syno.txt 的每一行是一组用逗号分隔的同义词,组内的任何一个词都会被替换为该行的第一个词):
def similarity():
    """Load the news CSV and return it with synonyms substituted in the text.

    Reads ``news_collection.csv``, keeps the ``created_at`` and ``text``
    columns, parses ``created_at`` as datetimes, converts every ``text``
    value to ``str`` and passes each one through ``replacesynonyms``.

    Returns:
        The resulting two-column DataFrame.
    """
    raw = pd.read_csv(r'news_collection.csv')
    frame = pd.DataFrame(raw, columns=['created_at', 'text'])
    frame['created_at'] = pd.to_datetime(frame['created_at'])
    # Stringify first so replacesynonyms always receives a str.
    frame['text'] = frame['text'].apply(str).apply(replacesynonyms)
    return frame
def create_sets(path='syno.txt'):
    """Read the synonym file into one set of words per line.

    Each line of the file is split on commas and surrounding whitespace is
    stripped from every piece; the pieces of one line form one set.

    Args:
        path: Location of the synonym file. Defaults to ``'syno.txt'``,
            the file this function originally always read.

    Returns:
        A list containing one ``set`` of words per line, in file order.
    """
    # The context manager closes the file even if reading raises; the
    # previous version left the handle open.
    with open(path, 'r', encoding="utf8") as file:
        # Every line (blank ones included) yields a set, so list positions
        # keep matching the file's line order.
        return [{word.strip() for word in line.split(',')} for line in file]
def create_syn_list(path='syno.txt'):
    """Return the first comma-separated entry of every line in the synonym file.

    Args:
        path: Location of the synonym file. Defaults to ``'syno.txt'``,
            the file this function originally always read.

    Returns:
        A list with one whitespace-stripped string per line, in file order:
        the text before the first comma on that line.
    """
    # The context manager closes the file even if reading raises; the
    # previous version left the handle open.
    with open(path, 'r', encoding="utf8") as file:
        # Only the part before the first comma is needed, so split once.
        return [line.split(',', 1)[0].strip() for line in file]
# Load the synonym data once, at module level; replacesynonyms reads both
# names: lists_sets to find a word's group and first_syn_list to get the
# replacement stored at the same index.
lists_sets = create_sets()
first_syn_list = create_syn_list()
def replacesynonyms(text):
    """Return *text* with each known word swapped for its group's first entry.

    The text is split on whitespace. A word found in one of the module-level
    ``lists_sets`` groups is replaced by the entry at the same index in
    ``first_syn_list`` (the first matching group wins); any other word is
    kept unchanged. The resulting words are joined with single spaces.
    """
    replaced = []
    for token in text.split():
        for position, group in enumerate(lists_sets):
            if token in group:
                replaced.append(first_syn_list[position])
                break
        else:
            # No group contains this word, so keep it as it is.
            replaced.append(token)
    return ' '.join(replaced)
# Build the processed frame, then print each rewritten text on its own line.
df = similarity()
sen = df['text'].tolist()
for sentence in sen:
    print(sentence)
我有一个名为 news_collection.csv 的新闻数据集,我想把每条新闻文本中的单词替换为 syno.txt 中对应的同义词,但一直没有成功。
def similarity():
    """Load the news CSV and return it with synonyms substituted in the text.

    Reads ``news_collection.csv``, keeps the ``created_at`` and ``text``
    columns, parses ``created_at`` as datetimes, converts every ``text``
    value to ``str`` and passes each one through ``replacesynonyms``.

    Returns:
        The resulting two-column DataFrame.
    """
    raw = pd.read_csv(r'news_collection.csv')
    frame = pd.DataFrame(raw, columns=['created_at', 'text'])
    frame['created_at'] = pd.to_datetime(frame['created_at'])
    # Stringify first so replacesynonyms always receives a str.
    frame['text'] = frame['text'].apply(str).apply(replacesynonyms)
    return frame
# Unfinished stub from the question: presumably meant to return `text` with
# its words replaced by synonyms read from syno.txt -- the body is still missing.
def replacesynonyms(text):
# Opens the synonym file for reading as UTF-8; nothing below reads or closes it yet.
file = open('syno.txt', 'r', encoding="utf8")
//code to be added
有人可以帮忙解决这个算法吗?
试试这个(假设 syno.txt 的每一行是一组用逗号分隔的同义词,组内的任何一个词都会被替换为该行的第一个词):
def similarity():
    """Load the news CSV and return it with synonyms substituted in the text.

    Reads ``news_collection.csv``, keeps the ``created_at`` and ``text``
    columns, parses ``created_at`` as datetimes, converts every ``text``
    value to ``str`` and passes each one through ``replacesynonyms``.

    Returns:
        The resulting two-column DataFrame.
    """
    raw = pd.read_csv(r'news_collection.csv')
    frame = pd.DataFrame(raw, columns=['created_at', 'text'])
    frame['created_at'] = pd.to_datetime(frame['created_at'])
    # Stringify first so replacesynonyms always receives a str.
    frame['text'] = frame['text'].apply(str).apply(replacesynonyms)
    return frame
def create_sets(path='syno.txt'):
    """Read the synonym file into one set of words per line.

    Each line of the file is split on commas and surrounding whitespace is
    stripped from every piece; the pieces of one line form one set.

    Args:
        path: Location of the synonym file. Defaults to ``'syno.txt'``,
            the file this function originally always read.

    Returns:
        A list containing one ``set`` of words per line, in file order.
    """
    # The context manager closes the file even if reading raises; the
    # previous version left the handle open.
    with open(path, 'r', encoding="utf8") as file:
        # Every line (blank ones included) yields a set, so list positions
        # keep matching the file's line order.
        return [{word.strip() for word in line.split(',')} for line in file]
def create_syn_list(path='syno.txt'):
    """Return the first comma-separated entry of every line in the synonym file.

    Args:
        path: Location of the synonym file. Defaults to ``'syno.txt'``,
            the file this function originally always read.

    Returns:
        A list with one whitespace-stripped string per line, in file order:
        the text before the first comma on that line.
    """
    # The context manager closes the file even if reading raises; the
    # previous version left the handle open.
    with open(path, 'r', encoding="utf8") as file:
        # Only the part before the first comma is needed, so split once.
        return [line.split(',', 1)[0].strip() for line in file]
# Load the synonym data once, at module level; replacesynonyms reads both
# names: lists_sets to find a word's group and first_syn_list to get the
# replacement stored at the same index.
lists_sets = create_sets()
first_syn_list = create_syn_list()
def replacesynonyms(text):
    """Return *text* with each known word swapped for its group's first entry.

    The text is split on whitespace. A word found in one of the module-level
    ``lists_sets`` groups is replaced by the entry at the same index in
    ``first_syn_list`` (the first matching group wins); any other word is
    kept unchanged. The resulting words are joined with single spaces.
    """
    replaced = []
    for token in text.split():
        for position, group in enumerate(lists_sets):
            if token in group:
                replaced.append(first_syn_list[position])
                break
        else:
            # No group contains this word, so keep it as it is.
            replaced.append(token)
    return ' '.join(replaced)
# Build the processed frame, then print each rewritten text on its own line.
df = similarity()
sen = df['text'].tolist()
for sentence in sen:
    print(sentence)