Python 文本文档翻译比较
Python Text Document Translation Comparison
这是个很简单的问题。我正在尝试创建一个“翻译比较”(translation comparison)程序,用来读取并比较两个文档,然后返回其中一个文档里有、而另一个文档里没有的每个单词。这是给初学者课程用的,所以我尽量避免使用晦涩的内部方法,即使这意味着代码效率较低。这是我目前写出的代码:
def translation_comparison(path1="Desktop/file1.txt", path2="Desktop/file2.txt"):
    """Return every word of the second document that is missing from the first.

    Both files are read completely and tokenized with the regular expression
    for runs of word characters, so punctuation and whitespace never end up
    in a word.

    Args:
        path1: Path of the reference document.
        path2: Path of the document whose words are checked against ``path1``.

    Returns:
        list[str]: The words of ``path2`` that do not occur in ``path1``, in
        the order they appear (a word repeated in ``path2`` is repeated here).
        The list is empty when nothing is missing.

    Raises:
        OSError: If either file cannot be opened.
    """
    import re

    # ``with`` closes both handles even if reading fails.
    with open(path1, "r") as file1, open(path2, "r") as file2:
        # A set makes each membership test O(1) instead of scanning a list.
        known_words = set(re.findall(r'\w+', file1.read()))
        candidate_words = re.findall(r'\w+', file2.read())

    # Collect *all* missing words: a ``return`` inside the loop would stop at
    # the first one and hide the rest.
    return [word for word in candidate_words if word not in known_words]
你可以尝试这样的事情
#######Test data
#file1.txt = this is a test
#file2.txt = this a test
#results#
#is
def translation_comparison(path1="file1.txt", path2="file2.txt"):
    """Print each word of the first file that does not occur in the second.

    Words are the whitespace-separated tokens produced by ``str.split()``.
    They are printed one per line, in the order they appear in the first
    file; a missing word that appears several times is printed each time.

    Args:
        path1: Path of the file whose words are checked.
        path2: Path of the file the words are looked up in.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(path1, 'r') as f1:
        # Keep the word list under its own name instead of rebinding ``f1``.
        words1 = f1.read().split()
    with open(path2, 'r') as f2:
        # A set gives constant-time lookups for the loop below.
        words2 = set(f2.read().split())
    for word in words1:
        if word not in words2:
            print(word)
# Run the comparison; with no arguments it reads file1.txt and file2.txt.
translation_comparison()
使用下面这种 with 写法也是一个好习惯:
# Open the file inside a ``with`` block so it is closed when the block exits.
with open("file1.txt", 'r') as f1:
    # After this line ``f1`` names the list of words, not the file object.
    f1 = f1.read().split()
因为用 with 打开文件时,离开 with 代码块后文件会被自动关闭。Python 很擅长释放和管理内存,但确保释放文件,或者像下面这样显式调用 close(),始终是个好习惯:
file1.close()
完成后。
假设您想要按位置逐词比较,例如将 a b c 与 b a c 比较时,会先返回 a 和 b,然后返回 b 和 a(而原始代码在这种情况下返回 None),可以这样写:
import string
import itertools
class FileExhausted(Exception):
    """Signal that a file being read has no more characters to offer."""
def read_by_word(file, chunk_size=4096):
    """Yield the whitespace-separated words of an open text file, one at a time.

    Characters listed in ``string.whitespace`` separate words; runs of
    separators never produce empty words.

    Args:
        file: Object with a ``read(size)`` method that returns ``str`` and
            returns ``""`` at end of input (e.g. a file opened in text mode).
        chunk_size: Maximum number of characters requested per ``read`` call.
            The stream may be consumed up to this many characters ahead of
            the word currently being yielded.

    Yields:
        str: Each non-empty word, in the order it appears in the file.
    """
    pending = []  # characters of the word currently being assembled
    while True:
        chunk = file.read(chunk_size)
        if not chunk:
            break
        for char in chunk:
            if char in string.whitespace:
                # A separator ends the current word, if there is one.
                if pending:
                    yield "".join(pending)
                    pending = []
            else:
                pending.append(char)
    # Flush the last word: input that does not end in whitespace must not
    # lose its final word when end-of-file is reached mid-word.
    if pending:
        yield "".join(pending)
def translation_comparison(path1="file1.txt", path2="file2.txt"):
    """Yield the word pairs that differ when two files are compared by position.

    The n-th word of the first file is compared with the n-th word of the
    second file; words come from ``read_by_word``.

    Args:
        path1: Path of the first file.
        path2: Path of the second file.

    Yields:
        tuple: ``(word1, word2)`` for every position where the two words
        differ. When one file has fewer words than the other, the missing
        side is ``None``.

    Raises:
        OSError: If either file cannot be opened (raised on first iteration,
            because this is a generator).
    """
    # Both files stay open while the generator is suspended and are closed
    # once iteration finishes or the generator is closed.
    with open(path1) as file1, open(path2) as file2:
        words1 = read_by_word(file1)
        words2 = read_by_word(file2)
        # zip_longest pads the shorter word stream with the fill value.
        for word1, word2 in itertools.zip_longest(words1, words2, fillvalue=None):
            if word1 != word2:
                yield (word1, word2)
这是个很简单的问题。我正在尝试创建一个“翻译比较”(translation comparison)程序,用来读取并比较两个文档,然后返回其中一个文档里有、而另一个文档里没有的每个单词。这是给初学者课程用的,所以我尽量避免使用晦涩的内部方法,即使这意味着代码效率较低。这是我目前写出的代码:
def translation_comparison(path1="Desktop/file1.txt", path2="Desktop/file2.txt"):
    """Return every word of the second document that is missing from the first.

    Both files are read completely and tokenized with the regular expression
    for runs of word characters, so punctuation and whitespace never end up
    in a word.

    Args:
        path1: Path of the reference document.
        path2: Path of the document whose words are checked against ``path1``.

    Returns:
        list[str]: The words of ``path2`` that do not occur in ``path1``, in
        the order they appear (a word repeated in ``path2`` is repeated here).
        The list is empty when nothing is missing.

    Raises:
        OSError: If either file cannot be opened.
    """
    import re

    # ``with`` closes both handles even if reading fails.
    with open(path1, "r") as file1, open(path2, "r") as file2:
        # A set makes each membership test O(1) instead of scanning a list.
        known_words = set(re.findall(r'\w+', file1.read()))
        candidate_words = re.findall(r'\w+', file2.read())

    # Collect *all* missing words: a ``return`` inside the loop would stop at
    # the first one and hide the rest.
    return [word for word in candidate_words if word not in known_words]
你可以尝试这样的事情
#######Test data
#file1.txt = this is a test
#file2.txt = this a test
#results#
#is
def translation_comparison(path1="file1.txt", path2="file2.txt"):
    """Print each word of the first file that does not occur in the second.

    Words are the whitespace-separated tokens produced by ``str.split()``.
    They are printed one per line, in the order they appear in the first
    file; a missing word that appears several times is printed each time.

    Args:
        path1: Path of the file whose words are checked.
        path2: Path of the file the words are looked up in.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(path1, 'r') as f1:
        # Keep the word list under its own name instead of rebinding ``f1``.
        words1 = f1.read().split()
    with open(path2, 'r') as f2:
        # A set gives constant-time lookups for the loop below.
        words2 = set(f2.read().split())
    for word in words1:
        if word not in words2:
            print(word)
# Run the comparison; with no arguments it reads file1.txt and file2.txt.
translation_comparison()
使用下面这种 with 写法也是一个好习惯:
with open("file1.txt", 'r') as f1:
    f1 =f1.read().split()
因为用 with 打开文件时,离开 with 代码块后文件会被自动关闭。Python 很擅长释放和管理内存,但确保释放文件,或者像下面这样显式调用 close(),始终是个好习惯:
file1.close()
完成后。
假设您想要按位置逐词比较,例如将 a b c 与 b a c 比较时,会先返回 a 和 b,然后返回 b 和 a(而原始代码在这种情况下返回 None),可以这样写:
import string
import itertools
class FileExhausted(Exception):
    """Signal that a file being read has no more characters to offer."""
def read_by_word(file, chunk_size=4096):
    """Yield the whitespace-separated words of an open text file, one at a time.

    Characters listed in ``string.whitespace`` separate words; runs of
    separators never produce empty words.

    Args:
        file: Object with a ``read(size)`` method that returns ``str`` and
            returns ``""`` at end of input (e.g. a file opened in text mode).
        chunk_size: Maximum number of characters requested per ``read`` call.
            The stream may be consumed up to this many characters ahead of
            the word currently being yielded.

    Yields:
        str: Each non-empty word, in the order it appears in the file.
    """
    pending = []  # characters of the word currently being assembled
    while True:
        chunk = file.read(chunk_size)
        if not chunk:
            break
        for char in chunk:
            if char in string.whitespace:
                # A separator ends the current word, if there is one.
                if pending:
                    yield "".join(pending)
                    pending = []
            else:
                pending.append(char)
    # Flush the last word: input that does not end in whitespace must not
    # lose its final word when end-of-file is reached mid-word.
    if pending:
        yield "".join(pending)
def translation_comparison(path1="file1.txt", path2="file2.txt"):
    """Yield the word pairs that differ when two files are compared by position.

    The n-th word of the first file is compared with the n-th word of the
    second file; words come from ``read_by_word``.

    Args:
        path1: Path of the first file.
        path2: Path of the second file.

    Yields:
        tuple: ``(word1, word2)`` for every position where the two words
        differ. When one file has fewer words than the other, the missing
        side is ``None``.

    Raises:
        OSError: If either file cannot be opened (raised on first iteration,
            because this is a generator).
    """
    # Both files stay open while the generator is suspended and are closed
    # once iteration finishes or the generator is closed.
    with open(path1) as file1, open(path2) as file2:
        words1 = read_by_word(file1)
        words2 = read_by_word(file2)
        # zip_longest pads the shorter word stream with the fill value.
        for word1, word2 in itertools.zip_longest(words1, words2, fillvalue=None):
            if word1 != word2:
                yield (word1, word2)