Python3 如何将文本文件转换为列表
How to Convert a Text File into a List in Python3
在 Python3 中,我想从一个已有的 .txt 文件(内容是歌词、字幕或其他文本)
生成一个简单的列表(没有任何嵌套),
列表中只包含文件里出现的单词,不含空格或其他标点符号。
根据其他 StackExchange 请求,我做了这个
import csv

# Read the lyrics through the csv module: csv.reader yields every row as a
# list of strings, splitting the text on commas.
with open('she_loves_you.txt', 'r') as crimefile:  # closed automatically on exit
    reader = csv.reader(crimefile)
    allRows = list(reader)  # result is a list with nested lists

ultimate = []
for i in allRows:
    ultimate += i  # result is a list with elements longer than one word

ultimate2 = []
for i in ultimate:
    # "+=" extends a list with each item of the right-hand side; for a string
    # those items are its characters, hence single letters instead of words.
    ultimate2 += i  # result is a list with elements which are single letters
我希望的结果是这样的
['She', 'loves', 'you', 'yeah', 'yeah', 'yeah', 'She', 'loves', 'you', ...]
=========================================== =========================
同样有趣的是理解为什么代码(它作为上述代码的扩展运行):
import re
# NOTE: `ultimate` is a list here, while re.findall() expects a str or
# bytes-like object as its second argument, so this call raises TypeError.
print (re.findall(r"[\w']+", ultimate))
出现以下错误:
Traceback (most recent call last):
File "4.4.4.csv.into.list.py", line 72, in <module>
print (re.findall(r"[\w']+", ultimate))
File "/usr/lib/python3.7/re.py", line 223, in findall
return _compile(pattern, flags).findall(string)
TypeError: expected string or bytes-like object
错误消息已经说得很清楚:"expected string or bytes-like object"(需要字符串或类字节对象)。
这意味着应该先把 ultimate 转换为字符串 (str);检查 ultimate 的类型 (type) 就会发现它是一个 list 对象。
>>> type(ultimate)
<class 'list'>
# or
>>> type([])
<class 'list'>
对于你的情况,可以这样写:
# Pattern shared by both variants: runs of word characters and apostrophes.
word_pattern = r"[\w']+"

# Variant 1: search the string form of the whole list (original text)
list_as_text = str(ultimate)
print(re.findall(word_pattern, list_as_text))

# Variant 2: search the list items glued together with spaces (joined words)
joined_words = ' '.join(ultimate)
print(re.findall(word_pattern, joined_words))
试试这个:
import csv

with open('she_loves_you.txt', 'r') as crimefile:  # closed automatically on exit
    reader = csv.reader(crimefile)
    allRows = list(reader)  # result is a list with nested lists

ultimate = []
for row in allRows:
    # Each row from csv.reader is a list of strings, and a list has no
    # .split() method, so the splitting is done on every cell of the row.
    for cell in row:
        # split() without an argument also drops the empty strings that
        # split(" ") leaves behind for leading or repeated spaces.
        ultimate += cell.split()
下面是我在这个问题领域所做的工作的完整输出
import csv
import re
import json
#1 def1
def decomposition(file):
    '''
    Open the text file `file` and, in 3 steps, build a list containing the
    single words that appear in it.

    file -- path of the text file to read
    Returns a list of words, where a word is a run of word characters and
    apostrophes.
    '''
    # newline='' is what the csv documentation recommends for files that are
    # handed to csv.reader; the "with" block closes the file afterwards.
    with open(file, 'r', newline='') as crimefile:
        #step1 : list with nested lists (one inner list per csv row)
        allRows = list(csv.reader(crimefile))
    #step2 : one flat list, with elements that may be longer than one word
    ultimate = []
    for row in allRows:
        ultimate += row
    #step3 : one list, with elements which are the length of one word
    # re.findall() needs a string, so the items are joined first; passing the
    # list itself raises TypeError.
    return re.findall(r"[\w']+", ' '.join(ultimate))


# Module-level word list used by saving() and by the calls at the end of the script.
list_of_words = decomposition('she_loves_you.txt')
#2 def2
def saving(words=None, path='she_loves_you_list.txt'):
    '''
    Write a list of words to a text file as JSON.

    words -- list to save; when omitted, the module-level list_of_words is used
    path  -- file to create or overwrite; defaults to 'she_loves_you_list.txt'
    '''
    if words is None:
        words = list_of_words
    # "with" closes the file even if json.dump raises.
    with open(path, 'w') as fp:
        json.dump(words, fp)
#3 def3
def lyric_to_frequencies(lyrics):
    '''
    Count how many times each word occurs.

    lyrics -- iterable of words
    Returns a dictionary that maps every unique word to the number of times
    it appears in `lyrics`.
    '''
    myDict = {}
    for word in lyrics:
        # get() supplies 0 for a word seen for the first time.
        myDict[word] = myDict.get(word, 0) + 1
    return myDict
#4 def4
def most_common_words(freqs):
    '''
    Find the word(s) that occur most often.

    freqs -- dictionary mapping words to their counts
    Prints and returns a tuple (words, best): `best` is the highest count in
    `freqs` and `words` is the list of every word that has that count.
    Raises ValueError when `freqs` is empty, because max() has nothing to
    compare.
    '''
    best = max(freqs.values())  # the biggest count
    # several words can share the biggest count, so collect all of them
    words = [k for k in freqs if freqs[k] == best]
    print(words, best)
    return (words, best)
#5 def5
def words_often(freqs, minTimes):
    '''
    Collect the words that appear at least `minTimes` times, most frequent
    first.

    freqs    -- dictionary mapping words to their counts
    minTimes -- minimum count a word needs in order to be reported
    Returns a list of (words, count) tuples as produced by
    most_common_words().

    NOTE: `freqs` is modified in place - every reported word is deleted
    from it.
    '''
    result = []
    # Looping only while freqs is non-empty also covers the case where every
    # word reaches minTimes: most_common_words() calls max(), which raises
    # ValueError on an empty dictionary.
    while freqs:
        temp = most_common_words(freqs)
        if temp[1] < minTimes:
            break
        result.append(temp)
        # remove the reported words so the next pass finds the next-best count
        for w in temp[0]:
            del freqs[w]
    return result
#1
# list_of_words has already been built at module level by the code above,
# so no separate call is needed for this step.
#2 write the word list to 'she_loves_you_list.txt' as JSON
saving()
#3 count how often every word occurs
lyric_to_frequencies(list_of_words)
#4 print the most frequent word(s)
most_common_words(lyric_to_frequencies(list_of_words))
#5 print every group of words that occurs at least 5 times
# (a fresh frequency dictionary is passed because words_often deletes from it)
words_often(lyric_to_frequencies(list_of_words), 5)
在 Python3 中,我想从一个已有的 .txt 文件(内容是歌词、字幕或其他文本)生成一个简单的列表(没有任何嵌套),列表中只包含文件里出现的单词,不含空格或其他标点符号。
根据其他 StackExchange 请求,我做了这个
import csv

# Read the lyrics through the csv module: csv.reader yields every row as a
# list of strings, splitting the text on commas.
with open('she_loves_you.txt', 'r') as crimefile:  # closed automatically on exit
    reader = csv.reader(crimefile)
    allRows = list(reader)  # result is a list with nested lists

ultimate = []
for i in allRows:
    ultimate += i  # result is a list with elements longer than one word

ultimate2 = []
for i in ultimate:
    # "+=" extends a list with each item of the right-hand side; for a string
    # those items are its characters, hence single letters instead of words.
    ultimate2 += i  # result is a list with elements which are single letters
我希望的结果是这样的
['She', 'loves', 'you', 'yeah', 'yeah', 'yeah', 'She', 'loves', 'you', ...]
=========================================== =========================
同样有趣的是理解为什么代码(它作为上述代码的扩展运行):
import re
# NOTE: `ultimate` is a list here, while re.findall() expects a str or
# bytes-like object as its second argument, so this call raises TypeError.
print (re.findall(r"[\w']+", ultimate))
出现以下错误:
Traceback (most recent call last):
File "4.4.4.csv.into.list.py", line 72, in <module>
print (re.findall(r"[\w']+", ultimate))
File "/usr/lib/python3.7/re.py", line 223, in findall
return _compile(pattern, flags).findall(string)
TypeError: expected string or bytes-like object
错误消息已经说得很清楚:"expected string or bytes-like object"(需要字符串或类字节对象)。
这意味着应该先把 ultimate 转换为字符串 (str);检查 ultimate 的类型 (type) 就会发现它是一个 list 对象。
>>> type(ultimate)
<class 'list'>
# or
>>> type([])
<class 'list'>
对于你的情况,可以这样写:
# Pattern shared by both variants: runs of word characters and apostrophes.
word_pattern = r"[\w']+"

# Variant 1: search the string form of the whole list (original text)
list_as_text = str(ultimate)
print(re.findall(word_pattern, list_as_text))

# Variant 2: search the list items glued together with spaces (joined words)
joined_words = ' '.join(ultimate)
print(re.findall(word_pattern, joined_words))
试试这个:
import csv

with open('she_loves_you.txt', 'r') as crimefile:  # closed automatically on exit
    reader = csv.reader(crimefile)
    allRows = list(reader)  # result is a list with nested lists

ultimate = []
for row in allRows:
    # Each row from csv.reader is a list of strings, and a list has no
    # .split() method, so the splitting is done on every cell of the row.
    for cell in row:
        # split() without an argument also drops the empty strings that
        # split(" ") leaves behind for leading or repeated spaces.
        ultimate += cell.split()
下面是我在这个问题领域所做的工作的完整输出
import csv
import re
import json
#1 def1
def decomposition(file):
    '''
    Open the text file `file` and, in 3 steps, build a list containing the
    single words that appear in it.

    file -- path of the text file to read
    Returns a list of words, where a word is a run of word characters and
    apostrophes.
    '''
    # newline='' is what the csv documentation recommends for files that are
    # handed to csv.reader; the "with" block closes the file afterwards.
    with open(file, 'r', newline='') as crimefile:
        #step1 : list with nested lists (one inner list per csv row)
        allRows = list(csv.reader(crimefile))
    #step2 : one flat list, with elements that may be longer than one word
    ultimate = []
    for row in allRows:
        ultimate += row
    #step3 : one list, with elements which are the length of one word
    # re.findall() needs a string, so the items are joined first; passing the
    # list itself raises TypeError.
    return re.findall(r"[\w']+", ' '.join(ultimate))


# Module-level word list used by saving() and by the calls at the end of the script.
list_of_words = decomposition('she_loves_you.txt')
#2 def2
def saving(words=None, path='she_loves_you_list.txt'):
    '''
    Write a list of words to a text file as JSON.

    words -- list to save; when omitted, the module-level list_of_words is used
    path  -- file to create or overwrite; defaults to 'she_loves_you_list.txt'
    '''
    if words is None:
        words = list_of_words
    # "with" closes the file even if json.dump raises.
    with open(path, 'w') as fp:
        json.dump(words, fp)
#3 def3
def lyric_to_frequencies(lyrics):
    '''
    Count how many times each word occurs.

    lyrics -- iterable of words
    Returns a dictionary that maps every unique word to the number of times
    it appears in `lyrics`.
    '''
    myDict = {}
    for word in lyrics:
        # get() supplies 0 for a word seen for the first time.
        myDict[word] = myDict.get(word, 0) + 1
    return myDict
#4 def4
def most_common_words(freqs):
    '''
    Find the word(s) that occur most often.

    freqs -- dictionary mapping words to their counts
    Prints and returns a tuple (words, best): `best` is the highest count in
    `freqs` and `words` is the list of every word that has that count.
    Raises ValueError when `freqs` is empty, because max() has nothing to
    compare.
    '''
    best = max(freqs.values())  # the biggest count
    # several words can share the biggest count, so collect all of them
    words = [k for k in freqs if freqs[k] == best]
    print(words, best)
    return (words, best)
#5 def5
def words_often(freqs, minTimes):
    '''
    Collect the words that appear at least `minTimes` times, most frequent
    first.

    freqs    -- dictionary mapping words to their counts
    minTimes -- minimum count a word needs in order to be reported
    Returns a list of (words, count) tuples as produced by
    most_common_words().

    NOTE: `freqs` is modified in place - every reported word is deleted
    from it.
    '''
    result = []
    # Looping only while freqs is non-empty also covers the case where every
    # word reaches minTimes: most_common_words() calls max(), which raises
    # ValueError on an empty dictionary.
    while freqs:
        temp = most_common_words(freqs)
        if temp[1] < minTimes:
            break
        result.append(temp)
        # remove the reported words so the next pass finds the next-best count
        for w in temp[0]:
            del freqs[w]
    return result
#1
# list_of_words has already been built at module level by the code above,
# so no separate call is needed for this step.
#2 write the word list to 'she_loves_you_list.txt' as JSON
saving()
#3 count how often every word occurs
lyric_to_frequencies(list_of_words)
#4 print the most frequent word(s)
most_common_words(lyric_to_frequencies(list_of_words))
#5 print every group of words that occurs at least 5 times
# (a fresh frequency dictionary is passed because words_often deletes from it)
words_often(lyric_to_frequencies(list_of_words), 5)