从保存在二维列表中的三元组单词构建文本
Structure text from words of triplets saved in a 2D list
我目前有一个文本,其单词在二维列表中保存为三元组。
我的二维列表:
[['Python', 'is', 'an'], ['interpreted,', 'high-level', 'and'], ['general-purpose', 'programming', 'language.'], ["Python's", 'design', 'philosophy'], ['emphasizes', 'code', 'readability'], ['with', 'its', 'notable'], ['use', 'of', 'significant'], ['whitespace.', 'Its', 'language'], ['constructs', 'and', 'object-oriented'], ['approach', 'aim', 'to'], ['help', 'programmers', 'write'], ['clear,', 'logical', 'code'], ['for', 'small', 'and'], ['large-scale', 'projects.']]
我正在编写一段 Python 代码:先从这些三元组中随机选择一个,然后取它的最后两个单词,再选择一个以这两个单词开头的三元组,如此反复,从而生成新的随机文本。最后,当已写出 200 个单词,或者再也没有可供选择的三元组时,程序结束。
目前的代码:
import random
# Read the sample text and split it on any run of whitespace.  split() already
# treats newlines as separators; deleting them first would glue the last word
# of one line onto the first word of the next line.
with open(r'c:\python_TRIPLETS\Sample.txt', 'r') as file:
    data = file.read().split()

# Consecutive, non-overlapping groups of three words.  The final group is
# shorter when the word count is not a multiple of three.
lines = [data[i:i + 3] for i in range(0, len(data), 3)]

# No shuffling step: random.choice() below already picks at random, and
# shuffling the words inside each triplet (random.shuffle works in place and
# returns None) would destroy the word order the matching relies on.
first_triplet = random.choice(lines)
last_two = first_triplet[1:3]

# Flat list of words, seeded with the triplet that was picked first.
output_text = list(first_triplet)

# Stop after 200 words, or as soon as no triplet starts with the current
# last two words.
while len(output_text) < 200:
    candidates = [t for t in lines if t[0:2] == last_two]
    if not candidates:
        break
    next_triplet = random.choice(candidates)
    last_two = next_triplet[1:3]
    # The first two words repeat the previous tail, so only the words after
    # them are new (nothing is added for a short trailing group).
    output_text.extend(next_triplet[2:])
我无法自动执行搜索匹配并将它们存储在新列表中的重复过程。
有什么想法吗?
你可以使用递归函数(更改了部分代码,查看注释):
import random
# ["is", "an", "experiment"] is added to check that chaining works (no other
# triplet in the sample starts with the last two words of another one).
lines = [
    ['Python', 'is', 'an'],
    ['interpreted,', 'high-level', 'and'],
    ['general-purpose', 'programming', 'language.'],
    ["Python's", 'design', 'philosophy'],
    ['emphasizes', 'code', 'readability'],
    ['with', 'its', 'notable'],
    ['use', 'of', 'significant'],
    ['whitespace.', 'Its', 'language'],
    ['constructs', 'and', 'object-oriented'],
    ['approach', 'aim', 'to'],
    ['help', 'programmers', 'write'],
    ['clear,', 'logical', 'code'],
    ['for', 'small', 'and'],
    ['large-scale', 'projects.'],
    ["is", "an", "experiment"],
]
first_triplet = ['Python', 'is', 'an']  # random.choice(lines)


def appendNextTriplet(output_text, lines, max_words=200):
    """Extend a word list by repeatedly chaining matching triplets.

    Starting from the words in ``output_text``, pick at random a triplet from
    ``lines`` whose first two words equal the last two words collected so
    far, and append the whole triplet (so the two overlapping words appear
    twice in the result).  Repeat until no triplet matches or the result
    holds at least ``max_words`` words.

    Args:
        output_text: list of words to start from; it is not modified.
        lines: list of word lists (triplets) to choose from; not modified.
        max_words: no further triplet is appended once the result has this
            many words.  Because whole triplets are appended, the result can
            exceed the limit by up to two words.

    Returns:
        A new list containing the starting words followed by the chained
        triplets.
    """
    # Copy first: the starting list may be one of the triplets stored in
    # ``lines`` (e.g. when chosen with random.choice), and extending it in
    # place would corrupt that triplet.
    words = list(output_text)
    while len(words) < max_words:
        candidates = [t for t in lines if t[:2] == words[-2:]]
        if not candidates:
            break
        # Concatenate the words of the triplet (not the triplet itself) so
        # the result stays a flat list.
        words += random.choice(candidates)
    return words


print(appendNextTriplet(first_triplet, lines))  # ['Python', 'is', 'an', 'is', 'an', 'experiment']
import random
import sys
import os
import json
def split_into_triplets(words):
    """Return every window of three consecutive words in ``words``.

    The window advances one word at a time, so neighbouring triplets overlap
    by two words; that overlap is what allows one triplet to follow another.
    Only full three-word windows are produced, so fewer than three words
    yield an empty list.
    """
    return [words[i:i + 3] for i in range(len(words) - 2)]


def generate_words(triplets, max_words=200):
    """Build a random word sequence by chaining overlapping triplets.

    A starting triplet is chosen at random and its three words open the
    result.  Then, repeatedly, a random unused triplet whose first two words
    equal the current last two words is chosen and its last word is appended.
    Every triplet is used at most once.  Progress markers are printed along
    the way, including which stop condition fired ("a", "b" or "c").

    Args:
        triplets: list of word lists, e.g. from ``split_into_triplets``;
            it is not modified.
        max_words: stop once the result holds at least this many words.

    Returns:
        The generated words as a flat list (empty when ``triplets`` is
        empty).
    """
    if not triplets:
        return []

    # Work on a copy so used triplets can be removed without touching the
    # caller's list.
    pool = list(triplets)

    triplet = random.choice(pool)
    last_two = triplet[1:3]
    print("\nΕπιλεγμένη Τριπλέτα: \n", triplet)
    print("\nΔύο Τελευταίες Λέξεις Αυτής:\n", last_two)
    outlist = list(triplet)
    # first selected, remove from list
    pool.remove(triplet)

    n = 0
    while True:
        n += 1
        print("\nΕπανάληψη {0}\n".format(n))
        # Stop "a": every triplet has been used.
        if not pool:
            print("a")
            break
        # random.choice below already picks uniformly, so the pool does not
        # need to be shuffled first.
        candidates = [t for t in pool if t[:2] == last_two]
        print(candidates)
        # Stop "b": nothing starts with the current last two words.
        if not candidates:
            print("b")
            break
        # Stop "c": the word limit has been reached.
        if len(outlist) >= max_words:
            print("c")
            break
        triplet = random.choice(candidates)
        # Only the last word is new; the first two repeat the previous tail.
        outlist.append(triplet[-1])
        pool.remove(triplet)
        last_two = triplet[1:3]
    return outlist


def main():
    """Read the file named on the command line and write the generated text.

    The word list is written as JSON to ``<input path>.ot`` and also printed
    as a space-separated line.
    """
    if len(sys.argv) < 2:
        raise SystemExit("usage: {0} INPUT_FILE".format(sys.argv[0]))
    file_in = sys.argv[1]
    file_ot = str(file_in) + ".ot"

    # split() already separates on newlines; deleting them first would glue
    # the last word of one line onto the first word of the next line.
    with open(file_in, 'r') as file:
        data = file.read().split()

    lines = split_into_triplets(data)
    print("\nΧωρισμένο Κείμενο σε Λίστα Τριπλετών:\n", lines)

    outlist = generate_words(lines)
    print(outlist)

    with open(file_ot, 'w') as f:
        f.write(json.dumps(outlist, indent=10))
    print(" ".join(outlist))


if __name__ == "__main__":
    main()
我目前有一个文本,其单词在二维列表中保存为三元组。
我的二维列表:
[['Python', 'is', 'an'], ['interpreted,', 'high-level', 'and'], ['general-purpose', 'programming', 'language.'], ["Python's", 'design', 'philosophy'], ['emphasizes', 'code', 'readability'], ['with', 'its', 'notable'], ['use', 'of', 'significant'], ['whitespace.', 'Its', 'language'], ['constructs', 'and', 'object-oriented'], ['approach', 'aim', 'to'], ['help', 'programmers', 'write'], ['clear,', 'logical', 'code'], ['for', 'small', 'and'], ['large-scale', 'projects.']]
我正在编写一段 Python 代码:先从这些三元组中随机选择一个,然后取它的最后两个单词,再选择一个以这两个单词开头的三元组,如此反复,从而生成新的随机文本。最后,当已写出 200 个单词,或者再也没有可供选择的三元组时,程序结束。
目前的代码:
import random
# Read the sample text and split it on any run of whitespace.  split() already
# treats newlines as separators; deleting them first would glue the last word
# of one line onto the first word of the next line.
with open(r'c:\python_TRIPLETS\Sample.txt', 'r') as file:
    data = file.read().split()

# Consecutive, non-overlapping groups of three words.  The final group is
# shorter when the word count is not a multiple of three.
lines = [data[i:i + 3] for i in range(0, len(data), 3)]

# No shuffling step: random.choice() below already picks at random, and
# shuffling the words inside each triplet (random.shuffle works in place and
# returns None) would destroy the word order the matching relies on.
first_triplet = random.choice(lines)
last_two = first_triplet[1:3]

# Flat list of words, seeded with the triplet that was picked first.
output_text = list(first_triplet)

# Stop after 200 words, or as soon as no triplet starts with the current
# last two words.
while len(output_text) < 200:
    candidates = [t for t in lines if t[0:2] == last_two]
    if not candidates:
        break
    next_triplet = random.choice(candidates)
    last_two = next_triplet[1:3]
    # The first two words repeat the previous tail, so only the words after
    # them are new (nothing is added for a short trailing group).
    output_text.extend(next_triplet[2:])
我无法自动执行搜索匹配并将它们存储在新列表中的重复过程。
有什么想法吗?
你可以使用递归函数(更改了部分代码,查看注释):
import random
# ["is", "an", "experiment"] is added to check that chaining works (no other
# triplet in the sample starts with the last two words of another one).
lines = [
    ['Python', 'is', 'an'],
    ['interpreted,', 'high-level', 'and'],
    ['general-purpose', 'programming', 'language.'],
    ["Python's", 'design', 'philosophy'],
    ['emphasizes', 'code', 'readability'],
    ['with', 'its', 'notable'],
    ['use', 'of', 'significant'],
    ['whitespace.', 'Its', 'language'],
    ['constructs', 'and', 'object-oriented'],
    ['approach', 'aim', 'to'],
    ['help', 'programmers', 'write'],
    ['clear,', 'logical', 'code'],
    ['for', 'small', 'and'],
    ['large-scale', 'projects.'],
    ["is", "an", "experiment"],
]
first_triplet = ['Python', 'is', 'an']  # random.choice(lines)


def appendNextTriplet(output_text, lines, max_words=200):
    """Extend a word list by repeatedly chaining matching triplets.

    Starting from the words in ``output_text``, pick at random a triplet from
    ``lines`` whose first two words equal the last two words collected so
    far, and append the whole triplet (so the two overlapping words appear
    twice in the result).  Repeat until no triplet matches or the result
    holds at least ``max_words`` words.

    Args:
        output_text: list of words to start from; it is not modified.
        lines: list of word lists (triplets) to choose from; not modified.
        max_words: no further triplet is appended once the result has this
            many words.  Because whole triplets are appended, the result can
            exceed the limit by up to two words.

    Returns:
        A new list containing the starting words followed by the chained
        triplets.
    """
    # Copy first: the starting list may be one of the triplets stored in
    # ``lines`` (e.g. when chosen with random.choice), and extending it in
    # place would corrupt that triplet.
    words = list(output_text)
    while len(words) < max_words:
        candidates = [t for t in lines if t[:2] == words[-2:]]
        if not candidates:
            break
        # Concatenate the words of the triplet (not the triplet itself) so
        # the result stays a flat list.
        words += random.choice(candidates)
    return words


print(appendNextTriplet(first_triplet, lines))  # ['Python', 'is', 'an', 'is', 'an', 'experiment']
import random
import sys
import os
import json
def split_into_triplets(words):
    """Return every window of three consecutive words in ``words``.

    The window advances one word at a time, so neighbouring triplets overlap
    by two words; that overlap is what allows one triplet to follow another.
    Only full three-word windows are produced, so fewer than three words
    yield an empty list.
    """
    return [words[i:i + 3] for i in range(len(words) - 2)]


def generate_words(triplets, max_words=200):
    """Build a random word sequence by chaining overlapping triplets.

    A starting triplet is chosen at random and its three words open the
    result.  Then, repeatedly, a random unused triplet whose first two words
    equal the current last two words is chosen and its last word is appended.
    Every triplet is used at most once.  Progress markers are printed along
    the way, including which stop condition fired ("a", "b" or "c").

    Args:
        triplets: list of word lists, e.g. from ``split_into_triplets``;
            it is not modified.
        max_words: stop once the result holds at least this many words.

    Returns:
        The generated words as a flat list (empty when ``triplets`` is
        empty).
    """
    if not triplets:
        return []

    # Work on a copy so used triplets can be removed without touching the
    # caller's list.
    pool = list(triplets)

    triplet = random.choice(pool)
    last_two = triplet[1:3]
    print("\nΕπιλεγμένη Τριπλέτα: \n", triplet)
    print("\nΔύο Τελευταίες Λέξεις Αυτής:\n", last_two)
    outlist = list(triplet)
    # first selected, remove from list
    pool.remove(triplet)

    n = 0
    while True:
        n += 1
        print("\nΕπανάληψη {0}\n".format(n))
        # Stop "a": every triplet has been used.
        if not pool:
            print("a")
            break
        # random.choice below already picks uniformly, so the pool does not
        # need to be shuffled first.
        candidates = [t for t in pool if t[:2] == last_two]
        print(candidates)
        # Stop "b": nothing starts with the current last two words.
        if not candidates:
            print("b")
            break
        # Stop "c": the word limit has been reached.
        if len(outlist) >= max_words:
            print("c")
            break
        triplet = random.choice(candidates)
        # Only the last word is new; the first two repeat the previous tail.
        outlist.append(triplet[-1])
        pool.remove(triplet)
        last_two = triplet[1:3]
    return outlist


def main():
    """Read the file named on the command line and write the generated text.

    The word list is written as JSON to ``<input path>.ot`` and also printed
    as a space-separated line.
    """
    if len(sys.argv) < 2:
        raise SystemExit("usage: {0} INPUT_FILE".format(sys.argv[0]))
    file_in = sys.argv[1]
    file_ot = str(file_in) + ".ot"

    # split() already separates on newlines; deleting them first would glue
    # the last word of one line onto the first word of the next line.
    with open(file_in, 'r') as file:
        data = file.read().split()

    lines = split_into_triplets(data)
    print("\nΧωρισμένο Κείμενο σε Λίστα Τριπλετών:\n", lines)

    outlist = generate_words(lines)
    print(outlist)

    with open(file_ot, 'w') as f:
        f.write(json.dumps(outlist, indent=10))
    print(" ".join(outlist))


if __name__ == "__main__":
    main()