如何根据开始和结束元素从列表创建子列表?
How to create sublists from list based on start and end elements?
正在尝试根据开始和结束元素从列表创建子列表。我无法获取所有出现的开始和结束元素
# Sample input: standalone values interleaved with runs that are delimited by
# '<!program start>' and '<!program end>' marker elements.
lst = ['value0','<!program start>','value1','value2','<!program end>',
'value3','<!program start>','value4','<!program end>','value5']
预期输出:
[['value0'],['<!program start>','value1','value2','<!program end>'],
['value3'],['<!program start>','value4','<!program end>'],['value5']]
代码:
# list.index returns the position of the FIRST match only, so this attempt
# groups just the first start/end pair; any later pair stays flat in the result.
start_idx = lst.index('<!program start>')
end_idx = lst.index('<!program end>')
# Splice: items before the group + the group as one nested list + the untouched remainder.
final_result = lst[:start_idx] + [lst[start_idx:end_idx+1]] + lst[end_idx+1:]
print(final_result)
使用迭代:
# Group a flat list into sublists: every run from '<!program start>' through
# '<!program end>' becomes one sublist, and every element outside such a run
# becomes its own single-element sublist.
lst = ['value0', '<!program start>', 'value1', 'value2', '<!program end>',
       'value3', '<!program start>', 'value4', '<!program end>', 'value5']
res = []
start = False  # True while we are between a start marker and its end marker
temp = []      # elements collected for the group currently being built
for item in lst:
    if item == '<!program start>':
        start = True
        temp.append(item)
    elif item == '<!program end>':
        start = False
        temp.append(item)
        res.append(temp)
        temp = []  # fresh list so the group just stored is not mutated later
    elif start:
        temp.append(item)
    else:
        res.append([item])
# A start marker with no matching end marker would otherwise be dropped
# silently; keep whatever was collected as a final group.
if temp:
    res.append(temp)
print(res)
输出:
[['value0'], ['<!program start>', 'value1', 'value2', '<!program end>'], ['value3'], ['<!program start>', 'value4', '<!program end>'], ['value5']]
通过 start 标志,我可以判断当前元素是否位于开始标签和结束标签之间。
它不像你的单行代码那么酷,但看起来很管用:
def process(input_list, start, end):
    """Split *input_list* into sublists around start/end marker elements.

    Every run beginning with ``start`` and finishing with the next ``end``
    (both markers included) becomes one sublist. Every element outside such
    a run becomes its own single-element list.

    Args:
        input_list: Sequence to split (it is not modified).
        start: Element that opens a group.
        end: Element that closes a group.

    Returns:
        A list of sublists in the original order.

    Raises:
        ValueError: If a ``start`` element has no following ``end`` element.
    """
    output = []
    pos = 0
    total = len(input_list)
    while pos < total:
        current = input_list[pos]
        if current != start:
            # This isn't a start token, so just add it to the output.
            output.append([current])
            pos += 1
            continue
        # Found a start token: search for the matching end from the current
        # position instead of re-slicing the list on every step, which would
        # copy the remainder each time.
        try:
            end_index = input_list.index(end, pos)
        except ValueError:
            raise ValueError(
                "no %r found after %r at position %d" % (end, start, pos)
            ) from None
        output.append(input_list[pos:end_index + 1])
        pos = end_index + 1
    return output
我得到:
[['value0'],
['<!program start>', 'value1', 'value2', '<!program end>'],
['value3'],
['<!program start>', 'value4', '<!program end>'],
['value5']]
作为我认为正确的输出
您的代码的问题在于 index 只返回第一个匹配项的索引,而不是所有匹配项的索引。
但可以简单地使用 while 循环来完成。
# Walk the list by index; a start marker opens a group that is consumed up to
# and including the next end marker, anything else becomes a one-element list.
final_list = []
i = 0
while i < len(lst):
    inner_list = []
    word = lst[i]
    if word == "<!program start>":
        # The bounds check stops the scan at the end of the list when the end
        # marker is missing, instead of raising IndexError.
        while word != '<!program end>' and i < len(lst):
            word = lst[i]
            inner_list.append(word)
            i += 1
    else:
        inner_list.append(word)
        i += 1
    final_list.append(inner_list)
print(final_list)
您可以使用相对简单的 FSM (Finite State Machine) 处理数据:
def fsm(lst, start='<!program start>', end='<!program end>'):
    """Group *lst* with a three-state finite state machine.

    The machine expects a strictly alternating pattern: one standalone
    element, then one ``start`` ... ``end`` group, then one standalone
    element, and so on.

    States:
        0: expect a standalone element; it is emitted as ``[elem]``.
        1: expect ``start``; any other element ends processing.
        2: inside a group; collect elements until ``end`` is seen.

    Args:
        lst: Iterable of elements to group.
        start: Element that opens a group.
        end: Element that closes a group.

    Returns:
        The list of sublists built up to the point where the pattern stops
        matching. A group still open when the input runs out is not included.
    """
    result = []
    state = 0
    subl = []
    for elem in lst:
        if state == 0:
            result.append([elem])
            state = 1
        elif state == 1:
            if elem == start:
                subl = [elem]
                state = 2
            else:
                break  # End of pattern.
        elif state == 2:
            subl.append(elem)
            if elem == end:
                result.append(subl)
                state = 0
    return result
# Demo: run fsm on the sample list and print the grouped result.
lst = ['value0','<!program start>','value1','value2','<!program end>',
'value3','<!program start>','value4','<!program end>','value5']
print(fsm(lst))
嵌套 while 循环的类似解决方案。
# Nested-while variant: start/end runs become sublists, while standalone
# values are appended as-is (not wrapped in their own list).
test_list = ['value0', '<!program start>', 'value1', 'value2', '<!program end>',
             'value3', '<!program start>', 'value4', '<!program end>', 'value5']
answer_list = []
i = 0
while i < len(test_list):
    if test_list[i] == '<!program start>':
        sublist = []
        # Collect up to (not including) the end marker; the bounds check
        # avoids an IndexError when the end marker is missing.
        while i < len(test_list) and test_list[i] != '<!program end>':
            sublist.append(test_list[i])
            i += 1
        if i < len(test_list):
            # Stopped on the end marker: include it and step past it.
            sublist.append(test_list[i])
            i += 1
        answer_list.append(sublist)
    else:
        # Standalone value. An end marker with no preceding start marker also
        # lands here instead of touching an undefined or stale sublist.
        answer_list.append(test_list[i])
        i += 1
print(answer_list)
输出:
['value0', ['<!program start>', 'value1', 'value2', '<!program end>'], 'value3', ['<!program start>', 'value4', '<!program end>'], 'value5']
实际上,利用基本的 str 和 list 处理,还可以写出一些有趣的推导式方案。
例如,您可以先根据开始和结束标记的公共子字符串 "<!program " 将 lst 拆分成 chunks:
# Join everything into one string, then cut at the prefix shared by both
# markers; each chunk then begins with 'start>' or 'end>' (except the first).
# str.split already returns a list, so no comprehension is needed.
chunks = " ".join(lst).split("<!program ")
这些块固有地包含区分单个元素和标签之间元素的特征。
列表推导式是获得所需输出的一种简洁优雅的方式:
# A chunk starting with 'start>' holds the values between a marker pair; any
# other chunk is a standalone value, possibly prefixed with 'end>'.
# The marker remnant is removed by slicing off the exact prefix: str.strip
# takes a SET of characters, so strip('end> ') would also eat leading/trailing
# 'e', 'n', 'd' characters of the value itself (e.g. 'data' -> 'ata').
output = [
    ["<!program start>"] + s[len("start>"):].split() + ["<!program end>"]
    if s.startswith("start>")
    else [s[len("end>"):].strip() if s.startswith("end>") else s.strip()]
    for s in chunks
]
正在尝试根据开始和结束元素从列表创建子列表。我无法获取所有出现的开始和结束元素
# Sample input: standalone values interleaved with runs that are delimited by
# '<!program start>' and '<!program end>' marker elements.
lst = ['value0','<!program start>','value1','value2','<!program end>',
'value3','<!program start>','value4','<!program end>','value5']
预期输出:
[['value0'],['<!program start>','value1','value2','<!program end>'],
['value3'],['<!program start>','value4','<!program end>'],['value5']]
代码:
# list.index returns the position of the FIRST match only, so this attempt
# groups just the first start/end pair; any later pair stays flat in the result.
start_idx = lst.index('<!program start>')
end_idx = lst.index('<!program end>')
# Splice: items before the group + the group as one nested list + the untouched remainder.
final_result = lst[:start_idx] + [lst[start_idx:end_idx+1]] + lst[end_idx+1:]
print(final_result)
使用迭代:
# Group a flat list into sublists: every run from '<!program start>' through
# '<!program end>' becomes one sublist, and every element outside such a run
# becomes its own single-element sublist.
lst = ['value0', '<!program start>', 'value1', 'value2', '<!program end>',
       'value3', '<!program start>', 'value4', '<!program end>', 'value5']
res = []
start = False  # True while we are between a start marker and its end marker
temp = []      # elements collected for the group currently being built
for item in lst:
    if item == '<!program start>':
        start = True
        temp.append(item)
    elif item == '<!program end>':
        start = False
        temp.append(item)
        res.append(temp)
        temp = []  # fresh list so the group just stored is not mutated later
    elif start:
        temp.append(item)
    else:
        res.append([item])
# A start marker with no matching end marker would otherwise be dropped
# silently; keep whatever was collected as a final group.
if temp:
    res.append(temp)
print(res)
输出:
[['value0'], ['<!program start>', 'value1', 'value2', '<!program end>'], ['value3'], ['<!program start>', 'value4', '<!program end>'], ['value5']]
通过 start 标志,我可以判断当前元素是否位于开始标签和结束标签之间。
它不像你的单行代码那么酷,但看起来很管用:
def process(input_list, start, end):
    """Split *input_list* into sublists around start/end marker elements.

    Every run beginning with ``start`` and finishing with the next ``end``
    (both markers included) becomes one sublist. Every element outside such
    a run becomes its own single-element list.

    Args:
        input_list: Sequence to split (it is not modified).
        start: Element that opens a group.
        end: Element that closes a group.

    Returns:
        A list of sublists in the original order.

    Raises:
        ValueError: If a ``start`` element has no following ``end`` element.
    """
    output = []
    pos = 0
    total = len(input_list)
    while pos < total:
        current = input_list[pos]
        if current != start:
            # This isn't a start token, so just add it to the output.
            output.append([current])
            pos += 1
            continue
        # Found a start token: search for the matching end from the current
        # position instead of re-slicing the list on every step, which would
        # copy the remainder each time.
        try:
            end_index = input_list.index(end, pos)
        except ValueError:
            raise ValueError(
                "no %r found after %r at position %d" % (end, start, pos)
            ) from None
        output.append(input_list[pos:end_index + 1])
        pos = end_index + 1
    return output
我得到:
[['value0'],
['<!program start>', 'value1', 'value2', '<!program end>'],
['value3'],
['<!program start>', 'value4', '<!program end>'],
['value5']]
作为我认为正确的输出
您的代码的问题在于 index 只返回第一个匹配项的索引,而不是所有匹配项的索引。不过用一个简单的 while 循环就可以完成。
# Walk the list by index; a start marker opens a group that is consumed up to
# and including the next end marker, anything else becomes a one-element list.
final_list = []
i = 0
while i < len(lst):
    inner_list = []
    word = lst[i]
    if word == "<!program start>":
        # The bounds check stops the scan at the end of the list when the end
        # marker is missing, instead of raising IndexError.
        while word != '<!program end>' and i < len(lst):
            word = lst[i]
            inner_list.append(word)
            i += 1
    else:
        inner_list.append(word)
        i += 1
    final_list.append(inner_list)
print(final_list)
您可以使用相对简单的 FSM (Finite State Machine) 处理数据:
def fsm(lst, start='<!program start>', end='<!program end>'):
    """Group *lst* with a three-state finite state machine.

    The machine expects a strictly alternating pattern: one standalone
    element, then one ``start`` ... ``end`` group, then one standalone
    element, and so on.

    States:
        0: expect a standalone element; it is emitted as ``[elem]``.
        1: expect ``start``; any other element ends processing.
        2: inside a group; collect elements until ``end`` is seen.

    Args:
        lst: Iterable of elements to group.
        start: Element that opens a group.
        end: Element that closes a group.

    Returns:
        The list of sublists built up to the point where the pattern stops
        matching. A group still open when the input runs out is not included.
    """
    result = []
    state = 0
    subl = []
    for elem in lst:
        if state == 0:
            result.append([elem])
            state = 1
        elif state == 1:
            if elem == start:
                subl = [elem]
                state = 2
            else:
                break  # End of pattern.
        elif state == 2:
            subl.append(elem)
            if elem == end:
                result.append(subl)
                state = 0
    return result
# Demo: run fsm on the sample list and print the grouped result.
lst = ['value0','<!program start>','value1','value2','<!program end>',
'value3','<!program start>','value4','<!program end>','value5']
print(fsm(lst))
嵌套 while 循环的类似解决方案。
# Nested-while variant: start/end runs become sublists, while standalone
# values are appended as-is (not wrapped in their own list).
test_list = ['value0', '<!program start>', 'value1', 'value2', '<!program end>',
             'value3', '<!program start>', 'value4', '<!program end>', 'value5']
answer_list = []
i = 0
while i < len(test_list):
    if test_list[i] == '<!program start>':
        sublist = []
        # Collect up to (not including) the end marker; the bounds check
        # avoids an IndexError when the end marker is missing.
        while i < len(test_list) and test_list[i] != '<!program end>':
            sublist.append(test_list[i])
            i += 1
        if i < len(test_list):
            # Stopped on the end marker: include it and step past it.
            sublist.append(test_list[i])
            i += 1
        answer_list.append(sublist)
    else:
        # Standalone value. An end marker with no preceding start marker also
        # lands here instead of touching an undefined or stale sublist.
        answer_list.append(test_list[i])
        i += 1
print(answer_list)
输出:
['value0', ['<!program start>', 'value1', 'value2', '<!program end>'], 'value3', ['<!program start>', 'value4', '<!program end>'], 'value5']
实际上,利用基本的 str 和 list 处理,还可以写出一些有趣的推导式方案。
例如,您可以先根据开始和结束标记的公共子字符串 "<!program " 将 lst 拆分成 chunks:
# Join everything into one string, then cut at the prefix shared by both
# markers; each chunk then begins with 'start>' or 'end>' (except the first).
# str.split already returns a list, so no comprehension is needed.
chunks = " ".join(lst).split("<!program ")
这些块固有地包含区分单个元素和标签之间元素的特征。
列表推导式是获得所需输出的一种简洁优雅的方式:
# A chunk starting with 'start>' holds the values between a marker pair; any
# other chunk is a standalone value, possibly prefixed with 'end>'.
# The marker remnant is removed by slicing off the exact prefix: str.strip
# takes a SET of characters, so strip('end> ') would also eat leading/trailing
# 'e', 'n', 'd' characters of the value itself (e.g. 'data' -> 'ata').
output = [
    ["<!program start>"] + s[len("start>"):].split() + ["<!program end>"]
    if s.startswith("start>")
    else [s[len("end>"):].strip() if s.startswith("end>") else s.strip()]
    for s in chunks
]