如何在 Python 中只展平嵌套列表的第二层(以便之后将其转换为字典)?
How to only flatten the second level of a list of lists (to later turn it into a dictionary) in python?
我有一个列表列表,稍后我想把它变成字典。问题是列表的列表非常不规则:对于一个键,存在 1 到 4 个列表,其中的数字属于该键。原始数据集存储在json中。
这是一个示例数据集:
data = [36146779,
[17628,35633, 2847385, 71393, 41814],[51068348,49722,3255134,66598],[103475099, 1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031, 3573662],[719279707,299836,40722,35134,668],[1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138],[17628,35633, 2847385],
71280747,
[806,116805,11845],[17628,35633, 2847385]]
我能够完全展平列表列表,但我无法将这个平面列表变成具有给定键的字典
ex_eco = ["36146779","22971125","111125168","71280747"]
(我确定也可以直接将第一个嵌套列表转成字典,但我找不到解决方案,所以我尝试了这种方法)
def flatten(l):
    """Flatten arbitrarily nested lists/tuples into one flat list.

    Args:
        l: An iterable whose items may themselves be lists or tuples,
            nested to any depth.

    Returns:
        A new list containing every non-list, non-tuple item in
        left-to-right order.
    """
    out = []
    # An explicit stack of iterators replaces recursion, so deeply nested
    # input cannot exhaust the interpreter's recursion limit.
    stack = [iter(l)]
    while stack:
        for item in stack[-1]:
            if isinstance(item, (list, tuple)):
                # Descend into the nested sequence; the parent iterator
                # resumes where it left off once this one is exhausted.
                stack.append(iter(item))
                break
            out.append(item)
        else:
            # The innermost iterator is exhausted; go back to its parent.
            stack.pop()
    return out
flattened_eco = flatten(data)
print(flattened_eco[0:100])
我得到的是一个平面列表:
[36146779, 17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257, 22971125, 230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257, 111125168, 719279707, 299836, 40722, 35138, 17628, 35633, 2847385, 71280747, 806, 116805, 11845, 17628, 35633, 2847385]
我需要的是这样的:
[36146779,
[17628,35633, 2847385, 71393,41814,51068348,49722,3255134,66598,103475099,1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031,3573662,719279707,299836,40722,35134,668,1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138,17628,35633, 2847385],
71280747,
[806,116805,11845,17628,35633, 2847385]
]
import itertools
def flatten(data):
    """Merge each run of consecutive nested lists into a single list.

    Only the second level is flattened: top-level scalars (the keys) are
    kept as they are, and every run of adjacent lists/tuples is
    concatenated into one list.

    Args:
        data: An iterable mixing scalars and lists/tuples.

    Returns:
        A new list in which scalars appear unchanged and each run of
        adjacent lists/tuples is replaced by one merged list.
    """
    flattened = []
    # Group on "is this item a nested sequence?" rather than on the exact
    # type, so non-int keys (e.g. strings) are not mistaken for sequences
    # and adjacent list/tuple items fall into the same run.
    for is_nested, group in itertools.groupby(
        data, lambda item: isinstance(item, (list, tuple))
    ):
        if is_nested:
            flattened.append(list(itertools.chain.from_iterable(group)))
        else:
            # Keep every scalar of the run, not only the first one.
            flattened.extend(group)
    return flattened
示例
>>> data = [36146779,
[17628,35633, 2847385, 71393, 41814],[51068348,49722,3255134,66598],[103475099, 1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031, 3573662],[719279707,299836,40722,35134,668],[1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138],[17628,35633, 2847385],
71280747,
[806,116805,11845],[17628,35633, 2847385]]
>>> flatten(data)
[36146779,
[17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257],
22971125,
[230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257],
111125168,
[719279707, 299836, 40722, 35138, 17628, 35633, 2847385],
71280747,
[806, 116805, 11845, 17628, 35633, 2847385]]
要更进一步创建字典,只需将函数的最后一行改为字典推导式(dict comprehension):
def dictify(data):
    """Build a dict mapping each top-level key to its merged values.

    Every scalar in ``data`` is a key; the run of lists/tuples that
    directly follows a key is concatenated into that key's value.  A key
    that is not followed by any list maps to an empty list.  If a key
    occurs more than once, the last occurrence wins.

    Args:
        data: An iterable of hashable scalars (keys), each optionally
            followed by one or more lists/tuples of values.

    Returns:
        A dict of ``key -> list`` in order of first appearance of the keys.

    Raises:
        ValueError: If ``data`` starts with a list/tuple, i.e. there is no
            key to attach those values to.
    """
    flattened = []
    for is_nested, group in itertools.groupby(
        data, lambda item: isinstance(item, (list, tuple))
    ):
        if is_nested:
            if not flattened:
                raise ValueError("data must start with a key, not a nested list")
            flattened.append(list(itertools.chain.from_iterable(group)))
        else:
            # Keep every key of the run, not only the first one.
            flattened.extend(group)
    # Merged runs are always ``list`` objects and keys never are, so a key's
    # value is the next element when that element is a list, else empty.
    return {
        item: (following if isinstance(following, list) else [])
        for item, following in zip(flattened, flattened[1:] + [None])
        if not isinstance(item, list)
    }
>>> dictify(data)
{36146779: [17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257],
22971125: [230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257],
111125168: [719279707, 299836, 40722, 35138, 17628, 35633, 2847385],
71280747: [806, 116805, 11845, 17628, 35633, 2847385]}
从您的输入来看,您似乎不需要递归函数,只需使用迭代:
# Build {key: merged values} in a single pass over ``data``.
result = {}
keys = []
for d in data:
    if isinstance(d, (list, tuple)):
        # A nested list belongs to the most recently seen key.
        result[keys[-1]].extend(d)
    else:
        # Any non-sequence item starts a new key with an empty value list.
        result[d] = []
        keys.append(d)
使用提供的数据输出:
{36146779: [17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257], 22971125: [230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257], 111125168: [719279707, 299836, 40722, 35138, 17628, 35633, 2847385], 71280747: [806, 116805, 11845, 17628, 35633, 2847385]}
- extend():用于把一个列表中的元素追加到另一个列表末尾(即合并两个列表)。
例如:
data = [36146779,
[17628,35633, 2847385, 71393, 41814],[51068348,49722,3255134,66598],[103475099, 1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031, 3573662],[719279707,299836,40722,35134,668],[1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138],[17628,35633, 2847385],
71280747,
[806,116805,11845],[17628,35633, 2847385]]

# Map every top-level key to the concatenation of the lists that follow it.
new_dict = {}
temp = None  # most recently seen key
for x in data:
    if not isinstance(x, list):
        # A non-list item starts a new key with an empty value list.
        new_dict[x] = []
        temp = x
    else:
        # A list extends the values of the key seen last.
        new_dict[temp].extend(x)
print(new_dict)
输出:
{36146779: [17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257], 22971125: [230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257], 111125168: [719279707, 299836, 40722, 35138, 17628, 35633, 2847385], 71280747: [806, 116805, 11845, 17628, 35633, 2847385]}
一个使用 itertools.groupby 的版本:
data = [36146779,
[17628,35633, 2847385, 71393, 41814],[51068348,49722,3255134,66598],[103475099, 1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031, 3573662],[719279707,299836,40722,35134,668],[1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138],[17628,35633, 2847385],
71280747,
[806,116805,11845],[17628,35633, 2847385]]
from itertools import groupby
def generate(d):
    """Yield top-level scalars unchanged and each run of nested lists merged.

    Args:
        d: An iterable mixing scalars and lists/tuples.

    Yields:
        Each scalar as-is, and for every run of adjacent lists/tuples one
        new list holding all of their elements in order.
    """
    for v, g in groupby(d, lambda k: isinstance(k, (tuple, list))):
        if not v:
            yield from g
        else:
            # A flattening comprehension works for tuples as well as lists
            # (``sum(g, [])`` raises TypeError on a tuple) and avoids the
            # repeated copying of list concatenation.
            yield [element for nested in g for element in nested]
from pprint import pprint
pprint([*generate(data)], width=180)
打印:
[36146779,
[17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257],
22971125,
[230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257],
111125168,
[719279707, 299836, 40722, 35138, 17628, 35633, 2847385],
71280747,
[806, 116805, 11845, 17628, 35633, 2847385]]
我有一个列表列表,稍后我想把它变成字典。问题是列表的列表非常不规则:对于一个键,存在 1 到 4 个列表,其中的数字属于该键。原始数据集存储在json中。 这是一个示例数据集:
data = [36146779,
[17628,35633, 2847385, 71393, 41814],[51068348,49722,3255134,66598],[103475099, 1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031, 3573662],[719279707,299836,40722,35134,668],[1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138],[17628,35633, 2847385],
71280747,
[806,116805,11845],[17628,35633, 2847385]]
我能够完全展平列表列表,但我无法将这个平面列表变成具有给定键的字典
ex_eco = ["36146779","22971125","111125168","71280747"]
(我确定也可以直接将第一个嵌套列表转成字典,但我找不到解决方案,所以我尝试了这种方法)
def flatten(l):
    """Flatten arbitrarily nested lists/tuples into one flat list.

    Args:
        l: An iterable whose items may themselves be lists or tuples,
            nested to any depth.

    Returns:
        A new list containing every non-list, non-tuple item in
        left-to-right order.
    """
    out = []
    # An explicit stack of iterators replaces recursion, so deeply nested
    # input cannot exhaust the interpreter's recursion limit.
    stack = [iter(l)]
    while stack:
        for item in stack[-1]:
            if isinstance(item, (list, tuple)):
                # Descend into the nested sequence; the parent iterator
                # resumes where it left off once this one is exhausted.
                stack.append(iter(item))
                break
            out.append(item)
        else:
            # The innermost iterator is exhausted; go back to its parent.
            stack.pop()
    return out
flattened_eco = flatten(data)
print(flattened_eco[0:100])
我得到的是一个平面列表:
[36146779, 17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257, 22971125, 230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257, 111125168, 719279707, 299836, 40722, 35138, 17628, 35633, 2847385, 71280747, 806, 116805, 11845, 17628, 35633, 2847385]
我需要的是这样的:
[36146779,
[17628,35633, 2847385, 71393,41814,51068348,49722,3255134,66598,103475099,1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031,3573662,719279707,299836,40722,35134,668,1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138,17628,35633, 2847385],
71280747,
[806,116805,11845,17628,35633, 2847385]
]
import itertools
def flatten(data):
    """Merge each run of consecutive nested lists into a single list.

    Only the second level is flattened: top-level scalars (the keys) are
    kept as they are, and every run of adjacent lists/tuples is
    concatenated into one list.

    Args:
        data: An iterable mixing scalars and lists/tuples.

    Returns:
        A new list in which scalars appear unchanged and each run of
        adjacent lists/tuples is replaced by one merged list.
    """
    flattened = []
    # Group on "is this item a nested sequence?" rather than on the exact
    # type, so non-int keys (e.g. strings) are not mistaken for sequences
    # and adjacent list/tuple items fall into the same run.
    for is_nested, group in itertools.groupby(
        data, lambda item: isinstance(item, (list, tuple))
    ):
        if is_nested:
            flattened.append(list(itertools.chain.from_iterable(group)))
        else:
            # Keep every scalar of the run, not only the first one.
            flattened.extend(group)
    return flattened
示例
>>> data = [36146779,
[17628,35633, 2847385, 71393, 41814],[51068348,49722,3255134,66598],[103475099, 1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031, 3573662],[719279707,299836,40722,35134,668],[1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138],[17628,35633, 2847385],
71280747,
[806,116805,11845],[17628,35633, 2847385]]
>>> flatten(data)
[36146779,
[17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257],
22971125,
[230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257],
111125168,
[719279707, 299836, 40722, 35138, 17628, 35633, 2847385],
71280747,
[806, 116805, 11845, 17628, 35633, 2847385]]
要更进一步创建字典,只需将函数的最后一行改为字典推导式(dict comprehension):
def dictify(data):
    """Build a dict mapping each top-level key to its merged values.

    Every scalar in ``data`` is a key; the run of lists/tuples that
    directly follows a key is concatenated into that key's value.  A key
    that is not followed by any list maps to an empty list.  If a key
    occurs more than once, the last occurrence wins.

    Args:
        data: An iterable of hashable scalars (keys), each optionally
            followed by one or more lists/tuples of values.

    Returns:
        A dict of ``key -> list`` in order of first appearance of the keys.

    Raises:
        ValueError: If ``data`` starts with a list/tuple, i.e. there is no
            key to attach those values to.
    """
    flattened = []
    for is_nested, group in itertools.groupby(
        data, lambda item: isinstance(item, (list, tuple))
    ):
        if is_nested:
            if not flattened:
                raise ValueError("data must start with a key, not a nested list")
            flattened.append(list(itertools.chain.from_iterable(group)))
        else:
            # Keep every key of the run, not only the first one.
            flattened.extend(group)
    # Merged runs are always ``list`` objects and keys never are, so a key's
    # value is the next element when that element is a list, else empty.
    return {
        item: (following if isinstance(following, list) else [])
        for item, following in zip(flattened, flattened[1:] + [None])
        if not isinstance(item, list)
    }
>>> dictify(data)
{36146779: [17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257],
22971125: [230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257],
111125168: [719279707, 299836, 40722, 35138, 17628, 35633, 2847385],
71280747: [806, 116805, 11845, 17628, 35633, 2847385]}
从您的输入来看,您似乎不需要递归函数,只需使用迭代:
# Build {key: merged values} in a single pass over ``data``.
result = {}
keys = []
for d in data:
    if isinstance(d, (list, tuple)):
        # A nested list belongs to the most recently seen key.
        result[keys[-1]].extend(d)
    else:
        # Any non-sequence item starts a new key with an empty value list.
        result[d] = []
        keys.append(d)
使用提供的数据输出:
{36146779: [17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257], 22971125: [230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257], 111125168: [719279707, 299836, 40722, 35138, 17628, 35633, 2847385], 71280747: [806, 116805, 11845, 17628, 35633, 2847385]}
- extend():用于把一个列表中的元素追加到另一个列表末尾(即合并两个列表)。
例如:
data = [36146779,
[17628,35633, 2847385, 71393, 41814],[51068348,49722,3255134,66598],[103475099, 1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031, 3573662],[719279707,299836,40722,35134,668],[1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138],[17628,35633, 2847385],
71280747,
[806,116805,11845],[17628,35633, 2847385]]

# Map every top-level key to the concatenation of the lists that follow it.
new_dict = {}
temp = None  # most recently seen key
for x in data:
    if not isinstance(x, list):
        # A non-list item starts a new key with an empty value list.
        new_dict[x] = []
        temp = x
    else:
        # A list extends the values of the key seen last.
        new_dict[temp].extend(x)
print(new_dict)
输出:
{36146779: [17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257], 22971125: [230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257], 111125168: [719279707, 299836, 40722, 35138, 17628, 35633, 2847385], 71280747: [806, 116805, 11845, 17628, 35633, 2847385]}
一个使用 itertools.groupby 的版本:
data = [36146779,
[17628,35633, 2847385, 71393, 41814],[51068348,49722,3255134,66598],[103475099, 1337536, 1136863360,257],
22971125,
[230806,116805,118456,9031, 3573662],[719279707,299836,40722,35134,668],[1337536, 1136863360,257],
111125168,
[719279707,299836,40722,35138],[17628,35633, 2847385],
71280747,
[806,116805,11845],[17628,35633, 2847385]]
from itertools import groupby
def generate(d):
    """Yield top-level scalars unchanged and each run of nested lists merged.

    Args:
        d: An iterable mixing scalars and lists/tuples.

    Yields:
        Each scalar as-is, and for every run of adjacent lists/tuples one
        new list holding all of their elements in order.
    """
    for v, g in groupby(d, lambda k: isinstance(k, (tuple, list))):
        if not v:
            yield from g
        else:
            # A flattening comprehension works for tuples as well as lists
            # (``sum(g, [])`` raises TypeError on a tuple) and avoids the
            # repeated copying of list concatenation.
            yield [element for nested in g for element in nested]
from pprint import pprint
pprint([*generate(data)], width=180)
打印:
[36146779,
[17628, 35633, 2847385, 71393, 41814, 51068348, 49722, 3255134, 66598, 103475099, 1337536, 1136863360, 257],
22971125,
[230806, 116805, 118456, 9031, 3573662, 719279707, 299836, 40722, 35134, 668, 1337536, 1136863360, 257],
111125168,
[719279707, 299836, 40722, 35138, 17628, 35633, 2847385],
71280747,
[806, 116805, 11845, 17628, 35633, 2847385]]