获取嵌套字典与列表的组合数
Get number of combinations of nested dictionary with lists
我有一个由嵌套字典组成的列表,字典里还包含列表;我想统计在所有字典中,各个键的取值之间每种组合出现的次数。
输入示例
json =
[
{
"lang": "fr",
"dates": ["d1", "d2"],
"address": [
{
"city": "Paris",
"zip": "75001"
},
{
"city": "Lyon",
"zip": "69600"
}
]
},
{
"lang": "fr",
"dates": ["d2", "d3"],
"address": [
{
"city": "Paris",
"zip": "75001"
}
]
}
]
预期输出为:
{
'address.city_dates': {"Paris_d1": 1, "Paris_d2": 2 ,"Paris_d3": 1, "Lyon_d1": 1, "Lyon_d2": 1},
'address.city_lang': {"Paris_fr": 2, "Lyon_fr": 1},
'address.city_address.zip': {"Lyon_69600": 1, "Paris_75001": 2},
'address.zip_dates': {"75001_d1": 1, "75001_d2": 2 ,"75001_d3": 1, "69600_d1": 1, "69600_d2": 1},
'address.zip_lang': {"75001_fr": 2, "69600_fr": 1},
'dates_lang': {"d1_fr": 1, "d2_fr": 2, "d3_fr": 1}
}
注意:在我的例子中关系是可以互换的:X_Y 类似于 Y_X
尝试:
我写了下面这个递归调用的函数,但卡在了如何处理列表以及如何找出所有层级的组合上。
def get_combs(element, contextKey, relations):
    """Tally value pairs for pairs of sibling keys, recursing into containers.

    Args:
        element: A list, a dict, or any other value (other values are ignored).
        contextKey: Key under which ``element`` was found; it is only
            forwarded to the recursive calls made for list items.
        relations: Dict updated in place. It maps ``"<key1>_<key2>"`` to a
            dict of ``"<str(value1)>_<str(value2)>"`` -> occurrence count.

    NOTE: the pool of candidate keys shrinks while it is being walked, so
    only every other key (in sorted order) is used as the first key of a
    pair and recursed into. Values that are lists or dicts are stringified
    as a whole when paired; they are not expanded.
    """
    if type(element) is list:
        # A list has no keys of its own: handle each item in the same context.
        for item in element:
            get_combs(item, contextKey, relations)
        return
    if type(element) is not dict:
        return

    remaining = sorted(element.keys())
    position = 0
    while position < len(remaining):
        # Take the current key out of the pool so X_Y and Y_X are not both built.
        first_key = remaining.pop(position)
        for second_key in remaining:
            pair_name = first_key + "_" + second_key
            value_name = f"{element[first_key]!s}_{element[second_key]!s}"
            counts = relations.setdefault(pair_name, {})
            counts[value_name] = counts.get(value_name, 0) + 1
        # Descend into the value so nested dicts/lists are processed as well.
        get_combs(element[first_key], first_key, relations)
        position += 1
def main(json):
    """Feed every top-level element to ``get_combs`` and return the tallies.

    Args:
        json: Iterable of elements (dicts or lists) to analyse.

    Returns:
        The dict that ``get_combs`` filled in while walking the elements.
    """
    collected = {}
    # Every element shares the same accumulator so counts add up across them.
    for record in json:
        get_combs(record, 'root', collected)
    return collected
# Run the attempt on the sample input and print the collected relations.
pippo = main(json)
print(pippo)
输出:
{'address_dates': {"[{'city': 'Paris', 'zip': '75001'}, {'city': 'Lyon', 'zip': '69600'}]_['d1', 'd2']": 1,
"[{'city': 'Paris', 'zip': '75001'}]_['d2', 'd3']": 1},
'address_lang': {"[{'city': 'Paris', 'zip': '75001'}, {'city': 'Lyon', 'zip': '69600'}]_fr": 1,
"[{'city': 'Paris', 'zip': '75001'}]_fr": 1},
'city_zip': {"Lyon_69600": 1, "Paris_75001": 2},
'lang_dates': {"fr_['d1', 'd2']": 1, "fr_['d2', 'd3']": 1}}
您可以递归地展开结构,然后按形成的键进行分组。从那里开始,可以应用基本组合:
from collections import defaultdict, Counter
import itertools as it
# Sample input: two records, each with a scalar, a list of scalars and a list of dicts.
# NOTE(review): the name `json` would shadow the stdlib module if that were ever imported here.
json = [{'lang': 'fr', 'dates': ['d1', 'd2'], 'address': [{'city': 'Paris', 'zip': '75001'}, {'city': 'Lyon', 'zip': '69600'}]}, {'lang': 'fr', 'dates': ['d2', 'd3'], 'address': [{'city': 'Paris', 'zip': '75001'}]}]
def get_keys(d, c=None, j=None):
    """Flatten nested dicts/lists into ``(dotted_path, value, index)`` tuples.

    Args:
        d: The structure to flatten: a dict, a list, or a leaf value.
        c: Sequence of keys leading to ``d``; joined with ``.`` to form the
            path. Defaults to an empty path.
        j: Index tag attached to leaves that are reached without crossing
            another list.

    Yields:
        One ``(path, value, index)`` tuple per leaf. ``index`` is the
        position, in the nearest enclosing list, of the dict item the leaf
        belongs to; it is ``None`` when that list item is not a dict, or
        when no list encloses the leaf and the caller did not supply ``j``.
    """
    # Use None as the default instead of a shared mutable list.
    path = [] if c is None else c
    if isinstance(d, list):
        # Lists add nothing to the path; dict items are tagged with their
        # position so fields of the same item can be matched up later.
        for position, item in enumerate(d):
            yield from get_keys(
                item,
                c=path,
                j=position if isinstance(item, dict) else None,
            )
    elif isinstance(d, dict):
        for key, value in d.items():
            # Build a fresh path per key so sibling branches never share state.
            yield from get_keys(value, c=[*path, key], j=j)
    else:
        yield ('.'.join(path), d, j)
def get_combos(data):
    """Count co-occurring values for every pair of flattened key paths.

    Args:
        data: A nested dict/list structure accepted by ``get_keys``.

    Returns:
        A dict mapping ``"<later path>_<earlier path>"`` to a ``Counter`` of
        ``"<later value>_<earlier value>"`` strings, where "earlier"/"later"
        refer to the order in which the paths were first produced by
        ``get_keys``.
    """
    grouped = defaultdict(list)
    for path, *rest in get_keys(data):
        grouped[path].append(rest)

    combos = {}
    for first_path, second_path in it.combinations(grouped, 2):
        tally = Counter()
        pairs = it.product(grouped[first_path], grouped[second_path])
        for [first_value, first_idx], [second_value, second_idx] in pairs:
            # Pair two values unless both carry an index and the indexes differ.
            if type(first_idx) != type(second_idx) or first_idx == second_idx:
                tally[f'{second_value}_{first_value}'] += 1
        combos[f'{second_path}_{first_path}'] = tally
    return combos
# Merge the per-record counters into one plain dict of dicts, adding up the
# counts of value pairs that occur in more than one record.
new_d = {}
for record in json:
    for pair_key, pair_counts in get_combos(record).items():
        for value_key, count in pair_counts.items():
            # Accumulate in place instead of rebuilding the inner dict on
            # every entry; an inner dict is created only once a count exists.
            merged = new_d.setdefault(pair_key, {})
            merged[value_key] = merged.get(value_key, 0) + count
输出:
{'dates_lang': {'d1_fr': 1, 'd2_fr': 2, 'd3_fr': 1}, 'address.city_lang': {'Paris_fr': 2, 'Lyon_fr': 1}, 'address.zip_lang': {'75001_fr': 2, '69600_fr': 1}, 'address.city_dates': {'Paris_d1': 1, 'Lyon_d1': 1, 'Paris_d2': 2, 'Lyon_d2': 1, 'Paris_d3': 1}, 'address.zip_dates': {'75001_d1': 1, '69600_d1': 1, '75001_d2': 2, '69600_d2': 1, '75001_d3': 1}, 'address.zip_address.city': {'75001_Paris': 2, '69600_Lyon': 1}}
编辑:要打印展平后的结构,请先把 get_keys 的调用结果存储起来:
def get_combos(data):
    """Count co-occurring values per pair of key paths, printing the flat view.

    Same computation as the non-printing variant, except that the flattened
    ``get_keys`` output is stored in a list and printed before it is grouped.

    Args:
        data: A nested dict/list structure accepted by ``get_keys``.

    Returns:
        A dict mapping ``"<later path>_<earlier path>"`` to a ``Counter`` of
        ``"<later value>_<earlier value>"`` strings.
    """
    flattened = list(get_keys(data))
    print(flattened)  # displaying flattened input dict

    grouped = defaultdict(list)
    for path, *rest in flattened:
        grouped[path].append(rest)

    combos = {}
    for first_path, second_path in it.combinations(grouped, 2):
        tally = Counter()
        pairs = it.product(grouped[first_path], grouped[second_path])
        for [first_value, first_idx], [second_value, second_idx] in pairs:
            # Pair two values unless both carry an index and the indexes differ.
            if type(first_idx) != type(second_idx) or first_idx == second_idx:
                tally[f'{second_value}_{first_value}'] += 1
        combos[f'{second_path}_{first_path}'] = tally
    return combos
我有一个由嵌套字典组成的列表,字典里还包含列表;我想统计在所有字典中,各个键的取值之间每种组合出现的次数。
输入示例
json =
[
{
"lang": "fr",
"dates": ["d1", "d2"],
"address": [
{
"city": "Paris",
"zip": "75001"
},
{
"city": "Lyon",
"zip": "69600"
}
]
},
{
"lang": "fr",
"dates": ["d2", "d3"],
"address": [
{
"city": "Paris",
"zip": "75001"
}
]
}
]
预期输出为:
{
'address.city_dates': {"Paris_d1": 1, "Paris_d2": 2 ,"Paris_d3": 1, "Lyon_d1": 1, "Lyon_d2": 1},
'address.city_lang': {"Paris_fr": 2, "Lyon_fr": 1},
'address.city_address.zip': {"Lyon_69600": 1, "Paris_75001": 2},
'address.zip_dates': {"75001_d1": 1, "75001_d2": 2 ,"75001_d3": 1, "69600_d1": 1, "69600_d2": 1},
'address.zip_lang': {"75001_fr": 2, "69600_fr": 1},
'dates_lang': {"d1_fr": 1, "d2_fr": 2, "d3_fr": 1}
}
注意:在我的例子中关系是可以互换的:X_Y 类似于 Y_X
尝试:我写了下面这个递归调用的函数,但卡在了如何处理列表以及如何找出所有层级的组合上。
def get_combs(element, contextKey, relations):
    """Tally value pairs for pairs of sibling keys, recursing into containers.

    Args:
        element: A list, a dict, or any other value (other values are ignored).
        contextKey: Key under which ``element`` was found; it is only
            forwarded to the recursive calls made for list items.
        relations: Dict updated in place. It maps ``"<key1>_<key2>"`` to a
            dict of ``"<str(value1)>_<str(value2)>"`` -> occurrence count.

    NOTE: the pool of candidate keys shrinks while it is being walked, so
    only every other key (in sorted order) is used as the first key of a
    pair and recursed into. Values that are lists or dicts are stringified
    as a whole when paired; they are not expanded.
    """
    if type(element) is list:
        # A list has no keys of its own: handle each item in the same context.
        for item in element:
            get_combs(item, contextKey, relations)
        return
    if type(element) is not dict:
        return

    remaining = sorted(element.keys())
    position = 0
    while position < len(remaining):
        # Take the current key out of the pool so X_Y and Y_X are not both built.
        first_key = remaining.pop(position)
        for second_key in remaining:
            pair_name = first_key + "_" + second_key
            value_name = f"{element[first_key]!s}_{element[second_key]!s}"
            counts = relations.setdefault(pair_name, {})
            counts[value_name] = counts.get(value_name, 0) + 1
        # Descend into the value so nested dicts/lists are processed as well.
        get_combs(element[first_key], first_key, relations)
        position += 1
def main(json):
    """Feed every top-level element to ``get_combs`` and return the tallies.

    Args:
        json: Iterable of elements (dicts or lists) to analyse.

    Returns:
        The dict that ``get_combs`` filled in while walking the elements.
    """
    collected = {}
    # Every element shares the same accumulator so counts add up across them.
    for record in json:
        get_combs(record, 'root', collected)
    return collected
# Run the attempt on the sample input and print the collected relations.
pippo = main(json)
print(pippo)
输出:
{'address_dates': {"[{'city': 'Paris', 'zip': '75001'}, {'city': 'Lyon', 'zip': '69600'}]_['d1', 'd2']": 1,
"[{'city': 'Paris', 'zip': '75001'}]_['d2', 'd3']": 1},
'address_lang': {"[{'city': 'Paris', 'zip': '75001'}, {'city': 'Lyon', 'zip': '69600'}]_fr": 1,
"[{'city': 'Paris', 'zip': '75001'}]_fr": 1},
'city_zip': {"Lyon_69600": 1, "Paris_75001": 2},
'lang_dates': {"fr_['d1', 'd2']": 1, "fr_['d2', 'd3']": 1}}
您可以递归地展开结构,然后按形成的键进行分组。从那里开始,可以应用基本组合:
from collections import defaultdict, Counter
import itertools as it
# Sample input: two records, each with a scalar, a list of scalars and a list of dicts.
# NOTE(review): the name `json` would shadow the stdlib module if that were ever imported here.
json = [{'lang': 'fr', 'dates': ['d1', 'd2'], 'address': [{'city': 'Paris', 'zip': '75001'}, {'city': 'Lyon', 'zip': '69600'}]}, {'lang': 'fr', 'dates': ['d2', 'd3'], 'address': [{'city': 'Paris', 'zip': '75001'}]}]
def get_keys(d, c=None, j=None):
    """Flatten nested dicts/lists into ``(dotted_path, value, index)`` tuples.

    Args:
        d: The structure to flatten: a dict, a list, or a leaf value.
        c: Sequence of keys leading to ``d``; joined with ``.`` to form the
            path. Defaults to an empty path.
        j: Index tag attached to leaves that are reached without crossing
            another list.

    Yields:
        One ``(path, value, index)`` tuple per leaf. ``index`` is the
        position, in the nearest enclosing list, of the dict item the leaf
        belongs to; it is ``None`` when that list item is not a dict, or
        when no list encloses the leaf and the caller did not supply ``j``.
    """
    # Use None as the default instead of a shared mutable list.
    path = [] if c is None else c
    if isinstance(d, list):
        # Lists add nothing to the path; dict items are tagged with their
        # position so fields of the same item can be matched up later.
        for position, item in enumerate(d):
            yield from get_keys(
                item,
                c=path,
                j=position if isinstance(item, dict) else None,
            )
    elif isinstance(d, dict):
        for key, value in d.items():
            # Build a fresh path per key so sibling branches never share state.
            yield from get_keys(value, c=[*path, key], j=j)
    else:
        yield ('.'.join(path), d, j)
def get_combos(data):
    """Count co-occurring values for every pair of flattened key paths.

    Args:
        data: A nested dict/list structure accepted by ``get_keys``.

    Returns:
        A dict mapping ``"<later path>_<earlier path>"`` to a ``Counter`` of
        ``"<later value>_<earlier value>"`` strings, where "earlier"/"later"
        refer to the order in which the paths were first produced by
        ``get_keys``.
    """
    grouped = defaultdict(list)
    for path, *rest in get_keys(data):
        grouped[path].append(rest)

    combos = {}
    for first_path, second_path in it.combinations(grouped, 2):
        tally = Counter()
        pairs = it.product(grouped[first_path], grouped[second_path])
        for [first_value, first_idx], [second_value, second_idx] in pairs:
            # Pair two values unless both carry an index and the indexes differ.
            if type(first_idx) != type(second_idx) or first_idx == second_idx:
                tally[f'{second_value}_{first_value}'] += 1
        combos[f'{second_path}_{first_path}'] = tally
    return combos
# Merge the per-record counters into one plain dict of dicts, adding up the
# counts of value pairs that occur in more than one record.
new_d = {}
for record in json:
    for pair_key, pair_counts in get_combos(record).items():
        for value_key, count in pair_counts.items():
            # Accumulate in place instead of rebuilding the inner dict on
            # every entry; an inner dict is created only once a count exists.
            merged = new_d.setdefault(pair_key, {})
            merged[value_key] = merged.get(value_key, 0) + count
输出:
{'dates_lang': {'d1_fr': 1, 'd2_fr': 2, 'd3_fr': 1}, 'address.city_lang': {'Paris_fr': 2, 'Lyon_fr': 1}, 'address.zip_lang': {'75001_fr': 2, '69600_fr': 1}, 'address.city_dates': {'Paris_d1': 1, 'Lyon_d1': 1, 'Paris_d2': 2, 'Lyon_d2': 1, 'Paris_d3': 1}, 'address.zip_dates': {'75001_d1': 1, '69600_d1': 1, '75001_d2': 2, '69600_d2': 1, '75001_d3': 1}, 'address.zip_address.city': {'75001_Paris': 2, '69600_Lyon': 1}}
编辑:要打印展平后的结构,请先把 get_keys 的调用结果存储起来:
def get_combos(data):
    """Count co-occurring values per pair of key paths, printing the flat view.

    Same computation as the non-printing variant, except that the flattened
    ``get_keys`` output is stored in a list and printed before it is grouped.

    Args:
        data: A nested dict/list structure accepted by ``get_keys``.

    Returns:
        A dict mapping ``"<later path>_<earlier path>"`` to a ``Counter`` of
        ``"<later value>_<earlier value>"`` strings.
    """
    flattened = list(get_keys(data))
    print(flattened)  # displaying flattened input dict

    grouped = defaultdict(list)
    for path, *rest in flattened:
        grouped[path].append(rest)

    combos = {}
    for first_path, second_path in it.combinations(grouped, 2):
        tally = Counter()
        pairs = it.product(grouped[first_path], grouped[second_path])
        for [first_value, first_idx], [second_value, second_idx] in pairs:
            # Pair two values unless both carry an index and the indexes differ.
            if type(first_idx) != type(second_idx) or first_idx == second_idx:
                tally[f'{second_value}_{first_value}'] += 1
        combos[f'{second_path}_{first_path}'] = tally
    return combos