将列表与 JSON 进行比较并计算出现次数
Comparing a list with JSON and counting occurrences
给出的列表如下:
# Allowed values for each field of a record.
make = ['ford', 'fiat', 'nissan', 'suzuki', 'dacia']
model = ['x', 'y', 'z']
version = ['A', 'B', 'C']
typ = ['sedan', 'coupe', 'van', 'kombi']
infos = ['steering wheel problems', 'gearbox problems', 'broken engine', 'throttle problems', None]

# Collect the value lists in field order. `total` was previously appended to
# without ever being created, which raises NameError; build it directly.
total = [make, model, version, typ, infos]
我需要创建这些列表的所有可能组合的列表列表,所以我做了:
# Every possible combination of one value per field, as tuples.
combos = [*itertools.product(*total)]
# The same combinations, each converted from a tuple to a list.
all_combos = list(map(list, combos))
现在我想在 JSON 数据中查找字段值与 all_combos 中某一项完全相同的记录,
并统计每种组合出现的次数。我的 JSON 很大,大致如下:
# Illustrative shape of the input records; `...` stands for further records.
data = [
    {
        'make': 'dacia',  # comma was missing here, making the literal invalid
        'model': 'x',
        'version': 'A',
        'typ': 'sedan',
        'infos': 'steering wheel problems'
    }, ...]
我想得到这样的输出:
# Desired result: each record plus the number of records sharing its values.
output = [
    {
        'make': 'dacia',  # comma was missing here, making the literal invalid
        'model': 'x',
        'version': 'A',
        'typ': 'sedan',
        'infos': 'steering wheel problems',
        'number_of_occurences_of_such_combination_of_fields_with__such_values': 75
    }, ...]
如何解决这样的任务?
如果我没理解错的话,你想给数据中的每个字典添加键 number_of_occurences_of_such_combination_of_fields_with__such_values:
from operator import itemgetter
from itertools import product
# Allowed values for each field; their Cartesian product is the set of
# combinations that are looked up in `data`.
make = ["ford", "fiat", "nissan", "suzuki", "dacia"]
model = ["x", "y", "z"]
version = ["A", "B", "C"]
typ = ["sedan", "coupe", "van", "kombi"]
infos = [
    "steering wheel problems",
    "gearbox problems",
    "broken engine",
    "throttle problems",
    None,
]

total = [make, model, version, typ, infos]

# Sample records: two identical "dacia" entries and one "ford" entry.
data = [
    {
        "make": "dacia",
        "model": "x",
        "version": "A",
        "typ": "sedan",
        "infos": "steering wheel problems",
    },
    {
        "make": "dacia",
        "model": "x",
        "version": "A",
        "typ": "sedan",
        "infos": "steering wheel problems",
    },
    {
        "make": "ford",
        "model": "x",
        "version": "A",
        "typ": "sedan",
        "infos": "steering wheel problems",
    },
]

# Extracts a record's field values as a tuple, in the same order as the lists
# in `total`, so the result can be compared directly with a product() tuple.
i = itemgetter("make", "model", "version", "typ", "infos")

# Maps each combination found in `data` to the list of records carrying it.
cnt = {}
# `product` is imported by name, so it is called without the `itertools.`
# prefix (the prefixed call raised NameError).
for c in product(*total):
    for d in data:
        if i(d) == c:
            cnt.setdefault(c, []).append(d)

# Annotate every matched record, in place, with the size of its group.
for v in cnt.values():
    for d in v:
        d[
            "number_of_occurences_of_such_combination_of_fields_with__such_values"
        ] = len(v)

print(data)
打印:
[
{
"make": "dacia",
"model": "x",
"version": "A",
"typ": "sedan",
"infos": "steering wheel problems",
"number_of_occurences_of_such_combination_of_fields_with__such_values": 2,
},
{
"make": "dacia",
"model": "x",
"version": "A",
"typ": "sedan",
"infos": "steering wheel problems",
"number_of_occurences_of_such_combination_of_fields_with__such_values": 2,
},
{
"make": "ford",
"model": "x",
"version": "A",
"typ": "sedan",
"infos": "steering wheel problems",
"number_of_occurences_of_such_combination_of_fields_with__such_values": 1,
},
]
版本 2(不使用 itertools.product):
from operator import itemgetter
# Extracts the five identifying field values of a record as one tuple.
i = itemgetter("make", "model", "version", "typ", "infos")

# First pass: tally how many records share each tuple of field values.
cnt = {}
for record in data:
    combo = i(record)
    if combo in cnt:
        cnt[combo] += 1
    else:
        cnt[combo] = 1

# Second pass: store the tally of its combination on every record.
for record in data:
    tally = cnt[i(record)]
    record[
        "number_of_occurences_of_such_combination_of_fields_with__such_values"
    ] = tally

print(data)
给出的列表如下:
# Allowed values for each field of a record.
make = ['ford', 'fiat', 'nissan', 'suzuki', 'dacia']
model = ['x', 'y', 'z']
version = ['A', 'B', 'C']
typ = ['sedan', 'coupe', 'van', 'kombi']
infos = ['steering wheel problems', 'gearbox problems', 'broken engine', 'throttle problems', None]

# Collect the value lists in field order. `total` was previously appended to
# without ever being created, which raises NameError; build it directly.
total = [make, model, version, typ, infos]
我需要创建这些列表的所有可能组合的列表列表,所以我做了:
# Every possible combination of one value per field, as tuples.
combos = [*itertools.product(*total)]
# The same combinations, each converted from a tuple to a list.
all_combos = list(map(list, combos))
现在我想在 JSON 数据中查找字段值与 all_combos 中某一项完全相同的记录,
并统计每种组合出现的次数。我的 JSON 很大,大致如下:
# Illustrative shape of the input records; `...` stands for further records.
data = [
    {
        'make': 'dacia',  # comma was missing here, making the literal invalid
        'model': 'x',
        'version': 'A',
        'typ': 'sedan',
        'infos': 'steering wheel problems'
    }, ...]
我想得到这样的输出:
# Desired result: each record plus the number of records sharing its values.
output = [
    {
        'make': 'dacia',  # comma was missing here, making the literal invalid
        'model': 'x',
        'version': 'A',
        'typ': 'sedan',
        'infos': 'steering wheel problems',
        'number_of_occurences_of_such_combination_of_fields_with__such_values': 75
    }, ...]
如何解决这样的任务?
如果我没理解错的话,你想给数据中的每个字典添加键 number_of_occurences_of_such_combination_of_fields_with__such_values:
from operator import itemgetter
from itertools import product
# Allowed values for each field; their Cartesian product is the set of
# combinations that are looked up in `data`.
make = ["ford", "fiat", "nissan", "suzuki", "dacia"]
model = ["x", "y", "z"]
version = ["A", "B", "C"]
typ = ["sedan", "coupe", "van", "kombi"]
infos = [
    "steering wheel problems",
    "gearbox problems",
    "broken engine",
    "throttle problems",
    None,
]

total = [make, model, version, typ, infos]

# Sample records: two identical "dacia" entries and one "ford" entry.
data = [
    {
        "make": "dacia",
        "model": "x",
        "version": "A",
        "typ": "sedan",
        "infos": "steering wheel problems",
    },
    {
        "make": "dacia",
        "model": "x",
        "version": "A",
        "typ": "sedan",
        "infos": "steering wheel problems",
    },
    {
        "make": "ford",
        "model": "x",
        "version": "A",
        "typ": "sedan",
        "infos": "steering wheel problems",
    },
]

# Extracts a record's field values as a tuple, in the same order as the lists
# in `total`, so the result can be compared directly with a product() tuple.
i = itemgetter("make", "model", "version", "typ", "infos")

# Maps each combination found in `data` to the list of records carrying it.
cnt = {}
# `product` is imported by name, so it is called without the `itertools.`
# prefix (the prefixed call raised NameError).
for c in product(*total):
    for d in data:
        if i(d) == c:
            cnt.setdefault(c, []).append(d)

# Annotate every matched record, in place, with the size of its group.
for v in cnt.values():
    for d in v:
        d[
            "number_of_occurences_of_such_combination_of_fields_with__such_values"
        ] = len(v)

print(data)
打印:
[
{
"make": "dacia",
"model": "x",
"version": "A",
"typ": "sedan",
"infos": "steering wheel problems",
"number_of_occurences_of_such_combination_of_fields_with__such_values": 2,
},
{
"make": "dacia",
"model": "x",
"version": "A",
"typ": "sedan",
"infos": "steering wheel problems",
"number_of_occurences_of_such_combination_of_fields_with__such_values": 2,
},
{
"make": "ford",
"model": "x",
"version": "A",
"typ": "sedan",
"infos": "steering wheel problems",
"number_of_occurences_of_such_combination_of_fields_with__such_values": 1,
},
]
版本 2(不使用 itertools.product):
from operator import itemgetter
# Extracts the five identifying field values of a record as one tuple.
i = itemgetter("make", "model", "version", "typ", "infos")

# First pass: tally how many records share each tuple of field values.
cnt = {}
for record in data:
    combo = i(record)
    if combo in cnt:
        cnt[combo] += 1
    else:
        cnt[combo] = 1

# Second pass: store the tally of its combination on every record.
for record in data:
    tally = cnt[i(record)]
    record[
        "number_of_occurences_of_such_combination_of_fields_with__such_values"
    ] = tally

print(data)