如何在 python 中嵌套 for 循环的情况下使用 threadPoolExecutor 的 map()
How to use map() of threadPoolExecutor in case of nested for loop in python
我有 2 个字典:
# Two input dictionaries; each maps "key" to a list of {"id", "name"} records.
data1 = {
    "key": [
        {
            "id": "key1",
            "name": "key1"
        },
        {
            "id": "key2",
            "name": "key2"
        },
        {
            "id": "key3",
            "name": "key3"
        },
    ]
}
data2 = {
    "key": [
        {
            "id": "key2",  # BUG FIX: comma was missing after this value (syntax error)
            "name": "TEST key2"
        },
        {
            "id": "key1",
            "name": "TEST key1"
        },
    ]
}
我正在使用下面的代码,构建一个元组列表:每个元组由 data1 和 data2 的 key 列表中 id 相匹配的两个对象组成。
# Build the list of (data1-record, data2-record) tuples whose "id" values match.
# NOTE(review): this nested comprehension is O(len1 * len2) — it is exactly the
# quadratic scan the question asks to optimize away (see the dict-index version below).
common_keys = [
(each_data1_key, each_data2_key)
for each_data1_key in data1.get("key", [])
for each_data2_key in data2.get("key", [])
if each_data1_key.get("id") == each_data2_key.get("id")
]
# Example result = [({"id":"key1", "name": "key1"}, {"id": "key1", "name": "TEST key1"}), ...]
现在我想使用这些元组在threadPoolExecutor 的map 函数中进一步处理。目前,我正在使用下面的代码,
def func(object1, object2):
"""
Worker-thread body: process one matched pair of records (object1 from
data1's "key" list, object2 from data2's) in parallel with other pairs.
"""
<SOME CODE HERE> ...  # placeholder — the real per-pair work goes here
def myfunc(common_keys):
    """Hand each matched (object1, object2) tuple to a 10-worker thread pool."""
    if not common_keys:
        return
    with ThreadPoolExecutor(max_workers=10) as executor:
        # func takes the two objects as separate parameters, so each tuple
        # coming through the pool is unpacked before the call.
        executor.map(lambda pair: func(pair[0], pair[1]), common_keys)
我的任务是通过减少循环来优化代码(我使用了嵌套 for 循环来构建 common_keys 列表)。
有没有人能提供一种方案:在获得具有相同 id 的对象的元组列表时,不需要使用嵌套循环(或者采用其他优化方式)?
在上面回答的基础上,如果您有一些空闲内存,可以把 id 用作字典的键,以便稍后从快速的类似集合的操作中受益。
# Index each "key" list by id once (one linear pass per list), then use the
# fast set-like intersection of dict key views to find the shared ids.
dict1 = {record["id"]: record for record in data1.get("key", [])}
dict2 = {record["id"]: record for record in data2.get("key", [])}
common_keys = [
    (dict1[shared_id], dict2[shared_id])
    for shared_id in dict1.keys() & dict2.keys()
]
此外,如果您将字典传递给 myfunc
,而不是 common_keys
,您可以使用生成器来避免创建该列表。
def func(object1, object2):
    """Print the matched pair this worker thread received."""
    message = f"Got 1: {object1}, and 2: {object2}"
    print(message)
def generate_pairs(d1, d2):
    """Lazily yield (d1[k], d2[k]) for every id present in both dicts."""
    shared_ids = d1.keys() & d2.keys()
    for shared_id in shared_ids:
        yield d1[shared_id], d2[shared_id]
def myfunc(d1, d2):
    """Process every id-matched pair of d1/d2 on a 10-worker thread pool.

    BUG FIX: the guard previously read `common_keys`, a stale global left
    over from the list-based version; it now checks the parameters this
    function actually receives.
    """
    if d1 and d2:
        with ThreadPoolExecutor(max_workers=10) as executor:
            # generate_pairs is lazy, so no intermediate pair list is built.
            executor.map(lambda x: func(*x), generate_pairs(d1, d2))

myfunc(dict1, dict2)
>>> Got 1: {'id': 'key2', 'name': 'key2'}, and 2: {'id': 'key2', 'name': 'TEST key2'}
>>> Got 1: {'id': 'key1', 'name': 'key1'}, and 2: {'id': 'key1', 'name': 'TEST key1'}
最后,为了保持速度和备用内存,您可以只创建两个字典中最小的一个,将 "key"
列表传递给生成器:
def generate_pairs(l1, l2):
    """Yield id-matched pairs as (item-from-shorter-side, item-from-longer-side).

    Builds the id index over only the shorter list to spare memory, then
    streams once through the longer list.
    """
    if len(l1) < len(l2):
        little, big = l1, l2
    else:
        little, big = l2, l1
    index = {item["id"]: item for item in little}
    for candidate in big:
        match = index.get(candidate["id"])  # average-case O(1) lookup
        if match is not None:
            yield match, candidate
# with the same `myfunc` except for parameters types
def myfunc(l1, l2):
    """Fan id-matched pairs from the two "key" lists out to a thread pool.

    BUG FIX: the guard previously read `common_keys`, a stale global left
    over from the earlier version; it now checks the list parameters this
    function actually receives.
    """
    if l1 and l2:
        with ThreadPoolExecutor(max_workers=10) as executor:
            executor.map(lambda x: func(*x), generate_pairs(l1, l2))

# and you'd call
myfunc(data1.get("key", []), data2.get("key", []))
我有 2 个字典:
# Two input dictionaries; each maps "key" to a list of {"id", "name"} records.
data1 = {
    "key": [
        {
            "id": "key1",
            "name": "key1"
        },
        {
            "id": "key2",
            "name": "key2"
        },
        {
            "id": "key3",
            "name": "key3"
        },
    ]
}
data2 = {
    "key": [
        {
            "id": "key2",  # BUG FIX: comma was missing after this value (syntax error)
            "name": "TEST key2"
        },
        {
            "id": "key1",
            "name": "TEST key1"
        },
    ]
}
我正在使用下面的代码,构建一个元组列表:每个元组由 data1 和 data2 的 key 列表中 id 相匹配的两个对象组成。
# Build the list of (data1-record, data2-record) tuples whose "id" values match.
# NOTE(review): this nested comprehension is O(len1 * len2) — it is exactly the
# quadratic scan the question asks to optimize away (see the dict-index version below).
common_keys = [
(each_data1_key, each_data2_key)
for each_data1_key in data1.get("key", [])
for each_data2_key in data2.get("key", [])
if each_data1_key.get("id") == each_data2_key.get("id")
]
# Example result = [({"id":"key1", "name": "key1"}, {"id": "key1", "name": "TEST key1"}), ...]
现在我想使用这些元组在threadPoolExecutor 的map 函数中进一步处理。目前,我正在使用下面的代码,
def func(object1, object2):
"""
Worker-thread body: process one matched pair of records (object1 from
data1's "key" list, object2 from data2's) in parallel with other pairs.
"""
<SOME CODE HERE> ...  # placeholder — the real per-pair work goes here
def myfunc(common_keys):
    """Hand each matched (object1, object2) tuple to a 10-worker thread pool."""
    if not common_keys:
        return
    with ThreadPoolExecutor(max_workers=10) as executor:
        # func takes the two objects as separate parameters, so each tuple
        # coming through the pool is unpacked before the call.
        executor.map(lambda pair: func(pair[0], pair[1]), common_keys)
我的任务是通过减少循环来优化代码(我使用了嵌套 for 循环来构建 common_keys 列表)。
有没有人能提供一种方案:在获得具有相同 id 的对象的元组列表时,不需要使用嵌套循环(或者采用其他优化方式)?
在上面回答的基础上,如果您有一些空闲内存,可以把 id 用作字典的键,以便稍后从快速的类似集合的操作中受益。
# Index each "key" list by id once (one linear pass per list), then use the
# fast set-like intersection of dict key views to find the shared ids.
dict1 = {record["id"]: record for record in data1.get("key", [])}
dict2 = {record["id"]: record for record in data2.get("key", [])}
common_keys = [
    (dict1[shared_id], dict2[shared_id])
    for shared_id in dict1.keys() & dict2.keys()
]
此外,如果您将字典传递给 myfunc
,而不是 common_keys
,您可以使用生成器来避免创建该列表。
def func(object1, object2):
    """Print the matched pair this worker thread received."""
    message = f"Got 1: {object1}, and 2: {object2}"
    print(message)
def generate_pairs(d1, d2):
    """Lazily yield (d1[k], d2[k]) for every id present in both dicts."""
    shared_ids = d1.keys() & d2.keys()
    for shared_id in shared_ids:
        yield d1[shared_id], d2[shared_id]
def myfunc(d1, d2):
    """Process every id-matched pair of d1/d2 on a 10-worker thread pool.

    BUG FIX: the guard previously read `common_keys`, a stale global left
    over from the list-based version; it now checks the parameters this
    function actually receives.
    """
    if d1 and d2:
        with ThreadPoolExecutor(max_workers=10) as executor:
            # generate_pairs is lazy, so no intermediate pair list is built.
            executor.map(lambda x: func(*x), generate_pairs(d1, d2))

myfunc(dict1, dict2)
>>> Got 1: {'id': 'key2', 'name': 'key2'}, and 2: {'id': 'key2', 'name': 'TEST key2'}
>>> Got 1: {'id': 'key1', 'name': 'key1'}, and 2: {'id': 'key1', 'name': 'TEST key1'}
最后,为了保持速度和备用内存,您可以只创建两个字典中最小的一个,将 "key"
列表传递给生成器:
def generate_pairs(l1, l2):
    """Yield id-matched pairs as (item-from-shorter-side, item-from-longer-side).

    Builds the id index over only the shorter list to spare memory, then
    streams once through the longer list.
    """
    if len(l1) < len(l2):
        little, big = l1, l2
    else:
        little, big = l2, l1
    index = {item["id"]: item for item in little}
    for candidate in big:
        match = index.get(candidate["id"])  # average-case O(1) lookup
        if match is not None:
            yield match, candidate
# with the same `myfunc` except for parameters types
def myfunc(l1, l2):
    """Fan id-matched pairs from the two "key" lists out to a thread pool.

    BUG FIX: the guard previously read `common_keys`, a stale global left
    over from the earlier version; it now checks the list parameters this
    function actually receives.
    """
    if l1 and l2:
        with ThreadPoolExecutor(max_workers=10) as executor:
            executor.map(lambda x: func(*x), generate_pairs(l1, l2))

# and you'd call
myfunc(data1.get("key", []), data2.get("key", []))