如何让 concurrent.futures ThreadPoolExecutor 使用字典作为两个参数之一?
How to get concurrent.futures ThreadPoolExecutor work with a dictionary as one of two params?
我尝试创建一个带有两个参数的 ThreadPoolExecutor,其中一个是字典。
我的意图是将整个字典作为参数传递,但目前我的 multi_thread_load
加载第一个 player_ID 和第一个字典值,第二个 player_ID 加载第二个字典值。
有没有办法将整个字典作为参数传递给每个player_ID?
我的目标是为每个 player_ID 创建一个类实例,其中包含一个 player_dictionary,保存位置、俱乐部、姓名、进球数、出场次数等属性。这些属性在 get_base_data 中收集,随后再把俱乐部当前的联赛排名附加到 player_dictionary。
一种变通方法是在 get_base_data 中抓取俱乐部的联赛排名,但这样我的代码会不必要地多次抓取联赛排名所在的网站,我想避免这种情况。
文件Player.py
class Player:
    """Hold the data collected for one player, plus the league rank of their club.

    Args:
        player_ID: Identifier of the player whose data is loaded.
        ranks_dictionary: Mapping of club name to league rank. It is only
            read, never modified, so one mapping can be shared by all players.

    Attributes are filled in by the constructor, which calls
    ``get_base_data()`` and then ``get_club_rank()``.
    """

    def __init__(self, player_ID, ranks_dictionary):
        self.player_ID = player_ID
        self.club_ranks = ranks_dictionary
        self.player_dictionary = {}
        self.get_base_data()
        self.get_club_rank()

    @property
    def dictionary(self):
        """Read-only alias for ``player_dictionary``.

        Kept so that callers reading ``player.dictionary`` get the same
        object instead of an AttributeError.
        """
        return self.player_dictionary

    def get_base_data(self):
        # do something with self.player_ID
        # and fill player_dictionary (Name, Club, Position, ...)
        pass

    def get_club_rank(self):
        """Copy the club's league rank into ``player_dictionary["club_rank"]``.

        Best effort: when the player has no "club" entry, the club is not in
        the rank table, or no usable rank table was supplied, the player
        dictionary is left untouched.
        """
        try:
            rank = self.club_ranks[self.player_dictionary["club"]]
        except (KeyError, TypeError):
            # KeyError: missing "club" entry or unknown club.
            # TypeError: rank table is not subscriptable (e.g. None).
            return
        # The data lives in player_dictionary; writing to a non-existent
        # self.dictionary attribute previously failed silently.
        self.player_dictionary["club_rank"] = rank
文件Process.py
import Player
import concurrent.futures
class Process:
    """Create ``Player`` objects for many player IDs in parallel.

    The data dictionary of every created player is appended to
    ``self.dataset``; the order of entries depends on thread scheduling.
    """

    def __init__(self):
        self.dataset = []

    def create_player_class(self, player_ID, ranks_dictionary):
        """Build one player and store its data dictionary in ``self.dataset``.

        Args:
            player_ID: Identifier of the player to load.
            ranks_dictionary: Complete club-name -> rank mapping handed to
                the player.
        """
        player = Player.Player(player_ID, ranks_dictionary)
        # Player keeps its data in ``player_dictionary``.
        self.dataset.append(player.player_dictionary)

    def multi_thread_load(self, given_set, club_ranks):
        """Load all players of ``given_set`` using a thread pool.

        Args:
            given_set: Iterable of player IDs.
            club_ranks: Club-name -> rank mapping; the *whole* mapping is
                passed to every player.

        Raises:
            Exception: The first exception raised while loading a player is
                re-raised here instead of being silently dropped.
        """
        max_thread = 25  # a higher number increases processing speed
        player_IDs = list(given_set)
        if not player_IDs:
            # ThreadPoolExecutor rejects max_workers=0, and there is nothing to do.
            return
        threads = min(max_thread, len(player_IDs))
        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
            # executor.map() zips its iterables, so passing the dict as a
            # second iterable would pair each ID with a single dict *key*.
            # Binding the dict in a closure gives every call the full mapping.
            results = executor.map(
                lambda player_ID: self.create_player_class(player_ID, club_ranks),
                player_IDs,
            )
            # Consume the lazy result iterator so worker exceptions surface.
            for _ in results:
                pass
执行文件
import Process
# League table: club name -> current league rank (ranks are stored as strings).
# NOTE(review): two clubs share rank "7" and no club has rank "8" — confirm
# whether this is intended or a typo.
dictionary = {
    "FC Bayern München": "1",
    "Borussia Dortmund": "2",
    "RB Leipzig": "3",
    "Bayer 04 Leverkusen": "4",
    "Borussia M'gladbach": "5",
    "FC Augsburg": "6",
    "VfB Stuttgart": "7",
    "1. FC Union Berlin": "7",
    "SV Werder Bremen": "9",
    "Eintracht Frankfurt": "10",
    "VfL Wolfsburg": "11",
    "1899 Hoffenheim": "12",
    "SC Freiburg": "13",
    "Hertha BSC": "14",
    "Arminia Bielefeld": "15",
    "1. FC Köln": "16",
    "FC Schalke 04": "17",
    "1. FSV Mainz 05": "18",
}
player_ID_list = (32445, 31663, 31362, 32553)  # Dummy list, in reality the list has around 500 elements
# Bind the instance to its own name: assigning it to ``Process`` would
# rebind the imported module and make ``Process.Process`` unreachable.
process = Process.Process()
club_ranks = dictionary
process.multi_thread_load(player_ID_list, club_ranks)
正如您可能看到的那样,我还没有太多使用 python/coding 的经验,所以如果您看到我犯的任何进一步错误或知道更好的解决方案,我也将不胜感激这些评论。
谢谢!
解决方案是让字典成为“Process”的实例变量,这样所有线程都可以访问它,而无需在 multi_thread_load 中传递它。
import Player
import concurrent.futures
class Process:
    """Create ``Player`` objects in parallel, sharing one club-rank table.

    The rank table is stored once on the instance (``club_table_dict``) so
    every worker thread reads the same mapping and nothing has to be passed
    through ``executor.map``.

    Args:
        club_ranks: Optional club-name -> rank mapping. When omitted, the
            table is obtained from ``get_all_club_ranks()``.
    """

    def __init__(self, club_ranks=None):
        self.dataset = []
        self.club_table_dict = (
            club_ranks if club_ranks is not None else self.get_all_club_ranks()
        )

    def create_player_class(self, player_ID):
        """Build one player with the shared rank table and store its data."""
        player = Player.Player(player_ID, self.club_table_dict)
        # Player keeps its data in ``player_dictionary``.
        self.dataset.append(player.player_dictionary)

    def get_all_club_ranks(self):
        """Return the current club-name -> rank table.

        TODO: fetch the current ranking here (once per ``Process``). The
        previous body returned a global ``dictionary`` that is not defined in
        this module and raised NameError; an empty table is returned until
        the real lookup is implemented.
        """
        return {}

    def multi_thread_load(self, given_set, club_ranks=None):
        """Load all players of ``given_set`` using a thread pool.

        Args:
            given_set: Iterable of player IDs.
            club_ranks: Optional club-name -> rank mapping. When given, it
                replaces the instance's table before loading, so the argument
                is no longer silently ignored.

        Raises:
            Exception: The first exception raised while loading a player is
                re-raised here instead of being silently dropped.
        """
        max_thread = 25  # a higher number increases processing speed
        if club_ranks is not None:
            self.club_table_dict = club_ranks
        player_IDs = list(given_set)
        if not player_IDs:
            # ThreadPoolExecutor rejects max_workers=0, and there is nothing to do.
            return
        threads = min(max_thread, len(player_IDs))
        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
            # Consume the lazy result iterator so worker exceptions surface.
            for _ in executor.map(self.create_player_class, player_IDs):
                pass
我尝试创建一个带有两个参数的 ThreadPoolExecutor,其中一个是字典。
我的意图是将整个字典作为参数传递,但目前我的 multi_thread_load
加载第一个 player_ID 和第一个字典值,第二个 player_ID 加载第二个字典值。
有没有办法将整个字典作为参数传递给每个player_ID?
我的目标是为每个 player_ID 创建一个类实例,其中包含一个 player_dictionary,保存位置、俱乐部、姓名、进球数、出场次数等属性。这些属性在 get_base_data 中收集,随后再把俱乐部当前的联赛排名附加到 player_dictionary。
一种变通方法是在 get_base_data 中抓取俱乐部的联赛排名,但这样我的代码会不必要地多次抓取联赛排名所在的网站,我想避免这种情况。
文件Player.py
class Player:
    """Hold the data collected for one player, plus the league rank of their club.

    Args:
        player_ID: Identifier of the player whose data is loaded.
        ranks_dictionary: Mapping of club name to league rank. It is only
            read, never modified, so one mapping can be shared by all players.

    Attributes are filled in by the constructor, which calls
    ``get_base_data()`` and then ``get_club_rank()``.
    """

    def __init__(self, player_ID, ranks_dictionary):
        self.player_ID = player_ID
        self.club_ranks = ranks_dictionary
        self.player_dictionary = {}
        self.get_base_data()
        self.get_club_rank()

    @property
    def dictionary(self):
        """Read-only alias for ``player_dictionary``.

        Kept so that callers reading ``player.dictionary`` get the same
        object instead of an AttributeError.
        """
        return self.player_dictionary

    def get_base_data(self):
        # do something with self.player_ID
        # and fill player_dictionary (Name, Club, Position, ...)
        pass

    def get_club_rank(self):
        """Copy the club's league rank into ``player_dictionary["club_rank"]``.

        Best effort: when the player has no "club" entry, the club is not in
        the rank table, or no usable rank table was supplied, the player
        dictionary is left untouched.
        """
        try:
            rank = self.club_ranks[self.player_dictionary["club"]]
        except (KeyError, TypeError):
            # KeyError: missing "club" entry or unknown club.
            # TypeError: rank table is not subscriptable (e.g. None).
            return
        # The data lives in player_dictionary; writing to a non-existent
        # self.dictionary attribute previously failed silently.
        self.player_dictionary["club_rank"] = rank
文件Process.py
import Player
import concurrent.futures
class Process:
    """Create ``Player`` objects for many player IDs in parallel.

    The data dictionary of every created player is appended to
    ``self.dataset``; the order of entries depends on thread scheduling.
    """

    def __init__(self):
        self.dataset = []

    def create_player_class(self, player_ID, ranks_dictionary):
        """Build one player and store its data dictionary in ``self.dataset``.

        Args:
            player_ID: Identifier of the player to load.
            ranks_dictionary: Complete club-name -> rank mapping handed to
                the player.
        """
        player = Player.Player(player_ID, ranks_dictionary)
        # Player keeps its data in ``player_dictionary``.
        self.dataset.append(player.player_dictionary)

    def multi_thread_load(self, given_set, club_ranks):
        """Load all players of ``given_set`` using a thread pool.

        Args:
            given_set: Iterable of player IDs.
            club_ranks: Club-name -> rank mapping; the *whole* mapping is
                passed to every player.

        Raises:
            Exception: The first exception raised while loading a player is
                re-raised here instead of being silently dropped.
        """
        max_thread = 25  # a higher number increases processing speed
        player_IDs = list(given_set)
        if not player_IDs:
            # ThreadPoolExecutor rejects max_workers=0, and there is nothing to do.
            return
        threads = min(max_thread, len(player_IDs))
        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
            # executor.map() zips its iterables, so passing the dict as a
            # second iterable would pair each ID with a single dict *key*.
            # Binding the dict in a closure gives every call the full mapping.
            results = executor.map(
                lambda player_ID: self.create_player_class(player_ID, club_ranks),
                player_IDs,
            )
            # Consume the lazy result iterator so worker exceptions surface.
            for _ in results:
                pass
执行文件
import Process
# League table: club name -> current league rank (ranks are stored as strings).
# NOTE(review): two clubs share rank "7" and no club has rank "8" — confirm
# whether this is intended or a typo.
dictionary = {
    "FC Bayern München": "1",
    "Borussia Dortmund": "2",
    "RB Leipzig": "3",
    "Bayer 04 Leverkusen": "4",
    "Borussia M'gladbach": "5",
    "FC Augsburg": "6",
    "VfB Stuttgart": "7",
    "1. FC Union Berlin": "7",
    "SV Werder Bremen": "9",
    "Eintracht Frankfurt": "10",
    "VfL Wolfsburg": "11",
    "1899 Hoffenheim": "12",
    "SC Freiburg": "13",
    "Hertha BSC": "14",
    "Arminia Bielefeld": "15",
    "1. FC Köln": "16",
    "FC Schalke 04": "17",
    "1. FSV Mainz 05": "18",
}
player_ID_list = (32445, 31663, 31362, 32553)  # Dummy list, in reality the list has around 500 elements
# Bind the instance to its own name: assigning it to ``Process`` would
# rebind the imported module and make ``Process.Process`` unreachable.
process = Process.Process()
club_ranks = dictionary
process.multi_thread_load(player_ID_list, club_ranks)
正如您可能看到的那样,我还没有太多使用 python/coding 的经验,所以如果您看到我犯的任何进一步错误或知道更好的解决方案,我也将不胜感激这些评论。
谢谢!
解决方案是让字典成为“Process”的实例变量,这样所有线程都可以访问它,而无需在 multi_thread_load 中传递它。
import Player
import concurrent.futures
class Process:
    """Create ``Player`` objects in parallel, sharing one club-rank table.

    The rank table is stored once on the instance (``club_table_dict``) so
    every worker thread reads the same mapping and nothing has to be passed
    through ``executor.map``.

    Args:
        club_ranks: Optional club-name -> rank mapping. When omitted, the
            table is obtained from ``get_all_club_ranks()``.
    """

    def __init__(self, club_ranks=None):
        self.dataset = []
        self.club_table_dict = (
            club_ranks if club_ranks is not None else self.get_all_club_ranks()
        )

    def create_player_class(self, player_ID):
        """Build one player with the shared rank table and store its data."""
        player = Player.Player(player_ID, self.club_table_dict)
        # Player keeps its data in ``player_dictionary``.
        self.dataset.append(player.player_dictionary)

    def get_all_club_ranks(self):
        """Return the current club-name -> rank table.

        TODO: fetch the current ranking here (once per ``Process``). The
        previous body returned a global ``dictionary`` that is not defined in
        this module and raised NameError; an empty table is returned until
        the real lookup is implemented.
        """
        return {}

    def multi_thread_load(self, given_set, club_ranks=None):
        """Load all players of ``given_set`` using a thread pool.

        Args:
            given_set: Iterable of player IDs.
            club_ranks: Optional club-name -> rank mapping. When given, it
                replaces the instance's table before loading, so the argument
                is no longer silently ignored.

        Raises:
            Exception: The first exception raised while loading a player is
                re-raised here instead of being silently dropped.
        """
        max_thread = 25  # a higher number increases processing speed
        if club_ranks is not None:
            self.club_table_dict = club_ranks
        player_IDs = list(given_set)
        if not player_IDs:
            # ThreadPoolExecutor rejects max_workers=0, and there is nothing to do.
            return
        threads = min(max_thread, len(player_IDs))
        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
            # Consume the lazy result iterator so worker exceptions surface.
            for _ in executor.map(self.create_player_class, player_IDs):
                pass