在多个数据集上训练神经网络模型
Train neural network model on multiple datasets
我有:
- 一个神经网络模型
- 10 个结构相同的数据集
我想要的:
- 分别在所有数据集上训练模型
- 分别保存他们的模型
我可以单独训练数据集并一次保存一个模型。但是我想加载我的 10 个数据集,并在一次运行中用它们训练出 10 个模型。解决方案可能很明显,但我对此还很陌生。我该如何实现?
提前致谢。
您可以使用并发与并行(concurrency and parallelism)的概念之一——多线程(Multi-Threading),或在某些情况下使用多进程(Multi-Processing)——来实现此目的。
最简单的编码方法是使用 Python 的 concurrent.futures 模块。
You can call the training function on model for each dataset to be used, all under the ThreadPoolExecutor, in order to fire parallel threads for performing individual trainings.
代码可以有点像这样:
第 1 步:必要的导入
from concurrent.futures import ThreadPoolExecutor, as_completed
import tensorflow as tf
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
第 2 步:创建和构建模型
def create_model():
    """Build and compile the example Keras network.

    NOTE(review): optimizer/loss/metrics are placeholders ('..', [...]) —
    they must be replaced with real values before this will actually run.
    """
    net = Sequential()
    net.add(Flatten())           # flatten the input into a vector
    net.add(Dense(64))           # one dense hidden layer
    net.add(Activation('relu'))
    # ........ so on
    net.compile(optimizer='..', loss='..', metrics=[...])  # compile step
    return net
第三步:定义拟合函数(进行模型训练)
def fit(model, XY_train):
    """Train *model* on one dataset and return the trained model.

    *XY_train* is an indexable pair: element 0 is the training inputs,
    element 1 the training labels.
    """
    inputs, labels = XY_train[0], XY_train[1]
    model.fit(inputs, labels, epochs=5, validation_split=0.3)
    return model
第 4 步:并行训练器方法,使用 TPE 上下文管理器启动同步训练
# Trains the provided model on each dataset in parallel using multi-threading.
def parallel_trainer(model, XY_train_datasets: list[tuple]):
    """Submit one training job per dataset and save each finished model.

    Bug fixed: the original code submitted a SINGLE future whose argument was
    the whole dataset list (`executor.submit(lambda ds: ..., XY_train_datasets)`),
    so only one job ran and `fit` received a list instead of an (x, y) pair.
    Now one future is submitted per dataset.

    NOTE(review): every thread still trains the SAME model object, so the
    saved files are snapshots of one model trained on all datasets combined.
    Clone the model per dataset (e.g. tf.keras.models.clone_model) if ten
    independent models are wanted — confirm intent with the author.
    """
    with ThreadPoolExecutor(max_workers=len(XY_train_datasets)) as executor:
        future_objs = [
            executor.submit(fit, model, ds)  # one training job per dataset
            for ds in XY_train_datasets
        ]
        # Save models in completion order, numbered 0..n-1.
        for i, obj in enumerate(as_completed(future_objs)):
            obj.result().save(f"{i}.model")
第 5 步:创建模型、加载数据集、调用并行训练器
model = create_model() # build the (single, shared) Keras model
mnist = tf.keras.datasets.mnist # example data source: the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data() # load train/test splits
datasets = [(x_train, y_train)]*10 # list of (x, y) tuples — same dataset reused 10 times here; replace with your 10 real datasets
parallel_trainer(model, datasets) # launch the parallel training runs
完整程序
from concurrent.futures import ThreadPoolExecutor, as_completed
import tensorflow as tf
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
def create_model():
    """Build and compile the example Keras network.

    NOTE(review): optimizer/loss/metrics are placeholders ('..', [...]) —
    they must be replaced with real values before this will actually run.
    """
    net = Sequential()
    net.add(Flatten())           # flatten the input into a vector
    net.add(Dense(64))           # one dense hidden layer
    net.add(Activation('relu'))
    # ........ so on
    net.compile(optimizer='..', loss='..', metrics=[...])  # compile step
    return net
def fit(model, XY_train):
    """Train *model* on one dataset and return the trained model.

    *XY_train* is an indexable pair: element 0 is the training inputs,
    element 1 the training labels.
    """
    inputs, labels = XY_train[0], XY_train[1]
    model.fit(inputs, labels, epochs=5, validation_split=0.3)
    return model
# Trains the provided model on each dataset in parallel using multi-threading.
def parallel_trainer(model, XY_train_datasets: list[tuple]):
    """Submit one training job per dataset and save each finished model.

    Bug fixed: the original code submitted a SINGLE future whose argument was
    the whole dataset list (`executor.submit(lambda ds: ..., XY_train_datasets)`),
    so only one job ran and `fit` received a list instead of an (x, y) pair.
    Now one future is submitted per dataset.

    NOTE(review): every thread still trains the SAME model object, so the
    saved files are snapshots of one model trained on all datasets combined.
    Clone the model per dataset (e.g. tf.keras.models.clone_model) if ten
    independent models are wanted — confirm intent with the author.
    """
    with ThreadPoolExecutor(max_workers=len(XY_train_datasets)) as executor:
        future_objs = [
            executor.submit(fit, model, ds)  # one training job per dataset
            for ds in XY_train_datasets
        ]
        # Save models in completion order, numbered 0..n-1.
        for i, obj in enumerate(as_completed(future_objs)):
            obj.result().save(f"{i}.model")
model = create_model() # build the (single, shared) Keras model
mnist = tf.keras.datasets.mnist # example data source: the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data() # load train/test splits
datasets = [(x_train, y_train)]*10 # list of (x, y) tuples — same dataset reused 10 times here; replace with your 10 real datasets
parallel_trainer(model, datasets) # launch the parallel training runs
我有:
- 一个神经网络模型
- 10 个结构相同的数据集
我想要的:
- 分别在所有数据集上训练模型
- 分别保存他们的模型
我可以单独训练数据集并一次保存一个模型。但是我想加载我的 10 个数据集,并在一次运行中用它们训练出 10 个模型。解决方案可能很明显,但我对此还很陌生。我该如何实现?
提前致谢。
您可以使用并发与并行(concurrency and parallelism)的概念之一——多线程(Multi-Threading),或在某些情况下使用多进程(Multi-Processing)——来实现此目的。
最简单的编码方法是使用 Python 的 concurrent.futures 模块。
You can call the training function on model for each dataset to be used, all under the ThreadPoolExecutor, in order to fire parallel threads for performing individual trainings.
代码可以有点像这样:
第 1 步:必要的导入
from concurrent.futures import ThreadPoolExecutor, as_completed
import tensorflow as tf
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
第 2 步:创建和构建模型
def create_model():
    """Build and compile the example Keras network.

    NOTE(review): optimizer/loss/metrics are placeholders ('..', [...]) —
    they must be replaced with real values before this will actually run.
    """
    net = Sequential()
    net.add(Flatten())           # flatten the input into a vector
    net.add(Dense(64))           # one dense hidden layer
    net.add(Activation('relu'))
    # ........ so on
    net.compile(optimizer='..', loss='..', metrics=[...])  # compile step
    return net
第三步:定义拟合函数(进行模型训练)
def fit(model, XY_train):
    """Train *model* on one dataset and return the trained model.

    *XY_train* is an indexable pair: element 0 is the training inputs,
    element 1 the training labels.
    """
    inputs, labels = XY_train[0], XY_train[1]
    model.fit(inputs, labels, epochs=5, validation_split=0.3)
    return model
第 4 步:并行训练器方法,使用 TPE 上下文管理器启动同步训练
# Trains the provided model on each dataset in parallel using multi-threading.
def parallel_trainer(model, XY_train_datasets: list[tuple]):
    """Submit one training job per dataset and save each finished model.

    Bug fixed: the original code submitted a SINGLE future whose argument was
    the whole dataset list (`executor.submit(lambda ds: ..., XY_train_datasets)`),
    so only one job ran and `fit` received a list instead of an (x, y) pair.
    Now one future is submitted per dataset.

    NOTE(review): every thread still trains the SAME model object, so the
    saved files are snapshots of one model trained on all datasets combined.
    Clone the model per dataset (e.g. tf.keras.models.clone_model) if ten
    independent models are wanted — confirm intent with the author.
    """
    with ThreadPoolExecutor(max_workers=len(XY_train_datasets)) as executor:
        future_objs = [
            executor.submit(fit, model, ds)  # one training job per dataset
            for ds in XY_train_datasets
        ]
        # Save models in completion order, numbered 0..n-1.
        for i, obj in enumerate(as_completed(future_objs)):
            obj.result().save(f"{i}.model")
第 5 步:创建模型、加载数据集、调用并行训练器
model = create_model() # build the (single, shared) Keras model
mnist = tf.keras.datasets.mnist # example data source: the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data() # load train/test splits
datasets = [(x_train, y_train)]*10 # list of (x, y) tuples — same dataset reused 10 times here; replace with your 10 real datasets
parallel_trainer(model, datasets) # launch the parallel training runs
完整程序
from concurrent.futures import ThreadPoolExecutor, as_completed
import tensorflow as tf
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
def create_model():
    """Build and compile the example Keras network.

    NOTE(review): optimizer/loss/metrics are placeholders ('..', [...]) —
    they must be replaced with real values before this will actually run.
    """
    net = Sequential()
    net.add(Flatten())           # flatten the input into a vector
    net.add(Dense(64))           # one dense hidden layer
    net.add(Activation('relu'))
    # ........ so on
    net.compile(optimizer='..', loss='..', metrics=[...])  # compile step
    return net
def fit(model, XY_train):
    """Train *model* on one dataset and return the trained model.

    *XY_train* is an indexable pair: element 0 is the training inputs,
    element 1 the training labels.
    """
    inputs, labels = XY_train[0], XY_train[1]
    model.fit(inputs, labels, epochs=5, validation_split=0.3)
    return model
# Trains the provided model on each dataset in parallel using multi-threading.
def parallel_trainer(model, XY_train_datasets: list[tuple]):
    """Submit one training job per dataset and save each finished model.

    Bug fixed: the original code submitted a SINGLE future whose argument was
    the whole dataset list (`executor.submit(lambda ds: ..., XY_train_datasets)`),
    so only one job ran and `fit` received a list instead of an (x, y) pair.
    Now one future is submitted per dataset.

    NOTE(review): every thread still trains the SAME model object, so the
    saved files are snapshots of one model trained on all datasets combined.
    Clone the model per dataset (e.g. tf.keras.models.clone_model) if ten
    independent models are wanted — confirm intent with the author.
    """
    with ThreadPoolExecutor(max_workers=len(XY_train_datasets)) as executor:
        future_objs = [
            executor.submit(fit, model, ds)  # one training job per dataset
            for ds in XY_train_datasets
        ]
        # Save models in completion order, numbered 0..n-1.
        for i, obj in enumerate(as_completed(future_objs)):
            obj.result().save(f"{i}.model")
model = create_model() # build the (single, shared) Keras model
mnist = tf.keras.datasets.mnist # example data source: the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data() # load train/test splits
datasets = [(x_train, y_train)]*10 # list of (x, y) tuples — same dataset reused 10 times here; replace with your 10 real datasets
parallel_trainer(model, datasets) # launch the parallel training runs