Varying the number of neurons per hidden layer for dynamically created layers using GridSearchCV

I am trying to vary the number of neurons in each hidden layer of a dynamically created model. For example, the output of model.summary() shows every hidden layer with 5 neurons or every hidden layer with 10 neurons, but never the mixed architectures I want, such as 5 then 10, or 10 then 5.

import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']

dataset = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
df_diabetes = pd.read_csv(dataset, names = names)
df_diabetes.head()
df_diabetes = df_diabetes[1:]  # drop the original header row, which was read as data because names= was supplied
X = df_diabetes[['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']]
y = df_diabetes['class']
### Normalize the input features so they are on the same scale. The errors used for
### back-propagation are then computed from features of similar magnitude, giving smaller
### initial errors than unnormalized data and faster convergence of gradient descent on the
### chosen cost function. MinMaxScaler with feature_range=[0, 1] computes
### x' = (x - x_min) / (x_max - x_min) for each feature.
scaler = MinMaxScaler(feature_range=[0, 1])
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age'])
X.head(3)

X = X.to_numpy()
y = y.to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=1, stratify=y, shuffle=True)

def createmodel(n_layers, neurons, activation_func, optimizer, loss_func):
    model = Sequential()

    for i in range(1, n_layers):  # note: this builds n_layers - 1 hidden layers
        if i == 1:
            model.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation_func))
        else:
            model.add(Dense(neurons, activation=activation_func))

    # The output layer should have a single node for binary classification,
    # with a sigmoid activation (not activation_func, which would make the output relu here)
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=optimizer, loss=loss_func, metrics=["accuracy"])
    model.summary()  # print the architecture only after the output layer is added

    return model

## Wrap the Keras model so it can be used as a scikit-learn estimator
model = KerasClassifier(build_fn=createmodel, verbose=False)

# activation_funcs = ['sigmoid', 'relu', 'tanh']
activation_funcs = ['relu']
# loss_funcs = ['binary_crossentropy','hinge']
loss_funcs = ['binary_crossentropy']
# optimizers = ['rmsprop', 'adam','sgd']
optimizers = ['adam']
param_grid = dict(n_layers=[2, 3], neurons=[5, 10], activation_func=activation_funcs,
                  optimizer=optimizers, loss_func=loss_funcs, batch_size=[100], epochs=[20])
grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=2)
grid.fit(X, y)

print(grid.best_score_)
print(grid.best_params_)

pd.DataFrame(grid.cv_results_)[['mean_test_score', 'std_test_score', 'params']].to_csv('GridOptimization.csv')
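The reason mixed widths never appear becomes clear if you expand the grid: GridSearchCV takes the cross product of the parameter lists, so each candidate model receives one scalar neurons value, and the loop in createmodel reuses it for every hidden layer. A quick way to see this (a sketch using scikit-learn's ParameterGrid, the helper GridSearchCV uses to enumerate candidates):

from sklearn.model_selection import ParameterGrid

for params in ParameterGrid(dict(n_layers=[2, 3], neurons=[5, 10])):
    print(params)
# {'n_layers': 2, 'neurons': 5}
# {'n_layers': 2, 'neurons': 10}
# {'n_layers': 3, 'neurons': 5}
# {'n_layers': 3, 'neurons': 10}
# every candidate pairs one layer count with one constant width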

You can try defining n_layers as a list of lists:

n_layers = [[5, 10], [10, 5], [5, 10, 10],[5, 5, 10]]

where the length of each list gives the number of hidden layers and its elements give the number of nodes in each layer. Here is a working example (just copy and paste):

import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']

dataset = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
df_diabetes = pd.read_csv(dataset, names = names)
df_diabetes.head()
df_diabetes = df_diabetes[1:]  # drop the original header row, which was read as data because names= was supplied
X = df_diabetes[['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age']]
y = df_diabetes['class']
### Normalize the input features so they are on the same scale. The errors used for
### back-propagation are then computed from features of similar magnitude, giving smaller
### initial errors than unnormalized data and faster convergence of gradient descent on the
### chosen cost function. MinMaxScaler with feature_range=[0, 1] computes
### x' = (x - x_min) / (x_max - x_min) for each feature.
scaler = MinMaxScaler(feature_range=[0, 1])
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age'])
X.head(3)

X = X.to_numpy()
y = y.to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=1, stratify=y, shuffle=True)

def createmodel(n_layers, activation_func, optimizer, loss_func):
    model = Sequential()

    for i, neurons in enumerate(n_layers):
        if i == 0:
            model.add(Dense(neurons, input_dim=X_train.shape[1], activation=activation_func))
        else:
            model.add(Dense(neurons, activation=activation_func))

    # The output layer should have a single node for binary classification,
    # with a sigmoid activation
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=optimizer, loss=loss_func, metrics=["accuracy"])
    model.summary()  # print the architecture only after the output layer is added

    return model

model = KerasClassifier(build_fn=createmodel, verbose=False)

activation_funcs = ['relu']
loss_funcs = ['binary_crossentropy']
optimizers = ['adam']

n_layers = [[5, 10], [10, 5], [5, 10, 10], [5, 5, 10]]  # each list's length is the number of hidden layers; its elements are the nodes per layer
param_grid = dict(n_layers=n_layers, activation_func=activation_funcs,
                  optimizer=optimizers, loss_func=loss_funcs, batch_size=[100], epochs=[20])
grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=2)
grid.fit(X, y)

print(grid.best_score_)
print(grid.best_params_)

pd.DataFrame(grid.cv_results_)[['mean_test_score', 'std_test_score', 'params']].to_csv('GridOptimization.csv')
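As a side note, the tensorflow.keras.wrappers.scikit_learn wrapper used above is deprecated and has been removed in recent TensorFlow releases. On newer versions, the SciKeras package provides a replacement wrapper; below is a minimal sketch of the same search, assuming SciKeras is installed (pip install scikeras) and following its convention of routing model-building arguments through a model__ prefix:

from scikeras.wrappers import KerasClassifier

model = KerasClassifier(model=createmodel, verbose=0)  # createmodel compiles the model itself

param_grid = dict(model__n_layers=n_layers,
                  model__activation_func=activation_funcs,
                  model__optimizer=optimizers,
                  model__loss_func=loss_funcs,
                  batch_size=[100], epochs=[20])
grid = GridSearchCV(estimator=model, param_grid=param_grid, verbose=2)
grid.fit(X, y)

The model-building function is unchanged; only the wrapper import and the parameter names in the grid differ.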