Neuraxle's RandomSearch() successor
I updated Neuraxle to the newest version (3.4). I noticed that the whole auto_ml.py was redone. I checked the documentation, but there is nothing about it. On git, the RandomSearch() method seems to have been replaced a while ago by the AutoML() method, but the parameters are different.
Does anyone know how to hook the Boston Housing example pipeline below up to an automatic parameter search in the newest Neuraxle version (3.4)?
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_boston
from sklearn.decomposition import PCA, FastICA
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from neuraxle.hyperparams.distributions import RandInt, LogUniform, Boolean
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import RandomSearch
from neuraxle.metaopt.random import KFoldCrossValidationWrapper
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyTranspose
from neuraxle.steps.sklearn import SKLearnWrapper
from neuraxle.union import AddFeatures, ModelStacking
def main():
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition, but they could also be set
    # within the classes at their definition if using custom classes, or they could be defined after declaring the
    # pipeline using a flat dict or a nested dict.
    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600), "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("Meta-fitting on train:")
    p = p.meta_fit(X_train, y_train, metastep=RandomSearch(
        n_iter=10,
        higher_score_is_better=True,
        validation_technique=KFoldCrossValidationWrapper(scoring_function=r2_score, k_fold=10)
    ))
    # Here is an alternative way to do it, more "pipeliney":
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
    #     validation_technique=KFoldCrossValidation(scoring_function=r2_score, k_fold=3)
    # ).fit(X_train, y_train)

    print("")
    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")
    print("Evaluating transformed train:")
    score_transform = r2_score(y_train_predicted, y_train)
    print('R2 regression score:', score_transform)

    print("")
    print("Evaluating transformed test:")
    score_test = r2_score(y_test_predicted, y_test)
    print('R2 regression score:', score_test)


if __name__ == "__main__":
    main()
Here is the solution to your problem. It is a new example that has not yet been published on the documentation site:
- https://drive.google.com/drive/u/0/folders/12uzcNKU7n0EUyFzgitSt1wSaSvV4qJbs (see the solution to the second coding kata in there)
Example pipeline code from the link above:
from neuraxle.base import Identity
from neuraxle.pipeline import Pipeline
from neuraxle.steps.flow import TrainOnlyWrapper, ChooseOneStepOf
from neuraxle.steps.numpy import NumpyConcatenateInnerFeatures, NumpyShapePrinter, NumpyFlattenDatum
from neuraxle.union import FeatureUnion
# Note: custom steps such as NumpyFFT, NumpyAbs, FFTPeakBinWithValue, NumpyMean, NumpyMedian,
# NumpyMin, NumpyMax, and the classifier steps used below are defined in the linked notebook.
pipeline = Pipeline([
    TrainOnlyWrapper(NumpyShapePrinter(custom_message="Input shape before feature union")),
    FeatureUnion([
        Pipeline([
            NumpyFFT(),
            NumpyAbs(),
            FeatureUnion([
                NumpyFlattenDatum(),  # Reshape from 3D to flat 2D: flattening data except on batch size
                FFTPeakBinWithValue()  # Extract 2D features from the 3D FFT bins
            ], joiner=NumpyConcatenateInnerFeatures())
        ]),
        NumpyMean(),
        NumpyMedian(),
        NumpyMin(),
        NumpyMax()
    ], joiner=NumpyConcatenateInnerFeatures()),
    # TODO, optional: Add some feature selection right here for the motivated ones:
    #     https://scikit-learn.org/stable/modules/feature_selection.html
    # TODO, optional: Add normalization right here (if using other classifiers)
    #     https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.normalize.html
    TrainOnlyWrapper(NumpyShapePrinter(custom_message="Shape after feature union, before classification")),
    # Shape: [batch_size, remade_features]
    ChooseOneStepOf([
        decision_tree_classifier,
        # extra_tree_classifier,  # TODO
        # ridge_classifier,  # TODO
        logistic_regression,
        # random_forest_classifier  # TODO
    ]),
    TrainOnlyWrapper(NumpyShapePrinter(custom_message="Shape at output after classification")),
    # Shape: [batch_size]
    Identity()
])
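The snippet above references classifier steps like decision_tree_classifier and logistic_regression that are defined in the linked notebook rather than here. As a rough sketch of what such steps could look like, one could reuse the same SKLearnWrapper + HyperparameterSpace pattern from the question (the classifier choices and hyperparameter ranges below are illustrative assumptions, not the notebook's exact values):
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from neuraxle.hyperparams.distributions import RandInt, LogUniform
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.steps.sklearn import SKLearnWrapper

# Hypothetical definitions for the steps used in ChooseOneStepOf above:
decision_tree_classifier = SKLearnWrapper(
    DecisionTreeClassifier(),
    HyperparameterSpace({"max_depth": RandInt(2, 10)})
)
logistic_regression = SKLearnWrapper(
    LogisticRegression(),
    HyperparameterSpace({"C": LogUniform(0.01, 10.0)})
)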
Then do the AutoML:
from neuraxle.metaopt.auto_ml import AutoML, InMemoryHyperparamsRepository, validation_splitter, \
    RandomSearchHyperparameterSelectionStrategy
from neuraxle.metaopt.callbacks import ScoringCallback
from sklearn.metrics import accuracy_score

auto_ml = AutoML(
    pipeline=pipeline,
    hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
    validation_split_function=validation_splitter(test_size=0.20),
    scoring_callback=ScoringCallback(accuracy_score, higher_score_is_better=True),  # accuracy is better when higher
    n_trials=7,
    epochs=1,
    hyperparams_repository=InMemoryHyperparamsRepository(cache_folder=cache_folder),  # cache_folder: path to a folder where trials are saved
    refit_trial=True,
)
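To come back to the original Boston Housing question, the same AutoML construct can be wrapped around the pipeline p defined in the question. The sketch below is a best-effort adaptation under the API shown above, not a verified snippet: it swaps in r2_score (better when higher) as the scoring metric, uses a hypothetical "cache" folder, and assumes that AutoML.get_best_model() is available in this version for retrieving the refit pipeline:
from sklearn.metrics import r2_score
from neuraxle.metaopt.auto_ml import AutoML, InMemoryHyperparamsRepository, validation_splitter, \
    RandomSearchHyperparameterSelectionStrategy
from neuraxle.metaopt.callbacks import ScoringCallback

# Wrap the Boston Housing pipeline `p` from the question instead of calling
# p.meta_fit(..., metastep=RandomSearch(...)) as in the old API:
auto_ml = AutoML(
    pipeline=p,
    hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
    validation_split_function=validation_splitter(test_size=0.20),
    scoring_callback=ScoringCallback(r2_score, higher_score_is_better=True),
    n_trials=10,
    epochs=1,
    hyperparams_repository=InMemoryHyperparamsRepository(cache_folder="cache"),  # any writable folder (assumption)
    refit_trial=True,  # refit the best hyperparameters on the whole training set
)

auto_ml = auto_ml.fit(X_train, y_train)
best_pipeline = auto_ml.get_best_model()  # assumption: returns the refit best pipeline in this version
y_test_predicted = best_pipeline.transform(X_test)  # then evaluate with r2_score as before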
This example is also studied in Neuraxio's Clean Machine Learning training.