为 H2O 模型绘制决策边界 python

draw decision boundaries for H2O model python

我想在 Python 中为 H2O 随机森林模型绘制决策边界,如下所示:

到目前为止我找到的所有示例都是用 scikit learn 完成的。

要绘制 H2O 模型的决策边界,您需要使用 matplotlib。要使用 matplotlib,您需要在绘图之前将 H2O 预测转换为 numpy 数组或 pandas 数据帧。这是二维二元分类问题的示例:

import h2o
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from h2o.estimators.random_forest import H2ORandomForestEstimator

# Train an H2O random forest on a 2-feature binary classification dataset and
# plot its decision surface with matplotlib. The script assumes 'data.csv'
# holds two feature columns followed by the target column.
h2o.init()

# Import the data into an H2O frame
hf = h2o.import_file('data.csv')

# Convert the target (last column) into a factor so H2O treats it as a class
hf[:, -1] = hf[:, -1].asfactor()

# Split the data into train/test
hf_train, hf_test = hf.split_frame(ratios=[0.75])

# Columns used for the training: everything except the last column
X_cols = hf_train.col_names[:-1]

# Last column is the target
y_col = hf_train.col_names[-1]

# Random Forest classifier
rf_clf = H2ORandomForestEstimator(ntrees=10)
rf_clf.train(X_cols, y_col, training_frame=hf_train, validation_frame=hf_test)
# Predictions on the held-out split (not needed for the plot itself)
y_pred = rf_clf.predict(test_data=hf_test[:, X_cols])

# Convert to a pandas DataFrame and build a mesh covering both features.
# Positional access uses .iloc because DataFrame.ix was removed in pandas 1.0.
df = hf.as_data_frame()
x1_min, x1_max = df.iloc[:, 0].min() - .5, df.iloc[:, 0].max() + .5
x2_min, x2_max = df.iloc[:, 1].min() - .5, df.iloc[:, 1].max() + .5
xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, 0.02),
                       np.arange(x2_min, x2_max, 0.02))

# Predict the mesh values using the H2O model and convert back to pandas.
# The mesh frame is given the training feature names explicitly so that the
# model matches its columns even when the CSV has a header row.
mesh_hf = h2o.H2OFrame(np.c_[xx1.ravel(), xx2.ravel()], column_names=X_cols)
Z = rf_clf.predict(mesh_hf).as_data_frame()
# Reshape back to a 2d grid.
# NOTE(review): the 'p1' column presumably requires class labels 0/1 - confirm
# against the actual target values in data.csv.
zz = Z['p1'].values.reshape(xx1.shape)

# Plot the results
cm_scatt = ListedColormap(['b', 'r'])
fig = plt.figure(figsize=(12, 9))
# Decision surface
plt.contourf(xx1, xx2, zz, cmap='jet', alpha=.8)

# Scatter plot of the full dataset, coloured by the target (last) column
plt.scatter(df.iloc[:, 0], df.iloc[:, 1], c=df.iloc[:, -1], cmap=cm_scatt,
            edgecolors='k')
# Annotate with a model score
plt.text(xx1.max(), xx2.min(), str(round(rf_clf.r2(), 2)),
         horizontalalignment='right', color='w', fontsize=18)

# Render the figure before the cluster goes away
plt.show()

# Shutdown H2O cluster
h2o.cluster().shutdown()