How to put four plots in the same figure as subplots

I use the following code to create the plot I want for each of four classifiers:

For each classifier I end up with a DataFrame that has two columns: mean is the MDA importance and std is the standard deviation of that feature importance.

import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

'''Function to use to calculate importances'''  

def featImpMDA(clf,X,y,n_splits=5):
    # feature importance based on OOS score reduction (MDA)
    from sklearn.metrics import log_loss
    from sklearn.model_selection import KFold
    cvGen=KFold(n_splits=n_splits)
    scr0,scr1=pd.Series(),pd.DataFrame(columns=X.columns)
    for i,(train,test) in enumerate(cvGen.split(X=X)):
        X0,y0=X.iloc[train,:],y.iloc[train]
        X1,y1=X.iloc[test,:],y.iloc[test]
        fit=clf.fit(X=X0,y=y0) # the fit occurs here
        prob=fit.predict_proba(X1) # prediction before shuffling
        scr0.loc[i]=-log_loss(y1,prob,labels=clf.classes_)
        for j in X.columns:
            X1_=X1.copy(deep=True)
            np.random.shuffle(X1_[j].values) # shuffle one column
            prob=fit.predict_proba(X1_) # prediction after shuffling
            scr1.loc[i,j]=-log_loss(y1,prob,labels=clf.classes_)
    imp=(-1*scr1).add(scr0,axis=0) # per-feature drop in score caused by shuffling
    imp=imp/(-1*scr1)              # normalise by the shuffled log-loss
    imp=pd.concat({'mean':imp.mean(),
                   'std':imp.std()*imp.shape[0]**-.5},axis=1) # CLT
    return imp

'''Import data''' 

data = load_breast_cancer()
X, y = data.data, data.target
X = pd.DataFrame(X, columns=data.feature_names)

X_train, X_test, y_train, y_test = train_test_split(X, pd.DataFrame(y), random_state=42)

'''Calculate importances'''

clf = RandomForestClassifier(n_estimators=100, random_state=42)
bc_rf = featImpMDA(clf,X_train,y_train,n_splits=5)

clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
bc_et = featImpMDA(clf,X_train,y_train,n_splits=5)

clf = XGBClassifier(n_estimators=100, random_state=42)
bc_xgb = featImpMDA(clf,X_train,y_train,n_splits=5)

clf = DecisionTreeClassifier(random_state=42)
bc_dt = featImpMDA(clf,X_train,y_train,n_splits=5)

'''Plot 1 RF'''

bc_rf.sort_values(by='mean', ascending=False, inplace=True)
bc_rf = bc_rf.iloc[:30,:]
bc_rf.sort_values(by='mean', ascending=True, inplace=True)
plt.figure(figsize=(3,10))
ax = bc_rf['mean'].plot(kind='barh',color='b', alpha=.40, xerr=bc_rf['std'],
                         error_kw={'ecolor':'r'})
plt.title('{} full {} feature importances '.format('US', 'RF'), fontsize=10)
plt.xlabel("Permutation Importance (MDA)")   

'''Plot 2 ET'''

bc_et.sort_values(by='mean', ascending=False, inplace=True)
bc_et = bc_et.iloc[:30,:]
bc_et.sort_values(by='mean', ascending=True, inplace=True)
plt.figure(figsize=(3,10))
ax = bc_et['mean'].plot(kind='barh',color='b', alpha=.40, xerr=bc_et['std'],
                         error_kw={'ecolor':'r'})
plt.title('{} full {} feature importances '.format('US', 'ET'), fontsize=10)
plt.xlabel("Permutation Importance (MDA)")   

'''Plot 3 XGB'''

bc_xgb.sort_values(by='mean', ascending=False, inplace=True)
bc_xgb = bc_xgb.iloc[:30,:]
bc_xgb.sort_values(by='mean', ascending=True, inplace=True)
plt.figure(figsize=(3,10))
ax = bc_xgb['mean'].plot(kind='barh',color='b', alpha=.40, xerr=bc_xgb['std'],
                         error_kw={'ecolor':'r'})
plt.title('{} full {} feature importances '.format('US', 'XGB'), fontsize=10)
plt.xlabel("Permutation Importance (MDA)")   

'''Plot 4 DT'''

bc_dt.sort_values(by='mean', ascending=False, inplace=True)
bc_dt = bc_dt.iloc[:30,:]
bc_dt.sort_values(by='mean', ascending=True, inplace=True)
plt.figure(figsize=(3,10))
ax = bc_dt['mean'].plot(kind='barh',color='b', alpha=.40, xerr=bc_dt['std'],
                         error_kw={'ecolor':'r'})
plt.title('{} full {} feature importances '.format('US', 'DT'), fontsize=10)
plt.xlabel("Permutation Importance (MDA)")  

However, I would like to create one figure with 4 subplots, one for each of the 4 classifiers. I'm not sure how to go about this. Any help would be greatly appreciated!

Here is a minimal working example that produces the 4 subplots. I haven't changed any of your formatting, but that would be easy to do. Hope this is what you were looking for!

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.gridspec import GridSpec

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

'''Function to use to calculate importances'''

def featImpMDA(clf,X,y,n_splits=5):
    # feature importance based on OOS score reduction (MDA)
    from sklearn.metrics import log_loss
    from sklearn.model_selection import KFold
    cvGen=KFold(n_splits=n_splits)
    scr0,scr1=pd.Series(),pd.DataFrame(columns=X.columns)
    for i,(train,test) in enumerate(cvGen.split(X=X)):
        X0,y0=X.iloc[train,:],y.iloc[train]
        X1,y1=X.iloc[test,:],y.iloc[test]
        fit=clf.fit(X=X0,y=y0) # the fit occurs here
        prob=fit.predict_proba(X1) # prediction before shuffling
        scr0.loc[i]=-log_loss(y1,prob,labels=clf.classes_)
        for j in X.columns:
            X1_=X1.copy(deep=True)
            np.random.shuffle(X1_[j].values) # shuffle one column
            prob=fit.predict_proba(X1_) # prediction after shuffling
            scr1.loc[i,j]=-log_loss(y1,prob,labels=clf.classes_)
    imp=(-1*scr1).add(scr0,axis=0) # per-feature drop in score caused by shuffling
    imp=imp/(-1*scr1)              # normalise by the shuffled log-loss
    imp=pd.concat({'mean':imp.mean(),
                   'std':imp.std()*imp.shape[0]**-.5},axis=1) # CLT
    return imp

'''Import data'''

data = load_breast_cancer()
X, y = data.data, data.target
X = pd.DataFrame(X, columns=data.feature_names)

X_train, X_test, y_train, y_test = train_test_split(X, pd.DataFrame(y), random_state=42)

'''Calculate importances'''

clf = RandomForestClassifier(n_estimators=100, random_state=42)
bc_rf = featImpMDA(clf,X_train,y_train,n_splits=5)

clf = ExtraTreesClassifier(n_estimators=100, random_state=42)
bc_et = featImpMDA(clf,X_train,y_train,n_splits=5)

clf = XGBClassifier(n_estimators=100, random_state=42)
bc_xgb = featImpMDA(clf,X_train,y_train,n_splits=5)

clf = DecisionTreeClassifier(random_state=42)
bc_dt = featImpMDA(clf,X_train,y_train,n_splits=5)

# Set up a grid of axes objects
gs1 = GridSpec(2, 2)

# Get axes objects which you can then use to plot
ax1 = plt.subplot(gs1[0, 0])
ax2 = plt.subplot(gs1[0, 1], sharex=ax1, sharey=ax1)
ax3 = plt.subplot(gs1[1, 0], sharex=ax1, sharey=ax1)
ax4 = plt.subplot(gs1[1, 1], sharex=ax1, sharey=ax1)

'''Plot 1 RF'''

bc_rf.sort_values(by='mean', ascending=False, inplace=True)
bc_rf = bc_rf.iloc[:30,:]
bc_rf.sort_values(by='mean', ascending=True, inplace=True)
ax = bc_rf['mean'].plot(kind='barh',color='b', alpha=.40, xerr=bc_rf['std'], error_kw={'ecolor':'r'}, ax=ax1)
ax1.set_title('{} full {} feature importances '.format('US', 'RF'), fontsize=10)
ax1.set_xlabel("Permutation Importance (MDA)")

'''Plot 2 ET'''

bc_et.sort_values(by='mean', ascending=False, inplace=True)
bc_et = bc_et.iloc[:30,:]
bc_et.sort_values(by='mean', ascending=True, inplace=True)
ax = bc_et['mean'].plot(kind='barh',color='b', alpha=.40, xerr=bc_et['std'],
                         error_kw={'ecolor':'r'}, ax=ax2)
ax2.set_title('{} full {} feature importances '.format('US', 'ET'), fontsize=10)
ax2.set_xlabel("Permutation Importance (MDA)")

'''Plot 3 XGB'''

bc_xgb.sort_values(by='mean', ascending=False, inplace=True)
bc_xgb = bc_xgb.iloc[:30,:]
bc_xgb.sort_values(by='mean', ascending=True, inplace=True)
ax = bc_xgb['mean'].plot(kind='barh',color='b', alpha=.40, xerr=bc_xgb['std'],
                         error_kw={'ecolor':'r'}, ax=ax3)
ax3.set_title('{} full {} feature importances '.format('US', 'XGB'), fontsize=10)
ax3.set_xlabel("Permutation Importance (MDA)")

'''Plot 4 DT'''

bc_dt.sort_values(by='mean', ascending=False, inplace=True)
bc_dt = bc_dt.iloc[:30,:]
bc_dt.sort_values(by='mean', ascending=True, inplace=True)
ax = bc_dt['mean'].plot(kind='barh',color='b', alpha=.40, xerr=bc_dt['std'],
                         error_kw={'ecolor':'r'}, ax=ax4)
ax4.set_title('{} full {} feature importances '.format('US', 'DT'), fontsize=10)
ax4.set_xlabel("Permutation Importance (MDA)")
plt.show()

The main difference is that the plotting is now directed at explicit matplotlib Axes objects instead of letting each pandas plot call open its own figure. You still take the data from the DataFrame; you just tell it which axes to draw on (here via ax=ax1), or you can pull the values out and call e.g. ax1.plot() or ax1.scatter() directly.
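If you prefer something more compact, the same 2x2 layout can also be built with plt.subplots and a loop. This is only a sketch on top of the DataFrames already computed above (bc_rf, bc_et, bc_xgb, bc_dt); the list of (label, DataFrame) pairs and the figure size are my own choices, not anything required by matplotlib:

fig, axes = plt.subplots(2, 2, figsize=(12, 14), sharex=True)

# pair each classifier label with its importance DataFrame (names taken from above)
results = [('RF', bc_rf), ('ET', bc_et), ('XGB', bc_xgb), ('DT', bc_dt)]

for ax, (name, imp) in zip(axes.ravel(), results):
    # keep the 30 largest mean importances, smallest at the bottom of the bar chart
    top = imp.sort_values(by='mean', ascending=False).iloc[:30].sort_values(by='mean')
    top['mean'].plot(kind='barh', color='b', alpha=.40, xerr=top['std'],
                     error_kw={'ecolor': 'r'}, ax=ax)
    ax.set_title('{} full {} feature importances'.format('US', name), fontsize=10)
    ax.set_xlabel("Permutation Importance (MDA)")

fig.tight_layout()
plt.show()

plt.subplots is just a shortcut around the GridSpec version above; sharex=True plays the same role as the sharex=ax1 arguments in the explicit plt.subplot calls.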