
Does anyone know how to put a stopwatch before and after training each model to evaluate which one is faster?

我创建了一个贷款风险预测的 Python 机器学习模型,用来预测借款人是否有能力偿还银行贷款。我的模型运行良好,准确率为 78%。然而,我的教授告诉我:“在训练每个模型之前和之后各放一个秒表,以评估哪个模型更快;更进一步,要在速度和准确性之间取得最佳权衡(我们想要既快速又准确的模型)。”但我不知道如何给模型加上秒表。我在互联网上搜索过,没有找到任何关于如何给模型计时的信息。如果有人知道如何在训练每个模型的前后放置秒表,请告诉我。


# Importing the Libraries
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import StandardScaler
import seaborn as sns
sns.set(style="white", color_codes=True)

# Load the training data from the working directory and immediately discard
# every row that contains at least one missing value.
data = pd.read_csv("credit_train.csv").dropna(axis=0)

# Inspect the "Home Ownership" category: its distinct labels and how often each
# one occurs. The results are not stored; outside an interactive session they
# are simply discarded.
home_ownership = data["Home Ownership"]
home_ownership.unique()
home_ownership.value_counts()

# Data Representation
# Scatter plot of loan amount against monthly debt, one colour per loan status.
# `height` is the current name of the facet-size keyword: seaborn renamed the
# old `size` argument in 0.9 and later releases reject it.
# The chain is wrapped in parentheses instead of trailing backslashes, and the
# legend is added so the hue colours can be mapped back to a loan status.
(
    sns.FacetGrid(data, hue="Loan Status", height=4)
    .map(plt.scatter, "Current Loan Amount", "Monthly Debt")
    .add_legend()
)

# Categorical attributes visualization
# sns.countplot draws on the current Axes when no `ax` is given, so without a
# fresh figure per chart all five plots would be stacked on the same Axes
# (the one left over from the previous plot) and overwrite each other.
categorical_plots = (
    {"x": "Loan Status"},
    {"x": "Years in current job"},
    {"x": "Home Ownership"},
    {"x": "Loan Status", "hue": "Home Ownership"},
    {"x": "Loan Status", "hue": "Term"},
)
for plot_kwargs in categorical_plots:
    plt.figure()
    sns.countplot(data=data, **plot_kwargs)

# Numerical attributes visualization
# One figure per variable: the columns live on very different scales, and
# drawing them all on the current Axes would superimpose the distributions.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases in favour
# of histplot/displot; it is kept here so the plots look the same as before.
numerical_columns = (
    'Current Loan Amount',
    'Annual Income',
    'Credit Score',
    'Monthly Debt',
    'Current Credit Balance',
)
for column in numerical_columns:
    plt.figure()
    sns.distplot(data[column])

# Normalization and log transformation
# Each skewed numeric column gets a "<name> Log" companion column holding
# log(1 + x). np.log1p computes exactly that, but stays accurate for values
# close to zero where np.log(x + 1) loses precision.
log_source_columns = [
    'Current Loan Amount',
    'Credit Score',
    'Annual Income',
    'Monthly Debt',
    'Current Credit Balance',
]
for source_column in log_source_columns:
    log_column = source_column + ' Log'
    data[log_column] = np.log1p(data[source_column])
    # Fresh figure per plot so the distributions are not drawn over each other.
    plt.figure()
    sns.distplot(data[log_column])

# Drop unnecessary columns
# Identifiers, the raw columns that now have a log-transformed replacement,
# and the categorical columns that are not used as features.
data = data.drop(
    columns=[
        'Loan ID',
        'Customer ID',
        "Current Loan Amount",
        "Credit Score",
        "Annual Income",
        'Years in current job',
        'Current Credit Balance',
        'Purpose',
        'Monthly Debt',
    ]
)

# Correlation Matrix of the columns given below
# Each column is listed once; a repeated name would produce a duplicated
# row/column in the matrix.
cols = ['Credit Score Log', 'Annual Income Log', 'Monthly Debt Log',
        'Current Credit Balance Log', 'Current Loan Amount Log',
        'Tax Liens', 'Years of Credit History', 'Maximum Open Credit']
f, ax = plt.subplots(figsize=(15, 10))
# The matrix is computed from `data` restricted to `cols` (the name `df` was
# never defined), so its shape matches the tick labels. np.corrcoef treats
# every row as one variable, hence the transpose.
cm = np.corrcoef(data[cols].values.T)
hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f',
                 annot_kws={'size': 15}, yticklabels=cols, xticklabels=cols,
                 ax=ax)

# Label Encoding
# Replace the string labels of the target and of the two remaining categorical
# features with integer codes. A single encoder is re-fitted column by column,
# so after this step `le` holds the mapping of the last column only.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
cols = ['Loan Status', "Term", "Home Ownership"]
data[cols] = data[cols].apply(le.fit_transform)

# data slicing
# `y` is the encoded loan status; `x` is every other remaining column.
target_column = 'Loan Status'
y = data[target_column]
x = data.drop(columns=[target_column])

# Train-Test Split
# 70 % of the rows for training, 30 % held out for testing; the fixed seed
# makes the split reproducible.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Random forest model
# Importing libraries and classes
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()

# Train the model on the training split. This call was missing: predict() on
# an unfitted estimator raises NotFittedError.
model.fit(x_train, y_train)

# Find accuracy in training and testing model
# score() returns the mean accuracy on the given data and labels.
print("Training accuracy:", model.score(x_train, y_train))
print("Testing accuracy:", model.score(x_test, y_test))

# Predict the value of test dataset
predicted = model.predict(x_test)

# Generating Report
from sklearn import metrics
print(metrics.classification_report(y_test, predicted))

# Confusion Matrix
print(metrics.confusion_matrix(y_test, predicted))
# Stopwatch pattern: read a clock right before and right after the code to be
# measured; the difference of the two readings is the elapsed time.
# perf_counter() is used instead of time() because it is monotonic and has the
# highest available resolution, so the reading cannot jump when the system
# clock is adjusted.
from time import perf_counter

t_bef = perf_counter()
# ... put the code to be timed here, e.g. model.fit(x_train, y_train) ...
t_aft = perf_counter()

elapsed_s = t_aft - t_bef      # perf_counter() readings are in seconds
elapsed_ms = elapsed_s * 1000  # seconds * 1000 = milliseconds (not microseconds)

print("function took", elapsed_s, "seconds")                # stmt1
print("function took", elapsed_ms, "milliseconds")          # stmt2

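您可以在待计时代码的前后分别调用 time.time()(或精度更高的 time.perf_counter())读取时间,两次读数相减即可实现秒表的效果:把 model.fit(...) 等训练代码放在 t_bef 与 t_aft 两次读数之间,对每个模型各测一次,再比较结果。请注意,差值的单位是秒;如果运行时间很短,可以像 stmt2 那样乘以 1000 换算成毫秒,以便阅读。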