matplotlib plot_surface 用于二维多元线性回归

matplotlib plot_surface for 2-dimensional multiple linear regression

我有很多三维数据点:x1、x2 和 y。我能够计算这些点的多元线性回归,也能够在 3D 散点图上显示这些点,但我不知道如何绘制为这些点计算出的多元线性回归结果:就像在一维线性回归中可以绘制一条最佳拟合直线一样,我想为二维线性回归绘制"最佳拟合平面"(plane of best fit)。

我的代码如下:

import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

# collect data into numpy arrays
# each line of the file is expected to hold three comma-separated values: x1, x2, y
X = []
Y = []
for line in open('data_2d.csv'):
    x1, x2, y = line.split(',')
    X.append([1, float(x1), float(x2)]) # here X[i][0] represents x0 = 1 (the intercept term)
    Y.append(float(y))
X = np.array(X)
Y = np.array(Y)

# calculate weights by solving the normal equations (X^T X) w = X^T Y
w = np.linalg.solve(np.dot(X.T,X), np.dot(X.T, Y))
Yhat = np.dot(X, w) # results of linear regression for data points

# plot results
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:,1], X[:,2], Y)
# This call is the problem being asked about: X[:,1], X[:,2] and Yhat are 1-D
# arrays of per-point values, whereas plot_surface expects 2-D coordinate
# matrices, so no plane is drawn.
ax.plot_surface(X[:,1], X[:,2], Yhat) # doesn't seem to work
plt.show()

这是我拼凑的一个简单示例,演示了 3D 散点图、3D 曲面图和等高线图。

import numpy, scipy
import matplotlib
from mpl_toolkits.mplot3d import  Axes3D
from matplotlib import cm # to colormap 3D surfaces from blue to red
import matplotlib.pyplot as plt

# Figure size in pixels. The plot functions divide these by 100.0 and create
# the figure with dpi=100, so figsize (in inches) * dpi gives these values.
graphWidth = 800 # units are pixels
graphHeight = 600 # units are pixels

# number of contour levels passed to contour() in ContourPlot
numberOfContourLines = 16


def SurfacePlot(equationFunc, data):
    """Show a 3D surface of ``equationFunc`` with the data points overlaid.

    Parameters
    ----------
    equationFunc : callable
        Called once with ``numpy.array([X, Y])``, where ``X`` and ``Y`` are
        the 20x20 meshgrid matrices built below; its return value is used as
        the surface heights ``Z``.
    data : sequence
        ``data[0]``, ``data[1]`` and ``data[2]`` are the x, y and z values of
        the data points.

    The figure is displayed with ``plt.show()`` and all pyplot figures are
    closed afterwards.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    # Create the 3D axes through the figure so that they are registered with
    # it. Constructing Axes3D(f) directly was deprecated in Matplotlib 3.4 and
    # no longer attaches the axes in later releases (the figure stays blank).
    axes = f.add_subplot(111, projection='3d')
    # Enable the grid on the 3D axes themselves; calling pyplot.grid() before
    # any axes exist would implicitly create an extra 2D axes in the figure.
    axes.grid(True)

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    # Evaluate the model on a regular 20x20 grid spanning the data's x/y range.
    xModel = numpy.linspace(min(x_data), max(x_data), 20)
    yModel = numpy.linspace(min(y_data), max(y_data), 20)
    X, Y = numpy.meshgrid(xModel, yModel)

    Z = equationFunc(numpy.array([X, Y]))

    axes.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=1, antialiased=True)

    axes.scatter(x_data, y_data, z_data) # show data along with plotted surface

    axes.set_title('Surface Plot (click-drag with mouse)') # add a title for surface plot
    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label
    axes.set_zlabel('Z Data') # Z axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems


def ContourPlot(equationFunc, data):
    """Show a 2D contour plot of ``equationFunc`` with the data points overlaid.

    ``data[0]`` and ``data[1]`` supply the x and y values of the data points
    (``data[2]`` is read but not drawn). ``equationFunc`` is called once with
    ``numpy.array([X, Y])`` built from a 20x20 meshgrid spanning the data's
    x/y range, and its result is contoured in black with labelled levels.
    """
    fig = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    ax = fig.add_subplot(111)

    xs, ys, _zs = data[0], data[1], data[2]

    # Regular grid covering the extent of the data in both directions.
    grid_x, grid_y = numpy.meshgrid(
        numpy.linspace(min(xs), max(xs), 20),
        numpy.linspace(min(ys), max(ys), 20),
    )
    grid_z = equationFunc(numpy.array([grid_x, grid_y]))

    # Raw data points as circular markers.
    ax.plot(xs, ys, 'o')

    # Contours are drawn on the current axes, i.e. the subplot created above.
    contours = plt.contour(grid_x, grid_y, grid_z, numberOfContourLines, colors='k')
    plt.clabel(contours, inline=1, fontsize=10) # labels for contours

    ax.set_title('Contour Plot')
    ax.set_xlabel('X Data')
    ax.set_ylabel('Y Data')

    plt.show()
    plt.close('all') # release pyplot state, otherwise memory and process problems can occur


def ScatterPlot(data):
    """Show a 3D scatter plot of the data points.

    Parameters
    ----------
    data : sequence
        ``data[0]``, ``data[1]`` and ``data[2]`` are the x, y and z values of
        the points to draw.

    The figure is displayed with ``plt.show()`` and all pyplot figures are
    closed afterwards.
    """
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)

    # Create the 3D axes through the figure so that they are registered with
    # it. Constructing Axes3D(f) directly was deprecated in Matplotlib 3.4 and
    # no longer attaches the axes in later releases (the figure stays blank).
    axes = f.add_subplot(111, projection='3d')
    # Enable the grid on the 3D axes themselves; calling pyplot.grid() before
    # any axes exist would implicitly create an extra 2D axes in the figure.
    axes.grid(True)

    x_data = data[0]
    y_data = data[1]
    z_data = data[2]

    axes.scatter(x_data, y_data, z_data)

    axes.set_title('Scatter Plot (click-drag with mouse)')
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')
    axes.set_zlabel('Z Data')

    plt.show()
    plt.close('all') # clean up after using pyplot or else there can be memory and process problems


def EquationFunc(data):
    """Return ``5.0 + sqrt(data[0]) + cos(data[1] / 5.0)``.

    ``data[0]`` and ``data[1]`` are the x and y inputs; the plot functions in
    this example pass them as stacked meshgrid matrices.
    """
    x_term = numpy.sqrt(data[0])
    y_term = numpy.cos(data[1] / 5.0)
    return 5.0 + x_term + y_term


if __name__ == "__main__":
    # Nine sample points; each array holds one coordinate of every point.
    xData = numpy.arange(1.0, 10.0) # 1.0, 2.0, ..., 9.0
    yData = numpy.array([11.0, 12.1, 13.0, 14.1, 15.0, 16.1, 17.0, 18.1, 90.0])
    zData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.0, 9.9])

    # The plot functions index this as data[0] = x, data[1] = y, data[2] = z.
    data = [xData, yData, zData]

    # Each call shows one figure in turn.
    ScatterPlot(data)
    SurfacePlot(EquationFunc, data)
    ContourPlot(EquationFunc, data)

原来 plot_surface 要求它的每个输入都是坐标矩阵,而不是我之前使用的值列表。我的解决方法如下:

import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

# collect data into numpy arrays
X = []
Y = []
# 'with' closes the file handle even if a row fails to parse
with open('data_2d.csv') as data_file: # contains 3 columns: x1, x2, and y
    for line in data_file:
        if not line.strip():
            continue # tolerate blank lines (e.g. a trailing newline at end of file)
        x1, x2, y = line.split(',')
        X.append([1, float(x1), float(x2)]) # here X[i][0] represents x0 = 1
        Y.append(float(y))
X = np.array(X)
Y = np.array(Y)

# calculate weights by solving the normal equations (X^T X) w = X^T Y
w = np.linalg.solve(X.T.dot(X), X.T.dot(Y))

# calculate r-squared error given weights
Yhat = X.dot(w)
d1 = Y - Yhat
d2 = Y - Y.mean()
r2 = 1 - d1.dot(d1) / d2.dot(d2)
print("r-squared value of", r2)

# calculate plane of best fit
divs = 2 # number of divisions in surface: generates divs^2 points.
         # The surface is a plane, so just 2^2 = 4 points can define it.
# plane spans all values of x1 and x2 from data
x1_range = np.linspace(X[:,1].min(), X[:,1].max(), divs)
x2_range = np.linspace(X[:,2].min(), X[:,2].max(), divs)

# generate coordinate matrices for x1 and x2 values.
# indexing='ij' gives X1[i, j] == x1_range[i] and X2[i, j] == x2_range[j], so
# the x-axis of the surface carries x1 and the y-axis carries x2, matching the
# scatter plot below. (The default 'xy' indexing with swapped names put x2
# values on the x-axis, which is only correct when both ranges coincide.)
X1, X2 = np.meshgrid(x1_range, x2_range, indexing='ij')

# design matrix for the grid points, one row [1, x1, x2] per point
X_plane = np.column_stack([np.ones(X1.size), X1.ravel(), X2.ravel()])
# values of y are equal to the linear regression of points on the plane,
# reshaped back into a coordinate matrix so that Yhat2[i, j] is the
# prediction at (x1_range[i], x2_range[j])
Yhat2 = X_plane.dot(w).reshape(X1.shape)

# plot results
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:,1], X[:,2], Y) # supplied data
ax.plot_surface(X1, X2, Yhat2, color='y', alpha=0.1) # plane of best fit
plt.show()

输出如下所示。圆点代表输入数据,黄色矩形代表其最佳拟合平面,用 plot_surface 绘制。