使用 matplotlib 绘制缩放和旋转的双变量分布

Question

我正在尝试 plot bivariate gaussian distribution 使用 matplotlib。我想使用两个 scatter 点（A 组）、（B 组）的 xy 坐标来执行此操作。

我想通过调整 COV matrix 来调整 distribution 以说明每个组 velocity 及其与附加 xy 坐标的距离用作参考点。

我计算了每组xy坐标到参考点坐标的距离。距离表示为radius，标记为[GrA_Rad]，[GrB_Rad]。

所以他们离参考点越远，radius就越大。我还计算了标记为 [GrA_Vel]、[GrB_Vel] 的 velocity。每组的direction表示为orientation。这被标记为 [GrA_Rotation]、[GrB_Rotation]

关于distribution如何针对velocity和距离(radius)进行调整的问题：

我希望使用 SVD。具体来说，如果我有每个 scatter 的 rotation angle，这将提供 direction。 velocity可以用来描述一个scalingmatrix[GrA_Scaling],[GrB_Scaling]。所以这个scalingmatrix可以用来扩大x-direction中的radius，收缩y-direction中的radius。这表示COVmatrix。

最后，distribution mean 值是通过将组 location (x,y) 平移 velocity.

的一半得到的

简单地说：radius应用于每个组的scatter点。 COV矩阵由radius和velocity调整。因此，使用 scaling matrix 扩展 x-direction 中的 radius 并收缩 y-direction 中的 radius。 direction 是从 rotation angle 测得的。然后通过将组位置 (x,y) 平移 velocity.

的一半来确定 distribution mean 值

下面是这些变量的df

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation

d = ({
    'Time' : [1,2,3,4,5,6,7,8],       
    'GrA_X' : [10,12,17,16,16,14,12,8],                 
    'GrA_Y' : [10,12,13,7,6,7,8,8], 
    'GrB_X' : [5,8,13,16,19,15,13,5],                 
    'GrB_Y' : [6,15,12,7,8,9,10,8],   
    'Reference_X' : [6,8,14,18,13,11,16,15],                 
    'Reference_Y' : [10,12,8,12,15,12,10,8],                  
    'GrA_Rad' : [8.3,8.25,8.2,8,8.15,8.15,8.2,8.3],  
    'GrB_Rad' : [8.3,8.25,8.3,8.4,8.6,8.4,8.3,8.65],               
    'GrA_Vel' : [0,2.8,5.1,6.1,1.0,2.2,2.2,4.0],
    'GrB_Vel' : [0,9.5,5.8,5.8,3.16,4.12,2.2,8.2],               
    'GrA_Scaling' : [0,0.22,0.39,0.47,0.07,0.17,0.17,0.31],
    'GrB_Scaling' : [0,0.53,0.2,0.2,0.06,0.1,0.03,0.4],                   
    'GrA_Rotation' : [0,45,23.2,-26.56,-33.69,-36.86,-45,-135], 
    'GrB_Rotation' : [0,71.6,36.87,5.2,8.13,16.70,26.57,90],                       
     })

df = pd.DataFrame(data = d)

我已经为每个 xy 坐标制作了一个 animated plot。

GrA_X = [10,12,17,16,16,14,12,8]
GrA_Y = [10,12,13,7,6,7,8,8]

GrB_X = [5,8,13,16,19,15,13,5]                 
GrB_Y = [6,15,12,10,8,9,10,8]

Item_X = [6,8,14,18,13,11,16,15]  
Item_Y = [10,12,8,12,15,12,10,8]

scatter_GrA = ax.scatter(GrA_X, GrA_Y) 
scatter_GrB = ax.scatter(GrB_X, GrB_Y) 
scatter_Item = ax.scatter(Item_X, Item_Y) 

def animate(i) :
    scatter_GrA.set_offsets([[GrA_X[0+i], GrA_Y[0+i]]])
    scatter_GrB.set_offsets([[GrB_X[0+i], GrB_Y[0+i]]])
    scatter_Item.set_offsets([[Item_X[0+i], Item_Y[0+i]]])    

ani = animation.FuncAnimation(fig, animate, np.arange(0,9),
                              interval = 1000, blit = False)

Answer 1

更新

问题已更新，并且变得更加清晰。我已经更新了我的代码以匹配。这是最新的输出：

除了样式，我认为这与 OP 描述的相符。

这是用于生成上述图的代码：

dfake = ({    
    'GrA_X' : [15,15],                 
    'GrA_Y' : [15,15], 
    'Reference_X' : [15,3],                 
    'Reference_Y' : [15,15],                  
    'GrA_Rad' : [15,25],                 
    'GrA_Vel' : [0,10],
    'GrA_Scaling' : [0,0.5],
    'GrA_Rotation' : [0,45]                     
})

dffake = pd.DataFrame(dfake)
fig,axs = plt.subplots(1, 2, figsize=(16,8))
fig.subplots_adjust(0,0,1,1)
plotone(dffake, 'A', 0, xlim=(0,30), ylim=(0,30), fig=fig, ax=axs[0])
plotone(dffake, 'A', 1, xlim=(0,30), ylim=(0,30), fig=fig, ax=axs[1])
plt.show()

我使用的 plotone 函数的完整实现在下面的代码块中。如果您只想了解用于生成和转换二维高斯 PDF 的数学，请查看 mvpdf 函数（以及它所依赖的 rot 和 getcov 函数）：

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts

def rot(theta):
    theta = np.deg2rad(theta)
    return np.array([
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)]
    ])

def getcov(radius=1, scale=1, theta=0):
    cov = np.array([
        [radius*(scale + 1), 0],
        [0, radius/(scale + 1)]
    ])

    r = rot(theta)
    return r @ cov @ r.T

def mvpdf(x, y, xlim, ylim, radius=1, velocity=0, scale=0, theta=0):
    """Creates a grid of data that represents the PDF of a multivariate gaussian.

    x, y: The center of the returned PDF
    (xy)lim: The extent of the returned PDF
    radius: The PDF will be dilated by this factor
    scale: The PDF be stretched by a factor of (scale + 1) in the x direction, and squashed by a factor of 1/(scale + 1) in the y direction
    theta: The PDF will be rotated by this many degrees

    returns: X, Y, PDF. X and Y hold the coordinates of the PDF.
    """
    # create the coordinate grids
    X,Y = np.meshgrid(np.linspace(*xlim), np.linspace(*ylim))

    # stack them into the format expected by the multivariate pdf
    XY = np.stack([X, Y], 2)

    # displace xy by half the velocity
    x,y = rot(theta) @ (velocity/2, 0) + (x, y)

    # get the covariance matrix with the appropriate transforms
    cov = getcov(radius=radius, scale=scale, theta=theta)

    # generate the data grid that represents the PDF
    PDF = sts.multivariate_normal([x, y], cov).pdf(XY)

    return X, Y, PDF

def plotmv(x, y, xlim=None, ylim=None, radius=1, velocity=0, scale=0, theta=0, xref=None, yref=None, fig=None, ax=None):
    """Plot an xy point with an appropriately tranformed 2D gaussian around it.
    Also plots other related data like the reference point.
    """
    if xlim is None: xlim = (x - 5, x + 5)
    if ylim is None: ylim = (y - 5, y + 5)

    if fig is None:
        fig = plt.figure(figsize=(8,8))
        ax = fig.gca()
    elif ax is None:
        ax = fig.gca()

    # plot the xy point
    ax.plot(x, y, '.', c='C0', ms=20)

    if not (xref is None or yref is None):
        # plot the reference point, if supplied
        ax.plot(xref, yref, '.', c='w', ms=12)

    # plot the arrow leading from the xy point
    if velocity > 0:
        ax.arrow(x, y, *rot(theta) @ (velocity, 0), 
                 width=.4, length_includes_head=True, ec='C0', fc='C0')

    # fetch the PDF of the 2D gaussian
    X, Y, PDF = mvpdf(x, y, xlim=xlim, ylim=ylim, radius=radius, velocity=velocity, scale=scale, theta=theta)

    # normalize PDF by shifting and scaling, so that the smallest value is 0 and the largest is 1
    normPDF = PDF - PDF.min()
    normPDF = normPDF/normPDF.max()

    # plot and label the contour lines of the 2D gaussian
    cs = ax.contour(X, Y, normPDF, levels=6, colors='w', alpha=.5)
    ax.clabel(cs, fmt='%.3f', fontsize=12)

    # plot the filled contours of the 2D gaussian. Set levels high for smooth contours
    cfs = ax.contourf(X, Y, normPDF, levels=50, cmap='viridis', vmin=-.9, vmax=1)

    # create the colorbar and ensure that it goes from 0 -> 1
    cbar = fig.colorbar(cfs, ax=ax)
    cbar.set_ticks([0, .2, .4, .6, .8, 1])

    # add some labels
    ax.grid()
    ax.set_xlabel('X distance (M)')
    ax.set_ylabel('Y distance (M)')

    # ensure that x vs y scaling doesn't disrupt the transforms applied to the 2D gaussian
    ax.set_aspect('equal', 'box')

    return fig, ax

def fetchone(df, l, i, **kwargs):
    """Fetch all the needed data for one xy point
    """
    keytups = (
        ('x', 'Gr%s_X'%l),
        ('y', 'Gr%s_Y'%l),
        ('radius', 'Gr%s_Rad'%l),
        ('velocity', 'Gr%s_Vel'%l),
        ('scale', 'Gr%s_Scaling'%l),
        ('theta', 'Gr%s_Rotation'%l),
        ('xref', 'Reference_X'),
        ('yref', 'Reference_Y')
    )

    ret = {k:df.loc[i, l] for k,l in keytups}
    # add in any overrides
    ret.update(kwargs)

    return ret

def plotone(df, l, i, xlim=None, ylim=None, fig=None, ax=None, **kwargs):
    """Plot exactly one point from the dataset
    """
    # look up all the data to plot one datapoint
    xydata = fetchone(df, l, i, **kwargs)

    # do the plot
    return plotmv(xlim=xlim, ylim=ylim, fig=fig, ax=ax, **xydata)

旧答案-2

我已经调整了我的答案以匹配 OP 发布的示例：

生成上图的代码如下：

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts

def rot(theta):
    theta = np.deg2rad(theta)
    return np.array([
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)]
    ])

def getcov(radius=1, scale=1, theta=0):
    cov = np.array([
        [radius*(scale + 1), 0],
        [0, radius/(scale + 1)]
    ])

    r = rot(theta)
    return r @ cov @ r.T

def datalimits(*data, pad=.15):
    dmin,dmax = min(d.min() for d in data), max(d.max() for d in data)
    spad = pad*(dmax - dmin)
    return dmin - spad, dmax + spad

d = ({
    'Time' : [1,2,3,4,5,6,7,8],       
    'GrA_X' : [10,12,17,16,16,14,12,8],                 
    'GrA_Y' : [10,12,13,7,6,7,8,8], 
    'GrB_X' : [5,8,13,16,19,15,13,5],                 
    'GrB_Y' : [6,15,12,7,8,9,10,8],   
    'Reference_X' : [6,8,14,18,13,11,16,15],                 
    'Reference_Y' : [10,12,8,12,15,12,10,8],                  
    'GrA_Rad' : [8.3,8.25,8.2,8,8.15,8.15,8.2,8.3],  
    'GrB_Rad' : [8.3,8.25,8.3,8.4,8.6,8.4,8.3,8.65],               
    'GrA_Vel' : [0,2.8,5.1,6.1,1.0,2.2,2.2,4.0],
    'GrB_Vel' : [0,9.5,5.8,5.8,3.16,4.12,2.2,8.2],               
    'GrA_Scaling' : [0,0.22,0.39,0.47,0.07,0.17,0.17,0.31],
    'GrB_Scaling' : [0,0.53,0.2,0.2,0.06,0.1,0.03,0.4],                   
    'GrA_Rotation' : [0,45,23.2,-26.56,-33.69,-36.86,-45,-135], 
    'GrB_Rotation' : [0,71.6,36.87,5.2,8.13,16.70,26.57,90],                       
     })

df = pd.DataFrame(data=d)

limitpad = .5
clevels = 5
cflevels = 50

xmin,xmax = datalimits(df['GrA_X'], df['GrB_X'], pad=limitpad)
ymin,ymax = datalimits(df['GrA_Y'], df['GrB_Y'], pad=limitpad)

X,Y = np.meshgrid(np.linspace(xmin, xmax), np.linspace(ymin, ymax))

fig = plt.figure(figsize=(10,6))
ax = plt.gca()

Zs = []
for l,color in zip('AB', ('red', 'yellow')):
    # plot all of the points from a single group
    ax.plot(df['Gr%s_X'%l], df['Gr%s_Y'%l], '.', c=color, ms=15, label=l)

    Zrows = []
    for _,row in df.iterrows():
        x,y = row['Gr%s_X'%l], row['Gr%s_Y'%l]

        cov = getcov(radius=row['Gr%s_Rad'%l], scale=row['Gr%s_Scaling'%l], theta=row['Gr%s_Rotation'%l])
        mnorm = sts.multivariate_normal([x, y], cov)
        Z = mnorm.pdf(np.stack([X, Y], 2))
        Zrows.append(Z)

    Zs.append(np.sum(Zrows, axis=0))

# plot the reference points

# create Z from the difference of the sums of the 2D Gaussians from group A and group B
Z = Zs[0] - Zs[1]

# normalize Z by shifting and scaling, so that the smallest value is 0 and the largest is 1
normZ = Z - Z.min()
normZ = normZ/normZ.max()

# plot and label the contour lines
cs = ax.contour(X, Y, normZ, levels=clevels, colors='w', alpha=.5)
ax.clabel(cs, fmt='%2.1f', colors='w')#, fontsize=14)

# plot the filled contours. Set levels high for smooth contours
cfs = ax.contourf(X, Y, normZ, levels=cflevels, cmap='viridis', vmin=0, vmax=1)
# create the colorbar and ensure that it goes from 0 -> 1
cbar = fig.colorbar(cfs, ax=ax)
cbar.set_ticks([0, .2, .4, .6, .8, 1])


ax.set_aspect('equal', 'box')

旧答案-1

很难确切地说出您在寻找什么。可以通过协方差矩阵缩放和旋转多元高斯分布。以下是如何根据您的数据执行此操作的示例：

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts

def rot(theta):
    theta = np.deg2rad(theta)
    return np.array([
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)]
    ])

def getcov(scale, theta):
    cov = np.array([
        [1*(scale + 1), 0],
        [0, 1/(scale + 1)]
    ])

    r = rot(theta)
    return r @ cov @ r.T

d = ({
    'Time' : [1,2,3,4,5,6,7,8],       
    'GrA_X' : [10,12,17,16,16,14,12,8],                 
    'GrA_Y' : [10,12,13,7,6,7,8,8], 
    'GrB_X' : [5,8,13,16,19,15,13,5],                 
    'GrB_Y' : [6,15,12,7,8,9,10,8],   
    'Reference_X' : [6,8,14,18,13,11,16,15],                 
    'Reference_Y' : [10,12,8,12,15,12,10,8],                  
    'GrA_Rad' : [8.3,8.25,8.2,8,8.15,8.15,8.2,8.3],  
    'GrB_Rad' : [8.3,8.25,8.3,8.4,8.6,8.4,8.3,8.65],               
    'GrA_Vel' : [0,2.8,5.1,6.1,1.0,2.2,2.2,4.0],
    'GrB_Vel' : [0,9.5,5.8,5.8,3.16,4.12,2.2,8.2],               
    'GrA_Scaling' : [0,0.22,0.39,0.47,0.07,0.17,0.17,0.31],
    'GrB_Scaling' : [0,0.53,0.2,0.2,0.06,0.1,0.03,0.4],                   
    'GrA_Rotation' : [0,45,23.2,-26.56,-33.69,-36.86,-45,-135], 
    'GrB_Rotation' : [0,71.6,36.87,5.2,8.13,16.70,26.57,90],                       
     })

df = pd.DataFrame(data=d)
xmin,xmax = min(df['GrA_X'].min(), df['GrB_X'].min()), max(df['GrA_X'].max(), df['GrB_X'].max())
ymin,ymax = min(df['GrA_Y'].min(), df['GrB_Y'].min()), max(df['GrA_Y'].max(), df['GrB_Y'].max())

X,Y = np.meshgrid(
    np.linspace(xmin - (xmax - xmin)*.1, xmax + (xmax - xmin)*.1),
    np.linspace(ymin - (ymax - ymin)*.1, ymax + (ymax - ymin)*.1)
)

fig,axs = plt.subplots(df.shape[0], sharex=True, figsize=(4, 4*df.shape[0]))
fig.subplots_adjust(0,0,1,1,0,-.82)

for (_,row),ax in zip(df.iterrows(), axs):
    for c in 'AB':
        x,y = row['Gr%s_X'%c], row['Gr%s_Y'%c]

        cov = getcov(scale=row['Gr%s_Scaling'%c], theta=row['Gr%s_Rotation'%c])
        mnorm = sts.multivariate_normal([x, y], cov)
        Z = mnorm.pdf(np.stack([X, Y], 2))

        ax.contour(X, Y, Z)

        ax.plot(row['Gr%s_X'%c], row['Gr%s_Y'%c], 'x')
        ax.set_aspect('equal', 'box')

这输出：

使用 matplotlib 绘制缩放和旋转的双变量分布

Plot scaled and rotated bivariate distribution using matplotlib

python

gaussian

matplotlib

covariance

pandas

更新

旧答案-2

旧答案-1