Python：计算散点图数据点的分箱平均值（Bin-Mean）

Question

我有三个长度（大小）相等的一维数组（A、B、C）。我绘制了 B 对 A 的散点图，并根据 C 数组中的对应值为每个散点着色（参见下面的代码）。

# Imports
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# Draw three independent 20x20 blocks of uniform samples:
# A and B are coordinates in [0, 10), C is the colour value in [0, 100).
A = 10 * np.random.random_sample((20, 20))
B = 10 * np.random.random_sample((20, 20))
C = 100 * np.random.random_sample((20, 20))

# Flatten each block into a 1-D array of 400 points.
A, B, C = (samples.reshape(20 * 20) for samples in (A, B, C))

# Discrete colour scale: one colour per 10-unit interval of C between 0 and 100.
cmap_C = cm.jet
cmap_C.set_bad(color='white')
bounds_C = np.arange(0, 110, 10)
norm_C = colors.BoundaryNorm(bounds_C, cmap_C.N)

# Scatter B against A, colouring each point by its C value.
fig, ax = plt.subplots()
points = ax.scatter(A, B, c=C, marker='o', s=100, cmap=cmap_C, norm=norm_C)

# One tick (and therefore one grid line) per integer, with a margin of 1 around the data.
integer_ticks = np.arange(11)
ax.set_xlim(-1, 11)
ax.set_ylim(-1, 11)
ax.set_xticks(integer_ticks)
ax.set_yticks(integer_ticks)
ax.set_xlabel('A')
ax.set_ylabel('B')
ax.grid()

fig.colorbar(points, ax=ax, label='Value of C')
plt.show()

图中有些散点相互重叠，看不清楚。因此，接下来我想计算并绘制图中每个 1×1 整数区间（bin）内所有散点的平均 C 值，使每个方形网格单元只用一种颜色着色（这些区间就是图中网格线划分出的方格）。我该怎么做？

Answer 1

不太清楚你想做什么，但我认为在动手计算之前，这个问题其实已经有了解析结果。颜色（C 向量）的期望平均值为 50，因为你生成的是 [0, 100] 上均匀分布的样本。坐标同样是均匀分布的，但这并不影响结论。当然，每个方格的平均值会有一些波动。

如果您仍需要完成这个练习，我会先构建一个“坐标 → 颜色”的映射字典，以便后续按网格筛选数据……

# Map each (x, y) coordinate pair to its colour value; a repeated pair keeps the last value seen.
color_map = dict(zip(zip(A, B), C))

然后您可以再建立一个字典来收集每个网格单元的结果，例如对坐标取 int() 得到网格索引，把数据放入对应的网格单元中。

Answer 2

以下是适合我的目的的解决方案。

# Imports
import copy
from zipfile import ZipFile

import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np

# Create the Arrays
xx = 5
yy = 5

# xx*yy random sample points: A and B are coordinates in [0, 10),
# C is the value attached to each point, in [0, 100).
A = 10 * np.random.random_sample(xx * yy)
B = 10 * np.random.random_sample(xx * yy)
C = 100 * np.random.random_sample(xx * yy)

# Unit-width bin edges 0, 1, ..., 10 along both axes (10 x 10 bins).
xedges = np.arange(11)
yedges = np.arange(11)

# Number of points per bin. histogram2d indexes its result as [x_bin, y_bin];
# the transpose is indexed [y_bin, x_bin] (row = B, column = A), which is the
# layout pcolor expects.
H, xedges, yedges = np.histogram2d(A, B, bins=(xedges, yedges))
HT = H.T

# Sum of C per bin, obtained from the same binning by weighting each point
# with its C value. This keeps the full floating-point values (no integer
# truncation before averaging) and guarantees that sums and counts agree on
# which bin every point belongs to.
C_sum = np.histogram2d(A, B, bins=(xedges, yedges), weights=C)[0]

# Bin-mean of C, indexed [y_bin, x_bin] like HT. Empty bins stay NaN so they
# can be masked when plotting; dividing only where HT > 0 avoids 0/0 warnings.
BMC = np.full(HT.shape, np.nan)
np.divide(C_sum.T, HT, out=BMC, where=HT > 0)

print(HT)
print(BMC)

# Create the Colormaps and Define Boundaries
# set_bad() modifies the colormap it is called on, so work on copies rather
# than on the shared cm.jet / cm.CMRmap_r objects.
cmap_C = copy.copy(cm.jet)
cmap_C.set_bad(color='white')
# One colour per 10-unit interval, centred on 0, 10, ..., 100.
bounds_C = np.arange(-5, 115, 10)
norm_C = mpl.colors.BoundaryNorm(bounds_C, cmap_C.N)

cmap_hist2d = copy.copy(cm.CMRmap_r)
cmap_hist2d.set_bad(color='white')
# One colour per integer count. The scale always covers 0..3 and grows when
# a bin holds more points, so no count falls outside the colour range.
max_count = max(int(HT.max()), 3)
bounds_hist2d = np.arange(-0.5, max_count + 1.5, 1)
norm_hist2d = mpl.colors.BoundaryNorm(bounds_hist2d, cmap_hist2d.N)

# Empty bins have a NaN mean; mask them so they are drawn in the "bad" colour.
BMC_plot = np.ma.array(BMC, mask=np.isnan(BMC))

# Panel 1: the raw scatter, coloured by C.
plt.subplot(311)
plt.scatter(A, B, c=C, marker='o', s=100, cmap=cmap_C, norm=norm_C)
plt.xlim([-1, 11])
plt.ylim([-1, 11])
plt.xticks(np.arange(0, 11, 1))
plt.yticks(np.arange(0, 11, 1))
plt.ylabel('B')
plt.grid()
plt.colorbar(label='Value of C', ticks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])

# Panel 2: number of points in each 1 x 1 bin.
plt.subplot(312)
x, y = np.meshgrid(xedges, yedges)
plt.pcolor(x, y, HT, cmap=cmap_hist2d, norm=norm_hist2d)
plt.xlim([-1, 11])
plt.ylim([-1, 11])
plt.xticks(np.arange(0, 11, 1))
plt.yticks(np.arange(0, 11, 1))
plt.ylabel('B')
plt.grid()
plt.colorbar(label='Number of Data in Bin', ticks=np.arange(max_count + 1))

# Panel 3: mean of C in each 1 x 1 bin (empty bins masked).
plt.subplot(313)
plt.pcolor(x, y, BMC_plot, cmap=cmap_C, norm=norm_C)
plt.xlim([-1, 11])
plt.ylim([-1, 11])
plt.xticks(np.arange(0, 11, 1))
plt.yticks(np.arange(0, 11, 1))
plt.xlabel('A')
plt.ylabel('B')
plt.grid()
plt.colorbar(label='Bin-Mean C Value', ticks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
plt.show()

Python：计算散点图数据点的分箱平均值（Bin-Mean）

Python: Compute Bin-Mean Value of Scatter Plot Bullets

mean

binning

python-3.x