为清楚起见,在散点图中使用 "bin" 水平微调点
Horizontally microadjust points with a "bin" in a scatter plot for clarity
我有一个看起来像这样的散点图:
| x
2 | x
| o
| o x
1 | x o
| o x o
| x x
|________________
foo bar baz
代码类似于:
# Load the table; the first five CSV columns become the row MultiIndex.
data = pd.read_csv("data", index_col=[0, 1, 2, 3, 4])
# Keep only the rows labelled "var_x" on index level 0.
variable_x = data.xs("var_x", level=0)

a_list = ["a1", "a2", "a3", "a4", "a5"]
b_list = variable_x.index.get_level_values(1).unique().to_list()
c_list = variable_x.index.get_level_values(2).unique().to_list()

# One colour per entry of a_list, one marker per "b" label.
colours = {
    "a1": "r",
    "a2": "g",
    "a3": "b",
    "a4": "c",
    "a5": "k",
}
markers = {
    "b1": "x",
    "b2": "o",
    "b3": "D",
    "b4": "X",
    "b5": "*",
}

fig, axs = plt.subplots(1, 3, sharey=True)
ax = axs[0]
for a in a_list:
    color = colours[a]
    for b in b_list:
        marker = markers[b]
        for c in c_list:
            # NOTE(review): the selection uses the whole a_list rather than the
            # loop variable `a`, so the same values are drawn once per colour.
            # Presumably `a` was intended -- confirm against the real index layout.
            vals = variable_x.loc[:, a_list, :, :, :].xs(b, level=1).xs(c, level=2)
            # Every value of a (b, c) combination is drawn at the same x
            # position `c`, which is what stacks the points into vertical lines.
            for val in vals:
                ax.scatter(c, val, color=color, marker=marker, s=5)
抱歉，如果我的伪代码不太说得通，那可能是我从实际代码转写时出了错。
实际数据的点数较多，所以每一列竖直排列的点都挤在一起，不易区分。有没有办法微调 `x`、`o` 等标记的水平位置，使它们彼此错开一小段距离，但仍落在正确的“bin”内？
我以前用下面这个辅助函数做过类似的事情：
def raw_data_scatter(array, xcenter, spread):
    """Return randomly jittered x positions for plotting ``array``.

    Each offset is drawn uniformly from ``[0, spread / 2)``. The first
    ``len(array) // 2`` offsets are negated and the offsets are then
    shuffled, so about half of the points land on each side of ``xcenter``.

    Args:
        array: Sized sequence of y values.
        xcenter: x coordinate the points are centred on.
        spread: Total width of the horizontal band the points fall in.

    Returns:
        A tuple ``(x, y)``: ``x`` is a NumPy array with one jittered
        position per element of ``array``; ``y`` is ``array`` itself.
    """
    y = array
    x = np.random.uniform(0, spread / 2, size=len(y))
    # Mirror half of the offsets to the left of the centre in one slice
    # operation instead of an element-by-element loop.
    x[: len(y) // 2] *= -1
    # Shuffle so the left/right side does not depend on position in `array`.
    np.random.shuffle(x)
    x += xcenter
    return x, y
给定一个 y 值数组和一个以它们为中心的 x 点,它会在 x 方向上生成噪声以进行绘图。它是随机的(所以点不会根据它们的密度分布),但它很简单而且我认为看起来仍然不错。这是一个例子:
# Demo data: three columns of 20 random integers each.
df = pd.DataFrame({'foo': np.random.randint(1, 100, 20),
                   'bar': np.random.randint(25, 125, 20),
                   'baz': np.random.randint(10, 60, 20)})

fig, ax = plt.subplots()
# One tick per column at x = 0, 1, 2, ..., labelled with the column name.
ax.set_xticks(range(len(df.columns)))
ax.set_xticklabels(df.columns)
for i, col in enumerate(df.columns):
    # Jitter each column's points around its own tick position.
    x, y = raw_data_scatter(df[col], xcenter=i, spread=.16)
    ax.scatter(x, y)
从另一个答案中得到灵感,我的最终方法是这样的
# Load the table; the first five CSV columns become the row MultiIndex.
data = pd.read_csv("data", index_col=[0, 1, 2, 3, 4])
# Keep only the rows labelled "var_x" on index level 0.
variable_x = data.xs("var_x", level=0)

a_list = ["a1", "a2", "a3", "a4", "a5"]
b_list = variable_x.index.get_level_values(1).unique().to_list()
c_list = variable_x.index.get_level_values(2).unique().to_list()

# One colour per entry of a_list, one marker per "b" label.
colours = {
    "a1": "r",
    "a2": "g",
    "a3": "b",
    "a4": "c",
    "a5": "k",
}
markers = {
    "b1": "x",
    "b2": "o",
    "b3": "D",
    "b4": "X",
    "b5": "*",
}

fig, axs = plt.subplots(1, 3, sharey=True)
ax = axs[0]
# Horizontal distance between neighbouring "a" series inside one bin.
offset_scale = .14
for a_num, a in enumerate(a_list):
    # Centre the series on the tick: subtracting (n - 1) / 2 makes the
    # offsets symmetric (-0.28 ... +0.28 for five series). Subtracting n / 2
    # would shift the whole group half a step to the left.
    offset = (a_num - (len(a_list) - 1) / 2) * offset_scale
    color = colours[a]
    for b in b_list:
        marker = markers[b]
        for c_num, c in enumerate(c_list):
            # NOTE(review): the selection uses the whole a_list rather than the
            # loop variable `a`, so the same values are drawn once per colour.
            # Presumably `a` was intended -- confirm against the real index layout.
            vals = variable_x.loc[:, a_list, :, :, :].xs(b, level=1).xs(c, level=2)
            for val in vals:
                # Plot at the bin's integer position plus this series' offset.
                ax.scatter(c_num + offset, val, color=color, marker=marker, s=5)

# The bins sit at integer positions, so label those ticks with the "c" values.
ax.set_xticks(range(len(c_list)))
ax.set_xticklabels(c_list)
我的绘图区域(省略轴)如下所示:
我有一个看起来像这样的散点图:
| x
2 | x
| o
| o x
1 | x o
| o x o
| x x
|________________
foo bar baz
代码类似于:
# Load the table; the first five CSV columns become the row MultiIndex.
data = pd.read_csv("data", index_col=[0, 1, 2, 3, 4])
# Keep only the rows labelled "var_x" on index level 0.
variable_x = data.xs("var_x", level=0)

a_list = ["a1", "a2", "a3", "a4", "a5"]
b_list = variable_x.index.get_level_values(1).unique().to_list()
c_list = variable_x.index.get_level_values(2).unique().to_list()

# One colour per entry of a_list, one marker per "b" label.
colours = {
    "a1": "r",
    "a2": "g",
    "a3": "b",
    "a4": "c",
    "a5": "k",
}
markers = {
    "b1": "x",
    "b2": "o",
    "b3": "D",
    "b4": "X",
    "b5": "*",
}

fig, axs = plt.subplots(1, 3, sharey=True)
ax = axs[0]
for a in a_list:
    color = colours[a]
    for b in b_list:
        marker = markers[b]
        for c in c_list:
            # NOTE(review): the selection uses the whole a_list rather than the
            # loop variable `a`, so the same values are drawn once per colour.
            # Presumably `a` was intended -- confirm against the real index layout.
            vals = variable_x.loc[:, a_list, :, :, :].xs(b, level=1).xs(c, level=2)
            # Every value of a (b, c) combination is drawn at the same x
            # position `c`, which is what stacks the points into vertical lines.
            for val in vals:
                ax.scatter(c, val, color=color, marker=marker, s=5)
抱歉，如果我的伪代码不太说得通，那可能是我从实际代码转写时出了错。
实际数据的点数较多，所以每一列竖直排列的点都挤在一起，不易区分。有没有办法微调 `x`、`o` 等标记的水平位置，使它们彼此错开一小段距离，但仍落在正确的“bin”内？
我以前用下面这个辅助函数做过类似的事情：
def raw_data_scatter(array, xcenter, spread):
    """Return randomly jittered x positions for plotting ``array``.

    Each offset is drawn uniformly from ``[0, spread / 2)``. The first
    ``len(array) // 2`` offsets are negated and the offsets are then
    shuffled, so about half of the points land on each side of ``xcenter``.

    Args:
        array: Sized sequence of y values.
        xcenter: x coordinate the points are centred on.
        spread: Total width of the horizontal band the points fall in.

    Returns:
        A tuple ``(x, y)``: ``x`` is a NumPy array with one jittered
        position per element of ``array``; ``y`` is ``array`` itself.
    """
    y = array
    x = np.random.uniform(0, spread / 2, size=len(y))
    # Mirror half of the offsets to the left of the centre in one slice
    # operation instead of an element-by-element loop.
    x[: len(y) // 2] *= -1
    # Shuffle so the left/right side does not depend on position in `array`.
    np.random.shuffle(x)
    x += xcenter
    return x, y
给定一个 y 值数组和一个以它们为中心的 x 点,它会在 x 方向上生成噪声以进行绘图。它是随机的(所以点不会根据它们的密度分布),但它很简单而且我认为看起来仍然不错。这是一个例子:
# Demo data: three columns of 20 random integers each.
df = pd.DataFrame({'foo': np.random.randint(1, 100, 20),
                   'bar': np.random.randint(25, 125, 20),
                   'baz': np.random.randint(10, 60, 20)})

fig, ax = plt.subplots()
# One tick per column at x = 0, 1, 2, ..., labelled with the column name.
ax.set_xticks(range(len(df.columns)))
ax.set_xticklabels(df.columns)
for i, col in enumerate(df.columns):
    # Jitter each column's points around its own tick position.
    x, y = raw_data_scatter(df[col], xcenter=i, spread=.16)
    ax.scatter(x, y)
从另一个答案中得到灵感,我的最终方法是这样的
# Load the table; the first five CSV columns become the row MultiIndex.
data = pd.read_csv("data", index_col=[0, 1, 2, 3, 4])
# Keep only the rows labelled "var_x" on index level 0.
variable_x = data.xs("var_x", level=0)

a_list = ["a1", "a2", "a3", "a4", "a5"]
b_list = variable_x.index.get_level_values(1).unique().to_list()
c_list = variable_x.index.get_level_values(2).unique().to_list()

# One colour per entry of a_list, one marker per "b" label.
colours = {
    "a1": "r",
    "a2": "g",
    "a3": "b",
    "a4": "c",
    "a5": "k",
}
markers = {
    "b1": "x",
    "b2": "o",
    "b3": "D",
    "b4": "X",
    "b5": "*",
}

fig, axs = plt.subplots(1, 3, sharey=True)
ax = axs[0]
# Horizontal distance between neighbouring "a" series inside one bin.
offset_scale = .14
for a_num, a in enumerate(a_list):
    # Centre the series on the tick: subtracting (n - 1) / 2 makes the
    # offsets symmetric (-0.28 ... +0.28 for five series). Subtracting n / 2
    # would shift the whole group half a step to the left.
    offset = (a_num - (len(a_list) - 1) / 2) * offset_scale
    color = colours[a]
    for b in b_list:
        marker = markers[b]
        for c_num, c in enumerate(c_list):
            # NOTE(review): the selection uses the whole a_list rather than the
            # loop variable `a`, so the same values are drawn once per colour.
            # Presumably `a` was intended -- confirm against the real index layout.
            vals = variable_x.loc[:, a_list, :, :, :].xs(b, level=1).xs(c, level=2)
            for val in vals:
                # Plot at the bin's integer position plus this series' offset.
                ax.scatter(c_num + offset, val, color=color, marker=marker, s=5)

# The bins sit at integer positions, so label those ticks with the "c" values.
ax.set_xticks(range(len(c_list)))
ax.set_xticklabels(c_list)
我的绘图区域(省略轴)如下所示: