如何用 scipy 计算散点图的相关系数
How to calculate the correlation coefficient for a scatter plot in SciPy
我正在尝试用 scipy 计算散点图的相关系数。问题是,我的数据集存放在一个 ndarray 中,结构有些复杂,基本语法对我不起作用……
这是我的完整代码:
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
from matplotlib.pyplot import figure
from scipy.stats import t

# Close figures left over from earlier runs *before* creating the new one.
# Closing afterwards would also close the figure created here, and the
# figsize/dpi settings would be lost.
plt.close('all')
figure(figsize=(12, 10), dpi=200)

# One row per plotted point: column 0 is the x value, column 1 the y value.
data = np.array([
    [22.8, 14.4],
    [19.6, 3.6],
    [0.3, 16.6],
    [8.9, 7],
    [13.7, 13.4],
    [14.7, 1.5],
    [1.9, 0.4],
    [-1.8, 0.3],
    [-3, -15.3],
    [-5.9, -6.3],
    [-13.4, -15],
    [-5.7, -34.8],
    [-6.8, -12.9],
])

# Label and colour for each row of `data`, in the same order as the rows.
custom_annotations = ["K464E", "K472E", "R470E", "K464A", "M155E", "K472A", "M155A", "Q539A", "M155R", "D244A", "E247A", "E247R", "D244K"]
class_colours = ["r", "r", "r", "r", "r", "r", "g", "g", "b", "b", "b", "b", "b"]

# Draw the points one at a time so each gets its own legend entry, colour
# and text annotation.
for i, point in enumerate(data):
    plt.scatter(
        point[0], point[1],
        marker='o',
        label=custom_annotations[i],
        c=class_colours[i],
        edgecolors='black',
        linewidths=1,
        alpha=0.75,
    )
    plt.annotate(custom_annotations[i], (data[i, 0], data[i, 1]))

plt.xlabel(r'$\Delta V_{0.5}$ Apo wild-type mHCN2 (mV)', fontsize=10)
plt.ylabel(r'$\Delta \psi$ cAMP-bound wild-type mHCN2 (mV)', fontsize=10)
# Raw string, like the axis labels above: "\D" and "\p" are not valid escape
# sequences in a normal string literal.
plt.title(r'$\Delta \psi$ cAMP-bound wild-type mHCN2 (HHU) vs Change in relative current (Jena)', fontsize=10)

# Dashed grey guide lines through x = 0 and y = 0.
plt.axvline(0, c=(.5, .5, .5), ls='--')
plt.axhline(0, c=(.5, .5, .5), ls='--')

# NOTE: this is the call the question is about, kept as posted. After the
# loop, `i` is the index of the last row, so data[i, 0] and data[i, 1] are two
# single numbers. pearsonr needs two arrays of equal length, so this line
# raises and the lines below are not reached. Pass the whole columns instead:
# data[:, 0] and data[:, 1].
scipy.stats.pearsonr(data[i, 0], data[i, 1])

# loc is in axes coordinates; x = 1.04 puts the legend just right of the axes.
plt.legend(ncol=3, loc=(1.04, 0))
plt.show()
`pearsonr` 适用于您的数据:
# Slice with ":" instead of "i" so that each argument is a whole column
# (a 1-D array) rather than a single value.
# pearsonr returns (r_coeff, p_value); unpack and print them, because a bare
# expression in a script discards the result.
r_coeff, p_value = scipy.stats.pearsonr(data[:, 0], data[:, 1])
print(f"Pearson r = {r_coeff:.3f}, p-value = {p_value:.3g}")
如错误信息所述,您传入的是两个浮点数(即第 i 行的两个值),而 `pearsonr` 需要两个数组,在您的例子中就是数据的两列。
我正在尝试用 scipy 计算散点图的相关系数。问题是,我的数据集存放在一个 ndarray 中,结构有些复杂,基本语法对我不起作用……
这是我的完整代码:
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
from matplotlib.pyplot import figure
from scipy.stats import t

# Close figures left over from earlier runs *before* creating the new one.
# Closing afterwards would also close the figure created here, and the
# figsize/dpi settings would be lost.
plt.close('all')
figure(figsize=(12, 10), dpi=200)

# One row per plotted point: column 0 is the x value, column 1 the y value.
data = np.array([
    [22.8, 14.4],
    [19.6, 3.6],
    [0.3, 16.6],
    [8.9, 7],
    [13.7, 13.4],
    [14.7, 1.5],
    [1.9, 0.4],
    [-1.8, 0.3],
    [-3, -15.3],
    [-5.9, -6.3],
    [-13.4, -15],
    [-5.7, -34.8],
    [-6.8, -12.9],
])

# Label and colour for each row of `data`, in the same order as the rows.
custom_annotations = ["K464E", "K472E", "R470E", "K464A", "M155E", "K472A", "M155A", "Q539A", "M155R", "D244A", "E247A", "E247R", "D244K"]
class_colours = ["r", "r", "r", "r", "r", "r", "g", "g", "b", "b", "b", "b", "b"]

# Draw the points one at a time so each gets its own legend entry, colour
# and text annotation.
for i, point in enumerate(data):
    plt.scatter(
        point[0], point[1],
        marker='o',
        label=custom_annotations[i],
        c=class_colours[i],
        edgecolors='black',
        linewidths=1,
        alpha=0.75,
    )
    plt.annotate(custom_annotations[i], (data[i, 0], data[i, 1]))

plt.xlabel(r'$\Delta V_{0.5}$ Apo wild-type mHCN2 (mV)', fontsize=10)
plt.ylabel(r'$\Delta \psi$ cAMP-bound wild-type mHCN2 (mV)', fontsize=10)
# Raw string, like the axis labels above: "\D" and "\p" are not valid escape
# sequences in a normal string literal.
plt.title(r'$\Delta \psi$ cAMP-bound wild-type mHCN2 (HHU) vs Change in relative current (Jena)', fontsize=10)

# Dashed grey guide lines through x = 0 and y = 0.
plt.axvline(0, c=(.5, .5, .5), ls='--')
plt.axhline(0, c=(.5, .5, .5), ls='--')

# NOTE: this is the call the question is about, kept as posted. After the
# loop, `i` is the index of the last row, so data[i, 0] and data[i, 1] are two
# single numbers. pearsonr needs two arrays of equal length, so this line
# raises and the lines below are not reached. Pass the whole columns instead:
# data[:, 0] and data[:, 1].
scipy.stats.pearsonr(data[i, 0], data[i, 1])

# loc is in axes coordinates; x = 1.04 puts the legend just right of the axes.
plt.legend(ncol=3, loc=(1.04, 0))
plt.show()
`pearsonr` 适用于您的数据:
# Slice with ":" instead of "i" so that each argument is a whole column
# (a 1-D array) rather than a single value.
# pearsonr returns (r_coeff, p_value); unpack and print them, because a bare
# expression in a script discards the result.
r_coeff, p_value = scipy.stats.pearsonr(data[:, 0], data[:, 1])
print(f"Pearson r = {r_coeff:.3f}, p-value = {p_value:.3g}")
如错误信息所述,您传入的是两个浮点数(即第 i 行的两个值),而 `pearsonr` 需要两个数组,在您的例子中就是数据的两列。