如何在 python 中使用 matplotlib 创建曼哈顿图?
How to create a Manhattan plot with matplotlib in python?
很遗憾,我自己还没有找到解决办法。如何使用 matplotlib / pandas 等工具在 Python 中创建曼哈顿图(Manhattan plot)?难点在于这类图的 x 轴是离散的。
from pandas import DataFrame
from scipy.stats import uniform
from scipy.stats import randint
import numpy as np
# Sample data: 1000 genes, each with a uniform random p-value and a randomly
# drawn chromosome label 'ch-0' .. 'ch-11'.
df = DataFrame({'gene': ['gene-%i' % idx for idx in range(1000)]})
df['pvalue'] = uniform.rvs(size=1000)
df['chromosome'] = ['ch-%i' % code for code in randint.rvs(0, 12, size=1000)]
# The quantity to plot on the y axis is -log10(p-value).
df['minuslog10pvalue'] = -np.log10(df['pvalue'])
# Bring rows of the same chromosome together (plain string ordering).
df = df.sort_values(by='chromosome')
# Open question: how to plot gene vs. -log10(pvalue), coloured by chromosome?
import matplotlib.pyplot as plt
from numpy.random import randn, random_sample
# Synthetic data: 1e5 points, x uniform in [0, 10) and y = |standard normal|.
g = random_sample(int(1e5)) * 10  # uniform random values between 0 and 10
p = abs(randn(int(1e5)))  # abs of normally distributed data

# Plot g vs. p in groups with different colors.  Colors are cycled
# automatically by matplotlib; use another colormap or define your own colors
# for a different cycle.
for i in range(1, 11):
    # Points whose x value lies within 1 of the group centre i.  The mask is
    # computed once and reused for both coordinates.
    # NOTE: windows of neighbouring centres overlap, so a point can belong to
    # two groups and be drawn twice.
    in_group = abs(g - i) < 1
    plt.plot(g[in_group], p[in_group], ls='', marker='.')
plt.show()
您也可以查看这个脚本,它似乎为您的问题提供了完整的解决方案。
你可以这样使用:
from pandas import DataFrame
from scipy.stats import uniform
from scipy.stats import randint
import numpy as np
import matplotlib.pyplot as plt
# Sample data: 10000 genes, each with a uniform random p-value and a randomly
# drawn chromosome label 'ch-0' .. 'ch-11'.
df = DataFrame({'gene': ['gene-%i' % idx for idx in range(10000)]})
df['pvalue'] = uniform.rvs(size=10000)
df['chromosome'] = ['ch-%i' % code for code in randint.rvs(0, 12, size=10000)]
# The quantity to plot on the y axis is -log10(p-value).
df['minuslog10pvalue'] = -np.log10(df['pvalue'])
# Turn the labels into an ordered categorical so that sorting follows the
# listed order ch-0, ch-1, ..., ch-11 instead of plain string ordering.
df['chromosome'] = (
    df['chromosome']
    .astype('category')
    .cat.set_categories(['ch-%i' % code for code in range(12)], ordered=True)
)
df = df.sort_values(by='chromosome')
# Running row number after sorting; it serves as the x coordinate when
# plotting gene vs. -log10(pvalue) coloured by chromosome.
df['ind'] = range(len(df))
# Draw the Manhattan plot: one scatter series per chromosome on a shared
# Axes, cycling through a short colour list.
# observed=True limits the groups to chromosome categories that actually have
# rows, so the per-group .iloc lookups below never see an empty group.
df_grouped = df.groupby('chromosome', observed=True)
fig = plt.figure()
ax = fig.add_subplot(111)
colors = ['red', 'green', 'blue', 'yellow']
x_labels = []
x_labels_pos = []
for num, (name, group) in enumerate(df_grouped):
    group.plot(kind='scatter', x='ind', y='minuslog10pvalue',
               color=colors[num % len(colors)], ax=ax)
    x_labels.append(name)
    # Centre the tick label between the group's first and last 'ind' value.
    x_labels_pos.append((group['ind'].iloc[0] + group['ind'].iloc[-1]) / 2)
ax.set_xticks(x_labels_pos)
ax.set_xticklabels(x_labels)
ax.set_xlim([0, len(df)])
# Pin only the lower y limit; a fixed upper limit (previously 3.5) would hide
# any point whose -log10(p) exceeds it.
ax.set_ylim(bottom=0)
ax.set_xlabel('Chromosome')
我只是额外创建了一列连续递增的索引(running index),用来控制 x 轴标签的位置。
你也可以使用 seaborn,这会让事情变得更容易和更可控。
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import uniform, randint
# Simulate a table of 10000 records: an 'rs<N>' identifier, a chromosome
# number drawn from 1..23, a position drawn from 0..10**5 - 1 and a uniform
# random p-value.
df = pd.DataFrame({
    'rsid': ['rs{}'.format(idx) for idx in range(10000)],
    'chrom': list(randint.rvs(1, 23 + 1, size=10000)),
    'pos': list(randint.rvs(0, 10**5, size=10000)),
    'pval': uniform.rvs(size=10000),
})
df['-logp'] = -np.log10(df['pval'])
# Order rows by chromosome and position, renumber them from 0 and keep that
# running number in column 'i' for use as the x coordinate.
df = df.sort_values(by=['chrom', 'pos']).reset_index(drop=True)
df['i'] = df.index
# Generate the Manhattan plot (optional tweaks for relplot: linewidth=0, s=9).
# legend=False is the documented value for suppressing the hue legend.
plot = sns.relplot(data=df, x='i', y='-logp', aspect=3.7,
                   hue='chrom', palette='bright', legend=False)
# One x tick per chromosome, placed at the median running index of its rows.
chrom_df = df.groupby('chrom')['i'].median()
plot.ax.set_xlabel('chrom')
plot.ax.set_xticks(chrom_df)
plot.ax.set_xticklabels(chrom_df.index)
# Reach the figure through the Axes: FacetGrid.fig is deprecated in favour of
# FacetGrid.figure, whereas Axes.figure does not depend on the seaborn version.
# The trailing semicolon keeps a notebook from echoing the returned object.
plot.ax.figure.suptitle('Manhattan plot');
我在寻找用 Python 绘制美观的曼哈顿图的方法时看到了这里的其他回答,但最终采用了这种 seaborn 方法。您还可以查看下面这个笔记本(不是我写的)以获得更多灵感:
https://github.com/mojones/video_notebooks/blob/master/Manhattan%20plots%20in%20Python.ipynb
很遗憾,我自己还没有找到解决办法。如何使用 matplotlib / pandas 等工具在 Python 中创建曼哈顿图(Manhattan plot)?难点在于这类图的 x 轴是离散的。
from pandas import DataFrame
from scipy.stats import uniform
from scipy.stats import randint
import numpy as np
# Sample data: 1000 genes, each with a uniform random p-value and a randomly
# drawn chromosome label 'ch-0' .. 'ch-11'.
df = DataFrame({'gene': ['gene-%i' % idx for idx in range(1000)]})
df['pvalue'] = uniform.rvs(size=1000)
df['chromosome'] = ['ch-%i' % code for code in randint.rvs(0, 12, size=1000)]
# The quantity to plot on the y axis is -log10(p-value).
df['minuslog10pvalue'] = -np.log10(df['pvalue'])
# Bring rows of the same chromosome together (plain string ordering).
df = df.sort_values(by='chromosome')
# Open question: how to plot gene vs. -log10(pvalue), coloured by chromosome?
import matplotlib.pyplot as plt
from numpy.random import randn, random_sample
# Synthetic data: 1e5 points, x uniform in [0, 10) and y = |standard normal|.
g = random_sample(int(1e5)) * 10  # uniform random values between 0 and 10
p = abs(randn(int(1e5)))  # abs of normally distributed data

# Plot g vs. p in groups with different colors.  Colors are cycled
# automatically by matplotlib; use another colormap or define your own colors
# for a different cycle.
for i in range(1, 11):
    # Points whose x value lies within 1 of the group centre i.  The mask is
    # computed once and reused for both coordinates.
    # NOTE: windows of neighbouring centres overlap, so a point can belong to
    # two groups and be drawn twice.
    in_group = abs(g - i) < 1
    plt.plot(g[in_group], p[in_group], ls='', marker='.')
plt.show()
您也可以查看这个脚本,它似乎为您的问题提供了完整的解决方案。
你可以这样使用:
from pandas import DataFrame
from scipy.stats import uniform
from scipy.stats import randint
import numpy as np
import matplotlib.pyplot as plt
# Sample data: 10000 genes, each with a uniform random p-value and a randomly
# drawn chromosome label 'ch-0' .. 'ch-11'.
df = DataFrame({'gene': ['gene-%i' % idx for idx in range(10000)]})
df['pvalue'] = uniform.rvs(size=10000)
df['chromosome'] = ['ch-%i' % code for code in randint.rvs(0, 12, size=10000)]
# The quantity to plot on the y axis is -log10(p-value).
df['minuslog10pvalue'] = -np.log10(df['pvalue'])
# Turn the labels into an ordered categorical so that sorting follows the
# listed order ch-0, ch-1, ..., ch-11 instead of plain string ordering.
df['chromosome'] = (
    df['chromosome']
    .astype('category')
    .cat.set_categories(['ch-%i' % code for code in range(12)], ordered=True)
)
df = df.sort_values(by='chromosome')
# Running row number after sorting; it serves as the x coordinate when
# plotting gene vs. -log10(pvalue) coloured by chromosome.
df['ind'] = range(len(df))
# Draw the Manhattan plot: one scatter series per chromosome on a shared
# Axes, cycling through a short colour list.
# observed=True limits the groups to chromosome categories that actually have
# rows, so the per-group .iloc lookups below never see an empty group.
df_grouped = df.groupby('chromosome', observed=True)
fig = plt.figure()
ax = fig.add_subplot(111)
colors = ['red', 'green', 'blue', 'yellow']
x_labels = []
x_labels_pos = []
for num, (name, group) in enumerate(df_grouped):
    group.plot(kind='scatter', x='ind', y='minuslog10pvalue',
               color=colors[num % len(colors)], ax=ax)
    x_labels.append(name)
    # Centre the tick label between the group's first and last 'ind' value.
    x_labels_pos.append((group['ind'].iloc[0] + group['ind'].iloc[-1]) / 2)
ax.set_xticks(x_labels_pos)
ax.set_xticklabels(x_labels)
ax.set_xlim([0, len(df)])
# Pin only the lower y limit; a fixed upper limit (previously 3.5) would hide
# any point whose -log10(p) exceeds it.
ax.set_ylim(bottom=0)
ax.set_xlabel('Chromosome')
我只是额外创建了一列连续递增的索引(running index),用来控制 x 轴标签的位置。
你也可以使用 seaborn,这会让事情变得更容易和更可控。
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import uniform, randint
# Simulate a table of 10000 records: an 'rs<N>' identifier, a chromosome
# number drawn from 1..23, a position drawn from 0..10**5 - 1 and a uniform
# random p-value.
df = pd.DataFrame({
    'rsid': ['rs{}'.format(idx) for idx in range(10000)],
    'chrom': list(randint.rvs(1, 23 + 1, size=10000)),
    'pos': list(randint.rvs(0, 10**5, size=10000)),
    'pval': uniform.rvs(size=10000),
})
df['-logp'] = -np.log10(df['pval'])
# Order rows by chromosome and position, renumber them from 0 and keep that
# running number in column 'i' for use as the x coordinate.
df = df.sort_values(by=['chrom', 'pos']).reset_index(drop=True)
df['i'] = df.index
# Generate the Manhattan plot (optional tweaks for relplot: linewidth=0, s=9).
# legend=False is the documented value for suppressing the hue legend.
plot = sns.relplot(data=df, x='i', y='-logp', aspect=3.7,
                   hue='chrom', palette='bright', legend=False)
# One x tick per chromosome, placed at the median running index of its rows.
chrom_df = df.groupby('chrom')['i'].median()
plot.ax.set_xlabel('chrom')
plot.ax.set_xticks(chrom_df)
plot.ax.set_xticklabels(chrom_df.index)
# Reach the figure through the Axes: FacetGrid.fig is deprecated in favour of
# FacetGrid.figure, whereas Axes.figure does not depend on the seaborn version.
# The trailing semicolon keeps a notebook from echoing the returned object.
plot.ax.figure.suptitle('Manhattan plot');
我在寻找用 Python 绘制美观的曼哈顿图的方法时看到了这里的其他回答,但最终采用了这种 seaborn 方法。您还可以查看下面这个笔记本(不是我写的)以获得更多灵感:
https://github.com/mojones/video_notebooks/blob/master/Manhattan%20plots%20in%20Python.ipynb