使用 MDAnalysis 进行 PCA (python3.7)
PCA using MDAnalysis (python3.7)
我刚开始在计算化学领域工作,被要求对一些分子动力学轨迹进行主成分分析。我被告知要使用 MDAnalysis 包,因此我在他们的页面上找到了一个教程并试着照做(当然用的是我自己的输入文件),看看它是否有效。我以前从未做过这类分析,而且我也是 Python 编程的新手。
我附上了受教程启发的代码。但它对我不起作用,会引发很多错误:其中一个错误是它不能接受我的输入(拓扑是 PDB 文件,坐标是 XTC 文件),可这些格式都列在受支持的格式列表中;另一个错误是 "class PCA" 未定义。
我没有从其他人那里找到太多关于使用 MDAnalysis 进行 PCA 的信息,因此我希望在这里能找到曾经做过这类事情并且可以帮助我的人。我已经尝试过相关的 subreddits,但没有结果。
from __future__ import division, absolute_import
import MDAnalysis as mda
import MDAnalysis.analysis.pca as pca
from six.moves import range
import warnings
import numpy as np
import scipy.integrate
from MDAnalysis import Universe
from MDAnalysis.analysis.align import _fit_to
from MDAnalysis.lib.log import ProgressMeter
# NOTE(review): indentation was lost in this paste, so the nesting described in
# the comments below is inferred from the statements themselves -- confirm
# against the original script before relying on it.
# Open the PDB topology together with the XTC trajectory.
u = mda.Universe("L22trial.pdb", "L22trial.xtc")
# Bind the library's PCA class to the bare name PCA ...
PCA = mda.analysis.pca.PCA
# ... and then declare a new class under the same name, with no explicit base.
class PCA():
# Run a PCA on the 'backbone' selection and project that same selection onto
# the result.
# NOTE(review): this re-binds `pca`, which is also the alias given to the
# MDAnalysis.analysis.pca module in the imports -- confirm this is intended.
pca = PCA(u, select='backbone').run()
pca_space = pca.transform(u.select_atoms('backbone'))
# Constructor: passes the universe's trajectory and any extra keyword
# arguments to the parent initialiser, then stores the settings read by the
# later steps (selection string, align flag, optional mean, component count).
# NOTE(review): `class PCA()` above names no base class, so super() would
# reach `object` here -- verify which base class was intended.
def __init__(self, universe, select='all', align=False, mean=None,
n_components=None, **kwargs):
super(PCA, self).__init__(universe.trajectory, **kwargs)
self._u = universe
self.align = align
# Set to True at the end of _conclude(); transform() refuses to run before that.
self._calculated = False
self.n_components = n_components
self._select = select
self._mean = mean
# Set-up step: selects the atoms, initialises the mean and covariance
# accumulators and, when no mean was supplied, sums the selection's positions
# over the frames start:stop:step to compute one.
# Raises ValueError when n_frames is 1.
def _prepare(self):
# Move the trajectory to the frame at index self.start.
self._u.trajectory[self.start]
# The reference and the working selection use the same selection string.
self._reference = self._u.select_atoms(self._select)
self._atoms = self._u.select_atoms(self._select)
self._n_atoms = self._atoms.n_atoms
if self._mean is None:
# No mean supplied: start from a flat zero vector (3 values per atom) and
# flag that it has to be computed below.
self.mean = np.zeros(self._n_atoms*3)
self._calc_mean = True
else:
# NOTE(review): the supplied mean's positions are stored without ravel(),
# unlike the flat zero vector above -- confirm the shapes agree downstream.
self.mean = self._mean.positions
self._calc_mean = False
if self.n_frames == 1:
raise ValueError('No covariance information can be gathered from a single trajectory frame.\n')
# Square covariance accumulator with one row/column per coordinate.
n_dim = self._n_atoms * 3
self.cov = np.zeros((n_dim, n_dim))
# Store the reference positions relative to their centre of geometry.
self._ref_atom_positions = self._reference.positions
self._ref_cog = self._reference.center_of_geometry()
self._ref_atom_positions -= self._ref_cog
if self._calc_mean:
# Progress interval: one hundredth of the frame count, but at least 1.
interval = int(self.n_frames // 100)
interval = interval if interval > 0 else 1
# NOTE(review): the local name `format` shadows the builtin of the same name.
format = ("Mean Calculation Step %(step)5d/%(numsteps)d [%(percentage)5.1f%%]")
mean_pm = ProgressMeter(self.n_frames if self.n_frames else 1, interval=interval, verbose=self._verbose, format=format)
for i, ts in enumerate(self._u.trajectory[self.start:self.stop:self.step]):
# NOTE(review): as pasted, the align branch fits the frame to the reference
# but only the else branch adds anything to self.mean -- confirm whether the
# fitted positions were meant to be accumulated as well.
if self.align:
mobile_cog = self._atoms.center_of_geometry()
mobile_atoms, old_rmsd = _fit_to(self._atoms.positions, self._ref_atom_positions, self._atoms, mobile_com=mobile_cog, ref_com=self._ref_cog)
else:
self.mean += self._atoms.positions.ravel()
mean_pm.echo(i)
# Turn the accumulated sum into an average over the frames.
self.mean /= self.n_frames
# NOTE(review): mean_atoms is the same object as _atoms, so the next line
# assigns the selection's positions back to itself -- confirm the intent.
self.mean_atoms = self._atoms
self.mean_atoms.positions = self._atoms.positions
# Per-frame step: takes the flattened positions (fitted to the reference first
# when align is set), subtracts the mean, and adds the outer product of the
# result with itself to the covariance accumulator.
def _single_frame(self):
if self.align:
mobile_cog = self._atoms.center_of_geometry()
mobile_atoms, old_rmsd = _fit_to(self._atoms.positions, self._ref_atom_positions, self._atoms, mobile_com=mobile_cog, ref_com=self._ref_cog)
x = mobile_atoms.positions.ravel()
else:
x = self._atoms.positions.ravel()
x -= self.mean
# (n, 1) dot (1, n) -> (n, n) outer product.
self.cov += np.dot(x[:, np.newaxis], x[:, np.newaxis].T)
# Final step: divides the accumulated covariance by (n_frames - 1),
# eigendecomposes it, and stores the eigenvalues (as `variance`) and the
# eigenvectors (as `p_components`) ordered by descending eigenvalue.
def _conclude(self):
self.cov /= self.n_frames - 1
e_vals, e_vects = np.linalg.eig(self.cov)
# argsort is ascending; the reversed slice gives largest-first order.
sort_idx = np.argsort(e_vals)[::-1]
self.variance = e_vals[sort_idx]
# Keep only the first n_components values (all of them when it is None).
self.variance = self.variance[:self.n_components]
# NOTE(review): n_components truncates the FIRST axis here, while transform()
# slices the SECOND axis of p_components -- confirm which axis is meant to
# index the components.
self.p_components = e_vects[:self.n_components, sort_idx]
# Running sum of the kept variances divided by their total.
self.cumulated_variance = (np.cumsum(self.variance) / np.sum(self.variance))
self._calculated = True
# Projects a trajectory onto the stored components.
#   atomgroup:    an atom group, or a Universe (then all of its atoms are used)
#   n_components: number of columns of p_components to project onto
#   start/stop/step: frame slice, normalised by the trajectory's
#                    check_slice_indices()
# Returns an array with one row per frame and one column per component.
# Raises ValueError if run() has not completed or the atom counts differ;
# warns when the atom types differ from those used for the fit.
def transform(self, atomgroup, n_components=None, start=None, stop=None, step=None):
if not self._calculated:
raise ValueError('Call run() on the PCA before using transform')
if isinstance(atomgroup, Universe):
atomgroup = atomgroup.atoms
if(self._n_atoms != atomgroup.n_atoms):
raise ValueError('PCA has been fit for {} atoms. Your atomgroup has {} atoms'.format(self._n_atoms, atomgroup.n_atoms))
if not (self._atoms.types == atomgroup.types).all():
warnings.warn('Atom types do not match with types used to fit PCA')
traj = atomgroup.universe.trajectory
start, stop, step = traj.check_slice_indices(start, stop, step)
n_frames = len(range(start, stop, step))
# Output width: the requested count, otherwise every stored column.
dim = (n_components if n_components is not None else self.p_components.shape[1])
dot = np.zeros((n_frames, dim))
for i, ts in enumerate(traj[start:stop:step]):
# Mean-centre the flattened positions of the current frame, then project.
xyz = atomgroup.positions.ravel() - self.mean
dot[i] = np.dot(xyz, self.p_components[:, :n_components])
return dot
def cosine_content(pca_space, i):
    """Return the cosine content of column *i* of a projected trajectory.

    The column is compared with a cosine that completes ``(i + 1)`` half
    periods over the length of the trajectory.  Both integrals are taken
    with Simpson's rule at unit spacing between frames.

    Parameters
    ----------
    pca_space : 2-D array
        One row per frame, one column per principal component.
    i : int
        Index of the column (principal component) to evaluate.

    Returns
    -------
    float
        ``(2 / T) * (integral of cos * p) ** 2 / (integral of p ** 2)``,
        where ``p`` is column *i* and ``T`` the number of frames.
    """
    # ``simps`` was removed from recent SciPy releases; ``simpson`` is the
    # current name of the same routine.  Fall back to the old name so that
    # older installations keep working.
    integrate = getattr(scipy.integrate, "simpson", None)
    if integrate is None:
        integrate = scipy.integrate.simps

    n_frames = len(pca_space)
    t = np.arange(n_frames)
    cos = np.cos(np.pi * t * (i + 1) / n_frames)
    projection = pca_space[:, i]
    return ((2.0 / n_frames) * integrate(cos * projection) ** 2 /
            integrate(projection ** 2))
您似乎复制并粘贴了 PCA class 本身。我的猜测是您不需要这样做(我从未使用过该模块,所以这只是一个猜测)。
文档 ( https://www.mdanalysis.org/docs/documentation_pages/analysis/pca.html ) 似乎表明您唯一需要做的就是以下
import MDAnalysis as mda
import MDAnalysis.analysis.pca as pca
# Atom selection shared by the fit and by the projection.
SELECTION = 'backbone'

# Load the PDB topology together with the XTC trajectory.
u = mda.Universe("L22trial.pdb", "L22trial.xtc")

# Fit the library's PCA on the selected atoms ...
mypca = pca.PCA(u, select=SELECTION).run()

# ... and project the same atoms onto the resulting components.
pca_space = mypca.transform(u.select_atoms(SELECTION))
如果您有一条错误消息 "No module named 'MDAnalysis.analysis.pca.PCA'; 'MDAnalysis.analysis.pca' is not a package",它就是它所说的:-)。
这意味着您的计算机上没有名为 MDAnalysis 的包。要解决此问题,您需要使用 pip install 命令或 conda(如果您使用 conda 包管理器)进行安装。请参阅此链接:https://www.mdanalysis.org/pages/installation_quick_start/
我看了您参考的链接 https://www.mdanalysis.org/docs/_modules/MDAnalysis/analysis/pca.html ,它证实了我的第一个猜测;我认为按照我的回答去做,您应该就可以使用那个包了。
我刚开始在计算化学领域工作,被要求对一些分子动力学轨迹进行主成分分析。我被告知要使用 MDAnalysis 包,因此我在他们的页面上找到了一个教程并试着照做(当然用的是我自己的输入文件),看看它是否有效。我以前从未做过这类分析,而且我也是 Python 编程的新手。 我附上了受教程启发的代码。但它对我不起作用,会引发很多错误:其中一个错误是它不能接受我的输入(拓扑是 PDB 文件,坐标是 XTC 文件),可这些格式都列在受支持的格式列表中;另一个错误是 "class PCA" 未定义。 我没有从其他人那里找到太多关于使用 MDAnalysis 进行 PCA 的信息,因此我希望在这里能找到曾经做过这类事情并且可以帮助我的人。我已经尝试过相关的 subreddits,但没有结果。
from __future__ import division, absolute_import
import MDAnalysis as mda
import MDAnalysis.analysis.pca as pca
from six.moves import range
import warnings
import numpy as np
import scipy.integrate
from MDAnalysis import Universe
from MDAnalysis.analysis.align import _fit_to
from MDAnalysis.lib.log import ProgressMeter
# NOTE(review): indentation was lost in this paste, so the nesting described in
# the comments below is inferred from the statements themselves -- confirm
# against the original script before relying on it.
# Open the PDB topology together with the XTC trajectory.
u = mda.Universe("L22trial.pdb", "L22trial.xtc")
# Bind the library's PCA class to the bare name PCA ...
PCA = mda.analysis.pca.PCA
# ... and then declare a new class under the same name, with no explicit base.
class PCA():
# Run a PCA on the 'backbone' selection and project that same selection onto
# the result.
# NOTE(review): this re-binds `pca`, which is also the alias given to the
# MDAnalysis.analysis.pca module in the imports -- confirm this is intended.
pca = PCA(u, select='backbone').run()
pca_space = pca.transform(u.select_atoms('backbone'))
# Constructor: passes the universe's trajectory and any extra keyword
# arguments to the parent initialiser, then stores the settings read by the
# later steps (selection string, align flag, optional mean, component count).
# NOTE(review): `class PCA()` above names no base class, so super() would
# reach `object` here -- verify which base class was intended.
def __init__(self, universe, select='all', align=False, mean=None,
n_components=None, **kwargs):
super(PCA, self).__init__(universe.trajectory, **kwargs)
self._u = universe
self.align = align
# Set to True at the end of _conclude(); transform() refuses to run before that.
self._calculated = False
self.n_components = n_components
self._select = select
self._mean = mean
# Set-up step: selects the atoms, initialises the mean and covariance
# accumulators and, when no mean was supplied, sums the selection's positions
# over the frames start:stop:step to compute one.
# Raises ValueError when n_frames is 1.
def _prepare(self):
# Move the trajectory to the frame at index self.start.
self._u.trajectory[self.start]
# The reference and the working selection use the same selection string.
self._reference = self._u.select_atoms(self._select)
self._atoms = self._u.select_atoms(self._select)
self._n_atoms = self._atoms.n_atoms
if self._mean is None:
# No mean supplied: start from a flat zero vector (3 values per atom) and
# flag that it has to be computed below.
self.mean = np.zeros(self._n_atoms*3)
self._calc_mean = True
else:
# NOTE(review): the supplied mean's positions are stored without ravel(),
# unlike the flat zero vector above -- confirm the shapes agree downstream.
self.mean = self._mean.positions
self._calc_mean = False
if self.n_frames == 1:
raise ValueError('No covariance information can be gathered from a single trajectory frame.\n')
# Square covariance accumulator with one row/column per coordinate.
n_dim = self._n_atoms * 3
self.cov = np.zeros((n_dim, n_dim))
# Store the reference positions relative to their centre of geometry.
self._ref_atom_positions = self._reference.positions
self._ref_cog = self._reference.center_of_geometry()
self._ref_atom_positions -= self._ref_cog
if self._calc_mean:
# Progress interval: one hundredth of the frame count, but at least 1.
interval = int(self.n_frames // 100)
interval = interval if interval > 0 else 1
# NOTE(review): the local name `format` shadows the builtin of the same name.
format = ("Mean Calculation Step %(step)5d/%(numsteps)d [%(percentage)5.1f%%]")
mean_pm = ProgressMeter(self.n_frames if self.n_frames else 1, interval=interval, verbose=self._verbose, format=format)
for i, ts in enumerate(self._u.trajectory[self.start:self.stop:self.step]):
# NOTE(review): as pasted, the align branch fits the frame to the reference
# but only the else branch adds anything to self.mean -- confirm whether the
# fitted positions were meant to be accumulated as well.
if self.align:
mobile_cog = self._atoms.center_of_geometry()
mobile_atoms, old_rmsd = _fit_to(self._atoms.positions, self._ref_atom_positions, self._atoms, mobile_com=mobile_cog, ref_com=self._ref_cog)
else:
self.mean += self._atoms.positions.ravel()
mean_pm.echo(i)
# Turn the accumulated sum into an average over the frames.
self.mean /= self.n_frames
# NOTE(review): mean_atoms is the same object as _atoms, so the next line
# assigns the selection's positions back to itself -- confirm the intent.
self.mean_atoms = self._atoms
self.mean_atoms.positions = self._atoms.positions
# Per-frame step: takes the flattened positions (fitted to the reference first
# when align is set), subtracts the mean, and adds the outer product of the
# result with itself to the covariance accumulator.
def _single_frame(self):
if self.align:
mobile_cog = self._atoms.center_of_geometry()
mobile_atoms, old_rmsd = _fit_to(self._atoms.positions, self._ref_atom_positions, self._atoms, mobile_com=mobile_cog, ref_com=self._ref_cog)
x = mobile_atoms.positions.ravel()
else:
x = self._atoms.positions.ravel()
x -= self.mean
# (n, 1) dot (1, n) -> (n, n) outer product.
self.cov += np.dot(x[:, np.newaxis], x[:, np.newaxis].T)
# Final step: divides the accumulated covariance by (n_frames - 1),
# eigendecomposes it, and stores the eigenvalues (as `variance`) and the
# eigenvectors (as `p_components`) ordered by descending eigenvalue.
def _conclude(self):
self.cov /= self.n_frames - 1
e_vals, e_vects = np.linalg.eig(self.cov)
# argsort is ascending; the reversed slice gives largest-first order.
sort_idx = np.argsort(e_vals)[::-1]
self.variance = e_vals[sort_idx]
# Keep only the first n_components values (all of them when it is None).
self.variance = self.variance[:self.n_components]
# NOTE(review): n_components truncates the FIRST axis here, while transform()
# slices the SECOND axis of p_components -- confirm which axis is meant to
# index the components.
self.p_components = e_vects[:self.n_components, sort_idx]
# Running sum of the kept variances divided by their total.
self.cumulated_variance = (np.cumsum(self.variance) / np.sum(self.variance))
self._calculated = True
# Projects a trajectory onto the stored components.
#   atomgroup:    an atom group, or a Universe (then all of its atoms are used)
#   n_components: number of columns of p_components to project onto
#   start/stop/step: frame slice, normalised by the trajectory's
#                    check_slice_indices()
# Returns an array with one row per frame and one column per component.
# Raises ValueError if run() has not completed or the atom counts differ;
# warns when the atom types differ from those used for the fit.
def transform(self, atomgroup, n_components=None, start=None, stop=None, step=None):
if not self._calculated:
raise ValueError('Call run() on the PCA before using transform')
if isinstance(atomgroup, Universe):
atomgroup = atomgroup.atoms
if(self._n_atoms != atomgroup.n_atoms):
raise ValueError('PCA has been fit for {} atoms. Your atomgroup has {} atoms'.format(self._n_atoms, atomgroup.n_atoms))
if not (self._atoms.types == atomgroup.types).all():
warnings.warn('Atom types do not match with types used to fit PCA')
traj = atomgroup.universe.trajectory
start, stop, step = traj.check_slice_indices(start, stop, step)
n_frames = len(range(start, stop, step))
# Output width: the requested count, otherwise every stored column.
dim = (n_components if n_components is not None else self.p_components.shape[1])
dot = np.zeros((n_frames, dim))
for i, ts in enumerate(traj[start:stop:step]):
# Mean-centre the flattened positions of the current frame, then project.
xyz = atomgroup.positions.ravel() - self.mean
dot[i] = np.dot(xyz, self.p_components[:, :n_components])
return dot
def cosine_content(pca_space, i):
    """Return the cosine content of column *i* of a projected trajectory.

    The column is compared with a cosine that completes ``(i + 1)`` half
    periods over the length of the trajectory.  Both integrals are taken
    with Simpson's rule at unit spacing between frames.

    Parameters
    ----------
    pca_space : 2-D array
        One row per frame, one column per principal component.
    i : int
        Index of the column (principal component) to evaluate.

    Returns
    -------
    float
        ``(2 / T) * (integral of cos * p) ** 2 / (integral of p ** 2)``,
        where ``p`` is column *i* and ``T`` the number of frames.
    """
    # ``simps`` was removed from recent SciPy releases; ``simpson`` is the
    # current name of the same routine.  Fall back to the old name so that
    # older installations keep working.
    integrate = getattr(scipy.integrate, "simpson", None)
    if integrate is None:
        integrate = scipy.integrate.simps

    n_frames = len(pca_space)
    t = np.arange(n_frames)
    cos = np.cos(np.pi * t * (i + 1) / n_frames)
    projection = pca_space[:, i]
    return ((2.0 / n_frames) * integrate(cos * projection) ** 2 /
            integrate(projection ** 2))
您似乎复制并粘贴了 PCA class 本身。我的猜测是您不需要这样做(我从未使用过该模块,所以这只是一个猜测)。 文档 ( https://www.mdanalysis.org/docs/documentation_pages/analysis/pca.html ) 似乎表明您唯一需要做的就是以下
import MDAnalysis as mda
import MDAnalysis.analysis.pca as pca
# Atom selection shared by the fit and by the projection.
SELECTION = 'backbone'

# Load the PDB topology together with the XTC trajectory.
u = mda.Universe("L22trial.pdb", "L22trial.xtc")

# Fit the library's PCA on the selected atoms ...
mypca = pca.PCA(u, select=SELECTION).run()

# ... and project the same atoms onto the resulting components.
pca_space = mypca.transform(u.select_atoms(SELECTION))
如果您有一条错误消息 "No module named 'MDAnalysis.analysis.pca.PCA'; 'MDAnalysis.analysis.pca' is not a package",它就是它所说的:-)。 这意味着您的计算机上没有名为 MDAnalysis 的包。要解决此问题,您需要使用 pip install 命令或 conda(如果您使用 conda 包管理器)进行安装。请参阅此链接:https://www.mdanalysis.org/pages/installation_quick_start/
我看了您参考的链接 https://www.mdanalysis.org/docs/_modules/MDAnalysis/analysis/pca.html ,它证实了我的第一个猜测;我认为按照我的回答去做,您应该就可以使用那个包了。