从 PDB 文件中仅提取我们需要的链
Extracting only the chains that we need from a PDB file
我需要从 PDB 文件中提取特定的链(有时不止一个链)。 How to extract chains from a PDB file?。这是同一个问题和 "marked" 答案,回答了我的问题。但它在python 3中不起作用。它一个接一个地给出错误。有人知道我如何在 python 3 中工作吗?
或同类问题的任何其他代码
提前致谢。
import os
from Bio import PDB
class ChainSplitter:
def __init__(self, out_dir=None):
""" Create parsing and writing objects, specify output directory. """
self.parser = PDB.PDBParser()
self.writer = PDB.PDBIO()
if out_dir is None:
out_dir = os.path.join(os.getcwd(), "chain_PDBs")
self.out_dir = out_dir
def make_pdb(self, pdb_path, chain_letters, overwrite=False, struct=None):
""" Create a new PDB file containing only the specified chains.
Returns the path to the created file.
:param pdb_path: full path to the crystal structure
:param chain_letters: iterable of chain characters (case insensitive)
:param overwrite: write over the output file if it exists
"""
chain_letters = [chain.upper() for chain in chain_letters]
# Input/output files
(pdb_dir, pdb_fn) = os.path.split(pdb_path)
pdb_id = pdb_fn[3:7]
out_name = "pdb%s_%s.ent" % (pdb_id, "".join(chain_letters))
out_path = os.path.join(self.out_dir, out_name)
print ("OUT PATH:",out_path)
plural = "s" if (len(chain_letters) > 1) else "" # for printing
# Skip PDB generation if the file already exists
if (not overwrite) and (os.path.isfile(out_path)):
print("Chain%s %s of '%s' already extracted to '%s'." %
(plural, ", ".join(chain_letters), pdb_id, out_name))
return out_path
print("Extracting chain%s %s from %s..." % (plural,
", ".join(chain_letters), pdb_fn))
# Get structure, write new file with only given chains
if struct is None:
struct = self.parser.get_structure(pdb_id, pdb_path)
self.writer.set_structure(struct)
self.writer.save(out_path, select=SelectChains(chain_letters))
return out_path
class SelectChains(PDB.Select):
""" Only accept the specified chains when saving. """
def __init__(self, chain_letters):
self.chain_letters = chain_letters
def accept_chain(self, chain):
return (chain.get_id() in self.chain_letters)
if __name__ == "__main__":
""" Parses PDB id's desired chains, and creates new PDB structures. """
import sys
if not len(sys.argv) == 2:
print ("Usage: $ python %s 'pdb.txt'" % __file__)
sys.exit()
pdb_textfn = sys.argv[1]
pdbList = PDB.PDBList()
splitter = ChainSplitter("/home/patrick/Desktop/chain_splitting")
with open(pdb_textfn) as pdb_textfile:
for line in pdb_textfile:
pdb_id = line[:4].lower()
chain = line[4]
pdb_fn = pdbList.retrieve_pdb_file(pdb_id)
splitter.make_pdb(pdb_fn, chain)
retrieve_pdb_file
有可选参数 file_format
。当没有提供信息时,PDB 服务器 returns cif 文件。 Biopython 的解析器需要一个 PDB 文件。
您可以将行更改为
pdbList.retrieve_pdb_file(pdb_id, file_format='pdb')
你应该得到一个 PDB 文件,剩下的代码贯穿其中..
我需要从 PDB 文件中提取特定的链(有时不止一个链)。 How to extract chains from a PDB file?。这是同一个问题和 "marked" 答案,回答了我的问题。但它在python 3中不起作用。它一个接一个地给出错误。有人知道我如何在 python 3 中工作吗?
或同类问题的任何其他代码
提前致谢。
import os
from Bio import PDB
class ChainSplitter:
def __init__(self, out_dir=None):
""" Create parsing and writing objects, specify output directory. """
self.parser = PDB.PDBParser()
self.writer = PDB.PDBIO()
if out_dir is None:
out_dir = os.path.join(os.getcwd(), "chain_PDBs")
self.out_dir = out_dir
def make_pdb(self, pdb_path, chain_letters, overwrite=False, struct=None):
""" Create a new PDB file containing only the specified chains.
Returns the path to the created file.
:param pdb_path: full path to the crystal structure
:param chain_letters: iterable of chain characters (case insensitive)
:param overwrite: write over the output file if it exists
"""
chain_letters = [chain.upper() for chain in chain_letters]
# Input/output files
(pdb_dir, pdb_fn) = os.path.split(pdb_path)
pdb_id = pdb_fn[3:7]
out_name = "pdb%s_%s.ent" % (pdb_id, "".join(chain_letters))
out_path = os.path.join(self.out_dir, out_name)
print ("OUT PATH:",out_path)
plural = "s" if (len(chain_letters) > 1) else "" # for printing
# Skip PDB generation if the file already exists
if (not overwrite) and (os.path.isfile(out_path)):
print("Chain%s %s of '%s' already extracted to '%s'." %
(plural, ", ".join(chain_letters), pdb_id, out_name))
return out_path
print("Extracting chain%s %s from %s..." % (plural,
", ".join(chain_letters), pdb_fn))
# Get structure, write new file with only given chains
if struct is None:
struct = self.parser.get_structure(pdb_id, pdb_path)
self.writer.set_structure(struct)
self.writer.save(out_path, select=SelectChains(chain_letters))
return out_path
class SelectChains(PDB.Select):
""" Only accept the specified chains when saving. """
def __init__(self, chain_letters):
self.chain_letters = chain_letters
def accept_chain(self, chain):
return (chain.get_id() in self.chain_letters)
if __name__ == "__main__":
""" Parses PDB id's desired chains, and creates new PDB structures. """
import sys
if not len(sys.argv) == 2:
print ("Usage: $ python %s 'pdb.txt'" % __file__)
sys.exit()
pdb_textfn = sys.argv[1]
pdbList = PDB.PDBList()
splitter = ChainSplitter("/home/patrick/Desktop/chain_splitting")
with open(pdb_textfn) as pdb_textfile:
for line in pdb_textfile:
pdb_id = line[:4].lower()
chain = line[4]
pdb_fn = pdbList.retrieve_pdb_file(pdb_id)
splitter.make_pdb(pdb_fn, chain)
retrieve_pdb_file
有可选参数 file_format
。当没有提供信息时,PDB 服务器 returns cif 文件。 Biopython 的解析器需要一个 PDB 文件。
您可以将行更改为
pdbList.retrieve_pdb_file(pdb_id, file_format='pdb')
你应该得到一个 PDB 文件,剩下的代码贯穿其中..