如何从 python 中的列表创建多个子类?
How to create multiple subclasses from a list in python?
我试图以分层方式获取数据,因此我决定转向 subclasses。我从中获取数据的文件格式如下:
2WQZ_chain_A
Score = 338.0
53-164
208-317
327-595
611-654
2WQZ_chain_B
Score = 344.0
53-164
205-317
327-595
611-655
2XB6_chain_A
Score = 319.0
64-163
211-317
327-596
613-654
2XB6_chain_B
Score = 329.0
53-163
212-317
327-596
613-654
而我想要获得的是第一个 class 称为 PDB 名称(即:2WQZ),子 class 称为 chain_A、chain_B等等。这些 subclasses 应该包含一个名为“score”的对象和第三个 subclass 名为“intervals”的对象,其中包含可能的间隔。总体思路类似于 this.
目前我尝试使用字典,得到了正确的 PDB class,但每个 PDB 下只保留了第二条链。我的代码如下:
class PDB(object):
    """Top-level record identified by a PDB name."""

    def __init__(self, pdbname):
        """Store the PDB name.

        Args:
            pdbname: Identifier of the PDB entry, kept in ``self.pdbid``.
        """
        self.pdbid = pdbname
class Chain(PDB):
    """A single chain that also carries the id of the PDB it belongs to."""

    def __init__(self, chainame, score, pdbname):
        """Store the chain name and score, then initialise the PDB part.

        Args:
            chainame: Name of the chain, kept in ``self.chainid``.
            score: Score of the chain, kept in ``self.score``.
            pdbname: PDB identifier forwarded to ``PDB.__init__``.
        """
        self.chainid = chainame
        self.score = score
        super().__init__(pdbname)
# Read every line of the input file; the context manager closes the handle.
with open("covered_intervals.txt", "r") as handle:
    making_class = handle.readlines()

# Collect the first four characters of every line that mentions "chain".
pdblist = []
for i in making_class:
    if "chain" in i:
        pdblist.append(i[:4])

# dict.fromkeys drops duplicates while keeping first-seen order.
pdblist = list(dict.fromkeys(pdblist))
pdblist2 = dict.fromkeys(pdblist)

for i in pdblist:
    pdblist2[i] = PDB(i)
    for j in making_class:
        if i in j:
            chainame = j[5:12]
            # NOTE: this assignment replaces whatever was stored under the
            # same key, so only the last matching chain survives. The score
            # 4 is a placeholder value.
            pdblist2[i] = Chain(chainame, 4, i)
4 是一个占位符,我知道为什么我只得到最后一个链,但不知道如何在同一个 PDB 下得到两个链。
首先我建议创建一些东西,它可以将文件中的一个文本块解析为可用的变量,例如:
def parse_block(lines):
    """Split the lines of one text block into usable values.

    Args:
        lines: Lines of a single block. The first line is a header of the
            form ``<pdb>_<chain>``, the second contains ``=`` followed by
            the score, and every remaining line is one interval.

    Returns:
        A tuple ``(pdb_name, chain, score, intervals)``. ``score`` is the
        stripped text after the ``=`` (a string, not a number) and
        ``intervals`` is the list of remaining lines.
    """
    # Split on the first underscore instead of fixed character offsets so
    # that identifiers which are not exactly four characters long still work.
    pdb_name, _, chain = lines[0].partition("_")
    score = lines[1].split("=")[1].strip()
    intervals = lines[2:]
    return (pdb_name, chain, score, intervals)
使用它,您可以构建自己的类,或者使用嵌套字典——嵌套字典同样很适合这种数据结构。
from collections import defaultdict
# Read the whole file at once; blocks are separated by an empty line.
with open("pdbdata", "r") as f:
    content = f.read()

# Maps pdb name -> chain name -> {"score": ..., "intervals": [...]}.
pdb_dict = defaultdict(dict)
for block in content.split("\n\n"):
    # Trailing or repeated blank lines produce empty chunks; skip them
    # instead of letting parse_block fail on a missing header line.
    if not block.strip():
        continue
    pdb_name, chain, score, intervals = parse_block(block.strip().splitlines())
    pdb_dict[pdb_name][chain] = {"score": score, "intervals": intervals}
生成的嵌套字典如下所示:
{'2WQZ': {'chain_A': {'intervals': ['53-164', '208-317', '327-595', '611-654'],
'score': '338.0'},
'chain_B': {'intervals': ['53-164', '205-317', '327-595', '611-655'],
'score': '344.0'}},
'2XB6': {'chain_A': {'intervals': ['64-163', '211-317', '327-596', '613-654'],
'score': '319.0'},
'chain_B': {'intervals': ['53-163', '212-317', '327-596', '613-654'],
'score': '329.0'}}}
在这种情况下,可以用一个字典作为顶层节点;由于这是一棵固定深度的树,所以不需要嵌套 class。Chain class 包含三个部分:
- 链名称
- 得分
- 范围列表(我已经为范围单独实现了一个 class)
class Chain():
    """A chain with a name, an optional score and a list of ranges."""

    def __init__(self, chainame, score=None):
        """Create a chain with no ranges yet.

        Args:
            chainame: Name of the chain, kept in ``self.chainid``.
            score: Optional score; defaults to ``None`` until set.
        """
        self.chainid = chainame
        self.score = score
        self.ranges = []

    def add_range(self, range):
        """Append ``range`` to this chain's list of ranges."""
        self.ranges.append(range)

    def add_score(self, score):
        """Set (or overwrite) this chain's score."""
        self.score = score
class range1():
    """An integer interval parsed from text of the form ``start-end``."""

    def __init__(self, str):
        """Parse the interval text.

        Args:
            str: Text with two integers separated by ``-``, e.g. ``"53-164"``.
                (The parameter name shadows the builtin; it is kept so that
                existing keyword callers continue to work.)
        """
        x = str.split("-")
        self.start = int(x[0])
        self.end = int(x[1])
# Position inside the current block: 0 = expecting the header line,
# 1 = expecting the score line, 2 or more = reading range lines.
counter = 0
pdb = ""
ch = None
# Maps pdb name -> list of Chain objects read for that pdb.
data = {}
with open("covered_intervals.txt", "r") as f:
    for line in f:
        line = line.strip()
        if not line:
            # A blank line ends the current block. Only store a chain when
            # one is actually in progress, so leading or repeated blank
            # lines do not add None or a duplicate entry.
            if counter != 0:
                data.setdefault(pdb, []).append(ch)
            counter = 0
        elif counter == 0:
            # Header line: "<pdb>_<chain name>".
            x = line.split("_", 1)
            pdb = x[0]
            chainname = x[1]
            ch = Chain(chainname)
            counter = counter + 1
        elif counter == 1:
            # Score line: the number follows the "=" sign.
            ch.add_score(float(line.split("=")[1]))
            counter = counter + 1
        else:
            ch.add_range(range1(line))

# The last block is not followed by a blank line, so store it here.
if counter != 0:
    data.setdefault(pdb, []).append(ch)
我试图以分层方式获取数据,因此我决定转向 subclasses。我从中获取数据的文件格式如下:
2WQZ_chain_A
Score = 338.0
53-164
208-317
327-595
611-654
2WQZ_chain_B
Score = 344.0
53-164
205-317
327-595
611-655
2XB6_chain_A
Score = 319.0
64-163
211-317
327-596
613-654
2XB6_chain_B
Score = 329.0
53-163
212-317
327-596
613-654
而我想要获得的是第一个 class 称为 PDB 名称(即:2WQZ),子 class 称为 chain_A、chain_B等等。这些 subclasses 应该包含一个名为“score”的对象和第三个 subclass 名为“intervals”的对象,其中包含可能的间隔。总体思路类似于 this.
目前我尝试使用字典,得到了正确的 PDB class,但每个 PDB 下只保留了第二条链。我的代码如下:
class PDB(object):
    """Top-level record identified by a PDB name."""

    def __init__(self, pdbname):
        """Store the PDB name.

        Args:
            pdbname: Identifier of the PDB entry, kept in ``self.pdbid``.
        """
        self.pdbid = pdbname
class Chain(PDB):
    """A single chain that also carries the id of the PDB it belongs to."""

    def __init__(self, chainame, score, pdbname):
        """Store the chain name and score, then initialise the PDB part.

        Args:
            chainame: Name of the chain, kept in ``self.chainid``.
            score: Score of the chain, kept in ``self.score``.
            pdbname: PDB identifier forwarded to ``PDB.__init__``.
        """
        self.chainid = chainame
        self.score = score
        super().__init__(pdbname)
# Read every line of the input file; the context manager closes the handle.
with open("covered_intervals.txt", "r") as handle:
    making_class = handle.readlines()

# Collect the first four characters of every line that mentions "chain".
pdblist = []
for i in making_class:
    if "chain" in i:
        pdblist.append(i[:4])

# dict.fromkeys drops duplicates while keeping first-seen order.
pdblist = list(dict.fromkeys(pdblist))
pdblist2 = dict.fromkeys(pdblist)

for i in pdblist:
    pdblist2[i] = PDB(i)
    for j in making_class:
        if i in j:
            chainame = j[5:12]
            # NOTE: this assignment replaces whatever was stored under the
            # same key, so only the last matching chain survives. The score
            # 4 is a placeholder value.
            pdblist2[i] = Chain(chainame, 4, i)
4 是一个占位符,我知道为什么我只得到最后一个链,但不知道如何在同一个 PDB 下得到两个链。
首先我建议创建一些东西,它可以将文件中的一个文本块解析为可用的变量,例如:
def parse_block(lines):
    """Split the lines of one text block into usable values.

    Args:
        lines: Lines of a single block. The first line is a header of the
            form ``<pdb>_<chain>``, the second contains ``=`` followed by
            the score, and every remaining line is one interval.

    Returns:
        A tuple ``(pdb_name, chain, score, intervals)``. ``score`` is the
        stripped text after the ``=`` (a string, not a number) and
        ``intervals`` is the list of remaining lines.
    """
    # Split on the first underscore instead of fixed character offsets so
    # that identifiers which are not exactly four characters long still work.
    pdb_name, _, chain = lines[0].partition("_")
    score = lines[1].split("=")[1].strip()
    intervals = lines[2:]
    return (pdb_name, chain, score, intervals)
使用它,您可以构建自己的类,或者使用嵌套字典——嵌套字典同样很适合这种数据结构。
from collections import defaultdict
# Read the whole file at once; blocks are separated by an empty line.
with open("pdbdata", "r") as f:
    content = f.read()

# Maps pdb name -> chain name -> {"score": ..., "intervals": [...]}.
pdb_dict = defaultdict(dict)
for block in content.split("\n\n"):
    # Trailing or repeated blank lines produce empty chunks; skip them
    # instead of letting parse_block fail on a missing header line.
    if not block.strip():
        continue
    pdb_name, chain, score, intervals = parse_block(block.strip().splitlines())
    pdb_dict[pdb_name][chain] = {"score": score, "intervals": intervals}
生成的嵌套字典如下所示:
{'2WQZ': {'chain_A': {'intervals': ['53-164', '208-317', '327-595', '611-654'],
'score': '338.0'},
'chain_B': {'intervals': ['53-164', '205-317', '327-595', '611-655'],
'score': '344.0'}},
'2XB6': {'chain_A': {'intervals': ['64-163', '211-317', '327-596', '613-654'],
'score': '319.0'},
'chain_B': {'intervals': ['53-163', '212-317', '327-596', '613-654'],
'score': '329.0'}}}
在这种情况下,可以用一个字典作为顶层节点;由于这是一棵固定深度的树,所以不需要嵌套 class。Chain class 包含三个部分:
- 链名称
- 得分
- 范围列表(我已经为范围单独实现了一个 class)
class Chain():
    """A chain with a name, an optional score and a list of ranges."""

    def __init__(self, chainame, score=None):
        """Create a chain with no ranges yet.

        Args:
            chainame: Name of the chain, kept in ``self.chainid``.
            score: Optional score; defaults to ``None`` until set.
        """
        self.chainid = chainame
        self.score = score
        self.ranges = []

    def add_range(self, range):
        """Append ``range`` to this chain's list of ranges."""
        self.ranges.append(range)

    def add_score(self, score):
        """Set (or overwrite) this chain's score."""
        self.score = score
class range1():
    """An integer interval parsed from text of the form ``start-end``."""

    def __init__(self, str):
        """Parse the interval text.

        Args:
            str: Text with two integers separated by ``-``, e.g. ``"53-164"``.
                (The parameter name shadows the builtin; it is kept so that
                existing keyword callers continue to work.)
        """
        x = str.split("-")
        self.start = int(x[0])
        self.end = int(x[1])
# Position inside the current block: 0 = expecting the header line,
# 1 = expecting the score line, 2 or more = reading range lines.
counter = 0
pdb = ""
ch = None
# Maps pdb name -> list of Chain objects read for that pdb.
data = {}
with open("covered_intervals.txt", "r") as f:
    for line in f:
        line = line.strip()
        if not line:
            # A blank line ends the current block. Only store a chain when
            # one is actually in progress, so leading or repeated blank
            # lines do not add None or a duplicate entry.
            if counter != 0:
                data.setdefault(pdb, []).append(ch)
            counter = 0
        elif counter == 0:
            # Header line: "<pdb>_<chain name>".
            x = line.split("_", 1)
            pdb = x[0]
            chainname = x[1]
            ch = Chain(chainname)
            counter = counter + 1
        elif counter == 1:
            # Score line: the number follows the "=" sign.
            ch.add_score(float(line.split("=")[1]))
            counter = counter + 1
        else:
            ch.add_range(range1(line))

# The last block is not followed by a blank line, so store it here.
if counter != 0:
    data.setdefault(pdb, []).append(ch)