在 Biojava 中计算氨基酸的物理化学性质
Calculating physico-chemical properties of amino acids in Biojava
我需要使用 BioJava 计算我从 UNIPROT 获得的这个蛋白质序列中 polar/non-polar、aliphatic/aromatic/heterocyclic 氨基酸的数量和百分比。
我在 BioJava 教程中找到了如何读取 Fasta 文件并实现了这段代码。但是我不知道如何解决这个问题。
如果你有什么想法请帮助我。
也许有一些我可以查看的来源。
这是代码。
package biojava.biojava_project;
import java.net.URL;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
public class BioSeq {
// Inserting the sequence from UNIPROT
public static ProteinSequence getSequenceForId(String uniProtId) throws Exception {
URL uniprotFasta = new URL(String.format("https://rest.uniprot.org/uniprotkb/P31574.fasta", uniProtId));
ProteinSequence seq = FastaReaderHelper.readFastaProteinSequence(uniprotFasta.openStream()).get(uniProtId);
System.out.printf("id : P31574", uniProtId, seq, System.getProperty("line.separator"), seq.getOriginalHeader());
System.out.println();
return seq;
}
public static void main(String[] args) {
try {
System.out.println(getSequenceForId("P31574"));
} catch (Exception e) {
e.printStackTrace();
}
}
}
我不知道 BioJava 是否将这些属性存储在任何地方。但是手动列出所有氨基酸及其属性很容易。然后遍历序列并计算满足 属性 的那些。这是极性的示例:
import java.io.InputStream;
import java.net.URL;
import java.util.Set;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
public class BioSeq {
public static void main(String[] args) throws Exception {
ProteinSequence seq = loadFromUniprot("P31574");
int polarCount = numberOfOccurrences(seq, /*Polar AAs:*/ Set.of("Y", "S", "T", "N", "Q", "C"));
System.out.println("% of polar AAs: " + ((double)polarCount)/seq.getLength());
}
public static ProteinSequence loadFromUniprot(String uniProtId) throws Exception {
URL uniprotFasta = new URL(String.format("https://rest.uniprot.org/uniprotkb/%s.fasta", uniProtId));
try (InputStream is = uniprotFasta.openStream()) {
return FastaReaderHelper.readFastaProteinSequence(is).get(uniProtId);
}
}
private static int numberOfOccurrences(ProteinSequence seq, Set<String> bases) {
int count = 0;
for (AminoAcidCompound aminoAcid : seq)
if(bases.contains(aminoAcid.getBase()))
count++;
return count;
}
}
PS:不要忘记在使用后关闭 IO 流。在上面的示例中,我使用了自动关闭 InputStream 的 try-with-resources 语法。
我需要使用 BioJava 计算我从 UNIPROT 获得的这个蛋白质序列中 polar/non-polar、aliphatic/aromatic/heterocyclic 氨基酸的数量和百分比。
我在 BioJava 教程中找到了如何读取 Fasta 文件并实现了这段代码。但是我不知道如何解决这个问题。
如果你有什么想法请帮助我。
也许有一些我可以查看的来源。
这是代码。
package biojava.biojava_project;
import java.net.URL;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
public class BioSeq {
// Inserting the sequence from UNIPROT
public static ProteinSequence getSequenceForId(String uniProtId) throws Exception {
URL uniprotFasta = new URL(String.format("https://rest.uniprot.org/uniprotkb/P31574.fasta", uniProtId));
ProteinSequence seq = FastaReaderHelper.readFastaProteinSequence(uniprotFasta.openStream()).get(uniProtId);
System.out.printf("id : P31574", uniProtId, seq, System.getProperty("line.separator"), seq.getOriginalHeader());
System.out.println();
return seq;
}
public static void main(String[] args) {
try {
System.out.println(getSequenceForId("P31574"));
} catch (Exception e) {
e.printStackTrace();
}
}
}
我不知道 BioJava 是否将这些属性存储在任何地方。但是手动列出所有氨基酸及其属性很容易。然后遍历序列并计算满足 属性 的那些。这是极性的示例:
import java.io.InputStream;
import java.net.URL;
import java.util.Set;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
public class BioSeq {
public static void main(String[] args) throws Exception {
ProteinSequence seq = loadFromUniprot("P31574");
int polarCount = numberOfOccurrences(seq, /*Polar AAs:*/ Set.of("Y", "S", "T", "N", "Q", "C"));
System.out.println("% of polar AAs: " + ((double)polarCount)/seq.getLength());
}
public static ProteinSequence loadFromUniprot(String uniProtId) throws Exception {
URL uniprotFasta = new URL(String.format("https://rest.uniprot.org/uniprotkb/%s.fasta", uniProtId));
try (InputStream is = uniprotFasta.openStream()) {
return FastaReaderHelper.readFastaProteinSequence(is).get(uniProtId);
}
}
private static int numberOfOccurrences(ProteinSequence seq, Set<String> bases) {
int count = 0;
for (AminoAcidCompound aminoAcid : seq)
if(bases.contains(aminoAcid.getBase()))
count++;
return count;
}
}
PS:不要忘记在使用后关闭 IO 流。在上面的示例中,我使用了自动关闭 InputStream 的 try-with-resources 语法。