如何在没有 space 的情况下从新行读取序列的其余部分? C++
How to read the rest of a sequence from new line and without the space? c++
所以现在看第 2 行,奶牛 DNA 序列;这在第 13 行和第 24 行继续......我想为每个序列获得这个长序列,忽略白色 space 和中间的新行。
这是文件的格式:1
这是代码,只读取前 10 个序列
ifstream file ("txt");
string line;
vector <string> vec;
stringstream s;
string name;
string strip(string & s)
{
size_t b = s.find_first_not_of(' ');
size_t e = s.find_last_not_of(' ');
if (b == string::npos) {
return "";
} else {
return s.substr(b, e - b + 1);
}
}
void getSequence(){
int i;
int row;
int col;
if (file.is_open())
{
file >> row >> col;
for (i = 0; i < row; i++) {
vec.push_back("");
}
i = 0;
while (getline(file, line))
{
file >> name;
if (line == " ")
{
continue;
}
vec[i % row] += strip(line);
i++;
}
}
else {
cerr << "Error: file did not open!" << endl;
}
for (const string & v : vec) {
cout << v << endl;
}
}
提前感谢您的帮助。
也许这会有所帮助。这个想法是读取 row & col 然后读取 header 行的行数。之后重复读取下一行并将每一行附加到正确的项目,假设这些行是交错的。
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
struct Sequence
{
std::string name;
std::string data;
};
using SeqVec = std::vector<Sequence>;
bool readHeader(std::ifstream& f, SeqVec& v)
{
for (size_t i = 0; i < v.size(); ++i)
{
if (!(f >> v[i].name >> v[i].data))
{
return false;
}
}
return true;
}
int readChunk(std::ifstream& f, SeqVec& v)
{
int linesRead = 0;
std::string chunk;
for (size_t i = 0; i < v.size(); ++i)
{
if(!(f >> chunk))
{
break;
}
v[i].data += chunk;
++linesRead;
}
return linesRead;
}
int main()
{
std::vector<Sequence> v;
const std::string filename = "test.txt";
std::ifstream f(filename);
if (!f)
{
return -1;
}
int row;
int col;
if (f >> row >> col)
{
v.resize(row);
if (!readHeader(f, v))
{
return -1;
}
for (;;)
{
int linesRead = readChunk(f, v);
if (linesRead == 0 && v[0].data.size() == col)
{
//If we read nothing and the lines are the correct length we're done.
break;
}
else if (linesRead < v.size())
{
//partial read is an error.
return -1;
}
}
}
for (auto& seq : v)
{
std::cout << seq.name << " : " << seq.data << "\n";
}
return 0;
}
所以现在看第 2 行,奶牛 DNA 序列;这在第 13 行和第 24 行继续......我想为每个序列获得这个长序列,忽略白色 space 和中间的新行。
这是文件的格式:1
这是代码,只读取前 10 个序列
ifstream file ("txt");
string line;
vector <string> vec;
stringstream s;
string name;
string strip(string & s)
{
size_t b = s.find_first_not_of(' ');
size_t e = s.find_last_not_of(' ');
if (b == string::npos) {
return "";
} else {
return s.substr(b, e - b + 1);
}
}
void getSequence(){
int i;
int row;
int col;
if (file.is_open())
{
file >> row >> col;
for (i = 0; i < row; i++) {
vec.push_back("");
}
i = 0;
while (getline(file, line))
{
file >> name;
if (line == " ")
{
continue;
}
vec[i % row] += strip(line);
i++;
}
}
else {
cerr << "Error: file did not open!" << endl;
}
for (const string & v : vec) {
cout << v << endl;
}
}
提前感谢您的帮助。
也许这会有所帮助。这个想法是读取 row & col 然后读取 header 行的行数。之后重复读取下一行并将每一行附加到正确的项目,假设这些行是交错的。
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
struct Sequence
{
std::string name;
std::string data;
};
using SeqVec = std::vector<Sequence>;
bool readHeader(std::ifstream& f, SeqVec& v)
{
for (size_t i = 0; i < v.size(); ++i)
{
if (!(f >> v[i].name >> v[i].data))
{
return false;
}
}
return true;
}
int readChunk(std::ifstream& f, SeqVec& v)
{
int linesRead = 0;
std::string chunk;
for (size_t i = 0; i < v.size(); ++i)
{
if(!(f >> chunk))
{
break;
}
v[i].data += chunk;
++linesRead;
}
return linesRead;
}
int main()
{
std::vector<Sequence> v;
const std::string filename = "test.txt";
std::ifstream f(filename);
if (!f)
{
return -1;
}
int row;
int col;
if (f >> row >> col)
{
v.resize(row);
if (!readHeader(f, v))
{
return -1;
}
for (;;)
{
int linesRead = readChunk(f, v);
if (linesRead == 0 && v[0].data.size() == col)
{
//If we read nothing and the lines are the correct length we're done.
break;
}
else if (linesRead < v.size())
{
//partial read is an error.
return -1;
}
}
}
for (auto& seq : v)
{
std::cout << seq.name << " : " << seq.data << "\n";
}
return 0;
}