如何从后续的行中继续读取序列的其余部分,并去掉其中的空格?(C++)

How to read the rest of a sequence from new line and without the space? c++

以第 2 行的奶牛 DNA 序列为例:这条序列在第 13 行和第 24 行继续……我想把每个序列拼接成一条完整的长序列,忽略其中的空白字符以及中间的换行。

文件的格式如下(见图 1):

这是我的代码,但它只读取了前 10 个序列:

// File-scope state shared with getSequence().
// Input stream opened on the path "txt" as soon as the program starts.
ifstream file ("txt");
// Buffer that getSequence() fills with getline() on every loop iteration.
string line;
// One accumulated sequence string per entry; getSequence() sizes and fills it.
vector <string> vec;
// Not referenced by any of the code visible here.
stringstream s;
// Receives a whitespace-delimited token read from `file` inside getSequence().
string name;

/**
 * Returns a copy of `s` with leading and trailing whitespace removed.
 *
 * Whitespace means space, tab, carriage return, newline, vertical tab and
 * form feed, so a trailing '\r' left behind by getline() on a CRLF file is
 * trimmed as well. Whitespace in the middle of the string is kept.
 *
 * The argument is taken by const reference so that temporaries and const
 * strings can be passed; the input is never modified.
 *
 * @param s  text to trim
 * @return   the trimmed copy, or an empty string when `s` is empty or
 *           consists only of whitespace
 */
string strip(const string & s)
{
    static const char whitespace[] = " \t\r\n\v\f";

    const size_t b = s.find_first_not_of(whitespace);
    if (b == string::npos) {
        // Nothing but whitespace (or nothing at all).
        return "";
    }
    // A non-whitespace character exists, so this search cannot fail.
    const size_t e = s.find_last_not_of(whitespace);
    return s.substr(b, e - b + 1);
}

/**
 * Reads interleaved sequence data from the global `file` stream into the
 * global `vec`, then prints one assembled sequence per line on stdout.
 *
 * Layout this reader expects (NOTE(review): inferred from the surrounding
 * discussion — confirm against the real data file):
 *   - a first line holding two integers: the number of sequences, followed
 *     by a second number that is consumed but not otherwise used here;
 *   - blocks of `row` non-blank lines. In the first block each line starts
 *     with a whitespace-delimited name and the rest is sequence data; the
 *     later blocks contain data only and continue the sequences in the same
 *     order;
 *   - blank lines between blocks and spaces/tabs inside a line, all of which
 *     are discarded.
 *
 * Side effects: appends `row` entries to `vec`, overwrites the globals
 * `line` and `name` (which ends up holding the last name parsed), and writes
 * to cout / cerr.
 */
void getSequence(){
    int row = 0;
    int col = 0;
    if (file.is_open())
    {
        // A failed or non-positive count would make the indexing below invalid.
        if (!(file >> row >> col) || row <= 0) {
            cerr << "Error: invalid header line!" << endl;
            return;
        }
        for (int k = 0; k < row; k++) {
            vec.push_back("");
        }

        const string blanks = " \t\r";
        int slot = 0;            // which sequence the next non-blank line belongs to
        bool firstBlock = true;  // lines of the first block still carry a name
        while (getline(file, line))
        {
            size_t pos = line.find_first_not_of(blanks);
            if (pos == string::npos)
            {
                // Blank separator line, or the remainder of the header line.
                continue;
            }

            if (firstBlock)
            {
                // Split off the leading name so it is not mixed into the data.
                const size_t nameEnd = line.find_first_of(blanks, pos);
                if (nameEnd == string::npos) {
                    name = line.substr(pos);
                    pos = line.size();
                } else {
                    name = line.substr(pos, nameEnd - pos);
                    pos = nameEnd;
                }
            }

            // Append the data with every blank character dropped.
            string & target = vec[slot];
            for (; pos < line.size(); ++pos)
            {
                const char c = line[pos];
                if (blanks.find(c) == string::npos)
                {
                    target += c;
                }
            }

            if (++slot == row)
            {
                slot = 0;
                firstBlock = false;
            }
        }
    }
    else {
        cerr << "Error: file did not open!" << endl;
    }
    for (const string & v : vec) {
        cout << v << endl;
    }
}

提前感谢您的帮助。

也许这会有所帮助。思路是:先读取 row 和 col,然后读取 row 行标题行(每行包含序列名称和第一段数据)。之后反复读取下一组行,并把其中每一行追加到对应的序列上——这里假设各序列的行是交错排列的。

#include <iostream>
#include <fstream>
#include <string>
#include <vector>

// One named sequence assembled from the input file.
struct Sequence
{
    // Name token; readHeader() reads it as the first token of the header line.
    std::string name;
    // Sequence text; readHeader() stores the first piece and readChunk()
    // appends each following piece.
    std::string data;
};
// Collection of sequences in file order; main() sizes it from the row count.
using SeqVec = std::vector<Sequence>;

bool readHeader(std::ifstream& f, SeqVec& v)
{
    for (size_t i = 0; i < v.size(); ++i)
    {
        if (!(f >> v[i].name >> v[i].data))
        {
            return false;
        }
    }
    return true;
}

int readChunk(std::ifstream& f, SeqVec& v)
{
    int linesRead = 0;
    std::string chunk;
    for (size_t i = 0; i < v.size(); ++i)
    {
        if(!(f >> chunk))
        {
            break;
        }
        v[i].data += chunk;
        ++linesRead;
    }
    return linesRead;
}

int main()
{
    std::vector<Sequence> v;

    const std::string filename = "test.txt";
    std::ifstream f(filename);
    if (!f)
    {
        return -1;
    }

    int row;
    int col;
    if (f >> row >> col)
    {
        v.resize(row);
        if (!readHeader(f, v))
        {
            return -1;
        }
        for (;;)
        {
            int linesRead = readChunk(f, v);
            if (linesRead == 0 && v[0].data.size() == col)
            {
                //If we read nothing and the lines are the correct length we're done.
                break;
            }
            else if (linesRead < v.size())
            {
                //partial read is an error.
                return -1;
            }
        }
    }

    for (auto& seq : v)
    {
        std::cout << seq.name << " : " << seq.data << "\n";
    }
    return 0;
}