使用 Spirit 解析内存映射文件中带引号的字段和普通字段
Parse MemoryMapped files using Spirit quoted and normal
参考 sehe 的回答(the answer from sehe),我想尽可能快地解析内存映射文件中带引号的内容和普通内容。
实际的解析器看起来像:
// Shorthand for the Spirit Qi namespace used throughout the grammar.
namespace qi = boost::spirit::qi;
// Parse result: one inner vector per line, one boost::string_ref per field.
using MatrixType = std::vector<std::vector<boost::string_ref>>;
// Qi grammar (as posted in the question) for comma-separated text whose fields
// may be quoted or unquoted. `It` is the input iterator type; the grammar is
// declared with qi::blank_type as its skipper and `table` as its start rule.
template<typename It>
struct parser : qi::grammar<It, MatrixType(), qi::blank_type, qi::locals<char> >
{
parser()
: parser::base_type( table, "parser" )
{
using namespace boost::phoenix;
using namespace qi;
// Fields are separated by a single comma.
delimiter = ',';
// Quoted field: remember the opening quote character (' or ") in the rule
// local _a, capture everything up to the next occurrence of that same
// character as a raw iterator range, then require the matching closing quote.
// The attribute is a string_ref built from the start and length of that range.
quoted =
omit [ char_("'\"") [_a = _1] ]
>> raw [ *(char_ - char_(_a)) ] [ _val = construct<boost::string_ref>(begin(_1), size(_1)) ]
>> lit(_a);
// Unquoted field: every character up to (not including) the next end-of-line
// or delimiter. The Kleene star allows a zero-length match, so this rule
// succeeds even when there is nothing to consume.
unquoted = raw[ *(char_ - (eol | delimiter) ) ] [ _val = construct<boost::string_ref>(begin(_1), size(_1))]; //raw [ *(char_ - char_("\"',")) ] [ _val = construct<boost::string_ref>(begin(_1), size(_1)) ];
// A field is tried as quoted first, falling back to unquoted.
any_string = quoted | unquoted;
// One line: one or more fields separated by the delimiter. Because `unquoted`
// may match nothing, `line` can also match without consuming any input.
line = any_string % delimiter;
// Whole input: lines separated by end-of-line.
table = line % eol;
}
// Field rules; all three are declared with a char local and the blank skipper.
qi::rule<It, boost::string_ref() ,qi::locals<char> , qi::blank_type> any_string;
qi::rule<It, boost::string_ref() ,qi::locals<char> , qi::blank_type> quoted;
qi::rule<It, boost::string_ref() ,qi::locals<char> , qi::blank_type> unquoted;
// Field separator; no attribute and no skipper.
qi::rule<It> delimiter;
// One parsed line of fields.
qi::rule<It, std::vector<boost::string_ref>(), qi::blank_type> line;
// Start rule producing the full matrix.
qi::rule<It, MatrixType(), qi::blank_type, qi::locals<char>> table;
};
示例输入文件:
"a","b", "c", "d,e,f"
"a", -1, abc, 0.1
实际的解析器会多输出一个并不存在的空行。文件末尾没有“\n”。
问题是行尾和输入结束是隐式定界符。
由于未加引号的字段允许为空(零长度),解析器会在输入末尾再解析出一行只包含单个空字段的“最后一行”。
我建议特别检查输入结束:
row = !eoi >> any_string % delimiter;
这样,如果已经没有任何可读内容,该行就会被拒绝。为了更宽容并允许结尾处出现空行,可以把它们“吃掉”(eat):
table = row % eol >> *eol;
最后,如果还想“吃掉”表格各行之间的空行,只需对 eol 加上重复(Kleene plus,即 `+`):
table = row % +eol >> *eol;
#define BOOST_SPIRIT_DEBUG
#include <boost/utility/string_ref.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
// Shorthand for the Spirit Qi namespace used throughout the grammar.
namespace qi = boost::spirit::qi;
// Parse result: one inner vector per row, one boost::string_ref per field.
using MatrixType = std::vector<std::vector<boost::string_ref>>;
// Qi grammar for comma-separated text whose fields may be quoted or unquoted.
// `It` is the input iterator type; the grammar uses qi::blank_type as its
// skipper and `table` as its start rule.
template<typename It>
struct parser : qi::grammar<It, MatrixType(), qi::blank_type >
{
parser() : parser::base_type(table, "parser")
{
namespace px = boost::phoenix;
using namespace qi;
// Fields are separated by a single comma.
delimiter = ',';
// Quoted field: remember the opening quote character (' or ") in the rule
// local _a, capture everything up to the next occurrence of that same
// character as a raw iterator range, then require the matching closing quote.
// The attribute is a string_ref built from the start and length of that range.
quoted =
char_("'\"") [_a = _1]
>> raw [ *(char_ - char_(_a)) ] [ _val = px::construct<boost::string_ref>(px::begin(_1), px::size(_1)) ]
>> lit(_a);
// Unquoted field: every character up to (not including) the next end-of-line
// or delimiter. The Kleene star allows a zero-length match.
unquoted = raw[ *(char_ - (eol | delimiter) ) ] [ _val = px::construct<boost::string_ref>(px::begin(_1), px::size(_1))];
// A field is tried as quoted first, falling back to unquoted.
any_string = quoted | unquoted;
// `%` binds tighter than `>>`, so this reads `!eoi >> (any_string % delimiter)`:
// refuse to start a row at end of input, otherwise parse a delimited field list.
// The !eoi guard is needed because `unquoted` alone would match nothing there.
row = !eoi >> any_string % delimiter;
// Reads `(row % +eol) >> *eol`: rows separated by one or more line breaks,
// followed by any number of trailing line breaks.
table = row % +eol >> *eol;
// Registers the listed rules with Spirit's debug facility (BOOST_SPIRIT_DEBUG).
BOOST_SPIRIT_DEBUG_NODES((delimiter)(quoted)(unquoted)(any_string)(row)(table))
}
private:
// Rules declared with the blank skipper.
qi::rule<It, MatrixType(), qi::blank_type> table;
qi::rule<It, MatrixType::value_type(), qi::blank_type> row;
// lexemes (rules declared without a skipper)
qi::rule<It, boost::string_ref(), qi::locals<char> > quoted;
qi::rule<It, boost::string_ref()> any_string, unquoted;
qi::rule<It> delimiter;
};
#include <fstream>
#include <boost/iostreams/device/mapped_file.hpp>
int main() {
using It = const char*;
boost::iostreams::mapped_file_source source("input.txt");
It first = source.begin();
It last = source.end();
parser<It> grammar;
MatrixType data;
bool ok = qi::phrase_parse(first, last, grammar, qi::blank, data);
if (ok) {
std::cout << "Parsed: \n";
for (auto& row : data)
{
for (auto& cell : row)
std::cout << cell << "|";
std::cout << "\n";
}
} else
{
std::cout << "Failed to parse\n";
}
if (first != last) {
std::cout << "Remaining input unparsed: '" << std::string(first, last) << "'\n";
}
}
打印:
Parsed:
a|b|c|d,e,f|
a|-1|abc|0.1|
a||abc|0.1|
参考 sehe 的回答(the answer from sehe),我想尽可能快地解析内存映射文件中带引号的内容和普通内容。
实际的解析器看起来像:
// Shorthand for the Spirit Qi namespace used throughout the grammar.
namespace qi = boost::spirit::qi;
// Parse result: one inner vector per line, one boost::string_ref per field.
using MatrixType = std::vector<std::vector<boost::string_ref>>;
// Qi grammar (as posted in the question) for comma-separated text whose fields
// may be quoted or unquoted. `It` is the input iterator type; the grammar is
// declared with qi::blank_type as its skipper and `table` as its start rule.
template<typename It>
struct parser : qi::grammar<It, MatrixType(), qi::blank_type, qi::locals<char> >
{
parser()
: parser::base_type( table, "parser" )
{
using namespace boost::phoenix;
using namespace qi;
// Fields are separated by a single comma.
delimiter = ',';
// Quoted field: remember the opening quote character (' or ") in the rule
// local _a, capture everything up to the next occurrence of that same
// character as a raw iterator range, then require the matching closing quote.
// The attribute is a string_ref built from the start and length of that range.
quoted =
omit [ char_("'\"") [_a = _1] ]
>> raw [ *(char_ - char_(_a)) ] [ _val = construct<boost::string_ref>(begin(_1), size(_1)) ]
>> lit(_a);
// Unquoted field: every character up to (not including) the next end-of-line
// or delimiter. The Kleene star allows a zero-length match, so this rule
// succeeds even when there is nothing to consume.
unquoted = raw[ *(char_ - (eol | delimiter) ) ] [ _val = construct<boost::string_ref>(begin(_1), size(_1))]; //raw [ *(char_ - char_("\"',")) ] [ _val = construct<boost::string_ref>(begin(_1), size(_1)) ];
// A field is tried as quoted first, falling back to unquoted.
any_string = quoted | unquoted;
// One line: one or more fields separated by the delimiter. Because `unquoted`
// may match nothing, `line` can also match without consuming any input.
line = any_string % delimiter;
// Whole input: lines separated by end-of-line.
table = line % eol;
}
// Field rules; all three are declared with a char local and the blank skipper.
qi::rule<It, boost::string_ref() ,qi::locals<char> , qi::blank_type> any_string;
qi::rule<It, boost::string_ref() ,qi::locals<char> , qi::blank_type> quoted;
qi::rule<It, boost::string_ref() ,qi::locals<char> , qi::blank_type> unquoted;
// Field separator; no attribute and no skipper.
qi::rule<It> delimiter;
// One parsed line of fields.
qi::rule<It, std::vector<boost::string_ref>(), qi::blank_type> line;
// Start rule producing the full matrix.
qi::rule<It, MatrixType(), qi::blank_type, qi::locals<char>> table;
};
示例输入文件:
"a","b", "c", "d,e,f"
"a", -1, abc, 0.1
实际的解析器会多输出一个并不存在的空行。文件末尾没有“\n”。
问题是行尾和输入结束是隐式定界符。
由于未加引号的字段允许为空(零长度),解析器会在输入末尾再解析出一行只包含单个空字段的“最后一行”。
我建议特别检查输入结束:
row = !eoi >> any_string % delimiter;
这样,如果已经没有任何可读内容,该行就会被拒绝。为了更宽容并允许结尾处出现空行,可以把它们“吃掉”(eat):
table = row % eol >> *eol;
最后,如果还想“吃掉”表格各行之间的空行,只需对 eol 加上重复(Kleene plus,即 `+`):
table = row % +eol >> *eol;
#define BOOST_SPIRIT_DEBUG
#include <boost/utility/string_ref.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
// Shorthand for the Spirit Qi namespace used throughout the grammar.
namespace qi = boost::spirit::qi;
// Parse result: one inner vector per row, one boost::string_ref per field.
using MatrixType = std::vector<std::vector<boost::string_ref>>;
// Qi grammar for comma-separated text whose fields may be quoted or unquoted.
// `It` is the input iterator type; the grammar uses qi::blank_type as its
// skipper and `table` as its start rule.
template<typename It>
struct parser : qi::grammar<It, MatrixType(), qi::blank_type >
{
parser() : parser::base_type(table, "parser")
{
namespace px = boost::phoenix;
using namespace qi;
// Fields are separated by a single comma.
delimiter = ',';
// Quoted field: remember the opening quote character (' or ") in the rule
// local _a, capture everything up to the next occurrence of that same
// character as a raw iterator range, then require the matching closing quote.
// The attribute is a string_ref built from the start and length of that range.
quoted =
char_("'\"") [_a = _1]
>> raw [ *(char_ - char_(_a)) ] [ _val = px::construct<boost::string_ref>(px::begin(_1), px::size(_1)) ]
>> lit(_a);
// Unquoted field: every character up to (not including) the next end-of-line
// or delimiter. The Kleene star allows a zero-length match.
unquoted = raw[ *(char_ - (eol | delimiter) ) ] [ _val = px::construct<boost::string_ref>(px::begin(_1), px::size(_1))];
// A field is tried as quoted first, falling back to unquoted.
any_string = quoted | unquoted;
// `%` binds tighter than `>>`, so this reads `!eoi >> (any_string % delimiter)`:
// refuse to start a row at end of input, otherwise parse a delimited field list.
// The !eoi guard is needed because `unquoted` alone would match nothing there.
row = !eoi >> any_string % delimiter;
// Reads `(row % +eol) >> *eol`: rows separated by one or more line breaks,
// followed by any number of trailing line breaks.
table = row % +eol >> *eol;
// Registers the listed rules with Spirit's debug facility (BOOST_SPIRIT_DEBUG).
BOOST_SPIRIT_DEBUG_NODES((delimiter)(quoted)(unquoted)(any_string)(row)(table))
}
private:
// Rules declared with the blank skipper.
qi::rule<It, MatrixType(), qi::blank_type> table;
qi::rule<It, MatrixType::value_type(), qi::blank_type> row;
// lexemes (rules declared without a skipper)
qi::rule<It, boost::string_ref(), qi::locals<char> > quoted;
qi::rule<It, boost::string_ref()> any_string, unquoted;
qi::rule<It> delimiter;
};
#include <fstream>
#include <boost/iostreams/device/mapped_file.hpp>
int main() {
using It = const char*;
boost::iostreams::mapped_file_source source("input.txt");
It first = source.begin();
It last = source.end();
parser<It> grammar;
MatrixType data;
bool ok = qi::phrase_parse(first, last, grammar, qi::blank, data);
if (ok) {
std::cout << "Parsed: \n";
for (auto& row : data)
{
for (auto& cell : row)
std::cout << cell << "|";
std::cout << "\n";
}
} else
{
std::cout << "Failed to parse\n";
}
if (first != last) {
std::cout << "Remaining input unparsed: '" << std::string(first, last) << "'\n";
}
}
打印:
Parsed:
a|b|c|d,e,f|
a|-1|abc|0.1|
a||abc|0.1|