Boost Spirit Parser 可选表达式求值
Boost Spirit Parser optional expression evaluation
我正在尝试从文本文件中解析一行,格式如下:
[int_:] [int_/int_] [(int_, string)] string [string:int_]...
其中 [] 括起来的部分是可选参数,但其中会包含诸如 ":"、"("、")"、"/" 之类的标记。
最后一种格式是可以重复出现的 "key:value" 组合。例如:
10: 0x1/2 (8, INC) rd API:2 SI:100
当所有参数都可用时,我能够解析整行。
但是,如果缺少任何起始可选参数,则解析器将失败。
如何让 Boost Spirit 库忽略缺失的可选参数?(即跳过这些参数,并为对应的变量赋默认值。)
Qi 语法规则如下:
// Grammar from the question. None of the groups in `start` is wrapped in the
// optional operator, so a line that omits any of them cannot match.
// One or more characters up to the next space or ')'; lexeme[] disables skipping inside.
quoted_string = lexeme[+(char_ -(lit(' ') | lit(')')))];
// Either a 0x/0X-prefixed hexadecimal number or a plain unsigned decimal number.
hex_num = ((lit("0x") | lit("0X")) >> hex) | uint_;
start = (hex_num >> lit(":")) // the "int_:" group
>> (hex_num >> lit("/") >> hex_num ) // the "int_/int_" group
>> lit("(") >> hex_num >> lit(",") >> quoted_string >> lit(")") // the "(int_, string)" group
>> quoted_string
>> quoted_string;
// Rule declarations; every rule uses ascii::space_type as its skipper.
qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
qi::rule<Iterator, uint32_t(), ascii::space_type> hex_num;
qi::rule<Iterator, employee(), ascii::space_type> start;
为您的 AST 节点建模以反映解析器树:
/// The two numbers of an "a/b" group.
struct ratio_t {
    uint32_t a;  ///< number before the '/'
    uint32_t b;  ///< number after the '/'
};
/// The contents of a parenthesised "(id, name)" group.
struct opcode_t {
    uint32_t    id;    ///< number before the ','
    std::string name;  ///< text after the ','
};
// One parsed input line; the members appear in the same order as the fields of the line.
struct Node {
uint32_t label; // number of the leading "label:" prefix
boost::optional<ratio_t> ratio; // optional "a/b" group
boost::optional<opcode_t> opcode; // optional "(id, name)" group
std::string extra; // the bare word that follows (e.g. "rd" in the sample line)
std::multimap<std::string, uint32_t> params; // trailing "key:value" pairs; a multimap, so repeated keys are kept
};
(这些名称只是我随手起的,因为我只能猜测数据的含义。我猜 employee、hex_num 和 quoted_string 是从您最初参考的示例代码中沿用下来的。)
现在,当您用 Boost.Fusion 适配(adapt)这些结构体之后:
// Adapt the AST structs as Fusion sequences (members listed in declaration
// order) so that Spirit can store parsed attributes directly into them.
BOOST_FUSION_ADAPT_STRUCT(AST::ratio_t, a, b)
BOOST_FUSION_ADAPT_STRUCT(AST::opcode_t, id, name)
BOOST_FUSION_ADAPT_STRUCT(AST::Node, label, ratio, opcode, extra, params)
您可以使用类似的解析树简单地解析它:
// Lexemes: in the full demo these rules are declared without a skipper,
// so no whitespace is skipped inside them.
// One or more printable characters, stopping at ')'.
unquoted_string = +(graph - ')');
// Either a case-insensitive "0x"-prefixed hexadecimal number or an unsigned decimal number.
num = (no_case[ "0x" ] >> hex) | uint_;
// A "key:value" pair: the key runs up to the first ':'.
param = +(graph - ':') >> ':' >> num;
// Skipping productions: whitespace is allowed between their elements.
opcode = '(' >> num >> ',' >> unquoted_string >> ')';
ratio = num >> '/' >> num;
prefix = (num >> ':') | attr(0); // attr(0) supplies 0 when there is no "num:" prefix
// Whole line: ratio and opcode are optional (unary '-'), params may repeat zero or more times.
start = prefix
>> -ratio
>> -opcode
>> unquoted_string
>> *param;
现在当你解析这些测试用例时:
// Feed every sample line to the parser `p` and report what happened.
for (std::string const input : {
        "10: 0x1/2 (8, INC) rd API:2 SI:100",
        "10: 0x1/2 (8, INC) rd API:2",
        "10: 0x1/2 (8, INC) rd",
        "10: 0x1/2 rd API:2 SI:100",
        "10: rd API:2 SI:100",
        "0x1/2 rd API:2 SI:100",
        "rd API:2 SI:100",
    })
{
    It first = input.begin();
    It last  = input.end();
    AST::Node data;

    if (qi::phrase_parse(first, last, p, qi::ascii::space, data))
        std::cout << "Parse success: " << data << "\n";
    else
        std::cout << "Parse failure ('" << input << "')\n";

    // phrase_parse advances `first`; whatever lies between it and `last` was not consumed.
    if (first != last)
        std::cout << "Remaining unparsed input: '" << std::string(first, last) << "'\n";
}
你得到:
Parse success: 10: 1/2 (8, 'INC') rd API:2 SI:100
Parse success: 10: 1/2 (8, 'INC') rd API:2
Parse success: 10: 1/2 (8, 'INC') rd
Parse success: 10: 1/2 -- rd API:2 SI:100
Parse success: 10: -- -- rd API:2 SI:100
Parse success: 0: 1/2 -- rd API:2 SI:100
Parse success: 0: -- -- rd API:2 SI:100
完整演示
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/optional/optional_io.hpp>
#include <map>
namespace qi = boost::spirit::qi;
namespace AST {
    /// The two numbers of an "a/b" group.
    struct ratio_t {
        uint32_t a;
        uint32_t b;
    };

    /// The contents of a parenthesised "(id, name)" group.
    struct opcode_t {
        uint32_t    id;
        std::string name;
    };

    /// One parsed input line; members follow the order of the fields in the line.
    struct Node {
        uint32_t                             label;   // prefix:
        boost::optional<ratio_t>             ratio;   // a/b
        boost::optional<opcode_t>            opcode;  // (id, name)
        std::string                          extra;
        std::multimap<std::string, uint32_t> params;
    };

    /// Streams a ratio as "a/b".
    std::ostream& operator<<(std::ostream& os, ratio_t const& v) {
        os << v.a << "/" << v.b;
        return os;
    }

    /// Streams an opcode as "(id, 'name')".
    std::ostream& operator<<(std::ostream& os, opcode_t const& v) {
        os << "(" << v.id << ", '" << v.name << "')";
        return os;
    }

    /// Streams a whole node: label, the two optional groups, the extra word,
    /// then every key:value parameter preceded by a space.
    std::ostream& operator<<(std::ostream& os, Node const& v) {
        os << v.label << ": ";
        os << v.ratio << " ";
        os << v.opcode << " ";
        os << v.extra;
        for (auto const& entry : v.params) {
            os << " " << entry.first << ":" << entry.second;
        }
        return os;
    }
}
// Adapt the AST structs as Fusion sequences (members listed in declaration
// order) so that Spirit can store parsed attributes directly into them.
BOOST_FUSION_ADAPT_STRUCT(AST::ratio_t, a, b)
BOOST_FUSION_ADAPT_STRUCT(AST::opcode_t, id, name)
BOOST_FUSION_ADAPT_STRUCT(AST::Node, label, ratio, opcode, extra, params)
/// Qi grammar for one line of the form
///     [num:] [num/num] [(num, name)] name [key:num]...
/// whose synthesized attribute is an AST::Node.
///
/// @tparam It       iterator type of the input
/// @tparam Skipper  skipper used by the non-lexeme rules (defaults to ASCII whitespace)
template <typename It, typename Skipper = qi::ascii::space_type>
struct P : qi::grammar<It, AST::Node(), Skipper> {
    P() : P::base_type(start)
    {
        using namespace qi;

        // Lexemes: these rules are declared without a skipper (see below),
        // so no whitespace is skipped inside them.
        unquoted_string = +(graph - ')');                    // printable characters up to ')'
        num             = (no_case[ "0x" ] >> hex) | uint_;  // 0x-prefixed hex, else unsigned decimal
        param           = +(graph - ':') >> ':' >> num;      // key:value

        // Skipping productions: whitespace is allowed between their elements.
        opcode = '(' >> num >> ',' >> unquoted_string >> ')';
        ratio  = num >> '/' >> num;
        prefix = (num >> ':') | attr(0); // defaults to 0

        // ratio and opcode are optional (unary '-'); params repeat zero or more times.
        start = prefix
            >> -ratio
            >> -opcode
            >> unquoted_string
            >> *param;

        BOOST_SPIRIT_DEBUG_NODES((start)(unquoted_string)(num)(prefix)(ratio)(opcode)(param))
    }
  private:
    qi::rule<It, AST::ratio_t(), Skipper> ratio;
    qi::rule<It, AST::opcode_t(), Skipper> opcode;
    qi::rule<It, AST::Node(), Skipper> start;
    qi::rule<It, uint32_t(), Skipper> prefix;
    // lexemes (no Skipper argument)
    qi::rule<It, std::string()> unquoted_string;
    qi::rule<It, uint32_t()> num;
    // The attribute is spelled in signature form `T()` like every other rule
    // here; that is the documented way to give a rule its attribute type.
    qi::rule<It, std::pair<std::string, uint32_t>()> param;
};
int main() {
using It = std::string::const_iterator;
P<It> const p;
for (std::string const input : {
"10: 0x1/2 (8, INC) rd API:2 SI:100",
"10: 0x1/2 (8, INC) rd API:2",
"10: 0x1/2 (8, INC) rd",
"10: 0x1/2 rd API:2 SI:100",
"10: rd API:2 SI:100",
"0x1/2 rd API:2 SI:100",
"rd API:2 SI:100",
})
{
It f = input.begin(), l = input.end();
AST::Node data;
bool ok = qi::phrase_parse(f, l, p, qi::ascii::space, data);
if (ok) {
std::cout << "Parse success: " << data << "\n";
}
else {
std::cout << "Parse failure ('" << input << "')\n";
}
if (f!=l) {
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
}
}
我正在尝试从文本文件中解析一行,格式如下:
[int_:] [int_/int_] [(int_, string)] string [string:int_]...
其中 [] 括起来的部分是可选参数,但其中会包含诸如 ":"、"("、")"、"/" 之类的标记。
最后一种格式是可以重复出现的 "key:value" 组合。例如:
10: 0x1/2 (8, INC) rd API:2 SI:100
当所有参数都可用时,我能够解析整行。 但是,如果缺少任何起始可选参数,则解析器将失败。
如何让 Boost Spirit 库忽略缺失的可选参数?(即跳过这些参数,并为对应的变量赋默认值。)
Qi 语法规则如下:
// Grammar from the question. None of the groups in `start` is wrapped in the
// optional operator, so a line that omits any of them cannot match.
// One or more characters up to the next space or ')'; lexeme[] disables skipping inside.
quoted_string = lexeme[+(char_ -(lit(' ') | lit(')')))];
// Either a 0x/0X-prefixed hexadecimal number or a plain unsigned decimal number.
hex_num = ((lit("0x") | lit("0X")) >> hex) | uint_;
start = (hex_num >> lit(":")) // the "int_:" group
>> (hex_num >> lit("/") >> hex_num ) // the "int_/int_" group
>> lit("(") >> hex_num >> lit(",") >> quoted_string >> lit(")") // the "(int_, string)" group
>> quoted_string
>> quoted_string;
// Rule declarations; every rule uses ascii::space_type as its skipper.
qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
qi::rule<Iterator, uint32_t(), ascii::space_type> hex_num;
qi::rule<Iterator, employee(), ascii::space_type> start;
为您的 AST 节点建模以反映解析器树:
/// The two numbers of an "a/b" group.
struct ratio_t {
    uint32_t a;  ///< number before the '/'
    uint32_t b;  ///< number after the '/'
};
/// The contents of a parenthesised "(id, name)" group.
struct opcode_t {
    uint32_t    id;    ///< number before the ','
    std::string name;  ///< text after the ','
};
// One parsed input line; the members appear in the same order as the fields of the line.
struct Node {
uint32_t label; // number of the leading "label:" prefix
boost::optional<ratio_t> ratio; // optional "a/b" group
boost::optional<opcode_t> opcode; // optional "(id, name)" group
std::string extra; // the bare word that follows (e.g. "rd" in the sample line)
std::multimap<std::string, uint32_t> params; // trailing "key:value" pairs; a multimap, so repeated keys are kept
};
(这些名称只是我随手起的,因为我只能猜测数据的含义。我猜 employee、hex_num 和 quoted_string 是从您最初参考的示例代码中沿用下来的。)
现在,当您用 Boost.Fusion 适配(adapt)这些结构体之后:
// Adapt the AST structs as Fusion sequences (members listed in declaration
// order) so that Spirit can store parsed attributes directly into them.
BOOST_FUSION_ADAPT_STRUCT(AST::ratio_t, a, b)
BOOST_FUSION_ADAPT_STRUCT(AST::opcode_t, id, name)
BOOST_FUSION_ADAPT_STRUCT(AST::Node, label, ratio, opcode, extra, params)
您可以使用类似的解析树简单地解析它:
// Lexemes: in the full demo these rules are declared without a skipper,
// so no whitespace is skipped inside them.
// One or more printable characters, stopping at ')'.
unquoted_string = +(graph - ')');
// Either a case-insensitive "0x"-prefixed hexadecimal number or an unsigned decimal number.
num = (no_case[ "0x" ] >> hex) | uint_;
// A "key:value" pair: the key runs up to the first ':'.
param = +(graph - ':') >> ':' >> num;
// Skipping productions: whitespace is allowed between their elements.
opcode = '(' >> num >> ',' >> unquoted_string >> ')';
ratio = num >> '/' >> num;
prefix = (num >> ':') | attr(0); // attr(0) supplies 0 when there is no "num:" prefix
// Whole line: ratio and opcode are optional (unary '-'), params may repeat zero or more times.
start = prefix
>> -ratio
>> -opcode
>> unquoted_string
>> *param;
现在当你解析这些测试用例时:
// Feed every sample line to the parser `p` and report what happened.
for (std::string const input : {
        "10: 0x1/2 (8, INC) rd API:2 SI:100",
        "10: 0x1/2 (8, INC) rd API:2",
        "10: 0x1/2 (8, INC) rd",
        "10: 0x1/2 rd API:2 SI:100",
        "10: rd API:2 SI:100",
        "0x1/2 rd API:2 SI:100",
        "rd API:2 SI:100",
    })
{
    It first = input.begin();
    It last  = input.end();
    AST::Node data;

    if (qi::phrase_parse(first, last, p, qi::ascii::space, data))
        std::cout << "Parse success: " << data << "\n";
    else
        std::cout << "Parse failure ('" << input << "')\n";

    // phrase_parse advances `first`; whatever lies between it and `last` was not consumed.
    if (first != last)
        std::cout << "Remaining unparsed input: '" << std::string(first, last) << "'\n";
}
你得到:
Parse success: 10: 1/2 (8, 'INC') rd API:2 SI:100
Parse success: 10: 1/2 (8, 'INC') rd API:2
Parse success: 10: 1/2 (8, 'INC') rd
Parse success: 10: 1/2 -- rd API:2 SI:100
Parse success: 10: -- -- rd API:2 SI:100
Parse success: 0: 1/2 -- rd API:2 SI:100
Parse success: 0: -- -- rd API:2 SI:100
完整演示
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/optional/optional_io.hpp>
#include <map>
namespace qi = boost::spirit::qi;
namespace AST {
    /// The two numbers of an "a/b" group.
    struct ratio_t {
        uint32_t a;
        uint32_t b;
    };

    /// The contents of a parenthesised "(id, name)" group.
    struct opcode_t {
        uint32_t    id;
        std::string name;
    };

    /// One parsed input line; members follow the order of the fields in the line.
    struct Node {
        uint32_t                             label;   // prefix:
        boost::optional<ratio_t>             ratio;   // a/b
        boost::optional<opcode_t>            opcode;  // (id, name)
        std::string                          extra;
        std::multimap<std::string, uint32_t> params;
    };

    /// Streams a ratio as "a/b".
    std::ostream& operator<<(std::ostream& os, ratio_t const& v) {
        os << v.a << "/" << v.b;
        return os;
    }

    /// Streams an opcode as "(id, 'name')".
    std::ostream& operator<<(std::ostream& os, opcode_t const& v) {
        os << "(" << v.id << ", '" << v.name << "')";
        return os;
    }

    /// Streams a whole node: label, the two optional groups, the extra word,
    /// then every key:value parameter preceded by a space.
    std::ostream& operator<<(std::ostream& os, Node const& v) {
        os << v.label << ": ";
        os << v.ratio << " ";
        os << v.opcode << " ";
        os << v.extra;
        for (auto const& entry : v.params) {
            os << " " << entry.first << ":" << entry.second;
        }
        return os;
    }
}
// Adapt the AST structs as Fusion sequences (members listed in declaration
// order) so that Spirit can store parsed attributes directly into them.
BOOST_FUSION_ADAPT_STRUCT(AST::ratio_t, a, b)
BOOST_FUSION_ADAPT_STRUCT(AST::opcode_t, id, name)
BOOST_FUSION_ADAPT_STRUCT(AST::Node, label, ratio, opcode, extra, params)
/// Qi grammar for one line of the form
///     [num:] [num/num] [(num, name)] name [key:num]...
/// whose synthesized attribute is an AST::Node.
///
/// @tparam It       iterator type of the input
/// @tparam Skipper  skipper used by the non-lexeme rules (defaults to ASCII whitespace)
template <typename It, typename Skipper = qi::ascii::space_type>
struct P : qi::grammar<It, AST::Node(), Skipper> {
    P() : P::base_type(start)
    {
        using namespace qi;

        // Lexemes: these rules are declared without a skipper (see below),
        // so no whitespace is skipped inside them.
        unquoted_string = +(graph - ')');                    // printable characters up to ')'
        num             = (no_case[ "0x" ] >> hex) | uint_;  // 0x-prefixed hex, else unsigned decimal
        param           = +(graph - ':') >> ':' >> num;      // key:value

        // Skipping productions: whitespace is allowed between their elements.
        opcode = '(' >> num >> ',' >> unquoted_string >> ')';
        ratio  = num >> '/' >> num;
        prefix = (num >> ':') | attr(0); // defaults to 0

        // ratio and opcode are optional (unary '-'); params repeat zero or more times.
        start = prefix
            >> -ratio
            >> -opcode
            >> unquoted_string
            >> *param;

        BOOST_SPIRIT_DEBUG_NODES((start)(unquoted_string)(num)(prefix)(ratio)(opcode)(param))
    }
  private:
    qi::rule<It, AST::ratio_t(), Skipper> ratio;
    qi::rule<It, AST::opcode_t(), Skipper> opcode;
    qi::rule<It, AST::Node(), Skipper> start;
    qi::rule<It, uint32_t(), Skipper> prefix;
    // lexemes (no Skipper argument)
    qi::rule<It, std::string()> unquoted_string;
    qi::rule<It, uint32_t()> num;
    // The attribute is spelled in signature form `T()` like every other rule
    // here; that is the documented way to give a rule its attribute type.
    qi::rule<It, std::pair<std::string, uint32_t>()> param;
};
int main() {
using It = std::string::const_iterator;
P<It> const p;
for (std::string const input : {
"10: 0x1/2 (8, INC) rd API:2 SI:100",
"10: 0x1/2 (8, INC) rd API:2",
"10: 0x1/2 (8, INC) rd",
"10: 0x1/2 rd API:2 SI:100",
"10: rd API:2 SI:100",
"0x1/2 rd API:2 SI:100",
"rd API:2 SI:100",
})
{
It f = input.begin(), l = input.end();
AST::Node data;
bool ok = qi::phrase_parse(f, l, p, qi::ascii::space, data);
if (ok) {
std::cout << "Parse success: " << data << "\n";
}
else {
std::cout << "Parse failure ('" << input << "')\n";
}
if (f!=l) {
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
}
}