X3 解析规则不编译
X3 parse rule doesn't compile
我正在通过编写一个解析器来学习 Boost Spirit,该解析器解析 NASM 使用的两种十六进制数变体:
- 带有 0x/0h 前缀或 h/x 后缀的十六进制数。
- 以 $ 为前缀的十六进制数,且 $ 之后必须紧跟一个十进制数字。
这是我到目前为止的想法 Coliru Session:
//#define BOOST_SPIRIT_X3_DEBUG
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/include/support_extended_variant.hpp>
namespace x3 = boost::spirit::x3;
namespace ast {
struct hex_data : std::string {};
struct pascal_hex_data : std::string {};
struct declared_data : boost::spirit::extended_variant<hex_data, pascal_hex_data>
{
declared_data () : base_type () { std::cout << "ctor default\n"; }
declared_data (hex_data const& rhs) : base_type (rhs) { std::cout << "ctor hex: " << rhs << "\n"; }
declared_data (pascal_hex_data const& rhs) : base_type (rhs) { std::cout << "ctor pascal: " << rhs << "\n"; }
};
} // namespace ast
typedef x3::rule<struct hex_digits_class, std::string> hex_digit_type;
typedef x3::rule<struct hex_data_class, ast::hex_data> hex_data_type;
typedef x3::rule<struct pascalhex_data_class, ast::pascal_hex_data> pascalhex_data_type;
typedef x3::rule<struct declared_data_class, ast::declared_data> declared_data_type;
const hex_data_type hex_data = "hex_data";
const hex_digit_type hex_digit = "hex_digit";
const pascalhex_data_type pascalhex_data = "pascal_hex_data";
const declared_data_type declared_data = "declared_data";
// Definition for the `hex_digit` rule: matches a single hexadecimal digit,
// case-insensitively, while treating '_' as skippable separator input.
// (The stray second '=' that made this declaration a syntax error is removed.)
auto const hex_digit_def
    = x3::skip(x3::char_('_'))
      [
          x3::no_case
          [
              x3::char_ ('0', '9') | x3::char_ ("a", "f")
          ]
      ]
    ;
auto const hex_data_def
= x3::no_case[x3::lit ("0h") | "0x"] >> +hex_digit_def
| +hex_digit_def >> x3::no_case[x3::lit ("h") | "x"]
;
auto const pascalhex_data_def
= x3::lit ("$") >> x3::char_ ('0', '9') >> +hex_digit_def;
auto const declared_data_def
= hex_data_def
| pascalhex_data_def
;
BOOST_SPIRIT_DEFINE (hex_digit, hex_data, pascalhex_data, declared_data)
struct Visitor
{
using result_type = std::string;
std::string operator()(ast::hex_data const & v) const { return "hex_data"; }
std::string operator()(ast::pascal_hex_data const & v) const { return "pascal_hex_data"; }
};
int main()
{
std::string input = "";
ast::declared_data parsed;
bool r =
x3::parse (input.begin (), input.end (),
declared_data_def,
parsed);
std::cout << "r = " << r << "\n";
Visitor v;
std::cout << "result = " << boost::apply_visitor(v, parsed) << "\n";
}
但是,规则 pascalhex_data_def 无法编译,错误消息看起来像是 spirit 正在把该规则的属性推导为由 char 和 variant 的 vector 组成的 fusion 元组,即使该规则已被指定为具有派生自 string 的 ast 属性:
typedef x3::rule<struct pascalhex_data_class, ast::pascal_hex_data> pascalhex_data_type;
谁能指出为什么 boost 推导出的属性不是我所指定的那个?有没有办法强制规则生成字符串,而不是 boost 试图返回的 tuple?
就其实现的内容而言,您的代码似乎极其复杂。但是,看了半天,我发现你是在声明规则(强制他们的属性类型),但在关键时刻没有使用它们:
auto const declared_data_def = hex_data_def | pascalhex_data_def;
这意味着您是直接用表达式模板(_def)初始值设定项来构建表达式树,而没有使用规则。应改为使用规则本身:
auto const declared_data_def = hex_data | pascalhex_data;
这样就能编译了。不过它仍然存在一些问题:
你可以(也应该)去掉手写的变体构造函数:
struct declared_data : boost::spirit::extended_variant<hex_data, pascal_hex_data> {
using extended_variant::extended_variant;
};
你可以把 x3::char_ ('0', '9') 写成 x3::char_("0-9"),这样你就可以写成
x3::no_case [ x3::char_ ("0-9a-f") ]
而不是
x3::no_case
[
x3::char_ ('0', '9') | x3::char_ ("a", "f")
]
甚至
x3::char_ ("0-9a-fA-F")
或者,也许只是:
x3::xdigit
hex_digits_type
声明了一个 std::string
属性,但只解析一个字符。不要使用 +hex_digits_def
,只需使用 hex_digits
并写入:
auto const hex_digits_def = x3::skip(x3::char_('_')) [ +x3::xdigit ];
你的定义
"$" >> x3::char_("0-9") >> hex_digits
会消耗掉十六进制数的第一位。这会导致错误(例如对 $9 会解析出空字符串)。相反,您可能想用 operator& 做前瞻检查:
'$' >> &x3::char_("0-9") >> hex_digits
或者,确实:
'$' >> &x3::digit >> hex_digits
这些规则实际上都不是递归的,因此没有任何一条规则需要把声明和定义分开。这大大减少了代码。
简化,第 1 步
我怀疑您是想把十六进制数据解释为数字,而不是字符串。您可以(也应该)相应地简化 AST。第 1 步:去掉对“结果是按哪一种格式解析出来的”的区分:
namespace ast {
using hex_literal = std::string;
}
现在整个程序简化为Live On Coliru
#include <iostream>
#include <boost/spirit/home/x3.hpp>
namespace ast {
using hex_literal = std::string;
}
namespace parser {
namespace x3 = boost::spirit::x3;
auto const hex_digits = x3::rule<struct hex_digits_class, ast::hex_literal> {"hex_digits"}
= x3::skip(x3::char_('_')) [ +x3::xdigit ];
auto const hex_qualifier = x3::omit [ x3::char_("hxHX") ];
auto const hex_literal =
('$' >> &x3::xdigit | '0' >> hex_qualifier) >> hex_digits
| hex_digits >> hex_qualifier;
}
// Runs the step-1 parser over the sample literals and prints, for each one,
// whether it matched and the digit string that was collected.
int main()
{
    // "$9" and "$9_f" cover the '$'-prefixed form; they are the inputs that
    // yield the "9" and "9f" results (an empty or "_f" input cannot match).
    for (std::string const input : {
            "$9", "0x1b", "0h1c", "1dh", "1ex",
            "$9_f", "0x1_fb", "0h1_fc", "1_fdh", "1_fex"
        }) {
        ast::hex_literal parsed;
        bool r = parse(input.begin(), input.end(), parser::hex_literal, parsed);
        std::cout << "r = " << std::boolalpha << r << ", result = " << parsed << "\n";
    }
}
正在打印:
r = true, result = 9
r = true, result = 1b
r = true, result = 1c
r = true, result = 1d
r = true, result = 1e
r = true, result = 9f
r = true, result = 1fb
r = true, result = 1fc
r = true, result = 1fd
r = true, result = 1fe
第2步(打破下划线解析)
现在,很显然,您想知道数值:
#include <iostream>
#include <boost/spirit/home/x3.hpp>
namespace ast {
using hex_literal = uintmax_t;
}
namespace parser {
namespace x3 = boost::spirit::x3;
auto const hex_qualifier = x3::omit [ x3::char_("hxHX") ];
auto const hex_literal
= ('$' >> &x3::xdigit | '0' >> hex_qualifier) >> x3::hex
| x3::hex >> hex_qualifier
;
}
// Runs the step-2 (numeric) parser over the sample literals and reports, for
// each one, whether the entire input matched, the value parsed, and whatever
// input was left unconsumed.
int main()
{
    // "$9" and "$9_f" cover the '$'-prefixed form of the literal.
    for (std::string const input : {
            "$9", "0x1b", "0h1c", "1dh", "1ex",
            "$9_f", "0x1_fb", "0h1_fc", "1_fdh", "1_fex"
        }) {
        // Zero-initialised because the value is printed even when the parse
        // fails; an integer that was never written must not be read.
        ast::hex_literal parsed = 0;
        auto f = input.begin(), l = input.end();
        // Success requires both a match and full consumption of the input.
        bool r = parse(f, l, parser::hex_literal, parsed) && f==l;
        std::cout << std::boolalpha
                  << "r = " << r
                  << ",\tresult = " << parsed
                  << ",\tremaining: '" << std::string(f,l) << "'\n";
    }
}
打印
r = true, result = 9, remaining: ''
r = true, result = 27, remaining: ''
r = true, result = 28, remaining: ''
r = true, result = 29, remaining: ''
r = true, result = 30, remaining: ''
r = false, result = 9, remaining: '_f'
r = false, result = 1, remaining: '_fb'
r = false, result = 1, remaining: '_fc'
r = false, result = 1, remaining: '1_fdh'
r = false, result = 1, remaining: '1_fex'
第 3 步:再次使用下划线
这是我开始考虑自定义解析器的地方。这是因为它将开始涉及语义操作¹以及多个属性强制转换,坦率地说,将它们打包是最方便的,这样您就可以像其他人一样编写命令式 C++14:
#include <iostream>
#include <boost/spirit/home/x3.hpp>
namespace ast {
using hex_literal = uintmax_t;
}
namespace parser {
namespace x3 = boost::spirit::x3;
// Custom X3 parser for hex literals. It collects the hex digits as text
// (ignoring '_' separators) and converts them to the numeric attribute.
struct hex_literal_type : x3::parser_base {
using attribute_type = ast::hex_literal;
// Parses one literal starting at `f` (advanced by the nested x3::parse call);
// `l` is the end of input, `ctx` supplies the surrounding skipper, and `attr`
// receives the value. Returns true on a match.
template <typename It, typename Ctx, typename RCtx>
static bool parse(It& f, It l, Ctx& ctx, RCtx&, attribute_type& attr) {
std::string digits;
skip_over(f, l, ctx); // pre-skip using surrounding skipper
// Upper bound on the number of hex digits accepted (bit width of the attribute / 8).
auto constexpr max_digits = std::numeric_limits<attribute_type>::digits / 8;
// 1..max_digits hex digits, with '_' acting as the skipper between them.
auto digits_ = x3::skip(x3::as_parser('_')) [x3::repeat(1, max_digits) [ x3::xdigit ] ];
// One of h/x/H/X; matched but not exposed as an attribute.
auto qualifier = x3::omit [ x3::char_("hxHX") ];
// Accepted forms: '$' followed by digits whose first character must be a
// decimal digit (look-ahead only), '0' + qualifier + digits, or digits + qualifier.
auto forms
= ('$' >> &x3::digit | '0' >> qualifier) >> digits_
| digits_ >> qualifier
;
if (x3::parse(f, l, forms, digits)) {
// Convert the collected digit text from base 16.
attr = std::stoull(digits, nullptr, 16);
return true;
}
return false;
}
};
hex_literal_type static const hex_literal;
}
// Runs the custom hex-literal parser over the sample literals and reports,
// for each one, whether the entire input matched, the value parsed, and
// whatever input was left unconsumed.
int main() {
    // The "$9", "$9_f", "$0_00..." and "$9_F" entries cover the '$'-prefixed
    // form; they are the inputs that yield the 9, 159 and 256 results.
    for (std::string const input : {
            "$9", "0x1b", "0h1c", "1dh", "1ex",
            "$9_f", "0x1_fb", "0h1_fc", "1_fdh", "1_fex",
            // edge cases
            "ffffffffH", // fits
            "1ffffffffH", // too big
            "$0_00___01___________0__________0", // fine
            "0x", // fine, same as "0h"
            "$",
            // upper case
            "$9", "0X1B", "0H1C", "1DH", "1EX",
            "$9_F", "0X1_FB", "0H1_FC", "1_FDH", "1_FEX",
        }) {
        ast::hex_literal parsed = 0;
        auto f = input.begin(), l = input.end();
        // Success requires both a match and full consumption of the input.
        bool r = parse(f, l, parser::hex_literal, parsed) && f==l;
        std::cout << std::boolalpha
                  << "r = " << r
                  << ",\tresult = " << parsed
                  << ",\tremaining: '" << std::string(f,l) << "'\n";
    }
}
Note how I included max_digits to avoid runaway parsing (say, when the input has 10 gigabytes of hex digits). You might want to improve this by pre-skipping insignificant 0 digits.
现在的输出是:
r = true, result = 9, remaining: ''
r = true, result = 27, remaining: ''
r = true, result = 28, remaining: ''
r = true, result = 29, remaining: ''
r = true, result = 30, remaining: ''
r = true, result = 159, remaining: ''
r = true, result = 507, remaining: ''
r = true, result = 508, remaining: ''
r = true, result = 509, remaining: ''
r = true, result = 510, remaining: ''
r = true, result = 4294967295, remaining: ''
r = false, result = 0, remaining: '1ffffffffH'
r = true, result = 256, remaining: ''
r = true, result = 0, remaining: ''
r = false, result = 0, remaining: '$'
r = true, result = 9, remaining: ''
r = true, result = 27, remaining: ''
r = true, result = 28, remaining: ''
r = true, result = 29, remaining: ''
r = true, result = 30, remaining: ''
r = true, result = 159, remaining: ''
r = true, result = 507, remaining: ''
r = true, result = 508, remaining: ''
r = true, result = 509, remaining: ''
r = true, result = 510, remaining: ''
第 4 步:锦上添花
如果您想保留往返的输入格式,您现在可以简单地将其添加到 AST 中:
#include <iostream>
#include <boost/spirit/home/x3.hpp>
namespace ast {
struct hex_literal {
uintmax_t value;
std::string source;
};
}
namespace parser {
namespace x3 = boost::spirit::x3;
// Custom X3 parser for hex literals. It collects the hex digits as text
// (ignoring '_' separators), converts them to a number, and also records the
// exact source text that was matched.
struct hex_literal_type : x3::parser_base {
    using attribute_type = ast::hex_literal;

    // Parses one literal starting at `f` (advanced past the match on success);
    // `l` is the end of input, `ctx` supplies the surrounding skipper, and
    // `attr` receives the value and the matched text. Returns true on a match.
    template <typename It, typename Ctx, typename RCtx>
    static bool parse(It& f, It l, Ctx& ctx, RCtx&, attribute_type& attr) {
        std::string digits;

        skip_over(f, l, ctx); // pre-skip using surrounding skipper
        It b = f; // save start

        // Upper bound on the number of hex digits accepted (bit width of the value / 8).
        auto constexpr max_digits = std::numeric_limits<decltype(attr.value)>::digits / 8;
        // 1..max_digits hex digits, with '_' acting as the skipper between them.
        auto digits_ = x3::skip(x3::as_parser('_')) [x3::repeat(1, max_digits) [ x3::xdigit ] ];
        // One of h/x/H/X; matched but not exposed as an attribute.
        auto qualifier = x3::omit [ x3::char_("hxHX") ];

        // Accepted forms: '$' followed by digits whose first character must be
        // a decimal digit (look-ahead only), '0' + qualifier + digits, or
        // digits + qualifier.
        auto forms
            = ('$' >> &x3::digit | '0' >> qualifier) >> digits_
            | digits_ >> qualifier
            ;

        if (x3::parse(f, l, forms, digits)) {
            attr.value = std::stoull(digits, nullptr, 16);
            // Record only what was consumed, [b, f). Copying up to `l` would
            // also capture any trailing input that follows the literal.
            attr.source.assign(b, f);
            return true;
        }
        return false;
    }
};
hex_literal_type static const hex_literal;
}
// Runs the custom hex-literal parser over the sample literals and prints the
// value and recorded source text on success, or the unconsumed input on failure.
int main()
{
    // The "$9", "$9_f", "$0_00..." and "$9_F" entries cover the '$'-prefixed
    // form; they are the inputs that yield the 9, 159 and 256 results.
    for (std::string const input : {
            "$9", "0x1b", "0h1c", "1dh", "1ex",
            "$9_f", "0x1_fb", "0h1_fc", "1_fdh", "1_fex",
            // edge cases
            "ffffffffH", // fits
            "1ffffffffH", // too big
            "$0_00___01___________0__________0", // fine
            "0x", // fine, same as "0h"
            "$",
            // upper case
            "$9", "0X1B", "0H1C", "1DH", "1EX",
            "$9_F", "0X1_FB", "0H1_FC", "1_FDH", "1_FEX",
        }) {
        ast::hex_literal parsed = {};
        auto f = input.begin(), l = input.end();
        // Success requires both a match and full consumption of the input.
        bool r = parse(f, l, parser::hex_literal, parsed) && f==l;
        if (r) {
            std::cout << "result = " << parsed.value
                      << ",\tsource = '" << parsed.source << "'\n";
        }
        else {
            std::cout << "FAILED"
                      << ",\tremaining: '" << std::string(f,l) << "'\n";
        }
    }
}
打印:
result = 9, source = '$9'
result = 27, source = '0x1b'
result = 28, source = '0h1c'
result = 29, source = '1dh'
result = 30, source = '1ex'
result = 159, source = '$9_f'
result = 507, source = '0x1_fb'
result = 508, source = '0h1_fc'
result = 509, source = '1_fdh'
result = 510, source = '1_fex'
result = 4294967295, source = 'ffffffffH'
FAILED, remaining: '1ffffffffH'
result = 256, source = '$0_00___01___________0__________0'
result = 0, source = '0x'
FAILED, remaining: '$'
result = 9, source = '$9'
result = 27, source = '0X1B'
result = 28, source = '0H1C'
result = 29, source = '1DH'
result = 30, source = '1EX'
result = 159, source = '$9_F'
result = 507, source = '0X1_FB'
result = 508, source = '0H1_FC'
result = 509, source = '1_FDH'
result = 510, source = '1_FEX'
¹ Boost Spirit: "Semantic actions are evil"?
我正在通过编写一个解析器来学习 Boost Spirit,该解析器解析 NASM 使用的两种十六进制数变体:
- 带有 0x/0h 前缀或 h/x 后缀的十六进制数。
- 以 $ 为前缀的十六进制数,且 $ 之后必须紧跟一个十进制数字。
这是我到目前为止的想法 Coliru Session:
//#define BOOST_SPIRIT_X3_DEBUG
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/include/support_extended_variant.hpp>
namespace x3 = boost::spirit::x3;
namespace ast {
struct hex_data : std::string {};
struct pascal_hex_data : std::string {};
struct declared_data : boost::spirit::extended_variant<hex_data, pascal_hex_data>
{
declared_data () : base_type () { std::cout << "ctor default\n"; }
declared_data (hex_data const& rhs) : base_type (rhs) { std::cout << "ctor hex: " << rhs << "\n"; }
declared_data (pascal_hex_data const& rhs) : base_type (rhs) { std::cout << "ctor pascal: " << rhs << "\n"; }
};
} // namespace ast
typedef x3::rule<struct hex_digits_class, std::string> hex_digit_type;
typedef x3::rule<struct hex_data_class, ast::hex_data> hex_data_type;
typedef x3::rule<struct pascalhex_data_class, ast::pascal_hex_data> pascalhex_data_type;
typedef x3::rule<struct declared_data_class, ast::declared_data> declared_data_type;
const hex_data_type hex_data = "hex_data";
const hex_digit_type hex_digit = "hex_digit";
const pascalhex_data_type pascalhex_data = "pascal_hex_data";
const declared_data_type declared_data = "declared_data";
// Definition for the `hex_digit` rule: matches a single hexadecimal digit,
// case-insensitively, while treating '_' as skippable separator input.
// (The stray second '=' that made this declaration a syntax error is removed.)
auto const hex_digit_def
    = x3::skip(x3::char_('_'))
      [
          x3::no_case
          [
              x3::char_ ('0', '9') | x3::char_ ("a", "f")
          ]
      ]
    ;
auto const hex_data_def
= x3::no_case[x3::lit ("0h") | "0x"] >> +hex_digit_def
| +hex_digit_def >> x3::no_case[x3::lit ("h") | "x"]
;
auto const pascalhex_data_def
= x3::lit ("$") >> x3::char_ ('0', '9') >> +hex_digit_def;
auto const declared_data_def
= hex_data_def
| pascalhex_data_def
;
BOOST_SPIRIT_DEFINE (hex_digit, hex_data, pascalhex_data, declared_data)
struct Visitor
{
using result_type = std::string;
std::string operator()(ast::hex_data const & v) const { return "hex_data"; }
std::string operator()(ast::pascal_hex_data const & v) const { return "pascal_hex_data"; }
};
int main()
{
std::string input = "";
ast::declared_data parsed;
bool r =
x3::parse (input.begin (), input.end (),
declared_data_def,
parsed);
std::cout << "r = " << r << "\n";
Visitor v;
std::cout << "result = " << boost::apply_visitor(v, parsed) << "\n";
}
但是,规则 pascalhex_data_def 无法编译,错误消息看起来像是 spirit 正在把该规则的属性推导为由 char 和 variant 的 vector 组成的 fusion 元组,即使该规则已被指定为具有派生自 string 的 ast 属性:
typedef x3::rule<struct pascalhex_data_class, ast::pascal_hex_data> pascalhex_data_type;
谁能指出为什么 boost 推导出的属性不是我所指定的那个?有没有办法强制规则生成字符串,而不是 boost 试图返回的 tuple?
就其实现的内容而言,您的代码似乎极其复杂。但是,看了半天,我发现你是在声明规则(强制他们的属性类型),但在关键时刻没有使用它们:
auto const declared_data_def = hex_data_def | pascalhex_data_def;
这意味着您是直接用表达式模板(_def)初始值设定项来构建表达式树,而没有使用规则。应改为使用规则本身:
auto const declared_data_def = hex_data | pascalhex_data;
这样就能编译了。不过它仍然存在一些问题:
你可以(也应该)去掉手写的变体构造函数:
struct declared_data : boost::spirit::extended_variant<hex_data, pascal_hex_data> { using extended_variant::extended_variant; };
你可以把 x3::char_ ('0', '9') 写成 x3::char_("0-9"),这样你就可以写成
x3::no_case [ x3::char_ ("0-9a-f") ]
而不是
x3::no_case [ x3::char_ ('0', '9') | x3::char_ ("a", "f") ]
甚至
x3::char_ ("0-9a-fA-F")
或者,也许只是:
x3::xdigit
hex_digits_type
声明了一个std::string
属性,但只解析一个字符。不要使用+hex_digits_def
,只需使用hex_digits
并写入:auto const hex_digits_def = x3::skip(x3::char_('_')) [ +x3::xdigit ];
你的定义
"$" >> x3::char_("0-9") >> hex_digits
会消耗掉十六进制数的第一位。这会导致错误(例如对 $9 会解析出空字符串)。相反,您可能想用 operator& 做前瞻检查:
'$' >> &x3::char_("0-9") >> hex_digits
或者,确实:
'$' >> &x3::digit >> hex_digits
这些规则实际上都不是递归的,因此没有任何一条规则需要把声明和定义分开。这大大减少了代码。
简化,第 1 步
我怀疑您是想把十六进制数据解释为数字,而不是字符串。您可以(也应该)相应地简化 AST。第 1 步:去掉对“结果是按哪一种格式解析出来的”的区分:
namespace ast {
using hex_literal = std::string;
}
现在整个程序简化为Live On Coliru
#include <iostream>
#include <boost/spirit/home/x3.hpp>
namespace ast {
using hex_literal = std::string;
}
namespace parser {
namespace x3 = boost::spirit::x3;
auto const hex_digits = x3::rule<struct hex_digits_class, ast::hex_literal> {"hex_digits"}
= x3::skip(x3::char_('_')) [ +x3::xdigit ];
auto const hex_qualifier = x3::omit [ x3::char_("hxHX") ];
auto const hex_literal =
('$' >> &x3::xdigit | '0' >> hex_qualifier) >> hex_digits
| hex_digits >> hex_qualifier;
}
// Runs the step-1 parser over the sample literals and prints, for each one,
// whether it matched and the digit string that was collected.
int main()
{
    // "$9" and "$9_f" cover the '$'-prefixed form; they are the inputs that
    // yield the "9" and "9f" results (an empty or "_f" input cannot match).
    for (std::string const input : {
            "$9", "0x1b", "0h1c", "1dh", "1ex",
            "$9_f", "0x1_fb", "0h1_fc", "1_fdh", "1_fex"
        }) {
        ast::hex_literal parsed;
        bool r = parse(input.begin(), input.end(), parser::hex_literal, parsed);
        std::cout << "r = " << std::boolalpha << r << ", result = " << parsed << "\n";
    }
}
正在打印:
r = true, result = 9
r = true, result = 1b
r = true, result = 1c
r = true, result = 1d
r = true, result = 1e
r = true, result = 9f
r = true, result = 1fb
r = true, result = 1fc
r = true, result = 1fd
r = true, result = 1fe
第2步(打破下划线解析)
现在,很显然,您想知道数值:
#include <iostream>
#include <boost/spirit/home/x3.hpp>
namespace ast {
using hex_literal = uintmax_t;
}
namespace parser {
namespace x3 = boost::spirit::x3;
auto const hex_qualifier = x3::omit [ x3::char_("hxHX") ];
auto const hex_literal
= ('$' >> &x3::xdigit | '0' >> hex_qualifier) >> x3::hex
| x3::hex >> hex_qualifier
;
}
// Runs the step-2 (numeric) parser over the sample literals and reports, for
// each one, whether the entire input matched, the value parsed, and whatever
// input was left unconsumed.
int main()
{
    // "$9" and "$9_f" cover the '$'-prefixed form of the literal.
    for (std::string const input : {
            "$9", "0x1b", "0h1c", "1dh", "1ex",
            "$9_f", "0x1_fb", "0h1_fc", "1_fdh", "1_fex"
        }) {
        // Zero-initialised because the value is printed even when the parse
        // fails; an integer that was never written must not be read.
        ast::hex_literal parsed = 0;
        auto f = input.begin(), l = input.end();
        // Success requires both a match and full consumption of the input.
        bool r = parse(f, l, parser::hex_literal, parsed) && f==l;
        std::cout << std::boolalpha
                  << "r = " << r
                  << ",\tresult = " << parsed
                  << ",\tremaining: '" << std::string(f,l) << "'\n";
    }
}
打印
r = true, result = 9, remaining: ''
r = true, result = 27, remaining: ''
r = true, result = 28, remaining: ''
r = true, result = 29, remaining: ''
r = true, result = 30, remaining: ''
r = false, result = 9, remaining: '_f'
r = false, result = 1, remaining: '_fb'
r = false, result = 1, remaining: '_fc'
r = false, result = 1, remaining: '1_fdh'
r = false, result = 1, remaining: '1_fex'
第 3 步:再次使用下划线
这是我开始考虑自定义解析器的地方。这是因为它将开始涉及语义操作¹以及多个属性强制转换,坦率地说,将它们打包是最方便的,这样您就可以像其他人一样编写命令式 C++14:
#include <iostream>
#include <boost/spirit/home/x3.hpp>
namespace ast {
using hex_literal = uintmax_t;
}
namespace parser {
namespace x3 = boost::spirit::x3;
// Custom X3 parser for hex literals. It collects the hex digits as text
// (ignoring '_' separators) and converts them to the numeric attribute.
struct hex_literal_type : x3::parser_base {
using attribute_type = ast::hex_literal;
// Parses one literal starting at `f` (advanced by the nested x3::parse call);
// `l` is the end of input, `ctx` supplies the surrounding skipper, and `attr`
// receives the value. Returns true on a match.
template <typename It, typename Ctx, typename RCtx>
static bool parse(It& f, It l, Ctx& ctx, RCtx&, attribute_type& attr) {
std::string digits;
skip_over(f, l, ctx); // pre-skip using surrounding skipper
// Upper bound on the number of hex digits accepted (bit width of the attribute / 8).
auto constexpr max_digits = std::numeric_limits<attribute_type>::digits / 8;
// 1..max_digits hex digits, with '_' acting as the skipper between them.
auto digits_ = x3::skip(x3::as_parser('_')) [x3::repeat(1, max_digits) [ x3::xdigit ] ];
// One of h/x/H/X; matched but not exposed as an attribute.
auto qualifier = x3::omit [ x3::char_("hxHX") ];
// Accepted forms: '$' followed by digits whose first character must be a
// decimal digit (look-ahead only), '0' + qualifier + digits, or digits + qualifier.
auto forms
= ('$' >> &x3::digit | '0' >> qualifier) >> digits_
| digits_ >> qualifier
;
if (x3::parse(f, l, forms, digits)) {
// Convert the collected digit text from base 16.
attr = std::stoull(digits, nullptr, 16);
return true;
}
return false;
}
};
hex_literal_type static const hex_literal;
}
// Runs the custom hex-literal parser over the sample literals and reports,
// for each one, whether the entire input matched, the value parsed, and
// whatever input was left unconsumed.
int main() {
    // The "$9", "$9_f", "$0_00..." and "$9_F" entries cover the '$'-prefixed
    // form; they are the inputs that yield the 9, 159 and 256 results.
    for (std::string const input : {
            "$9", "0x1b", "0h1c", "1dh", "1ex",
            "$9_f", "0x1_fb", "0h1_fc", "1_fdh", "1_fex",
            // edge cases
            "ffffffffH", // fits
            "1ffffffffH", // too big
            "$0_00___01___________0__________0", // fine
            "0x", // fine, same as "0h"
            "$",
            // upper case
            "$9", "0X1B", "0H1C", "1DH", "1EX",
            "$9_F", "0X1_FB", "0H1_FC", "1_FDH", "1_FEX",
        }) {
        ast::hex_literal parsed = 0;
        auto f = input.begin(), l = input.end();
        // Success requires both a match and full consumption of the input.
        bool r = parse(f, l, parser::hex_literal, parsed) && f==l;
        std::cout << std::boolalpha
                  << "r = " << r
                  << ",\tresult = " << parsed
                  << ",\tremaining: '" << std::string(f,l) << "'\n";
    }
}
Note how I included max_digits to avoid runaway parsing (say, when the input has 10 gigabytes of hex digits). You might want to improve this by pre-skipping insignificant 0 digits.
现在的输出是:
r = true, result = 9, remaining: ''
r = true, result = 27, remaining: ''
r = true, result = 28, remaining: ''
r = true, result = 29, remaining: ''
r = true, result = 30, remaining: ''
r = true, result = 159, remaining: ''
r = true, result = 507, remaining: ''
r = true, result = 508, remaining: ''
r = true, result = 509, remaining: ''
r = true, result = 510, remaining: ''
r = true, result = 4294967295, remaining: ''
r = false, result = 0, remaining: '1ffffffffH'
r = true, result = 256, remaining: ''
r = true, result = 0, remaining: ''
r = false, result = 0, remaining: '$'
r = true, result = 9, remaining: ''
r = true, result = 27, remaining: ''
r = true, result = 28, remaining: ''
r = true, result = 29, remaining: ''
r = true, result = 30, remaining: ''
r = true, result = 159, remaining: ''
r = true, result = 507, remaining: ''
r = true, result = 508, remaining: ''
r = true, result = 509, remaining: ''
r = true, result = 510, remaining: ''
第 4 步:锦上添花
如果您想保留往返的输入格式,您现在可以简单地将其添加到 AST 中:
#include <iostream>
#include <boost/spirit/home/x3.hpp>
namespace ast {
struct hex_literal {
uintmax_t value;
std::string source;
};
}
namespace parser {
namespace x3 = boost::spirit::x3;
// Custom X3 parser for hex literals. It collects the hex digits as text
// (ignoring '_' separators), converts them to a number, and also records the
// exact source text that was matched.
struct hex_literal_type : x3::parser_base {
    using attribute_type = ast::hex_literal;

    // Parses one literal starting at `f` (advanced past the match on success);
    // `l` is the end of input, `ctx` supplies the surrounding skipper, and
    // `attr` receives the value and the matched text. Returns true on a match.
    template <typename It, typename Ctx, typename RCtx>
    static bool parse(It& f, It l, Ctx& ctx, RCtx&, attribute_type& attr) {
        std::string digits;

        skip_over(f, l, ctx); // pre-skip using surrounding skipper
        It b = f; // save start

        // Upper bound on the number of hex digits accepted (bit width of the value / 8).
        auto constexpr max_digits = std::numeric_limits<decltype(attr.value)>::digits / 8;
        // 1..max_digits hex digits, with '_' acting as the skipper between them.
        auto digits_ = x3::skip(x3::as_parser('_')) [x3::repeat(1, max_digits) [ x3::xdigit ] ];
        // One of h/x/H/X; matched but not exposed as an attribute.
        auto qualifier = x3::omit [ x3::char_("hxHX") ];

        // Accepted forms: '$' followed by digits whose first character must be
        // a decimal digit (look-ahead only), '0' + qualifier + digits, or
        // digits + qualifier.
        auto forms
            = ('$' >> &x3::digit | '0' >> qualifier) >> digits_
            | digits_ >> qualifier
            ;

        if (x3::parse(f, l, forms, digits)) {
            attr.value = std::stoull(digits, nullptr, 16);
            // Record only what was consumed, [b, f). Copying up to `l` would
            // also capture any trailing input that follows the literal.
            attr.source.assign(b, f);
            return true;
        }
        return false;
    }
};
hex_literal_type static const hex_literal;
}
// Runs the custom hex-literal parser over the sample literals and prints the
// value and recorded source text on success, or the unconsumed input on failure.
int main()
{
    // The "$9", "$9_f", "$0_00..." and "$9_F" entries cover the '$'-prefixed
    // form; they are the inputs that yield the 9, 159 and 256 results.
    for (std::string const input : {
            "$9", "0x1b", "0h1c", "1dh", "1ex",
            "$9_f", "0x1_fb", "0h1_fc", "1_fdh", "1_fex",
            // edge cases
            "ffffffffH", // fits
            "1ffffffffH", // too big
            "$0_00___01___________0__________0", // fine
            "0x", // fine, same as "0h"
            "$",
            // upper case
            "$9", "0X1B", "0H1C", "1DH", "1EX",
            "$9_F", "0X1_FB", "0H1_FC", "1_FDH", "1_FEX",
        }) {
        ast::hex_literal parsed = {};
        auto f = input.begin(), l = input.end();
        // Success requires both a match and full consumption of the input.
        bool r = parse(f, l, parser::hex_literal, parsed) && f==l;
        if (r) {
            std::cout << "result = " << parsed.value
                      << ",\tsource = '" << parsed.source << "'\n";
        }
        else {
            std::cout << "FAILED"
                      << ",\tremaining: '" << std::string(f,l) << "'\n";
        }
    }
}
打印:
result = 9, source = '$9'
result = 27, source = '0x1b'
result = 28, source = '0h1c'
result = 29, source = '1dh'
result = 30, source = '1ex'
result = 159, source = '$9_f'
result = 507, source = '0x1_fb'
result = 508, source = '0h1_fc'
result = 509, source = '1_fdh'
result = 510, source = '1_fex'
result = 4294967295, source = 'ffffffffH'
FAILED, remaining: '1ffffffffH'
result = 256, source = '$0_00___01___________0__________0'
result = 0, source = '0x'
FAILED, remaining: '$'
result = 9, source = '$9'
result = 27, source = '0X1B'
result = 28, source = '0H1C'
result = 29, source = '1DH'
result = 30, source = '1EX'
result = 159, source = '$9_F'
result = 507, source = '0X1_FB'
result = 508, source = '0H1_FC'
result = 509, source = '1_FDH'
result = 510, source = '1_FEX'
¹ Boost Spirit: "Semantic actions are evil"?