后续:使用 boost::spirit::qi 解析带分隔符的数字
Followup: Using boost::spirit::qi to parse numbers with separators
这是我之前一个问题的后续问题。
根据 sehe 的非常好的建议,我成功地进行了数字解析。然后我尝试更新它以拥有一个辅助解析器,该解析器处理带有可选符号的数字。第二次尝试失败了。我怀疑我在处理子语法方面做错了什么。代码如下:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Grammar for an unsigned number whose digits may be separated by '_'.
// The radix is selected either by a leading '0' followed by a radix letter,
// or by a trailing radix letter (x/X/h/H hex, b/B/y/Y binary, o/O/q/Q octal,
// d/D/t/T decimal; only the trailing decimal letter is optional).
// NOTE(review): this is the listing as posted in the question; the defects
// flagged in the comments below are deliberately left untouched.
template <typename Iterator, typename Num>
struct unsigned_parser : qi::grammar<Iterator, Num()> {
unsigned_parser() : unsigned_parser::base_type(start) {
using qi::_val;
using qi::_1;
using qi::eps;
using qi::debug;
using ascii::char_;
// Each digit rule first resets _val to 0, then folds every matched digit
// into _val; a '_' is consumed without contributing to the value.
bin = eps[_val=0] >> *(char_("01")[_val = _val * 2 + dval(_1)] | '_');
oct = eps[_val=0] >> *(char_("0-7")[_val = _val * 8 + dval(_1)] | '_');
dec = eps[_val=0]
>> *(char_("0-9")[_val = _val * 10 + dval(_1)] | '_');
hex = eps[_val=0]
>> *(char_("0-9a-fA-F")[_val = _val * 16 + dval(_1)] | '_');
// First alternative: "0<radix letter><digits>"; remaining alternatives:
// "<digits><radix letter>". Every branch copies the sub-rule result to _val.
start = (char_('0') >>
((char_("xXhH") >> hex[_val=_1])
| (char_("bByY") >> bin[_val=_1])
| (char_("oOqQ") >> oct[_val=_1])
| (char_("dDtT") >> dec[_val=_1])))
| (hex[_val=_1] >> char_("xXhH"))
| (bin[_val=_1] >> char_("bByY"))
| (oct[_val=_1] >> char_("oOqQ"))
| (dec[_val=_1] >> -char_("dDtT"));
// Name the rules and switch on Qi debug output for each of them.
start.name("unum");
hex.name("hex");
oct.name("oct");
dec.name("dec");
bin.name("bin");
debug(start);
debug(hex);
debug(oct);
debug(dec);
debug(bin);
}
qi::rule<Iterator, Num()> start;
qi::rule<Iterator, Num()> hex;
qi::rule<Iterator, Num()> oct;
qi::rule<Iterator, Num()> dec;
qi::rule<Iterator, Num()> bin;
// Function object wrapped by `dval` below: maps a digit character to its
// numeric value.
struct _dval {
template <typename> struct result { typedef uint8_t type; };
template <typename T> uint8_t operator()(T ch) const {
// NOTE(review): `ch >= '0' || ch <= '9'` is true for every character, so
// this branch always returns ch - '0' and the code after it never runs.
if (ch >= '0' || ch <= '9') {
return ch - '0';
}
ch = std::tolower(ch);
// NOTE(review): same `||` problem as above; `&&` is presumably intended.
if (ch >= 'a' || ch <= 'f') {
return ch - 'a' + 10;
}
// No return statement follows this assert.
assert(false);
}
};
// Lazy (Phoenix) wrapper so _dval can be called inside semantic actions.
boost::phoenix::function<_dval> dval;
};
// Grammar for an optionally signed number: '-' negates the value produced by
// `unum`, an optional '+' leaves it unchanged.
// NOTE(review): this is the listing as posted in the question; it is the
// version reported to fail on every input.
template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
signed_parser() : signed_parser::base_type(start) {
using qi::eps;
using qi::_val;
using qi::_1;
using ascii::char_;
// static_cast_ is imported but not used in this constructor.
using phoenix::static_cast_;
// NOTE(review): the right-hand side is a temporary unsigned_parser. The
// accompanying answer points out that a rule must not refer to a temporary
// grammar and that a grammar instance (a member) is needed instead.
unum = unsigned_parser<Iterator, Num>();
start = (char_('-') >> unum[_val=-_1])
| (-char_('+') >> unum[_val=_1]);
unum.name("unum");
start.name("snum");
debug(start);
// Per the question text, enabling the next line makes the program segfault.
/* debug(unum); */
}
qi::rule<Iterator, Num()> start;
qi::rule<Iterator, Num()> unum;
};
// Parses the single command-line argument with signed_parser and prints the
// resulting value; returns 0 on success and 1 on a usage error or parse failure.
// NOTE(review): the parameter names are swapped relative to convention: here
// `argv` is the argument count and `argc` is the argument array.
int main(int argv, const char *argc[]) {
using phoenix::ref;
using qi::eoi;
using qi::_1;
typedef std::string::const_iterator iter;
signed_parser<iter, int64_t> sp;
int64_t val;
// Exactly one argument (besides the program name) is required.
if (argv != 2) {
std::cerr << "Usage: " << argc[0] << " <input>" << std::endl;
return 1;
}
std::string test(argc[1]);
iter i = test.begin();
iter end = test.end();
// The semantic action stores the parsed number in `val`; `eoi` requires the
// whole input to be consumed, and ascii::space is the skipper.
bool rv = phrase_parse(i, end, sp[ref(val)=_1] >> eoi, ascii::space);
if (rv) {
assert(i == end);
std::cout << "Succeeded: " << val << std::endl;
return 0;
}
std::cout << "Failed." << std::endl;
return 1;
}
对于 signed_parser,每次解析都会失败。此外,如果我取消注释掉的 debug(),程序会出现段错误。
我觉得我已经接近开始理解如何使用它了,所以任何帮助将不胜感激。
使用所有这些单独的规则会扼杀编译器优化解析的机会。
您不能引用临时 grammar/rule。你需要有语法实例:
/// Grammar for an optionally signed number.
///
/// The unsigned grammar is held as a member (`unum`), so the `snum` rule
/// refers to an object that lives as long as this grammar does. This variant
/// attaches no semantic action: in both branches the exposed attribute is
/// simply whatever `unum` produced.
template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
    signed_parser() : signed_parser::base_type(snum) {
        using qi::lit;

        // Either a mandatory '-' or an optional '+', followed by the number.
        snum = (lit('-') >> unum)
             | (-lit('+') >> unum)
             ;

        BOOST_SPIRIT_DEBUG_NODES((snum))
    }

  private:
    qi::rule<Iterator, Num()> snum;
    unsigned_parser<Iterator, Num> unum;
};
这里有一些清理工作:
- 请把 `argc` 和 `argv` 的名字交换回来,好吗 :)
使用 BOOST_SPIRIT_DEBUG* 宏
BOOST_SPIRIT_DEBUG_NODES((unum) (hex) (oct) (dec) (bin));
- 如果不需要 `lit()` 或(更糟!)`char_()`,请改用普通字面量
更喜欢使用自动属性传播 (Boost Spirit: "Semantic actions are evil"?)。例如。规则可以简单得多:
snum = lit('-') >> unum
| -lit('+') >> unum
;
使用%=
在存在语义动作的情况下保持自动传播:
snum %= lit('-') >> unum [ _val = -_1 ]
| -lit('+') >> unum
;
phrase_parse
调用本身也是如此:您可以传递属性的绑定引用。不需要语义动作
- 执行 `tolower(ch)` 可能更慢(因为您已经知道它是 ASCII),也可能不正确(如果您的编译器中 `char` 是有符号类型,就会发生符号扩展)
- 更新:你的 `dval` actor(Phoenix 函数对象)中有一个相当严重的错误:范围检查写错了!这是我修正后的版本:
/// Phoenix-callable accumulator: shifts `accum` left by one digit in the given
/// base, then adds the numeric value of the digit character `ch`.
struct accum_f {
    template <typename...> struct result { typedef void type; };

    void operator()(char ch, Num& accum, int base) const {
        // The scaling happens before the character is classified.
        accum *= base;

        int digit;
        if ('0' <= ch && ch <= '9') {
            digit = ch - '0';
        } else if ('a' <= ch && ch <= 'f') {
            digit = ch - 'a' + 10;
        } else if ('A' <= ch && ch <= 'F') {
            digit = ch - 'A' + 10;
        } else {
            // Any other character is a programming error.
            assert(false);
            return;
        }
        accum += digit;
    }
};
// Lazy wrapper so the accumulator can be used inside semantic actions.
boost::phoenix::function<accum_f> _accum;
语义动作因此产生的修改/简化请参阅下文。
- 可以先按前缀分支,再使用内置的 `int_parser`;这可能会快(得多)
- 警告:当您不用语义动作来编写 `unum` 时,千万不要像原来那样用 `qi::char_` “捕获” `'0'`。否则,您会奇怪为什么任何带前缀格式的数字的结果总是 48(即字符 '0' 的 ASCII 码):
unum = ('0' >>
( (omit[ char_("xXhH") ] >> hex)
| (omit[ char_("bByY") ] >> bin)
| (omit[ char_("oOqQ") ] >> oct)
| (omit[ char_("dDtT") ] >> dec))
)
| (hex >> omit[ char_("xXhH") ])
| (bin >> omit[ char_("bByY") ])
| (oct >> omit[ char_("oOqQ") ])
| (dec >> omit[ -char_("dDtT") ]);
- 只要您使用的解析器表达式本身不使用 skipper(跳过器),带 skipper 调用 `phrase_parse` 就没有问题(请参阅 Boost spirit skipper issues)
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
/// Grammar for an unsigned number whose digits may be separated by '_'.
///
/// The radix is selected either by a leading '0' followed by a radix letter,
/// or by a trailing radix letter (x/h hex, b/y binary, o/q octal, d/t decimal,
/// in either case); only the trailing decimal letter is optional. The radix
/// markers are wrapped in omit[] so they do not contribute to the attribute.
template <typename Iterator, typename Num>
struct unsigned_parser : qi::grammar<Iterator, Num()> {
    unsigned_parser() : unsigned_parser::base_type(unum) {
        using qi::_1;
        using qi::_val;
        using qi::char_;
        using qi::eps;
        using qi::omit;

        // Digit runs: start from 0, fold in each digit, ignore '_' separators.
        bin = eps[_val = 0] >> *(char_("01")[_accum(_1, _val, 2)] | '_');
        oct = eps[_val = 0] >> *(char_("0-7")[_accum(_1, _val, 8)] | '_');
        dec = eps[_val = 0] >> *(char_("0-9")[_accum(_1, _val, 10)] | '_');
        hex = eps[_val = 0] >> *(char_("0-9a-fA-F")[_accum(_1, _val, 16)] | '_');

        unum =
            // Prefixed form: '0', radix letter, digits.
            ('0' >> (  (omit[char_("xXhH")] >> hex)
                     | (omit[char_("bByY")] >> bin)
                     | (omit[char_("oOqQ")] >> oct)
                     | (omit[char_("dDtT")] >> dec)))
            // Suffixed forms: digits, radix letter.
            | (hex >> omit[char_("xXhH")])
            | (bin >> omit[char_("bByY")])
            | (oct >> omit[char_("oOqQ")])
            | (dec >> omit[-char_("dDtT")]);

        BOOST_SPIRIT_DEBUG_NODES((unum) (hex) (oct) (dec) (bin));
    }

  private:
    qi::rule<Iterator, Num()> unum;
    qi::rule<Iterator, Num()> hex;
    qi::rule<Iterator, Num()> oct;
    qi::rule<Iterator, Num()> dec;
    qi::rule<Iterator, Num()> bin;

    /// Scales `accum` by `base`, then adds the numeric value of digit `ch`.
    struct accum_f {
        template <typename...> struct result { typedef void type; };

        void operator()(char ch, Num& accum, int base) const {
            accum *= base;

            int digit;
            if ('0' <= ch && ch <= '9') {
                digit = ch - '0';
            } else if ('a' <= ch && ch <= 'f') {
                digit = ch - 'a' + 10;
            } else if ('A' <= ch && ch <= 'F') {
                digit = ch - 'A' + 10;
            } else {
                // Any other character is a programming error.
                assert(false);
                return;
            }
            accum += digit;
        }
    };
    // Lazy wrapper so accum_f can be called from the semantic actions above.
    boost::phoenix::function<accum_f> _accum;
};
/// Grammar for an optionally signed number built on unsigned_parser.
///
/// A leading '-' negates the parsed value through a semantic action; an
/// optional '+' leaves it as is. `%=` is used so that automatic attribute
/// propagation stays enabled although a semantic action is present.
template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
    signed_parser() : signed_parser::base_type(snum) {
        using qi::_1;
        using qi::_val;
        using qi::lit;

        snum %= (lit('-') >> unum[_val = -_1])
              | (-lit('+') >> unum)
              ;

        BOOST_SPIRIT_DEBUG_NODES((snum))
    }

  private:
    qi::rule<Iterator, Num()> snum;
    // Held by value so the rule above refers to a grammar that outlives it.
    unsigned_parser<Iterator, Num> unum;
};
/// Parses every command-line argument with signed_parser and reports, per
/// argument, whether the parse succeeded, the value, and any unparsed rest.
int main(int argc, const char *argv[]) {
    using iter = std::string::const_iterator;
    signed_parser<iter, int64_t> const sp;

    for (int k = 1; k < argc; ++k) {
        std::string const s(argv[k]);
        std::cout << "\n-----------------------------\nParsing '" << s << "':\n";

        int64_t val;
        iter i = s.begin();
        iter const end = s.end();

        // The attribute is bound directly to `val`; eoi demands full consumption.
        bool const ok = qi::phrase_parse(i, end, sp >> qi::eoi, ascii::space, val);

        if (!ok) {
            std::cout << "Failed." << std::endl;
        } else {
            std::cout << "Succeeded: " << val << std::endl;
        }

        if (i != end) {
            std::cout << "Remaining unparsed: '" << std::string(i, end) << "'\n";
        }
    }
}
输出:
-----------------------------
Parsing '-124_456d':
Succeeded: -124456
-----------------------------
Parsing '123_456D':
Succeeded: 123456
-----------------------------
Parsing '-123_456T':
Succeeded: -123456
-----------------------------
Parsing '123456t':
Succeeded: 123456
-----------------------------
Parsing '+1_bh':
Succeeded: 27
-----------------------------
Parsing '0_010Q':
Succeeded: 8
-----------------------------
Parsing '+1010_1010_0111_0111_b':
Succeeded: 43639
-----------------------------
Parsing '123_456':
Succeeded: 123456
-----------------------------
Parsing '-123456':
Succeeded: -123456
-----------------------------
Parsing '1_bh':
Succeeded: 27
-----------------------------
Parsing '-0_010Q':
Succeeded: -8
-----------------------------
Parsing '1010_1010_0111_0111_b':
Succeeded: 43639
-----------------------------
Parsing '+0d124_456':
Succeeded: 124456
-----------------------------
Parsing '0D123_456':
Succeeded: 123456
-----------------------------
Parsing '+0T123_456':
Succeeded: 123456
-----------------------------
Parsing '0t123456':
Succeeded: 123456
-----------------------------
Parsing '0h1_b':
Succeeded: 27
-----------------------------
Parsing '0Q0_010':
Succeeded: 8
-----------------------------
Parsing '0b1010_1010_0111_0111_':
Succeeded: 43639
-----------------------------
Parsing '06123_45':
Succeeded: 612345
-----------------------------
Parsing '0612345':
Succeeded: 612345
-----------------------------
Parsing '0h1_b':
Succeeded: 27
-----------------------------
Parsing '-0Q0_010':
Succeeded: -8
-----------------------------
Parsing '0b1010_1010_0111_0111_':
Succeeded: 43639
这是我之前一个问题的后续问题。
根据 sehe 的非常好的建议,我成功地进行了数字解析。然后我尝试更新它以拥有一个辅助解析器,该解析器处理带有可选符号的数字。第二次尝试失败了。我怀疑我在处理子语法方面做错了什么。代码如下:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Grammar for an unsigned number whose digits may be separated by '_'.
// The radix is selected either by a leading '0' followed by a radix letter,
// or by a trailing radix letter (x/X/h/H hex, b/B/y/Y binary, o/O/q/Q octal,
// d/D/t/T decimal; only the trailing decimal letter is optional).
// NOTE(review): this is the listing as posted in the question; the defects
// flagged in the comments below are deliberately left untouched.
template <typename Iterator, typename Num>
struct unsigned_parser : qi::grammar<Iterator, Num()> {
unsigned_parser() : unsigned_parser::base_type(start) {
using qi::_val;
using qi::_1;
using qi::eps;
using qi::debug;
using ascii::char_;
// Each digit rule first resets _val to 0, then folds every matched digit
// into _val; a '_' is consumed without contributing to the value.
bin = eps[_val=0] >> *(char_("01")[_val = _val * 2 + dval(_1)] | '_');
oct = eps[_val=0] >> *(char_("0-7")[_val = _val * 8 + dval(_1)] | '_');
dec = eps[_val=0]
>> *(char_("0-9")[_val = _val * 10 + dval(_1)] | '_');
hex = eps[_val=0]
>> *(char_("0-9a-fA-F")[_val = _val * 16 + dval(_1)] | '_');
// First alternative: "0<radix letter><digits>"; remaining alternatives:
// "<digits><radix letter>". Every branch copies the sub-rule result to _val.
start = (char_('0') >>
((char_("xXhH") >> hex[_val=_1])
| (char_("bByY") >> bin[_val=_1])
| (char_("oOqQ") >> oct[_val=_1])
| (char_("dDtT") >> dec[_val=_1])))
| (hex[_val=_1] >> char_("xXhH"))
| (bin[_val=_1] >> char_("bByY"))
| (oct[_val=_1] >> char_("oOqQ"))
| (dec[_val=_1] >> -char_("dDtT"));
// Name the rules and switch on Qi debug output for each of them.
start.name("unum");
hex.name("hex");
oct.name("oct");
dec.name("dec");
bin.name("bin");
debug(start);
debug(hex);
debug(oct);
debug(dec);
debug(bin);
}
qi::rule<Iterator, Num()> start;
qi::rule<Iterator, Num()> hex;
qi::rule<Iterator, Num()> oct;
qi::rule<Iterator, Num()> dec;
qi::rule<Iterator, Num()> bin;
// Function object wrapped by `dval` below: maps a digit character to its
// numeric value.
struct _dval {
template <typename> struct result { typedef uint8_t type; };
template <typename T> uint8_t operator()(T ch) const {
// NOTE(review): `ch >= '0' || ch <= '9'` is true for every character, so
// this branch always returns ch - '0' and the code after it never runs.
if (ch >= '0' || ch <= '9') {
return ch - '0';
}
ch = std::tolower(ch);
// NOTE(review): same `||` problem as above; `&&` is presumably intended.
if (ch >= 'a' || ch <= 'f') {
return ch - 'a' + 10;
}
// No return statement follows this assert.
assert(false);
}
};
// Lazy (Phoenix) wrapper so _dval can be called inside semantic actions.
boost::phoenix::function<_dval> dval;
};
// Grammar for an optionally signed number: '-' negates the value produced by
// `unum`, an optional '+' leaves it unchanged.
// NOTE(review): this is the listing as posted in the question; it is the
// version reported to fail on every input.
template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
signed_parser() : signed_parser::base_type(start) {
using qi::eps;
using qi::_val;
using qi::_1;
using ascii::char_;
// static_cast_ is imported but not used in this constructor.
using phoenix::static_cast_;
// NOTE(review): the right-hand side is a temporary unsigned_parser. The
// accompanying answer points out that a rule must not refer to a temporary
// grammar and that a grammar instance (a member) is needed instead.
unum = unsigned_parser<Iterator, Num>();
start = (char_('-') >> unum[_val=-_1])
| (-char_('+') >> unum[_val=_1]);
unum.name("unum");
start.name("snum");
debug(start);
// Per the question text, enabling the next line makes the program segfault.
/* debug(unum); */
}
qi::rule<Iterator, Num()> start;
qi::rule<Iterator, Num()> unum;
};
// Parses the single command-line argument with signed_parser and prints the
// resulting value; returns 0 on success and 1 on a usage error or parse failure.
// NOTE(review): the parameter names are swapped relative to convention: here
// `argv` is the argument count and `argc` is the argument array.
int main(int argv, const char *argc[]) {
using phoenix::ref;
using qi::eoi;
using qi::_1;
typedef std::string::const_iterator iter;
signed_parser<iter, int64_t> sp;
int64_t val;
// Exactly one argument (besides the program name) is required.
if (argv != 2) {
std::cerr << "Usage: " << argc[0] << " <input>" << std::endl;
return 1;
}
std::string test(argc[1]);
iter i = test.begin();
iter end = test.end();
// The semantic action stores the parsed number in `val`; `eoi` requires the
// whole input to be consumed, and ascii::space is the skipper.
bool rv = phrase_parse(i, end, sp[ref(val)=_1] >> eoi, ascii::space);
if (rv) {
assert(i == end);
std::cout << "Succeeded: " << val << std::endl;
return 0;
}
std::cout << "Failed." << std::endl;
return 1;
}
对于 signed_parser,每次解析都会失败。此外,如果我取消注释掉的 debug(),程序会出现段错误。
我觉得我已经接近开始理解如何使用它了,所以任何帮助将不胜感激。
使用所有这些单独的规则会扼杀编译器优化解析的机会。
您不能引用临时 grammar/rule。你需要有语法实例:
/// Grammar for an optionally signed number.
///
/// The unsigned grammar is held as a member (`unum`), so the `snum` rule
/// refers to an object that lives as long as this grammar does. This variant
/// attaches no semantic action: in both branches the exposed attribute is
/// simply whatever `unum` produced.
template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
    signed_parser() : signed_parser::base_type(snum) {
        using qi::lit;

        // Either a mandatory '-' or an optional '+', followed by the number.
        snum = (lit('-') >> unum)
             | (-lit('+') >> unum)
             ;

        BOOST_SPIRIT_DEBUG_NODES((snum))
    }

  private:
    qi::rule<Iterator, Num()> snum;
    unsigned_parser<Iterator, Num> unum;
};
这里有一些清理工作:
- 请把 `argc` 和 `argv` 的名字交换回来,好吗 :)
- 使用 BOOST_SPIRIT_DEBUG* 宏:
BOOST_SPIRIT_DEBUG_NODES((unum) (hex) (oct) (dec) (bin));
- 如果不需要 `lit()` 或(更糟!)`char_()`,请改用普通字面量
更喜欢使用自动属性传播 (Boost Spirit: "Semantic actions are evil"?)。例如。规则可以简单得多:
snum = lit('-') >> unum | -lit('+') >> unum ;
- 使用 `%=` 可以在存在语义动作的情况下保持自动传播:
snum %= lit('-') >> unum [ _val = -_1 ]
| -lit('+') >> unum
;
- `phrase_parse` 调用本身也是如此:您可以传递属性的绑定引用,不需要语义动作
- 执行 `tolower(ch)` 可能更慢(因为您已经知道它是 ASCII),也可能不正确(如果您的编译器中 `char` 是有符号类型,就会发生符号扩展)
- 更新:你的 `dval` actor(Phoenix 函数对象)中有一个相当严重的错误:范围检查写错了!这是我修正后的版本:
struct accum_f {
template <typename...> struct result { typedef void type; };
void operator()(char ch, Num& accum, int base) const {
accum *= base;
if (ch >= '0' && ch <= '9') accum += ch - '0';
else if (ch >= 'a' && ch <= 'f') accum += ch - 'a' + 10;
else if (ch >= 'A' && ch <= 'F') accum += ch - 'A' + 10;
else assert(false);
}
};
boost::phoenix::function<accum_f> _accum;
语义动作因此产生的修改/简化请参阅下文。
- 可以先按前缀分支,再使用内置的 `int_parser`;这可能会快(得多)
- 警告:当您不用语义动作来编写 `unum` 时,千万不要像原来那样用 `qi::char_` “捕获” `'0'`。否则,您会奇怪为什么任何带前缀格式的数字的结果总是 48(即字符 '0' 的 ASCII 码):
unum = ('0' >>
( (omit[ char_("xXhH") ] >> hex)
| (omit[ char_("bByY") ] >> bin)
| (omit[ char_("oOqQ") ] >> oct)
| (omit[ char_("dDtT") ] >> dec))
)
| (hex >> omit[ char_("xXhH") ])
| (bin >> omit[ char_("bByY") ])
| (oct >> omit[ char_("oOqQ") ])
| (dec >> omit[ -char_("dDtT") ]);
- 只要您使用的解析器表达式本身不使用 skipper(跳过器),带 skipper 调用 `phrase_parse` 就没有问题(请参阅 Boost spirit skipper issues)
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
/// Grammar for an unsigned number whose digits may be separated by '_'.
///
/// The radix is selected either by a leading '0' followed by a radix letter,
/// or by a trailing radix letter (x/h hex, b/y binary, o/q octal, d/t decimal,
/// in either case); only the trailing decimal letter is optional. The radix
/// markers are wrapped in omit[] so they do not contribute to the attribute.
template <typename Iterator, typename Num>
struct unsigned_parser : qi::grammar<Iterator, Num()> {
    unsigned_parser() : unsigned_parser::base_type(unum) {
        using qi::_1;
        using qi::_val;
        using qi::char_;
        using qi::eps;
        using qi::omit;

        // Digit runs: start from 0, fold in each digit, ignore '_' separators.
        bin = eps[_val = 0] >> *(char_("01")[_accum(_1, _val, 2)] | '_');
        oct = eps[_val = 0] >> *(char_("0-7")[_accum(_1, _val, 8)] | '_');
        dec = eps[_val = 0] >> *(char_("0-9")[_accum(_1, _val, 10)] | '_');
        hex = eps[_val = 0] >> *(char_("0-9a-fA-F")[_accum(_1, _val, 16)] | '_');

        unum =
            // Prefixed form: '0', radix letter, digits.
            ('0' >> (  (omit[char_("xXhH")] >> hex)
                     | (omit[char_("bByY")] >> bin)
                     | (omit[char_("oOqQ")] >> oct)
                     | (omit[char_("dDtT")] >> dec)))
            // Suffixed forms: digits, radix letter.
            | (hex >> omit[char_("xXhH")])
            | (bin >> omit[char_("bByY")])
            | (oct >> omit[char_("oOqQ")])
            | (dec >> omit[-char_("dDtT")]);

        BOOST_SPIRIT_DEBUG_NODES((unum) (hex) (oct) (dec) (bin));
    }

  private:
    qi::rule<Iterator, Num()> unum;
    qi::rule<Iterator, Num()> hex;
    qi::rule<Iterator, Num()> oct;
    qi::rule<Iterator, Num()> dec;
    qi::rule<Iterator, Num()> bin;

    /// Scales `accum` by `base`, then adds the numeric value of digit `ch`.
    struct accum_f {
        template <typename...> struct result { typedef void type; };

        void operator()(char ch, Num& accum, int base) const {
            accum *= base;

            int digit;
            if ('0' <= ch && ch <= '9') {
                digit = ch - '0';
            } else if ('a' <= ch && ch <= 'f') {
                digit = ch - 'a' + 10;
            } else if ('A' <= ch && ch <= 'F') {
                digit = ch - 'A' + 10;
            } else {
                // Any other character is a programming error.
                assert(false);
                return;
            }
            accum += digit;
        }
    };
    // Lazy wrapper so accum_f can be called from the semantic actions above.
    boost::phoenix::function<accum_f> _accum;
};
/// Grammar for an optionally signed number built on unsigned_parser.
///
/// A leading '-' negates the parsed value through a semantic action; an
/// optional '+' leaves it as is. `%=` is used so that automatic attribute
/// propagation stays enabled although a semantic action is present.
template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
    signed_parser() : signed_parser::base_type(snum) {
        using qi::_1;
        using qi::_val;
        using qi::lit;

        snum %= (lit('-') >> unum[_val = -_1])
              | (-lit('+') >> unum)
              ;

        BOOST_SPIRIT_DEBUG_NODES((snum))
    }

  private:
    qi::rule<Iterator, Num()> snum;
    // Held by value so the rule above refers to a grammar that outlives it.
    unsigned_parser<Iterator, Num> unum;
};
/// Parses every command-line argument with signed_parser and reports, per
/// argument, whether the parse succeeded, the value, and any unparsed rest.
int main(int argc, const char *argv[]) {
    using iter = std::string::const_iterator;
    signed_parser<iter, int64_t> const sp;

    for (int k = 1; k < argc; ++k) {
        std::string const s(argv[k]);
        std::cout << "\n-----------------------------\nParsing '" << s << "':\n";

        int64_t val;
        iter i = s.begin();
        iter const end = s.end();

        // The attribute is bound directly to `val`; eoi demands full consumption.
        bool const ok = qi::phrase_parse(i, end, sp >> qi::eoi, ascii::space, val);

        if (!ok) {
            std::cout << "Failed." << std::endl;
        } else {
            std::cout << "Succeeded: " << val << std::endl;
        }

        if (i != end) {
            std::cout << "Remaining unparsed: '" << std::string(i, end) << "'\n";
        }
    }
}
输出:
-----------------------------
Parsing '-124_456d':
Succeeded: -124456
-----------------------------
Parsing '123_456D':
Succeeded: 123456
-----------------------------
Parsing '-123_456T':
Succeeded: -123456
-----------------------------
Parsing '123456t':
Succeeded: 123456
-----------------------------
Parsing '+1_bh':
Succeeded: 27
-----------------------------
Parsing '0_010Q':
Succeeded: 8
-----------------------------
Parsing '+1010_1010_0111_0111_b':
Succeeded: 43639
-----------------------------
Parsing '123_456':
Succeeded: 123456
-----------------------------
Parsing '-123456':
Succeeded: -123456
-----------------------------
Parsing '1_bh':
Succeeded: 27
-----------------------------
Parsing '-0_010Q':
Succeeded: -8
-----------------------------
Parsing '1010_1010_0111_0111_b':
Succeeded: 43639
-----------------------------
Parsing '+0d124_456':
Succeeded: 124456
-----------------------------
Parsing '0D123_456':
Succeeded: 123456
-----------------------------
Parsing '+0T123_456':
Succeeded: 123456
-----------------------------
Parsing '0t123456':
Succeeded: 123456
-----------------------------
Parsing '0h1_b':
Succeeded: 27
-----------------------------
Parsing '0Q0_010':
Succeeded: 8
-----------------------------
Parsing '0b1010_1010_0111_0111_':
Succeeded: 43639
-----------------------------
Parsing '06123_45':
Succeeded: 612345
-----------------------------
Parsing '0612345':
Succeeded: 612345
-----------------------------
Parsing '0h1_b':
Succeeded: 27
-----------------------------
Parsing '-0Q0_010':
Succeeded: -8
-----------------------------
Parsing '0b1010_1010_0111_0111_':
Succeeded: 43639