Boost Spirit (x3) 在解析字符转义时未能消耗最后一个标记
Boost Spirit (x3) failing to consume last token when parsing character escapes
我在用 Boost Spirit X3 解析带转义字符的 ASCII 字符串时遇到了 expectation 异常。为了避免抛出异常,我把下面代码中原来的 expectation 运算符(>)改成了序列运算符(>>)。运行后,代码能解析输入并给属性赋上正确的值,但解析函数返回 false,而且没有消耗任何输入。有人知道我哪里做错了吗?
gcc 版本 10.3.0
Boost 1.71
std = c++17
#include <boost/spirit/home/x3.hpp>
#include <string>
#include <iostream>
namespace x3 = boost::spirit::x3;
using namespace std::string_literals;
// Changed expectation (>) to sequence (>>) so a mismatch no longer throws.
//
// Grammar for a double-quoted string. Inside x3::lexeme[] each two-character
// backslash escape in the input is matched as literal text and x3::attr()
// supplies the character it stands for; any other character except '"' is
// taken as-is. The surrounding quotes are matched but not stored.
auto const qstring = x3::lexeme['"' >> *(
      "\\n" >> x3::attr('\n')
    | "\\b" >> x3::attr('\b')
    | "\\f" >> x3::attr('\f')
    | "\\t" >> x3::attr('\t')
    | "\\v" >> x3::attr('\v')
    | "\\0" >> x3::attr('\0')
    | "\\r" >> x3::attr('\r')
    | "\\n" >> x3::attr('\n')      // repeats the first alternative
    | "\\" >> x3::char_("\"\\")    // a backslash, then a quote or a backslash
    | "\\\"" >> x3::char_('"')
    | ~x3::char_('"')
) >> '"'];
// Parses one hard-coded quoted string with `qstring` and reports the parser's
// return value, the collected attribute, and where the iterator stopped.
int main(int, char**){
    // Input text: an opening quote and two backslash-escaped quotes.
    // The literal contains no closing quote.
    auto const quoted = "\"Hel\\\"lo Wor\\\"ld"s;
    auto const expected = "Hel\"lo Wor\"ld"s;
    std::string result;
    auto first = quoted.begin();
    auto const last = quoted.end();
    bool ok = x3::phrase_parse(first, last, qstring, x3::ascii::space, result);
    std::cout << "parse returned " << std::boolalpha << ok << '\n';
    std::cout << result << " == " << expected << " is " << std::boolalpha << (result == expected) << '\n';
    std::cout << "first == last = " << (first == last) << '\n';
    // Dereferencing the end iterator is undefined behaviour, so only print
    // the current character when some input is left.
    if (first != last)
        std::cout << "first = " << *first << '\n';
    else
        std::cout << "first = <end>\n";
    return 0;
}
您的输入未以引号字符结尾。将其写为原始字符串文字有助于:
std::string const qinput = R"("Hel\"lo Wor\"ld)";
应该是
std::string const qinput = R"("Hel\"lo Wor\"ld")";
现在,剩下的就是常见的容器属性处理问题:在 Spirit 中,规则失败时(即使只是某个分支发生回溯),容器属性不会回滚。可参见 “boost::spirit::qi duplicate parsing on the output”、“Understanding Boost.spirit's string parser” 等问答。
基本上,如果解析失败,您不能依赖结果。这可能就是为什么原来有一个期望点:引发异常。
正确工作的完整演示:
#include <boost/spirit/home/x3.hpp>
#include <string>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
// Symbol table mapping each two-character escape sequence as it appears in
// the input (a backslash plus one character) to the single character it
// denotes. Built once by an immediately-invoked lambda.
auto escapes = []{
    x3::symbols<char> sym;
    sym.add
        ("\\b", '\b')
        ("\\f", '\f')
        ("\\t", '\t')
        ("\\v", '\v')
        ("\\0", '\0')
        ("\\r", '\r')
        ("\\n", '\n')
        ("\\\\", '\\')
        ("\\\"", '"')
        ;
    return sym;
}();
// Quoted-string grammar: an opening '"', then zero or more items that are
// either an entry of the `escapes` symbol table or any character other than
// '"', then a closing '"'. The whole expression is wrapped in x3::lexeme[].
auto const qstring = x3::lexeme['"' >> *(escapes | ~x3::char_('"')) >> '"'];
int main(){
auto squote = [](std::string_view s) { return std::quoted(s, '\''); };
std::string const expected = R"(Hel"lo Wor"ld)";
for (std::string const qinput : {
R"("Hel\"lo Wor\"ld)", // oops no closing quote
R"("Hel\"lo Wor\"ld")",
"\"Hel\\"lo Wor\\"ld\"", // if you insist
R"("Hel\"lo Wor\"ld" trailing data)",
})
{
std::cout << "\n -- input " << squote(qinput) << "\n";
std::string result;
auto first = cbegin(qinput);
auto last = cend(qinput);
bool ok = x3::phrase_parse(first, last, qstring, x3::space, result);
ok &= (first == last);
std::cout << "parse returned " << std::boolalpha << ok << "\n";
std::cout << squote(result) << " == " << squote(expected) << " is "
<< (result == expected) << "\n";
if (first != last)
std::cout << "Remaining input unparsed: " << squote({first, last})
<< "\n";
}
}
输出:
-- input '"Hel\\"lo Wor\\"ld'
parse returned false
'Hel"lo Wor"ld' == 'Hel"lo Wor"ld' is true
Remaining input unparsed: '"Hel\\"lo Wor\\"ld'
-- input '"Hel\\"lo Wor\\"ld"'
parse returned true
'Hel"lo Wor"ld' == 'Hel"lo Wor"ld' is true
-- input '"Hel\\"lo Wor\\"ld"'
parse returned true
'Hel"lo Wor"ld' == 'Hel"lo Wor"ld' is true
-- input '"Hel\\"lo Wor\\"ld" trailing data'
parse returned false
'Hel"lo Wor"ld' == 'Hel"lo Wor"ld' is true
Remaining input unparsed: 'trailing data'
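我在用 Boost Spirit X3 解析带转义字符的 ASCII 字符串时遇到了 expectation 异常。为了避免抛出异常,我把下面代码中原来的 expectation 运算符(>)改成了序列运算符(>>)。运行后,代码能解析输入并给属性赋上正确的值,但解析函数返回 false,而且没有消耗任何输入。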
gcc 版本 10.3.0
Boost 1.71
std = c++17
#include <boost/spirit/home/x3.hpp>
#include <string>
#include <iostream>
namespace x3 = boost::spirit::x3;
using namespace std::string_literals;
// Changed expectation (>) to sequence (>>) so a mismatch no longer throws.
//
// Grammar for a double-quoted string. Inside x3::lexeme[] each two-character
// backslash escape in the input is matched as literal text and x3::attr()
// supplies the character it stands for; any other character except '"' is
// taken as-is. The surrounding quotes are matched but not stored.
auto const qstring = x3::lexeme['"' >> *(
      "\\n" >> x3::attr('\n')
    | "\\b" >> x3::attr('\b')
    | "\\f" >> x3::attr('\f')
    | "\\t" >> x3::attr('\t')
    | "\\v" >> x3::attr('\v')
    | "\\0" >> x3::attr('\0')
    | "\\r" >> x3::attr('\r')
    | "\\n" >> x3::attr('\n')      // repeats the first alternative
    | "\\" >> x3::char_("\"\\")    // a backslash, then a quote or a backslash
    | "\\\"" >> x3::char_('"')
    | ~x3::char_('"')
) >> '"'];
// Parses one hard-coded quoted string with `qstring` and reports the parser's
// return value, the collected attribute, and where the iterator stopped.
int main(int, char**){
    // Input text: an opening quote and two backslash-escaped quotes.
    // The literal contains no closing quote.
    auto const quoted = "\"Hel\\\"lo Wor\\\"ld"s;
    auto const expected = "Hel\"lo Wor\"ld"s;
    std::string result;
    auto first = quoted.begin();
    auto const last = quoted.end();
    bool ok = x3::phrase_parse(first, last, qstring, x3::ascii::space, result);
    std::cout << "parse returned " << std::boolalpha << ok << '\n';
    std::cout << result << " == " << expected << " is " << std::boolalpha << (result == expected) << '\n';
    std::cout << "first == last = " << (first == last) << '\n';
    // Dereferencing the end iterator is undefined behaviour, so only print
    // the current character when some input is left.
    if (first != last)
        std::cout << "first = " << *first << '\n';
    else
        std::cout << "first = <end>\n";
    return 0;
}
您的输入未以引号字符结尾。将其写为原始字符串文字有助于:
std::string const qinput = R"("Hel\"lo Wor\"ld)";
应该是
std::string const qinput = R"("Hel\"lo Wor\"ld")";
现在,剩下的就是常见的容器属性处理问题:在 Spirit 中,规则失败时(即使只是某个分支发生回溯),容器属性不会回滚。可参见 “boost::spirit::qi duplicate parsing on the output”、“Understanding Boost.spirit's string parser” 等问答。
基本上,如果解析失败,您不能依赖结果。这可能就是为什么原来有一个期望点:引发异常。
正确工作的完整演示:
#include <boost/spirit/home/x3.hpp>
#include <string>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
// Symbol table mapping each two-character escape sequence as it appears in
// the input (a backslash plus one character) to the single character it
// denotes. Built once by an immediately-invoked lambda.
auto escapes = []{
    x3::symbols<char> sym;
    sym.add
        ("\\b", '\b')
        ("\\f", '\f')
        ("\\t", '\t')
        ("\\v", '\v')
        ("\\0", '\0')
        ("\\r", '\r')
        ("\\n", '\n')
        ("\\\\", '\\')
        ("\\\"", '"')
        ;
    return sym;
}();
// Quoted-string grammar: an opening '"', then zero or more items that are
// either an entry of the `escapes` symbol table or any character other than
// '"', then a closing '"'. The whole expression is wrapped in x3::lexeme[].
auto const qstring = x3::lexeme['"' >> *(escapes | ~x3::char_('"')) >> '"'];
int main(){
auto squote = [](std::string_view s) { return std::quoted(s, '\''); };
std::string const expected = R"(Hel"lo Wor"ld)";
for (std::string const qinput : {
R"("Hel\"lo Wor\"ld)", // oops no closing quote
R"("Hel\"lo Wor\"ld")",
"\"Hel\\"lo Wor\\"ld\"", // if you insist
R"("Hel\"lo Wor\"ld" trailing data)",
})
{
std::cout << "\n -- input " << squote(qinput) << "\n";
std::string result;
auto first = cbegin(qinput);
auto last = cend(qinput);
bool ok = x3::phrase_parse(first, last, qstring, x3::space, result);
ok &= (first == last);
std::cout << "parse returned " << std::boolalpha << ok << "\n";
std::cout << squote(result) << " == " << squote(expected) << " is "
<< (result == expected) << "\n";
if (first != last)
std::cout << "Remaining input unparsed: " << squote({first, last})
<< "\n";
}
}
输出:
-- input '"Hel\\"lo Wor\\"ld'
parse returned false
'Hel"lo Wor"ld' == 'Hel"lo Wor"ld' is true
Remaining input unparsed: '"Hel\\"lo Wor\\"ld'
-- input '"Hel\\"lo Wor\\"ld"'
parse returned true
'Hel"lo Wor"ld' == 'Hel"lo Wor"ld' is true
-- input '"Hel\\"lo Wor\\"ld"'
parse returned true
'Hel"lo Wor"ld' == 'Hel"lo Wor"ld' is true
-- input '"Hel\\"lo Wor\\"ld" trailing data'
parse returned false
'Hel"lo Wor"ld' == 'Hel"lo Wor"ld' is true
Remaining input unparsed: 'trailing data'