Boost Spirit 解析带引号的字符串失败
Boost Spirit fails to parse a quoted string
这是我的语法
unesc_char.add(L"\a", L'\a')(L"\b", L'\b')(L"\f", L'\f')(L"\n", L'\n')
(L"\r", L'\r')(L"\t", L'\t')(L"\v", L'\v')(L"\\", L'\')
(L"\\'", L'\'')(L"\\"", L'\"');
unesc_str = '\"' >> *((boost::spirit::standard_wide::char_ - '\"') | unesc_char) >> '\"';
其中
// Rule declared with the signature std::wstring(), i.e. it yields a std::wstring.
qi::rule<Iterator, std::wstring()> unesc_str;
// Symbol table declared with wchar_t keys and a wchar_t value per entry.
qi::symbols<wchar_t const, wchar_t const> unesc_char;
解析失败:"Hello\"" -> 应返回 Hello"
解析正确:"Hello\\" -> 应返回 Hello\
将规则更改为
// Variant with the alternatives swapped: unesc_char is tried before the
// "any character except a quote" branch.
unesc_str = '\"' >> *(unesc_char | (boost::spirit::standard_wide::char_ - '\"')) >> '\"';
解析正确:"Hello\"" -> 应返回 Hello"
解析失败:"Hello\\" -> 应返回 Hello\
如何让这两种情况都能正确解析?
PEG 语法按从左到右的顺序尝试各个备选分支,因此需要把 unesc_char 放在前面,才能处理转义。
此外,我认为您可能对输入转义的级别感到困惑:
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
/// Qi grammar for a double-quoted wide string.
///
/// The start rule is unesc_str; its attribute is a std::wstring in which each
/// escape sequence registered in unesc_char is replaced by its mapped character.
template <typename It>
struct Parser : qi::grammar<It, std::wstring()> {
    Parser() : Parser::base_type(unesc_str) {
        // Keys are the sequences as they appear in the input: a literal
        // backslash followed by one character. The backslash therefore has to
        // be doubled inside the C++ literal. Values are the resulting characters.
        unesc_char.add
            (L"\\a",  L'\a')
            (L"\\b",  L'\b')
            (L"\\f",  L'\f')
            (L"\\n",  L'\n')
            (L"\\r",  L'\r')
            (L"\\t",  L'\t')
            (L"\\v",  L'\v')
            (L"\\\\", L'\\')
            (L"\\'",  L'\'')
            (L"\\\"", L'\"');

        // Alternatives are tried left to right, so escapes must come before
        // the "any character except a double quote" fallback.
        unesc_str = L'"' >> *(unesc_char | ~qi::standard_wide::char_(L'"')) >> L'"';
    }
  private:
    qi::rule<It, std::wstring()> unesc_str;
    qi::symbols<wchar_t const, wchar_t const> unesc_char;
};
/// Runs the grammar over a few sample inputs and prints each input together
/// with its parsed result, followed by any input left unconsumed.
int main() {
    using It = std::wstring::const_iterator;
    Parser<It> const p {};

    // The escaped samples are written twice: once as a raw literal and once as
    // the equivalent ordinary literal, where every backslash and quote of the
    // input must itself be escaped for the C++ compiler.
    for (std::wstring const input : {
            L"\"abaca\\tdabra\"",
            LR"("Hello\"")", L"\"Hello\\\"\"", // equivalent
            LR"("Hello\\")", L"\"Hello\\\\\"", // equivalent
    }) {
        It f = input.begin(), l = input.end();
        std::wstring s;
        if (parse(f, l, p, s)) {
            std::wcout << L"Unescape: " << input << L" -> " << s << L"\n";
        }

        // Report whatever lies between the final iterator position and the end.
        if (f != l)
            std::wcout << "Remaining input: '" << std::wstring(f,l) << "'\n";
    }
}
输出
Unescape: "abaca\tdabra" -> abaca dabra
Unescape: "Hello\"" -> Hello"
Unescape: "Hello\"" -> Hello"
Unescape: "Hello\\" -> Hello\
Unescape: "Hello\\" -> Hello\
附加内容
如果不使用 symbols,我可能会写得更简单。除非您需要动态的转义列表,否则下面的写法更灵活,而且可能更高效:
namespace enc = qi::standard_wide;
// A backslash introduces an escape: the listed letters produce their control
// character through qi::attr, and any other character after the backslash is
// taken as-is. Everything else except a double quote is matched by the
// second alternative.
unesc_str = '"' >> *(
        '\\' >> (
              'a' >> qi::attr('\a')
            | 'b' >> qi::attr('\b')
            | 'f' >> qi::attr('\f')
            | 'n' >> qi::attr('\n')
            | 'r' >> qi::attr('\r')
            | 't' >> qi::attr('\t')
            | 'v' >> qi::attr('\v')
            | enc::char_
        ) | ~enc::char_('"')) >> '"';
这是我的语法
unesc_char.add(L"\a", L'\a')(L"\b", L'\b')(L"\f", L'\f')(L"\n", L'\n')
(L"\r", L'\r')(L"\t", L'\t')(L"\v", L'\v')(L"\\", L'\')
(L"\\'", L'\'')(L"\\"", L'\"');
unesc_str = '\"' >> *((boost::spirit::standard_wide::char_ - '\"') | unesc_char) >> '\"';
其中
// Rule declared with the signature std::wstring(), i.e. it yields a std::wstring.
qi::rule<Iterator, std::wstring()> unesc_str;
// Symbol table declared with wchar_t keys and a wchar_t value per entry.
qi::symbols<wchar_t const, wchar_t const> unesc_char;
解析失败:"Hello\"" -> 应返回 Hello"
解析正确:"Hello\\" -> 应返回 Hello\
将规则更改为
// Variant with the alternatives swapped: unesc_char is tried before the
// "any character except a quote" branch.
unesc_str = '\"' >> *(unesc_char | (boost::spirit::standard_wide::char_ - '\"')) >> '\"';
解析正确:"Hello\"" -> 应返回 Hello"
解析失败:"Hello\\" -> 应返回 Hello\
如何让这两种情况都能正确解析?
PEG 语法按从左到右的顺序尝试各个备选分支,因此需要把 unesc_char 放在前面,才能处理转义。
此外,我认为您可能对输入转义的级别感到困惑:
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
/// Qi grammar for a double-quoted wide string.
///
/// The start rule is unesc_str; its attribute is a std::wstring in which each
/// escape sequence registered in unesc_char is replaced by its mapped character.
template <typename It>
struct Parser : qi::grammar<It, std::wstring()> {
    Parser() : Parser::base_type(unesc_str) {
        // Keys are the sequences as they appear in the input: a literal
        // backslash followed by one character. The backslash therefore has to
        // be doubled inside the C++ literal. Values are the resulting characters.
        unesc_char.add
            (L"\\a",  L'\a')
            (L"\\b",  L'\b')
            (L"\\f",  L'\f')
            (L"\\n",  L'\n')
            (L"\\r",  L'\r')
            (L"\\t",  L'\t')
            (L"\\v",  L'\v')
            (L"\\\\", L'\\')
            (L"\\'",  L'\'')
            (L"\\\"", L'\"');

        // Alternatives are tried left to right, so escapes must come before
        // the "any character except a double quote" fallback.
        unesc_str = L'"' >> *(unesc_char | ~qi::standard_wide::char_(L'"')) >> L'"';
    }
  private:
    qi::rule<It, std::wstring()> unesc_str;
    qi::symbols<wchar_t const, wchar_t const> unesc_char;
};
/// Runs the grammar over a few sample inputs and prints each input together
/// with its parsed result, followed by any input left unconsumed.
int main() {
    using It = std::wstring::const_iterator;
    Parser<It> const p {};

    // The escaped samples are written twice: once as a raw literal and once as
    // the equivalent ordinary literal, where every backslash and quote of the
    // input must itself be escaped for the C++ compiler.
    for (std::wstring const input : {
            L"\"abaca\\tdabra\"",
            LR"("Hello\"")", L"\"Hello\\\"\"", // equivalent
            LR"("Hello\\")", L"\"Hello\\\\\"", // equivalent
    }) {
        It f = input.begin(), l = input.end();
        std::wstring s;
        if (parse(f, l, p, s)) {
            std::wcout << L"Unescape: " << input << L" -> " << s << L"\n";
        }

        // Report whatever lies between the final iterator position and the end.
        if (f != l)
            std::wcout << "Remaining input: '" << std::wstring(f,l) << "'\n";
    }
}
输出
Unescape: "abaca\tdabra" -> abaca dabra
Unescape: "Hello\"" -> Hello"
Unescape: "Hello\"" -> Hello"
Unescape: "Hello\\" -> Hello\
Unescape: "Hello\\" -> Hello\
附加内容
如果不使用 symbols,我可能会写得更简单。除非您需要动态的转义列表,否则下面的写法更灵活,而且可能更高效:
namespace enc = qi::standard_wide;
// A backslash introduces an escape: the listed letters produce their control
// character through qi::attr, and any other character after the backslash is
// taken as-is. Everything else except a double quote is matched by the
// second alternative.
unesc_str = '"' >> *(
        '\\' >> (
              'a' >> qi::attr('\a')
            | 'b' >> qi::attr('\b')
            | 'f' >> qi::attr('\f')
            | 'n' >> qi::attr('\n')
            | 'r' >> qi::attr('\r')
            | 't' >> qi::attr('\t')
            | 'v' >> qi::attr('\v')
            | enc::char_
        ) | ~enc::char_('"')) >> '"';