如何绕过贪婪的 Kleene 星号匹配?
How to get around greedy rd?
我想解析一个可以包含“-”但既不以它开头也不以它结尾的字符串。
我希望这个解析器可以工作:
auto const parser = alnum >> -(*(alnum | char_('-')) >> alnum);
但对于我的测试输入 "something",它只解析了 "so",并没有消耗掉其余部分。
问题在于中间的部分 *(alnum | char_('-'))
是贪婪的:它会一直消耗到输入末尾(包括最后一个字符),于是其后的 alnum 再也没有字符可以匹配,整个可选的括号部分因此失败。
我想知道的是,我怎样才能绕过它并制作这个解析器?
我通过让贪婪的 Kleene 星号内部的解析器不去消耗紧挨着 'eoi'(输入结束)的那个字符来修复了它。一个更健壮的修复还会让它同样不消耗紧挨着空白字符的那个字符:
所以 *(alnum | char_('-'))
变成 *((alnum | char_('-')) >> !(eoi | space))
我个人会用"正向"(positively)的方式来写:
auto const rule = raw [ lexeme [
alnum >> *('-' >> alnum | alnum) >> !(alnum|'-')
] ];
这里使用了:
lexeme
来处理空白的语义(在其内部不会跳过空白),
raw
以避免必须主动捕获每一个您希望出现在输出中的字符(您只是想要所有匹配到的字符)。
'-' >> alnum
以"正向"(positively)的方式要求每个破折号后面都必须跟一个字母或数字字符。请注意,这也禁止了输入中出现 "--"
。请参阅下面的"变体"一节。
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <string>
#include <algorithm>
namespace x3 = boost::spirit::x3;
namespace parser {
using namespace boost::spirit::x3;
auto const rule = raw [ lexeme [
alnum >> *('-' >> alnum | alnum) >> !(alnum|'-')
] ];
}
// Self-check driver: runs `parser::rule` over a table of inputs and prints
// every case whose parse result differs from the expected one.
// Returns 0 when every case behaves as expected, 1 otherwise.
int main() {
    struct test { std::string input; bool expected; };

    bool all_as_expected = true;

    // Bind by const reference: `auto const t` would copy each test (and its
    // std::string) on every iteration.
    for (auto const& t : {
            test { "some-where", true },
            test { " some-where", true },
            test { "some-where ", true },
            test { "s", true },
            test { " s", true },
            test { "s ", true },
            test { "-", false },
            test { " -", false },
            test { "- ", false },
            test { "some-", false },
            test { " some-", false },
            test { "some- ", false },
            test { "some--where", false },
            test { " some--where", false },
            test { "some--where ", false },
        })
    {
        std::string output;
        // x3::space is the skipper; the text matched by the rule ends up in `output`.
        bool const ok = x3::phrase_parse(t.input.begin(), t.input.end(), parser::rule, x3::space, output);
        if (ok != t.expected) {
            all_as_expected = false;
            std::cout << "FAILURE: '" << t.input << "'\t" << std::boolalpha << ok << "\t'" << output << "'\n";
        }
    }

    // Surface mismatches through the exit status as well as stdout.
    return all_as_expected ? 0 : 1;
}
变体
为了也允许 some--thing
和类似的输入,我将 '-'
更改为 +lit('-')
:
alnum >> *(+lit('-') >> alnum | alnum) >> !(alnum|'-')
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <string>
#include <algorithm>
namespace x3 = boost::spirit::x3;
namespace parser {
    using namespace boost::spirit::x3;

    // Variant of the token rule that also accepts runs of dashes
    // ("some--where"), while still rejecting a leading or trailing dash.
    // `lexeme` disables skipping inside the token and `raw` exposes the whole
    // matched range as the attribute.
    auto const rule = raw[lexeme[
        alnum                                   // first character must be alphanumeric
        >> *((+lit('-') >> alnum) | alnum)      // one or more dashes, but only when an alphanumeric follows
        >> !(alnum | lit('-'))                  // fail if a dash is left over (e.g. "some-")
    ]];
}
// Self-check driver for the dash-run variant: runs `parser::rule` over a
// table of inputs and prints every case whose parse result differs from the
// expected one. Returns 0 when every case behaves as expected, 1 otherwise.
int main() {
    struct test { std::string input; bool expected; };

    bool all_as_expected = true;

    // Bind by const reference: `auto const t` would copy each test (and its
    // std::string) on every iteration.
    for (auto const& t : {
            test { "some-where", true },
            test { " some-where", true },
            test { "some-where ", true },
            test { "s", true },
            test { " s", true },
            test { "s ", true },
            test { "-", false },
            test { " -", false },
            test { "- ", false },
            test { "some-", false },
            test { " some-", false },
            test { "some- ", false },
            test { "some--where", true },
            test { " some--where", true },
            test { "some--where ", true },
        })
    {
        std::string output;
        // x3::space is the skipper; the text matched by the rule ends up in `output`.
        bool const ok = x3::phrase_parse(t.input.begin(), t.input.end(), parser::rule, x3::space, output);
        if (ok != t.expected) {
            all_as_expected = false;
            std::cout << "FAILURE: '" << t.input << "'\t" << std::boolalpha << ok << "\t'" << output << "'\n";
        }
    }

    // Surface mismatches through the exit status as well as stdout.
    return all_as_expected ? 0 : 1;
}
我想解析一个可以包含“-”但既不以它开头也不以它结尾的字符串。
我希望这个解析器可以工作:
auto const parser = alnum >> -(*(alnum | char_('-')) >> alnum);
但对于我的测试输入 "something",它只解析了 "so",并没有消耗掉其余部分。
问题在于中间的部分 *(alnum | char_('-'))
是贪婪的:它会一直消耗到输入末尾(包括最后一个字符),于是其后的 alnum 再也没有字符可以匹配,整个可选的括号部分因此失败。
我想知道的是,我怎样才能绕过它并制作这个解析器?
我通过让贪婪的 Kleene 星号内部的解析器不去消耗紧挨着 'eoi'(输入结束)的那个字符来修复了它。一个更健壮的修复还会让它同样不消耗紧挨着空白字符的那个字符:
所以 *(alnum | char_('-'))
变成 *((alnum | char_('-')) >> !(eoi | space))
我个人会用"正向"(positively)的方式来写:
auto const rule = raw [ lexeme [
alnum >> *('-' >> alnum | alnum) >> !(alnum|'-')
] ];
这里使用了:
lexeme
来处理空白的语义(在其内部不会跳过空白),raw
以避免必须主动捕获每一个您希望出现在输出中的字符(您只是想要所有匹配到的字符)。'-' >> alnum
以"正向"(positively)的方式要求每个破折号后面都必须跟一个字母或数字字符。请注意,这也禁止了输入中出现 "--"
。请参阅下面的"变体"一节。
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <string>
#include <algorithm>
namespace x3 = boost::spirit::x3;
namespace parser {
using namespace boost::spirit::x3;
auto const rule = raw [ lexeme [
alnum >> *('-' >> alnum | alnum) >> !(alnum|'-')
] ];
}
// Self-check driver: runs `parser::rule` over a table of inputs and prints
// every case whose parse result differs from the expected one.
// Returns 0 when every case behaves as expected, 1 otherwise.
int main() {
    struct test { std::string input; bool expected; };

    bool all_as_expected = true;

    // Bind by const reference: `auto const t` would copy each test (and its
    // std::string) on every iteration.
    for (auto const& t : {
            test { "some-where", true },
            test { " some-where", true },
            test { "some-where ", true },
            test { "s", true },
            test { " s", true },
            test { "s ", true },
            test { "-", false },
            test { " -", false },
            test { "- ", false },
            test { "some-", false },
            test { " some-", false },
            test { "some- ", false },
            test { "some--where", false },
            test { " some--where", false },
            test { "some--where ", false },
        })
    {
        std::string output;
        // x3::space is the skipper; the text matched by the rule ends up in `output`.
        bool const ok = x3::phrase_parse(t.input.begin(), t.input.end(), parser::rule, x3::space, output);
        if (ok != t.expected) {
            all_as_expected = false;
            std::cout << "FAILURE: '" << t.input << "'\t" << std::boolalpha << ok << "\t'" << output << "'\n";
        }
    }

    // Surface mismatches through the exit status as well as stdout.
    return all_as_expected ? 0 : 1;
}
变体
为了也允许 some--thing
和类似的输入,我将 '-'
更改为 +lit('-')
:
alnum >> *(+lit('-') >> alnum | alnum) >> !(alnum|'-')
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <string>
#include <algorithm>
namespace x3 = boost::spirit::x3;
namespace parser {
    using namespace boost::spirit::x3;

    // Variant of the token rule that also accepts runs of dashes
    // ("some--where"), while still rejecting a leading or trailing dash.
    // `lexeme` disables skipping inside the token and `raw` exposes the whole
    // matched range as the attribute.
    auto const rule = raw[lexeme[
        alnum                                   // first character must be alphanumeric
        >> *((+lit('-') >> alnum) | alnum)      // one or more dashes, but only when an alphanumeric follows
        >> !(alnum | lit('-'))                  // fail if a dash is left over (e.g. "some-")
    ]];
}
// Self-check driver for the dash-run variant: runs `parser::rule` over a
// table of inputs and prints every case whose parse result differs from the
// expected one. Returns 0 when every case behaves as expected, 1 otherwise.
int main() {
    struct test { std::string input; bool expected; };

    bool all_as_expected = true;

    // Bind by const reference: `auto const t` would copy each test (and its
    // std::string) on every iteration.
    for (auto const& t : {
            test { "some-where", true },
            test { " some-where", true },
            test { "some-where ", true },
            test { "s", true },
            test { " s", true },
            test { "s ", true },
            test { "-", false },
            test { " -", false },
            test { "- ", false },
            test { "some-", false },
            test { " some-", false },
            test { "some- ", false },
            test { "some--where", true },
            test { " some--where", true },
            test { "some--where ", true },
        })
    {
        std::string output;
        // x3::space is the skipper; the text matched by the rule ends up in `output`.
        bool const ok = x3::phrase_parse(t.input.begin(), t.input.end(), parser::rule, x3::space, output);
        if (ok != t.expected) {
            all_as_expected = false;
            std::cout << "FAILURE: '" << t.input << "'\t" << std::boolalpha << ok << "\t'" << output << "'\n";
        }
    }

    // Surface mismatches through the exit status as well as stdout.
    return all_as_expected ? 0 : 1;
}