更新解析器以在带引号的字符串中接受括号
Update a parser to admit parentheses within quoted strings
我需要更新解析器以接受这些新功能,但我无法一次管理所有这些功能:
- 命令必须接受不确定数量的参数 (> 0)。
- 参数可以是数字、不带引号的字符串或带引号的字符串。
- 参数之间用逗号分隔。
- 在带引号的字符串中,允许出现左括号和右括号。
(看源代码示例更容易理解这些要求)
我目前的代码,包括检查,如下:
Godbolt 链接: https://godbolt.org/z/5d6o53n9h
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace script
{
    /// A single script command: its kind plus its arguments kept as text.
    struct Command
    {
        /// Kinds of command a script line can be classified as.
        enum Type
        {
            NONE,
            WRITE_LOG,
            INSERT_LABEL,
            START_PROCESS,
            END_PROCESS,
            COMMENT,
            FAIL
        };

        Type type = NONE;              // a default-constructed command is NONE
        std::vector<std::string> args; // argument texts, in the order they were written
    };

    /// A sequence of commands.
    using Commands = std::vector<Command>;
} // namespace script
BOOST_FUSION_ADAPT_STRUCT(script::Command, type, args)
namespace script
{
namespace qi = boost::spirit::qi;
// Spirit.Qi grammar that parses a whole script into Commands.
// Each line is tried as: empty line, known command call, "//" comment, and finally "fail".
template <typename It>
class Parser : public qi::grammar<It, Commands()>
{
private:
// Maps the command keywords to their Command::Type value.
qi::symbols<char, Command::Type> type;
// Per-line rules; they use the blank skipper (blank_type).
qi::rule<It, Command(), qi::blank_type> none, command, comment, fail;// "fail" consumes any line, so it must be the last alternative tried
// Top-level rule: the list of lines.
qi::rule<It, Commands()> start;
public:
Parser() : Parser::base_type(start)
{
using namespace qi;// NOTE: "as_string" is necessary for all args because they are stored in a std::vector<std::string>
// Parser that consumes nothing and yields an empty argument list.
auto empty_args = copy(attr(std::vector<std::string>{}));
type.add
("WriteLog", Command::WRITE_LOG)
("InsertLabel", Command::INSERT_LABEL)
("StartProcess", Command::START_PROCESS)
("EndProcess", Command::END_PROCESS);
// Empty line: optional blanks followed (look-ahead only) by end of line or end of input.
none = omit[*blank] >> &(eol | eoi)
>> attr(Command::NONE)
>> empty_args;// no arguments
// Command call: keyword, '(', one or more comma-separated arguments, ')'.
// An argument is any run of characters other than '(' ')' ',' CR LF, so quote
// characters are kept as ordinary text and a parenthesis always ends the argument.
command = type >> '('
>> as_string[lexeme[+~char_("(),\r\n")]] % ',' >> ')';
// Comment: "//" followed by the rest of the line, stored as the single argument.
comment = lit("//")
>> attr(Command::COMMENT)
>> as_string[lexeme[*~char_("\r\n")]];
// Catch-all: consume the rest of the line and report it as FAIL.
fail = omit[*~char_("\r\n")]
>> attr(Command::FAIL)
>> empty_args;// no arguments
// Lines are separated by eol; the whole input must be consumed (eoi).
start = skip(blank)[(none | command | comment | fail) % eol] >> eoi;
}
};
/// Parses the script read from `in` and returns its commands.
/// Whitespace skipping is switched off on the stream so the grammar sees every character.
/// Throws std::runtime_error("command parse error") when the grammar does not match.
Commands parse(std::istream& in)
{
    using Iterator = boost::spirit::istream_iterator;
    static const Parser<Iterator> grammar;

    in >> std::noskipws; // the grammar handles blanks itself
    Iterator cursor(in);
    Iterator end_of_input;

    Commands result;
    const bool matched = qi::parse(cursor, end_of_input, grammar, result);
    if (matched)
        return result;

    throw std::runtime_error("command parse error");
}
}//namespace script
// Sample script used as parser input. The two empty lines and the final
// newline are significant: each of them is expected to produce a NONE entry
// (9 entries in total); the lines with an unterminated quote are expected to
// be reported as FAIL.
std::stringstream ss{
R"(// just a comment

WriteLog("this is a log")
WriteLog("this is also (in another way) a log")
WriteLog("but this is just a fail)

StartProcess(17, "program.exe", True)
StartProcess(17, "this_is_a_fail.exe, True)
)"};
/// Parses the sample script and prints three self-checks: number of commands,
/// their types, and their argument counts. Any exception is reported on stdout.
int main()
{
    using namespace script;
    try
    {
        auto commands = script::parse(ss);

        // Expected argument count per line; -1 marks FAIL lines, whose argument count is not checked.
        std::array args{ 0, 0, 1, 1, -1, 0, 3, -1, 0 };
        std::array types{ Command::COMMENT, Command::NONE, Command::WRITE_LOG, Command::WRITE_LOG, Command::FAIL, Command::NONE, Command::START_PROCESS, Command::FAIL, Command::NONE };

        std::cout << std::boolalpha << "size correct? " << (commands.size() == 9) << '\n';
        std::cout << "types correct? "
                  << std::equal(commands.begin(), commands.end(), types.begin(), types.end(),
                                [](auto& cmd, auto& type) { return cmd.type == type; })
                  << '\n';
        std::cout << "arguments correct? "
                  << std::equal(commands.begin(), commands.end(), args.begin(), args.end(),
                                [](auto& cmd, auto arg) {
                                    // Test the -1 flag first so the count is only compared
                                    // once it is known to be non-negative (no signed/unsigned mix).
                                    return arg == -1 || cmd.args.size() == static_cast<std::size_t>(arg);
                                })
                  << '\n';
    }
    catch (std::exception const& e)
    {
        std::cout << e.what() << "\n";
    }
}
如有任何帮助,我们将不胜感激。
你说你想在带引号的字符串中使用括号。但是你的解析器甚至还不支持带引号的字符串!
所以问题出在你的参数(argument)规则上——而这条规则甚至还不存在。它大概对应这一部分:
argument = +~char_("(),\r\n");
command = type >> '(' >> argument % ',' >> ')';
其中 argument
可能声明为
qi::rule<It, Argument()> argument;
事实上,以有组织的方式重写测试,这就是我们现在得到的:
// Expected parse result for the sample script, in input order.
static const Commands expected{
    {Command::COMMENT, {"just a comment"}},
    {Command::NONE, {}},
    {Command::WRITE_LOG, {"this is a log"}},
    {Command::WRITE_LOG, {"this is also (in another way) a log"}},
    {Command::FAIL, {}},
    {Command::NONE, {}},
    {Command::START_PROCESS, {"17", "program.exe", "True"}},
    {Command::FAIL, {}},
    {Command::NONE, {}},
};

try {
    auto const parsed = script::parse(ss);
    fmt::print("Parsed all correct? {} -- {} parsed (vs. {} expected)\n",
               (parsed == expected), parsed.size(), expected.size());

    // Compare entry by entry over the part both lists have in common.
    auto const common = std::min(expected.size(), parsed.size());
    for (auto i = 0u; i < common; ++i) {
        if (expected[i] == parsed[i]) {
            fmt::print("index #{} CORRECT ({})\n", i, parsed[i]);
            continue;
        }
        fmt::print("index #{} expected {}\n"
                   " actual: {}\n",
                   i, expected[i], parsed[i]);
    }
} catch (std::exception const& e) {
    fmt::print("Exception: {}\n", e.what());
}
输出:
Parsed all correct? false -- 9 parsed (vs. 9 expected)
index #0 CORRECT (Command(COMMENT, ["just a comment"]))
index #1 CORRECT (Command(NONE, []))
index #2 expected Command(WRITE_LOG, ["this is a log"])
actual: Command(WRITE_LOG, ["\"this is a log\""])
index #3 expected Command(WRITE_LOG, ["this is also (in another way) a log"])
actual: Command(FAIL, [])
index #4 expected Command(FAIL, [])
actual: Command(WRITE_LOG, ["\"but this is just a fail"])
index #5 CORRECT (Command(NONE, []))
index #6 expected Command(START_PROCESS, ["17", "program.exe", "True"])
actual: Command(START_PROCESS, ["17", "\"program.exe\"", "True"])
index #7 expected Command(FAIL, [])
actual: Command(START_PROCESS, ["17", "\"this_is_a_fail.exe", "True"])
index #8 CORRECT (Command(NONE, []))
如您所见,按照我的预期,带引号的字符串同样没有通过。这是因为引号只是一种语言结构:在 AST(解析结果)中,您并不关心它在源码里具体是怎么写的。例如,"hello\ world\041"
也可能等价于 "hello world!"
,所以两者都应该产生参数值 hello world!
。
所以,让我们按照我们说的去做:
argument = quoted_string | number | boolean | raw_string;
我们可以添加一些规则:
// notice these are lexemes (no internal skipping):
qi::rule<It, Argument()> argument, quoted_string, number, boolean, raw_string;
并定义它们:
// Quoted string: the surrounding quotes are matched but not stored; no escapes yet.
quoted_string = '"' >> *~char_('"') >> '"';
// raw[] exposes the matched source text, so the value stays a string.
number = raw[double_];
boolean = raw[bool_];
// Unquoted text: any run of characters other than '(' ')' ',' CR LF.
raw_string = +~char_("(),\r\n");
// Alternatives are tried left to right, so raw_string is the fallback.
argument = quoted_string | number | boolean | raw_string;
If you want to allow escaped quotes, use something like this:
// A backslash makes the following character (for example a quote) part of the string.
quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"';
现在,我想说您可能希望 Argument
类似于 variant<double, std::string, bool>
,而不仅仅是 std::string
。
仅此更改,所有问题几乎都消失了:Live On Compiler Explorer:
Parsed all correct? false -- 9 parsed (vs. 9 expected)
index #0 CORRECT (Command(COMMENT, ["just a comment"]))
index #1 CORRECT (Command(NONE, []))
index #2 CORRECT (Command(WRITE_LOG, ["this is a log"]))
index #3 CORRECT (Command(WRITE_LOG, ["this is also (in another way) a log"]))
index #4 CORRECT (Command(FAIL, []))
index #5 CORRECT (Command(NONE, []))
index #6 CORRECT (Command(START_PROCESS, ["17", "program.exe", "True"]))
index #7 expected Command(FAIL, [])
actual: Command(START_PROCESS, ["17", "this_is_a_fail.exe, True)\n\"this_is_a_fail.exe", "True"])
index #8 CORRECT (Command(NONE, []))
现在,索引 #7 看起来非常古怪,但这其实是 Spirit 中一个众所周知的现象¹。启用 BOOST_SPIRIT_DEBUG 即可看到:
<argument>
<try>"this_is_a_fail.exe,</try>
<quoted_string>
<try>"this_is_a_fail.exe,</try>
<fail/>
</quoted_string>
<number>
<try>"this_is_a_fail.exe,</try>
<fail/>
</number>
<boolean>
<try>"this_is_a_fail.exe,</try>
<fail/>
</boolean>
<raw_string>
<try>"this_is_a_fail.exe,</try>
<success>, True)</success>
<attributes>[[t, h, i, s, _, i, s, _, a, _, f, a, i, l, ., e, x, e, ,, , T, r, u, e, ), ", t, h, i, s, _, i, s, _, a, _, f, a, i, l, ., e, x, e]]</attributes>
</raw_string>
<success>, True)</success>
<attributes>[[t, h, i, s, _, i, s, _, a, _, f, a, i, l, ., e, x, e, ,, , T, r, u, e, ), ", t, h, i, s, _, i, s, _, a, _, f, a, i, l, ., e, x, e]]</attributes>
</argument>
因此,该字符串被接受为原始字符串,即使它以 "
开头。这很容易修复,但我们甚至不需要。我们可以只应用 qi::hold
来避免重复:
argument = qi::hold[quoted_string] | number | boolean | raw_string;
结果:
actual: Command(START_PROCESS, ["17", "\"this_is_a_fail.exe", "True"])
但是,如果您期望这一行解析失败,那就把另一个问题也修掉:
raw_string = +~char_("\"(),\r\n"); // note the \"
Note: In the off-chance you really only require it to not start with
a quote:
raw_string = !lit('"') >> +~char_("(),\r\n");
I guess by now you see the problem with a "loose rule" like that, so I
don't recommend it.
You could express the requirement another way though, saying "if an
argument starts with '"'
then it MUST be a quoted_string
". Use
an expectation point there:
// Expectation points (>): once the opening quote matched, the rest must match too.
quoted_string = '"' > *('\\' >> char_ | ~char_('"')) > '"';
This has the effect that failure to parse a complete quoted_string
will throw an expectation_failed
exception.
总结/清单
这就是我们最终得到的:
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fmt/ranges.h>
namespace script {
// An argument is kept as text; a command carries a list of them.
using Argument = std::string;
using Arguments = std::vector<Argument>;
// A single script command: its kind plus its arguments.
struct Command {
// Kinds of command a script line can be classified as.
enum Type {
NONE,
WRITE_LOG,
INSERT_LABEL,
START_PROCESS,
END_PROCESS,
COMMENT,
FAIL
};
Type type{Type::NONE}; // a default-constructed command is NONE
Arguments args;
// Member-wise comparison; also provides the == and != used to check parse results.
auto operator<=>(Command const&) const = default;
};
// A sequence of commands.
using Commands = std::vector<Command>;
} // namespace script
BOOST_FUSION_ADAPT_STRUCT(script::Command, type, args)
namespace script {
namespace qi = boost::spirit::qi;
/// Spirit.Qi grammar that turns a script into a list of Commands, one per line.
///
/// Every line is tried, in this order, as an empty line (NONE), a known
/// command call, a `//` comment and finally the catch-all FAIL.
///
/// @tparam It  iterator type of the input being parsed
template <typename It> class Parser : public qi::grammar<It, Commands()> {
  public:
    Parser() : Parser::base_type(start) {
        using namespace qi;

        // Keywords accepted as command names.
        type.add //
            ("WriteLog", Command::WRITE_LOG) //
            ("InsertLabel", Command::INSERT_LABEL) //
            ("StartProcess", Command::START_PROCESS) //
            ("EndProcess", Command::END_PROCESS);

        // Empty line: optional blanks followed (look-ahead only) by end of line or input.
        none = omit[*blank] >> &(eol | eoi) //
            >> attr(Command{Command::NONE, {}});

        // Quoted text: the quotes are matched but not stored; a backslash makes
        // the following character (for example a quote) part of the text.
        quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"';
        // raw[] exposes the matched source text, so numbers and booleans stay strings.
        number  = raw[double_];
        boolean = raw[bool_];
        // Unquoted text must not contain a quote, so an unterminated quoted
        // string cannot be accepted as raw text.
        raw_string = +~char_("\"(),\r\n");
        // hold[] discards characters collected by a quoted_string that fails half-way,
        // so they are not duplicated in the attribute of the alternative that matches.
        argument = qi::hold[quoted_string] | number | boolean | raw_string;

        // Command call: keyword, '(', one or more comma-separated arguments, ')'.
        command = type >> '(' >> argument % ',' >> ')';

        // Comment: "//" followed by the rest of the line, stored as the single argument.
        comment = "//" //
            >> attr(Command::COMMENT) //
            >> as_string[lexeme[*~char_("\r\n")]];

        // Catch-all: consume the rest of the line and report it as FAIL.
        fail = omit[*~char_("\r\n")] >> attr(Command{Command::FAIL, {}});

        line  = none | command | comment | fail; // keep fail last
        start = skip(blank)[line % eol] >> eoi;

        BOOST_SPIRIT_DEBUG_NODES((start)(line)(fail)(comment)(command)(
            argument)(none)(quoted_string)(raw_string)(boolean)(number))
    }

  private:
    qi::symbols<char, Command::Type> type;
    qi::rule<It, Command(), qi::blank_type> line, none, command, comment, fail;
    // notice these are lexemes (no internal skipping):
    qi::rule<It, Argument()> argument, quoted_string, number, boolean, raw_string;
    qi::rule<It, Commands()> start;
};
/// Parses the script read from `in` and returns its commands.
///
/// Whitespace skipping is switched off on the stream so the grammar sees every character.
///
/// @param in  stream holding the script text
/// @return    the parsed commands
/// @throws std::runtime_error  ("command parse error") when the grammar does not match
Commands parse(std::istream& in)
{
    using It = boost::spirit::istream_iterator;
    static const Parser<It> parser;

    Commands commands;
    if (!qi::parse(It{in >> std::noskipws}, {}, parser, commands))
        throw std::runtime_error("command parse error");

    // Returning the named local lets the result be moved or elided; the former
    // `cond ? commands : throw ...` expression forced a copy of the vector.
    return commands;
}
struct Formatter {
static constexpr auto name(script::Command::Type type) {
return std::array{"NONE", "WRITE_LOG", "INSERT_LABEL",
"START_PROCESS", "END_PROCESS", "COMMENT",
"FAIL"}
.at(static_cast<int>(type));
}
auto parse(auto& ctx) const { return ctx.begin(); }
auto format(script::Command const& cmd, auto& ctx) const {
return format_to(ctx.out(), "Command({}, {})", name(cmd.type), cmd.args);
}
};
} // namespace script
template <> struct fmt::formatter<script::Command> : script::Formatter {};
// Sample script used as parser input. The two empty lines and the final
// newline are significant: each of them is expected to produce a NONE entry
// (9 entries in total); the lines with an unterminated quote are expected to
// be reported as FAIL.
std::stringstream ss{
R"(// just a comment

WriteLog("this is a log")
WriteLog("this is also (in another way) a log")
WriteLog("but this is just a fail)

StartProcess(17, "program.exe", True)
StartProcess(17, "this_is_a_fail.exe, True)
)"};
/// Parses the sample script and prints, entry by entry, whether the result
/// matches the expected commands. Any exception is reported on stdout.
int main() {
    using namespace script;

    // Expected parse result for the sample script, in input order.
    static const Commands expected{
        {Command::COMMENT, {"just a comment"}},
        {Command::NONE, {}},
        {Command::WRITE_LOG, {"this is a log"}},
        {Command::WRITE_LOG, {"this is also (in another way) a log"}},
        {Command::FAIL, {}},
        {Command::NONE, {}},
        {Command::START_PROCESS, {"17", "program.exe", "True"}},
        {Command::FAIL, {}},
        {Command::NONE, {}},
    };

    try {
        auto const parsed = script::parse(ss);
        fmt::print("Parsed all correct? {} -- {} parsed (vs. {} expected)\n",
                   (parsed == expected), parsed.size(), expected.size());

        // Compare entry by entry over the part both lists have in common.
        auto const common = std::min(expected.size(), parsed.size());
        for (auto i = 0u; i < common; ++i) {
            if (expected[i] == parsed[i]) {
                fmt::print("index #{} CORRECT ({})\n", i, parsed[i]);
                continue;
            }
            fmt::print("index #{} expected {}\n"
                       " actual: {}\n",
                       i, expected[i], parsed[i]);
        }
    } catch (std::exception const& e) {
        fmt::print("Exception: {}\n", e.what());
    }
}
输出:
Parsed all correct? true -- 9 parsed (vs. 9 expected)
index #0 CORRECT (Command(COMMENT, ["just a comment"]))
index #1 CORRECT (Command(NONE, []))
index #2 CORRECT (Command(WRITE_LOG, ["this is a log"]))
index #3 CORRECT (Command(WRITE_LOG, ["this is also (in another way) a log"]))
index #4 CORRECT (Command(FAIL, []))
index #5 CORRECT (Command(NONE, []))
index #6 CORRECT (Command(START_PROCESS, ["17", "program.exe", "True"]))
index #7 CORRECT (Command(FAIL, []))
index #8 CORRECT (Command(NONE, []))
¹ 参见示例boost::spirit alternative parsers return duplicates(链接到另外三个同类)
我需要更新解析器以接受这些新功能,但我无法一次管理所有这些功能:
- 命令必须接受不确定数量的参数 (> 0)。
- 参数可以是数字、不带引号的字符串或带引号的字符串。
- 参数之间用逗号分隔。
- 在带引号的字符串中,允许出现左括号和右括号。
(看源代码示例更容易理解这些要求)
我目前的代码,包括检查,如下:
Godbolt 链接: https://godbolt.org/z/5d6o53n9h
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace script
{
    /// A single script command: its kind plus its arguments kept as text.
    struct Command
    {
        /// Kinds of command a script line can be classified as.
        enum Type
        {
            NONE,
            WRITE_LOG,
            INSERT_LABEL,
            START_PROCESS,
            END_PROCESS,
            COMMENT,
            FAIL
        };

        Type type = NONE;              // a default-constructed command is NONE
        std::vector<std::string> args; // argument texts, in the order they were written
    };

    /// A sequence of commands.
    using Commands = std::vector<Command>;
} // namespace script
BOOST_FUSION_ADAPT_STRUCT(script::Command, type, args)
namespace script
{
namespace qi = boost::spirit::qi;
// Spirit.Qi grammar that parses a whole script into Commands.
// Each line is tried as: empty line, known command call, "//" comment, and finally "fail".
template <typename It>
class Parser : public qi::grammar<It, Commands()>
{
private:
// Maps the command keywords to their Command::Type value.
qi::symbols<char, Command::Type> type;
// Per-line rules; they use the blank skipper (blank_type).
qi::rule<It, Command(), qi::blank_type> none, command, comment, fail;// "fail" consumes any line, so it must be the last alternative tried
// Top-level rule: the list of lines.
qi::rule<It, Commands()> start;
public:
Parser() : Parser::base_type(start)
{
using namespace qi;// NOTE: "as_string" is necessary for all args because they are stored in a std::vector<std::string>
// Parser that consumes nothing and yields an empty argument list.
auto empty_args = copy(attr(std::vector<std::string>{}));
type.add
("WriteLog", Command::WRITE_LOG)
("InsertLabel", Command::INSERT_LABEL)
("StartProcess", Command::START_PROCESS)
("EndProcess", Command::END_PROCESS);
// Empty line: optional blanks followed (look-ahead only) by end of line or end of input.
none = omit[*blank] >> &(eol | eoi)
>> attr(Command::NONE)
>> empty_args;// no arguments
// Command call: keyword, '(', one or more comma-separated arguments, ')'.
// An argument is any run of characters other than '(' ')' ',' CR LF, so quote
// characters are kept as ordinary text and a parenthesis always ends the argument.
command = type >> '('
>> as_string[lexeme[+~char_("(),\r\n")]] % ',' >> ')';
// Comment: "//" followed by the rest of the line, stored as the single argument.
comment = lit("//")
>> attr(Command::COMMENT)
>> as_string[lexeme[*~char_("\r\n")]];
// Catch-all: consume the rest of the line and report it as FAIL.
fail = omit[*~char_("\r\n")]
>> attr(Command::FAIL)
>> empty_args;// no arguments
// Lines are separated by eol; the whole input must be consumed (eoi).
start = skip(blank)[(none | command | comment | fail) % eol] >> eoi;
}
};
/// Parses the script read from `in` and returns its commands.
/// Whitespace skipping is switched off on the stream so the grammar sees every character.
/// Throws std::runtime_error("command parse error") when the grammar does not match.
Commands parse(std::istream& in)
{
    using Iterator = boost::spirit::istream_iterator;
    static const Parser<Iterator> grammar;

    in >> std::noskipws; // the grammar handles blanks itself
    Iterator cursor(in);
    Iterator end_of_input;

    Commands result;
    const bool matched = qi::parse(cursor, end_of_input, grammar, result);
    if (matched)
        return result;

    throw std::runtime_error("command parse error");
}
}//namespace script
// Sample script used as parser input. The two empty lines and the final
// newline are significant: each of them is expected to produce a NONE entry
// (9 entries in total); the lines with an unterminated quote are expected to
// be reported as FAIL.
std::stringstream ss{
R"(// just a comment

WriteLog("this is a log")
WriteLog("this is also (in another way) a log")
WriteLog("but this is just a fail)

StartProcess(17, "program.exe", True)
StartProcess(17, "this_is_a_fail.exe, True)
)"};
/// Parses the sample script and prints three self-checks: number of commands,
/// their types, and their argument counts. Any exception is reported on stdout.
int main()
{
    using namespace script;
    try
    {
        auto commands = script::parse(ss);

        // Expected argument count per line; -1 marks FAIL lines, whose argument count is not checked.
        std::array args{ 0, 0, 1, 1, -1, 0, 3, -1, 0 };
        std::array types{ Command::COMMENT, Command::NONE, Command::WRITE_LOG, Command::WRITE_LOG, Command::FAIL, Command::NONE, Command::START_PROCESS, Command::FAIL, Command::NONE };

        std::cout << std::boolalpha << "size correct? " << (commands.size() == 9) << '\n';
        std::cout << "types correct? "
                  << std::equal(commands.begin(), commands.end(), types.begin(), types.end(),
                                [](auto& cmd, auto& type) { return cmd.type == type; })
                  << '\n';
        std::cout << "arguments correct? "
                  << std::equal(commands.begin(), commands.end(), args.begin(), args.end(),
                                [](auto& cmd, auto arg) {
                                    // Test the -1 flag first so the count is only compared
                                    // once it is known to be non-negative (no signed/unsigned mix).
                                    return arg == -1 || cmd.args.size() == static_cast<std::size_t>(arg);
                                })
                  << '\n';
    }
    catch (std::exception const& e)
    {
        std::cout << e.what() << "\n";
    }
}
如有任何帮助,我们将不胜感激。
你说你想在带引号的字符串中使用括号。但是你的解析器甚至还不支持带引号的字符串!
所以问题出在你的参数(argument)规则上——而这条规则甚至还不存在。它大概对应这一部分:
argument = +~char_("(),\r\n");
command = type >> '(' >> argument % ',' >> ')';
其中 argument
可能声明为
qi::rule<It, Argument()> argument;
事实上,以有组织的方式重写测试,这就是我们现在得到的:
// Expected parse result for the sample script, in input order.
static const Commands expected{
    {Command::COMMENT, {"just a comment"}},
    {Command::NONE, {}},
    {Command::WRITE_LOG, {"this is a log"}},
    {Command::WRITE_LOG, {"this is also (in another way) a log"}},
    {Command::FAIL, {}},
    {Command::NONE, {}},
    {Command::START_PROCESS, {"17", "program.exe", "True"}},
    {Command::FAIL, {}},
    {Command::NONE, {}},
};

try {
    auto const parsed = script::parse(ss);
    fmt::print("Parsed all correct? {} -- {} parsed (vs. {} expected)\n",
               (parsed == expected), parsed.size(), expected.size());

    // Compare entry by entry over the part both lists have in common.
    auto const common = std::min(expected.size(), parsed.size());
    for (auto i = 0u; i < common; ++i) {
        if (expected[i] == parsed[i]) {
            fmt::print("index #{} CORRECT ({})\n", i, parsed[i]);
            continue;
        }
        fmt::print("index #{} expected {}\n"
                   " actual: {}\n",
                   i, expected[i], parsed[i]);
    }
} catch (std::exception const& e) {
    fmt::print("Exception: {}\n", e.what());
}
输出:
Parsed all correct? false -- 9 parsed (vs. 9 expected)
index #0 CORRECT (Command(COMMENT, ["just a comment"]))
index #1 CORRECT (Command(NONE, []))
index #2 expected Command(WRITE_LOG, ["this is a log"])
actual: Command(WRITE_LOG, ["\"this is a log\""])
index #3 expected Command(WRITE_LOG, ["this is also (in another way) a log"])
actual: Command(FAIL, [])
index #4 expected Command(FAIL, [])
actual: Command(WRITE_LOG, ["\"but this is just a fail"])
index #5 CORRECT (Command(NONE, []))
index #6 expected Command(START_PROCESS, ["17", "program.exe", "True"])
actual: Command(START_PROCESS, ["17", "\"program.exe\"", "True"])
index #7 expected Command(FAIL, [])
actual: Command(START_PROCESS, ["17", "\"this_is_a_fail.exe", "True"])
index #8 CORRECT (Command(NONE, []))
如您所见,按照我的预期,带引号的字符串同样没有通过。这是因为引号只是一种语言结构:在 AST(解析结果)中,您并不关心它在源码里具体是怎么写的。例如,"hello\ world\041"
也可能等价于 "hello world!"
,所以两者都应该产生参数值 hello world!
。
所以,让我们按照我们说的去做:
argument = quoted_string | number | boolean | raw_string;
我们可以添加一些规则:
// notice these are lexemes (no internal skipping):
qi::rule<It, Argument()> argument, quoted_string, number, boolean, raw_string;
并定义它们:
// Quoted string: the surrounding quotes are matched but not stored; no escapes yet.
quoted_string = '"' >> *~char_('"') >> '"';
// raw[] exposes the matched source text, so the value stays a string.
number = raw[double_];
boolean = raw[bool_];
// Unquoted text: any run of characters other than '(' ')' ',' CR LF.
raw_string = +~char_("(),\r\n");
// Alternatives are tried left to right, so raw_string is the fallback.
argument = quoted_string | number | boolean | raw_string;
If you want to allow escaped quotes, use something like this:
// A backslash makes the following character (for example a quote) part of the string.
quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"';
现在,我想说您可能希望 Argument
类似于 variant<double, std::string, bool>
,而不仅仅是 std::string
。
仅此更改,所有问题几乎都消失了:Live On Compiler Explorer:
Parsed all correct? false -- 9 parsed (vs. 9 expected)
index #0 CORRECT (Command(COMMENT, ["just a comment"]))
index #1 CORRECT (Command(NONE, []))
index #2 CORRECT (Command(WRITE_LOG, ["this is a log"]))
index #3 CORRECT (Command(WRITE_LOG, ["this is also (in another way) a log"]))
index #4 CORRECT (Command(FAIL, []))
index #5 CORRECT (Command(NONE, []))
index #6 CORRECT (Command(START_PROCESS, ["17", "program.exe", "True"]))
index #7 expected Command(FAIL, [])
actual: Command(START_PROCESS, ["17", "this_is_a_fail.exe, True)\n\"this_is_a_fail.exe", "True"])
index #8 CORRECT (Command(NONE, []))
现在,索引 #7 看起来非常古怪,但这其实是 Spirit 中一个众所周知的现象¹。启用 BOOST_SPIRIT_DEBUG 即可看到:
<argument>
<try>"this_is_a_fail.exe,</try>
<quoted_string>
<try>"this_is_a_fail.exe,</try>
<fail/>
</quoted_string>
<number>
<try>"this_is_a_fail.exe,</try>
<fail/>
</number>
<boolean>
<try>"this_is_a_fail.exe,</try>
<fail/>
</boolean>
<raw_string>
<try>"this_is_a_fail.exe,</try>
<success>, True)</success>
<attributes>[[t, h, i, s, _, i, s, _, a, _, f, a, i, l, ., e, x, e, ,, , T, r, u, e, ), ", t, h, i, s, _, i, s, _, a, _, f, a, i, l, ., e, x, e]]</attributes>
</raw_string>
<success>, True)</success>
<attributes>[[t, h, i, s, _, i, s, _, a, _, f, a, i, l, ., e, x, e, ,, , T, r, u, e, ), ", t, h, i, s, _, i, s, _, a, _, f, a, i, l, ., e, x, e]]</attributes>
</argument>
因此,该字符串被接受为原始字符串,即使它以 "
开头。这很容易修复,但我们甚至不需要。我们可以只应用 qi::hold
来避免重复:
argument = qi::hold[quoted_string] | number | boolean | raw_string;
结果:
actual: Command(START_PROCESS, ["17", "\"this_is_a_fail.exe", "True"])
但是,如果您期望这一行解析失败,那就把另一个问题也修掉:
raw_string = +~char_("\"(),\r\n"); // note the \"
Note: In the off-chance you really only require it to not start with a quote:
raw_string = !lit('"') >> +~char_("(),\r\n");
I guess by now you see the problem with a "loose rule" like that, so I don't recommend it.
You could express the requirement another way though, saying "if an argument starts with
'"'
then it MUST be a quoted_string
". Use an expectation point there:
quoted_string = '"' > *('\\' >> char_ | ~char_('"')) > '"';
This has the effect that failure to parse a complete
quoted_string
will throw an expectation_failed
exception.
总结/清单
这就是我们最终得到的:
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fmt/ranges.h>
namespace script {
// An argument is kept as text; a command carries a list of them.
using Argument = std::string;
using Arguments = std::vector<Argument>;
// A single script command: its kind plus its arguments.
struct Command {
// Kinds of command a script line can be classified as.
enum Type {
NONE,
WRITE_LOG,
INSERT_LABEL,
START_PROCESS,
END_PROCESS,
COMMENT,
FAIL
};
Type type{Type::NONE}; // a default-constructed command is NONE
Arguments args;
// Member-wise comparison; also provides the == and != used to check parse results.
auto operator<=>(Command const&) const = default;
};
// A sequence of commands.
using Commands = std::vector<Command>;
} // namespace script
BOOST_FUSION_ADAPT_STRUCT(script::Command, type, args)
namespace script {
namespace qi = boost::spirit::qi;
/// Spirit.Qi grammar that turns a script into a list of Commands, one per line.
///
/// Every line is tried, in this order, as an empty line (NONE), a known
/// command call, a `//` comment and finally the catch-all FAIL.
///
/// @tparam It  iterator type of the input being parsed
template <typename It> class Parser : public qi::grammar<It, Commands()> {
  public:
    Parser() : Parser::base_type(start) {
        using namespace qi;

        // Keywords accepted as command names.
        type.add //
            ("WriteLog", Command::WRITE_LOG) //
            ("InsertLabel", Command::INSERT_LABEL) //
            ("StartProcess", Command::START_PROCESS) //
            ("EndProcess", Command::END_PROCESS);

        // Empty line: optional blanks followed (look-ahead only) by end of line or input.
        none = omit[*blank] >> &(eol | eoi) //
            >> attr(Command{Command::NONE, {}});

        // Quoted text: the quotes are matched but not stored; a backslash makes
        // the following character (for example a quote) part of the text.
        quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"';
        // raw[] exposes the matched source text, so numbers and booleans stay strings.
        number  = raw[double_];
        boolean = raw[bool_];
        // Unquoted text must not contain a quote, so an unterminated quoted
        // string cannot be accepted as raw text.
        raw_string = +~char_("\"(),\r\n");
        // hold[] discards characters collected by a quoted_string that fails half-way,
        // so they are not duplicated in the attribute of the alternative that matches.
        argument = qi::hold[quoted_string] | number | boolean | raw_string;

        // Command call: keyword, '(', one or more comma-separated arguments, ')'.
        command = type >> '(' >> argument % ',' >> ')';

        // Comment: "//" followed by the rest of the line, stored as the single argument.
        comment = "//" //
            >> attr(Command::COMMENT) //
            >> as_string[lexeme[*~char_("\r\n")]];

        // Catch-all: consume the rest of the line and report it as FAIL.
        fail = omit[*~char_("\r\n")] >> attr(Command{Command::FAIL, {}});

        line  = none | command | comment | fail; // keep fail last
        start = skip(blank)[line % eol] >> eoi;

        BOOST_SPIRIT_DEBUG_NODES((start)(line)(fail)(comment)(command)(
            argument)(none)(quoted_string)(raw_string)(boolean)(number))
    }

  private:
    qi::symbols<char, Command::Type> type;
    qi::rule<It, Command(), qi::blank_type> line, none, command, comment, fail;
    // notice these are lexemes (no internal skipping):
    qi::rule<It, Argument()> argument, quoted_string, number, boolean, raw_string;
    qi::rule<It, Commands()> start;
};
/// Parses the script read from `in` and returns its commands.
///
/// Whitespace skipping is switched off on the stream so the grammar sees every character.
///
/// @param in  stream holding the script text
/// @return    the parsed commands
/// @throws std::runtime_error  ("command parse error") when the grammar does not match
Commands parse(std::istream& in)
{
    using It = boost::spirit::istream_iterator;
    static const Parser<It> parser;

    Commands commands;
    if (!qi::parse(It{in >> std::noskipws}, {}, parser, commands))
        throw std::runtime_error("command parse error");

    // Returning the named local lets the result be moved or elided; the former
    // `cond ? commands : throw ...` expression forced a copy of the vector.
    return commands;
}
struct Formatter {
static constexpr auto name(script::Command::Type type) {
return std::array{"NONE", "WRITE_LOG", "INSERT_LABEL",
"START_PROCESS", "END_PROCESS", "COMMENT",
"FAIL"}
.at(static_cast<int>(type));
}
auto parse(auto& ctx) const { return ctx.begin(); }
auto format(script::Command const& cmd, auto& ctx) const {
return format_to(ctx.out(), "Command({}, {})", name(cmd.type), cmd.args);
}
};
} // namespace script
template <> struct fmt::formatter<script::Command> : script::Formatter {};
// Sample script used as parser input. The two empty lines and the final
// newline are significant: each of them is expected to produce a NONE entry
// (9 entries in total); the lines with an unterminated quote are expected to
// be reported as FAIL.
std::stringstream ss{
R"(// just a comment

WriteLog("this is a log")
WriteLog("this is also (in another way) a log")
WriteLog("but this is just a fail)

StartProcess(17, "program.exe", True)
StartProcess(17, "this_is_a_fail.exe, True)
)"};
/// Parses the sample script and prints, entry by entry, whether the result
/// matches the expected commands. Any exception is reported on stdout.
int main() {
    using namespace script;

    // Expected parse result for the sample script, in input order.
    static const Commands expected{
        {Command::COMMENT, {"just a comment"}},
        {Command::NONE, {}},
        {Command::WRITE_LOG, {"this is a log"}},
        {Command::WRITE_LOG, {"this is also (in another way) a log"}},
        {Command::FAIL, {}},
        {Command::NONE, {}},
        {Command::START_PROCESS, {"17", "program.exe", "True"}},
        {Command::FAIL, {}},
        {Command::NONE, {}},
    };

    try {
        auto const parsed = script::parse(ss);
        fmt::print("Parsed all correct? {} -- {} parsed (vs. {} expected)\n",
                   (parsed == expected), parsed.size(), expected.size());

        // Compare entry by entry over the part both lists have in common.
        auto const common = std::min(expected.size(), parsed.size());
        for (auto i = 0u; i < common; ++i) {
            if (expected[i] == parsed[i]) {
                fmt::print("index #{} CORRECT ({})\n", i, parsed[i]);
                continue;
            }
            fmt::print("index #{} expected {}\n"
                       " actual: {}\n",
                       i, expected[i], parsed[i]);
        }
    } catch (std::exception const& e) {
        fmt::print("Exception: {}\n", e.what());
    }
}
输出:
Parsed all correct? true -- 9 parsed (vs. 9 expected)
index #0 CORRECT (Command(COMMENT, ["just a comment"]))
index #1 CORRECT (Command(NONE, []))
index #2 CORRECT (Command(WRITE_LOG, ["this is a log"]))
index #3 CORRECT (Command(WRITE_LOG, ["this is also (in another way) a log"]))
index #4 CORRECT (Command(FAIL, []))
index #5 CORRECT (Command(NONE, []))
index #6 CORRECT (Command(START_PROCESS, ["17", "program.exe", "True"]))
index #7 CORRECT (Command(FAIL, []))
index #8 CORRECT (Command(NONE, []))
¹ 参见示例boost::spirit alternative parsers return duplicates(链接到另外三个同类)