Spirit X3,指的是之前匹配的值

Spirit X3, referring to a previously matched value

我正在 Spirit X3 中编写解析器以熟悉它,尽管我非常熟悉 Qi,但我仍然在 X3 中遇到一些绊脚石。

例如,Qi 的示例中包含一个基本的 XML 解析器,它展示了如何使用 Phoenix 占位符来匹配先前已匹配的值。但是,在 X3 中我只能勉强摸索出如下写法:

#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

namespace x3 = boost::spirit::x3;

namespace mytest
{

// Attribute type of the simpleTag rule: a tag name plus the text enclosed
// between the opening and the closing tag.
struct SimpleElement
{
    // Name read from the opening "[tag]".
    std::string tag;
    // Text found between the opening and the closing tag.
    std::string content;
};

} // namespace mytest

// Adapt SimpleElement as a Boost.Fusion sequence whose elements are, in order,
// tag and content.
BOOST_FUSION_ADAPT_STRUCT
(
    mytest::SimpleElement, tag, content
)

namespace mytest
{

namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;

using x3::lit;
using x3::lexeme;
using ascii::char_;

// Rule named "simpleTag" whose attribute type is SimpleElement.
const x3::rule<class SimpleElementID, SimpleElement> simpleTag = "simpleTag";

// Semantic action: copy the text just matched into the tag member of the
// rule's attribute.
auto assignTag = [](auto& ctx)
{
    auto&& element = x3::_val(ctx);
    element.tag = x3::_attr(ctx);
};

// Semantic action: let the parse continue only when the text just matched is
// equal to the tag member already stored in the rule's attribute.
auto testTag = [](auto& ctx)
{
    auto const& expected = x3::_val(ctx).tag;
    auto const& actual = x3::_attr(ctx);
    x3::_pass(ctx) = (expected == actual);
};

// Semantic action: copy the text just matched into the content member of the
// rule's attribute.
auto assignContent = [](auto& ctx)
{
    auto&& element = x3::_val(ctx);
    element.content = x3::_attr(ctx);
};

// Grammar for "[tag]content[/tag]".
auto const simpleTag_def
    = '['
    // Opening tag name: everything up to ']' is stored via assignTag.
    >> x3::lexeme[+(char_ - ']')][assignTag]
    >> ']'
    // Content: every character up to the first "[/" is stored via
    // assignContent, so the content itself cannot contain "[/".
    >> x3::lexeme[
        +(char_ - x3::lit("[/"))]
            [assignContent]
    >> "[/"
    // Closing tag name: testTag rejects it unless it equals the opening name.
    >> x3::lexeme[+(char_ - ']')][testTag]
    >> ']'
    ;

// Associates the simpleTag rule with simpleTag_def.
BOOST_SPIRIT_DEFINE(simpleTag);

} // namespace mytest


int main() 
{

const std::string text = "[test]Hello World![/test]";
std::string::const_iterator start = std::begin(text);
const std::string::const_iterator stop = std::end(text);

mytest::SimpleElement element{};

bool result = 
    phrase_parse(start, stop, mytest::simpleTag, x3::ascii::space, element);

if (!result)
{
    std::cout << "failed to parse!\n";
}
else
{
    std::cout << "tag    : " << element.tag << '\n';
    std::cout << "content: " << element.content << '\n';
}

}

(Link: https://wandbox.org/permlink/xLZN9plcOwkSKCrD )

这行得通,但是如果我尝试解析类似 [test]Hello [/World[/test] 的内容,它就不起作用,因为我没有在此处指定正确的排除条件:

    >> x3::lexeme[
        +(char_ - x3::lit("[/"))]
            [assignContent]

基本上我想告诉解析器类似这样的信息:

    >> x3::lexeme[
        +(char_ - (x3::lit("[/")  << *the start tag* << ']') )]
            [assignContent]

我该怎么做呢?另外,我引用开始标记并稍后匹配它的方式,是在 X3 中实现这一点的“最佳”方式吗?还是有更好、更受推荐的方式?

谢谢!

问得好。

最好的答案是完全照搬 XML 的做法:在标签数据中禁止出现 [/。事实上,XML 禁止出现 <(因为它可能会打开一个嵌套标签,而您不希望为了确定它是否是一个有效的子标签而不得不预读整个流)。

XML uses character entities ("escapes" like &lt; and &gt;) or unparsed character data (CDATA[]) to encode contents that requires these characters.

接下来,您当然可以像您已经做的那样,利用 tag 属性成员来执行否定前瞻断言(!closeTag 或 - closeTag)。

稍微重新整理一下规则的写法,结果还不算太糟:

Note that I removed the need for manual propagation of the tag/contents by passing `true` as the third template argument (the trailing `, true>`) of the simpleTag rule. See Boost Spirit: "Semantic actions are evil"?

// Passing true as the third template argument removes the need to propagate
// tag/content by hand, so only the closing-tag check keeps a semantic action.
const x3::rule<class SimpleElementID, SimpleElement, true> simpleTag = "simpleTag";
// Passes only when the matched closing name equals the tag stored in the rule's attribute.
auto testTag = [](auto& ctx) { _pass(ctx) = (_val(ctx).tag == _attr(ctx)); };

// "[name]"
auto openTag     = '[' >> x3::lexeme[+(char_ - ']')] >> ']';
// "[/name]", accepted only when testTag passes.
auto closeTag    = "[/" >> x3::lexeme[+(char_ - ']')] [testTag] >> ']';
// Content: every character up to, but not including, an accepted closeTag.
auto tagContents = x3::lexeme[ +(char_ - closeTag) ];

auto const simpleTag_def
    =  openTag
    >> tagContents
    // The closing tag is matched but its attribute is omitted.
    >> x3::omit [ closeTag ]
    ;

参见 Live On Coliru

背景

这行得通,但最终变得相当笨拙,因为这意味着到处使用语义操作,而且还违背了属性引用的自然绑定。

稍微跳出框框思考:

Qi 使用 qi::locals 或继承属性(inherited attributes)来实现这一点(参见文档中一个非常相似的示例:MiniXML)。

这两者都会产生用您的信息扩展解析器上下文的净效果。

X3 没有这样的“高级”功能。但它确实具有扩展上下文的构建块:x3::with<>(data) [ p ]。

x3::with

在这个简单的示例中,它似乎有点过分了,但在某些时候,您会欣赏如何在规则中使用额外的上下文,而不会将属性类型作为人质:

// Tag type identifying the tag-name value kept in the parser context.
struct TagName{};
// Parses "[name]" and stores the name into the context value tagged TagName.
auto openTag
    = x3::rule<struct openTagID, std::string, true> {"openTag"}
    = ('[' >> x3::lexeme[+(char_ - ']')] >> ']')
        [([](auto& ctx) { x3::get<TagName>(ctx) = _attr(ctx); })]
    ;
// Parses "[/name]" and passes only when the name equals the context value
// tagged TagName.
auto closeTag
    = x3::rule<struct closeTagID, std::string, true> {"closeTag"}
    = ("[/" >> x3::lexeme[+(char_ - ']')] >> ']')
        [([](auto& ctx) { _pass(ctx) = (x3::get<TagName>(ctx) == _attr(ctx)); })]
    ;
// Content: every character up to, but not including, a matching closeTag.
// The rule has its own ID tag (tagContentsID) instead of reusing openTagID,
// so each rule in this grammar is identified by a distinct tag type.
auto tagContents
    = x3::rule<struct tagContentsID, std::string> {"tagContents"}
    = x3::lexeme[ +(char_ - closeTag) ];

// x3::with<TagName> adds a std::string to the context of the enclosed parsers:
// openTag writes the tag name into it and closeTag compares against it.
auto const simpleTag
    = x3::rule<class SimpleElementID, SimpleElement, true> {"simpleTag"}
    = x3::with<TagName>(std::string()) [
        openTag
        >> tagContents
        // The closing tag is matched but its attribute is omitted.
        >> x3::omit [ closeTag ]
    ];

参见 Live On Coliru


与其尝试用绳子和火柴建造一艘船,我建议制作一个适合这项工作的工具。

#include <cstring>
#include <string>

#include <boost/spirit/home/x3.hpp>

namespace x3e
{

// Context tag under which confix_directive exposes the prefix's attribute to
// the postfix parser (read back by confix_value_matcher).
struct confix_tag {};

namespace x3 = boost::spirit::x3;

// Searches [iter, last] for the first position at which `p` matches.
//
// p        parser to try at each successive position
// iter     in: position where the search starts;
//          out: just past the match on success, `last` when nothing matched
// last     end of the input
// context, rcontext   forwarded unchanged to p.parse
//
// Returns the position where the match starts. `last` is returned when no
// position matched (or when the match starts exactly at `last`); the search
// never advances beyond `last`.
template <typename Parser, typename Iterator,
    typename Context, typename RContext>
inline Iterator seek(Parser const& p, Iterator& iter, Iterator const& last,
    Context const& context, RContext& rcontext)
{
    Iterator start = iter;
    for (;;)
    {
        // Every attempt begins at `start`, whatever the previous attempt did to `iter`.
        iter = start;
        if (p.parse(iter, last, context, rcontext, x3::unused))
            return start;
        // The end position has been tried as well: stop instead of stepping past it.
        if (start == last)
            break;
        ++start;
    }
    iter = last;
    return last;
}


// Directive that parses `prefix subject postfix`.
//
// The attribute is accessed as a Fusion sequence: element 0 receives the
// prefix's attribute and element 1 the subject's. While searching for the
// postfix, the prefix's attribute is made available in the context under
// confix_tag, so the postfix parser can refer to it.
template <typename Prefix, typename Subject, typename Postfix>
struct confix_directive : x3::unary_parser<Subject, confix_directive<Prefix, Subject, Postfix>>
{
    using base_type = x3::unary_parser<Subject, confix_directive<Prefix, Subject, Postfix>>;
    static bool const is_pass_through_unary = true;

    constexpr confix_directive(Prefix const& prefix, Subject const& subject, Postfix const& postfix)
        : base_type(subject)
        , prefix(prefix)
        , postfix(postfix)
    {}

    // Parses the prefix, then repeatedly looks for the next postfix match and
    // tries the subject on the input between the prefix and that match.
    // `first` is advanced (to just past the postfix) only on success.
    template <typename Iterator,
        typename Context, typename RContext, typename Attribute>
    bool parse(Iterator& first, Iterator const& last,
        Context const& context, RContext& rcontext, Attribute& attr) const
    {
        auto& prefix_attr = boost::fusion::at_c<0>(attr);

        Iterator body_begin = first;
        if (!prefix.parse(body_begin, last, context, rcontext, prefix_attr))
            return false;

        // `scan` is where the next postfix search starts; seek moves it past
        // the postfix it finds.
        Iterator scan = body_begin;
        for (;;)
        {
            Iterator const body_end = x3e::seek(postfix, scan, last, x3::make_context<confix_tag>(prefix_attr, context), rcontext);
            if (body_end == last)
                return false;

            if (this->subject.parse(body_begin, body_end, context, rcontext, boost::fusion::at_c<1>(attr)))
            {
                first = scan;
                return true;
            }

            // The subject rejected this candidate; give up once the input is exhausted.
            if (scan == last)
                return false;
        }
    }

    Prefix prefix;
    Postfix postfix;
};

// Generator holding a prefix and a postfix parser; applying operator[] to a
// subject produces the corresponding confix_directive.
template<typename Prefix, typename Postfix>
struct confix_gen
{
    // subject: a parser, or anything x3::as_parser can turn into one.
    template<typename Subject>
    constexpr confix_directive<
        Prefix, typename x3::extension::as_parser<Subject>::value_type, Postfix>
    operator[](Subject const& subject) const
    {
        // Qualified call: an unqualified as_parser is only found through ADL,
        // which fails for subjects that are not X3 types (e.g. a char literal).
        return { prefix, x3::as_parser(subject), postfix };
    }

    Prefix prefix;
    Postfix postfix;
};


// Creates a confix generator from a prefix and a postfix; use it as
// confix(prefix, postfix)[subject].
//
// prefix, postfix: parsers, or anything x3::as_parser can turn into one.
template <typename Prefix, typename Postfix>
constexpr confix_gen<typename x3::extension::as_parser<Prefix>::value_type,
    typename x3::extension::as_parser<Postfix>::value_type>
confix(Prefix const& prefix, Postfix const& postfix)
{
    // Qualified calls: an unqualified as_parser is only found through ADL,
    // which fails for arguments that are not X3 types (e.g. char literals).
    return { x3::as_parser(prefix), x3::as_parser(postfix) };
}

// Attribute-less parser that matches, element by element, the value stored in
// the context under confix_tag.
struct confix_value_matcher : x3::parser<confix_value_matcher>
{
    using attribute_type = x3::unused_type;
    static bool const has_attribute = false;

    // Returns true and moves `iter` past the matched value when the input at
    // `iter` (after pre-skipping) equals the confix_tag context value.
    // On a mismatch `iter` stays at the pre-skipped position rather than
    // somewhere in the middle of a partial match.
    template <typename Iterator, typename Context, typename RContext>
    static bool parse(Iterator& iter, Iterator const& last,
        Context const& context, RContext&, x3::unused_type)
    {
        x3::skip_over(iter, last, context);

        // Match on a copy and commit only after the whole value matched.
        Iterator cursor = iter;
        for (auto const& e : x3::get<confix_tag>(context))
            if (cursor == last || e != *cursor++)
                return false;

        iter = cursor;
        return true;
    }
};

// Ready-made instance, used as x3e::confix_value inside a postfix parser.
constexpr confix_value_matcher confix_value{};
}

#include <boost/fusion/include/adapt_struct.hpp>

namespace mytest
{

// Parse result: a tag name plus the text enclosed between the opening and the
// closing tag.
struct SimpleElement
{
    // Name read from the opening "[tag]".
    std::string tag;
    // Text found between the opening and the closing tag.
    std::string content;
};

} // namespace mytest

// Adapt SimpleElement as a Boost.Fusion sequence whose elements are, in order,
// tag and content; confix_directive fills them through fusion::at_c<0> / at_c<1>.
BOOST_FUSION_ADAPT_STRUCT
(
    mytest::SimpleElement, tag, content
)

#include <iostream>

int main()
{
    namespace x3 = boost::spirit::x3;

    for (auto text : { "[test]Hello World![/test]",
                       "[test]Hello [/World[/test]" }) {
        std::cout << "text   : " << text << '\n';
        auto start = text, stop = text + std::strlen(text);

        mytest::SimpleElement element;

        auto const simpleTag
            = x3e::confix(x3::lexeme['[' >> +~x3::char_(']') >> ']'],
                          x3::lexeme["[/" >> x3e::confix_value >> ']'])
                              [x3::lexeme[*x3::char_]];

        bool result =
            phrase_parse(start, stop, simpleTag, x3::ascii::space, element);

        if (!result) {
            std::cout << "failed to parse!\n";
        }
        else {
            std::cout << "tag    : " << element.tag << '\n';
            std::cout << "content: " << element.content << '\n';
        }
        std::cout << '\n';
    }
}

输出:

text   : [test]Hello World![/test]
tag    : test
content: Hello World!

text   : [test]Hello [/World[/test]
tag    : test
content: Hello [/World

https://wandbox.org/permlink/qxIaQYtgaWdk9Dog