单程解析器中的多个匹配项?

Several matches in a one pass parser?

我正在尝试(还)用从日志中解析的数据填充几个向量。关键是尽可能快和高效地完成它,所以我想只通过一次收集所有数据(而不是 "or" 规则之间)。

我发现了下一个问题:

1) 每次我使用 spirit 但它没有按预期工作时,我发现自己完全迷失并尝试了两个小时的测试和错误。是否有任何调试指令可以提示出现了什么问题?

2) 我使用 phoenix::construct 的方式有效吗?我的意思是,是否可以像我在代码中所做的那样使用它,以避免使用符号表(qi::symbols)?

3) 是否有任何方法可以获取某条规则解析出的信息并将其用于另一条规则?我试过 phoenix::ref,但是当它与 BOOST_FUSION_ADAPT_STRUCT 结合使用时会把数据弄乱。

4) 我像这样使用代码是否犯了严重错误?我的意思是,我应该使用一个用自动规则包装它的语法,还是只使用两个规则来简化,一个用于 "location",另一个用于 "location + event",然后使用 phoenix?

#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <cstring> // strlen

// Iterator type shared by all rules and parse entry points: a pointer into a char buffer.
typedef char const* It;
// Event kinds that may trail a log line (SLOPE == 0, GEAR == 1).
enum kind { SLOPE, GEAR };

// One parsed log line ("location" record).
// The declaration order differs from the order of the fields in a log line;
// the order used for parsing is the one given to BOOST_FUSION_ADAPT_STRUCT.
struct Location {
    int driver;        // value after "Driver: "
    double time;       // seconds value that precedes " s"
    double vel;        // value after "Speed: "
    double km;         // value after "Km: "
    std::string date;  // bracketed timestamp, kept as text
    std::string road;  // token after "Road: "
};

// Optional trailing event of a log line (" - SLOPE: x" or " - GEAR: x").
struct Event {
    int event;     // event kind, see enum kind
    double time;   // not part of the Fusion adaptation, so attribute propagation never fills it
    double value;  // number that follows the event label
};

// Fusion adaptation: the members are listed in log-line order (not declaration
// order), which is the order in which parsed attributes are assigned.
BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km)
// Event::time is meant to equal the time of the preceding Location header.
// It is deliberately left out of the adaptation; do not adapt it unless necessary.
BOOST_FUSION_ADAPT_STRUCT(Event, event, value)


// Global result containers. In production code they are defined in another
// compilation unit and declared extern here; do not move them into the
// dispatcher struct.
std::vector<Location> container1;
std::vector<Event> container2;

// Stateless router: each overload appends the record to the global container
// that holds records of that type.
struct dispatcher
{
    static void add(const Location& loc) { container1.push_back(loc); }
    static void add(const Event& ev)     { container2.push_back(ev);  }
};

namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;

// Spirit customization points that present dispatcher as a container attribute
// whose elements are either a Location or an Event.
namespace boost { namespace spirit { namespace traits
{
    // Mark dispatcher as a container attribute.
    template <> struct is_container<dispatcher> : std::true_type { };

    // Element type of that container: a variant over the two record types.
    template <> struct container_value<dispatcher>
    {
        typedef boost::variant<Location, Event> type;
    };

    // Insertion hook: forwards the active variant alternative to dispatcher::add.
    template <typename T> struct push_back_container<dispatcher, T>
    {
        // Visitor that selects the dispatcher::add overload for the held type.
        struct Visitor
        {
            typedef void result_type;
            template <typename U> void operator()(U const& ev) const { dispatcher::add(ev); }
        };

        // The dispatcher argument itself is not used (dispatcher::add is static).
        // Always reports success.
        static bool call(dispatcher& log, T const& attribute)
        {
            boost::apply_visitor(Visitor(), attribute);
            return true;
        }
    };
} } }

// Test 1: parses [b, e) with the line rule wrapped in seek[], so input that
// does not match a line is skipped rather than ending the parse. A temporary
// dispatcher is passed as the attribute.
void parse_test_1(It b, It e) {
    using namespace qi;

    // Timestamp shape: 4 digits '-' 3 letters '-' 2 digits ' ' hh:mm:ss '.' digits.
    // qi::copy keeps the expression by value so it can be held in an auto variable.
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // Optional event suffixes. The semantic action on lit() is intended to
    // supply the event kind.
    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;

    // One log line. Attribute-producing elements appear in the order
    // date, time, driver, speed, road, km, then the optional event.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    // Repeat "skip ahead to the next matching line" until nothing more matches.
    parse(b, e, *boost::spirit::repository::qi::seek[line], dispatcher());
}

// Test 2: parses [b, e) as a plain repetition of the line rule (no seek[]),
// so parsing stops at the first line that does not match. A temporary
// dispatcher is passed as the attribute.
void parse_test_2(It b, It e) {
    using namespace qi;

    // Timestamp shape: 4 digits '-' 3 letters '-' 2 digits ' ' hh:mm:ss '.' digits.
    // qi::copy keeps the expression by value so it can be held in an auto variable.
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // Optional event suffixes. The semantic action on lit() is intended to
    // supply the event kind.
    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;

    // One log line. Attribute-producing elements appear in the order
    // date, time, driver, speed, road, km, then the optional event.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    parse(b, e, *line, dispatcher());
}

// Test input 1: some of these lines do NOT match the line grammar.
static char input1[] = 
"[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => I do not care about this line\n\
[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:14:01.170203] - 2.440 s => Neither I do about this other line\n\
[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

// Length of input1 without the terminating NUL; used to form the end pointer.
static const size_t len1 = strlen(input1);

// Test input 2: every line is expected to match the line grammar.
static char input2[] = 
"[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

// Length of input2 without the terminating NUL; used to form the end pointer.
static const size_t len2 = strlen(input2);

// Runs both tests in turn and prints the contents of the two global
// containers after each one.
int main()
{
    parse_test_1(input1, input1+len1);
    std::cout << "TEST 1:\n";
    std::cout << "Locations:\n";
    std::for_each(std::begin(container1), std::end(container1), [](const Location& loc)
    {
        std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << std::endl;
    });

    // Event::time is printed here, but it is not Fusion-adapted, so none of
    // the parsing code in this listing assigns it.
    std::cout << "Events:\n";
    std::for_each(std::begin(container2), std::end(container2), [](const Event& ev)
    {
        std::cout << ev.time << " s => EVENT(" << ev.event << ") : " << ev.value << std::endl;
    });

    // The containers are global, so they must be emptied between the tests.
    container1.clear();
    container2.clear();

    parse_test_2(input2, input2+len2);
    std::cout << "\nTEST 2:\n";
    std::cout << "Locations:\n";
    std::for_each(std::begin(container1), std::end(container1), [](const Location& loc)
    {
        std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << std::endl;
    });

    std::cout << "Events:\n";
    std::for_each(std::begin(container2), std::end(container2), [](const Event& ev)
    {
        std::cout << ev.time << " s => EVENT(" << ev.event << ") : " << ev.value << std::endl;
    });

    return 0;
}

结果:预期结果应该是这个:

TEST 1:
Locations:
[2018-Mar-13 13:13:59.580482] - 0.2 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.819966] - 2.44 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:15:01.819966] - 3.44 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90
Events:
0.2 s => EVENT(0): 5.5
1.79 s => EVENT(1): 1
3.44 s => EVENT(0): 10

TEST 2:
Locations:
[2018-Mar-13 13:13:59.580482] - 0.2 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.819966] - 2.44 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:15:01.819966] - 3.44 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90
Events:
0.2 s => EVENT(0): 5.5
1.79 s => EVENT(1): 1
3.44 s => EVENT(0): 10

First off: I gave you all of that in an earlier answer, under "Separate vectors with a trait". The only difference appears to be the types and the fact that you made the LogEvents members global variables (ick).

关于你的问题代码:

 parse(b, e, *boost::spirit::repository::qi::seek[line], dispatcher());

你为什么要把 dispatcher() 传到那里?dispatcher 不是兼容的属性类型(实际上它只有静态的非数据成员)。

所以,让我们把它修复成一个正常的数据结构(而不​​是全局变量):

// Result of one parse run: the collected locations and events, held per
// instance instead of in globals.
struct ParsedData
{
    std::vector<Location> _locations;
    std::vector<Event> _events;
    // Overloads that append a record to the vector of its type.
    void add(const Location& loc) { _locations.push_back(loc); }
    void add(const Event& ev)     { _events.push_back(ev);  }
};

注意,容器不再是全局变量,而且有了恰当的名字。

boost::spirit::traits 的特化保持不变(mutatis mutandis,即只作必要的修改),只是我们现在有了一个数据实例,所以要绑定它(同样可参见上面链接的原始示例的第 52 行)。接下来修复调用处:

ParsedData data;
parse(b, e, *boost::spirit::repository::qi::seek[line], data);
return data;

从这里开始,一切顺利。

进一步清理和演示

备注:

  • 没有理由在 C++ 中使用原始字符数组和 strlen(我使用 std::string)。
  • 没有理由复制所有代码并把所有东西都命名为 _1、_2。我把 main 写成:

    // Run each parser through the shared test driver.
    int main() {
        do_test("TEST 1", input1, parse_test_1);
        do_test("TEST 2", input2, parse_test_2);
    }
    
  • 在 ranged-for 就足够的地方,没有理由使用 for_each 加 lambda。这是 do_test:

    // Runs parser f over input and prints, under the given caption, the
    // locations and events it returned.
    void do_test(std::string caption, std::string const& input, ParsedData(*f)(It,It)) {
        ParsedData const data = f(input.begin(), input.end());
        std::cout << caption << ":\n";
        std::cout << "Locations:\n";
        for (Location const& loc : data._locations) {
            std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << std::endl;
        }
    
        std::cout << "Events:\n";
        for (Event const& ev : data._events) {
            std::cout << " EVENT(" << ev.event << ") : " << ev.value << std::endl;
        }
    }
    
  • 我从 Event 删除了 time 成员,因为它未被使用。

完整列表

Live On Coliru

#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <cstring> // strlen

// Iterator type shared by all rules: the inputs are std::string objects.
typedef std::string::const_iterator It;
// Event kinds that may trail a log line (SLOPE == 0, GEAR == 1).
enum kind { SLOPE, GEAR };

// One parsed log line ("location" record).
// The declaration order differs from the order of the fields in a log line;
// the order used for parsing is the one given to BOOST_FUSION_ADAPT_STRUCT.
struct Location {
    int driver;        // value after "Driver: "
    double time;       // seconds value that precedes " s"
    double vel;        // value after "Speed: "
    double km;         // value after "Km: "
    std::string date;  // bracketed timestamp, kept as text
    std::string road;  // token after "Road: "
};

// Optional trailing event of a log line (" - SLOPE: x" or " - GEAR: x").
struct Event {
    int event;     // event kind, see enum kind
    double value;  // number that follows the event label
};

// Fusion adaptation: Location's members are listed in log-line order (not
// declaration order), which is the order in which parsed attributes are assigned.
BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km)
BOOST_FUSION_ADAPT_STRUCT(Event, event, value)

// Result of one parse run: the collected locations and events.
struct ParsedData {
    std::vector<Location> _locations;
    std::vector<Event> _events;
    // Overloads that append a record to the vector of its type.
    void add(const Location& loc) { _locations.push_back(loc); }
    void add(const Event& ev)     { _events.push_back(ev);  }
};

namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;

// Spirit customization points: make ParsedData usable as a container attribute
// whose elements are "either a Location or an Event".
namespace boost { namespace spirit { namespace traits {

    // ParsedData is to be treated as a container attribute.
    template <>
    struct is_container<ParsedData> : std::true_type {};

    // Its element type is a variant over both record kinds.
    template <>
    struct container_value<ParsedData> {
        using type = boost::variant<Location, Event>;
    };

    // Insertion hook: dispatch on the variant's active alternative and store
    // the record through the matching ParsedData::add overload.
    template <typename T>
    struct push_back_container<ParsedData, T> {
        // Visitor bound to the destination object.
        struct Visitor {
            using result_type = void;

            ParsedData &data;

            template <typename Record>
            void operator()(Record const &record) const {
                data.add(record);
            }
        };

        // Always reports success.
        static bool call(ParsedData &log, T const &attribute) {
            boost::apply_visitor(Visitor{ log }, attribute);
            return true;
        }
    };

} } } // namespace boost::spirit::traits

// Test 1: parses [b, e) with the line rule wrapped in seek[], so input that
// does not match a line is skipped. Returns what was collected in a ParsedData.
ParsedData parse_test_1(It b, It e) {
    using namespace qi;

    // Timestamp shape: 4 digits '-' 3 letters '-' 2 digits ' ' hh:mm:ss '.' digits.
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // Optional event suffixes, still written with the question's semantic
    // actions on lit().
    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;

    // One log line. Attribute-producing elements appear in the order
    // date, time, driver, speed, road, km, then the optional event.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    // The attribute is a real object now, so the results can be returned by value.
    ParsedData data;
    parse(b, e, *boost::spirit::repository::qi::seek[line], data);
    return data;
}

// Test 2: parses [b, e) as a plain repetition of the line rule (no seek[]),
// so parsing stops at the first line that does not match. Returns what was
// collected in a ParsedData.
ParsedData parse_test_2(It b, It e) {
    using namespace qi;

    // Timestamp shape: 4 digits '-' 3 letters '-' 2 digits ' ' hh:mm:ss '.' digits.
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // Optional event suffixes, still written with the question's semantic
    // actions on lit().
    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;

    // One log line. Attribute-producing elements appear in the order
    // date, time, driver, speed, road, km, then the optional event.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    ParsedData data;
    parse(b, e, *line, data);
    return data;
}

// Test input 1: some of these lines do NOT match the line grammar.
static std::string const input1 = 
"[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => I do not care about this line\n\
[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:14:01.170203] - 2.440 s => Neither I do about this other line\n\
[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

// Test input 2: every line is expected to match the line grammar.
static std::string const input2 = 
"[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

// Runs parser f over input and prints, under the given caption, the
// locations and events it returned.
void do_test(std::string caption, std::string const& input, ParsedData(*f)(It,It)) {
    ParsedData const data = f(input.begin(), input.end());
    std::cout << caption << ":\n";
    std::cout << "Locations:\n";
    for (Location const& loc : data._locations) {
        std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << std::endl;
    }

    std::cout << "Events:\n";
    for (Event const& ev : data._events) {
        std::cout << " EVENT(" << ev.event << ") : " << ev.value << std::endl;
    }
}

// Run each parser through the shared test driver.
int main() {
    do_test("TEST 1", input1, parse_test_1);
    do_test("TEST 2", input2, parse_test_2);
}

进一步观察:

  1. 我不清楚您何时期望 Event 规则 (slope/gear) 匹配或合成属性。我也不清楚为什么这些是可选的(没有那部分,一行的位置部分不可能匹配)。

  2. 此外,考虑如下规则所暴露的自然属性:

    // Seven attribute-producing elements: date, time, driver, speed, road, km
    // and the optional event.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);
    

    Location 会包含一个额外的字段:

    // Location extended with the optional trailing event, matching the rule's
    // last attribute-producing element -(slope | gear).
    struct Location {
        int driver;
        double time;
        double vel;
        double km;
        std::string date;
        std::string road;
        boost::optional<Event> event;  // empty when the line carries no SLOPE/GEAR suffix
    };
    
    // The new member is appended last, in the same position as in the rule.
    BOOST_FUSION_ADAPT_STRUCT(Event, event, value)
    BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km, event)
    
  3. 这些规则很奇怪:

    // The semantic actions attached to lit() do not set Event::event.
    qi::rule<It, Event()> slope = lit(" - SLOPE: ")[px::construct<int>(kind::SLOPE)] >> double_;
    qi::rule<It, Event()> gear = lit(" - GEAR: ")[px::construct<int>(kind::GEAR)] >> double_;
    

    为什么不完全按照我在所链接的回答中展示的那样(第 57/98 行)使用 symbols 方法?如果你坚持用这种"笨拙"的写法,也不要使用语义动作(参见 Boost Spirit: "Semantic actions are evil"?),而应使用 qi::attr:

    // attr() injects the event kind as an attribute without consuming input,
    // so each rule yields both Event members: event and value.
    qi::rule<It, Event()> slope = " - SLOPE: " >> attr(kind::SLOPE) >> double_;
    qi::rule<It, Event()> gear = " - GEAR: " >> attr(kind::GEAR) >> double_;
    

    其中的一些有益效果是您的编译时间可以减少一半,而且属性值实际上会传播(您的语义操作根本没有效果,并且主动抑制了自动属性传播...)。

有了这些改进,我们得到:

Live On Coliru

#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>

// Iterator type shared by all rules: the input is a std::string.
typedef std::string::const_iterator It;
// Event kinds that may trail a log line (SLOPE == 0, GEAR == 1).
enum kind { SLOPE, GEAR };

// Optional trailing event of a log line (" - SLOPE: x" or " - GEAR: x").
struct Event {
    int event;     // event kind, see enum kind
    double value;  // number that follows the event label
};

// One parsed log line, including its optional trailing event.
struct Location {
    int driver;        // value after "Driver: "
    double time;       // seconds value that precedes " s"
    double vel;        // value after "Speed: "
    double km;         // value after "Km: "
    std::string date;  // bracketed timestamp, kept as text
    std::string road;  // token after "Road: "
    boost::optional<Event> event;  // empty when the line carries no SLOPE/GEAR suffix
};

// Fusion adaptation: Location's members are listed in log-line order (not
// declaration order), with the optional event last.
BOOST_FUSION_ADAPT_STRUCT(Event, event, value)
BOOST_FUSION_ADAPT_STRUCT(Location, date, time, driver, vel, road, km, event)

// With the event stored inside Location, a plain vector is enough as the result.
using ParsedData = std::vector<Location>;

namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;

// Parses [b, e) and returns one Location per matching log line; input that
// does not match the line grammar is skipped by seek[].
//
// seek[] retries its subject at successive input positions with the SAME
// attribute object. A line that matches the timestamp but fails further on
// has by then already written Location::date, and the next successful match
// appends to that string instead of replacing it (two dates glued together).
// hold[] makes every attempt parse into a copy that is swapped into the real
// attribute only on success, so a failed attempt leaves nothing behind.
ParsedData parse_test(It b, It e) {
    using namespace qi;

    // Timestamp shape: 4 digits '-' 3 letters '-' 2 digits ' ' hh:mm:ss '.' digits.
    auto date = copy(
        repeat(4)[digit] >> '-' >> repeat(3)[alpha] >> '-' >> repeat(2)[digit] >> ' ' >> 
        repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> +digit);

    // Optional event suffixes; attr() supplies the event kind as an attribute.
    qi::rule<It, Event()> slope = " - SLOPE: " >> attr(kind::SLOPE) >> double_;
    qi::rule<It, Event()> gear = " - GEAR: " >> attr(kind::GEAR) >> double_;

    // One log line. Attribute-producing elements appear in the order
    // date, time, driver, speed, road, km, then the optional event.
    qi::rule<It, Location()> line = '[' >> raw[date] >> "] - "
        >> double_ >> " s"
        >> " => Driver: "  >> int_
        >> " - Speed: "    >> double_
        >> " - Road: "     >> raw[+graph]
        >> " - Km: "       >> double_
        >> -(slope | gear)
        >> (eol | eoi);

    ParsedData data;
    // hold[] isolates each attempt, see the function comment.
    parse(b, e, *boost::spirit::repository::qi::seek[hold[line]], data);
    return data;
}

// Test input: some of these lines do NOT match the line grammar.
static std::string const input = 
"[2018-Mar-13 13:13:59.580482] - 0.200 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - SLOPE: 5.5\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.0 - Road: A-11 - Km: 90.0 - GEAR: 1\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:14:01.170203] - 1.790 s => I do not care about this line\n\
[2018-Mar-13 13:14:01.819966] - 2.440 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90.0\n\
[2018-Mar-13 13:14:01.170203] - 2.440 s => Neither I do about this other line\n\
[2018-Mar-13 13:15:01.819966] - 3.440 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90.0 - SLOPE: 10\n";

int main() {
    auto parsed = parse_test(input.begin(), input.end());
    std::cout << "Locations:\n";
    for (Location const& loc : parsed) {
        std::cout << "[" << loc.date << "] - " << loc.time << " s => Driver: " << loc.driver << " - Speed: " << loc.vel << " - Road: " << loc.road << " - Km: " << loc.km << std::endl;
        if (loc.event)
            std::cout << " - event: " << loc.event->event << " value: " << loc.event->value << "\n";
    }
}

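打印(注意最后两条记录的日期被拼接在了一起:seek 在不匹配的行上失败后重试时,已经写入属性的日期字符串不会被清空,下一次成功匹配会在其后追加;把解析表达式写成 seek[hold[line]] 可以避免这个问题)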

Locations:
[2018-Mar-13 13:13:59.580482] - 0.2 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
 - event: 0 value: 5.5
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0 - Road: A-11 - Km: 90
 - event: 1 value: 1
[2018-Mar-13 13:14:01.170203] - 1.79 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.1702032018-Mar-13 13:14:01.819966] - 2.44 s => Driver: 0 - Speed: 0.1 - Road: A-11 - Km: 90
[2018-Mar-13 13:14:01.1702032018-Mar-13 13:15:01.819966] - 3.44 s => Driver: 0 - Speed: 0.2 - Road: A-11 - Km: 90
 - event: 0 value: 10