使用 istream 迭代器时解析失败

parsing fails when using istream iterator

我正在使用 boost::spirit 来解析 csv 输入(请不要提出替代方案,这只是测试)。当我将 stdin 的内容读入一个字符串并对其进行迭代时,解析成功;然而,当 std::cin 的内容被直接读取时(通过我自己编写的包装器,因为 phrase_parse 需要一个继承自 std::iterator<std::forward_iterator_tag, T> 的迭代器,而 std::istream_iterator<T> 不这样做),解析失败,我不明白为什么,因为调试输出似乎表明在两种情况下都解析了相同的文本,但结果不同。

我什至尝试遍历 std::cin 并将其放入字符串中,并且解析正确;我不明白为什么提供的迭代器类型会影响结果。这是我正在处理的示例(抱歉,它太大了,但您可以将其插入并轻松编译)。尝试定义宏 SECTION_STRINGSTREAM(成功)或 SECTION_CIN(失败)以观察奇怪的行为(默认行为(成功)是当 std::cin 被读取为字符串时)。

如果您编译后使用 echo "\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" | ./spirit_csv 运行,调试输出清楚地显示整个字符串都被解析了。我还添加了 if (++start == end) std::cerr << "woah";,它在所有情况下都会被触发,所以看起来确实已经解析到了输入的末尾。

// following example from:
// http://www.boost.org/doc/libs/1_58_0/libs/spirit/example/qi/employee.cpp, and
// num_list4.cpp, and others

#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr

// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>

namespace frontend {
namespace spirit = boost::spirit;
namespace qi = spirit::qi;
namespace ascii = spirit::ascii;

// One parsed CSV record.
//
// The member order is significant: BOOST_FUSION_ADAPT_STRUCT lists the members
// in this order and the `start` rule of cursor_parser yields its attributes in
// the same order.
struct cursor {
  std::string file;
  // The numeric fields carry default initializers so that a default-initialized
  // cursor (`cursor c;`) never holds indeterminate values.
  unsigned long long offset = 0;
  unsigned long long line = 0;
  unsigned long long col = 0;
  // TODO: verify the inputs below using an enum.
  // decl/ref/defn/call
  std::string reference_type;
  // variable/function/scope/label/type
  std::string specifier;
  // if specifier is variable/function, then its type
  std::string type;
  std::string language;
  std::string name;
  std::string scope;
};
}

// Adapt frontend::cursor as a Boost.Fusion sequence; the members are listed in
// the order in which they are declared in the struct.
// NOTE(review): the Boost.Fusion documentation writes the (type, name) tuples
// back to back, without commas between them — confirm that the comma-separated
// form used here is accepted by the Boost version in use.
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor, (std::string, file),
                          (unsigned long long, offset),
                          (unsigned long long, line), (unsigned long long, col),
                          (std::string, reference_type),
                          (std::string, specifier), (std::string, type),
                          (std::string, language), (std::string, name),
                          (std::string, scope));

// note: blank_type is so that newlines aren't counted as skippable, because
// they are significant for csv! however, typically you'll be wanting to use
// boost::spirit::ascii::space as your whitespace operator if you really do not
// care about whitespace
namespace frontend {
// Grammar for CSV input in which each line is one `cursor` record; the
// synthesized attribute is the vector of all records.
//
// The skipper is qi::blank_type, so newlines are not skipped and stay
// available to qi::eol as the record separator.
//
// Iterator: iterator type of the input handed to the parse call.
template <typename Iterator>
struct cursor_parser
    : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
  qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
  qi::rule<Iterator, cursor(), qi::blank_type> start;
  qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;

  cursor_parser() : cursor_parser::base_type(vec) {
    using qi::ulong_long;
    using qi::eol;
    using qi::lexeme;
    using ascii::char_;

    // A double-quoted field; lexeme[] turns the skipper off inside the quotes.
    quoted_string %= lexeme['"' >> *(char_ - '"') >> '"'];

    // The numeric fields of cursor are unsigned long long, so they are parsed
    // with ulong_long; uint_ only accepts values that fit in unsigned int.
    start %= quoted_string >> ',' >>  // file
             ulong_long >> ',' >>     // offset
             ulong_long >> ',' >>     // line
             ulong_long >> ',' >>     // col
             quoted_string >> ',' >>  // reference_type
             quoted_string >> ',' >>  // specifier
             quoted_string >> ',' >>  // type
             quoted_string >> ',' >>  // language
             quoted_string >> ',' >>  // name
             quoted_string;           // scope

    // One record per line.
    vec %= start % eol;

    // Short rule names used in the BOOST_SPIRIT_DEBUG trace.
    quoted_string.name("qs");
    debug(quoted_string);
    start.name("s");
    debug(start);
    vec.name("v");
    debug(vec);
  }
};

// Thin wrapper around std::istream_iterator<T> that is tagged as a forward
// iterator.
//
// NOTE(review): only the tag changes. Traversal is still performed by the
// wrapped std::istream_iterator, which is a single-pass input iterator, so
// copies of this wrapper do not traverse the stream independently and the
// multi-pass guarantee implied by std::forward_iterator_tag does not hold.
template <typename T>
class cin_forward_iterator : std::iterator<std::forward_iterator_tag, T> {
private:
  std::istream_iterator<T> i;

public:
  // Constructs the end-of-stream iterator.
  cin_forward_iterator() : i() {}
  // Constructs an iterator that reads values of type T from `in`.
  cin_forward_iterator(std::istream &in) : i(in) {}

  // Returns the value most recently read from the stream.
  const T &operator*() const { return *i; }

  // Pre-increment. Returns *this by reference so that chained increments such
  // as `++(++it)` advance `it` itself rather than a temporary copy.
  cin_forward_iterator &operator++() {
    ++i;
    return *this;
  }

  // Post-increment. Returns a copy holding the value from before the advance.
  cin_forward_iterator operator++(int) {
    cin_forward_iterator previous = *this;
    ++i;
    return previous;
  }

  bool operator==(const cin_forward_iterator &rhs) const { return i == rhs.i; }
  bool operator!=(const cin_forward_iterator &rhs) const {
    return !(*this == rhs);
  }
};
}

namespace std {
// Iterator traits for frontend::cin_forward_iterator<T>. Every nested type is
// taken from std::istream_iterator<T>, except iterator_category, which is
// std::forward_iterator_tag.
template <typename T>
struct iterator_traits<frontend::cin_forward_iterator<T>> {
  using value_type = typename std::istream_iterator<T>::value_type;
  using difference_type = typename std::istream_iterator<T>::difference_type;
  using reference = typename std::istream_iterator<T>::reference;
  using pointer = typename std::istream_iterator<T>::pointer;
  using iterator_category = std::forward_iterator_tag;
};
}

/* try:
echo \
  "\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" \
  | ./spirit_csv
*/
int main() {
  std::vector<frontend::cursor> v;
// succeeds
#ifdef SECTION_STRINGSTREAM
  std::stringstream ss;
  ss << std::cin.rdbuf();
  std::string s(ss.str());
  auto start = s.cbegin();
  auto end = s.cend();
// fails
#elif SECTION_CIN
  noskipws(std::cin);
  frontend::cin_forward_iterator<char> start(std::cin);
  frontend::cin_forward_iterator<char> end;
// succeeds
#else
  noskipws(std::cin);
  frontend::cin_forward_iterator<char> start_in(std::cin);
  frontend::cin_forward_iterator<char> end_in;
  std::string s;
  for (; start_in != end_in; ++start_in) {
    s += *start_in;
  }
  auto start = s.begin();
  auto end = s.end();
#endif
  if (phrase_parse(start, end,
#ifdef SECTION_STRINGSTREAM
                   frontend::cursor_parser<std::string::const_iterator>(),
#elif SECTION_CIN
                   frontend::cursor_parser<
                       frontend::cin_forward_iterator<char>>(),
#else
                   frontend::cursor_parser<std::string::iterator>(),
#endif
                   boost::spirit::qi::blank, v)) {
    for (auto &c : v) {
      std::cout << boost::fusion::as_vector(c) << std::endl;
    }
    std::cerr << "success!" << std::endl;
    return 0;
  } else {
    std::cerr << "failure!" << std::endl;
    return 1;
  }
}

为什么要有自己的迭代器?

自己编写迭代器很难做到正确,而且您的迭代器看起来显然不支持多遍(multi-pass)遍历。

输入迭代器与前向迭代器属于不同的类别是有原因的!仅仅掩盖这一差异并没有帮助。前向迭代器必须是可复制的,并且在解引用时必须得到可重复的值。输入迭代器不满足这些条件。

事实上,您应该直接使用 boost::spirit::istream_iterator,或者使用 Spirit 的 multi_pass 适配器来组合出一个迭代器。

这是一个经过修复和清理的版本:

Live On Coliru

#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr

// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/as_vector.hpp>

namespace frontend {
    namespace qi     = boost::spirit::qi;

    // One parsed CSV record.
    //
    // The member order is significant: BOOST_FUSION_ADAPT_STRUCT lists the
    // members in this order and the `start` rule of cursor_parser yields its
    // attributes in the same order.
    struct cursor {
        std::string file;
        // The numeric fields carry default initializers so that a
        // default-initialized cursor (`cursor c;`) never holds indeterminate
        // values.
        unsigned long long offset = 0;
        unsigned long long line = 0;
        unsigned long long col = 0;
        // TODO: verify the inputs below using an enum.
        // decl/ref/defn/call
        std::string reference_type;
        // variable/function/scope/label/type
        std::string specifier;
        // if specifier is variable/function, then its type
        std::string type;
        std::string language;
        std::string name;
        std::string scope;
    };
}

// Adapt frontend::cursor as a Boost.Fusion sequence. The (type, name) tuples
// are written back to back, with no commas between them, and follow the order
// in which the members are declared in the struct.
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor, 
        (std::string, file)
        (unsigned long long, offset)
        (unsigned long long, line)
        (unsigned long long, col)
        (std::string, reference_type)
        (std::string, specifier)
        (std::string, type)
        (std::string, language)
        (std::string, name)
        (std::string, scope))

namespace frontend {

    // Grammar for CSV input in which each line is one `cursor` record; the
    // synthesized attribute is the vector of all records.
    //
    // NOTE: the skipper is qi::blank_type, which doesn't skip newlines, so
    // they stay available to qi::eol as the record separator.
    //
    // Iterator: iterator type of the input handed to the parse call.
    template <typename Iterator>
    struct cursor_parser : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {

        cursor_parser() : cursor_parser::base_type(vec) {
            using qi::ulong_long;
            using qi::eol;
            using qi::lexeme;
            using qi::char_;

            // A double-quoted field; lexeme[] turns the skipper off inside the quotes.
            quoted_string %= lexeme['"' >> *(char_ - '"') >> '"'];

            // The numeric fields of cursor are unsigned long long, so they are
            // parsed with ulong_long; uint_ only accepts values that fit in
            // unsigned int.
            start %=
                quoted_string  >> ','   >>  // file
                ulong_long     >> ','   >>  // offset
                ulong_long     >> ','   >>  // line
                ulong_long     >> ','   >>  // col
                quoted_string  >> ','   >>  // reference_type
                quoted_string  >> ','   >>  // specifier
                quoted_string  >> ','   >>  // type
                quoted_string  >> ','   >>  // language
                quoted_string  >> ','   >>  // name
                quoted_string; // scope

            // One record per line.
            vec %= start % eol;

            // Names the rules after their identifiers and enables debug tracing.
            BOOST_SPIRIT_DEBUG_NODES((quoted_string)(start)(vec))
        }

        private:
        qi::rule<Iterator, std::string()        , qi::blank_type> quoted_string;
        qi::rule<Iterator, cursor()             , qi::blank_type> start;
        qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;
    };
}

int main() {
    // '"f",111,222,333,"ref_type","spc","type","lan","name","scop"'
    using It = boost::spirit::istream_iterator;

    It start_in(std::cin >> std::noskipws), end_in;
    std::vector<frontend::cursor> v;

    if (phrase_parse(start_in, end_in, frontend::cursor_parser<It>(), frontend::qi::blank, v)) {
        for (auto &c : v) {
            std::cout << boost::fusion::as_vector(c) << std::endl;
        }
        std::cerr << "success!" << std::endl;
    } else {
        std::cerr << "failure!" << std::endl;
        return 1;
    }
}

输出

(f 111 222 333 ref_type spc type lan name scop)
success!

调试输出:

<vec>
  <try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
  <start>
    <try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
    <quoted_string>
      <try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
      <success>,111,222,333,"ref_type","spc","type","lan","name","scop"\n</success>
      <attributes>[[f]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"ref_type","spc","type","lan","name","scop"\n</try>
      <success>,"spc","type","lan","name","scop"\n</success>
      <attributes>[[r, e, f, _, t, y, p, e]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"spc","type","lan","name","scop"\n</try>
      <success>,"type","lan","name","scop"\n</success>
      <attributes>[[s, p, c]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"type","lan","name","scop"\n</try>
      <success>,"lan","name","scop"\n</success>
      <attributes>[[t, y, p, e]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"lan","name","scop"\n</try>
      <success>,"name","scop"\n</success>
      <attributes>[[l, a, n]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"name","scop"\n</try>
      <success>,"scop"\n</success>
      <attributes>[[n, a, m, e]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"scop"\n</try>
      <success>\n</success>
      <attributes>[[s, c, o, p]]</attributes>
    </quoted_string>
    <success>\n</success>
    <attributes>[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]</attributes>
  </start>
  <start>
    <try></try>
    <quoted_string>
      <try></try>
      <fail/>
    </quoted_string>
    <fail/>
  </start>
  <success>\n</success>
  <attributes>[[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]]</attributes>
</vec>

备注:

  • 您在 BOOST_FUSION_ADAPT_STRUCT 宏调用中出错(逗号太多)