使用 istream 迭代器时解析失败
parsing fails when using istream iterator
我正在使用 boost::spirit 来解析 csv 输入(请不要提出替代方案,这只是测试)。当我将 stdin 的内容读入一个字符串并对其进行迭代时,解析成功;然而,当 std::cin
的内容被直接读取时(通过我自己编写的包装器,因为 phrase_parse 需要一个继承自 std::iterator<std::forward_iterator_tag, T>
的迭代器,而 std::istream_iterator<T>
不这样做),解析失败,我不明白为什么,因为调试输出似乎表明在两种情况下都解析了相同的文本,但结果不同。
我什至尝试遍历 std::cin
并将其放入字符串中,并且解析正确;我不明白为什么提供的迭代器类型会影响结果。这是我正在处理的示例(抱歉,它太大了,但您可以将其插入并轻松编译)。尝试定义宏 SECTION_STRINGSTREAM
(成功)或 SECTION_CIN
(失败)以观察奇怪的行为(默认行为(成功)是当 std::cin
被读取为字符串时)。
如果您编译后使用 echo "\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" | ./spirit_csv
来运行,调试输出清楚地显示整个字符串都被解析了。我还添加了检查 if (++start == end) std::cerr << "woah";
它在所有情况下都会被触发,所以看起来确实已经解析到了输入的末尾。
// following example from:
// http://www.boost.org/doc/libs/1_58_0/libs/spirit/example/qi/employee.cpp, and
// num_list4.cpp, and others
#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr
// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
// Namespace aliases for Boost.Spirit plus the record type that the CSV
// grammar fills in.
namespace frontend {
namespace spirit = boost::spirit;
namespace qi = spirit::qi;
namespace ascii = spirit::ascii;
// One parsed CSV record. The members are declared in the same order in which
// the `start` rule parses the columns of a line.
struct cursor {
// first column, a quoted string
std::string file;
// three unsigned numeric columns (parsed with uint_)
unsigned long long offset;
unsigned long long line;
unsigned long long col;
// author's note: verify inputs using enum
// expected values: decl/ref/defn/call
std::string reference_type;
// expected values: variable/function/scope/label/type
std::string specifier;
// if the specifier is variable/function, then its type
std::string type;
// remaining quoted string columns
std::string language;
std::string name;
std::string scope;
};
}
// Adapt frontend::cursor as a Boost.Fusion sequence; the members are listed in
// declaration order.
// NOTE(review): the (type, name) pairs below are separated by commas and the
// invocation ends with ';'. The remark at the end of this post calls the extra
// commas a mistake; confirm against the Boost.Fusion version in use.
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor, (std::string, file),
(unsigned long long, offset),
(unsigned long long, line), (unsigned long long, col),
(std::string, reference_type),
(std::string, specifier), (std::string, type),
(std::string, language), (std::string, name),
(std::string, scope));
// note: blank_type is so that newlines aren't counted as skippable, because
// they are significant for csv! however, typically you'll be wanting to use
// boost::spirit::ascii::space as your whitespace operator if you really do not
// care about whitespace
namespace frontend {
// Grammar for a list of CSV records separated by end-of-line, producing one
// `cursor` per record. The skipper is qi::blank_type so that newlines are not
// skipped: they are the record separator.
template <typename Iterator>
struct cursor_parser
    : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
  // A double-quoted field; its attribute is the text between the quotes.
  qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
  // A single record: ten comma-separated columns.
  qi::rule<Iterator, cursor(), qi::blank_type> start;
  // The whole input: records separated by eol.
  qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;

  cursor_parser() : cursor_parser::base_type(vec) {
    // Only the names that the rules below actually use are pulled in.
    using ascii::char_;
    using qi::eol;
    using qi::lexeme;
    using qi::uint_;

    // lexeme[] keeps the skipper from consuming blanks inside a quoted field.
    quoted_string %= lexeme['"' >> *(char_ - '"') >> '"'];
    start %=
        // file
        quoted_string >> ',' >>
        // offset
        uint_ >> ',' >>
        // line
        uint_ >> ',' >>
        // col
        uint_ >> ',' >>
        // reference_type
        quoted_string >> ',' >>
        // specifier
        quoted_string >> ',' >>
        // type
        quoted_string >> ',' >>
        // language
        quoted_string >> ',' >>
        // name
        quoted_string >> ',' >>
        // scope
        quoted_string;
    vec %= start % eol;

    // Short rule names for the BOOST_SPIRIT_DEBUG trace.
    quoted_string.name("qs");
    debug(quoted_string);
    start.name("s");
    debug(start);
    vec.name("v");
    debug(vec);
  }
};
// Wrapper around std::istream_iterator<T> that is presented to the parser as a
// forward iterator. Every operation is forwarded to the wrapped stream
// iterator, so the traversal is exactly as single-pass as the stream iterator
// it holds: a copy of this object does not get an independent read position.
// A default-constructed object is the end-of-stream marker.
template <typename T>
class cin_forward_iterator : std::iterator<std::forward_iterator_tag, T> {
  typedef std::istream_iterator<T> wrapped_type;
  typedef cin_forward_iterator<T> self_type;

  wrapped_type wrapped_;

public:
  // End-of-stream iterator.
  cin_forward_iterator() : wrapped_() {}
  // Iterator reading values of type T from `in`.
  cin_forward_iterator(std::istream &in) : wrapped_(in) {}

  // The value most recently read from the stream.
  const T &operator*() const { return *wrapped_; }

  // Pre-increment: reads the next value; returns a copy of the advanced
  // iterator (by value, not by reference).
  self_type operator++() {
    ++wrapped_;
    return self_type(*this);
  }

  // Post-increment: returns a copy taken before the next value is read.
  self_type operator++(int) {
    self_type before(*this);
    ++wrapped_;
    return before;
  }

  // Two iterators compare equal exactly when the wrapped stream iterators do.
  bool operator==(const self_type &rhs) const {
    return wrapped_ == rhs.wrapped_;
  }
  bool operator!=(const self_type &rhs) const { return !operator==(rhs); }
};
}
namespace std {
// Traits for frontend::cin_forward_iterator<T>: the value, difference,
// reference and pointer types are taken from std::istream_iterator<T>, while
// the category is declared as forward_iterator_tag.
template <typename T>
struct iterator_traits<frontend::cin_forward_iterator<T>> {
  using value_type = typename std::istream_iterator<T>::value_type;
  using difference_type = typename std::istream_iterator<T>::difference_type;
  using reference = typename std::istream_iterator<T>::reference;
  using pointer = typename std::istream_iterator<T>::pointer;
  using iterator_category = std::forward_iterator_tag;
};
}
/* try:
echo \
"\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" \
| ./spirit_csv
*/
// Parses CSV records from standard input and prints each parsed record to
// stdout. Exit status is 0 when phrase_parse reports success and 1 otherwise.
// The iterator type handed to the grammar is selected at compile time:
//   SECTION_STRINGSTREAM - stdin is copied into a string via a stringstream
//   SECTION_CIN          - stdin is read directly through cin_forward_iterator
//   (neither)            - stdin is copied into a string via cin_forward_iterator
int main() {
  std::vector<frontend::cursor> v;
#ifdef SECTION_STRINGSTREAM
  // reported by the author: succeeds
  typedef std::string::const_iterator iterator_type;
  std::stringstream ss;
  ss << std::cin.rdbuf();
  std::string s(ss.str());
  iterator_type start = s.cbegin();
  iterator_type end = s.cend();
#elif SECTION_CIN
  // reported by the author: fails
  typedef frontend::cin_forward_iterator<char> iterator_type;
  noskipws(std::cin);
  iterator_type start(std::cin);
  iterator_type end;
#else
  // reported by the author: succeeds
  typedef std::string::iterator iterator_type;
  noskipws(std::cin);
  frontend::cin_forward_iterator<char> start_in(std::cin);
  frontend::cin_forward_iterator<char> end_in;
  std::string s;
  while (start_in != end_in) {
    s += *start_in;
    ++start_in;
  }
  iterator_type start = s.begin();
  iterator_type end = s.end();
#endif
  const bool parsed =
      phrase_parse(start, end, frontend::cursor_parser<iterator_type>(),
                   boost::spirit::qi::blank, v);
  if (!parsed) {
    std::cerr << "failure!" << std::endl;
    return 1;
  }
  for (auto &c : v) {
    std::cout << boost::fusion::as_vector(c) << std::endl;
  }
  std::cerr << "success!" << std::endl;
  return 0;
}
为什么要有自己的迭代器?
自己写迭代器很难做对,而且你的这个迭代器看起来显然不支持多遍(multi-pass)遍历。
输入迭代器与前向迭代器属于不同的类别是有原因的!仅仅把这个差别掩盖起来并没有帮助。前向迭代器必须是可复制的,并且在解引用时必须能重复得到相同的值。输入迭代器不满足这些条件。
事实上,你要么直接使用 boost::spirit::istream_iterator,
要么用 Spirit 的 multi_pass 适配器自己组合出一个迭代器:
这是一个经过修复和清理的版本:
#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr
// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/as_vector.hpp>
// Namespace alias for Boost.Spirit Qi plus the record type that the CSV
// grammar fills in.
namespace frontend {
namespace qi = boost::spirit::qi;
// One parsed CSV record. The members are declared in the same order in which
// the `start` rule parses the columns of a line.
struct cursor {
// first column, a quoted string
std::string file;
// three unsigned numeric columns (parsed with uint_)
unsigned long long offset;
unsigned long long line;
unsigned long long col;
// author's note: verify inputs using enum
// expected values: decl/ref/defn/call
std::string reference_type;
// expected values: variable/function/scope/label/type
std::string specifier;
// if the specifier is variable/function, then its type
std::string type;
// remaining quoted string columns
std::string language;
std::string name;
std::string scope;
};
}
// Adapt frontend::cursor as a Boost.Fusion sequence. The (type, name) pairs
// are written back to back, with no commas between them and no trailing
// semicolon, and they follow the member declaration order.
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor,
(std::string, file)
(unsigned long long, offset)
(unsigned long long, line)
(unsigned long long, col)
(std::string, reference_type)
(std::string, specifier)
(std::string, type)
(std::string, language)
(std::string, name)
(std::string, scope))
namespace frontend {
// Grammar for a list of CSV records separated by end-of-line, producing one
// `cursor` per record. The skipper is qi::blank_type, which does not skip
// newlines, so qi::eol can serve as the record separator.
template <typename Iterator>
struct cursor_parser : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
  cursor_parser() : cursor_parser::base_type(vec) {
    // A field is the text between a pair of double quotes; lexeme[] keeps the
    // skipper from consuming blanks inside it.
    quoted_string %= qi::lexeme['"' >> *(qi::char_ - '"') >> '"'];

    // One record: ten comma-separated columns, in cursor member order.
    start %=
        // file
        quoted_string >> ',' >>
        // offset
        qi::uint_ >> ',' >>
        // line
        qi::uint_ >> ',' >>
        // col
        qi::uint_ >> ',' >>
        // reference_type
        quoted_string >> ',' >>
        // specifier
        quoted_string >> ',' >>
        // type
        quoted_string >> ',' >>
        // language
        quoted_string >> ',' >>
        // name
        quoted_string >> ',' >>
        // scope
        quoted_string;

    // The whole input: records separated by end-of-line.
    vec %= start % qi::eol;

    BOOST_SPIRIT_DEBUG_NODES((quoted_string)(start)(vec))
  }

private:
  qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
  qi::rule<Iterator, cursor(), qi::blank_type> start;
  qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;
};
}
// Parses CSV records from standard input through boost::spirit::istream_iterator
// and prints each parsed record to stdout. Exit status is 0 when phrase_parse
// reports success and 1 otherwise.
// Sample input line:
//   "f",111,222,333,"ref_type","spc","type","lan","name","scop"
int main() {
  typedef boost::spirit::istream_iterator It;

  // noskipws: the stream must hand blanks and newlines to the grammar as-is.
  It start_in(std::cin >> std::noskipws), end_in;
  std::vector<frontend::cursor> v;

  const bool parsed = phrase_parse(start_in, end_in, frontend::cursor_parser<It>(),
                                   frontend::qi::blank, v);
  if (!parsed) {
    std::cerr << "failure!" << std::endl;
    return 1;
  }

  for (auto &c : v) {
    std::cout << boost::fusion::as_vector(c) << std::endl;
  }
  std::cerr << "success!" << std::endl;
  return 0;
}
输出
(f 111 222 333 ref_type spc type lan name scop)
success!
调试输出:
<vec>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<start>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<quoted_string>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<success>,111,222,333,"ref_type","spc","type","lan","name","scop"\n</success>
<attributes>[[f]]</attributes>
</quoted_string>
<quoted_string>
<try>"ref_type","spc","type","lan","name","scop"\n</try>
<success>,"spc","type","lan","name","scop"\n</success>
<attributes>[[r, e, f, _, t, y, p, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"spc","type","lan","name","scop"\n</try>
<success>,"type","lan","name","scop"\n</success>
<attributes>[[s, p, c]]</attributes>
</quoted_string>
<quoted_string>
<try>"type","lan","name","scop"\n</try>
<success>,"lan","name","scop"\n</success>
<attributes>[[t, y, p, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"lan","name","scop"\n</try>
<success>,"name","scop"\n</success>
<attributes>[[l, a, n]]</attributes>
</quoted_string>
<quoted_string>
<try>"name","scop"\n</try>
<success>,"scop"\n</success>
<attributes>[[n, a, m, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"scop"\n</try>
<success>\n</success>
<attributes>[[s, c, o, p]]</attributes>
</quoted_string>
<success>\n</success>
<attributes>[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]</attributes>
</start>
<start>
<try></try>
<quoted_string>
<try></try>
<fail/>
</quoted_string>
<fail/>
</start>
<success>\n</success>
<attributes>[[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]]</attributes>
</vec>
备注:
- 您在
BOOST_FUSION_ADAPT_STRUCT
宏调用中出错(逗号太多)
我正在使用 boost::spirit 来解析 csv 输入(请不要提出替代方案,这只是测试)。当我将 stdin 的内容读入一个字符串并对其进行迭代时,解析成功;然而,当 std::cin
的内容被直接读取时(通过我自己编写的包装器,因为 phrase_parse 需要一个继承自 std::iterator<std::forward_iterator_tag, T>
的迭代器,而 std::istream_iterator<T>
不这样做),解析失败,我不明白为什么,因为调试输出似乎表明在两种情况下都解析了相同的文本,但结果不同。
我什至尝试遍历 std::cin
并将其放入字符串中,并且解析正确;我不明白为什么提供的迭代器类型会影响结果。这是我正在处理的示例(抱歉,它太大了,但您可以将其插入并轻松编译)。尝试定义宏 SECTION_STRINGSTREAM
(成功)或 SECTION_CIN
(失败)以观察奇怪的行为(默认行为(成功)是当 std::cin
被读取为字符串时)。
如果您编译后使用 echo "\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" | ./spirit_csv
来运行,调试输出清楚地显示整个字符串都被解析了。我还添加了检查 if (++start == end) std::cerr << "woah";
它在所有情况下都会被触发,所以看起来确实已经解析到了输入的末尾。
// following example from:
// http://www.boost.org/doc/libs/1_58_0/libs/spirit/example/qi/employee.cpp, and
// num_list4.cpp, and others
#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr
// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
// Namespace aliases for Boost.Spirit plus the record type that the CSV
// grammar fills in.
namespace frontend {
namespace spirit = boost::spirit;
namespace qi = spirit::qi;
namespace ascii = spirit::ascii;
// One parsed CSV record. The members are declared in the same order in which
// the `start` rule parses the columns of a line.
struct cursor {
// first column, a quoted string
std::string file;
// three unsigned numeric columns (parsed with uint_)
unsigned long long offset;
unsigned long long line;
unsigned long long col;
// author's note: verify inputs using enum
// expected values: decl/ref/defn/call
std::string reference_type;
// expected values: variable/function/scope/label/type
std::string specifier;
// if the specifier is variable/function, then its type
std::string type;
// remaining quoted string columns
std::string language;
std::string name;
std::string scope;
};
}
// Adapt frontend::cursor as a Boost.Fusion sequence; the members are listed in
// declaration order.
// NOTE(review): the (type, name) pairs below are separated by commas and the
// invocation ends with ';'. The remark at the end of this post calls the extra
// commas a mistake; confirm against the Boost.Fusion version in use.
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor, (std::string, file),
(unsigned long long, offset),
(unsigned long long, line), (unsigned long long, col),
(std::string, reference_type),
(std::string, specifier), (std::string, type),
(std::string, language), (std::string, name),
(std::string, scope));
// note: blank_type is so that newlines aren't counted as skippable, because
// they are significant for csv! however, typically you'll be wanting to use
// boost::spirit::ascii::space as your whitespace operator if you really do not
// care about whitespace
namespace frontend {
// Grammar for a list of CSV records separated by end-of-line, producing one
// `cursor` per record. The skipper is qi::blank_type so that newlines are not
// skipped: they are the record separator.
template <typename Iterator>
struct cursor_parser
    : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
  // A double-quoted field; its attribute is the text between the quotes.
  qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
  // A single record: ten comma-separated columns.
  qi::rule<Iterator, cursor(), qi::blank_type> start;
  // The whole input: records separated by eol.
  qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;

  cursor_parser() : cursor_parser::base_type(vec) {
    // Only the names that the rules below actually use are pulled in.
    using ascii::char_;
    using qi::eol;
    using qi::lexeme;
    using qi::uint_;

    // lexeme[] keeps the skipper from consuming blanks inside a quoted field.
    quoted_string %= lexeme['"' >> *(char_ - '"') >> '"'];
    start %=
        // file
        quoted_string >> ',' >>
        // offset
        uint_ >> ',' >>
        // line
        uint_ >> ',' >>
        // col
        uint_ >> ',' >>
        // reference_type
        quoted_string >> ',' >>
        // specifier
        quoted_string >> ',' >>
        // type
        quoted_string >> ',' >>
        // language
        quoted_string >> ',' >>
        // name
        quoted_string >> ',' >>
        // scope
        quoted_string;
    vec %= start % eol;

    // Short rule names for the BOOST_SPIRIT_DEBUG trace.
    quoted_string.name("qs");
    debug(quoted_string);
    start.name("s");
    debug(start);
    vec.name("v");
    debug(vec);
  }
};
// Wrapper around std::istream_iterator<T> that is presented to the parser as a
// forward iterator. Every operation is forwarded to the wrapped stream
// iterator, so the traversal is exactly as single-pass as the stream iterator
// it holds: a copy of this object does not get an independent read position.
// A default-constructed object is the end-of-stream marker.
template <typename T>
class cin_forward_iterator : std::iterator<std::forward_iterator_tag, T> {
  typedef std::istream_iterator<T> wrapped_type;
  typedef cin_forward_iterator<T> self_type;

  wrapped_type wrapped_;

public:
  // End-of-stream iterator.
  cin_forward_iterator() : wrapped_() {}
  // Iterator reading values of type T from `in`.
  cin_forward_iterator(std::istream &in) : wrapped_(in) {}

  // The value most recently read from the stream.
  const T &operator*() const { return *wrapped_; }

  // Pre-increment: reads the next value; returns a copy of the advanced
  // iterator (by value, not by reference).
  self_type operator++() {
    ++wrapped_;
    return self_type(*this);
  }

  // Post-increment: returns a copy taken before the next value is read.
  self_type operator++(int) {
    self_type before(*this);
    ++wrapped_;
    return before;
  }

  // Two iterators compare equal exactly when the wrapped stream iterators do.
  bool operator==(const self_type &rhs) const {
    return wrapped_ == rhs.wrapped_;
  }
  bool operator!=(const self_type &rhs) const { return !operator==(rhs); }
};
}
namespace std {
// Traits for frontend::cin_forward_iterator<T>: the value, difference,
// reference and pointer types are taken from std::istream_iterator<T>, while
// the category is declared as forward_iterator_tag.
template <typename T>
struct iterator_traits<frontend::cin_forward_iterator<T>> {
  using value_type = typename std::istream_iterator<T>::value_type;
  using difference_type = typename std::istream_iterator<T>::difference_type;
  using reference = typename std::istream_iterator<T>::reference;
  using pointer = typename std::istream_iterator<T>::pointer;
  using iterator_category = std::forward_iterator_tag;
};
}
/* try:
echo \
"\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" \
| ./spirit_csv
*/
// Parses CSV records from standard input and prints each parsed record to
// stdout. Exit status is 0 when phrase_parse reports success and 1 otherwise.
// The iterator type handed to the grammar is selected at compile time:
//   SECTION_STRINGSTREAM - stdin is copied into a string via a stringstream
//   SECTION_CIN          - stdin is read directly through cin_forward_iterator
//   (neither)            - stdin is copied into a string via cin_forward_iterator
int main() {
  std::vector<frontend::cursor> v;
#ifdef SECTION_STRINGSTREAM
  // reported by the author: succeeds
  typedef std::string::const_iterator iterator_type;
  std::stringstream ss;
  ss << std::cin.rdbuf();
  std::string s(ss.str());
  iterator_type start = s.cbegin();
  iterator_type end = s.cend();
#elif SECTION_CIN
  // reported by the author: fails
  typedef frontend::cin_forward_iterator<char> iterator_type;
  noskipws(std::cin);
  iterator_type start(std::cin);
  iterator_type end;
#else
  // reported by the author: succeeds
  typedef std::string::iterator iterator_type;
  noskipws(std::cin);
  frontend::cin_forward_iterator<char> start_in(std::cin);
  frontend::cin_forward_iterator<char> end_in;
  std::string s;
  while (start_in != end_in) {
    s += *start_in;
    ++start_in;
  }
  iterator_type start = s.begin();
  iterator_type end = s.end();
#endif
  const bool parsed =
      phrase_parse(start, end, frontend::cursor_parser<iterator_type>(),
                   boost::spirit::qi::blank, v);
  if (!parsed) {
    std::cerr << "failure!" << std::endl;
    return 1;
  }
  for (auto &c : v) {
    std::cout << boost::fusion::as_vector(c) << std::endl;
  }
  std::cerr << "success!" << std::endl;
  return 0;
}
为什么要有自己的迭代器?
自己写迭代器很难做对,而且你的这个迭代器看起来显然不支持多遍(multi-pass)遍历。
输入迭代器与前向迭代器属于不同的类别是有原因的!仅仅把这个差别掩盖起来并没有帮助。前向迭代器必须是可复制的,并且在解引用时必须能重复得到相同的值。输入迭代器不满足这些条件。
事实上,你要么直接使用
boost::spirit::istream_iterator,
要么用 Spirit 的 multi_pass 适配器自己组合出一个迭代器:
这是一个经过修复和清理的版本:
#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr
// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/as_vector.hpp>
// Namespace alias for Boost.Spirit Qi plus the record type that the CSV
// grammar fills in.
namespace frontend {
namespace qi = boost::spirit::qi;
// One parsed CSV record. The members are declared in the same order in which
// the `start` rule parses the columns of a line.
struct cursor {
// first column, a quoted string
std::string file;
// three unsigned numeric columns (parsed with uint_)
unsigned long long offset;
unsigned long long line;
unsigned long long col;
// author's note: verify inputs using enum
// expected values: decl/ref/defn/call
std::string reference_type;
// expected values: variable/function/scope/label/type
std::string specifier;
// if the specifier is variable/function, then its type
std::string type;
// remaining quoted string columns
std::string language;
std::string name;
std::string scope;
};
}
// Adapt frontend::cursor as a Boost.Fusion sequence. The (type, name) pairs
// are written back to back, with no commas between them and no trailing
// semicolon, and they follow the member declaration order.
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor,
(std::string, file)
(unsigned long long, offset)
(unsigned long long, line)
(unsigned long long, col)
(std::string, reference_type)
(std::string, specifier)
(std::string, type)
(std::string, language)
(std::string, name)
(std::string, scope))
namespace frontend {
// Grammar for a list of CSV records separated by end-of-line, producing one
// `cursor` per record. The skipper is qi::blank_type, which does not skip
// newlines, so qi::eol can serve as the record separator.
template <typename Iterator>
struct cursor_parser : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
  cursor_parser() : cursor_parser::base_type(vec) {
    // A field is the text between a pair of double quotes; lexeme[] keeps the
    // skipper from consuming blanks inside it.
    quoted_string %= qi::lexeme['"' >> *(qi::char_ - '"') >> '"'];

    // One record: ten comma-separated columns, in cursor member order.
    start %=
        // file
        quoted_string >> ',' >>
        // offset
        qi::uint_ >> ',' >>
        // line
        qi::uint_ >> ',' >>
        // col
        qi::uint_ >> ',' >>
        // reference_type
        quoted_string >> ',' >>
        // specifier
        quoted_string >> ',' >>
        // type
        quoted_string >> ',' >>
        // language
        quoted_string >> ',' >>
        // name
        quoted_string >> ',' >>
        // scope
        quoted_string;

    // The whole input: records separated by end-of-line.
    vec %= start % qi::eol;

    BOOST_SPIRIT_DEBUG_NODES((quoted_string)(start)(vec))
  }

private:
  qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
  qi::rule<Iterator, cursor(), qi::blank_type> start;
  qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;
};
}
// Parses CSV records from standard input through boost::spirit::istream_iterator
// and prints each parsed record to stdout. Exit status is 0 when phrase_parse
// reports success and 1 otherwise.
// Sample input line:
//   "f",111,222,333,"ref_type","spc","type","lan","name","scop"
int main() {
  typedef boost::spirit::istream_iterator It;

  // noskipws: the stream must hand blanks and newlines to the grammar as-is.
  It start_in(std::cin >> std::noskipws), end_in;
  std::vector<frontend::cursor> v;

  const bool parsed = phrase_parse(start_in, end_in, frontend::cursor_parser<It>(),
                                   frontend::qi::blank, v);
  if (!parsed) {
    std::cerr << "failure!" << std::endl;
    return 1;
  }

  for (auto &c : v) {
    std::cout << boost::fusion::as_vector(c) << std::endl;
  }
  std::cerr << "success!" << std::endl;
  return 0;
}
输出
(f 111 222 333 ref_type spc type lan name scop)
success!
调试输出:
<vec>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<start>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<quoted_string>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<success>,111,222,333,"ref_type","spc","type","lan","name","scop"\n</success>
<attributes>[[f]]</attributes>
</quoted_string>
<quoted_string>
<try>"ref_type","spc","type","lan","name","scop"\n</try>
<success>,"spc","type","lan","name","scop"\n</success>
<attributes>[[r, e, f, _, t, y, p, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"spc","type","lan","name","scop"\n</try>
<success>,"type","lan","name","scop"\n</success>
<attributes>[[s, p, c]]</attributes>
</quoted_string>
<quoted_string>
<try>"type","lan","name","scop"\n</try>
<success>,"lan","name","scop"\n</success>
<attributes>[[t, y, p, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"lan","name","scop"\n</try>
<success>,"name","scop"\n</success>
<attributes>[[l, a, n]]</attributes>
</quoted_string>
<quoted_string>
<try>"name","scop"\n</try>
<success>,"scop"\n</success>
<attributes>[[n, a, m, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"scop"\n</try>
<success>\n</success>
<attributes>[[s, c, o, p]]</attributes>
</quoted_string>
<success>\n</success>
<attributes>[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]</attributes>
</start>
<start>
<try></try>
<quoted_string>
<try></try>
<fail/>
</quoted_string>
<fail/>
</start>
<success>\n</success>
<attributes>[[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]]</attributes>
</vec>
备注:
- 您在
BOOST_FUSION_ADAPT_STRUCT
宏调用中出错(逗号太多)