Boost.Spirit 的 istream_iterator 从流中消耗了过多的数据
boost spirit istream_iterator consumes too much from stream
考虑从更复杂的代码中提取的以下示例:
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <map>
#include <string>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Value type describing one parsed XML tag: its indentation level, its name
// and a variable number of name="value" attributes.
struct Tag
{
    // Type used for a XML name.
    using name_type = std::string;
    // Type used for a XML attribute value.
    using value_type = std::string;
    // Type of a single XML attribute (name/value pair).
    using attribute_type = std::pair< name_type, value_type >;
    // Type of the attribute list.
    // Note: a std::map is used to simplify the attribute search.
    using list_type = std::map< name_type, value_type >;

    // Reset the instance to its default (empty) state.
    // The indent is reset too: the grammar in this file parses it as an
    // optional integer, so a value left over from a previous tag would
    // otherwise survive into the next parse.
    void clear( )
    {
        m_indent = 0;
        m_name.clear( );
        m_attribute.clear( );
    }

    // Zero-initialised so the member is never read in an indeterminate state
    // when the parsed tag carries no indent.
    std::size_t m_indent = 0; // The tag shall be / is indented by m_indent number of tabs.
    name_type m_name;         // Name of the tag.
    list_type m_attribute;    // List of tag attributes.
};
// Adapt Tag as a Boost.Fusion sequence. The members are listed in the order
// m_indent, m_name, m_attribute, matching the order of the components of the
// m_tag rule in tag_begin (optional integer, name, attribute list).
BOOST_FUSION_ADAPT_STRUCT( Tag,
( std::size_t , m_indent )
( Tag::name_type, m_name )
( Tag::list_type, m_attribute ) )
// Skipper grammar used by the decoder: a single rule that matches either a
// white-space character or a control character.
template < typename IteratorT >
struct skipper : qi::grammar< IteratorT >
{
    // The only rule of this grammar; it is also its start rule.
    qi::rule< IteratorT > m_skipper;

    skipper( )
        : skipper::base_type( m_skipper )
    {
        // Skip everything classified as "space" or as "cntrl".
        m_skipper = qi::space | qi::cntrl;
    }
};
// Grammar for a XML "begin tag" of the form
//   <name a="xyz" b="xyz" ... N="xyz">
// with an optional integer directly after the '<'. The synthesized attribute
// of the grammar is a Tag.
template < typename IteratorT, typename SkipperT >
struct tag_begin : qi::grammar< IteratorT, Tag( ), SkipperT >
{
    // Rules making up the grammar; m_tag is the start rule.
    qi::rule< IteratorT, Tag::name_type( ), SkipperT > m_string;
    qi::rule< IteratorT, Tag::attribute_type( ), SkipperT > m_attribute;
    qi::rule< IteratorT, Tag::list_type( ), SkipperT > m_list;
    qi::rule< IteratorT, Tag( ), SkipperT > m_tag;

    tag_begin( )
        : tag_begin::base_type( m_tag )
    {
        // A name or value: a (possibly empty) run of the listed characters,
        // matched as one lexeme so the skipper is not applied in between.
        m_string = qi::lexeme[ *qi::char_( "a-zA-Z0-9_.:" ) ];

        // A single attribute in the format name="value".
        m_attribute = m_string >> "=\"" >> m_string >> '"';

        // The attribute list: zero or more attributes.
        m_list = *( m_attribute - '>' );

        // The complete tag: <name a="xyz" b="xyz" ... N="xyz">
        m_tag = '<' >> -qi::int_ >> m_string >> m_list >> '>';

        // Debug output for the rules; only active when BOOST_SPIRIT_DEBUG is defined.
        BOOST_SPIRIT_DEBUG_NODES( ( m_string )( m_attribute )( m_list ) )
    }
};
bool beginTag( std::istream& stream, Tag& tag )
{
// Ensure that no whitespace characters are skipped:
stream.unsetf( std::ios::skipws );
// Create begin and end iterator for given stream:
boost::spirit::istream_iterator begin( stream );
boost::spirit::istream_iterator end;
// Define the grammar skipper type:
typedef skipper<
boost::spirit::istream_iterator
> skipper_type;
// Create an instance of the used skipper:
skipper_type sk;
// Create an instance of the used grammar:
tag_begin<
boost::spirit::istream_iterator,
skipper_type
> gr;
// Try to parse the data stored within the stream according the grammar and store the result in the tag variable:
bool r = boost::spirit::qi::phrase_parse( begin,
end,
gr,
sk,
tag );
char nextSym = 0;
stream >> nextSym;
for( auto i = tag.m_attribute.begin( ); i != tag.m_attribute.end( ); ++i )
{
std::cout << i->first << " : " << i->second << std::endl;
}
std::cout << "Next symbol: " << nextSym << std::endl;
return r;
}
int main( )
{
std::stringstream s;
s << "<object cName=\"bool\" cVersion=\"1\" vName=\"bool\"> <value>0</value></object>";
Tag t;
beginTag( s, t );
return 0;
}
我使用语法提取xml标签内容。原则上这按预期工作,结果如下:
cName : bool
cVersion : 1
vName : bool
Next symbol: v
问题是解析器消耗了太多数据。我的期望是解析器在第一个标记关闭“>”时停止。但似乎解析器还使用了以下空格和“<”符号。因此从流中读取的下一个符号等于 'v'。我想避免这种情况,因为以下解析器调用需要“<”符号。有什么想法吗?
没有可靠的方法可以做到这一点。
问题在于您没有在多次解析调用之间重复使用同一个 istream_iterator。boost::spirit::istream_iterator 的全部目的，就是在 InputIterator¹ 之上提供一个具备 multi_pass（可多趟遍历）能力的迭代器。
因为 Spirit 允许具有任意回溯的任意语法,所以您无法避免消耗比实际 成功 解析的输入更多的内容。
这里显而易见的解决方案是：将所有后续解析步骤集成到同一个语法中，和/或重复使用同一个迭代器（这样迭代器内部保存的回溯缓冲区中仍然包含您需要的字符）。
演示/概念验证
这是一个循环解析开放标签的版本
// Keep parsing begin tags with the same begin/end iterator pair until the
// grammar no longer matches.
while (boost::spirit::qi::phrase_parse(begin, end, gr, sk, tag)) {
std::cout << "============\nParsed open tag '" << tag.m_name << "'\n";
for (auto const& p: tag.m_attribute)
std::cout << p.first << ": " << p.second << "\n";
count += 1;
// Empty the tag again before it is handed to the next parse.
tag.clear();
};
// Print whatever input remains between begin and end.
std::cout << "Next symbol: ";
std::copy(begin, end, std::ostream_iterator<char>(std::cout));
并打印:
============
Parsed open tag 'object'
cName: bool
cVersion: 1
vName: bool
============
Parsed open tag 'value'
Next symbol: 0</value>
</object>
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <map>
namespace qi = boost::spirit::qi;
// Value type describing one parsed XML tag: its indentation level, its name
// and a variable number of name="value" attributes.
struct Tag {
    using name_type = std::string;
    using value_type = std::string;
    using attribute_type = std::pair<name_type, value_type>;
    using list_type = std::map<name_type, value_type>;

    // Reset the instance to its default (empty) state.
    // The indent is reset too: the grammar in this file parses it as an
    // optional integer, so a value left over from a previous tag would
    // otherwise survive into the next parse.
    void clear() {
        m_indent = 0;
        m_name.clear();
        m_attribute.clear();
    }

    // Zero-initialised so the member is never read in an indeterminate state
    // when the parsed tag carries no indent.
    std::size_t m_indent = 0; // The tag shall be / is indented by m_indent number of tabs.
    name_type m_name;         // Name of the tag.
    list_type m_attribute;    // List of tag attributes.
};
// Adapt Tag as a Boost.Fusion sequence with its members in the order
// m_indent, m_name, m_attribute.
BOOST_FUSION_ADAPT_STRUCT(Tag, m_indent, m_name, m_attribute)
// Grammar for a "XML" begin tag of the form
//   <name a="xyz" b="xyz" ... N="xyz">
// with an optional integer directly after the '<'. The synthesized attribute
// of the grammar is a Tag.
template <typename Iterator, typename Skipper>
struct tag_begin : qi::grammar<Iterator, Tag(), Skipper>
{
    tag_begin()
        : tag_begin::base_type(m_tag)
    {
        // A name or value: a (possibly empty) run of the listed characters.
        m_string = *qi::char_("a-zA-Z0-9_.:");

        // One attribute: name = "value"; the quoted part is matched as a lexeme.
        m_attribute = m_string
                   >> qi::lit('=')
                   >> qi::lexeme[qi::lit('"') >> m_string >> qi::lit('"')];

        // Zero or more attributes.
        m_attributes = *m_attribute;

        // The complete begin tag.
        m_tag = qi::lit('<')
             >> -qi::int_
             >> m_string
             >> m_attributes
             >> qi::lit('>');

        // Debug output for the rules; only active when BOOST_SPIRIT_DEBUG is defined.
        BOOST_SPIRIT_DEBUG_NODES((m_string)(m_attribute)(m_attributes))
    }

  private:
    // Rules that are declared with the skipper.
    qi::rule<Iterator, Tag::attribute_type(), Skipper> m_attribute;
    qi::rule<Iterator, Tag::list_type(), Skipper> m_attributes;
    qi::rule<Iterator, Tag(), Skipper> m_tag;

    // Lexeme rule: declared without a skipper.
    qi::rule<Iterator, Tag::name_type()> m_string;
};
bool beginTag(std::istream &stream, Tag &tag) {
// Ensure that no whitespace characters are skipped:
stream.unsetf(std::ios::skipws);
typedef boost::spirit::istream_iterator It;
typedef qi::rule<It> skipper_type;
skipper_type sk = qi::space | qi::cntrl;
tag_begin<boost::spirit::istream_iterator, skipper_type> gr;
It begin(stream), end;
int count = 0;
while (boost::spirit::qi::phrase_parse(begin, end, gr, sk, tag)) {
std::cout << "============\nParsed open tag '" << tag.m_name << "'\n";
for (auto const& p: tag.m_attribute)
std::cout << p.first << ": " << p.second << "\n";
count += 1;
tag.clear();
};
std::cout << "Next symbol: ";
std::copy(begin, end, std::ostream_iterator<char>(std::cout));
return count > 0;
}
int main() {
std::stringstream s;
s << R"(
<object cName="bool" cVersion="1" vName="bool">
<value>0</value>
</object>
)";
Tag t;
beginTag(s, t);
}
¹（InputIterator 只能严格单向前进，同一位置不能重复解引用）
考虑从更复杂的代码中提取的以下示例:
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <map>
#include <string>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Value type describing one parsed XML tag: its indentation level, its name
// and a variable number of name="value" attributes.
struct Tag
{
    // Type used for a XML name.
    using name_type = std::string;
    // Type used for a XML attribute value.
    using value_type = std::string;
    // Type of a single XML attribute (name/value pair).
    using attribute_type = std::pair< name_type, value_type >;
    // Type of the attribute list.
    // Note: a std::map is used to simplify the attribute search.
    using list_type = std::map< name_type, value_type >;

    // Reset the instance to its default (empty) state.
    // The indent is reset too: the grammar in this file parses it as an
    // optional integer, so a value left over from a previous tag would
    // otherwise survive into the next parse.
    void clear( )
    {
        m_indent = 0;
        m_name.clear( );
        m_attribute.clear( );
    }

    // Zero-initialised so the member is never read in an indeterminate state
    // when the parsed tag carries no indent.
    std::size_t m_indent = 0; // The tag shall be / is indented by m_indent number of tabs.
    name_type m_name;         // Name of the tag.
    list_type m_attribute;    // List of tag attributes.
};
// Adapt Tag as a Boost.Fusion sequence. The members are listed in the order
// m_indent, m_name, m_attribute, matching the order of the components of the
// m_tag rule in tag_begin (optional integer, name, attribute list).
BOOST_FUSION_ADAPT_STRUCT( Tag,
( std::size_t , m_indent )
( Tag::name_type, m_name )
( Tag::list_type, m_attribute ) )
// Skipper grammar used by the decoder: a single rule that matches either a
// white-space character or a control character.
template < typename IteratorT >
struct skipper : qi::grammar< IteratorT >
{
    // The only rule of this grammar; it is also its start rule.
    qi::rule< IteratorT > m_skipper;

    skipper( )
        : skipper::base_type( m_skipper )
    {
        // Skip everything classified as "space" or as "cntrl".
        m_skipper = qi::space | qi::cntrl;
    }
};
// Grammar for a XML "begin tag" of the form
//   <name a="xyz" b="xyz" ... N="xyz">
// with an optional integer directly after the '<'. The synthesized attribute
// of the grammar is a Tag.
template < typename IteratorT, typename SkipperT >
struct tag_begin : qi::grammar< IteratorT, Tag( ), SkipperT >
{
    // Rules making up the grammar; m_tag is the start rule.
    qi::rule< IteratorT, Tag::name_type( ), SkipperT > m_string;
    qi::rule< IteratorT, Tag::attribute_type( ), SkipperT > m_attribute;
    qi::rule< IteratorT, Tag::list_type( ), SkipperT > m_list;
    qi::rule< IteratorT, Tag( ), SkipperT > m_tag;

    tag_begin( )
        : tag_begin::base_type( m_tag )
    {
        // A name or value: a (possibly empty) run of the listed characters,
        // matched as one lexeme so the skipper is not applied in between.
        m_string = qi::lexeme[ *qi::char_( "a-zA-Z0-9_.:" ) ];

        // A single attribute in the format name="value".
        m_attribute = m_string >> "=\"" >> m_string >> '"';

        // The attribute list: zero or more attributes.
        m_list = *( m_attribute - '>' );

        // The complete tag: <name a="xyz" b="xyz" ... N="xyz">
        m_tag = '<' >> -qi::int_ >> m_string >> m_list >> '>';

        // Debug output for the rules; only active when BOOST_SPIRIT_DEBUG is defined.
        BOOST_SPIRIT_DEBUG_NODES( ( m_string )( m_attribute )( m_list ) )
    }
};
bool beginTag( std::istream& stream, Tag& tag )
{
// Ensure that no whitespace characters are skipped:
stream.unsetf( std::ios::skipws );
// Create begin and end iterator for given stream:
boost::spirit::istream_iterator begin( stream );
boost::spirit::istream_iterator end;
// Define the grammar skipper type:
typedef skipper<
boost::spirit::istream_iterator
> skipper_type;
// Create an instance of the used skipper:
skipper_type sk;
// Create an instance of the used grammar:
tag_begin<
boost::spirit::istream_iterator,
skipper_type
> gr;
// Try to parse the data stored within the stream according the grammar and store the result in the tag variable:
bool r = boost::spirit::qi::phrase_parse( begin,
end,
gr,
sk,
tag );
char nextSym = 0;
stream >> nextSym;
for( auto i = tag.m_attribute.begin( ); i != tag.m_attribute.end( ); ++i )
{
std::cout << i->first << " : " << i->second << std::endl;
}
std::cout << "Next symbol: " << nextSym << std::endl;
return r;
}
int main( )
{
std::stringstream s;
s << "<object cName=\"bool\" cVersion=\"1\" vName=\"bool\"> <value>0</value></object>";
Tag t;
beginTag( s, t );
return 0;
}
我使用语法提取xml标签内容。原则上这按预期工作,结果如下:
cName : bool
cVersion : 1
vName : bool
Next symbol: v
问题是解析器消耗了太多数据。我的期望是解析器在第一个标记关闭“>”时停止。但似乎解析器还使用了以下空格和“<”符号。因此从流中读取的下一个符号等于 'v'。我想避免这种情况,因为以下解析器调用需要“<”符号。有什么想法吗?
没有可靠的方法可以做到这一点。
问题在于您没有在多次解析调用之间重复使用同一个 istream_iterator。boost::spirit::istream_iterator 的全部目的，就是在 InputIterator¹ 之上提供一个具备 multi_pass（可多趟遍历）能力的迭代器。
因为 Spirit 允许具有任意回溯的任意语法,所以您无法避免消耗比实际 成功 解析的输入更多的内容。
这里显而易见的解决方案是：将所有后续解析步骤集成到同一个语法中，和/或重复使用同一个迭代器（这样迭代器内部保存的回溯缓冲区中仍然包含您需要的字符）。
演示/概念验证
这是一个循环解析开放标签的版本
// Keep parsing begin tags with the same begin/end iterator pair until the
// grammar no longer matches.
while (boost::spirit::qi::phrase_parse(begin, end, gr, sk, tag)) {
std::cout << "============\nParsed open tag '" << tag.m_name << "'\n";
for (auto const& p: tag.m_attribute)
std::cout << p.first << ": " << p.second << "\n";
count += 1;
// Empty the tag again before it is handed to the next parse.
tag.clear();
};
// Print whatever input remains between begin and end.
std::cout << "Next symbol: ";
std::copy(begin, end, std::ostream_iterator<char>(std::cout));
并打印:
============
Parsed open tag 'object'
cName: bool
cVersion: 1
vName: bool
============
Parsed open tag 'value'
Next symbol: 0</value>
</object>
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <map>
namespace qi = boost::spirit::qi;
// Value type describing one parsed XML tag: its indentation level, its name
// and a variable number of name="value" attributes.
struct Tag {
    using name_type = std::string;
    using value_type = std::string;
    using attribute_type = std::pair<name_type, value_type>;
    using list_type = std::map<name_type, value_type>;

    // Reset the instance to its default (empty) state.
    // The indent is reset too: the grammar in this file parses it as an
    // optional integer, so a value left over from a previous tag would
    // otherwise survive into the next parse.
    void clear() {
        m_indent = 0;
        m_name.clear();
        m_attribute.clear();
    }

    // Zero-initialised so the member is never read in an indeterminate state
    // when the parsed tag carries no indent.
    std::size_t m_indent = 0; // The tag shall be / is indented by m_indent number of tabs.
    name_type m_name;         // Name of the tag.
    list_type m_attribute;    // List of tag attributes.
};
// Adapt Tag as a Boost.Fusion sequence with its members in the order
// m_indent, m_name, m_attribute.
BOOST_FUSION_ADAPT_STRUCT(Tag, m_indent, m_name, m_attribute)
// Grammar for a "XML" begin tag of the form
//   <name a="xyz" b="xyz" ... N="xyz">
// with an optional integer directly after the '<'. The synthesized attribute
// of the grammar is a Tag.
template <typename Iterator, typename Skipper>
struct tag_begin : qi::grammar<Iterator, Tag(), Skipper>
{
    tag_begin()
        : tag_begin::base_type(m_tag)
    {
        // A name or value: a (possibly empty) run of the listed characters.
        m_string = *qi::char_("a-zA-Z0-9_.:");

        // One attribute: name = "value"; the quoted part is matched as a lexeme.
        m_attribute = m_string
                   >> qi::lit('=')
                   >> qi::lexeme[qi::lit('"') >> m_string >> qi::lit('"')];

        // Zero or more attributes.
        m_attributes = *m_attribute;

        // The complete begin tag.
        m_tag = qi::lit('<')
             >> -qi::int_
             >> m_string
             >> m_attributes
             >> qi::lit('>');

        // Debug output for the rules; only active when BOOST_SPIRIT_DEBUG is defined.
        BOOST_SPIRIT_DEBUG_NODES((m_string)(m_attribute)(m_attributes))
    }

  private:
    // Rules that are declared with the skipper.
    qi::rule<Iterator, Tag::attribute_type(), Skipper> m_attribute;
    qi::rule<Iterator, Tag::list_type(), Skipper> m_attributes;
    qi::rule<Iterator, Tag(), Skipper> m_tag;

    // Lexeme rule: declared without a skipper.
    qi::rule<Iterator, Tag::name_type()> m_string;
};
bool beginTag(std::istream &stream, Tag &tag) {
// Ensure that no whitespace characters are skipped:
stream.unsetf(std::ios::skipws);
typedef boost::spirit::istream_iterator It;
typedef qi::rule<It> skipper_type;
skipper_type sk = qi::space | qi::cntrl;
tag_begin<boost::spirit::istream_iterator, skipper_type> gr;
It begin(stream), end;
int count = 0;
while (boost::spirit::qi::phrase_parse(begin, end, gr, sk, tag)) {
std::cout << "============\nParsed open tag '" << tag.m_name << "'\n";
for (auto const& p: tag.m_attribute)
std::cout << p.first << ": " << p.second << "\n";
count += 1;
tag.clear();
};
std::cout << "Next symbol: ";
std::copy(begin, end, std::ostream_iterator<char>(std::cout));
return count > 0;
}
int main() {
std::stringstream s;
s << R"(
<object cName="bool" cVersion="1" vName="bool">
<value>0</value>
</object>
)";
Tag t;
beginTag(s, t);
}
¹（InputIterator 只能严格单向前进，同一位置不能重复解引用）