如何使用 boost::karma/qi 解析日期时间?
How do I parse datetime with boost::karma/qi?
我正在解析以下格式的日志文件:
<line id>,<time>,<data_1>,<data_2>,<event_description>
时间格式为 dd-MM-yy HH:mm:ss.fff。
我需要提取并解析时间、data_1 和 data_2。
这是我对每一行所做的:
// Per-line parse attempt from the question. Each line has the form
// <line id>,<time>,<data_1>,<data_2>,<event_description>.

// A field: one or more characters other than ','.
auto unquoted_string = lexeme[+(boost::spirit::qi::char_ - ',')];
// Both start at -1 and are assigned by the semantic actions below.
double data_1=-1, data_2=-1;
// Target of the semantic action attached to the <time> field.
boost::fusion::vector<char> datestr;
bool r = phrase_parse(
std::begin(line),
std::end(line),
// <line id> , <time> , <data_1> , <data_2> , <event_description>
int_>>','>>unquoted_string[ref(datestr)=_1]>>',' >> double_[ref(data_1) = _1] >> ',' >> double_[ref(data_2) = _1] >>','>>unquoted_string,
// Skipper passed to phrase_parse.
boost::spirit::qi::space
);
现在 boost::fusion::vector<char> 中包含了需要解析的日期时间字符串。如何将其转换为 std::string?有没有更好的方法用 boost::karma/qi 来解析时间?
Spirit Karma 用于生成输出,而不是用于解析,因此您不能将其用于此目的。
对于这样的工作,我建议不要解析整个日期格式,而是解析您提供的行的一般形式:
<line id>,<time>,<data_1>,<data_2>,<event_description>
让我们定义一个用于接收解析结果的类型:
// One parsed log line. Every column except the id is stored as text.
struct Event {
    size_t      id;           // <line id>
    std::string date;         // <time>
    std::string data1;        // <data_1>
    std::string data2;        // <data_2>
    std::string description;  // <event_description>
};
使其适应自动属性传播:
// Adapt Event as a Fusion sequence, listing its members in declaration order,
// so that a parser can fill it through automatic attribute propagation.
BOOST_FUSION_ADAPT_STRUCT(Event, id, date, data1, data2, description)
一个简单的规则:
// Grammar for a single log line. It reads from an istream_iterator, produces
// an Event, and uses qi::blank as its skipper type.
qi::rule<boost::spirit::istream_iterator, Event(), qi::blank_type> rule;
rule =
qi::ulong_long >> ',' >> // id: unsigned integer
qi::raw[*~qi::char_(',')] >> ',' >> // date: raw text up to the next ','
qi::raw[*~qi::char_(',')] >> ',' >> // data1: raw text up to the next ','
qi::raw[*~qi::char_(',')] >> ',' >> // data2: raw text up to the next ','
qi::raw[*(qi::char_ - qi::eol)] // description: everything up to end of line
;
我们开始:
if (qi::phrase_parse(f, l, rule % qi::eol, qi::blank, events)) {
for (auto& event : events) {
std::cout << event << "\n----\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
}
id:11886
date:"05/09/20 01:01:06.338053260"
data1:"26168"
data2:"5374"
description:"if (_mode & full_match) {"
----
id:30215
date:"05/09/20 01:01:15.391796323"
data1:"23936"
data2:"15742"
description:"auto const& shape = shapes.at(id);"
----
id:7386
date:"05/09/20 01:01:15.463584888"
data1:"26798"
data2:"13486"
description:"into.emplace_back();"
----
id:24377
date:"05/09/20 01:01:15.531308865"
data1:"11735"
data2:"15257"
description:"auto pattern = _formats.begin();"
----
id:11744
date:"05/09/20 01:01:15.590114069"
data1:"3451"
data2:"17645"
description:"auto bounds = field.bounds();"
----
id:20148
date:"05/09/20 01:01:15.652360522"
data1:"12228"
data2:"29033"
description:"if ((_mode & mru) && pattern != _formats.begin()) {"
----
id:9196
date:"05/09/20 01:01:15.699402632"
data1:"6639"
data2:"27448"
description:"#include <boost/archive/text_oarchive.hpp>"
----
id:7341
date:"05/09/20 01:01:15.754603212"
data1:"21142"
data2:"30650"
description:"namespace attrs = boost::log::attributes;"
----
id:14990
date:"05/09/20 01:01:15.802583615"
data1:"18421"
data2:"10623"
description:"BOOST_LOG_GLOBAL_LOGGER_INIT(logger, src::severity_logger_mt) {"
----
id:19490
date:"05/09/20 01:01:15.860306470"
data1:"2883"
data2:"848"
description:"void Server::getNextSamples(std::vector<sf::Int16> oSamples) {"
----
id:30360
date:"05/09/20 01:01:15.918505128"
data1:"4369"
data2:"1998"
description:"case shape::circle: return os << \"circle\";"
----
Remaining unparsed: "
"
完整代码清单
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <iostream>
#include <iomanip>
// One parsed log line. Every column except the id is stored as text.
struct Event {
    size_t      id;           // <line id>
    std::string date;         // <time>
    std::string data1;        // <data_1>
    std::string data2;        // <data_2>
    std::string description;  // <event_description>
};
// For parsing: adapt Event as a Fusion sequence, listing its members in
// declaration order, so the Qi rule can fill it through automatic attribute
// propagation.
BOOST_FUSION_ADAPT_STRUCT(Event, id, date, data1, data2, description)
// Debug printer: writes one labelled field per line, with the text fields
// quoted via std::quoted, and returns the stream.
static std::ostream& operator<<(std::ostream& os, Event const& evt) {
    return os << " id:" << evt.id << "\n"
              << " date:" << std::quoted(evt.date) << "\n"
              << " data1:" << std::quoted(evt.data1) << "\n"
              << " data2:" << std::quoted(evt.data2) << "\n"
              << "description:" << std::quoted(evt.description) << "\n";
}
int main() {
//<line id>,<time>,<data_1>,<data_2>,<event_description>
std::istringstream iss(R"(11886,05/09/20 01:01:06.338053260,26168,5374, if (_mode & full_match) {
30215,05/09/20 01:01:15.391796323,23936,15742, auto const& shape = shapes.at(id);
7386,05/09/20 01:01:15.463584888,26798,13486, into.emplace_back();
24377,05/09/20 01:01:15.531308865,11735,15257, auto pattern = _formats.begin();
11744,05/09/20 01:01:15.590114069,3451,17645, auto bounds = field.bounds();
20148,05/09/20 01:01:15.652360522,12228,29033, if ((_mode & mru) && pattern != _formats.begin()) {
9196,05/09/20 01:01:15.699402632,6639,27448,#include <boost/archive/text_oarchive.hpp>
7341,05/09/20 01:01:15.754603212,21142,30650,namespace attrs = boost::log::attributes;
14990,05/09/20 01:01:15.802583615,18421,10623,BOOST_LOG_GLOBAL_LOGGER_INIT(logger, src::severity_logger_mt) {
19490,05/09/20 01:01:15.860306470,2883,848,void Server::getNextSamples(std::vector<sf::Int16> oSamples) {
30360,05/09/20 01:01:15.918505128,4369,1998, case shape::circle: return os << "circle";
)");
boost::spirit::istream_iterator f(iss >> std::noskipws), l;
std::vector<Event> events;
namespace qi = boost::spirit::qi;
qi::rule<boost::spirit::istream_iterator, Event(), qi::blank_type> rule;
rule =
qi::ulong_long >> ',' >> // id
qi::raw[*~qi::char_(',')] >> ',' >> // date
qi::raw[*~qi::char_(',')] >> ',' >> // data1
qi::raw[*~qi::char_(',')] >> ',' >> // data2
qi::raw[*(qi::char_ - qi::eol)] // description
;
if (qi::phrase_parse(f, l, rule % qi::eol, qi::blank, events)) {
for (auto& event : events) {
std::cout << event << "\n----\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
}
}
附加说明
若要真正解析日期时间,我建议使用 Boost DateTime。或者,可以参考下面这个基于 strptime 的真正通用的方案:
我正在解析以下格式的日志文件:
<line id>,<time>,<data_1>,<data_2>,<event_description>
时间格式为 dd-MM-yy HH:mm:ss.fff。
我需要提取并解析时间、data_1 和 data_2。
这是我对每一行所做的:
// Per-line parse attempt from the question. Each line has the form
// <line id>,<time>,<data_1>,<data_2>,<event_description>.

// A field: one or more characters other than ','.
auto unquoted_string = lexeme[+(boost::spirit::qi::char_ - ',')];
// Both start at -1 and are assigned by the semantic actions below.
double data_1=-1, data_2=-1;
// Target of the semantic action attached to the <time> field.
boost::fusion::vector<char> datestr;
bool r = phrase_parse(
std::begin(line),
std::end(line),
// <line id> , <time> , <data_1> , <data_2> , <event_description>
int_>>','>>unquoted_string[ref(datestr)=_1]>>',' >> double_[ref(data_1) = _1] >> ',' >> double_[ref(data_2) = _1] >>','>>unquoted_string,
// Skipper passed to phrase_parse.
boost::spirit::qi::space
);
现在 boost::fusion::vector<char> 中包含了需要解析的日期时间字符串。如何将其转换为 std::string?有没有更好的方法用 boost::karma/qi 来解析时间?
Spirit Karma 用于生成输出,而不是用于解析,因此您不能将其用于此目的。
对于这样的工作,我建议不要解析整个日期格式,而是解析您提供的行的一般形式:
<line id>,<time>,<data_1>,<data_2>,<event_description>
让我们定义一个用于接收解析结果的类型:
// One parsed log line. Every column except the id is stored as text.
struct Event {
    size_t      id;           // <line id>
    std::string date;         // <time>
    std::string data1;        // <data_1>
    std::string data2;        // <data_2>
    std::string description;  // <event_description>
};
使其适应自动属性传播:
// Adapt Event as a Fusion sequence, listing its members in declaration order,
// so that a parser can fill it through automatic attribute propagation.
BOOST_FUSION_ADAPT_STRUCT(Event, id, date, data1, data2, description)
一个简单的规则:
// Grammar for a single log line. It reads from an istream_iterator, produces
// an Event, and uses qi::blank as its skipper type.
qi::rule<boost::spirit::istream_iterator, Event(), qi::blank_type> rule;
rule =
qi::ulong_long >> ',' >> // id: unsigned integer
qi::raw[*~qi::char_(',')] >> ',' >> // date: raw text up to the next ','
qi::raw[*~qi::char_(',')] >> ',' >> // data1: raw text up to the next ','
qi::raw[*~qi::char_(',')] >> ',' >> // data2: raw text up to the next ','
qi::raw[*(qi::char_ - qi::eol)] // description: everything up to end of line
;
我们开始:
if (qi::phrase_parse(f, l, rule % qi::eol, qi::blank, events)) {
for (auto& event : events) {
std::cout << event << "\n----\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
}
id:11886
date:"05/09/20 01:01:06.338053260"
data1:"26168"
data2:"5374"
description:"if (_mode & full_match) {"
----
id:30215
date:"05/09/20 01:01:15.391796323"
data1:"23936"
data2:"15742"
description:"auto const& shape = shapes.at(id);"
----
id:7386
date:"05/09/20 01:01:15.463584888"
data1:"26798"
data2:"13486"
description:"into.emplace_back();"
----
id:24377
date:"05/09/20 01:01:15.531308865"
data1:"11735"
data2:"15257"
description:"auto pattern = _formats.begin();"
----
id:11744
date:"05/09/20 01:01:15.590114069"
data1:"3451"
data2:"17645"
description:"auto bounds = field.bounds();"
----
id:20148
date:"05/09/20 01:01:15.652360522"
data1:"12228"
data2:"29033"
description:"if ((_mode & mru) && pattern != _formats.begin()) {"
----
id:9196
date:"05/09/20 01:01:15.699402632"
data1:"6639"
data2:"27448"
description:"#include <boost/archive/text_oarchive.hpp>"
----
id:7341
date:"05/09/20 01:01:15.754603212"
data1:"21142"
data2:"30650"
description:"namespace attrs = boost::log::attributes;"
----
id:14990
date:"05/09/20 01:01:15.802583615"
data1:"18421"
data2:"10623"
description:"BOOST_LOG_GLOBAL_LOGGER_INIT(logger, src::severity_logger_mt) {"
----
id:19490
date:"05/09/20 01:01:15.860306470"
data1:"2883"
data2:"848"
description:"void Server::getNextSamples(std::vector<sf::Int16> oSamples) {"
----
id:30360
date:"05/09/20 01:01:15.918505128"
data1:"4369"
data2:"1998"
description:"case shape::circle: return os << \"circle\";"
----
Remaining unparsed: "
"
完整代码清单
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <iostream>
#include <iomanip>
// One parsed log line. Every column except the id is stored as text.
struct Event {
    size_t      id;           // <line id>
    std::string date;         // <time>
    std::string data1;        // <data_1>
    std::string data2;        // <data_2>
    std::string description;  // <event_description>
};
// For parsing: adapt Event as a Fusion sequence, listing its members in
// declaration order, so the Qi rule can fill it through automatic attribute
// propagation.
BOOST_FUSION_ADAPT_STRUCT(Event, id, date, data1, data2, description)
// Debug printer: writes one labelled field per line, with the text fields
// quoted via std::quoted, and returns the stream.
static std::ostream& operator<<(std::ostream& os, Event const& evt) {
    return os << " id:" << evt.id << "\n"
              << " date:" << std::quoted(evt.date) << "\n"
              << " data1:" << std::quoted(evt.data1) << "\n"
              << " data2:" << std::quoted(evt.data2) << "\n"
              << "description:" << std::quoted(evt.description) << "\n";
}
int main() {
//<line id>,<time>,<data_1>,<data_2>,<event_description>
std::istringstream iss(R"(11886,05/09/20 01:01:06.338053260,26168,5374, if (_mode & full_match) {
30215,05/09/20 01:01:15.391796323,23936,15742, auto const& shape = shapes.at(id);
7386,05/09/20 01:01:15.463584888,26798,13486, into.emplace_back();
24377,05/09/20 01:01:15.531308865,11735,15257, auto pattern = _formats.begin();
11744,05/09/20 01:01:15.590114069,3451,17645, auto bounds = field.bounds();
20148,05/09/20 01:01:15.652360522,12228,29033, if ((_mode & mru) && pattern != _formats.begin()) {
9196,05/09/20 01:01:15.699402632,6639,27448,#include <boost/archive/text_oarchive.hpp>
7341,05/09/20 01:01:15.754603212,21142,30650,namespace attrs = boost::log::attributes;
14990,05/09/20 01:01:15.802583615,18421,10623,BOOST_LOG_GLOBAL_LOGGER_INIT(logger, src::severity_logger_mt) {
19490,05/09/20 01:01:15.860306470,2883,848,void Server::getNextSamples(std::vector<sf::Int16> oSamples) {
30360,05/09/20 01:01:15.918505128,4369,1998, case shape::circle: return os << "circle";
)");
boost::spirit::istream_iterator f(iss >> std::noskipws), l;
std::vector<Event> events;
namespace qi = boost::spirit::qi;
qi::rule<boost::spirit::istream_iterator, Event(), qi::blank_type> rule;
rule =
qi::ulong_long >> ',' >> // id
qi::raw[*~qi::char_(',')] >> ',' >> // date
qi::raw[*~qi::char_(',')] >> ',' >> // data1
qi::raw[*~qi::char_(',')] >> ',' >> // data2
qi::raw[*(qi::char_ - qi::eol)] // description
;
if (qi::phrase_parse(f, l, rule % qi::eol, qi::blank, events)) {
for (auto& event : events) {
std::cout << event << "\n----\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
}
}
附加说明
若要真正解析日期时间,我建议使用 Boost DateTime。或者,可以参考下面这个基于 strptime 的真正通用的方案: