使用特征来解析 boost::spirit 中的日期
Use a trait for parsing a date in boost::spirit
我一直在解析包含日期的日志文件并将它们存储为字符串,但这是内存消耗密集型的,并且由于字符串分配而有点昂贵。
有人建议我使用 Timestamp 来存储日期并使用 boost spirit stream 来解析它,所以我尝试了 "boost::posix_time::ptime" 和旧的 "std::time_t + std::get_time",但它们都严重损害了性能。
我想试试这个新方法:先把日期解析为普通整数,再通过特征(trait)把它们转换为 std::time_t。我已经尽量参照下面这个 Spirit 示例 https://www.boost.org/doc/libs/1_60_0/libs/spirit/example/qi/parse_date.cpp ,但我连自己的测试都无法编译通过。晦涩的 boost spirit transform_attribute 语法也让问题更难解决。
有人可以帮我解决这个问题吗?
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <ctime>
// Iterator type the question's parser is instantiated with.
using It = std::string::const_iterator;

namespace structs {

// Integer fields produced by the date rule, in the order they are parsed:
// year, month, day, hour, minute, second, fractional digits.
using date_parts = boost::fusion::vector<int, int, int, int, int, int, int>;

// Calendar time plus the sub-second digits of a log entry.
struct Timestamp {
    std::time_t date;
    int ms;
};

// One parsed log line.
struct Record {
    Timestamp date;
    double time;
    std::string str;
};

// All records parsed from one input.
using Records = std::vector<Record>;

}  // namespace structs
// Adapts structs::Record as a Fusion sequence so Spirit can fill its members
// in declaration order. The macro is expanded at global scope, so every member
// type has to be spelled with its namespace: an unqualified `Timestamp` does
// not name anything here.
BOOST_FUSION_ADAPT_STRUCT(structs::Record,
(structs::Timestamp, date)
(double, time)
(std::string, str))
// Spirit customization point from the question: specialization that relates a
// structs::Timestamp attribute to the structs::date_parts tuple.
namespace boost { namespace spirit { namespace traits
{
template<>
struct transform_attribute<structs::Timestamp, structs::date_parts, qi::domain>
{
typedef structs::date_parts type;
// Returns a default-constructed date_parts; the incoming Timestamp is not used.
static type pre(structs::Timestamp) { return type(); }
// Nothing is done on failure.
static void fail(structs::Timestamp&) { }
// Builds a std::tm from the tuple elements and stores mktime's result in
// `timestamp.date`; element 6 is copied to `timestamp.ms` unchanged.
static void post(structs::Timestamp& timestamp, type const& v)
{
// NOTE(review): 1900 is subtracted from element 1 while element 0 is passed
// through unchanged. The accompanying answer points out that this subtracts
// 1900 from the month instead of the year.
std::tm time = { fusion::at_c<5>(v), fusion::at_c<4>(v), fusion::at_c<3>(v),
fusion::at_c<2>(v), fusion::at_c<1>(v) - 1900, fusion::at_c<0>(v), 0, 0, 0 };
timestamp.date = std::mktime(&time);
timestamp.ms = fusion::at_c<6>(v);
}
};
} } }
namespace qi = boost::spirit::qi;
namespace QiParsers {
// Grammar from the question: parses end-of-line separated log lines into
// structs::Records.
template <typename It>
struct Parser : qi::grammar<It, structs::Records()> {
Parser() : Parser::base_type(start) {
using namespace qi;
// Month abbreviations mapped to a zero-based index (Jan = 0 ... Dec = 11).
month.add
("Jan", 0)
("Feb", 1)
("Mar", 2)
("Apr", 3)
("May", 4)
("Jun", 5)
("Jul", 6)
("Aug", 7)
("Sep", 8)
("Oct", 9)
("Nov", 10)
("Dec", 11);
// NOTE(review): the accompanying answer points out that repeat(n)[digit]
// does not convert the matched digits into an integer attribute.
date = repeat(4)[digit] >> '-' >> month >> '-' >> repeat(2)[digit] >> ' ' >>
repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> repeat(6)[digit];
// One log line: bracketed date, " - ", a double followed by " s", then the
// run of graph characters after " => String: ".
line = '[' >> date >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
// Lines separated by eol.
start = line % eol;
}
private:
qi::symbols<char, int> month;
qi::rule<It, structs::date_parts()> date;
qi::rule<It, structs::Record()> line;
qi::rule<It, structs::Records()> start;
};
}
// Parses the range [b, e) with a function-local static grammar instance and
// returns whatever records were collected. The result of parse() is not
// inspected.
structs::Records parse_string(It b, It e)
{
    static const QiParsers::Parser<It> grammar;

    structs::Records parsed;
    parse(b, e, grammar, parsed);
    return parsed;
}
static const std::string input =
"[2018-Mar-13 13:13:59.580482] - 0.200 s => String: Test_1\n\
[2018-Mar-14 13:13:59.580482] - 0.400 s => String: Test_2\n\
[2018-Mar-15 13:13:59.580482] - 0.600 s => String: Test_3\n";
int main() {
const auto records = parse_string(input.begin(), input.end());
return 0;
}
发帖之后,我重新读了你的帖子,并在这里补充了这种方法。
特征和解析器规则的声明方式存在相当多的问题。
值得注意的是:
- repeat(2)[digit] 不会转换为整数属性。我怀疑您可能得到了很多 49、50 之类的值(即 '1'、'2' 等字符的 ASCII 码),也许还有一些不确定的值。
- 你是从月份值(而不是年份值)中减去了 1900。
解析器
简化为:
namespace QiParsers {
struct Months : qi::symbols<char, int> {
Months() { this->add
("Jan", 0)
("Feb", 1)
("Mar", 2)
("Apr", 3)
("May", 4)
("Jun", 5)
("Jul", 6)
("Aug", 7)
("Sep", 8)
("Oct", 9)
("Nov", 10)
("Dec", 11);
}
} static const mmm_;
static const qi::uint_parser<int, 10, 4, 4> yyyy_;
static const qi::uint_parser<int, 10, 2, 2> dd_, hh_, mm_, ss_;
static const qi::uint_parser<int, 10, 6, 6> fff_;
}
现在可以清晰地编写解析器¹,例如:
// Grammar for a single log record whose bracketed date is parsed field by
// field into seven ints and exposed as the Record2 date member.
template <typename It>
struct Parser2 : qi::grammar<It, structs::Record2()>
{
Parser2() : Parser2::base_type(start) {
using namespace qi;
// '[' yyyy-Mon-dd hh:mm:ss.ffffff ']' -> (year, month, day, hour, minute, second, fraction)
date = '[' >> yyyy_ >> '-' >> mmm_ >> '-' >> dd_
>> ' ' >> hh_ >> ':' >> mm_ >> ':' >> ss_ >> '.' >> fff_ >> ']';
// The trailing comment on the `date` line keeps the string-based alternative
// (date captured as raw text) for comparison.
start =
date //'[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record2()> start;
qi::rule<It, boost::fusion::vector<int, int, int, int, int, int, int>()> date;
};
特质
基本上是你所拥有的,但消除了一些怪癖:
// Spirit attribute transformation for structs::Timestamp.
//
// A rule producing a Fusion sequence `Attr` of seven ints
// (year, month [0-11], day, hour, minute, second, six fractional digits)
// can be used where a structs::Timestamp is expected: the parser fills a
// temporary `Attr`, and post() converts it once parsing succeeded.
template <typename Attr>
struct transform_attribute<structs::Timestamp, Attr, qi::domain> {
    using type = Attr;

    // Returns a default-constructed Attr; the Timestamp argument is not used.
    static type pre(structs::Timestamp) { return type(); }

    // Nothing to roll back when the parse fails.
    static void fail(structs::Timestamp&) { }

    // Converts the parsed fields into `timestamp`.
    static void post(structs::Timestamp& timestamp, type const& v) {
        // Value-initialize and assign members by name. A positional
        // initializer depends on the member order and on implementation
        // specific extra members (e.g. tm_gmtoff / tm_zone), so it does not
        // compile where std::tm has only the nine standard members.
        std::tm parts{};
        parts.tm_sec  = fusion::at_c<5>(v);         // seconds
        parts.tm_min  = fusion::at_c<4>(v);         // minutes
        parts.tm_hour = fusion::at_c<3>(v);         // hours
        parts.tm_mday = fusion::at_c<2>(v);         // day of month (1-31)
        parts.tm_mon  = fusion::at_c<1>(v);         // month (0-11)
        parts.tm_year = fusion::at_c<0>(v) - 1900;  // years since 1900
        // Let mktime decide whether DST applies. Forcing 0 makes timestamps
        // that fall inside a DST period come out one hour off when they are
        // converted back with localtime.
        parts.tm_isdst = -1;

        timestamp.date = std::mktime(&parts);
        // Six fractional digits scaled to a fraction of a second
        // (despite the member being named `ms`).
        timestamp.ms = fusion::at_c<6>(v)/1000000.0;
    }
};
基准测试!
基准测试可以运行,并且解析结果正确:
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/chrono/chrono.hpp>
#include <iomanip>
#include <ctime>
namespace structs {

// Calendar time plus a floating-point sub-second part.
struct Timestamp {
    std::time_t date;
    double      ms;
};

// Log record that keeps the date as text.
struct Record1 {
    std::string date;
    double      time;
    std::string str;
};

// Log record that keeps the date as a Timestamp.
struct Record2 {
    Timestamp   date;
    double      time;
    std::string str;
};

using Records1 = std::vector<Record1>;
using Records2 = std::vector<Record2>;

}  // namespace structs
// Adapt Record1 (date kept as std::string) as a Fusion sequence; members are
// listed in declaration order.
BOOST_FUSION_ADAPT_STRUCT(structs::Record1,
(std::string, date)
(double, time)
(std::string, str))
// Adapt Record2 (date kept as structs::Timestamp) the same way. Types are
// namespace-qualified because the macro is invoked at global scope.
BOOST_FUSION_ADAPT_STRUCT(structs::Record2,
(structs::Timestamp, date)
(double, time)
(std::string, str))
namespace boost { namespace spirit { namespace traits {

    // Assigns an iterator range [f, l) to a std::string attribute.
    template <typename It>
    struct assign_to_attribute_from_iterators<std::string, It, void> {
        static inline void call(It f, It l, std::string& attr) {
            // assign(f, l) copies the range directly. It avoids dereferencing
            // `f` when the range is empty (f == l may be the end iterator) and
            // does not assume that the iterator refers to contiguous storage.
            attr.assign(f, l);
        }
    };

    // Spirit attribute transformation for structs::Timestamp.
    //
    // A rule producing a Fusion sequence `Attr` of seven ints
    // (year, month [0-11], day, hour, minute, second, six fractional digits)
    // can be used where a structs::Timestamp is expected: the parser fills a
    // temporary `Attr`, and post() converts it once parsing succeeded.
    template <typename Attr>
    struct transform_attribute<structs::Timestamp, Attr, qi::domain> {
        using type = Attr;

        // Returns a default-constructed Attr; the Timestamp argument is not used.
        static type pre(structs::Timestamp) { return type(); }

        // Nothing to roll back when the parse fails.
        static void fail(structs::Timestamp&) { }

        // Converts the parsed fields into `timestamp`.
        static void post(structs::Timestamp& timestamp, type const& v) {
            // Value-initialize and assign members by name. A positional
            // initializer depends on the member order and on implementation
            // specific extra members (e.g. tm_gmtoff / tm_zone), so it does
            // not compile where std::tm has only the nine standard members.
            std::tm parts{};
            parts.tm_sec  = fusion::at_c<5>(v);         // seconds
            parts.tm_min  = fusion::at_c<4>(v);         // minutes
            parts.tm_hour = fusion::at_c<3>(v);         // hours
            parts.tm_mday = fusion::at_c<2>(v);         // day of month (1-31)
            parts.tm_mon  = fusion::at_c<1>(v);         // month (0-11)
            parts.tm_year = fusion::at_c<0>(v) - 1900;  // years since 1900
            // Let mktime decide whether DST applies. Forcing 0 makes
            // timestamps that fall inside a DST period come out one hour off
            // when they are converted back with localtime.
            parts.tm_isdst = -1;

            timestamp.date = std::mktime(&parts);
            // Six fractional digits scaled to a fraction of a second
            // (despite the member being named `ms`).
            timestamp.ms = fusion::at_c<6>(v)/1000000.0;
        }
    };

} } }
namespace qi = boost::spirit::qi;
// Parsers used by the benchmark: shared date-field parsers plus four grammars
// (string date vs. Timestamp date, single record vs. record list).
namespace QiParsers {
// Symbol table mapping month abbreviations to a zero-based index (Jan = 0 ... Dec = 11).
struct Months : qi::symbols<char, int> {
Months() { this->add
("Jan", 0)
("Feb", 1)
("Mar", 2)
("Apr", 3)
("May", 4)
("Jun", 5)
("Jul", 6)
("Aug", 7)
("Sep", 8)
("Oct", 9)
("Nov", 10)
("Dec", 11);
}
} static const mmm_;
// Fixed-width decimal parsers yielding int (4, 2 and 6 digits respectively).
static const qi::uint_parser<int, 10, 4, 4> yyyy_;
static const qi::uint_parser<int, 10, 2, 2> dd_, hh_, mm_, ss_;
static const qi::uint_parser<int, 10, 6, 6> fff_;
// Single record; the bracketed date is kept as raw text (everything up to ']').
template <typename It>
struct Parser1 : qi::grammar<It, structs::Record1()>
{
Parser1() : Parser1::base_type(start) {
using namespace qi;
start = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record1()> start;
};
// Single record; the bracketed date is parsed field by field into seven ints.
template <typename It>
struct Parser2 : qi::grammar<It, structs::Record2()>
{
Parser2() : Parser2::base_type(start) {
using namespace qi;
// '[' yyyy-Mon-dd hh:mm:ss.ffffff ']'
date = '[' >> yyyy_ >> '-' >> mmm_ >> '-' >> dd_
>> ' ' >> hh_ >> ':' >> mm_ >> ':' >> ss_ >> '.' >> fff_ >> ']';
// The trailing comment on the `date` line keeps the string-based alternative
// for comparison.
start =
date //'[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record2()> start;
qi::rule<It, boost::fusion::vector<int, int, int, int, int, int, int>()> date;
};
// Record list with string dates; a line that does not match `line` is
// consumed by `ignore` and contributes no record.
template <typename It>
struct Parser3 : qi::grammar<It, structs::Records1()>
{
Parser3() : Parser3::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
line = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
// Everything up to (not including) the next CR or LF.
ignore = *~char_("\r\n");
// Only a successful `line` match appends to the result.
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record1()> line;
qi::rule<It, structs::Records1()> start;
};
// Record list with Timestamp dates; a line that does not match `line` is
// consumed by `ignore` and contributes no record.
template <typename It>
struct Parser4 : qi::grammar<It, structs::Records2()>
{
Parser4() : Parser4::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
// '[' yyyy-Mon-dd hh:mm:ss.ffffff ']'
date = '[' >> yyyy_ >> '-' >> mmm_ >> '-' >> dd_
>> ' ' >> hh_ >> ':' >> mm_ >> ':' >> ss_ >> '.' >> fff_ >> ']';
line = date
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
// Everything up to (not including) the next CR or LF.
ignore = *~char_("\r\n");
// Only a successful `line` match appends to the result.
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record2()> line;
qi::rule<It, structs::Records2()> start;
qi::rule<It, boost::fusion::vector<int, int, int, int, int, int, int>()> date;
};
}
template <typename Parser> static const Parser s_instance {};
template<template <typename> class Parser, typename Container, typename It>
Container parse_seek(It b, It e, const std::string& message)
{
Container records;
auto const t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, *boost::spirit::repository::qi::seek[s_instance<Parser<It> >], records);
auto const t1 = boost::chrono::high_resolution_clock::now();
auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";
return records;
}
template<template <typename> class Parser, typename Container, typename It>
Container parse_ignoring(It b, It e, const std::string& message)
{
Container records;
auto const t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, s_instance<Parser<It> >, records);
auto const t1 = boost::chrono::high_resolution_clock::now();
auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";
return records;
}
// A line containing the " => String: " marker.
static const std::string input1 = "[2018-Mar-01 00:01:02.012345] - 1.000 s => String: Valid_string\n";
// A line without the " => String: " marker.
static const std::string input2 = "[2018-Mar-02 00:01:02.012345] - 2.000 s => I dont care\n";

// Builds the benchmark input: 10 groups, each consisting of one `input1`
// line followed by 1000 `input2` lines.
std::string prepare_input() {
    const int groups = 10;
    const int fillers_per_group = 1000;

    std::string buffer;
    buffer.reserve(groups * (input1.size() + fillers_per_group * input2.size()));

    for (int group = 0; group < groups; ++group) {
        buffer.append(input1);
        for (int filler = 0; filler < fillers_per_group; ++filler) {
            buffer.append(input2);
        }
    }
    return buffer;
}
// Runs the four parser variants over the same generated input and prints the
// first record of each result (if any) after the timing line.
int main() {
    const std::string input = prepare_input();
    const char* const f = input.data();
    const char* const l = f + input.length();

    {
        const auto records = parse_seek<QiParsers::Parser1, structs::Records1>(f, l, "std::string + seek");
        if (!records.empty()) {
            std::cout << records.front().date << "\n";
        }
    }
    {
        const auto records = parse_seek<QiParsers::Parser2, structs::Records2>(f, l, "stream + seek");
        if (!records.empty()) {
            const auto& first = records.front();
            auto tm = *std::localtime(&first.date.date);
            std::cout << std::put_time(&tm, "%Y-%b-%d %H:%M:%S") << " " << first.date.ms << "\n";
        }
    }
    {
        const auto records = parse_ignoring<QiParsers::Parser3, structs::Records1>(f, l, "std::string + ignoring");
        if (!records.empty()) {
            std::cout << records.front().date << "\n";
        }
    }
    {
        const auto records = parse_ignoring<QiParsers::Parser4, structs::Records2>(f, l, "stream + ignoring");
        if (!records.empty()) {
            const auto& first = records.front();
            auto tm = *std::localtime(&first.date.date);
            std::cout << std::put_time(&tm, "%Y-%b-%d %H:%M:%S") << " " << first.date.ms << "\n";
        }
    }
}
输出
Elapsed time: 14 ms (std::string + seek)
2018-Mar-01 00:01:02.012345
Elapsed time: 42 ms (stream + seek)
2018-Mar-01 00:01:02 0.012345
Elapsed time: 2 ms (std::string + ignoring)
2018-Mar-01 00:01:02.012345
Elapsed time: 31 ms (stream + ignoring)
2018-Mar-01 00:01:02 0.012345
结论
解析和 mktime 的开销都很大(在下面的性能分析运行中约占 10%)。除非你愿意放弃使用 std::time_t,否则很难比 boost::posix_time::from_time_string 做得更好。
这种方法的一个显著优点是:如果某一行被忽略,就不会调用 mktime。性能分析结果也体现了这一点:
- 解析器 1:21.12%
- 解析器 2:47.60 %
- 解析器 3:8.91%
- 解析器 4:20.57%
现在,带忽略功能的解析器确实比基于字符串、不做忽略的解析器更快。
分析图:
¹ 从其他答案中获取代码,因此很容易比较基准测试结果
我一直在解析包含日期的日志文件并将它们存储为字符串,但这是内存消耗密集型的,并且由于字符串分配而有点昂贵。
有人建议我使用 Timestamp 来存储日期并使用 boost spirit stream 来解析它,所以我尝试了 "boost::posix_time::ptime" 和旧的 "std::time_t + std::get_time",但它们都严重损害了性能。
我想试试这个新方法:先把日期解析为普通整数,再通过特征(trait)把它们转换为 std::time_t。我已经尽量参照下面这个 Spirit 示例 https://www.boost.org/doc/libs/1_60_0/libs/spirit/example/qi/parse_date.cpp ,但我连自己的测试都无法编译通过。晦涩的 boost spirit transform_attribute 语法也让问题更难解决。
有人可以帮我解决这个问题吗?
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <ctime>
// Iterator type the question's parser is instantiated with.
using It = std::string::const_iterator;

namespace structs {

// Integer fields produced by the date rule, in the order they are parsed:
// year, month, day, hour, minute, second, fractional digits.
using date_parts = boost::fusion::vector<int, int, int, int, int, int, int>;

// Calendar time plus the sub-second digits of a log entry.
struct Timestamp {
    std::time_t date;
    int ms;
};

// One parsed log line.
struct Record {
    Timestamp date;
    double time;
    std::string str;
};

// All records parsed from one input.
using Records = std::vector<Record>;

}  // namespace structs
// Adapts structs::Record as a Fusion sequence so Spirit can fill its members
// in declaration order. The macro is expanded at global scope, so every member
// type has to be spelled with its namespace: an unqualified `Timestamp` does
// not name anything here.
BOOST_FUSION_ADAPT_STRUCT(structs::Record,
(structs::Timestamp, date)
(double, time)
(std::string, str))
// Spirit customization point from the question: specialization that relates a
// structs::Timestamp attribute to the structs::date_parts tuple.
namespace boost { namespace spirit { namespace traits
{
template<>
struct transform_attribute<structs::Timestamp, structs::date_parts, qi::domain>
{
typedef structs::date_parts type;
// Returns a default-constructed date_parts; the incoming Timestamp is not used.
static type pre(structs::Timestamp) { return type(); }
// Nothing is done on failure.
static void fail(structs::Timestamp&) { }
// Builds a std::tm from the tuple elements and stores mktime's result in
// `timestamp.date`; element 6 is copied to `timestamp.ms` unchanged.
static void post(structs::Timestamp& timestamp, type const& v)
{
// NOTE(review): 1900 is subtracted from element 1 while element 0 is passed
// through unchanged. The accompanying answer points out that this subtracts
// 1900 from the month instead of the year.
std::tm time = { fusion::at_c<5>(v), fusion::at_c<4>(v), fusion::at_c<3>(v),
fusion::at_c<2>(v), fusion::at_c<1>(v) - 1900, fusion::at_c<0>(v), 0, 0, 0 };
timestamp.date = std::mktime(&time);
timestamp.ms = fusion::at_c<6>(v);
}
};
} } }
namespace qi = boost::spirit::qi;
namespace QiParsers {
// Grammar from the question: parses end-of-line separated log lines into
// structs::Records.
template <typename It>
struct Parser : qi::grammar<It, structs::Records()> {
Parser() : Parser::base_type(start) {
using namespace qi;
// Month abbreviations mapped to a zero-based index (Jan = 0 ... Dec = 11).
month.add
("Jan", 0)
("Feb", 1)
("Mar", 2)
("Apr", 3)
("May", 4)
("Jun", 5)
("Jul", 6)
("Aug", 7)
("Sep", 8)
("Oct", 9)
("Nov", 10)
("Dec", 11);
// NOTE(review): the accompanying answer points out that repeat(n)[digit]
// does not convert the matched digits into an integer attribute.
date = repeat(4)[digit] >> '-' >> month >> '-' >> repeat(2)[digit] >> ' ' >>
repeat(2)[digit] >> ':' >> repeat(2)[digit] >> ':' >> repeat(2)[digit] >> '.' >> repeat(6)[digit];
// One log line: bracketed date, " - ", a double followed by " s", then the
// run of graph characters after " => String: ".
line = '[' >> date >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
// Lines separated by eol.
start = line % eol;
}
private:
qi::symbols<char, int> month;
qi::rule<It, structs::date_parts()> date;
qi::rule<It, structs::Record()> line;
qi::rule<It, structs::Records()> start;
};
}
// Parses the range [b, e) with a function-local static grammar instance and
// returns whatever records were collected. The result of parse() is not
// inspected.
structs::Records parse_string(It b, It e)
{
    static const QiParsers::Parser<It> grammar;

    structs::Records parsed;
    parse(b, e, grammar, parsed);
    return parsed;
}
static const std::string input =
"[2018-Mar-13 13:13:59.580482] - 0.200 s => String: Test_1\n\
[2018-Mar-14 13:13:59.580482] - 0.400 s => String: Test_2\n\
[2018-Mar-15 13:13:59.580482] - 0.600 s => String: Test_3\n";
int main() {
const auto records = parse_string(input.begin(), input.end());
return 0;
}
发帖之后,我重新读了你的帖子,并在这里补充了这种方法。
特征和解析器规则的声明方式存在相当多的问题。
值得注意的是:
- repeat(2)[digit] 不会转换为整数属性。我怀疑您可能得到了很多 49、50 之类的值(即 '1'、'2' 等字符的 ASCII 码),也许还有一些不确定的值。
- 你是从月份值(而不是年份值)中减去了 1900。
解析器
简化为:
namespace QiParsers {
struct Months : qi::symbols<char, int> {
Months() { this->add
("Jan", 0)
("Feb", 1)
("Mar", 2)
("Apr", 3)
("May", 4)
("Jun", 5)
("Jul", 6)
("Aug", 7)
("Sep", 8)
("Oct", 9)
("Nov", 10)
("Dec", 11);
}
} static const mmm_;
static const qi::uint_parser<int, 10, 4, 4> yyyy_;
static const qi::uint_parser<int, 10, 2, 2> dd_, hh_, mm_, ss_;
static const qi::uint_parser<int, 10, 6, 6> fff_;
}
现在可以清晰地编写解析器¹,例如:
// Grammar for a single log record whose bracketed date is parsed field by
// field into seven ints and exposed as the Record2 date member.
template <typename It>
struct Parser2 : qi::grammar<It, structs::Record2()>
{
Parser2() : Parser2::base_type(start) {
using namespace qi;
// '[' yyyy-Mon-dd hh:mm:ss.ffffff ']' -> (year, month, day, hour, minute, second, fraction)
date = '[' >> yyyy_ >> '-' >> mmm_ >> '-' >> dd_
>> ' ' >> hh_ >> ':' >> mm_ >> ':' >> ss_ >> '.' >> fff_ >> ']';
// The trailing comment on the `date` line keeps the string-based alternative
// (date captured as raw text) for comparison.
start =
date //'[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record2()> start;
qi::rule<It, boost::fusion::vector<int, int, int, int, int, int, int>()> date;
};
特质
基本上是你所拥有的,但消除了一些怪癖:
// Spirit attribute transformation for structs::Timestamp.
//
// A rule producing a Fusion sequence `Attr` of seven ints
// (year, month [0-11], day, hour, minute, second, six fractional digits)
// can be used where a structs::Timestamp is expected: the parser fills a
// temporary `Attr`, and post() converts it once parsing succeeded.
template <typename Attr>
struct transform_attribute<structs::Timestamp, Attr, qi::domain> {
    using type = Attr;

    // Returns a default-constructed Attr; the Timestamp argument is not used.
    static type pre(structs::Timestamp) { return type(); }

    // Nothing to roll back when the parse fails.
    static void fail(structs::Timestamp&) { }

    // Converts the parsed fields into `timestamp`.
    static void post(structs::Timestamp& timestamp, type const& v) {
        // Value-initialize and assign members by name. A positional
        // initializer depends on the member order and on implementation
        // specific extra members (e.g. tm_gmtoff / tm_zone), so it does not
        // compile where std::tm has only the nine standard members.
        std::tm parts{};
        parts.tm_sec  = fusion::at_c<5>(v);         // seconds
        parts.tm_min  = fusion::at_c<4>(v);         // minutes
        parts.tm_hour = fusion::at_c<3>(v);         // hours
        parts.tm_mday = fusion::at_c<2>(v);         // day of month (1-31)
        parts.tm_mon  = fusion::at_c<1>(v);         // month (0-11)
        parts.tm_year = fusion::at_c<0>(v) - 1900;  // years since 1900
        // Let mktime decide whether DST applies. Forcing 0 makes timestamps
        // that fall inside a DST period come out one hour off when they are
        // converted back with localtime.
        parts.tm_isdst = -1;

        timestamp.date = std::mktime(&parts);
        // Six fractional digits scaled to a fraction of a second
        // (despite the member being named `ms`).
        timestamp.ms = fusion::at_c<6>(v)/1000000.0;
    }
};
基准测试!
基准测试可以运行,并且解析结果正确:
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/chrono/chrono.hpp>
#include <iomanip>
#include <ctime>
namespace structs {

// Calendar time plus a floating-point sub-second part.
struct Timestamp {
    std::time_t date;
    double      ms;
};

// Log record that keeps the date as text.
struct Record1 {
    std::string date;
    double      time;
    std::string str;
};

// Log record that keeps the date as a Timestamp.
struct Record2 {
    Timestamp   date;
    double      time;
    std::string str;
};

using Records1 = std::vector<Record1>;
using Records2 = std::vector<Record2>;

}  // namespace structs
// Adapt Record1 (date kept as std::string) as a Fusion sequence; members are
// listed in declaration order.
BOOST_FUSION_ADAPT_STRUCT(structs::Record1,
(std::string, date)
(double, time)
(std::string, str))
// Adapt Record2 (date kept as structs::Timestamp) the same way. Types are
// namespace-qualified because the macro is invoked at global scope.
BOOST_FUSION_ADAPT_STRUCT(structs::Record2,
(structs::Timestamp, date)
(double, time)
(std::string, str))
namespace boost { namespace spirit { namespace traits {

    // Assigns an iterator range [f, l) to a std::string attribute.
    template <typename It>
    struct assign_to_attribute_from_iterators<std::string, It, void> {
        static inline void call(It f, It l, std::string& attr) {
            // assign(f, l) copies the range directly. It avoids dereferencing
            // `f` when the range is empty (f == l may be the end iterator) and
            // does not assume that the iterator refers to contiguous storage.
            attr.assign(f, l);
        }
    };

    // Spirit attribute transformation for structs::Timestamp.
    //
    // A rule producing a Fusion sequence `Attr` of seven ints
    // (year, month [0-11], day, hour, minute, second, six fractional digits)
    // can be used where a structs::Timestamp is expected: the parser fills a
    // temporary `Attr`, and post() converts it once parsing succeeded.
    template <typename Attr>
    struct transform_attribute<structs::Timestamp, Attr, qi::domain> {
        using type = Attr;

        // Returns a default-constructed Attr; the Timestamp argument is not used.
        static type pre(structs::Timestamp) { return type(); }

        // Nothing to roll back when the parse fails.
        static void fail(structs::Timestamp&) { }

        // Converts the parsed fields into `timestamp`.
        static void post(structs::Timestamp& timestamp, type const& v) {
            // Value-initialize and assign members by name. A positional
            // initializer depends on the member order and on implementation
            // specific extra members (e.g. tm_gmtoff / tm_zone), so it does
            // not compile where std::tm has only the nine standard members.
            std::tm parts{};
            parts.tm_sec  = fusion::at_c<5>(v);         // seconds
            parts.tm_min  = fusion::at_c<4>(v);         // minutes
            parts.tm_hour = fusion::at_c<3>(v);         // hours
            parts.tm_mday = fusion::at_c<2>(v);         // day of month (1-31)
            parts.tm_mon  = fusion::at_c<1>(v);         // month (0-11)
            parts.tm_year = fusion::at_c<0>(v) - 1900;  // years since 1900
            // Let mktime decide whether DST applies. Forcing 0 makes
            // timestamps that fall inside a DST period come out one hour off
            // when they are converted back with localtime.
            parts.tm_isdst = -1;

            timestamp.date = std::mktime(&parts);
            // Six fractional digits scaled to a fraction of a second
            // (despite the member being named `ms`).
            timestamp.ms = fusion::at_c<6>(v)/1000000.0;
        }
    };

} } }
namespace qi = boost::spirit::qi;
// Parsers used by the benchmark: shared date-field parsers plus four grammars
// (string date vs. Timestamp date, single record vs. record list).
namespace QiParsers {
// Symbol table mapping month abbreviations to a zero-based index (Jan = 0 ... Dec = 11).
struct Months : qi::symbols<char, int> {
Months() { this->add
("Jan", 0)
("Feb", 1)
("Mar", 2)
("Apr", 3)
("May", 4)
("Jun", 5)
("Jul", 6)
("Aug", 7)
("Sep", 8)
("Oct", 9)
("Nov", 10)
("Dec", 11);
}
} static const mmm_;
// Fixed-width decimal parsers yielding int (4, 2 and 6 digits respectively).
static const qi::uint_parser<int, 10, 4, 4> yyyy_;
static const qi::uint_parser<int, 10, 2, 2> dd_, hh_, mm_, ss_;
static const qi::uint_parser<int, 10, 6, 6> fff_;
// Single record; the bracketed date is kept as raw text (everything up to ']').
template <typename It>
struct Parser1 : qi::grammar<It, structs::Record1()>
{
Parser1() : Parser1::base_type(start) {
using namespace qi;
start = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record1()> start;
};
// Single record; the bracketed date is parsed field by field into seven ints.
template <typename It>
struct Parser2 : qi::grammar<It, structs::Record2()>
{
Parser2() : Parser2::base_type(start) {
using namespace qi;
// '[' yyyy-Mon-dd hh:mm:ss.ffffff ']'
date = '[' >> yyyy_ >> '-' >> mmm_ >> '-' >> dd_
>> ' ' >> hh_ >> ':' >> mm_ >> ':' >> ss_ >> '.' >> fff_ >> ']';
// The trailing comment on the `date` line keeps the string-based alternative
// for comparison.
start =
date //'[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph]
>> eol;
}
private:
qi::rule<It, structs::Record2()> start;
qi::rule<It, boost::fusion::vector<int, int, int, int, int, int, int>()> date;
};
// Record list with string dates; a line that does not match `line` is
// consumed by `ignore` and contributes no record.
template <typename It>
struct Parser3 : qi::grammar<It, structs::Records1()>
{
Parser3() : Parser3::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
line = '[' >> raw[*~char_(']')] >> ']'
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
// Everything up to (not including) the next CR or LF.
ignore = *~char_("\r\n");
// Only a successful `line` match appends to the result.
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record1()> line;
qi::rule<It, structs::Records1()> start;
};
// Record list with Timestamp dates; a line that does not match `line` is
// consumed by `ignore` and contributes no record.
template <typename It>
struct Parser4 : qi::grammar<It, structs::Records2()>
{
Parser4() : Parser4::base_type(start) {
using namespace qi;
using boost::phoenix::push_back;
// '[' yyyy-Mon-dd hh:mm:ss.ffffff ']'
date = '[' >> yyyy_ >> '-' >> mmm_ >> '-' >> dd_
>> ' ' >> hh_ >> ':' >> mm_ >> ':' >> ss_ >> '.' >> fff_ >> ']';
line = date
>> " - " >> double_ >> " s"
>> " => String: " >> raw[+graph];
// Everything up to (not including) the next CR or LF.
ignore = *~char_("\r\n");
// Only a successful `line` match appends to the result.
start = (line[push_back(_val, _1)] | ignore) % eol;
}
private:
qi::rule<It> ignore;
qi::rule<It, structs::Record2()> line;
qi::rule<It, structs::Records2()> start;
qi::rule<It, boost::fusion::vector<int, int, int, int, int, int, int>()> date;
};
}
template <typename Parser> static const Parser s_instance {};
template<template <typename> class Parser, typename Container, typename It>
Container parse_seek(It b, It e, const std::string& message)
{
Container records;
auto const t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, *boost::spirit::repository::qi::seek[s_instance<Parser<It> >], records);
auto const t1 = boost::chrono::high_resolution_clock::now();
auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";
return records;
}
template<template <typename> class Parser, typename Container, typename It>
Container parse_ignoring(It b, It e, const std::string& message)
{
Container records;
auto const t0 = boost::chrono::high_resolution_clock::now();
parse(b, e, s_instance<Parser<It> >, records);
auto const t1 = boost::chrono::high_resolution_clock::now();
auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";
return records;
}
// A line containing the " => String: " marker.
static const std::string input1 = "[2018-Mar-01 00:01:02.012345] - 1.000 s => String: Valid_string\n";
// A line without the " => String: " marker.
static const std::string input2 = "[2018-Mar-02 00:01:02.012345] - 2.000 s => I dont care\n";

// Builds the benchmark input: 10 groups, each consisting of one `input1`
// line followed by 1000 `input2` lines.
std::string prepare_input() {
    const int groups = 10;
    const int fillers_per_group = 1000;

    std::string buffer;
    buffer.reserve(groups * (input1.size() + fillers_per_group * input2.size()));

    for (int group = 0; group < groups; ++group) {
        buffer.append(input1);
        for (int filler = 0; filler < fillers_per_group; ++filler) {
            buffer.append(input2);
        }
    }
    return buffer;
}
// Runs the four parser variants over the same generated input and prints the
// first record of each result (if any) after the timing line.
int main() {
    const std::string input = prepare_input();
    const char* const f = input.data();
    const char* const l = f + input.length();

    {
        const auto records = parse_seek<QiParsers::Parser1, structs::Records1>(f, l, "std::string + seek");
        if (!records.empty()) {
            std::cout << records.front().date << "\n";
        }
    }
    {
        const auto records = parse_seek<QiParsers::Parser2, structs::Records2>(f, l, "stream + seek");
        if (!records.empty()) {
            const auto& first = records.front();
            auto tm = *std::localtime(&first.date.date);
            std::cout << std::put_time(&tm, "%Y-%b-%d %H:%M:%S") << " " << first.date.ms << "\n";
        }
    }
    {
        const auto records = parse_ignoring<QiParsers::Parser3, structs::Records1>(f, l, "std::string + ignoring");
        if (!records.empty()) {
            std::cout << records.front().date << "\n";
        }
    }
    {
        const auto records = parse_ignoring<QiParsers::Parser4, structs::Records2>(f, l, "stream + ignoring");
        if (!records.empty()) {
            const auto& first = records.front();
            auto tm = *std::localtime(&first.date.date);
            std::cout << std::put_time(&tm, "%Y-%b-%d %H:%M:%S") << " " << first.date.ms << "\n";
        }
    }
}
输出
Elapsed time: 14 ms (std::string + seek)
2018-Mar-01 00:01:02.012345
Elapsed time: 42 ms (stream + seek)
2018-Mar-01 00:01:02 0.012345
Elapsed time: 2 ms (std::string + ignoring)
2018-Mar-01 00:01:02.012345
Elapsed time: 31 ms (stream + ignoring)
2018-Mar-01 00:01:02 0.012345
结论
解析和 mktime 的开销都很大(在下面的性能分析运行中约占 10%)。除非你愿意放弃使用 std::time_t,否则很难比 boost::posix_time::from_time_string 做得更好。
这种方法的一个显著优点是:如果某一行被忽略,就不会调用 mktime。性能分析结果也体现了这一点:
- 解析器 1:21.12%
- 解析器 2:47.60 %
- 解析器 3:8.91%
- 解析器 4:20.57%
现在,带忽略功能的解析器确实比基于字符串、不做忽略的解析器更快。
分析图:
¹ 从其他答案中获取代码,因此很容易比较基准测试结果