使用 Boost 解析日期时间字符串:个位数小时格式
Using boost parse datetime string: With single digit hour format
我正在编写需要在 NDK 工具链上编译的代码。不幸的是,最新版本只支持到 gcc 4.9,而该版本不支持 C++11 的日期时间解析。
我有一个日期时间字符串,需要依次用两三种格式去尝试,以确定该如何解析它。
所以我尝试了 Linux API strptime,但它有时在格式并不匹配的情况下也会给出结果。我不得不放弃它,转而使用 Boost。
说到 Boost,我使用的是 1_64 版本。根据文档(见此处),
我找不到解析个位数小时格式的方法。
bool getepochtime(const std::string &str, const std::string &format, unsigned long &epoch){
epoch = 0;
namespace bt = boost::posix_time;
std::locale lformat = std::locale(std::locale::classic(), new bt::time_input_facet(format));
bt::ptime pt;
std::istringstream is(str);
is.imbue(lformat);
is >> pt;
if (pt == bt::ptime()) {
//epoch = 0;
return false;
}
bt::ptime timet_start(boost::gregorian::date(1970, 1, 1));
bt::time_duration diff = pt - timet_start;
epoch = (1000 * diff.ticks()/bt::time_duration::rep_type::ticks_per_second);
return true;
}
// Demo driver: feeds the three sample input/format pairs to getepochtime.
// The return values are deliberately ignored, as in the question.
int main() {
    struct sample {
        const char* text;
        const char* format;
    };
    const sample samples[] = {
        { "28th january 11:50 PM", "%dth %B %H:%M %p" }, // this works.
        { "28th january 1:50 PM", "%dth %B %I:%M %p" },  // this does not work.
        { "28th january 1:50 PM", "%dth %B %H:%M %p" },  // nor this.
    };

    unsigned long eval;
    for (const sample& s : samples)
        getepochtime(s.text, s.format, eval);

    return 0;
}
任何帮助将不胜感激。
我把“没有年份的日期该如何解释”这个问题留给您自己决定。不过,下面是一个只使用 strptime 的快速入门示例。
我在一个更大的代码库中使用它,我们需要一些非常通用的日期识别。看:自适应日期时间解析器:
#pragma once
#include <string>
#include <chrono>
#include <cstdint>
#include <list>
namespace mylib { namespace datetime {

    /*
     * adaptive_parser - date time parser that can try several formats
     *
     * The parser is seeded with a list of supported formats, most preferred
     * first. The default mode is `full_match`, which is not adaptive.
     *
     * Adaptive behaviour is selected with these flags:
     *
     *  - sticky      keep re-using the first format that matched
     *  - ban_failed  drop patterns that failed; this only happens after a
     *                successful parse, so invalid input cannot ban every
     *                pattern
     *  - mru         keep all patterns but re-order them for performance
     *
     * CAUTION:
     *   With ambiguous formats (e.g. mm-dd-yyyy vs dd-mm-yyyy) re-ordering
     *   makes the outcome unpredictable.
     *   => Only use `mru` when no formats are ambiguous
     *
     * NOTE:
     *   The function object carries state. When handing it to an algorithm,
     *   pass it by reference (`std::ref(obj)`) so the patterns are not copied
     *   and the adaptive behaviour works as intended
     *
     * NOTE:
     *  - list %z before %Z to handle [-+]hhmm POSIX TZ indications correctly
     *  - adaptive_parser is thread-safe only while no adaptive mode is active
     *    (`full_match` is the only flag allowed in that case)
     */
    class adaptive_parser {
      public:
        using list_t = std::list<std::string>;

        // Bit flags; combine them with operator|
        enum mode_t {
            fixed      = 0, // no adapting: same formats, same order, every time
            sticky     = 1, // stick to the first format that succeeded
            ban_failed = 2, // drop formats that have failed
            mru        = 4, // move the last known good format to the front
            full_match = 8, // accept only matches that consume the whole input
        };

        adaptive_parser(mode_t m = full_match);
        adaptive_parser(mode_t m, list_t formats);

        // returns seconds since epoch
        std::chrono::seconds operator()(std::string);

      private:
        mode_t _mode;
        list_t _formats;
    };

    // Combines two mode flags into one mode_t value (bitwise OR of both)
    static inline adaptive_parser::mode_t operator|(adaptive_parser::mode_t lhs, adaptive_parser::mode_t rhs) {
        const int combined = static_cast<int>(lhs) | static_cast<int>(rhs);
        return static_cast<adaptive_parser::mode_t>(combined);
    }

} }
您可以这样使用它:
#include "adaptive_parser.h"
#include <string>
#include <iostream>
int main() {
using namespace mylib::datetime;
adaptive_parser parser { adaptive_parser::full_match, {
"%Y %dth %B %H:%M %p",
"%dth %B %H:%M %p",
"%Y %dth %B %I:%M %p",
"%dth %B %I:%M %p",
} };
for (std::string const input : {
"2017 28th january 11:50 PM",
"28th january 11:50 PM",
"2017 28th january 1:50 PM",
"28th january 1:50 PM",
})
try {
std::cout << "Parsing '" << input << "'\n";
std::cout << " -> epoch " << parser(input).count() << "\n";
} catch(std::exception const& e) {
std::cout << "Exception: " << e.what() << "\n";
}
}
正在打印:
Parsing '2017 28th january 11:50 PM'
-> epoch 1485604200
Parsing '28th january 11:50 PM'
-> epoch -2206613400
Parsing '2017 28th january 1:50 PM'
-> epoch 1485568200
Parsing '28th january 1:50 PM'
-> epoch -2206649400
Note that epoch -2206613400 corresponds to 28 jan 1900
实施
该实现自带一组经过精心调整、互不歧义的日期模式。我们的项目还使用了一些 "hacks" 来规范化奇怪的输入格式,这里都省略了(从被注释掉的对 detail::normalize_... 函数的调用中可以看出大致思路):
#include "adaptive_parser.h"
#include "time.h"
#include <vector>
#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>
namespace {
    // Log levels accepted by s_trace.log(); only one is defined
    enum level { LOG_DEBUG };

    // Stream constructed without a stream buffer; used as the target for
    // trace output when tracing is compiled out
    static std::ostream s_devnull { nullptr };

    // File-local trace facility: log() hands back the stream to write to.
    // The level argument is accepted but not inspected.
    struct {
        std::ostream& log(int) const {
#ifndef NDEBUG
            return std::cerr;
#else
            return s_devnull;
#endif
        }
    } s_trace;
}
namespace mylib { namespace datetime {
/*
 * Default-format constructor: stores mode `m` and seeds the format list with
 * the built-in patterns below.
 *
 * The list is assembled with two helper macros:
 *  - EOL_MARK is appended to every pattern; it is an empty string here and
 *    stays defined after this constructor (operator() appends it to the
 *    input as well)
 *  - WITH_TZ(prefix, suffix) expands one pattern into several list entries:
 *    the pattern followed by " %z", by " %Z", and with no zone part at all
 *    (glibc 2.15 and older additionally get literal " Z" and " (UTC)"
 *    entries)
 *
 * The epoch-seconds patterns come last in the list.
 */
adaptive_parser::adaptive_parser(mode_t m)
: _mode(m), _formats {
// use EOL_MARK to debug patterns when you suspect ambiguity or partial matches
#define EOL_MARK "" // " EOL_MARK"
// use %z before %Z to correctly handle [-+]hhmm POSIX time zone offsets
#if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 15
// ubuntu 12.04 used eglibc and doesn't parse all bells and whistles
#define WITH_TZ(prefix, suffix) prefix " %z" suffix, prefix " %Z" suffix, prefix " Z" suffix, prefix " (UTC)" suffix, prefix suffix
#else
#define WITH_TZ(prefix, suffix) prefix " %z" suffix, prefix " %Z" suffix, prefix suffix
#endif
WITH_TZ("%Y-%m-%dT%H:%M:%S.%f", EOL_MARK),
WITH_TZ("%Y-%m-%dT%H:%M:%S", EOL_MARK),
WITH_TZ("%Y-%m-%dT%H:%M", EOL_MARK),
//
WITH_TZ("%Y-%m-%dT%I:%M:%S.%f %p", EOL_MARK),
WITH_TZ("%Y-%m-%dT%I:%M:%S %p", EOL_MARK),
WITH_TZ("%Y-%m-%dT%I:%M %p", EOL_MARK),
//
WITH_TZ("%Y-%m-%d%n%H:%M:%S", EOL_MARK),
WITH_TZ("%Y-%m-%d%n%I:%M:%S %p", EOL_MARK),
//
WITH_TZ("%a %b %d %H:%M:%S %Y", EOL_MARK),
WITH_TZ("%a %b %d %I:%M:%S %p %Y", EOL_MARK),
//
WITH_TZ("%a %d %b %H:%M:%S %Y", EOL_MARK),
WITH_TZ("%a %d %b %I:%M:%S %p %Y", EOL_MARK),
//
WITH_TZ("%a, %b %d %H:%M:%S %Y", EOL_MARK),
WITH_TZ("%a, %b %d %I:%M:%S %p %Y", EOL_MARK),
//
WITH_TZ("%a, %d %b %H:%M:%S %Y", EOL_MARK),
WITH_TZ("%a, %d %b %I:%M:%S %p %Y", EOL_MARK),
//////
WITH_TZ("%a %d %b %Y %H:%M:%S", EOL_MARK),
WITH_TZ("%a %d %b %Y %I:%M:%S %p", EOL_MARK),
//
WITH_TZ("%a, %d %b %Y %H:%M:%S", EOL_MARK),
WITH_TZ("%a, %d %b %Y %I:%M:%S %p", EOL_MARK),
#undef WITH_TZ
/*
* HUMAN DATE:
*
* This pattern would ambiguate the "%s" one (sadly, because it
* leads to obviously bogus results like parsing "1110871987" into
* "2063-04-24 16:25:59" (because "1110-8-7T19:8:7" matches
* "%Y-%m-%dT%H:%M:%S %Z" somehow...).
*
* We work around this issue by normalizing detected
* 'yyyyMMddhhmmss' human dates into iso format as a preprocessing
* step.
*/
//"%Y %m %d %H %M %S" EOL_MARK,
// epoch seconds
"@%s" EOL_MARK,
"%s" EOL_MARK,
}
{ }
// Custom-format constructor: stores mode `m` and takes over the caller's
// format list (received by value, then moved into the member).
adaptive_parser::adaptive_parser(mode_t m, list_t formats)
    : _mode(m)
    , _formats(std::move(formats))
{
}
/*
 * Parse `input` as a date time and return it as seconds since the epoch.
 *
 * Every candidate format is tried with strptime, in list order, until one
 * matches. With `full_match` set, a match only counts when nothing but
 * whitespace is left after the parsed part.
 *
 * On success the format list is adapted according to the mode flags:
 *  - ban_failed: formats that failed before the match are removed
 *  - sticky:     only the matching format is kept
 *  - mru:        the matching format is moved to the front
 *
 * Throws std::invalid_argument when there are no formats or `input` is empty,
 * and std::runtime_error when no format matches.
 */
std::chrono::seconds adaptive_parser::operator()(std::string input) {
    if (_formats.empty()) throw std::invalid_argument("No candidate patterns in datetime::adaptive_parser");
    if (input.empty()) throw std::invalid_argument("Empty input cannot be parsed as a date time");

    //detail::normalize_tz(input);
    //detail::normalize_tz_utc_w_offset_re(input);
    //detail::normalize_date_sep(input);
    //detail::normalize_human_date(input);
    //detail::normalize_redundant_timezone_description(input);
    input += EOL_MARK;

    std::vector<list_t::iterator> failed;
    bool matched = false;
    struct tm time_struct;

    auto pattern = _formats.begin();
    for (; pattern != _formats.end(); ++pattern) {
        // start every attempt from a clean struct so fields set by a failed
        // attempt cannot leak into the result
        memset(&time_struct, 0, sizeof(time_struct));
        auto tail = ::strptime(input.c_str(), pattern->c_str(), &time_struct);

        matched = (tail != nullptr);
        //if (matched) s_trace.log(LOG_DEBUG) << "Input '" << input << "' successfully matched pattern '" << *pattern << "' leaving '" << tail << "'\n";

        if (matched && (_mode & full_match)) {
            // skip trailing whitespace; the cast is required because passing
            // a negative char value to std::isspace is undefined behaviour
            while (*tail && std::isspace(static_cast<unsigned char>(*tail)))
                ++tail;
            matched = !*tail;
        }

        if (matched)
            break;

        if (_mode & ban_failed)
            failed.push_back(pattern);
    }

    if (matched) {
        // erasing list nodes leaves iterators to the other nodes valid, so
        // `pattern` survives the banning
        for (auto to_ban : failed) {
            s_trace.log(LOG_DEBUG) << "Banning failed datetime pattern: " << *to_ban << "\n";
            _formats.erase(to_ban);
        }

        if (_mode & sticky) {
            s_trace.log(LOG_DEBUG) << "Made succeeding datetime pattern sticky: " << *pattern << "\n";
            // Drop every node except the matching one. Re-assigning the whole
            // list would invalidate `pattern`, which is still used below when
            // `mru` is set as well.
            _formats.erase(_formats.begin(), pattern);
            _formats.erase(std::next(pattern), _formats.end());
        }

        if ((_mode & mru) && pattern != _formats.begin()) {
            assert(pattern != _formats.end()); // inconsistent with `matched==true`

            s_trace.log(LOG_DEBUG) << "Promote succeeding datetime pattern to the top: " << *pattern << "\n";
            // relink the node at the front; same resulting order as rotating
            // it there, without swapping any strings
            _formats.splice(_formats.begin(), _formats, pattern);
        }

#ifdef __FreeBSD__
        auto raw = (time_struct.tm_gmtoff)? mktime(&time_struct) : timegm(&time_struct);
        return std::chrono::seconds(raw);
#else
        // read the parsed UTC offset before handing the (non-const) struct
        // to timegm
        long offset = time_struct.tm_gmtoff;
        return std::chrono::seconds(timegm (&time_struct) - offset);
#endif
    }

    s_trace.log(LOG_DEBUG) << "Failed to parse datetime input '" << input << "' with " << _formats.size() << " patterns\n";
    throw std::runtime_error("Input cannot be parsed as a date time");
}
} }
我正在编写需要在 NDK 工具链上编译的代码。不幸的是,最新版本只支持到 gcc 4.9,而该版本不支持 C++11 的日期时间解析。我有一个日期时间字符串,需要依次用两三种格式去尝试,以确定该如何解析它。
所以我尝试了 Linux API strptime,但它有时在格式并不匹配的情况下也会给出结果。我不得不放弃它,转而使用 Boost。
说到 Boost,我使用的是 1_64 版本。根据文档(见此处),
我找不到解析个位数小时格式的方法。
bool getepochtime(const std::string &str, const std::string &format, unsigned long &epoch){
epoch = 0;
namespace bt = boost::posix_time;
std::locale lformat = std::locale(std::locale::classic(), new bt::time_input_facet(format));
bt::ptime pt;
std::istringstream is(str);
is.imbue(lformat);
is >> pt;
if (pt == bt::ptime()) {
//epoch = 0;
return false;
}
bt::ptime timet_start(boost::gregorian::date(1970, 1, 1));
bt::time_duration diff = pt - timet_start;
epoch = (1000 * diff.ticks()/bt::time_duration::rep_type::ticks_per_second);
return true;
}
// Demo driver: feeds the three sample input/format pairs to getepochtime.
// The return values are deliberately ignored, as in the question.
int main() {
    struct sample {
        const char* text;
        const char* format;
    };
    const sample samples[] = {
        { "28th january 11:50 PM", "%dth %B %H:%M %p" }, // this works.
        { "28th january 1:50 PM", "%dth %B %I:%M %p" },  // this does not work.
        { "28th january 1:50 PM", "%dth %B %H:%M %p" },  // nor this.
    };

    unsigned long eval;
    for (const sample& s : samples)
        getepochtime(s.text, s.format, eval);

    return 0;
}
任何帮助将不胜感激。
我把“没有年份的日期该如何解释”这个问题留给您自己决定。不过,下面是一个只使用 strptime 的快速入门示例。
我在一个更大的代码库中使用它,我们需要一些非常通用的日期识别。看:自适应日期时间解析器:
#pragma once
#include <string>
#include <chrono>
#include <cstdint>
#include <list>
namespace mylib { namespace datetime {

    /*
     * adaptive_parser - date time parser that can try several formats
     *
     * The parser is seeded with a list of supported formats, most preferred
     * first. The default mode is `full_match`, which is not adaptive.
     *
     * Adaptive behaviour is selected with these flags:
     *
     *  - sticky      keep re-using the first format that matched
     *  - ban_failed  drop patterns that failed; this only happens after a
     *                successful parse, so invalid input cannot ban every
     *                pattern
     *  - mru         keep all patterns but re-order them for performance
     *
     * CAUTION:
     *   With ambiguous formats (e.g. mm-dd-yyyy vs dd-mm-yyyy) re-ordering
     *   makes the outcome unpredictable.
     *   => Only use `mru` when no formats are ambiguous
     *
     * NOTE:
     *   The function object carries state. When handing it to an algorithm,
     *   pass it by reference (`std::ref(obj)`) so the patterns are not copied
     *   and the adaptive behaviour works as intended
     *
     * NOTE:
     *  - list %z before %Z to handle [-+]hhmm POSIX TZ indications correctly
     *  - adaptive_parser is thread-safe only while no adaptive mode is active
     *    (`full_match` is the only flag allowed in that case)
     */
    class adaptive_parser {
      public:
        using list_t = std::list<std::string>;

        // Bit flags; combine them with operator|
        enum mode_t {
            fixed      = 0, // no adapting: same formats, same order, every time
            sticky     = 1, // stick to the first format that succeeded
            ban_failed = 2, // drop formats that have failed
            mru        = 4, // move the last known good format to the front
            full_match = 8, // accept only matches that consume the whole input
        };

        adaptive_parser(mode_t m = full_match);
        adaptive_parser(mode_t m, list_t formats);

        // returns seconds since epoch
        std::chrono::seconds operator()(std::string);

      private:
        mode_t _mode;
        list_t _formats;
    };

    // Combines two mode flags into one mode_t value (bitwise OR of both)
    static inline adaptive_parser::mode_t operator|(adaptive_parser::mode_t lhs, adaptive_parser::mode_t rhs) {
        const int combined = static_cast<int>(lhs) | static_cast<int>(rhs);
        return static_cast<adaptive_parser::mode_t>(combined);
    }

} }
您可以这样使用它:
#include "adaptive_parser.h"
#include <string>
#include <iostream>
int main() {
using namespace mylib::datetime;
adaptive_parser parser { adaptive_parser::full_match, {
"%Y %dth %B %H:%M %p",
"%dth %B %H:%M %p",
"%Y %dth %B %I:%M %p",
"%dth %B %I:%M %p",
} };
for (std::string const input : {
"2017 28th january 11:50 PM",
"28th january 11:50 PM",
"2017 28th january 1:50 PM",
"28th january 1:50 PM",
})
try {
std::cout << "Parsing '" << input << "'\n";
std::cout << " -> epoch " << parser(input).count() << "\n";
} catch(std::exception const& e) {
std::cout << "Exception: " << e.what() << "\n";
}
}
正在打印:
Parsing '2017 28th january 11:50 PM'
-> epoch 1485604200
Parsing '28th january 11:50 PM'
-> epoch -2206613400
Parsing '2017 28th january 1:50 PM'
-> epoch 1485568200
Parsing '28th january 1:50 PM'
-> epoch -2206649400
Note that epoch -2206613400 corresponds to 28 jan 1900
实施
该实现自带一组经过精心调整、互不歧义的日期模式。我们的项目还使用了一些 "hacks" 来规范化奇怪的输入格式,这里都省略了(从被注释掉的对 detail::normalize_... 函数的调用中可以看出大致思路):
#include "adaptive_parser.h"
#include "time.h"
#include <vector>
#include <algorithm>
#include <cassert>
#include <cstring>
#include <iostream>
namespace {
    // Log levels accepted by s_trace.log(); only one is defined
    enum level { LOG_DEBUG };

    // Stream constructed without a stream buffer; used as the target for
    // trace output when tracing is compiled out
    static std::ostream s_devnull { nullptr };

    // File-local trace facility: log() hands back the stream to write to.
    // The level argument is accepted but not inspected.
    struct {
        std::ostream& log(int) const {
#ifndef NDEBUG
            return std::cerr;
#else
            return s_devnull;
#endif
        }
    } s_trace;
}
namespace mylib { namespace datetime {
/*
 * Default-format constructor: stores mode `m` and seeds the format list with
 * the built-in patterns below.
 *
 * The list is assembled with two helper macros:
 *  - EOL_MARK is appended to every pattern; it is an empty string here and
 *    stays defined after this constructor (operator() appends it to the
 *    input as well)
 *  - WITH_TZ(prefix, suffix) expands one pattern into several list entries:
 *    the pattern followed by " %z", by " %Z", and with no zone part at all
 *    (glibc 2.15 and older additionally get literal " Z" and " (UTC)"
 *    entries)
 *
 * The epoch-seconds patterns come last in the list.
 */
adaptive_parser::adaptive_parser(mode_t m)
: _mode(m), _formats {
// use EOL_MARK to debug patterns when you suspect ambiguity or partial matches
#define EOL_MARK "" // " EOL_MARK"
// use %z before %Z to correctly handle [-+]hhmm POSIX time zone offsets
#if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 15
// ubuntu 12.04 used eglibc and doesn't parse all bells and whistles
#define WITH_TZ(prefix, suffix) prefix " %z" suffix, prefix " %Z" suffix, prefix " Z" suffix, prefix " (UTC)" suffix, prefix suffix
#else
#define WITH_TZ(prefix, suffix) prefix " %z" suffix, prefix " %Z" suffix, prefix suffix
#endif
WITH_TZ("%Y-%m-%dT%H:%M:%S.%f", EOL_MARK),
WITH_TZ("%Y-%m-%dT%H:%M:%S", EOL_MARK),
WITH_TZ("%Y-%m-%dT%H:%M", EOL_MARK),
//
WITH_TZ("%Y-%m-%dT%I:%M:%S.%f %p", EOL_MARK),
WITH_TZ("%Y-%m-%dT%I:%M:%S %p", EOL_MARK),
WITH_TZ("%Y-%m-%dT%I:%M %p", EOL_MARK),
//
WITH_TZ("%Y-%m-%d%n%H:%M:%S", EOL_MARK),
WITH_TZ("%Y-%m-%d%n%I:%M:%S %p", EOL_MARK),
//
WITH_TZ("%a %b %d %H:%M:%S %Y", EOL_MARK),
WITH_TZ("%a %b %d %I:%M:%S %p %Y", EOL_MARK),
//
WITH_TZ("%a %d %b %H:%M:%S %Y", EOL_MARK),
WITH_TZ("%a %d %b %I:%M:%S %p %Y", EOL_MARK),
//
WITH_TZ("%a, %b %d %H:%M:%S %Y", EOL_MARK),
WITH_TZ("%a, %b %d %I:%M:%S %p %Y", EOL_MARK),
//
WITH_TZ("%a, %d %b %H:%M:%S %Y", EOL_MARK),
WITH_TZ("%a, %d %b %I:%M:%S %p %Y", EOL_MARK),
//////
WITH_TZ("%a %d %b %Y %H:%M:%S", EOL_MARK),
WITH_TZ("%a %d %b %Y %I:%M:%S %p", EOL_MARK),
//
WITH_TZ("%a, %d %b %Y %H:%M:%S", EOL_MARK),
WITH_TZ("%a, %d %b %Y %I:%M:%S %p", EOL_MARK),
#undef WITH_TZ
/*
* HUMAN DATE:
*
* This pattern would ambiguate the "%s" one (sadly, because it
* leads to obviously bogus results like parsing "1110871987" into
* "2063-04-24 16:25:59" (because "1110-8-7T19:8:7" matches
* "%Y-%m-%dT%H:%M:%S %Z" somehow...).
*
* We work around this issue by normalizing detected
* 'yyyyMMddhhmmss' human dates into iso format as a preprocessing
* step.
*/
//"%Y %m %d %H %M %S" EOL_MARK,
// epoch seconds
"@%s" EOL_MARK,
"%s" EOL_MARK,
}
{ }
// Custom-format constructor: stores mode `m` and takes over the caller's
// format list (received by value, then moved into the member).
adaptive_parser::adaptive_parser(mode_t m, list_t formats)
    : _mode(m)
    , _formats(std::move(formats))
{
}
/*
 * Parse `input` as a date time and return it as seconds since the epoch.
 *
 * Every candidate format is tried with strptime, in list order, until one
 * matches. With `full_match` set, a match only counts when nothing but
 * whitespace is left after the parsed part.
 *
 * On success the format list is adapted according to the mode flags:
 *  - ban_failed: formats that failed before the match are removed
 *  - sticky:     only the matching format is kept
 *  - mru:        the matching format is moved to the front
 *
 * Throws std::invalid_argument when there are no formats or `input` is empty,
 * and std::runtime_error when no format matches.
 */
std::chrono::seconds adaptive_parser::operator()(std::string input) {
    if (_formats.empty()) throw std::invalid_argument("No candidate patterns in datetime::adaptive_parser");
    if (input.empty()) throw std::invalid_argument("Empty input cannot be parsed as a date time");

    //detail::normalize_tz(input);
    //detail::normalize_tz_utc_w_offset_re(input);
    //detail::normalize_date_sep(input);
    //detail::normalize_human_date(input);
    //detail::normalize_redundant_timezone_description(input);
    input += EOL_MARK;

    std::vector<list_t::iterator> failed;
    bool matched = false;
    struct tm time_struct;

    auto pattern = _formats.begin();
    for (; pattern != _formats.end(); ++pattern) {
        // start every attempt from a clean struct so fields set by a failed
        // attempt cannot leak into the result
        memset(&time_struct, 0, sizeof(time_struct));
        auto tail = ::strptime(input.c_str(), pattern->c_str(), &time_struct);

        matched = (tail != nullptr);
        //if (matched) s_trace.log(LOG_DEBUG) << "Input '" << input << "' successfully matched pattern '" << *pattern << "' leaving '" << tail << "'\n";

        if (matched && (_mode & full_match)) {
            // skip trailing whitespace; the cast is required because passing
            // a negative char value to std::isspace is undefined behaviour
            while (*tail && std::isspace(static_cast<unsigned char>(*tail)))
                ++tail;
            matched = !*tail;
        }

        if (matched)
            break;

        if (_mode & ban_failed)
            failed.push_back(pattern);
    }

    if (matched) {
        // erasing list nodes leaves iterators to the other nodes valid, so
        // `pattern` survives the banning
        for (auto to_ban : failed) {
            s_trace.log(LOG_DEBUG) << "Banning failed datetime pattern: " << *to_ban << "\n";
            _formats.erase(to_ban);
        }

        if (_mode & sticky) {
            s_trace.log(LOG_DEBUG) << "Made succeeding datetime pattern sticky: " << *pattern << "\n";
            // Drop every node except the matching one. Re-assigning the whole
            // list would invalidate `pattern`, which is still used below when
            // `mru` is set as well.
            _formats.erase(_formats.begin(), pattern);
            _formats.erase(std::next(pattern), _formats.end());
        }

        if ((_mode & mru) && pattern != _formats.begin()) {
            assert(pattern != _formats.end()); // inconsistent with `matched==true`

            s_trace.log(LOG_DEBUG) << "Promote succeeding datetime pattern to the top: " << *pattern << "\n";
            // relink the node at the front; same resulting order as rotating
            // it there, without swapping any strings
            _formats.splice(_formats.begin(), _formats, pattern);
        }

#ifdef __FreeBSD__
        auto raw = (time_struct.tm_gmtoff)? mktime(&time_struct) : timegm(&time_struct);
        return std::chrono::seconds(raw);
#else
        // read the parsed UTC offset before handing the (non-const) struct
        // to timegm
        long offset = time_struct.tm_gmtoff;
        return std::chrono::seconds(timegm (&time_struct) - offset);
#endif
    }

    s_trace.log(LOG_DEBUG) << "Failed to parse datetime input '" << input << "' with " << _formats.size() << " patterns\n";
    throw std::runtime_error("Input cannot be parsed as a date time");
}
} }