boost::serialization 每个唯一 ID 一个实例
boost::serialization of one instance per unique ID
我正在尝试用 boost::serialization 序列化一些带有指向对象(例如 symbol 类)的指针的结构,这些对象实现了"每个唯一 ID 只有一个实例"的想法。这意味着,这些对象不是直接创建的,而是通过静态方法 symbol::get() 获取。此方法从某个全局字典中检索现有对象,或在必要时创建新对象。
现在最困难的部分是,在我的系统中,我有许多带有指向此类符号的指针的大型结构。这些结构不能同时放入内存中。所以我需要依次构建、预处理和序列化每个结构。稍后我将根据要求反序列化和处理结构。
标准的 boost::serialization 做法(即 load(...) 和 save(...))在这里行不通:结构在反序列化之后,符号将失去在系统范围内的唯一性,而序列化时也会浪费大量空间(我的符号对象非常大)。我阅读了 Boost 文档,发现对于非默认构造函数,可以使用 save_construct_data 和 load_construct_data 函数。但文档也说,默认的 load_construct_data "just uses the default constructor to initialize previously allocated memory"(只是使用默认构造函数来初始化先前已分配的内存)。所以这也不是我想要的。
问题是:我怎样才能绕过这个加载函数,这样我就可以避免任何分配并改用我的 symbol::get()?或者也许有更优雅的解决方案?
编辑:附上演示问题的简单代码。
// Empty base type; symbol and structure_node below derive from it.
struct structure_element {};
class symbol : public structure_element {
symbol(string x);
map<string, symbol> known_symbols;
public:
static symbol *get(string x) {
if (known_symbols.find(x) != known.symbols.end()){
known_symbols[x] = symbol(x);
}
return &known_symbols[x];
}
}
// A node of the structure: a set of (non-owning) symbol pointers plus
// pointers to child elements.
class structure_node : public structure_element {
    set<symbol *> some_attributes;
    vector<structure_element *> children;
};  // the terminating ';' was missing in the original snippet
一般来说,对于特殊情况,可以自行实现 load_construct_data
(显然,这意味着你不再依赖你在问题中已经观察到的默认实现)。
更具体地说:使用 Boost Flyweight;或者看看它是如何实现序列化的,从中寻找灵感。
没有具体的示例程序,我无法为您演示。
为了填补一些空白,下面是一个演示程序,应该能让你有个大致的感觉:
#include <iostream>
#include <boost/archive/text_oarchive.hpp>
#include <boost/serialization/string.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/set.hpp>
#include <boost/serialization/map.hpp>
#if 0
# define DEMO_FLYWEIGHT
# include <boost/flyweight/serialize.hpp>
# include <boost/flyweight.hpp>
#endif
/// Polymorphic root of the element hierarchy; it has no data members.
struct structure_element {
    virtual ~structure_element() = default;

private:
    friend class boost::serialization::access;

    /// Nothing to archive: the type carries no state of its own.
    template <typename Archive>
    void serialize(Archive& /*archive*/, unsigned int /*version*/) {}
};
namespace detail {

/// Payload of a symbol: wraps the string the symbol was constructed from.
struct symbol_impl {
    symbol_impl(std::string const& x) : _x(x) {}

#ifdef DEMO_FLYWEIGHT
    // Hashing and equality are only compiled in for the flyweight demo.
    std::size_t hash() const { return boost::hash_value(_x); }
    //bool operator< (symbol_impl const& other) const { return _x < other._x; }
    bool operator==(symbol_impl const& other) const { return _x == other._x; }
#endif

private:
    friend class boost::serialization::access;

    std::string _x;

    /// Archives the wrapped string.
    template <typename Archive>
    void serialize(Archive& archive, unsigned int /*version*/) {
        archive & _x;
    }
};

} // namespace detail
#ifdef DEMO_FLYWEIGHT
namespace boost {

/// boost::hash specialization that forwards to symbol_impl::hash().
template <>
struct hash<::detail::symbol_impl> {
    size_t operator()(::detail::symbol_impl const& value) const {
        return value.hash();
    }
};

} // namespace boost
#endif
/// A symbol element. Its payload is a plain detail::symbol_impl, or a
/// boost::flyweight<detail::symbol_impl> when DEMO_FLYWEIGHT is defined.
struct symbol : public structure_element {
    symbol(std::string const& x) : _impl(x) {}

private:
    friend class boost::serialization::access;

#ifdef DEMO_FLYWEIGHT
    boost::flyweight<detail::symbol_impl> _impl;
#else
    detail::symbol_impl _impl;
#endif

    /// Archives the base sub-object first, then the payload.
    template <typename Archive>
    void serialize(Archive& archive, unsigned int /*version*/) {
        archive & boost::serialization::base_object<structure_element>(*this);
        archive & _impl;
    }
};
/// Inner node: a set of symbol pointers plus a list of child element pointers.
struct structure_node : public structure_element {
    /// Takes both containers by value and moves them into the members.
    structure_node(std::set<symbol*> a, std::vector<structure_element*> c)
        : some_attributes(std::move(a))
        , children(std::move(c))
    {}

    // TODO value semantics/ownership

private:
    friend class boost::serialization::access;

    std::set<symbol*> some_attributes;
    std::vector<structure_element*> children;

    /// Archives the base sub-object, then the attribute set, then the children.
    template <typename Archive>
    void serialize(Archive& archive, unsigned int /*version*/) {
        archive & boost::serialization::base_object<structure_element>(*this);
        archive & some_attributes;
        archive & children;
    }
};
#include <boost/make_shared.hpp>
int main() {
// everything is leaked, by design
symbol* bar = new symbol("bar");
structure_node data {
{
new symbol("foo"),
bar,
new symbol("foo"),
new symbol("foo"),
bar,
},
{
bar,
}
};
boost::archive::text_oarchive oa(std::cout);
oa << data;
}
备注:
Live On Coliru(未启用 Flyweight)
22 serialization::archive 11 0 0 1 0
0 0 0 4 0 3 1 0
1
2 0 0 3 bar 3
3
4 3 foo 3
5
6 3 foo 3
7
8 3 foo 0 0 1 0 3 1
Live On Coliru(已启用 Flyweight)
22 serialization::archive 11 0 0 1 0
0 0 0 4 0 3 1 0
1
2 0 0 0 0 0 3 bar 3
3
4 1 3 foo 3
5
6 1 3
7
8 1 0 0 1 0 3 1
注意通过指针序列化时对象是如何被跟踪的。这意味着即使不使用 Flyweight,同一个对象也不会被重复序列化,例如被使用了 3 次的 bar 对象。
对于 foo 对象,可以看到在启用 Flyweight 时,它们的实现(symbol_impl)也被"去重"(deduplicated)了。
Boost Flyweight is highly configurable and can be made to perform significantly better than the default. I refer you to the library documentation if you want to learn more.
我正在尝试用 boost::serialization 序列化一些带有指向对象(例如 symbol 类)的指针的结构,这些对象实现了"每个唯一 ID 只有一个实例"的想法。
现在最困难的部分是,在我的系统中,我有许多带有指向此类符号的指针的大型结构。这些结构不能同时放入内存中。所以我需要依次构建、预处理和序列化每个结构。稍后我将根据要求反序列化和处理结构。
标准的 boost::serialization 做法(即 load(...) 和 save(...))在这里行不通:结构在反序列化之后,符号将失去在系统范围内的唯一性,而序列化时也会浪费大量空间(我的符号对象非常大)。我阅读了 Boost 文档,发现对于非默认构造函数,可以使用 save_construct_data 和 load_construct_data 函数。但文档也说,默认的 load_construct_data "just uses the default constructor to initialize previously allocated memory"(只是使用默认构造函数来初始化先前已分配的内存)。所以这也不是我想要的。
问题是:我怎样才能绕过这个加载函数,这样我就可以避免任何分配并改用我的 symbol::get()?或者也许有更优雅的解决方案?
编辑:附上演示问题的简单代码。
// Empty base type; symbol and structure_node below derive from it.
struct structure_element {};
class symbol : public structure_element {
symbol(string x);
map<string, symbol> known_symbols;
public:
static symbol *get(string x) {
if (known_symbols.find(x) != known.symbols.end()){
known_symbols[x] = symbol(x);
}
return &known_symbols[x];
}
}
// A node of the structure: a set of (non-owning) symbol pointers plus
// pointers to child elements.
class structure_node : public structure_element {
    set<symbol *> some_attributes;
    vector<structure_element *> children;
};  // the terminating ';' was missing in the original snippet
一般来说,对于特殊情况,可以自行实现 load_construct_data
(显然,这意味着你不再依赖你在问题中已经观察到的默认实现)。
更具体地说:使用 Boost Flyweight;或者看看它是如何实现序列化的,从中寻找灵感。
没有具体的示例程序,我无法为您演示。
为了填补一些空白,下面是一个演示程序,应该能让你有个大致的感觉:
#include <iostream>
#include <boost/archive/text_oarchive.hpp>
#include <boost/serialization/string.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/set.hpp>
#include <boost/serialization/map.hpp>
#if 0
# define DEMO_FLYWEIGHT
# include <boost/flyweight/serialize.hpp>
# include <boost/flyweight.hpp>
#endif
/// Polymorphic root of the element hierarchy; it has no data members.
struct structure_element {
    virtual ~structure_element() = default;

private:
    friend class boost::serialization::access;

    /// Nothing to archive: the type carries no state of its own.
    template <typename Archive>
    void serialize(Archive& /*archive*/, unsigned int /*version*/) {}
};
namespace detail {

/// Payload of a symbol: wraps the string the symbol was constructed from.
struct symbol_impl {
    symbol_impl(std::string const& x) : _x(x) {}

#ifdef DEMO_FLYWEIGHT
    // Hashing and equality are only compiled in for the flyweight demo.
    std::size_t hash() const { return boost::hash_value(_x); }
    //bool operator< (symbol_impl const& other) const { return _x < other._x; }
    bool operator==(symbol_impl const& other) const { return _x == other._x; }
#endif

private:
    friend class boost::serialization::access;

    std::string _x;

    /// Archives the wrapped string.
    template <typename Archive>
    void serialize(Archive& archive, unsigned int /*version*/) {
        archive & _x;
    }
};

} // namespace detail
#ifdef DEMO_FLYWEIGHT
namespace boost {

/// boost::hash specialization that forwards to symbol_impl::hash().
template <>
struct hash<::detail::symbol_impl> {
    size_t operator()(::detail::symbol_impl const& value) const {
        return value.hash();
    }
};

} // namespace boost
#endif
/// A symbol element. Its payload is a plain detail::symbol_impl, or a
/// boost::flyweight<detail::symbol_impl> when DEMO_FLYWEIGHT is defined.
struct symbol : public structure_element {
    symbol(std::string const& x) : _impl(x) {}

private:
    friend class boost::serialization::access;

#ifdef DEMO_FLYWEIGHT
    boost::flyweight<detail::symbol_impl> _impl;
#else
    detail::symbol_impl _impl;
#endif

    /// Archives the base sub-object first, then the payload.
    template <typename Archive>
    void serialize(Archive& archive, unsigned int /*version*/) {
        archive & boost::serialization::base_object<structure_element>(*this);
        archive & _impl;
    }
};
/// Inner node: a set of symbol pointers plus a list of child element pointers.
struct structure_node : public structure_element {
    /// Takes both containers by value and moves them into the members.
    structure_node(std::set<symbol*> a, std::vector<structure_element*> c)
        : some_attributes(std::move(a))
        , children(std::move(c))
    {}

    // TODO value semantics/ownership

private:
    friend class boost::serialization::access;

    std::set<symbol*> some_attributes;
    std::vector<structure_element*> children;

    /// Archives the base sub-object, then the attribute set, then the children.
    template <typename Archive>
    void serialize(Archive& archive, unsigned int /*version*/) {
        archive & boost::serialization::base_object<structure_element>(*this);
        archive & some_attributes;
        archive & children;
    }
};
#include <boost/make_shared.hpp>
int main() {
// everything is leaked, by design
symbol* bar = new symbol("bar");
structure_node data {
{
new symbol("foo"),
bar,
new symbol("foo"),
new symbol("foo"),
bar,
},
{
bar,
}
};
boost::archive::text_oarchive oa(std::cout);
oa << data;
}
备注:
Live On Coliru(未启用 Flyweight)
22 serialization::archive 11 0 0 1 0 0 0 0 4 0 3 1 0 1 2 0 0 3 bar 3 3 4 3 foo 3 5 6 3 foo 3 7 8 3 foo 0 0 1 0 3 1
Live On Coliru(已启用 Flyweight)
22 serialization::archive 11 0 0 1 0 0 0 0 4 0 3 1 0 1 2 0 0 0 0 0 3 bar 3 3 4 1 3 foo 3 5 6 1 3 7 8 1 0 0 1 0 3 1
注意通过指针序列化时对象是如何被跟踪的。这意味着即使不使用 Flyweight,同一个对象也不会被重复序列化,例如被使用了 3 次的 bar 对象。
对于 foo 对象,可以看到在启用 Flyweight 时,它们的实现(symbol_impl)也被"去重"(deduplicated)了。
Boost Flyweight is highly configurable and can be made to perform significantly better than the default. I refer you to the library documentation if you want to learn more.