使用 Boost Spirit X3 高效解析简单文件
Efficiently parse trivial files with boost spirit X3
我是 C++ 和 Boost Spirit X3 的新手。对于我的项目,我使用 boost spirit X3 将具有以下结构的两个文件中的地理社交图解析为 boost 图。
我有一个有效的实现。由于我之前没有任何使用这些库的经验,我想知道您对这种方法有何看法,以及您是否建议采用不同的方法。
在图形文件中,每条边一行。
在解析边时,如果某个节点此前尚未出现,我必须先在图中创建该节点。我使用语义动作,每次遇到节点 ID 时都检查该节点是否已在图中。读完整行后,我再用一个语义动作添加这条边。
在位置文件中,每一行是某个节点在给定时间的一个已知位置。我只存储图中已知节点的第一个位置(使用自定义的 Boost 图属性)。
我有具体的问题,但很乐意收到任何想法和建议:
- 可以像我对图形文件那样使用嵌套语义操作吗?这会影响性能吗?
- 是否建议使用 Spirit X3 一次解析整个文件,还是应该使用 Spirit X3 单独解析每一行?
图(表示图中的边)
[user1] [user2]
0 3
地点
[user] [check-in time] [latitude] [longitude] [location id]
0 2010-10-19T23:55:27Z 30.2359091167 -97.7951395833 22847
Spirit X3 解析代码
// Parse the gowalla edge file
boost::spirit::istream_iterator file_iterator(edge_file), eof;
x3::phrase_parse(file_iterator, eof,
// Begin grammar
(
*((x3::int_[add_vertex] >> x3::int_[add_vertex])[add_edge])
),
// End grammar
x3::space
);
// Fail if we couldn't parse the whole edges file
if (file_iterator != eof) {
std::cerr << "Couldn't parse whole edges file" << std::endl;
}
// Parse the gowalla location file, one check-in per line:
//   vertex_id  time-of-checkin  latitude  longitude  location-id
// Every record must be terminated by x3::eol, so the stream has to pass whitespace
// through untouched: with the stream's skipws flag set, blanks and line breaks are
// dropped by the stream before the parser ever sees them. Clearing the flag here is
// harmless if the caller has already cleared it.
file_iterator = boost::spirit::istream_iterator(location_file >> std::noskipws);
x3::phrase_parse(file_iterator, eof,
    // Begin grammar
    (
        // The first four fields are handed to add_location; the trailing location id
        // is parsed but not used
        *((x3::int_ >> x3::lexeme[*x3::graph] >> x3::double_ >> x3::double_)[add_location] >> x3::int_ >> x3::eol)
    ),
    // End grammar
    // Skip blanks only (not line breaks), otherwise x3::eol could never match
    x3::blank
);
// Fail if we couldn't parse the whole location file
if (file_iterator != eof) {
    std::cerr << "Couldn't parse whole location file" << std::endl;
}
X3 调用的语义动作
// Semantic action: makes sure the graph has a vertex for the id that was just parsed.
// The action returns nothing on every path. X3 does not use the value returned by a
// semantic action (a match is rejected through x3::_pass(ctx) instead), and mixing
// `return false;` with a path that falls off the end would leave that path without a
// return value in a lambda whose return type is deduced as bool.
auto add_vertex = [&](auto& ctx){
    // The attribute of the x3::int_ this action is attached to is the parsed id
    auto const id = x3::_attr(ctx);
    // Nothing to do if the vertex is already known
    if (vertices.find(id) != vertices.end()) {
        return;
    }
    // Otherwise add a vertex to the graph and remember its descriptor under the id
    vertices[id] = boost::add_vertex(g);
};
// Semantic action for a complete edge record: connects its two endpoints in the graph
auto add_edge = [&](auto& ctx){
    // The attribute of the two-integer sequence is a boost fusion tuple with both ids
    auto const& ids = x3::_attr(ctx);
    // Translate the ids into the vertex descriptors stored in the map
    auto const first_vertex = vertices[fusion::at_c<0>(ids)];
    auto const second_vertex = vertices[fusion::at_c<1>(ids)];
    boost::add_edge(first_vertex, second_vertex, g);
};
// Semantic action: stores the first location seen for each vertex.
// Attribute layout (boost fusion tuple): 0 = vertex id, 1 = check-in time (string),
// 2 = latitude, 3 = longitude.
// Returns true when the location was stored or had been handled before, false when the
// id is not a vertex of the graph.
auto add_location = [&](auto& ctx){
    // Bind by reference: copying the tuple would also copy the time string on every line
    auto const& attr = x3::_attr(ctx);
    auto const vertex_id = fusion::at_c<0>(attr);
    // insert() reports whether the id is new, so one lookup both tests and records it
    if (!location_already_added.insert(vertex_id).second) {
        // Exit, as we already handled the first location for this vertex
        return true;
    }
    // Test if vertex is in our graph
    // We are parsing locations from a different file than the graph,
    // so there might be inconsistencies
    auto const known = vertices.find(vertex_id);
    if (known == vertices.end()) {
        std::cerr << "Tried to add location to vertex " << vertex_id << ", but this vertex is not in our graph" << std::endl;
        return false;
    }
    // Add location to the vertex
    auto& props = g[known->second];
    props.latitude = fusion::at_c<2>(attr);
    props.longitude = fusion::at_c<3>(attr);
    return true;
};
Boost 图
// Bundled per-vertex property: the geographic position attached to a vertex.
// Both members default to 0.0 so that a vertex which never receives a location does
// not expose indeterminate values.
struct vertex_property {
    double longitude = 0.0;
    double latitude = 0.0;
};
// Graph type used by the parser.
// The first selector is setS so that the graph cannot become a multigraph; the
// remaining arguments select vecS, undirectedS and the vertex/edge property types.
using graph = boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, vertex_property, edge_property>;
Q. Is it ok to use nested semantic actions as I do for the graph file? Does this hurt performance?
我不会这样做。直接整条地添加边可能更容易:
x3::parse(file_iterator, eof,
*((x3::int_ >> '\t' >> x3::int_ >> x3::eol)[add_edge])
);
其中 add_edge 可以简单地写成:
// Semantic action: adds the edge described by one parsed "<id>\t<id>" line
auto add_edge = [&](auto& ctx){
    // The attribute is a fusion sequence holding the two parsed integer ids
    auto const& ids = x3::_attr(ctx);
    // Resolve the endpoints one after the other: map_vertex may add a vertex as a side
    // effect, and the evaluation order of function arguments is unspecified
    auto const source = map_vertex(fusion::at_c<0>(ids));
    auto const target = map_vertex(fusion::at_c<1>(ids));
    boost::add_edge(source, target, g);
};
Q. Is it recommended to parse the whole file at once with Spirit X3 or should I parse every line individually with Spirit X3?
我认为 Spirit 对此没有任何推荐做法。我会一次解析整个文件。我建议使用内存映射文件,这样效率更高(可以直接进行随机访问迭代,无需 multi_pass 迭代器适配)。
一般备注:
您在使用对空白敏感的解析器,却把它们与 istream_iterator 一起使用。您必须记得在流上清除 skipws 标志。
vertices 映射(map)似乎是一种资源浪费;请考虑是否可以直接使用 [user] 值(vertex_id),而不必将其转换为 vertex_descriptor。
这是一个清理后的版本,可以在大约 19 秒内很好地解析来自 https://snap.stanford.edu/data/loc-gowalla.html 的文件(这已经相当快了):
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <fstream>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace fusion = boost::fusion;
// Bundled per-vertex property: the geographic position attached to a vertex.
// Both members default to 0.0 so that a vertex which never receives a location does
// not expose indeterminate values.
struct vertex_property {
    double longitude = 0.0;
    double latitude = 0.0;
};
// Bundled per-edge property; edges carry no data
struct edge_property { };
// Reads the gowalla edge and check-in files into an undirected Boost graph.
// Ids from the files are translated to vertex descriptors through mapped_vertices;
// a vertex is created the first time its id shows up in the edge file.
struct Reader {
    // Parses the edge file `fname` ("<id>\t<id>" per line) and adds one edge per line.
    // Returns false if the file cannot be opened or was not consumed up to its end.
    bool read_edges(std::string fname) {
        // Semantic action: adds the edge described by one parsed line
        auto add_edge = [this](auto& ctx){
            // The attribute is a fusion sequence holding the two parsed ids
            auto const& ids = x3::_attr(ctx);
            // Resolve the endpoints in a fixed order: map_vertex may create a vertex,
            // and the evaluation order of function arguments is unspecified
            auto const source = this->map_vertex(fusion::at_c<0>(ids));
            auto const target = this->map_vertex(fusion::at_c<1>(ids));
            boost::add_edge(source, target, g);
        };

        // Parse the gowalla edge file
        std::ifstream edge_file(fname);
        if (!edge_file) return false;

        // noskipws: the grammar matches tabs and line ends itself
        boost::spirit::istream_iterator file_iterator(edge_file >> std::noskipws), eof;
        x3::parse(file_iterator, eof, *((x3::int_ >> '\t' >> x3::int_ >> x3::eol)[add_edge]));

        // Fail if we couldn't parse the whole edges file
        return (file_iterator == eof);
    }

    // Parses the check-in file `fname` and stores, for every id that is a vertex of the
    // graph, the latitude/longitude of its first check-in.
    // Returns false if the file cannot be opened or was not consumed up to its end.
    bool read_locations(std::string fname) {
        // Semantic action: adds a location to a vertex in the graph.
        // Attribute layout (fusion tuple): 0 = vertex id, 1 = latitude, 2 = longitude;
        // the check-in time is dropped by x3::omit.
        auto add_location = [&](auto& ctx){
            auto const& attr = x3::_attr(ctx);
            auto const vertex_id = fusion::at_c<0>(attr);

            // insert() tells us whether this id has been seen before
            if (!location_already_added.insert(vertex_id).second)
                return true; // Exit, as we already handled the first location for this vertex

            // Test if vertex is in our graph
            // We are parsing locations from a different file than the graph, so
            // there might be inconsistencies
            auto const mapped = mapped_vertices.find(vertex_id);
            if (mapped == mapped_vertices.end()) {
                std::cerr << "Tried to add location to vertex " << vertex_id << ", but this vertex is not in our graph" << std::endl;
                return false;
            }

            // Add location to the vertex
            auto& props = g[mapped->second];
            props.latitude = fusion::at_c<1>(attr);
            props.longitude = fusion::at_c<2>(attr);
            return true;
        };

        // Parse the gowalla location file
        std::ifstream location_file(fname);
        if (!location_file) return false;

        boost::spirit::istream_iterator file_iterator(location_file >> std::noskipws), eof;
        x3::parse(file_iterator, eof,
            // [vertex_id] [time of checkin] [latitude] [longitude] [location] id
            *((x3::int_ >> '\t' >> x3::omit[*x3::graph] >> '\t' >> x3::double_ >> '\t' >> x3::double_)[add_location] >> '\t' >> x3::int_ >> x3::eol)
        );

        // Fail if we couldn't parse the whole location file
        return (file_iterator == eof);
    }

private:
    // We use setS to enforce our graph not to become a multigraph
    using graph = boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, vertex_property, edge_property>;
    using vertex_decriptor = graph::vertex_descriptor;

    std::map<int, vertex_decriptor> mapped_vertices; // id from the files -> vertex descriptor
    std::set<int> location_already_added;            // ids whose first check-in has been seen
    graph g;

    // Returns the vertex descriptor for `id`, adding a new vertex to the graph the
    // first time an id is seen.
    vertex_decriptor map_vertex(int id) {
        // lower_bound doubles as the insertion hint, so the map is searched only once
        auto const pos = mapped_vertices.lower_bound(id);
        if (pos != mapped_vertices.end() && pos->first == id)
            return pos->second; // vertex already known
        // Otherwise add vertex
        return mapped_vertices.emplace_hint(pos, id, boost::add_vertex(g))->second;
    }
};
// Entry point: loads the edge file, then the check-in file, reporting each failure on
// stderr. The locations are still attempted when the edge file fails.
// Exit status is 0 when both files were parsed completely and 1 otherwise, so that
// callers can detect a failed run.
int main() {
    Reader reader;
    bool ok = true;

    if (!reader.read_edges("loc-gowalla_edges.txt")) {
        std::cerr << "Couldn't parse whole edges file" << std::endl;
        ok = false;
    }

    if (!reader.read_locations("loc-gowalla_totalCheckins.txt")) {
        std::cerr << "Couldn't parse whole location file" << std::endl;
        ok = false;
    }

    return ok ? 0 : 1;
}
映射文件
作为对比,换用内存映射文件使它快得多:3 秒内即可完成(又快了 6 倍以上):
示例更改片段:
boost::iostreams::mapped_file_source mm(fname);
auto f = mm.begin(), l = mm.end();
x3::parse(f, l, *((x3::int_ >> '\t' >> x3::int_ >> x3::eol)[add_edge]));
内存开销
经过性能分析,map/set 的开销看起来并不算大:
据我所知,该程序共使用了 152 MiB 内存,其中乍一看只有 4.1 MiB 归属于 location_already_added。
减少内存使用和时间
即便如此,用动态位集替换 set<int> location_already_added
并删除 map<int, vertex_descriptor>
确实进一步减少了内存使用 以及 程序 运行 时间。
这次它在 2 秒内完成(又节省了 33%)。
由于显而易见的原因,内存占用也减少了约 10%:138.7 MiB。
变化:
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/dynamic_bitset.hpp>
#include <fstream>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace fusion = boost::fusion;
// Bundled per-vertex property: the geographic position attached to a vertex.
// Both members default to 0.0 so that a vertex which never receives a location does
// not expose indeterminate values.
struct vertex_property {
    double longitude = 0.0;
    double latitude = 0.0;
};
// Bundled per-edge property; edges carry no data
struct edge_property { };
// Reads the gowalla edge and check-in files into an undirected Boost graph, parsing
// straight from memory-mapped files.
// By default the ids from the files are used directly as vertex descriptors; when
// USE_VERTEX_DESCRIPTOR_MAPPING is defined to a non-zero value they are translated
// through a map instead.
struct Reader {
    Reader() {
        // Reserve capacity in the graph's vertex container up front
        g.m_vertices.reserve(1024);
    }

    // Parses the edge file `fname` ("<id>\t<id>" per line) and adds one edge per line.
    // Returns false if the file cannot be opened or was not consumed up to its end.
    bool read_edges(std::string fname) {
        // Semantic action: adds the edge described by one parsed line
        auto add_edge = [this](auto& ctx){
            // The attribute is a fusion sequence holding the two parsed ids
            auto const& ids = x3::_attr(ctx);
            // Resolve the endpoints in a fixed order: map_vertex may create a vertex,
            // and the evaluation order of function arguments is unspecified
            auto const source = this->map_vertex(fusion::at_c<0>(ids));
            auto const target = this->map_vertex(fusion::at_c<1>(ids));
            boost::add_edge(source, target, g);
        };

        // Same up-front check as read_locations, so that both readers report an
        // unreadable file through their return value
        std::ifstream edge_file(fname);
        if (!edge_file) return false;

        // Parse the gowalla edge file
        boost::iostreams::mapped_file_source mm(fname);
        auto f = mm.begin(), l = mm.end();
        x3::parse(f, l, *((x3::int_ >> '\t' >> x3::int_ >> x3::eol)[add_edge]));

        // Fail if we couldn't parse the whole edges file
        return f == l;
    }

    // Parses the check-in file `fname` and stores, for every id that is a vertex of the
    // graph, the latitude/longitude of its first check-in.
    // Returns false if the file cannot be opened or was not consumed up to its end.
    bool read_locations(std::string fname) {
        // One bit per vertex, set once the vertex has received its first location.
        // It is indexed by vertex descriptor, not by the raw id from the file, so an
        // index can never be out of range.
        boost::dynamic_bitset<> location_already_added(num_vertices(g));

        // Semantic action: adds a location to a vertex in the graph.
        // Attribute layout (fusion tuple): 0 = vertex id, 1 = latitude, 2 = longitude;
        // the check-in time is dropped by x3::omit.
        auto add_location = [&](auto& ctx){
            auto const& attr = x3::_attr(ctx);
            auto const vertex_id = fusion::at_c<0>(attr);

            // Test if vertex is in our graph
            // We are parsing locations from a different file than the graph, so
            // there might be inconsistencies. This must happen before the bitset is
            // touched, because an unknown id has no valid bit position.
            auto const mapped = this->mapped_vertex(vertex_id);
            if (graph::null_vertex() == mapped) {
                std::cerr << "Tried to add location to vertex " << vertex_id << ", but this vertex is not in our graph" << std::endl;
                return false;
            }

            if (location_already_added.test(mapped))
                return true; // Exit, as we already stored the location for this vertex
            location_already_added.set(mapped);

            // Add location to the vertex
            auto& props = g[mapped];
            props.latitude = fusion::at_c<1>(attr);
            props.longitude = fusion::at_c<2>(attr);
            return true;
        };

        // Report an unreadable file through the return value
        std::ifstream location_file(fname);
        if (!location_file) return false;

        // Parse the gowalla location file
        boost::iostreams::mapped_file_source mm(fname);
        auto f = mm.begin(), l = mm.end();
        x3::parse(f, l,
            // [vertex_id] [time of checkin] [latitude] [longitude] [location] id
            *((x3::int_ >> '\t' >> x3::omit[*x3::graph] >> '\t' >> x3::double_ >> '\t' >> x3::double_)[add_location] >> '\t' >> x3::int_ >> x3::eol)
        );

        // Fail if we couldn't parse the whole location file
        return f == l;
    }

    // We use setS to enforce our graph not to become a multigraph
    using graph = boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, vertex_property, edge_property>;

private:
    using vertex_decriptor = graph::vertex_descriptor;

    graph g;

#if USE_VERTEX_DESCRIPTOR_MAPPING
    std::map<int, vertex_decriptor> mapped_vertices; // id from the files -> vertex descriptor

    // Returns the vertex descriptor for `id`, adding a new vertex to the graph the
    // first time an id is seen.
    vertex_decriptor map_vertex(int id) {
        auto match = mapped_vertices.find(id);
        if (match != mapped_vertices.end())
            return match->second; // vertex already known
        else                      // Otherwise add vertex
            return mapped_vertices[id] = boost::add_vertex(g);
    }

    // Returns the vertex descriptor for `id`, or graph::null_vertex() if the id has
    // never been mapped. Does not modify the graph.
    vertex_decriptor mapped_vertex(int id) const {
        auto mapped = mapped_vertices.find(id);
        return mapped == mapped_vertices.end()
            ? graph::null_vertex()
            : mapped->second;
    }
#else
    // The id from the file is used directly as the vertex descriptor.
    // NOTE(review): this assumes ids are non-negative; a negative id converts to a
    // very large descriptor - confirm against the input data.
    static vertex_decriptor map_vertex(int id) { return id; }

    // Returns `id` as a vertex descriptor if it denotes an existing vertex, otherwise
    // graph::null_vertex(), so that callers never index the graph out of range.
    vertex_decriptor mapped_vertex(int id) const {
        if (id < 0)
            return graph::null_vertex();
        auto const candidate = static_cast<vertex_decriptor>(id);
        return candidate < num_vertices(g) ? candidate : graph::null_vertex();
    }
#endif
};
// Entry point: loads the edge file, then the check-in file, reporting each failure on
// stderr. The locations are still attempted when the edge file fails.
// Exit status is 0 when both files were parsed completely and 1 otherwise, so that
// callers can detect a failed run.
int main() {
    Reader reader;
    bool ok = true;

    if (!reader.read_edges("loc-gowalla_edges.txt")) {
        std::cerr << "Couldn't parse whole edges file" << std::endl;
        ok = false;
    }

    if (!reader.read_locations("loc-gowalla_totalCheckins.txt")) {
        std::cerr << "Couldn't parse whole location file" << std::endl;
        ok = false;
    }

    return ok ? 0 : 1;
}
我是 C++ 和 Boost Spirit X3 的新手。对于我的项目,我使用 boost spirit X3 将具有以下结构的两个文件中的地理社交图解析为 boost 图。
我有一个有效的实现。由于我之前没有任何使用这些库的经验,我想知道您对这种方法有何看法,以及您是否建议采用不同的方法。
在图文件中,每条边占一行。在解析边时,如果某个节点此前尚未出现,我必须先在图中创建该节点。我使用语义动作,每次遇到节点 ID 时都检查该节点是否已在图中。读完整行后,我再用一个语义动作添加这条边。
在位置文件中,每一行是某个节点在给定时间的一个已知位置。我只存储图中已知节点的第一个位置(使用自定义的 Boost 图属性)。
我有具体的问题,但很乐意收到任何想法和建议:
- 可以像我对图形文件那样使用嵌套语义操作吗?这会影响性能吗?
- 是否建议使用 Spirit X3 一次解析整个文件,还是应该使用 Spirit X3 单独解析每一行?
图(表示图中的边)
[user1] [user2]
0 3
地点
[user] [check-in time] [latitude] [longitude] [location id]
0 2010-10-19T23:55:27Z 30.2359091167 -97.7951395833 22847
Spirit X3 解析代码
// Parse the gowalla edge file
boost::spirit::istream_iterator file_iterator(edge_file), eof;
x3::phrase_parse(file_iterator, eof,
// Begin grammar
(
*((x3::int_[add_vertex] >> x3::int_[add_vertex])[add_edge])
),
// End grammar
x3::space
);
// Fail if we couldn't parse the whole edges file
if (file_iterator != eof) {
std::cerr << "Couldn't parse whole edges file" << std::endl;
}
// Parse the gowalla location file, one check-in per line:
//   vertex_id  time-of-checkin  latitude  longitude  location-id
// Every record must be terminated by x3::eol, so the stream has to pass whitespace
// through untouched: with the stream's skipws flag set, blanks and line breaks are
// dropped by the stream before the parser ever sees them. Clearing the flag here is
// harmless if the caller has already cleared it.
file_iterator = boost::spirit::istream_iterator(location_file >> std::noskipws);
x3::phrase_parse(file_iterator, eof,
    // Begin grammar
    (
        // The first four fields are handed to add_location; the trailing location id
        // is parsed but not used
        *((x3::int_ >> x3::lexeme[*x3::graph] >> x3::double_ >> x3::double_)[add_location] >> x3::int_ >> x3::eol)
    ),
    // End grammar
    // Skip blanks only (not line breaks), otherwise x3::eol could never match
    x3::blank
);
// Fail if we couldn't parse the whole location file
if (file_iterator != eof) {
    std::cerr << "Couldn't parse whole location file" << std::endl;
}
X3 调用的语义动作
// Semantic action: makes sure the graph has a vertex for the id that was just parsed.
// The action returns nothing on every path. X3 does not use the value returned by a
// semantic action (a match is rejected through x3::_pass(ctx) instead), and mixing
// `return false;` with a path that falls off the end would leave that path without a
// return value in a lambda whose return type is deduced as bool.
auto add_vertex = [&](auto& ctx){
    // The attribute of the x3::int_ this action is attached to is the parsed id
    auto const id = x3::_attr(ctx);
    // Nothing to do if the vertex is already known
    if (vertices.find(id) != vertices.end()) {
        return;
    }
    // Otherwise add a vertex to the graph and remember its descriptor under the id
    vertices[id] = boost::add_vertex(g);
};
// Semantic action for a complete edge record: connects its two endpoints in the graph
auto add_edge = [&](auto& ctx){
    // The attribute of the two-integer sequence is a boost fusion tuple with both ids
    auto const& ids = x3::_attr(ctx);
    // Translate the ids into the vertex descriptors stored in the map
    auto const first_vertex = vertices[fusion::at_c<0>(ids)];
    auto const second_vertex = vertices[fusion::at_c<1>(ids)];
    boost::add_edge(first_vertex, second_vertex, g);
};
// Semantic action: stores the first location seen for each vertex.
// Attribute layout (boost fusion tuple): 0 = vertex id, 1 = check-in time (string),
// 2 = latitude, 3 = longitude.
// Returns true when the location was stored or had been handled before, false when the
// id is not a vertex of the graph.
auto add_location = [&](auto& ctx){
    // Bind by reference: copying the tuple would also copy the time string on every line
    auto const& attr = x3::_attr(ctx);
    auto const vertex_id = fusion::at_c<0>(attr);
    // insert() reports whether the id is new, so one lookup both tests and records it
    if (!location_already_added.insert(vertex_id).second) {
        // Exit, as we already handled the first location for this vertex
        return true;
    }
    // Test if vertex is in our graph
    // We are parsing locations from a different file than the graph,
    // so there might be inconsistencies
    auto const known = vertices.find(vertex_id);
    if (known == vertices.end()) {
        std::cerr << "Tried to add location to vertex " << vertex_id << ", but this vertex is not in our graph" << std::endl;
        return false;
    }
    // Add location to the vertex
    auto& props = g[known->second];
    props.latitude = fusion::at_c<2>(attr);
    props.longitude = fusion::at_c<3>(attr);
    return true;
};
Boost 图
// Bundled per-vertex property: the geographic position attached to a vertex.
// Both members default to 0.0 so that a vertex which never receives a location does
// not expose indeterminate values.
struct vertex_property {
    double longitude = 0.0;
    double latitude = 0.0;
};
// Graph type used by the parser.
// The first selector is setS so that the graph cannot become a multigraph; the
// remaining arguments select vecS, undirectedS and the vertex/edge property types.
using graph = boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, vertex_property, edge_property>;
Q. Is it ok to use nested semantic actions as I do for the graph file? Does this hurt performance?
我不会这样做。直接整条地添加边可能更容易:
x3::parse(file_iterator, eof,
*((x3::int_ >> '\t' >> x3::int_ >> x3::eol)[add_edge])
);
其中 add_edge 可以简单地写成:
// Semantic action: adds the edge described by one parsed "<id>\t<id>" line
auto add_edge = [&](auto& ctx){
    // The attribute is a fusion sequence holding the two parsed integer ids
    auto const& ids = x3::_attr(ctx);
    // Resolve the endpoints one after the other: map_vertex may add a vertex as a side
    // effect, and the evaluation order of function arguments is unspecified
    auto const source = map_vertex(fusion::at_c<0>(ids));
    auto const target = map_vertex(fusion::at_c<1>(ids));
    boost::add_edge(source, target, g);
};
Q. Is it recommended to parse the whole file at once with Spirit X3 or should I parse every line individually with Spirit X3?
我认为 Spirit 对此没有任何推荐做法。我会一次解析整个文件。我建议使用内存映射文件,这样效率更高(可以直接进行随机访问迭代,无需 multi_pass 迭代器适配)。
一般备注:
您在使用对空白敏感的解析器,却把它们与 istream_iterator 一起使用。您必须记得在流上清除 skipws 标志。
vertices 映射(map)似乎是一种资源浪费;请考虑是否可以直接使用 [user] 值(vertex_id),而不必将其转换为 vertex_descriptor。
这是一个清理后的版本,可以在大约 19 秒内很好地解析来自 https://snap.stanford.edu/data/loc-gowalla.html 的文件(这已经相当快了):
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <fstream>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace fusion = boost::fusion;
// Bundled per-vertex property: the geographic position attached to a vertex.
// Both members default to 0.0 so that a vertex which never receives a location does
// not expose indeterminate values.
struct vertex_property {
    double longitude = 0.0;
    double latitude = 0.0;
};
// Bundled per-edge property; edges carry no data
struct edge_property { };
// Reads the gowalla edge and check-in files into an undirected Boost graph.
// Ids from the files are translated to vertex descriptors through mapped_vertices;
// a vertex is created the first time its id shows up in the edge file.
struct Reader {
    // Parses the edge file `fname` ("<id>\t<id>" per line) and adds one edge per line.
    // Returns false if the file cannot be opened or was not consumed up to its end.
    bool read_edges(std::string fname) {
        // Semantic action: adds the edge described by one parsed line
        auto add_edge = [this](auto& ctx){
            // The attribute is a fusion sequence holding the two parsed ids
            auto const& ids = x3::_attr(ctx);
            // Resolve the endpoints in a fixed order: map_vertex may create a vertex,
            // and the evaluation order of function arguments is unspecified
            auto const source = this->map_vertex(fusion::at_c<0>(ids));
            auto const target = this->map_vertex(fusion::at_c<1>(ids));
            boost::add_edge(source, target, g);
        };

        // Parse the gowalla edge file
        std::ifstream edge_file(fname);
        if (!edge_file) return false;

        // noskipws: the grammar matches tabs and line ends itself
        boost::spirit::istream_iterator file_iterator(edge_file >> std::noskipws), eof;
        x3::parse(file_iterator, eof, *((x3::int_ >> '\t' >> x3::int_ >> x3::eol)[add_edge]));

        // Fail if we couldn't parse the whole edges file
        return (file_iterator == eof);
    }

    // Parses the check-in file `fname` and stores, for every id that is a vertex of the
    // graph, the latitude/longitude of its first check-in.
    // Returns false if the file cannot be opened or was not consumed up to its end.
    bool read_locations(std::string fname) {
        // Semantic action: adds a location to a vertex in the graph.
        // Attribute layout (fusion tuple): 0 = vertex id, 1 = latitude, 2 = longitude;
        // the check-in time is dropped by x3::omit.
        auto add_location = [&](auto& ctx){
            auto const& attr = x3::_attr(ctx);
            auto const vertex_id = fusion::at_c<0>(attr);

            // insert() tells us whether this id has been seen before
            if (!location_already_added.insert(vertex_id).second)
                return true; // Exit, as we already handled the first location for this vertex

            // Test if vertex is in our graph
            // We are parsing locations from a different file than the graph, so
            // there might be inconsistencies
            auto const mapped = mapped_vertices.find(vertex_id);
            if (mapped == mapped_vertices.end()) {
                std::cerr << "Tried to add location to vertex " << vertex_id << ", but this vertex is not in our graph" << std::endl;
                return false;
            }

            // Add location to the vertex
            auto& props = g[mapped->second];
            props.latitude = fusion::at_c<1>(attr);
            props.longitude = fusion::at_c<2>(attr);
            return true;
        };

        // Parse the gowalla location file
        std::ifstream location_file(fname);
        if (!location_file) return false;

        boost::spirit::istream_iterator file_iterator(location_file >> std::noskipws), eof;
        x3::parse(file_iterator, eof,
            // [vertex_id] [time of checkin] [latitude] [longitude] [location] id
            *((x3::int_ >> '\t' >> x3::omit[*x3::graph] >> '\t' >> x3::double_ >> '\t' >> x3::double_)[add_location] >> '\t' >> x3::int_ >> x3::eol)
        );

        // Fail if we couldn't parse the whole location file
        return (file_iterator == eof);
    }

private:
    // We use setS to enforce our graph not to become a multigraph
    using graph = boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, vertex_property, edge_property>;
    using vertex_decriptor = graph::vertex_descriptor;

    std::map<int, vertex_decriptor> mapped_vertices; // id from the files -> vertex descriptor
    std::set<int> location_already_added;            // ids whose first check-in has been seen
    graph g;

    // Returns the vertex descriptor for `id`, adding a new vertex to the graph the
    // first time an id is seen.
    vertex_decriptor map_vertex(int id) {
        // lower_bound doubles as the insertion hint, so the map is searched only once
        auto const pos = mapped_vertices.lower_bound(id);
        if (pos != mapped_vertices.end() && pos->first == id)
            return pos->second; // vertex already known
        // Otherwise add vertex
        return mapped_vertices.emplace_hint(pos, id, boost::add_vertex(g))->second;
    }
};
// Entry point: loads the edge file, then the check-in file, reporting each failure on
// stderr. The locations are still attempted when the edge file fails.
// Exit status is 0 when both files were parsed completely and 1 otherwise, so that
// callers can detect a failed run.
int main() {
    Reader reader;
    bool ok = true;

    if (!reader.read_edges("loc-gowalla_edges.txt")) {
        std::cerr << "Couldn't parse whole edges file" << std::endl;
        ok = false;
    }

    if (!reader.read_locations("loc-gowalla_totalCheckins.txt")) {
        std::cerr << "Couldn't parse whole location file" << std::endl;
        ok = false;
    }

    return ok ? 0 : 1;
}
映射文件
作为对比,换用内存映射文件使它快得多:3 秒内即可完成(又快了 6 倍以上):
示例更改片段:
boost::iostreams::mapped_file_source mm(fname);
auto f = mm.begin(), l = mm.end();
x3::parse(f, l, *((x3::int_ >> '\t' >> x3::int_ >> x3::eol)[add_edge]));
内存开销
经过性能分析,map/set 的开销看起来并不算大:
据我所知,该程序共使用了 152 MiB 内存,其中乍一看只有 4.1 MiB 归属于 location_already_added。
减少内存使用和时间
即便如此,用动态位集替换 set<int> location_already_added
并删除 map<int, vertex_descriptor>
确实进一步减少了内存使用 以及 程序 运行 时间。
这次它在 2 秒内完成(又节省了 33%)。
由于显而易见的原因,内存占用也减少了约 10%:138.7 MiB。
变化:
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/dynamic_bitset.hpp>
#include <fstream>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace fusion = boost::fusion;
// Bundled per-vertex property: the geographic position attached to a vertex.
// Both members default to 0.0 so that a vertex which never receives a location does
// not expose indeterminate values.
struct vertex_property {
    double longitude = 0.0;
    double latitude = 0.0;
};
// Bundled per-edge property; edges carry no data
struct edge_property { };
// Reads the gowalla edge and check-in files into an undirected Boost graph, parsing
// straight from memory-mapped files.
// By default the ids from the files are used directly as vertex descriptors; when
// USE_VERTEX_DESCRIPTOR_MAPPING is defined to a non-zero value they are translated
// through a map instead.
struct Reader {
    Reader() {
        // Reserve capacity in the graph's vertex container up front
        g.m_vertices.reserve(1024);
    }

    // Parses the edge file `fname` ("<id>\t<id>" per line) and adds one edge per line.
    // Returns false if the file cannot be opened or was not consumed up to its end.
    bool read_edges(std::string fname) {
        // Semantic action: adds the edge described by one parsed line
        auto add_edge = [this](auto& ctx){
            // The attribute is a fusion sequence holding the two parsed ids
            auto const& ids = x3::_attr(ctx);
            // Resolve the endpoints in a fixed order: map_vertex may create a vertex,
            // and the evaluation order of function arguments is unspecified
            auto const source = this->map_vertex(fusion::at_c<0>(ids));
            auto const target = this->map_vertex(fusion::at_c<1>(ids));
            boost::add_edge(source, target, g);
        };

        // Same up-front check as read_locations, so that both readers report an
        // unreadable file through their return value
        std::ifstream edge_file(fname);
        if (!edge_file) return false;

        // Parse the gowalla edge file
        boost::iostreams::mapped_file_source mm(fname);
        auto f = mm.begin(), l = mm.end();
        x3::parse(f, l, *((x3::int_ >> '\t' >> x3::int_ >> x3::eol)[add_edge]));

        // Fail if we couldn't parse the whole edges file
        return f == l;
    }

    // Parses the check-in file `fname` and stores, for every id that is a vertex of the
    // graph, the latitude/longitude of its first check-in.
    // Returns false if the file cannot be opened or was not consumed up to its end.
    bool read_locations(std::string fname) {
        // One bit per vertex, set once the vertex has received its first location.
        // It is indexed by vertex descriptor, not by the raw id from the file, so an
        // index can never be out of range.
        boost::dynamic_bitset<> location_already_added(num_vertices(g));

        // Semantic action: adds a location to a vertex in the graph.
        // Attribute layout (fusion tuple): 0 = vertex id, 1 = latitude, 2 = longitude;
        // the check-in time is dropped by x3::omit.
        auto add_location = [&](auto& ctx){
            auto const& attr = x3::_attr(ctx);
            auto const vertex_id = fusion::at_c<0>(attr);

            // Test if vertex is in our graph
            // We are parsing locations from a different file than the graph, so
            // there might be inconsistencies. This must happen before the bitset is
            // touched, because an unknown id has no valid bit position.
            auto const mapped = this->mapped_vertex(vertex_id);
            if (graph::null_vertex() == mapped) {
                std::cerr << "Tried to add location to vertex " << vertex_id << ", but this vertex is not in our graph" << std::endl;
                return false;
            }

            if (location_already_added.test(mapped))
                return true; // Exit, as we already stored the location for this vertex
            location_already_added.set(mapped);

            // Add location to the vertex
            auto& props = g[mapped];
            props.latitude = fusion::at_c<1>(attr);
            props.longitude = fusion::at_c<2>(attr);
            return true;
        };

        // Report an unreadable file through the return value
        std::ifstream location_file(fname);
        if (!location_file) return false;

        // Parse the gowalla location file
        boost::iostreams::mapped_file_source mm(fname);
        auto f = mm.begin(), l = mm.end();
        x3::parse(f, l,
            // [vertex_id] [time of checkin] [latitude] [longitude] [location] id
            *((x3::int_ >> '\t' >> x3::omit[*x3::graph] >> '\t' >> x3::double_ >> '\t' >> x3::double_)[add_location] >> '\t' >> x3::int_ >> x3::eol)
        );

        // Fail if we couldn't parse the whole location file
        return f == l;
    }

    // We use setS to enforce our graph not to become a multigraph
    using graph = boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, vertex_property, edge_property>;

private:
    using vertex_decriptor = graph::vertex_descriptor;

    graph g;

#if USE_VERTEX_DESCRIPTOR_MAPPING
    std::map<int, vertex_decriptor> mapped_vertices; // id from the files -> vertex descriptor

    // Returns the vertex descriptor for `id`, adding a new vertex to the graph the
    // first time an id is seen.
    vertex_decriptor map_vertex(int id) {
        auto match = mapped_vertices.find(id);
        if (match != mapped_vertices.end())
            return match->second; // vertex already known
        else                      // Otherwise add vertex
            return mapped_vertices[id] = boost::add_vertex(g);
    }

    // Returns the vertex descriptor for `id`, or graph::null_vertex() if the id has
    // never been mapped. Does not modify the graph.
    vertex_decriptor mapped_vertex(int id) const {
        auto mapped = mapped_vertices.find(id);
        return mapped == mapped_vertices.end()
            ? graph::null_vertex()
            : mapped->second;
    }
#else
    // The id from the file is used directly as the vertex descriptor.
    // NOTE(review): this assumes ids are non-negative; a negative id converts to a
    // very large descriptor - confirm against the input data.
    static vertex_decriptor map_vertex(int id) { return id; }

    // Returns `id` as a vertex descriptor if it denotes an existing vertex, otherwise
    // graph::null_vertex(), so that callers never index the graph out of range.
    vertex_decriptor mapped_vertex(int id) const {
        if (id < 0)
            return graph::null_vertex();
        auto const candidate = static_cast<vertex_decriptor>(id);
        return candidate < num_vertices(g) ? candidate : graph::null_vertex();
    }
#endif
};
// Entry point: loads the edge file, then the check-in file, reporting each failure on
// stderr. The locations are still attempted when the edge file fails.
// Exit status is 0 when both files were parsed completely and 1 otherwise, so that
// callers can detect a failed run.
int main() {
    Reader reader;
    bool ok = true;

    if (!reader.read_edges("loc-gowalla_edges.txt")) {
        std::cerr << "Couldn't parse whole edges file" << std::endl;
        ok = false;
    }

    if (!reader.read_locations("loc-gowalla_totalCheckins.txt")) {
        std::cerr << "Couldn't parse whole location file" << std::endl;
        ok = false;
    }

    return ok ? 0 : 1;
}