使用 Boost 查询点
Query points using Boost
使用boost查询积分时,我似乎得到了不正确的结果。使用单独的算法(使用 BST),每个查询平均得到 2000 分的答案,但使用 boost 我得到 10 分的答案。这是我第一次使用 Boost,所以有人可以帮助我解决我搞砸的问题。
关于代码:我放置了 1M 个随机点(x,y 在 0 和 1 之间)。然后我 运行 查询 100 个小的随机区域并计算匹配数。
#include <iostream>
#include <vector>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <iomanip>
#include <boost/geometry/geometry.hpp>
#include <boost/geometry/geometries/geometries.hpp>
#include <boost/geometry/geometries/point.hpp>
#include <boost/geometry/index/rtree.hpp>
using namespace std;
namespace bg = boost::geometry;
namespace bgi = boost::geometry::index;
typedef bg::model::point<double, 2, bg::cs::cartesian> point;
typedef bg::model::box<point> box;
struct particle
{
double x;
double y;
};
int main(int argc, char *argv[])
{
int N=1000000;
clock_t start,stop;
vector<particle> myvec(N);
vector<particle>::iterator cii;
//Set vector values
for (cii = myvec.begin(); cii != myvec.end(); ++cii)
{
cii->x =1.0*rand()/RAND_MAX;
cii->y =1.0*rand()/RAND_MAX;
}
//Build R-tree
start=clock();
bgi::rtree<point, bgi::quadratic<16> > rtree;
for (cii=myvec.begin();cii!=myvec.end(); ++cii)
{
double x = cii->x;
double y = cii->y;
point p(x,y);
rtree.insert(p);
}
stop=clock();
cout<<"Time for building R tree "<<(stop-start)/(double) CLOCKS_PER_SEC<<endl;
//Build Query List
vector<particle> querylist(100);
for (cii = querylist.begin(); cii != querylist.end(); ++cii)
{
cii->x =1.0*rand()/RAND_MAX;
cii->y =1.0*rand()/RAND_MAX;
}
//Query R-tree
start=clock();
for (cii = querylist.begin(); cii != querylist.end(); ++cii)
{
double x = cii->x;
double y = cii->y;
double lx = x - .001;
double ux = x + .001;
double ly = y - .001;
double uy = y + .001;
point p1(lx,ly), p2(ux,uy);
box query_box(p1, p2);
vector<point> queryresult;
rtree.query(bgi::intersects(query_box), std::back_inserter(queryresult));
std::cout << "The intersection has " << (queryresult.size()) << " elements:\n";
}
stop=clock();
cout<<"Time for query "<<(stop-start)/(double) CLOCKS_PER_SEC<<endl;
return 0;
}
您的样本在统计上不可靠。
让我们将查询框显着放大:
double const delta = .05; // so boxes are .1x.1, i.e. 1/100th of total area
// and later
for (auto& p : querylist) {
box query_box(point {p.x - delta, p.y - delta}, point {p.x + delta, p.y + delta});
结果完全符合预期的命中数:
Time for building R tree 2182054657 nanoseconds
Time for query 52749607 nanoseconds
Average intersections: 10000.3
Standard deviation: 98.9173
Expected mean: 10000
代码
备注
- 将
rand()
替换为 Boost Random(uniform_real<>
分布)
- 修复了框使其不与区域边界重叠(因此我们不会得到倾斜的结果)
不用每次填充一个vector,只测量查询结果的个数:
auto n = std::distance(
bgi::qbegin(rtree, bgi::intersects(query_box)),
bgi::qend(rtree));
而不是 "anecdotally" 报告一些 "intersection" 计数,使用 Boost Accumulator 做一些适当的统计:
std::cout << "Average intersections: " << ba::mean(stats) << "\n";
std::cout << "Standard deviation: " << sqrt(ba::variance(stats)) << "\n";
std::cout << "Expected mean: " << (N * (2*delta) * (2*delta)) << "\n";
完整列表
#include <iostream>
#include <vector>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <iomanip>
#include <boost/geometry/geometry.hpp>
#include <boost/geometry/geometries/geometries.hpp>
#include <boost/geometry/geometries/point.hpp>
#include <boost/geometry/index/rtree.hpp>
#include <boost/random.hpp>
#include <boost/chrono.hpp>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics.hpp>
namespace bg = boost::geometry;
namespace bgi = boost::geometry::index;
double const delta = .05; // so boxes are .1x.1, i.e. 1/100th of total area
typedef bg::model::point<double, 2, bg::cs::cartesian> point;
typedef bg::model::box<point> box;
struct particle {
double x;
double y;
};
namespace ba = boost::accumulators;
namespace bat = ba::tag;
using hrc = boost::chrono::high_resolution_clock;
int main() {
boost::mt19937 prng(std::random_device{}());
size_t N = 1000000ul;
std::vector<particle> myvec;
myvec.reserve(N);
{
boost::uniform_real<double> dist(0,1);
// Set vector values
for (size_t i = 0; i<N; ++i)
myvec.push_back({dist(prng), dist(prng)});
}
// Build R-tree
auto start = hrc::now();
bgi::rtree<point, bgi::quadratic<16> > rtree;
for (auto& p : myvec)
rtree.insert(point {p.x, p.y});
auto stop = hrc::now();
std::cout << "Time for building R tree " << (stop - start) << std::endl;
// Build Query List
std::vector<particle> querylist;
querylist.reserve(100);
{
boost::uniform_real<double> dist(0+delta,1-delta);
for (size_t i = 0; i<100; ++i)
querylist.push_back({dist(prng), dist(prng)});
}
ba::accumulator_set<double, ba::stats<bat::mean, bat::variance>> stats;
// Query R-tree
start = hrc::now();
for (auto& p : querylist) {
box query_box(point {p.x - delta, p.y - delta}, point {p.x + delta, p.y + delta});
#if 0
std::vector<point> queryresult;
rtree.query(bgi::intersects(query_box), std::back_inserter(queryresult));
//std::cout << "The intersection has " << queryresult.size() << " elements:\n";
stats(queryresult.size());
#else
auto n = std::distance(
bgi::qbegin(rtree, bgi::intersects(query_box)),
bgi::qend(rtree));
stats(n);
#endif
}
stop = hrc::now();
std::cout << "Time for query " << (stop - start) << "\n";
std::cout << "Average intersections: " << ba::mean(stats) << "\n";
std::cout << "Standard deviation: " << sqrt(ba::variance(stats)) << "\n";
std::cout << "Expected mean: " << (N * (2*delta) * (2*delta)) << "\n";
}
缩放
注意结果比例:
double const delta = .005; // so boxes are .01x.01, i.e. 1/10000th of total area
Time for building R tree 2101529662 nanoseconds
Time for query 2982070 nanoseconds
Average intersections: 99.69
Standard deviation: 8.96069
Expected mean: 100
甚至
double const delta = .00005; // so boxes are .0001x.0001, i.e. 1/100000000th of total area
Time for building R tree 726361394 nanoseconds
Time for query 216690 nanoseconds
Average intersections: 0.02
Standard deviation: 0.14
Expected mean: 0.01
使用boost查询积分时,我似乎得到了不正确的结果。使用单独的算法(使用 BST),每个查询平均得到 2000 分的答案,但使用 boost 我得到 10 分的答案。这是我第一次使用 Boost,所以有人可以帮助我解决我搞砸的问题。
关于代码:我放置了 1M 个随机点(x,y 在 0 和 1 之间)。然后我 运行 查询 100 个小的随机区域并计算匹配数。
#include <iostream>
#include <vector>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <iomanip>
#include <boost/geometry/geometry.hpp>
#include <boost/geometry/geometries/geometries.hpp>
#include <boost/geometry/geometries/point.hpp>
#include <boost/geometry/index/rtree.hpp>
using namespace std;
namespace bg = boost::geometry;
namespace bgi = boost::geometry::index;
typedef bg::model::point<double, 2, bg::cs::cartesian> point;
typedef bg::model::box<point> box;
struct particle
{
double x;
double y;
};
int main(int argc, char *argv[])
{
int N=1000000;
clock_t start,stop;
vector<particle> myvec(N);
vector<particle>::iterator cii;
//Set vector values
for (cii = myvec.begin(); cii != myvec.end(); ++cii)
{
cii->x =1.0*rand()/RAND_MAX;
cii->y =1.0*rand()/RAND_MAX;
}
//Build R-tree
start=clock();
bgi::rtree<point, bgi::quadratic<16> > rtree;
for (cii=myvec.begin();cii!=myvec.end(); ++cii)
{
double x = cii->x;
double y = cii->y;
point p(x,y);
rtree.insert(p);
}
stop=clock();
cout<<"Time for building R tree "<<(stop-start)/(double) CLOCKS_PER_SEC<<endl;
//Build Query List
vector<particle> querylist(100);
for (cii = querylist.begin(); cii != querylist.end(); ++cii)
{
cii->x =1.0*rand()/RAND_MAX;
cii->y =1.0*rand()/RAND_MAX;
}
//Query R-tree
start=clock();
for (cii = querylist.begin(); cii != querylist.end(); ++cii)
{
double x = cii->x;
double y = cii->y;
double lx = x - .001;
double ux = x + .001;
double ly = y - .001;
double uy = y + .001;
point p1(lx,ly), p2(ux,uy);
box query_box(p1, p2);
vector<point> queryresult;
rtree.query(bgi::intersects(query_box), std::back_inserter(queryresult));
std::cout << "The intersection has " << (queryresult.size()) << " elements:\n";
}
stop=clock();
cout<<"Time for query "<<(stop-start)/(double) CLOCKS_PER_SEC<<endl;
return 0;
}
您的样本在统计上不可靠。
让我们将查询框显着放大:
double const delta = .05; // so boxes are .1x.1, i.e. 1/100th of total area
// and later
for (auto& p : querylist) {
box query_box(point {p.x - delta, p.y - delta}, point {p.x + delta, p.y + delta});
结果完全符合预期的命中数:
Time for building R tree 2182054657 nanoseconds
Time for query 52749607 nanoseconds
Average intersections: 10000.3
Standard deviation: 98.9173
Expected mean: 10000
代码
备注
- 将
rand()
替换为 Boost Random(uniform_real<>
分布) - 修复了框使其不与区域边界重叠(因此我们不会得到倾斜的结果)
不用每次填充一个vector,只测量查询结果的个数:
auto n = std::distance( bgi::qbegin(rtree, bgi::intersects(query_box)), bgi::qend(rtree));
而不是 "anecdotally" 报告一些 "intersection" 计数,使用 Boost Accumulator 做一些适当的统计:
std::cout << "Average intersections: " << ba::mean(stats) << "\n"; std::cout << "Standard deviation: " << sqrt(ba::variance(stats)) << "\n"; std::cout << "Expected mean: " << (N * (2*delta) * (2*delta)) << "\n";
完整列表
#include <iostream>
#include <vector>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <iomanip>
#include <boost/geometry/geometry.hpp>
#include <boost/geometry/geometries/geometries.hpp>
#include <boost/geometry/geometries/point.hpp>
#include <boost/geometry/index/rtree.hpp>
#include <boost/random.hpp>
#include <boost/chrono.hpp>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics.hpp>
namespace bg = boost::geometry;
namespace bgi = boost::geometry::index;
double const delta = .05; // so boxes are .1x.1, i.e. 1/100th of total area
typedef bg::model::point<double, 2, bg::cs::cartesian> point;
typedef bg::model::box<point> box;
struct particle {
double x;
double y;
};
namespace ba = boost::accumulators;
namespace bat = ba::tag;
using hrc = boost::chrono::high_resolution_clock;
int main() {
boost::mt19937 prng(std::random_device{}());
size_t N = 1000000ul;
std::vector<particle> myvec;
myvec.reserve(N);
{
boost::uniform_real<double> dist(0,1);
// Set vector values
for (size_t i = 0; i<N; ++i)
myvec.push_back({dist(prng), dist(prng)});
}
// Build R-tree
auto start = hrc::now();
bgi::rtree<point, bgi::quadratic<16> > rtree;
for (auto& p : myvec)
rtree.insert(point {p.x, p.y});
auto stop = hrc::now();
std::cout << "Time for building R tree " << (stop - start) << std::endl;
// Build Query List
std::vector<particle> querylist;
querylist.reserve(100);
{
boost::uniform_real<double> dist(0+delta,1-delta);
for (size_t i = 0; i<100; ++i)
querylist.push_back({dist(prng), dist(prng)});
}
ba::accumulator_set<double, ba::stats<bat::mean, bat::variance>> stats;
// Query R-tree
start = hrc::now();
for (auto& p : querylist) {
box query_box(point {p.x - delta, p.y - delta}, point {p.x + delta, p.y + delta});
#if 0
std::vector<point> queryresult;
rtree.query(bgi::intersects(query_box), std::back_inserter(queryresult));
//std::cout << "The intersection has " << queryresult.size() << " elements:\n";
stats(queryresult.size());
#else
auto n = std::distance(
bgi::qbegin(rtree, bgi::intersects(query_box)),
bgi::qend(rtree));
stats(n);
#endif
}
stop = hrc::now();
std::cout << "Time for query " << (stop - start) << "\n";
std::cout << "Average intersections: " << ba::mean(stats) << "\n";
std::cout << "Standard deviation: " << sqrt(ba::variance(stats)) << "\n";
std::cout << "Expected mean: " << (N * (2*delta) * (2*delta)) << "\n";
}
缩放
注意结果比例:
double const delta = .005; // so boxes are .01x.01, i.e. 1/10000th of total area
Time for building R tree 2101529662 nanoseconds
Time for query 2982070 nanoseconds
Average intersections: 99.69
Standard deviation: 8.96069
Expected mean: 100
甚至
double const delta = .00005; // so boxes are .0001x.0001, i.e. 1/100000000th of total area
Time for building R tree 726361394 nanoseconds
Time for query 216690 nanoseconds
Average intersections: 0.02
Standard deviation: 0.14
Expected mean: 0.01