计算轴对齐立方体并集的体积

Computing the volume of the union of axis-aligned cubes

给定的是一组 Sn 轴对齐的立方体。任务是找出 S 中所有立方体的 union 的体积。这意味着 2 个或更多立方体的每个体积重叠只计算一次。该集合具体包含所有立方体的所有坐标。

我找到了几篇关于这个主题的论文,提出了完成任务的算法。 This paper for example generalizes the problem to any dimension d rather than the trivial d=3, and to boxes rather than cubes. This paper as well as a few others solve the problem in time O(n^1.5) or slightly better. Another paper which looks promising and is specific to 3d-cubes is this one 解决了 O(n^4/3 log n) 中的任务。但是这些论文看起来相当复杂,至少对我来说是这样,我无法理解它们。

是否有任何相对简单的伪代码或过程可以让我遵循来实现这个想法?我正在寻找一组说明,具体说明如何处理这些立方体。任何实施也将非常出色。 O(n^2) 甚至 O(n^3) 都可以。


目前,我的方法是计算总体积,即所有立方体的所有体积之和,然后计算每2个立方体的重叠,并将其从总体积中减去。问题是每个这样的重叠可能(或可能不是)不同的立方体对,这意味着重叠可以由例如 5 个立方体共享。在这种方法中,重叠将被计算 10 次而不是一次。所以我在想也许包含排除原则可能会证明自己有用,但我不知道具体如何实施。计算每个重叠(天真地)已经 O(n^2),但可行。有没有更好的方法来计算所有这些重叠? 无论如何,我不认为这是一个有用的方法,继续沿着这些路线前进。

这里有一些 Python(抱歉,没有注意到 Java 标签)实现了 user3386109 的建议。该算法是 O(n³ log n)。我们可以通过对所有多维数据集的事件进行一次排序并每次提取我们需要的排序子序列来降低 O(n³),但这也许已经足够了。

import collections

Interval = collections.namedtuple("Interval", ["lower", "upper"])
Cube = collections.namedtuple("Cube", ["x", "y", "z"])


def length(interval):
    return interval.upper - interval.lower


def length_of_union(intervals):
    events = []
    for interval in intervals:
        events.append((interval.lower, 1))
        events.append((interval.upper, -1))
    events.sort()
    previous = None
    overlap = 0
    total = 0
    for x, delta in events:
        if overlap > 0:
            total += x - previous
        previous = x
        overlap += delta
    return total


def all_boundaries(intervals):
    boundaries = set()
    for interval in intervals:
        boundaries.add(interval.lower)
        boundaries.add(interval.upper)
    return sorted(boundaries)


def subdivide_at(interval, boundaries):
    lower = interval.lower
    for x in sorted(boundaries):  # Resort is O(n) due to Timsort.
        if x < lower:
            pass
        elif x < interval.upper:
            yield Interval(lower, x)
            lower = x
        else:
            yield Interval(lower, interval.upper)
            break


def volume_of_union(cubes):
    x_boundaries = all_boundaries(cube.x for cube in cubes)
    y_boundaries = all_boundaries(cube.y for cube in cubes)
    sub_problems = collections.defaultdict(list)
    for cube in cubes:
        for x in subdivide_at(cube.x, x_boundaries):
            for y in subdivide_at(cube.y, y_boundaries):
                sub_problems[(x, y)].append(cube.z)
    return sum(
        length(x) * length(y) * length_of_union(z_intervals)
        for ((x, y), z_intervals) in sub_problems.items()
    )


# Test code below.

import random

Point = collections.namedtuple("Point", ["x", "y", "z"])


def mean(sequence):
    n = 0
    x_bar = 0
    for x in sequence:
        n += 1
        x_bar += (x - x_bar) / n
    return x_bar


def random_interval():
    a = random.random()
    b = random.random()
    return Interval(min(a, b), max(a, b))


def in_interval(x, interval):
    return interval.lower <= x <= interval.upper


test_intervals = [random_interval() for i in range(10)]
sample_coordinates = [random.random() for i in range(1000000)]
sampled_length = mean(
    any(in_interval(x, interval) for interval in test_intervals)
    for x in sample_coordinates
)
print(length_of_union(test_intervals), sampled_length)


def random_cube():
    return Cube(random_interval(), random_interval(), random_interval())


def in_cube(point, cube):
    return (
        in_interval(point.x, cube.x)
        and in_interval(point.y, cube.y)
        and in_interval(point.z, cube.z)
    )


test_cubes = [random_cube() for i in range(10)]
sample_points = [
    Point(random.random(), random.random(), random.random()) for i in range(1000000)
]
sampled_volume = mean(
    any(in_cube(point, cube) for cube in test_cubes) for point in sample_points
)
print(volume_of_union(test_cubes), sampled_volume)

我们可以递归地解决这个问题,通过定义一个算法来解决 D 维立方体的问题(其中 D=1 是线,2 是正方形,3 是立方体,等等)。我假设立方体是一个结构,有两个点(起点和终点)定义了它们的边界。

注意:我没有 运行 这段代码,但它应该可以工作(直到愚蠢的 python 错误)。复杂度预计为 O((N log N)^D),其中 D 是维数,代码应该是直截了当的。

# Define a recursive computation, working on a given set of cubes and
# an index of the current dimension.
def find_union_area(cubes, dim_index=0):
  if len(cubes) == 0:
    return 0

  # We want to have a list of tuples of the form `(x_val, side, cube)` where
  # side is either start or end.
  starts = [{
    'val': cube.start[dim_index],
    'side': 'start',
    'cube': cube
  } for cube in cubes]
  ends = [{
    'val': cube.start[dim_index],
    'side': 'start',
    'cube': cube
  } for cube in cubes]

  # Sort all start and end locations of the cubes along the current axis.  
  locations = sorted(starts + ends, key=lambda l: l['val'])

  # Iterate the above list, each time on the interval between x_i and x_(i+1)
  # to compute the overall volume/area minus overlap.
  result = 0

  curr = locations[0]
  assert cur['side'] == 'start'

  # Track the cubes in the current interval
  current_cubes = {loc.cube}

  for loc in locations[1:]:
    prev = curr
    curr = loc
 
    if curr['side'] == 'end':
      current_cubes.remove(curr['cube'])
    else:
      current_cubes.add(curr['cube'])

    # If the current interval has 0 length, it is non-interesting
    # This would happen if two cubes start/end at the same place along the axis
    if curr['val'] == prev['val']:
      continue

    # If the current interval is empty it is also non-interesting
    if len(current_cubes) == 0:
      continue
    
    # If we are in 1-D (i.e. this is the last dimension), the size of
    # the interval is simple
    if dim_index == len(curr.cube.start):
      result += curr['val'] - prev['val']
    # Otherwise we need to check the volume also using the next dimensions
    # for the current cubes, and then multiply by that
    else:
      curr_dim_size = curr['val'] - prev['val']
      next_dims_size = find_union_area(current_cubes, dim_index + 1)
      result += curr_dim_size * next_dims_size
 
  return result

我用 C++ 实现了 Bentley 的算法 (O(n^2 log n))。 (我知道你想要 Java,但 C++ 是我工作的主要斧头,考虑到我正在考虑向 Overmars 和 Yap 努力,模板在这里太有用了。)

// Import some basic stuff from the standard library.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <limits>
#include <memory>
#include <optional>
#include <random>
#include <tuple>
#include <utility>
#include <vector>

// Define vocabulary types.

class Interval {
 public:
  Interval(double a, double b) : min_(std::fmin(a, b)), max_(std::fmax(a, b)) {}

  double min() const { return min_; }
  double max() const { return max_; }

 private:
  double min_, max_;
};

// Cartesian product of an interval and a set.
template <typename Projection>
class IntervalTimes {
 public:
  IntervalTimes(Interval interval, Projection projection)
      : interval_(interval), projection_(projection) {}

  Interval interval() const { return interval_; }
  Projection projection() const { return projection_; }

 private:
  Interval interval_;
  Projection projection_;
};

// Zero-dimensional base case.
struct Interval0 {};

using Interval1 = IntervalTimes<Interval0>;
using Interval2 = IntervalTimes<Interval1>;
using Interval3 = IntervalTimes<Interval2>;

using Box = Interval3;
Box MakeBox(Interval x, Interval y, Interval z) {
  return IntervalTimes{x, IntervalTimes{y, IntervalTimes{z, Interval0{}}}};
}

// Define basic operations.

double Length(Interval interval) { return interval.max() - interval.min(); }

double Measure(Interval0) { return 1; }

template <typename Projection>
double Measure(IntervalTimes<Projection> product) {
  return Length(product.interval()) * Measure(product.projection());
}

bool Contains(Interval interval, double x) {
  return interval.min() < x && x < interval.max();
}

bool Contains(Interval i1, Interval i2) {
  return i1.min() <= i2.min() && i2.max() <= i1.max();
}

bool Contains(Box box, double x, double y, double z) {
  return Contains(box.interval(), x) &&
         Contains(box.projection().interval(), y) &&
         Contains(box.projection().projection().interval(), z);
}

bool Intersects(Interval i1, Interval i2) {
  return std::fmax(i1.min(), i2.min()) < std::fmin(i1.max(), i2.max());
}

template <typename Projection>
std::vector<Projection> ProjectEach(
    const std::vector<IntervalTimes<Projection>>& objects) {
  std::vector<Projection> projections;
  projections.reserve(objects.size());
  for (const IntervalTimes<Projection>& object : objects) {
    projections.push_back(object.projection());
  }
  return projections;
}

template <typename T>
std::vector<T> Select(const std::vector<bool>& included,
                      const std::vector<T>& objects) {
  std::vector<T> selected;
  for (std::size_t j = 0; j < included.size() && j < objects.size(); j++) {
    if (included[j]) selected.push_back(objects[j]);
  }
  return selected;
}

// Returns the unique x values that appear in objects, sorted.
template <typename Projection>
std::vector<double> Boundaries(
    const std::vector<IntervalTimes<Projection>>& objects) {
  std::vector<double> boundaries;
  boundaries.reserve(objects.size() * 2);
  for (const IntervalTimes<Projection>& object : objects) {
    boundaries.push_back(object.interval().min());
    boundaries.push_back(object.interval().max());
  }
  std::sort(boundaries.begin(), boundaries.end());
  boundaries.erase(std::unique(boundaries.begin(), boundaries.end()),
                   boundaries.end());
  return boundaries;
}

// The basic offline algorithm for d dimensions uses the online algorithm for
// d-1 dimensions. Each object gives rise to two events. We sweep over the
// events, integrating as we go using the online algorithm.

template <typename Object>
class OnlineMeasure {
 public:
  virtual ~OnlineMeasure() = default;

  virtual void Initialize(std::vector<Object>) {}

  // Adds the object at index j in the objects presented to Initialize().
  virtual void Add(std::size_t j) = 0;

  // Removes the object at index j in the objects presented to Initialize().
  virtual void Remove(std::size_t j) = 0;

  // Returns the measure of the union of the objects added but not removed.
  virtual double Measure() const = 0;
};

enum class Side { kMin, kMax };
// {x, side, index}.
using Event = std::tuple<double, Side, std::size_t>;

template <typename Projection>
double OfflineMeasure(const std::vector<IntervalTimes<Projection>>& objects,
                      OnlineMeasure<Projection>& online_measure) {
  // Construct the events and sort them by x with min before max.
  std::vector<Event> events;
  events.reserve(objects.size() * 2);
  for (std::size_t j = 0; j < objects.size(); j++) {
    Interval interval = objects[j].interval();
    events.push_back({interval.min(), Side::kMin, j});
    events.push_back({interval.max(), Side::kMax, j});
  }
  std::sort(events.begin(), events.end());

  // Sweep x to integrate.
  double measure = 0;
  std::optional<double> previous_x;
  online_measure.Initialize(ProjectEach(objects));
  for (const auto& [x, side, j] : events) {
    if (previous_x) measure += (x - *previous_x) * online_measure.Measure();
    previous_x = x;
    switch (side) {
      case Side::kMin:
        online_measure.Add(j);
        break;
      case Side::kMax:
        online_measure.Remove(j);
        break;
    }
  }
  return measure;
}

// We can use the algorithm above as a slow online algorithm.
template <typename Projection>
class OfflineOnlineMeasure : public OnlineMeasure<IntervalTimes<Projection>> {
 public:
  OfflineOnlineMeasure(
      std::unique_ptr<OnlineMeasure<Projection>> online_measure)
      : online_measure_(std::move(online_measure)) {}

  void Initialize(std::vector<IntervalTimes<Projection>> objects) override {
    objects_ = std::move(objects);
    included_.assign(objects_.size(), false);
  }

  void Add(std::size_t j) override { included_.at(j) = true; }

  void Remove(std::size_t j) override { included_.at(j) = false; }

  double Measure() const override {
    return OfflineMeasure(Select(included_, objects_), *online_measure_);
  }

 private:
  std::unique_ptr<OnlineMeasure<Projection>> online_measure_;
  std::vector<bool> included_;
  std::vector<IntervalTimes<Projection>> objects_;
};

// Zero-dimensional base case for Klee's algorithm.
class KleeOnlineMeasure : public OnlineMeasure<Interval0> {
 public:
  void Add(std::size_t) override { multiplicity_++; }
  void Remove(std::size_t) override { multiplicity_--; }
  double Measure() const override { return multiplicity_ > 0 ? 1 : 0; }

 private:
  std::size_t multiplicity_ = 0;
};

double KleeMeasure(const std::vector<Box>& boxes) {
  std::unique_ptr<OnlineMeasure<Interval0>> measure0 =
      std::make_unique<KleeOnlineMeasure>();
  std::unique_ptr<OnlineMeasure<Interval1>> measure1 =
      std::make_unique<OfflineOnlineMeasure<Interval0>>(std::move(measure0));
  OfflineOnlineMeasure<Interval1> measure2(std::move(measure1));
  return OfflineMeasure(boxes, measure2);
}

// The fundamental insight into Bentley's algorithm is a segment tree that
// solves the online problem in one dimension.
class Segment {
 public:
  explicit Segment(Interval interval)
      : left_{nullptr},
        right_{nullptr},
        interval_{interval},
        multiplicity_{0},
        descendant_length_{0} {}

  Segment(std::unique_ptr<Segment> left, std::unique_ptr<Segment> right)
      : left_{std::move(left)},
        right_{std::move(right)},
        interval_{left_->interval_.min(), right_->interval_.max()},
        multiplicity_{0},
        descendant_length_{left_->LengthOfUnion() + right_->LengthOfUnion()} {
    assert(left_->interval_.max() == right_->interval_.min());
  }

  double LengthOfUnion() const {
    return multiplicity_ > 0 ? Length(interval_) : descendant_length_;
  }

  void Add(const Interval i) { Increment(i, 1); }
  void Remove(const Interval i) { Increment(i, -1); }

 private:
  void Increment(const Interval i, std::size_t delta) {
    if (Contains(i, interval_)) {
      multiplicity_ += delta;
    } else if (Intersects(i, interval_)) {
      left_->Increment(i, delta);
      right_->Increment(i, delta);
      descendant_length_ = left_->LengthOfUnion() + right_->LengthOfUnion();
    }
  }

  // Children.
  std::unique_ptr<Segment> left_;
  std::unique_ptr<Segment> right_;
  // Interval.
  Interval interval_;
  // Adds minus removes for this whole segment.
  std::size_t multiplicity_;
  // Total length from proper descendants.
  double descendant_length_;
};

std::unique_ptr<Segment> ConstructSegmentTree(
    const std::vector<double>& boundaries) {
  if (boundaries.empty()) return nullptr;
  std::vector<std::unique_ptr<Segment>> segments;
  segments.reserve(boundaries.size() - 1);
  for (std::size_t j = 1; j < boundaries.size(); j++) {
    segments.push_back(
        std::make_unique<Segment>(Interval{boundaries[j - 1], boundaries[j]}));
  }
  while (segments.size() > 1) {
    std::vector<std::unique_ptr<Segment>> parent_segments;
    parent_segments.reserve(segments.size() / 2 + segments.size() % 2);
    for (std::size_t j = 1; j < segments.size(); j += 2) {
      parent_segments.push_back(std::make_unique<Segment>(
          std::move(segments[j - 1]), std::move(segments[j])));
    }
    if (segments.size() % 2 == 1) {
      parent_segments.push_back(std::move(segments.back()));
    }
    segments = std::move(parent_segments);
  }
  return std::move(segments.front());
}

class BentleyOnlineMeasure : public OnlineMeasure<Interval1> {
 public:
  void Initialize(std::vector<Interval1> intervals) override {
    intervals_ = std::move(intervals);
    root_ = ConstructSegmentTree(Boundaries(intervals_));
  }

  void Add(std::size_t j) override { root_->Add(intervals_.at(j).interval()); }

  void Remove(std::size_t j) override {
    root_->Remove(intervals_.at(j).interval());
  }

  double Measure() const override {
    return root_ != nullptr ? root_->LengthOfUnion() : 0;
  }

 private:
  std::vector<Interval1> intervals_;
  std::unique_ptr<Segment> root_;
};

double BentleyMeasure(const std::vector<Box>& boxes) {
  std::unique_ptr<OnlineMeasure<Interval1>> measure1 =
      std::make_unique<BentleyOnlineMeasure>();
  OfflineOnlineMeasure<Interval1> measure2(std::move(measure1));
  return OfflineMeasure(boxes, measure2);
}

int main() {
  std::default_random_engine gen;
  std::uniform_real_distribution<double> uniform(0, 1);
  std::vector<Box> boxes;
  static constexpr std::size_t kBoxCount = 20;
  boxes.reserve(kBoxCount);
  for (std::size_t j = 0; j < kBoxCount; j++) {
    boxes.push_back(MakeBox({uniform(gen), uniform(gen)},
                            {uniform(gen), uniform(gen)},
                            {uniform(gen), uniform(gen)}));
  }
  std::cout << KleeMeasure(boxes) << "\n";
  std::cout << BentleyMeasure(boxes) << "\n";

  double hits = 0;
  static constexpr std::size_t kSampleCount = 1000000;
  for (std::size_t j = 0; j < kSampleCount; j++) {
    const double x = uniform(gen);
    const double y = uniform(gen);
    const double z = uniform(gen);
    for (const Box& box : boxes) {
      if (Contains(box, x, y, z)) {
        hits++;
        break;
      }
    }
  }
  std::cout << hits / kSampleCount << "\n";
}