在结构上使用 thrust reduce_by_key

Question

我是 CUDA 的新手，我正在尝试对结构应用 reduce_by_key 操作。

// Pairs an integer index with a 3-component integer location.
struct index_and_loc {
  int index;      // key; several elements may carry the same index
  int3 location;  // x/y/z integer triple (CUDA built-in vector type)
};  // a struct definition must be terminated with ';'

我有一个由 index_and_loc 组成的向量，其中多个元素可能具有相同的 index：

// Illustrative input: five {index, {x, y, z}} records; the first three
// carry index 0 and the last two carry index 1.
thrust::host_vector<index_and_loc> my_vector;
my_vector= { {0, {0, 2, 5}},
             {0, {1, 3, 4}},
             {0, {0, 1, 3}},
             {1, {2, 1, 0}},
             {1, {2, 2, 2}}
           }

我想要两个输出，一个是具有相同索引的所有元素的总和，另一个是存储每个索引的实例数量的向量。所以我的结果输出将是：

// Sum all elements with index 0 and 1
sum_vector = {{0, {1, 6, 12}},
              {1, {4, 3, 2}}
             }

instances_vector = {3, // Number of elements with index = 0
                    2} // Number of elements with index = 1

看了 Thrust 文档中的方法后，我想我应该使用 reduce_by_key。我可以把 my_vector 作为 input1（键），再把一个与 input1 等长、全为 1 的向量作为 input2（值），以结构体的 index 作为键进行归约，并用 thrust::plus<int> 作为 BinaryFunction 对这个全 1 向量求和。然而，这样无法对 input1 向量中的 int3 元素求和，因为 BinaryFunction 只作用于 input2。

有什么办法可以做到吗？如果我的问题不是很清楚，请告诉我。

编辑：

我重新思考了这个问题，并把它简化了：我给 my_vector 的元素增加了一个 instance 字段，并将其全部设为 1。现在，借助 talonmies 的回答，我可以同时得到我想要的两个结果。这是我的代码：

#include <iostream>

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/extrema.h>
#include <thrust/functional.h>
#include <thrust/reduce.h>

// Record holding an index, an integer 3D location and an instance counter.
// Equality compares `index` only; addition sums `location` and `instance`
// and keeps the left-hand operand's `index`.
struct index_and_loc {
  int index;
  int3 location;
  int instance;

  index_and_loc() = default;

  // Builds a record from its three fields.
  __host__ __device__
  index_and_loc(int index_, int3 location_, int instance_)
      : index(index_), location(location_), instance(instance_) {}

  // Copies every field of `y` into this record.
  __host__ __device__
  index_and_loc& operator=(const index_and_loc& y) {
    index = y.index;
    location = y.location;
    instance = y.instance;
    return *this;
  }

  // True when both records carry the same index; other fields are ignored.
  __host__ __device__
  bool operator==(const index_and_loc& y) const {
    return index == y.index;
  }

  // Component-wise sum of the locations and of the instance counters.
  // The result takes its index from the left-hand operand.
  __host__ __device__
  index_and_loc operator+(const index_and_loc& y) const {
    const int3 total = make_int3(location.x + y.location.x,
                                 location.y + y.location.y,
                                 location.z + y.location.z);
    return index_and_loc(index, total, instance + y.instance);
  }
};

// Reduces consecutive records that share an index with
// thrust::reduce_by_key (summing location and instance), then prints one
// line per resulting group.
int main() {

  // Stage the input on the host and copy it to the device in one step.
  // Constructing the device_vector with num_points elements and then calling
  // push_back would append after num_points value-initialized records,
  // leaving 2 * num_points elements in the vector.
  const index_and_loc h_points[] = {
    index_and_loc(0, make_int3(0, 2, 5), 1),
    index_and_loc(0, make_int3(1, 3, 4), 1),
    index_and_loc(0, make_int3(0, 1, 3), 1),
    index_and_loc(1, make_int3(2, 1, 0), 1),
    index_and_loc(1, make_int3(2, 2, 2), 1)
  };
  const int num_points = sizeof(h_points) / sizeof(h_points[0]);
  thrust::device_vector<index_and_loc> my_vector(h_points, h_points + num_points);

  // There can never be more groups than input elements, so num_points
  // output slots are always sufficient.
  thrust::device_vector<index_and_loc> same_idx(num_points);
  thrust::device_vector<index_and_loc> sum_locs(num_points);

  thrust::equal_to<index_and_loc> pred;  // forwards to index_and_loc::operator==
  thrust::plus<index_and_loc> op;        // forwards to index_and_loc::operator+

  // The same range is passed as keys and as values: the key comparison only
  // looks at `index`, while the reduction sums `location` and `instance`.
  auto res = thrust::reduce_by_key(
    thrust::device,
    my_vector.begin(),
    my_vector.end(),
    my_vector.begin(),
    same_idx.begin(),
    sum_locs.begin(),
    pred,
    op);

  // res.first points one past the last key written, which gives the number
  // of groups actually produced.
  const int num_groups = static_cast<int>(res.first - same_idx.begin());

  for(int i=0; i<num_groups; i++) {
    index_and_loc y = same_idx[i];
    index_and_loc x = sum_locs[i];
    std::cout << y.index << " {" << x.location.x 
                         << " " << x.location.y 
                         << " " << x.location.z 
                         << "}, " << x.instance << std::endl;
  }

  return 0;
}

运行这段代码确实会得到：

0 {1 6 12}, 3
1 {4 3 2}, 2

Answer 1

可能有很多方法可以实现你想做的事。我不清楚哪种方法高效、哪种不高效，这需要你自己去判断。

一种方法是在你的类中定义必要的比较运算符和加法运算符，然后用 Thrust 预定义的二元函子（thrust::equal_to 和 thrust::plus）包装这些运算符，作为 reduce_by_key 调用所需的谓词和二元运算。例如：

#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <iostream>

// Record holding an index and an integer 3D location.
// Equality compares `index` only; addition sums the locations and keeps the
// left-hand operand's `index`.
struct index_and_loc {
  int index;
  int3 location;

  index_and_loc() = default;

  // Builds a record from an index and a location.
  __host__ __device__
  index_and_loc(int index_, int3 location_)
      : index(index_), location(location_) {}

  // Copies both fields of `y` into this record.
  __host__ __device__
  index_and_loc& operator=(const index_and_loc& y) {
    index = y.index;
    location = y.location;
    return *this;
  }

  // True when both records carry the same index; the location is ignored.
  __host__ __device__
  bool operator==(const index_and_loc& y) const {
    return index == y.index;
  }

  // Component-wise sum of the two locations. The result takes its index
  // from the left-hand operand.
  __host__ __device__
  index_and_loc operator+(const index_and_loc& y) const {
    const int3 total = make_int3(location.x + y.location.x,
                                 location.y + y.location.y,
                                 location.z + y.location.z);
    return index_and_loc(index, total);
  }
};

// Builds five sample records on the host, copies them to the device,
// reduces consecutive records that share an index with
// thrust::reduce_by_key, and prints one line per resulting group.
int main()
{
    const int num_points = 5;

    thrust::host_vector<index_and_loc> my_vector(num_points); 
    my_vector[0] =  {0, {0, 2, 5}};
    my_vector[1] =  {0, {1, 3, 4}};
    my_vector[2] =  {0, {0, 1, 3}};
    my_vector[3] =  {1, {2, 1, 0}};
    my_vector[4] =  {1, {2, 2, 2}};

    // Host-to-device copy of the whole input.
    thrust::device_vector<index_and_loc> d_vector = my_vector; 

    // There can never be more groups than input elements, so sizing the
    // outputs like the input is always sufficient.
    thrust::device_vector<index_and_loc> keys_out(d_vector.size());
    thrust::device_vector<index_and_loc> data_out(d_vector.size());

    thrust::equal_to<index_and_loc> pred;  // forwards to index_and_loc::operator==
    thrust::plus<index_and_loc> op;        // forwards to index_and_loc::operator+

    // The same range serves as keys and as values: the key comparison only
    // looks at `index`, while the reduction sums `location`.
    auto res = thrust::reduce_by_key(
            d_vector.begin(),
            d_vector.end(),
            d_vector.begin(),
            keys_out.begin(),
            data_out.begin(),
            pred,
            op);

    // res.first points one past the last key written, which gives the
    // number of groups actually produced (instead of a hard-coded 2).
    const int num_groups = static_cast<int>(res.first - keys_out.begin());

    for(int i=0; i<num_groups; i++) {
        index_and_loc y = keys_out[i];
        index_and_loc x = data_out[i];
        std::cout << y.index << " {" << x.location.x 
                             << " " << x.location.y 
                             << " " << x.location.z 
                             << "}" << std::endl;
    }

    return 0;
}

关键在于正确定义用作谓词的 operator== 和用于归约的 operator+。其余部分只是拷贝赋值和构造所需的代码。

这似乎符合您的要求：

$ nvcc -arch=sm_52 -std=c++11 -o improbable improbable.cu
$ ./improbable 
0 {1 6 12}
1 {4 3 2}

请注意，因为 Thrust 要求键和数据使用各自独立的迭代器，所以您必须为归约后的键和数据分别提供输出迭代器。这意味着您实际上会得到结果的两份副本。这是否重要由您决定。

在结构上使用 thrust reduce_by_key

Use thrust reduce_by_key on a struct

cuda

thrust