嵌套的 thrust::fill 对变化的输入值不起作用

Question

我测试了一段最小测试代码，它用数组 "a" 的每个元素去填充数组 "c"。

测试表明，当用常量输入调用嵌套的 thrust::fill 时，它能正确地用该输入值填充传入的数组。

但是，如果输入值是变化的（即依次取值数组中的各个元素），它可能只用其中一个值（第一个或最后一个）填充传入的数组。

#include <thrust/inner_product.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/random.h>

#include <thrust/execution_policy.h>

#include <iostream>
#include <cmath>
#include <boost/concept_check.hpp>

// Functor used with thrust::transform over element indices.
//
// For a given index `idx` it fills the whole range [cv_, cv_ + N_) with
// av_[idx] through a nested thrust::fill, prints the contents of that range,
// and returns element `idx` of the range.
//
// NOTE: every invocation fills the *same* range [cv_, cv_ + N_). When several
// invocations run in parallel with different av_[idx] values, they overwrite
// each other's writes (a data race), so the values printed and returned are
// not guaranteed to match the av_[idx] of the invocation that reads them.
struct bFuntor 
{
    // av__: pointer to the array of fill values (indexed by idx).
    // cv__: pointer to the first element of the array that gets filled.
    // N__:  number of elements in the filled array; copied into N_.
    // main() passes raw pointers obtained from thrust::device_vector storage.
    bFuntor(int* av__, int* cv__, const int& N__) : av_(av__), cv_(cv__), N_(N__) {};

    // idx: index of the element being transformed; selects the fill value
    //      av_[idx] and the element of the filled range that is returned.
    __host__ __device__
    int operator()(const int& idx)
    {
      // Wrap the raw begin / one-past-the-end pointers as thrust::device_ptr.
      thrust::device_ptr<int> cv_dpt = thrust::device_pointer_cast(cv_);
      thrust::device_ptr<int> cv_dpt1 = thrust::device_pointer_cast(cv_+N_);

      // Iterators over [cv_, cv_ + N_). They depend only on cv_ and N_, not on
      // idx, so all invocations refer to the same range.
      thrust::detail::normal_iterator<thrust::device_ptr<int>> c0 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt);
      thrust::detail::normal_iterator<thrust::device_ptr<int>> c1 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt1);

      // Variant reported as "not working": each invocation writes its own
      // av_[idx] into the shared range, so parallel invocations conflict.
      thrust::fill(thrust::device,c0,c1,av_[idx]);

      // Variant reported as working: every invocation writes the same
      // constant, so the final contents are the same whichever write lands last.
//       thrust::fill(thrust::device,c0,c1,10);

      // Print this invocation's fill value next to each element currently
      // stored in the range.
      printf("fill result:\n");
      for (int i=0; i<N_; i++)
        printf("fill value: %d -> return value: %d \n",av_[idx],cv_[i]);
      printf("\n");

      // Read element idx back through the device_ptr and return it.
      return cv_dpt[idx];
    }

    int* av_;  // array of fill values
    int* cv_;  // first element of the array that is filled
    int N_;    // number of elements in the filled array
};

int main(void)
{
      int N = 2;
      std::vector<int> av = {0,1};
      std::vector<int> cv = {-1,-2};

      thrust::device_vector<int> av_d(N);
      thrust::device_vector<int> cv_d(N);
      av_d = av; cv_d = cv; 

      // call with nested manner
      thrust::transform(thrust::counting_iterator<int>(0),
            thrust::counting_iterator<int>(N),
            cv_d.begin(),
            bFuntor(thrust::raw_pointer_cast(av_d.data()),
            thrust::raw_pointer_cast(cv_d.data()),
                  N));    

      return 0;
}

输入值变化时的输出：

fill result:
fill value: 0 -> return value: 1 
fill value: 1 -> return value: 1 
fill value: 0 -> return value: 1 
fill value: 1 -> return value: 1

输入值为常量时的输出：

fill result:
fill value: 10 -> return value: 10 
fill value: 10 -> return value: 10 
fill value: 10 -> return value: 10 
fill value: 10 -> return value: 10

这是 thrust 的问题吗？还是说它本来就不应该这样使用？

Answer 1

这是一个数据竞争的例子：

// Excerpt of bFuntor::operator() from the question; the rest of the body is
// elided below.
int operator()(const int& idx)
{
  // Begin / one-past-the-end of the target array, built only from cv_ and N_.
  thrust::device_ptr<int> cv_dpt = thrust::device_pointer_cast(cv_);
  thrust::device_ptr<int> cv_dpt1 = thrust::device_pointer_cast(cv_+N_);

  // c0 and c1 do not depend on idx: every invocation gets the same range.
  thrust::detail::normal_iterator<thrust::device_ptr<int>> c0 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt);
  thrust::detail::normal_iterator<thrust::device_ptr<int>> c1 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt1);


  // Data race: each invocation writes its own av_[idx] into the shared range
  // [c0, c1) while other invocations may be writing different values to it.
  thrust::fill(thrust::device,c0,c1,av_[idx]);

  //.....
}

在这里，每次调用仿函数都会尝试用不同的值填充相同的迭代器范围（c0 到 c1）。显然，当多个仿函数调用并行发生时，这会产生问题。

嵌套的 thrust::fill 对变化的输入值不起作用

nested thrust::fill does not work for varied input values

cuda

gpu

thrust