嵌套的 thrust::fill 在输入值变化时无法正常工作
nested thrust::fill does not work for varied input values
我写了一段最小测试代码,用数组 "a" 的每个元素去填充数组 "c"。
测试表明:当以常量作为输入值调用嵌套的 thrust::fill 时,目标数组会被正确地填充为该常量。
但是,如果输入值是变化的(即取自数组 "a" 的各个元素),目标数组可能只会被其中一个值(第一个或最后一个)填满。
#include <thrust/inner_product.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/random.h>
#include <thrust/execution_policy.h>
#include <iostream>
#include <cmath>
#include <boost/concept_check.hpp>
// Functor that, for a given index idx, fills the whole buffer cv_[0..N_)
// with av_[idx], prints the buffer contents, and returns cv_[idx].
//
// NOTE: the filled range is computed from cv_ and N_ only, so it is the same
// for every idx. When several invocations run in parallel with different
// av_[idx] values they all write the same N_ elements, i.e. they race on cv_;
// the values printed and returned are then whichever write landed last.
struct bFuntor
{
// av__: source values, indexed by the argument of operator().
// cv__: buffer that gets filled; N__: number of elements in that buffer.
// The pointers are stored as-is; this struct does not own or free them.
bFuntor(int* av__, int* cv__, const int& N__) : av_(av__), cv_(cv__), N_(N__) {};
__host__ __device__
int operator()(const int& idx)
{
// Wrap the raw pointers as Thrust device pointers: cv_dpt is the start of
// the buffer, cv_dpt1 is one past its last element.
thrust::device_ptr<int> cv_dpt = thrust::device_pointer_cast(cv_);
thrust::device_ptr<int> cv_dpt1 = thrust::device_pointer_cast(cv_+N_);
// Build the [c0, c1) iterator pair handed to thrust::fill.
// NOTE(review): thrust::detail is Thrust's implementation namespace; the
// device_ptr values can probably be passed as iterators directly - confirm.
thrust::detail::normal_iterator<thrust::device_ptr<int>> c0 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt);
thrust::detail::normal_iterator<thrust::device_ptr<int>> c1 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt1);
// Fill the entire buffer with this invocation's value. With a different
// value per idx the result is not what the author expected (see NOTE above).
thrust::fill(thrust::device,c0,c1,av_[idx]);
// Variant with a constant fill value, which the author reports as working:
// thrust::fill(thrust::device,c0,c1,10);
// Dump the buffer as seen by this invocation right after its own fill.
printf("fill result:\n");
for (int i=0; i<N_; i++)
printf("fill value: %d -> return value: %d \n",av_[idx],cv_[i]);
printf("\n");
// Return the element at position idx of the (shared) buffer.
return cv_dpt[idx];
}
int* av_;  // fill values, read as av_[idx]
int* cv_;  // buffer that is filled and read back
int N_;    // element count of the cv_ buffer
};
int main(void)
{
int N = 2;
std::vector<int> av = {0,1};
std::vector<int> cv = {-1,-2};
thrust::device_vector<int> av_d(N);
thrust::device_vector<int> cv_d(N);
av_d = av; cv_d = cv;
// call with nested manner
thrust::transform(thrust::counting_iterator<int>(0),
thrust::counting_iterator<int>(N),
cv_d.begin(),
bFuntor(thrust::raw_pointer_cast(av_d.data()),
thrust::raw_pointer_cast(cv_d.data()),
N));
return 0;
}
不同输入值的输出情况:
fill result:
fill value: 0 -> return value: 1
fill value: 1 -> return value: 1
fill value: 0 -> return value: 1
fill value: 1 -> return value: 1
常量输入值输出情况:
fill result:
fill value: 10 -> return value: 10
fill value: 10 -> return value: 10
fill value: 10 -> return value: 10
fill value: 10 -> return value: 10
这是 Thrust 的问题吗?还是说它本来就不应该这样使用?
这是一个数据竞争的例子:
// Excerpt of bFuntor::operator() highlighting the source of the data race.
int operator()(const int& idx)
{
// c0 and c1 are derived from cv_ and N_ only, so they delimit the same
// range no matter which idx this invocation received.
thrust::device_ptr<int> cv_dpt = thrust::device_pointer_cast(cv_);
thrust::device_ptr<int> cv_dpt1 = thrust::device_pointer_cast(cv_+N_);
thrust::detail::normal_iterator<thrust::device_ptr<int>> c0 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt);
thrust::detail::normal_iterator<thrust::device_ptr<int>> c1 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt1);
// Every invocation overwrites that shared range with its own av_[idx].
thrust::fill(thrust::device,c0,c1,av_[idx]);
//.....
}
在这里,每次仿函数调用都会尝试用不同的值去填充同一个迭代器范围(c0 到 c1)。显然,当多个仿函数调用并行执行时,这会产生问题:数组中最终留下的是最后完成写入的那个值。使用常量时之所以看起来正常,是因为所有调用写入的值都相同,竞争不会表现出来。
我写了一段最小测试代码,用数组 "a" 的每个元素去填充数组 "c"。
测试表明:当以常量作为输入值调用嵌套的 thrust::fill 时,目标数组会被正确地填充为该常量。
但是,如果输入值是变化的(即取自数组 "a" 的各个元素),目标数组可能只会被其中一个值(第一个或最后一个)填满。
#include <thrust/inner_product.h>
#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/random.h>
#include <thrust/execution_policy.h>
#include <iostream>
#include <cmath>
#include <boost/concept_check.hpp>
// Functor that, for a given index idx, fills the whole buffer cv_[0..N_)
// with av_[idx], prints the buffer contents, and returns cv_[idx].
//
// NOTE: the filled range is computed from cv_ and N_ only, so it is the same
// for every idx. When several invocations run in parallel with different
// av_[idx] values they all write the same N_ elements, i.e. they race on cv_;
// the values printed and returned are then whichever write landed last.
struct bFuntor
{
// av__: source values, indexed by the argument of operator().
// cv__: buffer that gets filled; N__: number of elements in that buffer.
// The pointers are stored as-is; this struct does not own or free them.
bFuntor(int* av__, int* cv__, const int& N__) : av_(av__), cv_(cv__), N_(N__) {};
__host__ __device__
int operator()(const int& idx)
{
// Wrap the raw pointers as Thrust device pointers: cv_dpt is the start of
// the buffer, cv_dpt1 is one past its last element.
thrust::device_ptr<int> cv_dpt = thrust::device_pointer_cast(cv_);
thrust::device_ptr<int> cv_dpt1 = thrust::device_pointer_cast(cv_+N_);
// Build the [c0, c1) iterator pair handed to thrust::fill.
// NOTE(review): thrust::detail is Thrust's implementation namespace; the
// device_ptr values can probably be passed as iterators directly - confirm.
thrust::detail::normal_iterator<thrust::device_ptr<int>> c0 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt);
thrust::detail::normal_iterator<thrust::device_ptr<int>> c1 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt1);
// Fill the entire buffer with this invocation's value. With a different
// value per idx the result is not what the author expected (see NOTE above).
thrust::fill(thrust::device,c0,c1,av_[idx]);
// Variant with a constant fill value, which the author reports as working:
// thrust::fill(thrust::device,c0,c1,10);
// Dump the buffer as seen by this invocation right after its own fill.
printf("fill result:\n");
for (int i=0; i<N_; i++)
printf("fill value: %d -> return value: %d \n",av_[idx],cv_[i]);
printf("\n");
// Return the element at position idx of the (shared) buffer.
return cv_dpt[idx];
}
int* av_;  // fill values, read as av_[idx]
int* cv_;  // buffer that is filled and read back
int N_;    // element count of the cv_ buffer
};
int main(void)
{
int N = 2;
std::vector<int> av = {0,1};
std::vector<int> cv = {-1,-2};
thrust::device_vector<int> av_d(N);
thrust::device_vector<int> cv_d(N);
av_d = av; cv_d = cv;
// call with nested manner
thrust::transform(thrust::counting_iterator<int>(0),
thrust::counting_iterator<int>(N),
cv_d.begin(),
bFuntor(thrust::raw_pointer_cast(av_d.data()),
thrust::raw_pointer_cast(cv_d.data()),
N));
return 0;
}
不同输入值的输出情况:
fill result:
fill value: 0 -> return value: 1
fill value: 1 -> return value: 1
fill value: 0 -> return value: 1
fill value: 1 -> return value: 1
常量输入值输出情况:
fill result:
fill value: 10 -> return value: 10
fill value: 10 -> return value: 10
fill value: 10 -> return value: 10
fill value: 10 -> return value: 10
这是 Thrust 的问题吗?还是说它本来就不应该这样使用?
这是一个数据竞争的例子:
// Excerpt of bFuntor::operator() highlighting the source of the data race.
int operator()(const int& idx)
{
// c0 and c1 are derived from cv_ and N_ only, so they delimit the same
// range no matter which idx this invocation received.
thrust::device_ptr<int> cv_dpt = thrust::device_pointer_cast(cv_);
thrust::device_ptr<int> cv_dpt1 = thrust::device_pointer_cast(cv_+N_);
thrust::detail::normal_iterator<thrust::device_ptr<int>> c0 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt);
thrust::detail::normal_iterator<thrust::device_ptr<int>> c1 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt1);
// Every invocation overwrites that shared range with its own av_[idx].
thrust::fill(thrust::device,c0,c1,av_[idx]);
//.....
}
在这里,每次仿函数调用都会尝试用不同的值去填充同一个迭代器范围(c0 到 c1)。显然,当多个仿函数调用并行执行时,这会产生问题:数组中最终留下的是最后完成写入的那个值。使用常量时之所以看起来正常,是因为所有调用写入的值都相同,竞争不会表现出来。