如何在 Thrust 中对通过映射(map)选出的点计算两个 device_vector 的加权平均?
How to do weighted average of two device_vectors with points selected using map in thrust?
我有两个 device_vector P 和 Q(比如说大小为 100)。
我还有两个分别用于 P 和 Q 的 device_vector 映射(MapP 和 MapQ,大小为 10),其中包含要从 P 和 Q 中选取的点的索引。
我有一个表示权重的 device_vector D。
我需要对 P 和 Q 中通过各自映射选出的所有点计算 (P*D+Q)/(D+1)。
我的方法如下。它有效,但太麻烦了。 谁能提出更好的方法?
#include <thrust/device_vector.h>
#include <thrust/random.h>
#include <thrust/sequence.h>
#include <thrust/execution_policy.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
/// Builds a device vector holding N pseudo-random floats.
///
/// Values are drawn from thrust::uniform_real_distribution<float>(0.0f, 10.0f)
/// using a thrust::default_random_engine constructed from `seed`.
///
/// @param N     number of elements to generate
/// @param seed  seed for the random engine (defaults to the engine's default seed)
/// @return      the filled device vector
thrust::device_vector<float> random_vector(const size_t N,
                                           unsigned int seed = thrust::default_random_engine::default_seed)
{
    thrust::default_random_engine engine(seed);
    thrust::uniform_real_distribution<float> dist(0.0f, 10.0f);

    thrust::device_vector<float> values(N);
    // The vector is filled one element at a time from this host-side loop.
    size_t idx = 0;
    while (idx < N) {
        values[idx] = dist(engine);
        ++idx;
    }
    return values;
}
/// Unary functor that maps an int to its successor (x + 1).
/// It derives from thrust::unary_function<int,int> and its call operator is
/// marked for both host and device.
struct increment : public thrust::unary_function<int,int>
{
    __host__ __device__
    int operator()(int x) const
    {
        const int successor = x + 1;
        return successor;
    }
};
int main(int argc, char * argv[])
{
int N=atoi(argv[1]);
thrust::device_vector<float> P = random_vector(N,1);
thrust::device_vector<float> Q = random_vector(N,9);
thrust::device_vector<int> D(N);
thrust::sequence(thrust::device, D.begin(), D.begin() + N, 1);
thrust::device_vector<float> temp(10);
thrust::device_vector<int> MapP(10); // map
thrust::device_vector<int> MapQ(10); // map
MapP[0]=0;MapP[1]=5;MapP[2]=4;MapP[3]=2;MapP[4]=7;MapP[5]=1;MapP[6]=9;MapP[7]=3;MapP[8]=6;MapP[9]=8;
MapQ[0]=10;MapQ[1]=15;MapQ[2]=12;MapQ[3]=14;MapQ[4]=11;MapQ[5]=17;MapQ[6]=13;MapQ[7]=19;MapQ[8]=18;MapQ[9]=16;
// The weighted average is (D*P+Q)/(D+1)
// We compute D*P first
//thrust::transform(thrust::device, P.begin(), P.end(), D.begin(), temp.begin(), thrust::multiplies<float>()); // use permutation iterator
thrust::transform(thrust::device, thrust::make_permutation_iterator(P.begin(),MapP.begin()),
thrust::make_permutation_iterator(P.end(),MapP.end()),
thrust::make_permutation_iterator(D.begin(),MapP.begin()),
temp.begin(), thrust::multiplies<float>());
// Then we add D*p to Q
//thrust::transform(thrust::device, temp.begin(), temp.end(), Q.begin(), temp.begin(), thrust::plus<float>()); // use permutation iterator
thrust::transform(thrust::device, temp.begin(), temp.end(),
thrust::make_permutation_iterator(Q.begin(),MapQ.begin()),
temp.begin(), thrust::plus<float>());
// Then we divide by D+1
//thrust::transform(thrust::device, temp.begin(), temp.end(), thrust::make_transform_iterator(D.begin(), increment()), temp.begin(), thrust::divides<float>());
thrust::transform(thrust::device, temp.begin(), temp.end(),
thrust::make_permutation_iterator(D.begin(),MapP.begin()),
temp.begin(), thrust::divides<float>());
// replace contents of P with the weighted sum using pts in map M
thrust::copy(thrust::device, temp.begin(), temp.end(), thrust::make_permutation_iterator(P.begin(),MapP.begin())); // use permutation iterator
return 0;
}
我假设您希望对向量进行逐元素操作,因为这是您提供的演示代码的行为。
请注意,在构造置换迭代器(permutation iterator)的结束迭代器时,不应传入源向量的末尾:
thrust::make_permutation_iterator(P.end(),MapP.end()),
^^^^^
而应传入源向量的开头:
thrust::make_permutation_iterator(P.begin(),MapP.end()),
有关此示例,请参阅 thrust quick start guide。
另请注意,在您的问题和代码中,您都提到除以 D+1,但您的代码实际上是除以 D,而不是 D+1。
关于您的问题,只要用一个适当定义的函子调用一次 thrust::transform,
即可完成全部运算。由于这种实现需要向 thrust::transform
传入多个向量,因此引入了 thrust::zip_iterator:
$ cat t332.cu
#include <thrust/device_vector.h>
#include <thrust/random.h>
#include <thrust/sequence.h>
#include <thrust/execution_policy.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
/// Builds a device vector holding N pseudo-random floats.
///
/// Values are drawn from thrust::uniform_real_distribution<float>(0.0f, 10.0f)
/// using a thrust::default_random_engine constructed from `seed`.
///
/// @param N     number of elements to generate
/// @param seed  seed for the random engine (defaults to the engine's default seed)
/// @return      the filled device vector
thrust::device_vector<float> random_vector(const size_t N,
                                           unsigned int seed = thrust::default_random_engine::default_seed)
{
    thrust::default_random_engine engine(seed);
    thrust::uniform_real_distribution<float> dist(0.0f, 10.0f);

    thrust::device_vector<float> values(N);
    // The vector is filled one element at a time from this host-side loop.
    size_t idx = 0;
    while (idx < N) {
        values[idx] = dist(engine);
        ++idx;
    }
    return values;
}
/// Functor computing the weighted average (D*P + Q) / (D + 1) for one element.
///
/// The argument is a tuple whose components are read as
///   0: the P value, 1: the weight D, 2: the Q value.
/// Each component is converted to float before the arithmetic and the result
/// is returned as float.
struct w_avg
{
    template <typename T>
    __host__ __device__
    float operator()(T x) const
    {
        const float p_val  = thrust::get<0>(x);
        const float weight = thrust::get<1>(x);
        const float q_val  = thrust::get<2>(x);

        const float numerator   = p_val * weight + q_val;
        const float denominator = weight + 1.0f;
        return numerator / denominator;
    }
};
/// Demo: replace selected elements of P with the weighted average
/// (D*P + Q) / (D + 1) in a single thrust::transform call, then print the
/// first five elements of P.
///
/// MapP selects the elements of P (and the matching weights in D); MapQ
/// selects the elements of Q. The three permuted ranges are zipped together
/// so that w_avg receives one (P, D, Q) tuple per selected point.
///
/// @param argc  argument count; must be at least 2
/// @param argv  argv[1] is the vector length N, which must be at least 20
///              because the largest map index used below is 19
/// @return 0 on success, 1 if N is missing or too small
int main(int argc, char * argv[])
{
    // Guard against a missing argument before touching argv[1].
    if (argc < 2) {
        std::cerr << "usage: <program> N   (N >= 20)" << '\n';
        return 1;
    }
    const int N = atoi(argv[1]);
    // MapQ reaches index 19, so shorter vectors would be accessed out of bounds.
    if (N < 20) {
        std::cerr << "N must be at least 20 (largest map index is 19)" << '\n';
        return 1;
    }

    thrust::device_vector<float> P = random_vector(N,1);
    thrust::device_vector<float> Q = random_vector(N,9);
    thrust::device_vector<int> D(N);  // weights 1, 2, ..., N
    thrust::sequence(thrust::device, D.begin(), D.end(), 1);

    // Define the maps as host arrays and construct each device vector from
    // its array, instead of assigning device elements one by one.
    const int mapSize = 10;
    const int hostMapP[mapSize] = {0, 5, 4, 2, 7, 1, 9, 3, 6, 8};
    const int hostMapQ[mapSize] = {10, 15, 12, 14, 11, 17, 13, 19, 18, 16};
    thrust::device_vector<int> MapP(hostMapP, hostMapP + mapSize);  // map into P and D
    thrust::device_vector<int> MapQ(hostMapQ, hostMapQ + mapSize);  // map into Q

    // The weighted average is (D*P+Q)/(D+1).
    // Every end iterator pairs the source's begin() with the map's end().
    // The output range is P permuted by MapP, i.e. the same locations the
    // first tuple component is read from; the indices in MapP are distinct,
    // so each result overwrites only the P element it was computed from.
    thrust::transform(thrust::device,
                      thrust::make_zip_iterator(thrust::make_tuple(
                          thrust::make_permutation_iterator(P.begin(), MapP.begin()),
                          thrust::make_permutation_iterator(D.begin(), MapP.begin()),
                          thrust::make_permutation_iterator(Q.begin(), MapQ.begin()))),
                      thrust::make_zip_iterator(thrust::make_tuple(
                          thrust::make_permutation_iterator(P.begin(), MapP.end()),
                          thrust::make_permutation_iterator(D.begin(), MapP.end()),
                          thrust::make_permutation_iterator(Q.begin(), MapQ.end()))),
                      thrust::make_permutation_iterator(P.begin(), MapP.begin()),
                      w_avg());

    // Show the first few updated values; '\n' avoids a flush per line.
    for (int i = 0; i < 5; ++i) {
        std::cout << P[i] << '\n';
    }
    return 0;
}
$ nvcc -o t332 t332.cu
$ ./t332 100
4.02976
3.75275
5.32832
8.53189
8.46641
$
注意,上面代码中的函子除以的是 D+1。若要改为除以 D 以与您的代码保持一致(但这与您声明的意图不符),只需做很小的改动。
我有两个 device_vector P 和 Q(比如说大小为 100)。 我还有两个分别用于 P 和 Q 的 device_vector 映射(MapP 和 MapQ,大小为 10),其中包含要从 P 和 Q 中选取的点的索引。 我有一个表示权重的 device_vector D。
我需要对 P 和 Q 中通过各自映射选出的所有点计算 (P*D+Q)/(D+1)。
我的方法如下。它有效,但太麻烦了。 谁能提出更好的方法?
#include <thrust/device_vector.h>
#include <thrust/random.h>
#include <thrust/sequence.h>
#include <thrust/execution_policy.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
/// Builds a device vector holding N pseudo-random floats.
///
/// Values are drawn from thrust::uniform_real_distribution<float>(0.0f, 10.0f)
/// using a thrust::default_random_engine constructed from `seed`.
///
/// @param N     number of elements to generate
/// @param seed  seed for the random engine (defaults to the engine's default seed)
/// @return      the filled device vector
thrust::device_vector<float> random_vector(const size_t N,
                                           unsigned int seed = thrust::default_random_engine::default_seed)
{
    thrust::default_random_engine engine(seed);
    thrust::uniform_real_distribution<float> dist(0.0f, 10.0f);

    thrust::device_vector<float> values(N);
    // The vector is filled one element at a time from this host-side loop.
    size_t idx = 0;
    while (idx < N) {
        values[idx] = dist(engine);
        ++idx;
    }
    return values;
}
/// Unary functor that maps an int to its successor (x + 1).
/// It derives from thrust::unary_function<int,int> and its call operator is
/// marked for both host and device.
struct increment : public thrust::unary_function<int,int>
{
    __host__ __device__
    int operator()(int x) const
    {
        const int successor = x + 1;
        return successor;
    }
};
int main(int argc, char * argv[])
{
int N=atoi(argv[1]);
thrust::device_vector<float> P = random_vector(N,1);
thrust::device_vector<float> Q = random_vector(N,9);
thrust::device_vector<int> D(N);
thrust::sequence(thrust::device, D.begin(), D.begin() + N, 1);
thrust::device_vector<float> temp(10);
thrust::device_vector<int> MapP(10); // map
thrust::device_vector<int> MapQ(10); // map
MapP[0]=0;MapP[1]=5;MapP[2]=4;MapP[3]=2;MapP[4]=7;MapP[5]=1;MapP[6]=9;MapP[7]=3;MapP[8]=6;MapP[9]=8;
MapQ[0]=10;MapQ[1]=15;MapQ[2]=12;MapQ[3]=14;MapQ[4]=11;MapQ[5]=17;MapQ[6]=13;MapQ[7]=19;MapQ[8]=18;MapQ[9]=16;
// The weighted average is (D*P+Q)/(D+1)
// We compute D*P first
//thrust::transform(thrust::device, P.begin(), P.end(), D.begin(), temp.begin(), thrust::multiplies<float>()); // use permutation iterator
thrust::transform(thrust::device, thrust::make_permutation_iterator(P.begin(),MapP.begin()),
thrust::make_permutation_iterator(P.end(),MapP.end()),
thrust::make_permutation_iterator(D.begin(),MapP.begin()),
temp.begin(), thrust::multiplies<float>());
// Then we add D*p to Q
//thrust::transform(thrust::device, temp.begin(), temp.end(), Q.begin(), temp.begin(), thrust::plus<float>()); // use permutation iterator
thrust::transform(thrust::device, temp.begin(), temp.end(),
thrust::make_permutation_iterator(Q.begin(),MapQ.begin()),
temp.begin(), thrust::plus<float>());
// Then we divide by D+1
//thrust::transform(thrust::device, temp.begin(), temp.end(), thrust::make_transform_iterator(D.begin(), increment()), temp.begin(), thrust::divides<float>());
thrust::transform(thrust::device, temp.begin(), temp.end(),
thrust::make_permutation_iterator(D.begin(),MapP.begin()),
temp.begin(), thrust::divides<float>());
// replace contents of P with the weighted sum using pts in map M
thrust::copy(thrust::device, temp.begin(), temp.end(), thrust::make_permutation_iterator(P.begin(),MapP.begin())); // use permutation iterator
return 0;
}
我假设您希望对向量进行逐元素操作,因为这是您提供的演示代码的行为。
请注意,在构造置换迭代器(permutation iterator)的结束迭代器时,不应传入源向量的末尾:
thrust::make_permutation_iterator(P.end(),MapP.end()),
^^^^^
而应传入源向量的开头:
thrust::make_permutation_iterator(P.begin(),MapP.end()),
有关此示例,请参阅 thrust quick start guide。
另请注意,在您的问题和代码中,您都提到除以 D+1,但您的代码实际上是除以 D,而不是 D+1。
关于您的问题,只要用一个适当定义的函子调用一次 thrust::transform,
即可完成全部运算。由于这种实现需要向 thrust::transform
传入多个向量,因此引入了 thrust::zip_iterator:
$ cat t332.cu
#include <thrust/device_vector.h>
#include <thrust/random.h>
#include <thrust/sequence.h>
#include <thrust/execution_policy.h>
#include <thrust/transform.h>
#include <thrust/functional.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
/// Builds a device vector holding N pseudo-random floats.
///
/// Values are drawn from thrust::uniform_real_distribution<float>(0.0f, 10.0f)
/// using a thrust::default_random_engine constructed from `seed`.
///
/// @param N     number of elements to generate
/// @param seed  seed for the random engine (defaults to the engine's default seed)
/// @return      the filled device vector
thrust::device_vector<float> random_vector(const size_t N,
                                           unsigned int seed = thrust::default_random_engine::default_seed)
{
    thrust::default_random_engine engine(seed);
    thrust::uniform_real_distribution<float> dist(0.0f, 10.0f);

    thrust::device_vector<float> values(N);
    // The vector is filled one element at a time from this host-side loop.
    size_t idx = 0;
    while (idx < N) {
        values[idx] = dist(engine);
        ++idx;
    }
    return values;
}
/// Functor computing the weighted average (D*P + Q) / (D + 1) for one element.
///
/// The argument is a tuple whose components are read as
///   0: the P value, 1: the weight D, 2: the Q value.
/// Each component is converted to float before the arithmetic and the result
/// is returned as float.
struct w_avg
{
    template <typename T>
    __host__ __device__
    float operator()(T x) const
    {
        const float p_val  = thrust::get<0>(x);
        const float weight = thrust::get<1>(x);
        const float q_val  = thrust::get<2>(x);

        const float numerator   = p_val * weight + q_val;
        const float denominator = weight + 1.0f;
        return numerator / denominator;
    }
};
/// Demo: replace selected elements of P with the weighted average
/// (D*P + Q) / (D + 1) in a single thrust::transform call, then print the
/// first five elements of P.
///
/// MapP selects the elements of P (and the matching weights in D); MapQ
/// selects the elements of Q. The three permuted ranges are zipped together
/// so that w_avg receives one (P, D, Q) tuple per selected point.
///
/// @param argc  argument count; must be at least 2
/// @param argv  argv[1] is the vector length N, which must be at least 20
///              because the largest map index used below is 19
/// @return 0 on success, 1 if N is missing or too small
int main(int argc, char * argv[])
{
    // Guard against a missing argument before touching argv[1].
    if (argc < 2) {
        std::cerr << "usage: <program> N   (N >= 20)" << '\n';
        return 1;
    }
    const int N = atoi(argv[1]);
    // MapQ reaches index 19, so shorter vectors would be accessed out of bounds.
    if (N < 20) {
        std::cerr << "N must be at least 20 (largest map index is 19)" << '\n';
        return 1;
    }

    thrust::device_vector<float> P = random_vector(N,1);
    thrust::device_vector<float> Q = random_vector(N,9);
    thrust::device_vector<int> D(N);  // weights 1, 2, ..., N
    thrust::sequence(thrust::device, D.begin(), D.end(), 1);

    // Define the maps as host arrays and construct each device vector from
    // its array, instead of assigning device elements one by one.
    const int mapSize = 10;
    const int hostMapP[mapSize] = {0, 5, 4, 2, 7, 1, 9, 3, 6, 8};
    const int hostMapQ[mapSize] = {10, 15, 12, 14, 11, 17, 13, 19, 18, 16};
    thrust::device_vector<int> MapP(hostMapP, hostMapP + mapSize);  // map into P and D
    thrust::device_vector<int> MapQ(hostMapQ, hostMapQ + mapSize);  // map into Q

    // The weighted average is (D*P+Q)/(D+1).
    // Every end iterator pairs the source's begin() with the map's end().
    // The output range is P permuted by MapP, i.e. the same locations the
    // first tuple component is read from; the indices in MapP are distinct,
    // so each result overwrites only the P element it was computed from.
    thrust::transform(thrust::device,
                      thrust::make_zip_iterator(thrust::make_tuple(
                          thrust::make_permutation_iterator(P.begin(), MapP.begin()),
                          thrust::make_permutation_iterator(D.begin(), MapP.begin()),
                          thrust::make_permutation_iterator(Q.begin(), MapQ.begin()))),
                      thrust::make_zip_iterator(thrust::make_tuple(
                          thrust::make_permutation_iterator(P.begin(), MapP.end()),
                          thrust::make_permutation_iterator(D.begin(), MapP.end()),
                          thrust::make_permutation_iterator(Q.begin(), MapQ.end()))),
                      thrust::make_permutation_iterator(P.begin(), MapP.begin()),
                      w_avg());

    // Show the first few updated values; '\n' avoids a flush per line.
    for (int i = 0; i < 5; ++i) {
        std::cout << P[i] << '\n';
    }
    return 0;
}
$ nvcc -o t332 t332.cu
$ ./t332 100
4.02976
3.75275
5.32832
8.53189
8.46641
$
注意,上面代码中的函子除以的是 D+1。若要改为除以 D 以与您的代码保持一致(但这与您声明的意图不符),只需做很小的改动。