推力:并行计算多个段的set_difference
Thrust: Computing set_difference of multiple segments in parallel
我已经编写了一些代码来使用推力计算每段 set_difference。这个想法是使用额外的数组来指示哪个元素属于哪个段,以及一个自定义比较器。
这会产生正确的输出集,但如果输入段为空,则输出段大小是错误的。 Reduce-by-key 用于使用每个元素的输出段标识符来计算每个段的元素数。如果输入段为空,则相应的段 id 不会出现在输出段 id 中,这会导致错误的结果。
正确:
segmentsizesLeft {4,3}
segmentIdsLeft {0,0,0,0,1,1,1}
outputsegmentIds {0,0,0,1}
outputsegmentsizes {3,1}
错误:
segmentsizesLeft {0,3}
segmentIdsLeft {1,1,1}
outputsegmentIds {1}
outputsegmentsizes {1, uninitialized} //should be {0,1}
如何解决?
#include <thrust/set_operations.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>
#include <thrust/reduce.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <iostream>
int main(){
//compute set_difference(left, right) , per segment
//output: contiguous range of remaining elements, array of output segment sizes
#if 1
//correct result
int numLeft = 7;
int dataLeft[7]{0,1,2,3, 1,3,4};
int segmentsizesLeft[2]{4,3};
int psLeft[3]{0,4,7};
int segmentIdsLeft[7]{0,0,0,0,1,1,1};
#else
//wrong result. empty input segment is no longer present in output segment sizes
int numLeft = 3;
int dataLeft[3]{1,3,4};
int segmentsizesLeft[2]{0,3};
int psLeft[3]{0,0,3};
int segmentIdsLeft[3]{1,1,1};
#endif
int dataRight[7]{2, 3,4};
int segmentsizesRight[2]{1,2};
int psRight[3]{0,1,3};
int segmentIdsRight[3]{0,1,1};
auto first1 = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsLeft[0], &dataLeft[0]));
auto last1 = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsLeft[0] + numLeft, &dataLeft[0] + numLeft));
auto first2 = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsRight[0], &dataRight[0]));
auto last2 = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsRight[0] + 3, &dataRight[0] + 3));
int segmentIdsOutput[7];
int dataOutput[7];
int segmentsizesOutput[2];
auto output = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsOutput[0], &dataOutput[0]));
auto comp = [](const auto& t1, const auto& t2){
const int idl = thrust::get<0>(t1);
const int idr = thrust::get<0>(t2);
if(idl < idr) return true;
if(idl > idr) return false;
return thrust::get<1>(t1) < thrust::get<1>(t2);
};
auto outputend = thrust::set_difference(first1, last1, first2, last2, output, comp);
int outputsize = thrust::distance(output, outputend);
thrust::reduce_by_key(&segmentIdsOutput[0], &segmentIdsOutput[0] + outputsize,
thrust::make_constant_iterator(1),
thrust::make_discard_iterator(), &segmentsizesOutput[0]
);
std::cerr << "raw data output: ";
for(int i = 0; i < outputsize; i++){
std::cerr << " " << dataOutput[i];
}
std::cerr << "\n";
std::cerr << "result segment sizes: ";
for(int i = 0; i < 2; i++){
std::cerr << " " << segmentsizesOutput[i];
}
std::cerr << "\n";
}
以下方法现在有效。不是丢弃唯一的段 ID 并将减少的值存储在输出数组中,而是将两个输出都保存到临时存储中。然后,将输出数组初始化为零以说明任何空段,并使用键值对将段大小设置在正确的位置。
int uniqueIds[7];
int reducedCounts[7];
int numUnique = thrust::distance(
&uniqueIds[0],
thrust::reduce_by_key(
&segmentIdsOutput[0],
&segmentIdsOutput[0] + outputsize,
thrust::make_constant_iterator(1),
&uniqueIds[0],
&reducedCounts[0]
).first
);
thrust::fill(&segmentsizesOutput[0], &segmentsizesOutput[0] + 2, 0);
thrust::for_each_n(
thrust::make_zip_iterator(thrust::make_tuple(&uniqueIds[0], &reducedCounts[0])),
numUnique,
[&] (auto tup){
segmentsizesOutput[thrust::get<0>(tup)] = thrust::get<1>(tup);
}
);
我已经编写了一些代码来使用推力计算每段 set_difference。这个想法是使用额外的数组来指示哪个元素属于哪个段,以及一个自定义比较器。 这会产生正确的输出集,但如果输入段为空,则输出段大小是错误的。 Reduce-by-key 用于使用每个元素的输出段标识符来计算每个段的元素数。如果输入段为空,则相应的段 id 不会出现在输出段 id 中,这会导致错误的结果。
正确:
segmentsizesLeft {4,3}
segmentIdsLeft {0,0,0,0,1,1,1}
outputsegmentIds {0,0,0,1}
outputsegmentsizes {3,1}
错误:
segmentsizesLeft {0,3}
segmentIdsLeft {1,1,1}
outputsegmentIds {1}
outputsegmentsizes {1, uninitialized} //should be {0,1}
如何解决?
#include <thrust/set_operations.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>
#include <thrust/reduce.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <iostream>
int main(){
//compute set_difference(left, right) , per segment
//output: contiguous range of remaining elements, array of output segment sizes
#if 1
//correct result
int numLeft = 7;
int dataLeft[7]{0,1,2,3, 1,3,4};
int segmentsizesLeft[2]{4,3};
int psLeft[3]{0,4,7};
int segmentIdsLeft[7]{0,0,0,0,1,1,1};
#else
//wrong result. empty input segment is no longer present in output segment sizes
int numLeft = 3;
int dataLeft[3]{1,3,4};
int segmentsizesLeft[2]{0,3};
int psLeft[3]{0,0,3};
int segmentIdsLeft[3]{1,1,1};
#endif
int dataRight[7]{2, 3,4};
int segmentsizesRight[2]{1,2};
int psRight[3]{0,1,3};
int segmentIdsRight[3]{0,1,1};
auto first1 = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsLeft[0], &dataLeft[0]));
auto last1 = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsLeft[0] + numLeft, &dataLeft[0] + numLeft));
auto first2 = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsRight[0], &dataRight[0]));
auto last2 = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsRight[0] + 3, &dataRight[0] + 3));
int segmentIdsOutput[7];
int dataOutput[7];
int segmentsizesOutput[2];
auto output = thrust::make_zip_iterator(thrust::make_tuple(&segmentIdsOutput[0], &dataOutput[0]));
auto comp = [](const auto& t1, const auto& t2){
const int idl = thrust::get<0>(t1);
const int idr = thrust::get<0>(t2);
if(idl < idr) return true;
if(idl > idr) return false;
return thrust::get<1>(t1) < thrust::get<1>(t2);
};
auto outputend = thrust::set_difference(first1, last1, first2, last2, output, comp);
int outputsize = thrust::distance(output, outputend);
thrust::reduce_by_key(&segmentIdsOutput[0], &segmentIdsOutput[0] + outputsize,
thrust::make_constant_iterator(1),
thrust::make_discard_iterator(), &segmentsizesOutput[0]
);
std::cerr << "raw data output: ";
for(int i = 0; i < outputsize; i++){
std::cerr << " " << dataOutput[i];
}
std::cerr << "\n";
std::cerr << "result segment sizes: ";
for(int i = 0; i < 2; i++){
std::cerr << " " << segmentsizesOutput[i];
}
std::cerr << "\n";
}
以下方法现在有效。不是丢弃唯一的段 ID 并将减少的值存储在输出数组中,而是将两个输出都保存到临时存储中。然后,将输出数组初始化为零以说明任何空段,并使用键值对将段大小设置在正确的位置。
int uniqueIds[7];
int reducedCounts[7];
int numUnique = thrust::distance(
&uniqueIds[0],
thrust::reduce_by_key(
&segmentIdsOutput[0],
&segmentIdsOutput[0] + outputsize,
thrust::make_constant_iterator(1),
&uniqueIds[0],
&reducedCounts[0]
).first
);
thrust::fill(&segmentsizesOutput[0], &segmentsizesOutput[0] + 2, 0);
thrust::for_each_n(
thrust::make_zip_iterator(thrust::make_tuple(&uniqueIds[0], &reducedCounts[0])),
numUnique,
[&] (auto tup){
segmentsizesOutput[thrust::get<0>(tup)] = thrust::get<1>(tup);
}
);