无法使用 reduce_by_key 编译推力代码

Question

我需要使用推力沿着矩阵的列以及行索引求最小值。我使用以下代码（从橙色 owl 解决方案复制），但是我在编译时遇到错误。我已将其作为问题发布在相应的 git 页面上。错误消息很大，我不知道如何调试它。任何人都可以帮我吗？我使用的是 cuda-8.0，推力版本 1.8.

代码：

#include <iterator>
#include <algorithm>

#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/random.h>

using namespace thrust::placeholders;

int main()
{
    const int Nrows = 6;
    const int Ncols = 8;

    /**************************/
    /* SETTING UP THE PROBLEM */
    /**************************/

    // --- Random uniform integer distribution between 0 and 100
    thrust::default_random_engine rng;
    thrust::uniform_int_distribution<int> dist(0, 20);

    // --- Matrix allocation and initialization
    thrust::device_vector<double> d_matrix(Nrows * Ncols);
    for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (double)dist(rng);

    printf("\n\nMatrix\n");
    for(int i = 0; i < Nrows; i++) {
        std::cout << " [ ";
        for(int j = 0; j < Ncols; j++)
            std::cout << d_matrix[i * Ncols + j] << " ";
        std::cout << "]\n";
    }

    /**********************************************/
    /* FIND ROW MINIMA ALONG WITH THEIR LOCATIONS */
    /**********************************************/
    thrust::device_vector<float> d_minima(Ncols);
    thrust::device_vector<int> d_indices(Ncols);

    thrust::reduce_by_key(
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), _1 / Nrows),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), _1 / Nrows) + Nrows * Ncols,
            thrust::make_zip_iterator(
                    thrust::make_tuple(thrust::make_permutation_iterator(
                                    d_matrix.begin(),
                                    thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 % Nrows) * Ncols + _1 / Nrows)),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), _1 % Nrows))),
            thrust::make_discard_iterator(),
            thrust::make_zip_iterator(thrust::make_tuple(d_minima.begin(), d_indices.begin())),
            thrust::equal_to<int>(),
            thrust::minimum<thrust::tuple<float, int> >()
    );

    printf("\n\n");
    for (int i=0; i<Nrows; i++) std::cout << "Min position = " << d_indices[i] << "; Min value = " << d_minima[i] << "\n";

    return 0;
}

错误：

/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/bulk/algorithm/reduce_by_key.hpp(58): error: ambiguous "?" operation: second operand of type "const thrust::tuple<double, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>" can be converted to third operand type "thrust::tuple<float, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>", and vice versa
          detected during:
            instantiation of "thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::result_type thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::operator()(const thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::first_argument_type &, const thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::second_argument_type &) [with FlagType=int, ValueType=thrust::tuple<double, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=thrust::minimum<thrust::tuple<float, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>]"

Answer 1

我猜你正在使用 this 代码。

该代码的一个奇怪特征是矩阵是使用 double 类型定义的，但捕获的最小值存储在 float 向量中。

如果您想按原样使用该代码，根据我的测试，推力（在 CUDA 10 中，显然也在 CUDA 8 中）不喜欢这一行：

        thrust::minimum<thrust::tuple<float, int> >()

该运算符用于比较两个项目以确定哪个更小，并且它被模板化以接受不同种类的项目。但是，它已决定找到其中两个元组中的最小值是一个 "ambiguous" 请求。部分原因是运算符 returns 是一个 float, int 元组，但被赋予了不同的 double,int 元组或 float,int 元组。

我们可以 fix/work 通过传递我们自己的函子来完成这项工作，这在处理传递给它的元组方面是明确的：

$ cat t373.cu
#include <iterator>
#include <algorithm>

#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/random.h>

using namespace thrust::placeholders;
struct my_min
{
template <typename T1, typename T2>
  __host__ __device__
  T2 operator()(T1 t1, T2 t2){
    if (thrust::get<0>(t1) < thrust::get<0>(t2)) return t1;
    return t2;
    }
};

int main()
{
    const int Nrows = 6;
    const int Ncols = 8;

    /**************************/
    /* SETTING UP THE PROBLEM */
    /**************************/

    // --- Random uniform integer distribution between 0 and 100
    thrust::default_random_engine rng;
    thrust::uniform_int_distribution<int> dist(0, 20);

    // --- Matrix allocation and initialization
    thrust::device_vector<double> d_matrix(Nrows * Ncols);
    for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (double)dist(rng);

    printf("\n\nMatrix\n");
    for(int i = 0; i < Nrows; i++) {
        std::cout << " [ ";
        for(int j = 0; j < Ncols; j++)
            std::cout << d_matrix[i * Ncols + j] << " ";
        std::cout << "]\n";
    }

    /**********************************************/
    /* FIND ROW MINIMA ALONG WITH THEIR LOCATIONS */
    /**********************************************/
    thrust::device_vector<float> d_minima(Ncols);
    thrust::device_vector<int> d_indices(Ncols);

    thrust::reduce_by_key(
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)) + Nrows * Ncols,
            thrust::make_zip_iterator(
                    thrust::make_tuple(thrust::make_permutation_iterator(
                                    d_matrix.begin(),
                                    thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), ((_1 % Nrows) * Ncols + _1 / Nrows))),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 % Nrows)))),
            thrust::make_discard_iterator(),
            thrust::make_zip_iterator(thrust::make_tuple(d_minima.begin(), d_indices.begin())),
            thrust::equal_to<int>(),
            my_min()
//            thrust::minimum<thrust::tuple<float, int> >()
    );

    printf("\n\n");
    for (int i=0; i<Nrows; i++) std::cout << "Min position = " << d_indices[i] << "; Min value = " << d_minima[i] << "\n";

    return 0;
}
$ nvcc -o t373 t373.cu
$ ./t373


Matrix
 [ 0 1 12 18 20 3 10 8 ]
 [ 5 15 1 11 12 17 12 10 ]
 [ 18 20 15 20 6 8 18 13 ]
 [ 18 20 3 18 19 6 19 8 ]
 [ 6 10 8 16 14 11 12 1 ]
 [ 12 9 12 17 10 16 1 4 ]


Min position = 0; Min value = 0
Min position = 0; Min value = 1
Min position = 1; Min value = 1
Min position = 1; Min value = 11
Min position = 2; Min value = 6
Min position = 0; Min value = 3
$

我认为更好的解决方法是只选择其中之一，float 或 double。例如，如果我们将所有 float 类型修改为 double，则 thrust 很高兴，没有任何其他更改：

$ cat t373a.cu
#include <iterator>
#include <algorithm>

#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/random.h>

using namespace thrust::placeholders;

int main()
{
    const int Nrows = 6;
    const int Ncols = 8;

    /**************************/
    /* SETTING UP THE PROBLEM */
    /**************************/

    // --- Random uniform integer distribution between 0 and 100
    thrust::default_random_engine rng;
    thrust::uniform_int_distribution<int> dist(0, 20);

    // --- Matrix allocation and initialization
    thrust::device_vector<double> d_matrix(Nrows * Ncols);
    for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (double)dist(rng);

    printf("\n\nMatrix\n");
    for(int i = 0; i < Nrows; i++) {
        std::cout << " [ ";
        for(int j = 0; j < Ncols; j++)
            std::cout << d_matrix[i * Ncols + j] << " ";
        std::cout << "]\n";
    }

    /**********************************************/
    /* FIND ROW MINIMA ALONG WITH THEIR LOCATIONS */
    /**********************************************/
    thrust::device_vector<double> d_minima(Ncols);
    thrust::device_vector<int> d_indices(Ncols);

    thrust::reduce_by_key(
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)) + Nrows * Ncols,
            thrust::make_zip_iterator(
                    thrust::make_tuple(thrust::make_permutation_iterator(
                                    d_matrix.begin(),
                                    thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), ((_1 % Nrows) * Ncols + _1 / Nrows))),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 % Nrows)))),
            thrust::make_discard_iterator(),
            thrust::make_zip_iterator(thrust::make_tuple(d_minima.begin(), d_indices.begin())),
            thrust::equal_to<int>(),
            thrust::minimum<thrust::tuple<double, int> >()
    );

    printf("\n\n");
    for (int i=0; i<Nrows; i++) std::cout << "Min position = " << d_indices[i] << "; Min value = " << d_minima[i] << "\n";

    return 0;
}
$ nvcc -o t373a t373a.cu
$ ./t373a


Matrix
 [ 0 1 12 18 20 3 10 8 ]
 [ 5 15 1 11 12 17 12 10 ]
 [ 18 20 15 20 6 8 18 13 ]
 [ 18 20 3 18 19 6 19 8 ]
 [ 6 10 8 16 14 11 12 1 ]
 [ 12 9 12 17 10 16 1 4 ]


Min position = 0; Min value = 0
Min position = 0; Min value = 1
Min position = 1; Min value = 1
Min position = 1; Min value = 11
Min position = 2; Min value = 6
Min position = 0; Min value = 3
$

我认为后一种使用一致类型的解决方案是更明智的解决方案。

无法使用 reduce_by_key 编译推力代码

Unable to compile thrust code using reduce_by_key

cuda

matrix

thrust