Thrust -- 按键排序两个向量

Thrust -- sort two vectors by key

问题

我想按行对矩阵进行排序,但是 return 每个元素的秩。

例子

  Values            Rank
-------------   --------------
[5, 4, 1, 9]     [2, 1, 0, 3]
[1, 4, 3, 2] --> [0, 3, 2, 1]
[2, 4, 2, 0]     [1, 3, 2, 0]

尝试

我有这两个例子:
Ranking rows of a matrix

第一个展示了如何使用索引向量和置换迭代器来 return 排序值的索引。第二个显示如何使用 "back-to-back" 方法按行对矩阵进行排序。 (按键排序 2x)。但是我不知道如何将这两个想法结合起来。

我尝试使用 zip_iterator 将值和索引组合成一个元组,然后执行背靠背方法,但我无法对压缩元组进行按键排序。

我也尝试过使用背靠背排序,然后对值进行索引,但是索引只是已经排序的值,所以每个索引总是 [0, 1, 2, 3]矩阵的行。

代码

#include <iostream>
#include <iomanip>
#include <fstream>
#include <thrust/device_vector.h>
#include <thrust/device_ptr.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/execution_policy.h>
#include <thrust/generate.h>
#include <thrust/equal.h>
#include <thrust/sequence.h>
#include <thrust/for_each.h>
#include <iostream>
#include <stdlib.h>

using namespace std;

#define NSORTS 5
#define DSIZE 4

// -------------------
//      Print
// -------------------

    template <class Vector>
    void print(std::string name, Vector toPrint)
    {
        cout << setw(13) << name << " :: ";

        int i = 0;
        for (auto x : toPrint)
        {
            i++;
            std::cout << setw(2) << x << " ";
            if (!(i%4))
                cout << "   ";
        }
        std::cout << std::endl;
    }

// ---------------------
//      Print Title
// ---------------------
    void print_title(const std::string title)
    {
        cout << "\n\n";
        cout << "-------------------\n";
        cout << "      " << title << "\n";
        cout << "-------------------\n";
    }

// ---------------------
//      My Mod
// ---------------------
    int my_mod_start = 0;
    int my_mod(){
        return (my_mod_start++)/DSIZE;
    }

// ------------------
//      Clamp
// ------------------
    struct clamp
    {
        template <typename T>
        __host__ __device__
        T operator()(T data){
            if (data <= 0) return 0;
            return 1;}
    };


int main()
{
    // Initialize
        thrust::host_vector<int> h_data(DSIZE * NSORTS);
        thrust::generate(h_data.begin(), h_data.end(), rand);
        thrust::transform(h_data.begin(), h_data.end(), h_data.begin(), thrust::placeholders::_1 % 10);
        int size = DSIZE * NSORTS;

    // Device Vectors
        thrust::device_vector<int> d_data = h_data;
        thrust::device_vector<int> d_idx(size);
        thrust::device_vector<int> d_result(size);

        thrust::sequence(d_idx.begin(), d_idx.end());

    // Segments
        thrust::host_vector<int> h_segments(size);
        thrust::generate(h_segments.begin(), h_segments.end(), my_mod);
        thrust::device_vector<int> d_segments = h_segments;

            print_title("Generate");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);

    // Sort 1
        thrust::stable_sort_by_key(d_data.begin(), d_data.end(), d_segments.begin());

            print_title("Sort 1");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);

    // Sort 2
        thrust::stable_sort_by_key(d_segments.begin(), d_segments.end(), d_data.begin());

            print_title("Sort 2");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);

    // Adjacent Difference
        thrust::device_vector<int> d_diff(size);
        thrust::adjacent_difference(d_data.begin(), d_data.end(), d_diff.begin());
        d_diff[0] = 0;

            print_title("Adj Diff");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);
            print("Difference", d_diff);

    // Transform
        thrust::transform(d_diff.begin(), d_diff.end(), d_diff.begin(), clamp());

            print_title("Transform");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);
            print("Difference", d_diff);

    // Inclusive Scan
        thrust::inclusive_scan_by_key(d_segments.begin(), d_segments.end(), d_diff.begin(), d_diff.begin());

            print_title("Inclusive");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);
            print("Difference", d_diff);

    // Results
        thrust::copy(d_diff.begin(), d_diff.end(), thrust::make_permutation_iterator(d_result.begin(), d_idx.begin()));

            print_title("Results");

            print("Data", d_data);
            print("Index", d_idx);
            print("Segments", d_segments);
            print("Difference", d_diff);
            print("Results", d_result);

}

编辑 -- 示例等级矩阵错误

这可以通过单个 sort_by_key 操作完成,然后是 "rearrangement" 排序值。

"keys"(我们排序的内容)将是一个 zip_iterator 组合要排序的实际值,以及一个行指示符。排序函子将被安排为首先按行排序,然后按行内的值排序。 "values"(与键一起移动的东西)将是每一行中的列索引。

排序后的这些"values"可以重新排列成为我们在一行内"rank"的指标。

我们可以在矩阵的第 3 行之后使用一个数值示例:

排序前:

keys:     [2, 4, 2, 0]
values:   [0, 1, 2, 3]

排序后:

keys:     [0, 2, 2, 4]
values:   [3, 0, 2, 1]

重新排列前:

destination map:      [3, 0, 2, 1]
values:               [0, 1, 2, 3]

重新排列后:

destination:          [1, 3, 2, 0]

(碰巧,对于示例中的前两行,排序值和目标值之间没有变化)

这是一个有效的例子:

$ cat t1633.cu
#include <iostream>
#include <thrust/sort.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/copy.h>


typedef int dtype;

// creates row indices:
// 0 0 0 0 ...
// 1 1 1 1 ...
// 2 2 2 2 ...
struct row_f : public thrust::unary_function<int, int>
{
  int ncols;
  row_f(int _nc) : ncols(_nc) {};
  __host__ __device__
  int operator()(int i){
    return i/ncols;}
};

// creates column indices:
// 0 1 2 3 ...
// 0 1 2 3 ...
// 0 1 2 3 ...
struct col_f : public thrust::unary_function<int, int>
{
  int ncols;
  col_f(int _nc) : ncols(_nc) {};
  __host__ __device__
  int operator()(int i){
    return i%ncols;}
};

struct map_f : public thrust::unary_function<thrust::tuple<int, int>, int>
{
  int ncols;
  map_f(int _nc) : ncols(_nc) {};
  __host__ __device__
  int operator()(thrust::tuple<int, int> t){
    return thrust::get<0>(t) + ncols*thrust::get<1>(t);}
};

struct sort_f
{
  template <typename T1, typename T2>
  __host__ __device__
  bool operator()(T1 k1, T2 k2){
// sort by row first
    if (thrust::get<1>(k1) < thrust::get<1>(k2)) return true;
    if (thrust::get<1>(k1) > thrust::get<1>(k2)) return false;
// then sort within the row
    if (thrust::get<0>(k1) < thrust::get<0>(k2)) return true;
    return false;}
};

int main(){

// example data setup
  dtype keys[] = {5, 4, 1, 9, 1, 4, 3, 2, 2, 4, 2, 0};
  int nrows = 3;
  int ds = sizeof(keys)/sizeof(keys[0]);
  int ncols = ds/nrows;

  thrust::device_vector<dtype> d_keys(keys, keys+ds);
// create values to be moved which is effectively index within row, i.e. column indices
  thrust::device_vector<int> d_vals(nrows*ncols);
  thrust::sequence(d_vals.begin(), d_vals.end());
  thrust::transform(d_vals.begin(), d_vals.end(), d_vals.begin(), col_f(ncols));
// sort
  thrust::sort_by_key(thrust::make_zip_iterator(thrust::make_tuple(d_keys.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), row_f(ncols)))), thrust::make_zip_iterator(thrust::make_tuple(d_keys.end(), thrust::make_transform_iterator(thrust::counting_iterator<int>(nrows*ncols), row_f(ncols)))), d_vals.begin(), sort_f());
// rearrange
  thrust::device_vector<int> d_rank(nrows*ncols);
  thrust::copy_n(thrust::make_transform_iterator(thrust::counting_iterator<int>(0), col_f(ncols)), nrows*ncols, thrust::make_permutation_iterator(d_rank.begin(), thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(d_vals.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), row_f(ncols)))), map_f(ncols))));
// print results
  thrust::copy_n(d_rank.begin(), ncols*nrows, std::ostream_iterator<int>(std::cout, ","));
  std::cout << std::endl;
}
$ nvcc -o t1633 t1633.cu
$ ./t1633
2,1,0,3,0,3,2,1,1,3,2,0,
$