thrust::unique 在 float3 元组上

thrust::unique on float3 tuple

我正在尝试对 float3 元组使用 thrust::unique。但是,它似乎没有返回正确的结果。下面是一个完整的例子:

#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>

// --- Component-wise exact equality of two float3 values.
// Returns true only when x, y and z all compare equal with ==.
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b) {
    if (a.x != b.x) return false;
    if (a.y != b.y) return false;
    return a.z == b.z;
}

// --- Pair of float3 values treated as a single element.
using tuple_t = thrust::tuple<float3, float3>;

// --- Binary predicate: two tuples are equal when both of their float3
//     components are equal (using the float3 operator== visible at this point).
struct tupleEqual
{
    __host__ __device__
        bool operator()(tuple_t x, tuple_t y)
    {
        // A mismatch in the first component decides the result immediately.
        if (!(thrust::get<0>(x) == thrust::get<0>(y))) return false;
        return thrust::get<1>(x) == thrust::get<1>(y);
    }
};

/********/
/* MAIN */
/********/
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3> v(N), d(N);
    thrust::device_vector<tuple_t> vd(N);

    v[0] = make_float3(2.f, 5.f, 9.f);      d[0] = make_float3(2.f, 3.f, 10.f);
    v[1] = make_float3(3.f, 2.f, 1.f);      d[1] = make_float3(2.f, 5.f, 9.f);
    v[2] = make_float3(2.f, 5.f, 9.f);      d[2] = make_float3(2.f, 3.f, 10.f);
    v[3] = make_float3(2.f, 3.f, 10.f);     d[3] = make_float3(2.f, 5.f, 9.f);
    v[4] = make_float3(2.f, 3.f, 10.f);     d[4] = make_float3(1.f, 1.f, 1.f);
    v[5] = make_float3(2.f, 5.f, 9.f);      d[5] = make_float3(2.f, 3.f, 10.f);

    vd[0] = thrust::make_tuple(v[0], d[0]);
    vd[1] = thrust::make_tuple(v[1], d[1]);
    vd[2] = thrust::make_tuple(v[2], d[2]);
    vd[3] = thrust::make_tuple(v[3], d[3]);
    vd[4] = thrust::make_tuple(v[4], d[4]);
    vd[5] = thrust::make_tuple(v[5], d[5]);

    auto new_end = thrust::unique(vd.begin(), vd.end(), tupleEqual());

    const size_t Nnew = new_end - vd.begin();

    printf("Nnew = %d\n", Nnew);
    for (int k = 0; k < Nnew; k++) {
        tuple_t temp = vd[k];
        float3 vtemp = thrust::get<0>(temp);
        float3 dtemp = thrust::get<1>(temp);
        printf("%d %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}

我得到的结果是

Nnew = 6
0 2.000000 5.000000 9.000000 2.000000 3.000000 10.000000
1 3.000000 2.000000 1.000000 2.000000 5.000000 9.000000
2 2.000000 5.000000 9.000000 2.000000 3.000000 10.000000
3 2.000000 3.000000 10.000000 2.000000 5.000000 9.000000
4 2.000000 3.000000 10.000000 1.000000 1.000000 1.000000
5 2.000000 5.000000 9.000000 2.000000 3.000000 10.000000

这与输入完全相同,没有删除任何重复项。

我在 Windows 10 上使用 Visual Studio 2015 进行编译,CUDA 版本为 8.0 或 9.1(结果相同)。

我的问题是:我做错了什么?

您在输出中看不到任何变化的原因是:您的输入中不包含任何连续出现的重复元素。如果我把您代码中的输入修改为:

#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>

// Exact, component-wise equality of two float3 values.
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b) {
    const bool sameX = (a.x == b.x);
    const bool sameY = (a.y == b.y);
    const bool sameZ = (a.z == b.z);
    return sameX && sameY && sameZ;
}

// One element of the sequence: a pair of float3 values.
using tuple_t = thrust::tuple<float3, float3>;

// Binary predicate for thrust::unique: true when both float3 components of the
// two tuples compare equal.
struct tupleEqual
{
    __host__ __device__
        bool operator()(tuple_t x, tuple_t y)
    {
        const bool firstMatches = (thrust::get<0>(x) == thrust::get<0>(y));
        // The second component is only compared when the first already matches.
        return firstMatches && (thrust::get<1>(x) == thrust::get<1>(y));
    }
};

int main(void)
{
    const int N = 6;

    thrust::device_vector<float3> v(N), d(N);
    thrust::device_vector<tuple_t> vd(N);

    v[0] = make_float3(2.f, 5.f, 9.f);      d[0] = make_float3(2.f, 3.f, 10.f);
    v[1] = make_float3(2.f, 5.f, 9.f);      d[1] = make_float3(2.f, 3.f, 10.f);
    v[2] = make_float3(2.f, 3.f, 10.f);     d[2] = make_float3(2.f, 5.f, 9.f);
    v[3] = make_float3(2.f, 3.f, 10.f);     d[3] = make_float3(2.f, 5.f, 9.f);
    v[4] = make_float3(2.f, 3.f, 10.f);     d[4] = make_float3(2.f, 5.f, 9.f);
    v[5] = make_float3(2.f, 3.f, 10.f);     d[5] = make_float3(2.f, 5.f, 9.f);

    vd[0] = thrust::make_tuple(v[0], d[0]);
    vd[1] = thrust::make_tuple(v[1], d[1]);
    vd[2] = thrust::make_tuple(v[2], d[2]);
    vd[3] = thrust::make_tuple(v[3], d[3]);
    vd[4] = thrust::make_tuple(v[4], d[4]);
    vd[5] = thrust::make_tuple(v[5], d[5]);

    auto new_end = thrust::unique(vd.begin(), vd.end(), tupleEqual());
    const size_t Nnew = new_end - vd.begin();

    printf("Nnew = %zu\n", Nnew);
    for (int k = 0; k < Nnew; k++) {
        tuple_t temp = vd[k];
        float3 vtemp = thrust::get<0>(temp);
        float3 dtemp = thrust::get<1>(temp);
        printf("%d %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}

使其包含连续出现的相同元素,那么去重就会按预期工作:

$ nvcc -arch=sm_52 -std=c++11 -o float3 float3.cu
$ ./float3
Nnew = 2
0 2.000000 5.000000 9.000000 2.000000 3.000000 10.000000
1 2.000000 3.000000 10.000000 2.000000 5.000000 9.000000

thrust::unique 只会移除输入范围中连续出现的重复元素。它不会对输入进行排序。引用文档:

For each group of consecutive elements in the range [first, last) with the same value, unique removes all but the first element of the group.

着重号为我所加。您在这里唯一的错误在于对该函数行为的理解。您编写的代码本身是正确的,并且按其设计工作。

Talonmies 已经回答了我的问题,指出重复元素必须连续,这是我昨天遗漏的。

下面我修改了代码:先借助 3D Morton 码对由 v 和 d 组成的元组进行双关键字排序(以 v 的 Morton 码为主键、d 的 Morton 码为次键),使重复元素彼此相邻,然后再调用 thrust::unique:

#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>
#include <thrust/sort.h>

/*********************************/
/* EQUALITY BETWEEN TWO FLOAT3'S */
/*********************************/
// Two float3 values are equal when none of their x, y, z components differ.
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b) {
    const bool anyDifferent = (a.x != b.x) || (a.y != b.y) || (a.z != b.z);
    return !anyDifferent;
}

/*************************************/
/* BINARY PREDICATE FOR A TUPLE PAIR */
/*************************************/
// Element type: a pair of float3 values.
using tuple_t = thrust::tuple<float3, float3>;

// Predicate handed to thrust::unique: reports whether two tuples hold the same
// pair of float3 values.
struct tupleEqual
{
    __host__ __device__
        bool operator()(tuple_t x, tuple_t y)
    {
        if (thrust::get<0>(x) == thrust::get<0>(y)) {
            // First components agree; the second components decide.
            return thrust::get<1>(x) == thrust::get<1>(y);
        }
        return false;
    }
};

/**********************************/
/* MORTON ENCODER KERNEL FUNCTION */
/**********************************/
// --- Spreads the bits of a 10-bit integer over 30 bits: input bit i ends up at
//     output bit 3*i, leaving two zero bits after each original bit.
//     Each stage multiplies (adding a shifted copy of the value to itself) and then
//     masks away the copies that are not wanted.
__host__ __device__ __forceinline__ unsigned int expandBits(unsigned int v)
{
    const unsigned int stage16 = (v       * 0x00010001u) & 0xFF0000FFu;
    const unsigned int stage8  = (stage16 * 0x00000101u) & 0x0F00F00Fu;
    const unsigned int stage4  = (stage8  * 0x00000011u) & 0xC30C30C3u;
    const unsigned int stage2  = (stage4  * 0x00000005u) & 0x49249249u;
    return stage2;
}

// --- Computes a 30-bit Morton code for a 3D point expected to lie in the unit cube [0,1].
//     Each coordinate is scaled by 1024, clamped to [0, 1023] and truncated to an
//     integer; the three 10-bit integers are then bit-interleaved, with x occupying
//     the highest bit of every triple, y the middle one and z the lowest.
__host__ __device__ __forceinline__ unsigned int morton3D(float x, float y, float z)
{
    // Quantise each coordinate onto the 1024-cell grid, clamping to the grid bounds.
    const float gridX = min(max(x * 1024.0f, 0.0f), 1023.0f);
    const float gridY = min(max(y * 1024.0f, 0.0f), 1023.0f);
    const float gridZ = min(max(z * 1024.0f, 0.0f), 1023.0f);

    const unsigned int spreadX = expandBits(static_cast<unsigned int>(gridX));
    const unsigned int spreadY = expandBits(static_cast<unsigned int>(gridY));
    const unsigned int spreadZ = expandBits(static_cast<unsigned int>(gridZ));

    // Shift x by two bits and y by one bit so the three spread values interleave.
    return (spreadX << 2) + (spreadY << 1) + spreadZ;
}

/*************************/
/* CUSTOMIZED COMPARATOR */
/*************************/
// Strict "less than" ordering for (v, d) float3 tuples, intended for sorting before
// thrust::unique so that equal tuples become adjacent.
//
// Keys, in order of priority:
//   1. Morton code of the first component (v)
//   2. Morton code of the second component (d)
//   3. the raw coordinates v.x, v.y, v.z, d.x, d.y, d.z
//
// Key 3 is needed because the Morton code is computed from quantised (and clamped)
// coordinates: two different points can share a code. Without the tie-break such
// tuples compare as equivalent, the sort may interleave them, and exact duplicates
// can end up non-adjacent.
struct customizedComparator {
    __host__ __device__
        bool operator()(const tuple_t &t1, const tuple_t &t2) const {

        const float3 v1 = thrust::get<0>(t1);
        const float3 d1 = thrust::get<1>(t1);

        const float3 v2 = thrust::get<0>(t2);
        const float3 d2 = thrust::get<1>(t2);

        // Primary key: Morton code of the first component.
        const unsigned int m1 = morton3D(v1.x, v1.y, v1.z);
        const unsigned int n1 = morton3D(v2.x, v2.y, v2.z);
        if (m1 != n1) return (m1 < n1);

        // Secondary key: Morton code of the second component.
        const unsigned int p1 = morton3D(d1.x, d1.y, d1.z);
        const unsigned int q1 = morton3D(d2.x, d2.y, d2.z);
        if (p1 != q1) return (p1 < q1);

        // Tie-break: lexicographic comparison of the unquantised coordinates, so that
        // only tuples that are equal component by component are treated as equivalent.
        if (v1.x != v2.x) return (v1.x < v2.x);
        if (v1.y != v2.y) return (v1.y < v2.y);
        if (v1.z != v2.z) return (v1.z < v2.z);
        if (d1.x != d2.x) return (d1.x < d2.x);
        if (d1.y != d2.y) return (d1.y < d2.y);
        return (d1.z < d2.z);
    }
};

/********/
/* MAIN */
/********/
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3> v(N), d(N);

    v[0] = make_float3(.2f, .5f, .09f);     d[0] = make_float3(0.2f, 0.3f, 0.1f);
    v[1] = make_float3(.3f, .2f, .1f);      d[1] = make_float3(.2f, .5f, .09f);
    v[2] = make_float3(.2f, .5f, .09f);     d[2] = make_float3(0.2f, 0.3f, 0.1f);
    v[3] = make_float3(0.2f, 0.3f, 0.1f);   d[3] = make_float3(.2f, .5f, .09f);
    v[4] = make_float3(0.2f, 0.3f, 0.1f);   d[4] = make_float3(.1f, .1f, .1f);
    v[5] = make_float3(.2f, .5f, .09f);     d[5] = make_float3(0.2f, 0.3f, 0.1f);

    thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(v.begin(), d.begin())), thrust::make_zip_iterator(thrust::make_tuple(v.begin(), d.begin())) + N, customizedComparator());

    auto new_end = thrust::unique(thrust::make_zip_iterator(thrust::make_tuple(v.begin(), d.begin())), thrust::make_zip_iterator(thrust::make_tuple(v.begin(), d.begin())) + N, tupleEqual());

    const size_t Nnew = new_end - thrust::make_zip_iterator(thrust::make_tuple(v.begin(), d.begin()));

    printf("Nnew = %d\n", Nnew);
    for (int k = 0; k < Nnew; k++) {
        float3 vtemp = v[k];
        float3 dtemp = d[k];
        printf("%d %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}