仅当从复数开始时，cuFFT 才会出现错误结果

Question

我之前在的回答中得到了帮助，实现了就地转换，它运行良好，但前提是我从真实数据开始。如果我从复杂的数据开始，IFT+FFT 之后的结果是错误的，并且这种情况只发生在就地版本中，我使用此转换的非就地版本获得了完美的结果。这是代码：

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <complex.h>
#include <cuComplex.h>
#include <cufft.h>
#include <cufftXt.h>

#define N 4
#define N_PAD ( 2*(N/2+1) )

void print_3D_Real(double *array){
    printf("\nPrinting 3D real matrix \n");
    unsigned long int idx;
    for (int z = 0; z < N; z++){
        printf("---------------------------------------------------------------------------- plane %d below\n", z);
        for (int x = 0; x < N; x++){
            for (int y = 0; y < N; y++){
                idx = z + N_PAD * (y + x * N);
                printf("%.3f \t", array[idx]);
            }
            printf("\n");
        }
    }
}

void print_3D_Comp(cuDoubleComplex *array){
    printf("\nPrinting 3D complex matrix \n");
    unsigned long int idx;
    for (int z = 0; z < (N/2+1); z++){
        printf("---------------------------------------------------------------------------- plane %d below\n", z);
        for (int x = 0; x < N; x++){
            for (int y = 0; y < N; y++){
                idx = z + (N/2+1) * (y + x * N);
                printf("%+.3f%+.3fi \t", array[idx].x, array[idx].y);
            }
            printf("\n");
        }
    }
}

// Main function
int main(int argc, char **argv){
    CU_ERR_CHECK( cudaSetDevice(0) );
    unsigned long int idx, in_mem_size, out_mem_size;
    cuDoubleComplex *in = NULL, *d_in = NULL;
    double *out = NULL, *d_out = NULL;
    cufftHandle plan_r2c, plan_c2r;
    in_mem_size = sizeof(cuDoubleComplex) * N*N*(N/2+1);
    out_mem_size = in_mem_size;

    in = (cuDoubleComplex *) malloc (in_mem_size);
    out = (double *) in;

    cudaMalloc((void **)&d_in, in_mem_size);
    d_out = (double *) d_in;

    cufftPlan3d(&plan_c2r, N, N, N, CUFFT_Z2D);
    cufftPlan3d(&plan_r2c, N, N, N, CUFFT_D2Z);

    memset(in, 0, in_mem_size);
    memset(out, 0, out_mem_size);

    // Initial complex data
    for (int x = 0; x < N; x++){
        for (int y = 0; y < N; y++){
            for (int z = 0; z < (N/2+1); z++){
                idx = z + (N/2+1) * (y + x * N);
                in[idx].x = idx;
            }
        }
    }
    print_3D_Comp(in);
    cudaMemcpy(d_in, in, in_mem_size, cudaMemcpyHostToDevice);

    cufftExecZ2D(plan_c2r, (cufftDoubleComplex *)d_in, (cufftDoubleReal *)d_out);

    cudaMemcpy(out, d_out, out_mem_size, cudaMemcpyDeviceToHost);
    // Normalisation
    for (int i = 0; i < N*N*N_PAD; i++)
        out[i] /= (N*N*N);
    print_3D_Real(out);
    cudaMemcpy(d_out, out, out_mem_size, cudaMemcpyHostToDevice);

    cufftExecD2Z(plan_r2c, (cufftDoubleReal *)d_out, (cufftDoubleComplex *)d_in);

    cudaMemcpy(in, d_in, in_mem_size, cudaMemcpyDeviceToHost) );

    print_3D_Comp(in);

    cudaDeviceReset();
    return 0;
}

我程序的输出在这个pastebin.

有人可以指导我走正确的道路吗？非常感谢您。

Answer 1

首先，您的代码无法编译。

在其最一般的定义中，傅立叶变换执行从一个复杂域到另一个复杂域的映射，并且该操作应该是reversible。

但是，C2R 和 R2C 是特例，假设信号在 2 个域之一（"time" 域）中完全可表示为纯实信号（所有虚部均为零） ).

然而，显然会有一些复杂的 "frequency" 域表示无法用纯实时域信号表示。如果反例成立（任何复杂的频域信号都可以表示为纯实时域信号），那么 FFT 对于复杂的时域信号是不可逆的（因为所有频域数据集都映射到纯实时域数据集。)

因此您不能在频域中选择任意数据，并期望它能正确映射到纯实时域信号中。 (*)

作为演示，将您的输入数据集更改为以下内容：

            in[idx].x = (idx)?0:1;

我相信你会得到一个 "passing" 测试用例。

此外，如果您实际上在问题中使用 posted 的特定数据集，我认为您的指控 "I have perfect results with an out-of-place version of this transform" 是无法得到支持的。如果您不同意，请 post 一个完整的代码来证明您通过了不适当的转换的测试用例，在其他方面与您的 posted 代码相同。

最后，我们可以使用 fftw 进行测试。将程序转换为使用 fftw 而不是 cufft 会产生完全相同的输出：

$ cat t355.cpp
#include <stdio.h>
#include <stdlib.h>
#include <fftw3.h>
#include <string.h>

#define N 4
#define N_PAD ( 2*(N/2+1) )

void print_3D_Real(double *array){
    printf("\nPrinting 3D real matrix \n");
    unsigned long int idx;
    for (int z = 0; z < N; z++){
        printf("---------------------------------------------------------------------------- plane %d below\n", z);
        for (int x = 0; x < N; x++){
            for (int y = 0; y < N; y++){
                idx = z + N_PAD * (y + x * N);
                printf("%.3f \t", array[idx]);
            }
            printf("\n");
        }
    }
}

void print_3D_Comp(fftw_complex *array){
    printf("\nPrinting 3D complex matrix \n");
    unsigned long int idx;
    for (int z = 0; z < (N/2+1); z++){
        printf("---------------------------------------------------------------------------- plane %d below\n", z);
        for (int x = 0; x < N; x++){
            for (int y = 0; y < N; y++){
                idx = z + (N/2+1) * (y + x * N);
                printf("%+.3f%+.3fi \t", array[idx][0], array[idx][1]);
            }
            printf("\n");
        }
    }
}

// Main function
int main(int argc, char **argv){
    unsigned long int idx, in_mem_size, out_mem_size;
    fftw_complex *in = NULL;
    double *out = NULL;
    in_mem_size = sizeof(fftw_complex) * N*N*(N/2+1);
    out_mem_size = in_mem_size;

    in = (fftw_complex *) malloc (in_mem_size);
    out = (double *) in;

    memset(in, 0, in_mem_size);
    memset(out, 0, out_mem_size);

    // Initial complex data
    for (int x = 0; x < N; x++){
        for (int y = 0; y < N; y++){
            for (int z = 0; z < (N/2+1); z++){
                idx = z + (N/2+1) * (y + x * N);
                in[idx][0] = idx;
            }
        }
    }
    print_3D_Comp(in);

    fftw_plan plan_c2r = fftw_plan_dft_c2r_3d(N, N, N, in, out, FFTW_ESTIMATE);
    fftw_plan plan_r2c = fftw_plan_dft_r2c_3d(N, N, N, out, in, FFTW_ESTIMATE);

    fftw_execute(plan_c2r);
    // Normalisation
    for (int i = 0; i < N*N*N_PAD; i++)
        out[i] /= (N*N*N);
    print_3D_Real(out);
    fftw_execute(plan_r2c);



    print_3D_Comp(in);

    return 0;
}
$ g++ t355.cpp -o t355 -lfftw3
$ ./t355

Printing 3D complex matrix
---------------------------------------------------------------------------- plane 0 below
+0.000+0.000i   +3.000+0.000i   +6.000+0.000i   +9.000+0.000i
+12.000+0.000i  +15.000+0.000i  +18.000+0.000i  +21.000+0.000i
+24.000+0.000i  +27.000+0.000i  +30.000+0.000i  +33.000+0.000i
+36.000+0.000i  +39.000+0.000i  +42.000+0.000i  +45.000+0.000i
---------------------------------------------------------------------------- plane 1 below
+1.000+0.000i   +4.000+0.000i   +7.000+0.000i   +10.000+0.000i
+13.000+0.000i  +16.000+0.000i  +19.000+0.000i  +22.000+0.000i
+25.000+0.000i  +28.000+0.000i  +31.000+0.000i  +34.000+0.000i
+37.000+0.000i  +40.000+0.000i  +43.000+0.000i  +46.000+0.000i
---------------------------------------------------------------------------- plane 2 below
+2.000+0.000i   +5.000+0.000i   +8.000+0.000i   +11.000+0.000i
+14.000+0.000i  +17.000+0.000i  +20.000+0.000i  +23.000+0.000i
+26.000+0.000i  +29.000+0.000i  +32.000+0.000i  +35.000+0.000i
+38.000+0.000i  +41.000+0.000i  +44.000+0.000i  +47.000+0.000i

Printing 3D real matrix
---------------------------------------------------------------------------- plane 0 below
23.500  -1.500  -1.500  -1.500
-6.000  0.000   0.000   0.000
-6.000  0.000   0.000   0.000
-6.000  0.000   0.000   0.000
---------------------------------------------------------------------------- plane 1 below
-0.500  0.750   0.000   -0.750
3.000   0.000   0.000   0.000
0.000   0.000   0.000   0.000
-3.000  0.000   0.000   0.000
---------------------------------------------------------------------------- plane 2 below
0.000   0.000   0.000   0.000
0.000   0.000   0.000   0.000
0.000   0.000   0.000   0.000
0.000   0.000   0.000   0.000
---------------------------------------------------------------------------- plane 3 below
-0.500  -0.750  0.000   0.750
-3.000  0.000   0.000   0.000
0.000   0.000   0.000   0.000
3.000   0.000   0.000   0.000

Printing 3D complex matrix
---------------------------------------------------------------------------- plane 0 below
+0.000+0.000i   +6.000+0.000i   +6.000+0.000i   +6.000+0.000i
+24.000+0.000i  +30.000+0.000i  +30.000+0.000i  +30.000+0.000i
+24.000+0.000i  +30.000+0.000i  +30.000+0.000i  +30.000+0.000i
+24.000+0.000i  +30.000+0.000i  +30.000+0.000i  +30.000+0.000i
---------------------------------------------------------------------------- plane 1 below
+1.000+0.000i   +4.000+0.000i   +7.000+0.000i   +10.000+0.000i
+13.000+0.000i  +16.000+0.000i  +19.000+0.000i  +22.000+0.000i
+25.000+0.000i  +28.000+0.000i  +31.000+0.000i  +34.000+0.000i
+37.000+0.000i  +40.000+0.000i  +43.000+0.000i  +46.000+0.000i
---------------------------------------------------------------------------- plane 2 below
+2.000+0.000i   +8.000+0.000i   +8.000+0.000i   +8.000+0.000i
+26.000+0.000i  +32.000+0.000i  +32.000+0.000i  +32.000+0.000i
+26.000+0.000i  +32.000+0.000i  +32.000+0.000i  +32.000+0.000i
+26.000+0.000i  +32.000+0.000i  +32.000+0.000i  +32.000+0.000i
$

(*) 如果您愿意，您可以争辩说，C2R 和 R2C 变换的复共轭对称特征应该考虑将所有可能的复 "frequency" 域信号正确映射到唯一的，纯真实 "time" 域信号。我声称，没有证据，它没有，有 2 个数据点：

本题中的示例代码。
由于 C2R 或 R2C 变换中的复数 space 在数值上比实际的 space 大（(2*(N/2+1))/N 倍），按理说不能将所有可能的复杂信号映射到唯一的实信号的唯一 1:1。唯一的 1:1 映射对于完全可逆性是必要的。

有关随机数据中可能缺乏对称性的其他背景，请注意 cufft documentation 中围绕 CUFFT_COMPATIBILITY_FFTW_ASYMMETRIC 的讨论。

仅当从复数开始时，cuFFT 才会出现错误结果

cuFFT wrong results only when starting from complex

cuda

cufft