如何从 Cudafy C# GPU 计算中返回一个值?

How to return a value from Cudafy c# GPU calculation?

我的问题

嘿,我正在做一个简单的计算:求 0 到 100 度之间各角度的正弦值之和(我用它作为系统的基准测试)。计算本身不是问题;我的问题是,我是 Cudafy 的新手,不确定如何正确地传入参数并返回结果,以便把结果打印出来。这是我的代码:

代码

    // Not referenced by any code visible in this snippet.
    public const int N = 33 * 1024;
    // Passed as the second argument to gpu.Launch in Main.
    public const int threadsPerBlock = 256;
    // Passed as the first argument to gpu.Launch in Main.
    public const int blocksPerGrid = 32;                                           

    /// <summary>
    /// Entry point: translates and loads the Cudafy module, launches the
    /// SumOfSines kernel, copies a single double back from the device, then
    /// prints the elapsed time and the value.
    /// </summary>
    public static void Main()
    {
        // Timing starts before translation and module loading, so the reported
        // figure covers the whole sequence up to the copy back to the host.
        Stopwatch timer = Stopwatch.StartNew();
        int iterations = 1000000;

        CudafyModule module = CudafyTranslator.Cudafy();
        GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
        gpu.LoadModule(module);

        // Device-side buffer that the kernel writes its result into.
        double[] dev_Value = gpu.Allocate<double>();
        gpu.Launch(blocksPerGrid, threadsPerBlock).SumOfSines(iterations, dev_Value);

        double Value;
        gpu.CopyFromDevice(dev_Value, out Value);
        timer.Stop();

        string elapsedSeconds = timer.Elapsed.TotalSeconds.ToString();
        Console.WriteLine("The process took a total of: " + elapsedSeconds + " Seconds");
        Console.WriteLine(Value);

        // Wait for input before releasing device memory, as the original does.
        Console.Read();
        gpu.FreeAll();
    }
    /// <summary>
    /// Kernel that sums sin(1 degree) through sin(100 degrees). The sum is
    /// recomputed from zero <paramref name="iterations"/> times, and the value
    /// from the last pass is stored in <c>Value[0]</c>.
    /// </summary>
    /// <param name="thread">Cudafy thread context; not read by this kernel.</param>
    /// <param name="iterations">Number of times the summation is repeated.</param>
    /// <param name="Value">Output array; only element 0 is written.</param>
    [Cudafy]
    public static void SumOfSines(GThread thread, int iterations, double[] Value)
    {
        double radiansPerDegree = Math.PI / 180.0;
        double sum = 0.0;

        int pass = 0;
        while (pass < iterations)
        {
            // Each pass starts over, so only the final pass determines the result.
            sum = 0.0;

            int degrees = 1;
            while (degrees < 101)
            {
                sum += Math.Sin((double)degrees * radiansPerDegree);
                degrees++;
            }

            pass++;
        }

        Value[0] = sum;
    }

我想从 CUDAfy 部分取回的值是 total(总和),然后把它和基准测试的耗时一起打印出来。如果有人能给出建议,我将不胜感激(任何关于删除无用代码行或低效部分的建议也很欢迎)。

没关系,我已经找到了答案,现在把它贴在这里:

    // Not referenced by any code visible in this snippet.
    public const int N = 33 * 1024;
    // Passed as the second argument to gpu.Launch in Main.
    public const int threadsPerBlock = 256;
    // Passed as the first argument to gpu.Launch in Main.
    public const int blocksPerGrid = 32;

    /// <summary>
    /// Entry point: translates and loads the Cudafy module, launches the
    /// SumOfSines kernel over a device buffer with one slot per iteration,
    /// copies a single double back to the host, and prints the elapsed time
    /// followed by that value.
    /// </summary>
    public static void Main()
    {
        // Timing starts before translation and module loading, so the reported
        // figure covers the whole sequence up to the copy back to the host.
        Stopwatch watch = new Stopwatch();
        watch.Start();
        CudafyModule km = CudafyTranslator.Cudafy();

        GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
        gpu.LoadModule(km);

        int iterations = 1000000;
        double Value;
        try
        {
            // Allocate<T>(n) takes an element count, not a byte count. Multiplying
            // by sizeof(double) reserved eight times more device memory than the
            // kernel ever indexes.
            double[] dev_Value = gpu.Allocate<double>(iterations);
            gpu.Launch(blocksPerGrid, threadsPerBlock).SumOfSines(iterations, dev_Value);

            // Copies one double from the device buffer into Value
            // (presumably the first element - confirm against the CUDAfy docs).
            gpu.CopyFromDevice(dev_Value, out Value);
            watch.Stop();
        }
        finally
        {
            // Release device memory as soon as the work is done, even on failure,
            // instead of holding it until the user presses a key.
            gpu.FreeAll();
        }

        string Text = watch.Elapsed.TotalSeconds.ToString();
        Console.WriteLine("The process took a total of: " + Text + " Seconds");
        Console.WriteLine(Value);
        Console.Read();
    }

    /// <summary>
    /// Kernel that fills <paramref name="Value"/> with the sum of sin(0 degrees)
    /// through sin(100 degrees), one entry per iteration. Threads cooperate using
    /// a grid-stride loop: each thread starts at its global index and advances by
    /// the total number of launched threads.
    /// </summary>
    /// <param name="thread">Cudafy thread context providing thread/block/grid indices.</param>
    /// <param name="_iterations">Number of entries of <paramref name="Value"/> to fill.</param>
    /// <param name="Value">Output array; indices 0 to _iterations - 1 are written.</param>
    [Cudafy]
    public static void SumOfSines(GThread thread, int _iterations, double[] Value)
    {
        int threadID = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
        int numThreads = thread.blockDim.x * thread.gridDim.x;

        // Loop-invariant conversion factor, computed once per thread.
        double degAsRad = Math.PI / 180;

        // The loop condition already excludes threads whose index is past the
        // end, so no separate bounds check is needed.
        for (int i = threadID; i < _iterations; i += numThreads)
        {
            // Accumulate in a local and store once, rather than a read-modify-write
            // of Value[i] on every step of the inner loop.
            double total = 0.0;

            // Inclusive upper bound: the stated range is 0 to 100 degrees. The
            // previous "a < 100" dropped sin(100 degrees); sin(0) contributes 0.
            for (int a = 0; a <= 100; a++)
            {
                total += Math.Sin((double)a * degAsRad);
            }

            Value[i] = total;
        }
    }

-杰克