使用 JCuda 计算 Skintone 未给出正确的百分比

Calculate Skintone using JCuda is not giving the right percentage

我正在计算 java 中图像的肤色。

  1. 将图像的每个像素从 RGB 转换到 YCbCr 色彩空间。
  2. 检查像素的 Cb、Cr 值是否落在特定范围内;如果是,则将其视为肤色像素。
  3. 用肤色像素数除以总像素数,得到肤色所占的百分比。

它在 CPU 代码中工作正常,但是当我将它转换为 GPU 代码时,像素百分比不正确。

让我感到困惑的部分是将像素数据发送到 GPU 并在 GPU 中获取其 r、g、b 值。

所以我按照 JCuda Pixel Invert Example 示例发送像素数据。不同之处在于示例在 int[] 数组中发送像素数据,而我在 byte[] 数组中发送它。

这里是代码。

import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuMemAlloc;
import static jcuda.driver.JCudaDriver.cuMemFree;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import static jcuda.driver.JCudaDriver.cuMemcpyHtoD;

import java.awt.image.BufferedImage;
import java.awt.image.DataBuffer;
import java.awt.image.DataBufferByte;
import java.awt.image.Raster;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

import ij.IJ;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUfunction;
import jcuda.driver.JCudaDriver;
import jcuda.nvrtc.JNvrtc;

public class SkinTone {

public static void CalculateSKintoneGPU(File file) throws IOException {
    BufferedImage bufferedImage = ImageIO.read(file);
    if (bufferedImage == null || bufferedImage.getData() == null)
        return;
    Raster raster = bufferedImage.getData();

    DataBuffer dataBuffer = raster.getDataBuffer();
    DataBufferByte dataBufferInt = (DataBufferByte)dataBuffer;
    byte[] pixels =  dataBufferInt.getData();

    int totalPixels = raster.getHeight() * raster.getWidth();

    CUfunction kernelFunction = initlize();

    int output[] = execute(kernelFunction, pixels, raster.getWidth(), raster.getHeight());
    // Flushing memory
    raster = null;
    bufferedImage.flush();
    bufferedImage = null;

    long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

    System.err.println("Skintone Using GPU: " + output[0]);
    System.err.println("Total Pixel Of GPU: " + totalPixels);
    System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
}

static int[] execute(CUfunction kernelFunction, byte[] pixels, int w, int h) {
    // Allocate memory on the device, and copy the host data to the device
    int size = w * h * Sizeof.BYTE;
    CUdeviceptr pointer = new CUdeviceptr();
    cuMemAlloc(pointer, size);
    cuMemcpyHtoD(pointer, Pointer.to(pixels), size);

    int numElements = 9;
    int s = 0;
    // Allocate device output memory
    CUdeviceptr deviceOutput = new CUdeviceptr();
    cuMemAlloc(deviceOutput, numElements * Sizeof.INT);

    // Set up the kernel parameters: A pointer to an array
    // of pointers which point to the actual values.
    Pointer kernelParameters = Pointer.to(Pointer.to(pointer), Pointer.to(new int[] { w }),
            Pointer.to(new int[] { h }), Pointer.to(deviceOutput));

    // Call the kernel function
    int blockSize = 16;
    int gridSize = (Math.max(w, h) + blockSize - 1) / blockSize;
    cuLaunchKernel(kernelFunction, gridSize, gridSize, 1, // Grid dimension
            blockSize, blockSize, 1, // Block dimension
            0, null, // Shared memory size and stream
            kernelParameters, null // Kernel- and extra parameters
    );
    cuCtxSynchronize();

    // Allocate host output memory and copy the device output
    // to the host.
    int hostOutput[] = new int[numElements];
    cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput, numElements * Sizeof.INT);

    // Clean up.
    cuMemFree(deviceOutput);
    cuMemFree(pointer);

    return hostOutput;
}

public static CUfunction initlize() {

    // Enable exceptions and omit all subsequent error checks
    JCudaDriver.setExceptionsEnabled(true);
    JNvrtc.setExceptionsEnabled(true);

    // Initialize the driver and create a context for the first device.
    cuInit(0);
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    CUcontext context = new CUcontext();
    cuCtxCreate(context, 0, device);

    // Obtain the CUDA source code from the CUDA file
    String cuFileName = "Skintone.cu";
    String sourceCode = CudaUtils.readResourceAsString(cuFileName);
    if (sourceCode == null) {
        IJ.showMessage("Error", "Could not read the kernel source code");
    }

    // Create the kernel function
    return CudaUtils.createFunction(sourceCode, "skintone");
}

public static void CalculateSKintoneCPU(File file) throws IOException {
    BufferedImage bufferedImage = ImageIO.read(file);
    if (bufferedImage == null || bufferedImage.getData() == null)
        return;
    Raster raster = bufferedImage.getData();
    float[] rgb = new float[4];
    int totalPixels = raster.getHeight() * raster.getWidth();

    int skinTonePixels = 0;

    for (int x = 0; x < raster.getWidth(); x++) {
        for (int y = 0; y < raster.getHeight(); y++) {
            raster.getPixel(x, y, rgb);
            if (skintone(rgb)) {
                skinTonePixels++;
            }
        }
    }

    // Flushing memory
    raster = null;
    rgb = null;
    bufferedImage.flush();
    bufferedImage = null;

    long skintoneThreshold = Math.round(skinTonePixels / (double) totalPixels * 100.0);

    System.err.println("Skintone Using CPU: " + skinTonePixels);
    System.err.println("Total Pixel Of CPU: " + totalPixels);
    System.err.println("SKinTone Percentage Using CPU: " + skintoneThreshold + "%");
}

private static boolean skintone(float[] rgb) {
    float yCbCr[] = (float[]) convertRGBtoYUV(rgb);
    if ((yCbCr[1] >= 80 && yCbCr[1] <= 120) && (yCbCr[2] >= 133 && yCbCr[2] <= 173)) {
        return true;
    }
    return false;
}

private static float[] convertRGBtoYUV(float[] rgb) {
    final float[] yCbCr = new float[3];
    float r = rgb[0];
    float g = rgb[1];
    float b = rgb[2];

    yCbCr[0] = 16 + (0.299f * r) + (0.587f * g) + (0.144f * b);
    yCbCr[1] = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    yCbCr[2] = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

    return yCbCr;
}

public static void main(String[] args) throws IOException {
    File file = new File("C:\Users\Aqeel\git\jcuda-imagej-example\src\test\resources\lena512color.png");
    CalculateSKintoneCPU(file);
    CalculateSKintoneGPU(file);
}

}

内核文件

    extern "C"
// Counts skin-tone pixels: one thread per pixel of a w x h row-major image.
// Each pixel is read as uchar4 with x = red, y = green, z = blue. A pixel whose
// chroma satisfies 80 <= Cb <= 120 and 133 <= Cr <= 173 adds 1 to output[0].
// The kernel only increments output[0]; it never initialises it.
__global__ void skintone(uchar4* data, int w, int h, int* output)
{
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image have nothing to do.
    if (col >= w || row >= h)
    {
        return;
    }

    const uchar4 px = data[row * w + col];

    const float r = px.x;
    const float g = px.y;
    const float b = px.z;

    const float cb = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    const float cr = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

    const bool cbInRange = (cb >= 80 && cb <= 120);
    const bool crInRange = (cr >= 133 && cr <= 173);

    if (cbInRange && crInRange)
    {
        atomicAdd(&output[0], 1);
    }
}

Complete example source. The machine needs an NVIDIA graphics card, CUDA Toolkit v9, and up-to-date graphics drivers.

我通过反复试错(hit and trial)解决了这个问题。在内核中,我把 r 和 b 的读取位置互换之后,结果就正确了;另外,在 Java 端必须用 int[] 数组而不是 byte[] 数组来发送像素数据。

extern "C"
// Counts skin-tone pixels: one thread per pixel of a w x h row-major image.
// Each pixel is read as uchar4 with x = blue, y = green, z = red
// (presumably the byte order of packed ARGB ints on a little-endian host;
// confirm against the caller). A pixel whose chroma satisfies
// 80 <= Cb <= 120 and 133 <= Cr <= 173 adds 1 to output[0].
// The kernel only increments output[0]; it never initialises it.
__global__ void skintone(uchar4* data, int w, int h, int* output)
{
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image have nothing to do.
    if (col >= w || row >= h)
    {
        return;
    }

    const uchar4 px = data[row * w + col];

    const float b = (float)px.x;
    const float g = (float)px.y;
    const float r = (float)px.z;

    const float cb = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    const float cr = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

    const bool cbInRange = (cb >= 80 && cb <= 120);
    const bool crInRange = (cr >= 133 && cr <= 173);

    if (cbInRange && crInRange)
    {
        atomicAdd(&output[0], 1);
    }
}

Java 代码更改。

/**
 * Counts the skin-tone pixels of the classpath resource {@code images.jpg} on the GPU and
 * prints the count, the total pixel count and the rounded percentage to stderr.
 *
 * <p>Pixels are handed to the kernel as packed ARGB ints, one int per pixel, row by row.
 * Returns silently when the resource cannot be decoded as an image.
 *
 * @throws IOException if the resource is missing or cannot be read
 */
public static void calculateSkintoneGPU() throws IOException {
    java.net.URL resource = SkinTone.class.getClassLoader().getResource("images.jpg");
    if (resource == null) {
        // ImageIO.read(null) would fail with an opaque IllegalArgumentException.
        throw new IOException("Resource not found on the classpath: images.jpg");
    }
    BufferedImage img = ImageIO.read(resource);
    if (img == null) {
        return;
    }

    int width = img.getWidth();
    int height = img.getHeight();
    // getRGB yields the same default-ARGB ints as PixelGrabber, but synchronously: there is
    // no InterruptedException to swallow and no risk of continuing with a half-filled array.
    int[] pixels = img.getRGB(0, 0, width, height, null, 0, width);
    img.flush();

    int totalPixels = width * height;

    CUfunction kernelFunction = initlize();

    int[] output = execute(kernelFunction, pixels, width, height);

    long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

    System.err.println("Skintone Using GPU: " + output[0]);
    System.err.println("Total Pixel Of GPU: " + totalPixels);
    System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
}