PyOpenCL wrong output image
I am trying to add a gradient to an image. The CPU and GPU programs should do the same thing, but the GPU code gives me a different output image than the CPU code, and I can't find the mistake. I think the CPU code is fine but the GPU code is not. Output images: original, CPU, GPU. Please check my code, thanks.
import pyopencl as cl
import sys
import Image
import numpy
from time import time

def gpu_gradient():
    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1

    # create context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # load image
    im = Image.open(sys.argv[1])
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    imgSize = im.size
    buffer = im.tostring()  # len(buffer) = imgSize[0] * imgSize[1] * 4

    # create input and output image objects
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNSIGNED_INT8)
    input_image = cl.Image(ctx,
                           cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                           clImageFormat,
                           imgSize,
                           None,
                           buffer)
    output_image = cl.Image(ctx,
                            cl.mem_flags.WRITE_ONLY,
                            clImageFormat,
                            imgSize)

    # load the kernel source code
    kernelFile = open("gradient.cl", "r")
    kernelSrc = kernelFile.read()

    # create the OpenCL program
    program = cl.Program(ctx, kernelSrc).build()

    # call the kernel directly
    globalWorkSize = (imgSize[0], imgSize[1])
    gpu_start_time = time()
    program.gradientcover(queue,
                          globalWorkSize,
                          None,
                          input_image,
                          output_image)

    # read the output image back to the host
    buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (imgSize[0], imgSize[1], 1)
    cl.enqueue_read_image(queue, output_image,
                          origin, region, buffer).wait()

    # save the image to disk
    gsim = Image.fromstring("RGBA", imgSize, buffer.tostring())
    gsim.save("GPU_" + sys.argv[2])
    gpu_end_time = time()
    print("GPU Time: {0} s".format(gpu_end_time - gpu_start_time))

def cpu_gradient():
    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1

    gpu_start_time = time()
    im = Image.open(sys.argv[1])
    if im.mode != "RGBA":
        im = im.convert("RGBA")
    pixels = im.load()
    for i in range(im.size[0]):
        for j in range(im.size[1]):
            RGBA = pixels[i, j]
            RGBA2 = RGBA[0], RGBA[1], 0, 0
            pixel = RGBA[0] + RGBA2[0], RGBA[1] + RGBA2[1], RGBA[2], RGBA[3]
            final_pixels = list(pixel)
            if final_pixels[0] > 255:
                final_pixels[0] = 255
            elif final_pixels[1] > 255:
                final_pixels[1] = 255
            pixel = tuple(final_pixels)
            pixels[i, j] = pixel
    im.save("CPU_" + sys.argv[2])
    gpu_end_time = time()
    print("CPU Time: {0} s".format(gpu_end_time - gpu_start_time))

cpu_gradient()
gpu_gradient()
Kernel code:
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                          CLK_ADDRESS_CLAMP |
                          CLK_FILTER_NEAREST;

__kernel void gradientcover(read_only  image2d_t srcImg,
                            write_only image2d_t dstImg)
{
    int2 coord = (int2) (get_global_id(0), get_global_id(1));
    uint4 pixel = read_imageui(srcImg, sampler, coord);
    uint4 pixel2 = (uint4)(coord.x, coord.y, 0, 0);

    pixel = pixel + pixel2;

    if(pixel.x > 255) pixel.x = 255;
    if(pixel.y > 255) pixel.y = 255;

    // Write the output value to image
    write_imageui(dstImg, coord, pixel);
}
Your CL and Python code do not do the same thing!
RGBA= pixels[i,j]
RGBA2=RGBA[0],RGBA[1],0,0
pixel=RGBA[0]+RGBA2[0],RGBA[1]+RGBA2[1],RGBA[2],RGBA[3]
adds the R and G components to the pixel.
uint4 pixel = read_imageui(srcImg, sampler, coord);
uint4 pixel2 = (uint4)(coord.x, coord.y,0,0);
pixel=pixel + pixel2;
adds the X and Y of the coordinate to the pixel.
This is most likely the cause of the difference in your results.
Assuming (from the description) that you want to lighten the image by the coordinates, I suggest the Python code should be:
RGBA= pixels[i,j]
RGBA2=i,j,0,0
instead.
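Under that assumption, a minimal sketch of the corrected CPU inner loop (mirroring what the kernel computes) might look like the following. Note that the kernel clamps R and G independently, so two separate if checks are used here rather than the original if/elif:

    for i in range(im.size[0]):
        for j in range(im.size[1]):
            RGBA = pixels[i, j]
            RGBA2 = i, j, 0, 0  # coordinate-based gradient, as in the kernel
            pixel = RGBA[0] + RGBA2[0], RGBA[1] + RGBA2[1], RGBA[2], RGBA[3]
            final_pixels = list(pixel)
            if final_pixels[0] > 255:   # clamp R
                final_pixels[0] = 255
            if final_pixels[1] > 255:   # clamp G
                final_pixels[1] = 255
            pixels[i, j] = tuple(final_pixels)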