JCuda中的JIT,加载多个ptx模块
JIT in JCuda, loading multiple ptx modules
我在之前的一个问题中提到,我在 JCuda 中加载 ptx 模块时遇到了一些问题。按照 @talonmies 的思路,我实现了他的解决方案的 JCuda 版本,用来加载多个 ptx 文件并将它们链接成单个模块加载。以下是代码的相关部分:
import static jcuda.driver.JCudaDriver.cuLinkAddFile;
import static jcuda.driver.JCudaDriver.cuLinkComplete;
import static jcuda.driver.JCudaDriver.cuLinkCreate;
import static jcuda.driver.JCudaDriver.cuLinkDestroy;
import static jcuda.driver.JCudaDriver.cuModuleGetFunction;
import static jcuda.driver.JCudaDriver.cuModuleLoadData;
import jcuda.driver.CUjitInputType;
import jcuda.driver.JITOptions;
import jcuda.driver.CUlinkState;
import jcuda.driver.CUfunction;
/**
 * Snippet from the question: adds four PTX files to one CUDA link state,
 * completes the link, and tries to load the result as a single module.
 *
 * NOTE(review): this is the failing version being asked about. The
 * accompanying text reports that cuModuleLoadData fails with
 * CUDA_ERROR_INVALID_IMAGE and that the image array is unchanged after
 * cuLinkComplete.
 */
public class JCudaTestJIT{
// Module object that the linked image is loaded into.
private CUmodule module;
// Handle for the function named "kernel", looked up in the module.
private CUfunction functionKernel;
/**
 * Creates a link state, adds the four PTX files, completes the link,
 * loads the image into the module field and looks up "kernel".
 */
public void prepareModule(){
String ptxFileName4 = "file4.ptx";
String ptxFileName3 = "file3.ptx";
String ptxFileName2 = "file2.ptx";
String ptxFileName1 = "file1.ptx";
CUlinkState linkState = new CUlinkState();
// The same JITOptions instance is passed to cuLinkCreate and to every cuLinkAddFile call.
JITOptions jitOptions = new JITOptions();
cuLinkCreate(jitOptions, linkState);
// Inputs are added in the order file4, file3, file2, file1.
cuLinkAddFile(linkState, CUjitInputType.CU_JIT_INPUT_PTX, ptxFileName4, jitOptions);
cuLinkAddFile(linkState, CUjitInputType.CU_JIT_INPUT_PTX, ptxFileName3, jitOptions);
cuLinkAddFile(linkState, CUjitInputType.CU_JIT_INPUT_PTX, ptxFileName2, jitOptions);
cuLinkAddFile(linkState, CUjitInputType.CU_JIT_INPUT_PTX, ptxFileName1, jitOptions);
// A fixed 32768-byte Java array is wrapped in a Pointer and passed as the output argument.
long sizeOut = 32768;
byte[] image = new byte[32768];
Pointer cubinOut = Pointer.to(image);
// The size array is created inline and not kept, so nothing written to it can be read afterwards.
// NOTE(review): per the accompanying text, image is still unchanged after this call;
// whether cuLinkComplete fills a caller-supplied buffer at all should be checked against the JCuda docs.
cuLinkComplete(linkState, cubinOut, (new long[]{sizeOut}));
module = new CUmodule();
// Load the module from the image buffer
// NOTE(review): this is the call reported to fail with CUDA_ERROR_INVALID_IMAGE.
cuModuleLoadData(module, cubinOut.getByteBuffer(0, 32768).array());
cuLinkDestroy(linkState);
functionKernel = new CUfunction();
cuModuleGetFunction(functionKernel, module, "kernel");
}
// Other methods
}
但是我在调用 cuModuleLoadData 方法时得到了 CUDA_ERROR_INVALID_IMAGE 错误。调试时我发现,在调用 cuLinkComplete 方法并把 image 数组作为输出参数传入之后,该数组没有任何变化,内容仍然是空的。我传递输出参数的方式正确吗?在 JCuda 中应该这样按引用传递变量吗?
在 30 分钟之前,我从未写过一行 Java 代码,更不用说使用 JCUDA 了,但是把我之前给你的原生 C++ 代码几乎逐行直译过来,似乎就能很好地工作:
import static jcuda.driver.JCudaDriver.*;
import java.io.*;
import jcuda.*;
import jcuda.driver.*;
/**
 * Minimal driver-API demo: links two PTX files into one image, loads that
 * image as a module, and looks up a kernel in it by its mangled name.
 * Prints the image pointer, the image size and the function handle.
 */
public class JCudaRuntimeTest
{
    /** PTX inputs, in the order they are added to the link state. */
    private static final String[] PTX_FILES = { "test_function.ptx", "test_kernel.ptx" };

    /** Name under which the kernel is looked up in the linked module. */
    private static final String KERNEL_NAME = "_Z6kernelPfS_S_S_";

    /**
     * Runs the link / load / lookup sequence on device 0.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String args[])
    {
        // Have failing driver calls throw instead of returning error codes.
        JCudaDriver.setExceptionsEnabled(true);

        // Initialise the driver and create a context on device 0.
        cuInit(0);
        CUdevice gpu = new CUdevice();
        cuDeviceGet(gpu, 0);
        CUcontext ctx = new CUcontext();
        cuCtxCreate(ctx, 0, gpu);

        // Create the link state and add every PTX input to it.
        CUlinkState linker = new CUlinkState();
        JITOptions options = new JITOptions();
        cuLinkCreate(options, linker);
        for (String ptxFile : PTX_FILES)
        {
            cuLinkAddFile(linker, CUjitInputType.CU_JIT_INPUT_PTX, ptxFile, options);
        }

        // cuLinkComplete receives an empty Pointer and a one-element size array
        // and fills both in.
        long[] cubinSize = new long[1];
        Pointer cubin = new Pointer();
        cuLinkComplete(linker, cubin, cubinSize);
        System.out.println("Pointer: " + cubin);
        System.out.println("CUBIN size: " + cubinSize[0]);

        // Load the module straight from the returned pointer, with no extra options.
        // NOTE(review): the link state is destroyed only after the load; the CUDA
        // driver docs describe the image as owned by the link state - confirm.
        CUmodule linkedModule = new CUmodule();
        cuModuleLoadDataEx(linkedModule, cubin, 0, new int[0], Pointer.to(new int[0]));
        cuLinkDestroy(linker);

        CUfunction kernel = new CUfunction();
        cuModuleGetFunction(kernel, linkedModule, KERNEL_NAME);
        System.out.println("Function: " + kernel);
    }
}
它是这样工作的:
> nvcc -ptx -arch=sm_21 test_function.cu
test_function.cu
> nvcc -ptx -arch=sm_21 test_kernel.cu
test_kernel.cu
> javac -cp ".;jcuda-0.7.0a.jar" JCudaRuntimeTest.java
> java -cp ".;jcuda-0.7.0a.jar" JCudaRuntimeTest
Pointer: Pointer[nativePointer=0xa5a13a8,byteOffset=0]
CUBIN size: 5924
Function: CUfunction[nativePointer=0xa588160]
这里的关键似乎是使用 cuModuleLoadDataEx,并注意 cuLinkComplete 的返回值是一个指向已链接 CUBIN 的系统指针,以及以 long[] 形式返回的映像大小。与 C++ 代码一样,该指针被直接传递给模块数据加载函数。
最后说一句:如果你当初发布了一个可以直接拿来修改的完整复现示例,事情会简单、容易得多,而不是让我先去学习 JCUDA 和 Java 的基础知识,才能自己构造出一个有用的复现示例并让它运行起来。JCUDA 的文档虽然简略,但很完整;借助已经提供的可运行的 C++ 示例,只需阅读几分钟就能弄清楚该怎么做。
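我在之前的一个问题中提到,我在 JCuda 中加载 ptx 模块时遇到了一些问题。按照 @talonmies 的思路,我实现了他的解决方案的 JCuda 版本,用来加载多个 ptx 文件并将它们链接成单个模块加载。以下是代码的相关部分: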
import static jcuda.driver.JCudaDriver.cuLinkAddFile;
import static jcuda.driver.JCudaDriver.cuLinkComplete;
import static jcuda.driver.JCudaDriver.cuLinkCreate;
import static jcuda.driver.JCudaDriver.cuLinkDestroy;
import static jcuda.driver.JCudaDriver.cuModuleGetFunction;
import static jcuda.driver.JCudaDriver.cuModuleLoadData;
import jcuda.driver.CUjitInputType;
import jcuda.driver.JITOptions;
import jcuda.driver.CUlinkState;
import jcuda.driver.CUfunction;
/**
 * Snippet from the question: adds four PTX files to one CUDA link state,
 * completes the link, and tries to load the result as a single module.
 *
 * NOTE(review): this is the failing version being asked about. The
 * accompanying text reports that cuModuleLoadData fails with
 * CUDA_ERROR_INVALID_IMAGE and that the image array is unchanged after
 * cuLinkComplete.
 */
public class JCudaTestJIT{
// Module object that the linked image is loaded into.
private CUmodule module;
// Handle for the function named "kernel", looked up in the module.
private CUfunction functionKernel;
/**
 * Creates a link state, adds the four PTX files, completes the link,
 * loads the image into the module field and looks up "kernel".
 */
public void prepareModule(){
String ptxFileName4 = "file4.ptx";
String ptxFileName3 = "file3.ptx";
String ptxFileName2 = "file2.ptx";
String ptxFileName1 = "file1.ptx";
CUlinkState linkState = new CUlinkState();
// The same JITOptions instance is passed to cuLinkCreate and to every cuLinkAddFile call.
JITOptions jitOptions = new JITOptions();
cuLinkCreate(jitOptions, linkState);
// Inputs are added in the order file4, file3, file2, file1.
cuLinkAddFile(linkState, CUjitInputType.CU_JIT_INPUT_PTX, ptxFileName4, jitOptions);
cuLinkAddFile(linkState, CUjitInputType.CU_JIT_INPUT_PTX, ptxFileName3, jitOptions);
cuLinkAddFile(linkState, CUjitInputType.CU_JIT_INPUT_PTX, ptxFileName2, jitOptions);
cuLinkAddFile(linkState, CUjitInputType.CU_JIT_INPUT_PTX, ptxFileName1, jitOptions);
// A fixed 32768-byte Java array is wrapped in a Pointer and passed as the output argument.
long sizeOut = 32768;
byte[] image = new byte[32768];
Pointer cubinOut = Pointer.to(image);
// The size array is created inline and not kept, so nothing written to it can be read afterwards.
// NOTE(review): per the accompanying text, image is still unchanged after this call;
// whether cuLinkComplete fills a caller-supplied buffer at all should be checked against the JCuda docs.
cuLinkComplete(linkState, cubinOut, (new long[]{sizeOut}));
module = new CUmodule();
// Load the module from the image buffer
// NOTE(review): this is the call reported to fail with CUDA_ERROR_INVALID_IMAGE.
cuModuleLoadData(module, cubinOut.getByteBuffer(0, 32768).array());
cuLinkDestroy(linkState);
functionKernel = new CUfunction();
cuModuleGetFunction(functionKernel, module, "kernel");
}
// Other methods
}
但是我在调用 cuModuleLoadData 方法时得到了 CUDA_ERROR_INVALID_IMAGE 错误。调试时我发现,在调用 cuLinkComplete 方法并把 image 数组作为输出参数传入之后,该数组没有任何变化,内容仍然是空的。我传递输出参数的方式正确吗?在 JCuda 中应该这样按引用传递变量吗?
在 30 分钟之前,我从未写过一行 Java 代码,更不用说使用 JCUDA 了,但是把我之前给你的原生 C++ 代码几乎逐行直译过来,似乎就能很好地工作:
import static jcuda.driver.JCudaDriver.*;
import java.io.*;
import jcuda.*;
import jcuda.driver.*;
/**
 * Minimal driver-API demo: links two PTX files into one image, loads that
 * image as a module, and looks up a kernel in it by its mangled name.
 * Prints the image pointer, the image size and the function handle.
 */
public class JCudaRuntimeTest
{
    /** PTX inputs, in the order they are added to the link state. */
    private static final String[] PTX_FILES = { "test_function.ptx", "test_kernel.ptx" };

    /** Name under which the kernel is looked up in the linked module. */
    private static final String KERNEL_NAME = "_Z6kernelPfS_S_S_";

    /**
     * Runs the link / load / lookup sequence on device 0.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String args[])
    {
        // Have failing driver calls throw instead of returning error codes.
        JCudaDriver.setExceptionsEnabled(true);

        // Initialise the driver and create a context on device 0.
        cuInit(0);
        CUdevice gpu = new CUdevice();
        cuDeviceGet(gpu, 0);
        CUcontext ctx = new CUcontext();
        cuCtxCreate(ctx, 0, gpu);

        // Create the link state and add every PTX input to it.
        CUlinkState linker = new CUlinkState();
        JITOptions options = new JITOptions();
        cuLinkCreate(options, linker);
        for (String ptxFile : PTX_FILES)
        {
            cuLinkAddFile(linker, CUjitInputType.CU_JIT_INPUT_PTX, ptxFile, options);
        }

        // cuLinkComplete receives an empty Pointer and a one-element size array
        // and fills both in.
        long[] cubinSize = new long[1];
        Pointer cubin = new Pointer();
        cuLinkComplete(linker, cubin, cubinSize);
        System.out.println("Pointer: " + cubin);
        System.out.println("CUBIN size: " + cubinSize[0]);

        // Load the module straight from the returned pointer, with no extra options.
        // NOTE(review): the link state is destroyed only after the load; the CUDA
        // driver docs describe the image as owned by the link state - confirm.
        CUmodule linkedModule = new CUmodule();
        cuModuleLoadDataEx(linkedModule, cubin, 0, new int[0], Pointer.to(new int[0]));
        cuLinkDestroy(linker);

        CUfunction kernel = new CUfunction();
        cuModuleGetFunction(kernel, linkedModule, KERNEL_NAME);
        System.out.println("Function: " + kernel);
    }
}
它是这样工作的:
> nvcc -ptx -arch=sm_21 test_function.cu
test_function.cu
> nvcc -ptx -arch=sm_21 test_kernel.cu
test_kernel.cu
> javac -cp ".;jcuda-0.7.0a.jar" JCudaRuntimeTest.java
> java -cp ".;jcuda-0.7.0a.jar" JCudaRuntimeTest
Pointer: Pointer[nativePointer=0xa5a13a8,byteOffset=0]
CUBIN size: 5924
Function: CUfunction[nativePointer=0xa588160]
这里的关键似乎是使用 cuModuleLoadDataEx,并注意 cuLinkComplete 的返回值是一个指向已链接 CUBIN 的系统指针,以及以 long[] 形式返回的映像大小。与 C++ 代码一样,该指针被直接传递给模块数据加载函数。
最后说一句:如果你当初发布了一个可以直接拿来修改的完整复现示例,事情会简单、容易得多,而不是让我先去学习 JCUDA 和 Java 的基础知识,才能自己构造出一个有用的复现示例并让它运行起来。JCUDA 的文档虽然简略,但很完整;借助已经提供的可运行的 C++ 示例,只需阅读几分钟就能弄清楚该怎么做。