clang+llvm 为 JIT 编译函数提供了错误的函数指针
clang+llvm provides a bad function pointers for JIT compiled functions
我正在尝试使用 clang+llvm 3.6 来 JIT 编译几个 C 函数(每个最终都可能非常大)。
不幸的是我LLVM提供的函数指针使程序SEGFAULT。
到目前为止我有以下代码:
#include <iostream>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Basic/DiagnosticOptions.h>
#include <clang/Basic/TargetInfo.h>
#include <clang/Basic/SourceManager.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/CompilerInvocation.h>
#include <clang/Frontend/FrontendDiagnostic.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/Frontend/Utils.h>
#include <clang/Parse/ParseAST.h>
#include <clang/Lex/Preprocessor.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/IR/Verifier.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/Support/ManagedStatic.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_os_ostream.h>
#include <llvm/Linker/Linker.h>
int main(int argc, char *argv[]) {
using namespace llvm;
using namespace clang;
static const char* clangArgv [] = {"program", "-x", "c", "string-input"};
static const int clangArgc = sizeof (clangArgv) / sizeof (clangArgv[0]);
// C functions to be compiled (they could eventually be extremely large)
std::map<std::string, std::string> func2Source;
func2Source["getOne"] = "int getOne() {return 1;}";
func2Source["getTwo"] = "int getTwo() {return 2;}";
llvm::InitializeAllTargets();
llvm::InitializeAllAsmPrinters();
std::unique_ptr<llvm::Linker> linker;
std::unique_ptr<llvm::LLVMContext> context(new llvm::LLVMContext());
std::unique_ptr<llvm::Module> module;
/**
* add each C function to the same module
*/
for (const auto& p : func2Source) {
const std::string& source = p.second;
IntrusiveRefCntPtr<DiagnosticOptions> diagOpts = new DiagnosticOptions();
TextDiagnosticPrinter *diagClient = new TextDiagnosticPrinter(llvm::errs(), &*diagOpts); // will be owned by diags
IntrusiveRefCntPtr<DiagnosticIDs> diagID(new DiagnosticIDs());
IntrusiveRefCntPtr<DiagnosticsEngine> diags(new DiagnosticsEngine(diagID, &*diagOpts, diagClient));
ArrayRef<const char *> args(clangArgv + 1, // skip program name
clangArgc - 1);
std::unique_ptr<CompilerInvocation> invocation(createInvocationFromCommandLine(args, diags));
if (invocation.get() == nullptr) {
std::cerr << "Failed to create compiler invocation" << std::endl;
exit(1);
}
CompilerInvocation::setLangDefaults(*invocation->getLangOpts(), IK_C,
LangStandard::lang_unspecified);
invocation->getFrontendOpts().DisableFree = false; // make sure we free memory (by default it does not)
// Create a compiler instance to handle the actual work.
CompilerInstance compiler;
compiler.setInvocation(invocation.release());
// Create the compilers actual diagnostics engine.
compiler.createDiagnostics(); //compiler.createDiagnostics(argc, const_cast<char**> (argv));
if (!compiler.hasDiagnostics()) {
std::cerr << "No diagnostics" << std::endl;
exit(1);
}
// Create memory buffer with source text
std::unique_ptr<llvm::MemoryBuffer> buffer = llvm::MemoryBuffer::getMemBufferCopy(source, "SIMPLE_BUFFER");
if (buffer.get() == nullptr) {
std::cerr << "Failed to create memory buffer" << std::endl;
exit(1);
}
// Remap auxiliary name "string-input" to memory buffer
PreprocessorOptions& po = compiler.getInvocation().getPreprocessorOpts();
po.addRemappedFile("string-input", buffer.release());
// Create and execute the frontend to generate an LLVM bitcode module.
clang::EmitLLVMOnlyAction action(context.get());
if (!compiler.ExecuteAction(action)) {
std::cerr << "Failed to emit LLVM bitcode" << std::endl;
exit(1);
}
std::unique_ptr<llvm::Module> module1 = action.takeModule();
if (module1.get() == nullptr) {
std::cerr << "No module" << std::endl;
exit(1);
}
if (linker.get() == nullptr) {
module.reset(module1.release());
linker.reset(new llvm::Linker(module.get()));
} else {
if (linker->linkInModule(module1.release())) {
std::cerr << "LLVM failed to link module" << std::endl;
exit(1);
}
}
}
llvm::InitializeNativeTarget();
llvm::Module* m = module.get();
std::string errStr;
std::unique_ptr<llvm::ExecutionEngine> executionEngine(EngineBuilder(std::move(module))
.setErrorStr(&errStr)
.setEngineKind(EngineKind::JIT)
.setMCJITMemoryManager(std::unique_ptr<SectionMemoryManager>(new SectionMemoryManager()))
.setVerifyModules(true)
.create());
if (!executionEngine.get()) {
std::cerr << "Could not create ExecutionEngine: " + errStr << std::endl;
exit(1);
}
executionEngine->finalizeObject();
/**
* Lets try to use each function
*/
for (const auto& p : func2Source) {
const std::string& funcName = p.first;
llvm::Function* func = m->getFunction(funcName);
if (func == nullptr) {
std::cerr << "Unable to find function '" << funcName << "' in LLVM module" << std::endl;
exit(1);
}
// Validate the generated code, checking for consistency.
llvm::raw_os_ostream os(std::cerr);
bool failed = llvm::verifyFunction(*func, &os);
if (failed) {
std::cerr << "Failed to verify function '" << funcName << "' in LLVM module" << std::endl;
exit(1);
}
#if 1
func->dump(); // Dump the function for exposition purposes.
// JIT the function, returning a function pointer.
void *fPtr = executionEngine->getPointerToFunction(func); ///// BAD function pointer!!!!
// Cast it to the right type (takes no arguments, returns a double) so we
// can call it as a native function.
int (*funcPtr)();
*(int **) (&funcPtr) = *(int **) fPtr;
int v = (*funcPtr)();
std::cout << "return: " << v << std::endl;
#else // THIS DOES NOT WORK EITHER:
// JIT the function, returning a function pointer.
uint64_t fPtr = executionEngine->getFunctionAddress(funcName); ///// BAD function pointer!!!!
if (fPtr == 0) {
std::cerr << "Unable to find function '" << funcName << "' in LLVM module" << std::endl;
exit(1);
}
int (*funcPtr)();
*(int **) (&funcPtr) = *(int **) fPtr;
int v = (*funcPtr)();
std::cout << "return: " << v << std::endl;
#endif
}
}
谁能帮我查明问题所在?
(我是 运行 这个 linux-ubuntu 15.04)
这个作业一团糟:
*(int **) (&funcPtr) = *(int **) fPtr;
编写 int*
然后在下一行将其用作函数指针不仅违反了严格别名,而且数据指针通常不够大以容纳整个代码指针。
安全的方法是
memcpy(funcPtr, fPtr, sizeof funcPtr);
或
funcPtr = reinterpret_cast<decltype(funcPtr)>(fPtr);
我正在尝试使用 clang+llvm 3.6 来 JIT 编译几个 C 函数(每个最终都可能非常大)。
不幸的是我LLVM提供的函数指针使程序SEGFAULT。
到目前为止我有以下代码:
#include <iostream>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Basic/DiagnosticOptions.h>
#include <clang/Basic/TargetInfo.h>
#include <clang/Basic/SourceManager.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/CompilerInvocation.h>
#include <clang/Frontend/FrontendDiagnostic.h>
#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/Frontend/Utils.h>
#include <clang/Parse/ParseAST.h>
#include <clang/Lex/Preprocessor.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/IR/Verifier.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/Support/ManagedStatic.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_os_ostream.h>
#include <llvm/Linker/Linker.h>
int main(int argc, char *argv[]) {
using namespace llvm;
using namespace clang;
static const char* clangArgv [] = {"program", "-x", "c", "string-input"};
static const int clangArgc = sizeof (clangArgv) / sizeof (clangArgv[0]);
// C functions to be compiled (they could eventually be extremely large)
std::map<std::string, std::string> func2Source;
func2Source["getOne"] = "int getOne() {return 1;}";
func2Source["getTwo"] = "int getTwo() {return 2;}";
llvm::InitializeAllTargets();
llvm::InitializeAllAsmPrinters();
std::unique_ptr<llvm::Linker> linker;
std::unique_ptr<llvm::LLVMContext> context(new llvm::LLVMContext());
std::unique_ptr<llvm::Module> module;
/**
* add each C function to the same module
*/
for (const auto& p : func2Source) {
const std::string& source = p.second;
IntrusiveRefCntPtr<DiagnosticOptions> diagOpts = new DiagnosticOptions();
TextDiagnosticPrinter *diagClient = new TextDiagnosticPrinter(llvm::errs(), &*diagOpts); // will be owned by diags
IntrusiveRefCntPtr<DiagnosticIDs> diagID(new DiagnosticIDs());
IntrusiveRefCntPtr<DiagnosticsEngine> diags(new DiagnosticsEngine(diagID, &*diagOpts, diagClient));
ArrayRef<const char *> args(clangArgv + 1, // skip program name
clangArgc - 1);
std::unique_ptr<CompilerInvocation> invocation(createInvocationFromCommandLine(args, diags));
if (invocation.get() == nullptr) {
std::cerr << "Failed to create compiler invocation" << std::endl;
exit(1);
}
CompilerInvocation::setLangDefaults(*invocation->getLangOpts(), IK_C,
LangStandard::lang_unspecified);
invocation->getFrontendOpts().DisableFree = false; // make sure we free memory (by default it does not)
// Create a compiler instance to handle the actual work.
CompilerInstance compiler;
compiler.setInvocation(invocation.release());
// Create the compilers actual diagnostics engine.
compiler.createDiagnostics(); //compiler.createDiagnostics(argc, const_cast<char**> (argv));
if (!compiler.hasDiagnostics()) {
std::cerr << "No diagnostics" << std::endl;
exit(1);
}
// Create memory buffer with source text
std::unique_ptr<llvm::MemoryBuffer> buffer = llvm::MemoryBuffer::getMemBufferCopy(source, "SIMPLE_BUFFER");
if (buffer.get() == nullptr) {
std::cerr << "Failed to create memory buffer" << std::endl;
exit(1);
}
// Remap auxiliary name "string-input" to memory buffer
PreprocessorOptions& po = compiler.getInvocation().getPreprocessorOpts();
po.addRemappedFile("string-input", buffer.release());
// Create and execute the frontend to generate an LLVM bitcode module.
clang::EmitLLVMOnlyAction action(context.get());
if (!compiler.ExecuteAction(action)) {
std::cerr << "Failed to emit LLVM bitcode" << std::endl;
exit(1);
}
std::unique_ptr<llvm::Module> module1 = action.takeModule();
if (module1.get() == nullptr) {
std::cerr << "No module" << std::endl;
exit(1);
}
if (linker.get() == nullptr) {
module.reset(module1.release());
linker.reset(new llvm::Linker(module.get()));
} else {
if (linker->linkInModule(module1.release())) {
std::cerr << "LLVM failed to link module" << std::endl;
exit(1);
}
}
}
llvm::InitializeNativeTarget();
llvm::Module* m = module.get();
std::string errStr;
std::unique_ptr<llvm::ExecutionEngine> executionEngine(EngineBuilder(std::move(module))
.setErrorStr(&errStr)
.setEngineKind(EngineKind::JIT)
.setMCJITMemoryManager(std::unique_ptr<SectionMemoryManager>(new SectionMemoryManager()))
.setVerifyModules(true)
.create());
if (!executionEngine.get()) {
std::cerr << "Could not create ExecutionEngine: " + errStr << std::endl;
exit(1);
}
executionEngine->finalizeObject();
/**
* Lets try to use each function
*/
for (const auto& p : func2Source) {
const std::string& funcName = p.first;
llvm::Function* func = m->getFunction(funcName);
if (func == nullptr) {
std::cerr << "Unable to find function '" << funcName << "' in LLVM module" << std::endl;
exit(1);
}
// Validate the generated code, checking for consistency.
llvm::raw_os_ostream os(std::cerr);
bool failed = llvm::verifyFunction(*func, &os);
if (failed) {
std::cerr << "Failed to verify function '" << funcName << "' in LLVM module" << std::endl;
exit(1);
}
#if 1
func->dump(); // Dump the function for exposition purposes.
// JIT the function, returning a function pointer.
void *fPtr = executionEngine->getPointerToFunction(func); ///// BAD function pointer!!!!
// Cast it to the right type (takes no arguments, returns a double) so we
// can call it as a native function.
int (*funcPtr)();
*(int **) (&funcPtr) = *(int **) fPtr;
int v = (*funcPtr)();
std::cout << "return: " << v << std::endl;
#else // THIS DOES NOT WORK EITHER:
// JIT the function, returning a function pointer.
uint64_t fPtr = executionEngine->getFunctionAddress(funcName); ///// BAD function pointer!!!!
if (fPtr == 0) {
std::cerr << "Unable to find function '" << funcName << "' in LLVM module" << std::endl;
exit(1);
}
int (*funcPtr)();
*(int **) (&funcPtr) = *(int **) fPtr;
int v = (*funcPtr)();
std::cout << "return: " << v << std::endl;
#endif
}
}
谁能帮我查明问题所在?
(我是 运行 这个 linux-ubuntu 15.04)
这个作业一团糟:
*(int **) (&funcPtr) = *(int **) fPtr;
编写 int*
然后在下一行将其用作函数指针不仅违反了严格别名,而且数据指针通常不够大以容纳整个代码指针。
安全的方法是
memcpy(funcPtr, fPtr, sizeof funcPtr);
或
funcPtr = reinterpret_cast<decltype(funcPtr)>(fPtr);