错误:在互相关(Cross Correlation)函数 + SYCL 中出现 no matching function for call to 'slice_matrix'
Error no matching function for call to 'slice_matrix' for Cross Correlation function+Sycl
我尝试在 Sycl 和 OneAPI 中编写 2D 互相关。
我的想法是编写一种 Map 骨架(skeleton):它把 oneAPI 调用封装起来,通过参数指定目标设备类型(CPU 或 GPU/加速器),从而对使用者隐藏硬件目标选择的细节。
这是我的 Map 类:
//Definition of Map Skeleton
template<class Tin, class Tout, class Function>
class Map {
private:
Function fun;
public:
Map() {
}
Map(Function f) :
fun(f) {
}
//Overriding () operator
std::vector<std::vector<Tout>> operator()(bool use_tbb,
std::vector<std::vector<Tin>> &img,
std::vector<std::vector<Tin>> &ker) {
int img_row = img.size();
int img_col = img[0].size();
int filt_row = ker.size();
int filt_col = ker[0].size();
int out_row = img_row - filt_row;
int out_col = img_col - filt_col;
std::vector<std::vector<Tout>> out;
if (use_tbb) {
uTimer *timer = new uTimer("Executing Code On CPU");
tbb::parallel_for(
tbb::blocked_range2d<int, int>(0, out_row, 0, out_col),
[&](tbb::blocked_range2d<int, int> &t) {
for (int n = t.rows().begin(); n < t.rows().end();
++n) {
for (int m = t.cols().begin(); m < t.cols().end();
++m) {
out[n][m] = fun(
slice_matrix(img, n, m, filt_row,
filt_col), ker);
}
}
});
timer->~uTimer();
return out;
} else {
/*change 2D Matrices to the 1D linear arrays,
*
*and operate on them as contiguous blocks */
size_t M = img_row + img_col;
size_t N = filt_row + filt_col;
//size_t O = out_row + out_col;
size_t O_row = out_row;
size_t O_col = out_col;
std::vector<Tin> img_host;
std::vector<Tin> ker_host;
std::vector<Tout> out_gpu;
/* A 2D std::vector<std::vector<T>>
* does not have elements stored contiguously in the memory.
* Thus I define a vector<T> and operate on them as contiguous blocks.*/
//Define Buffer for
sycl::buffer<Tin, 1> img_buffer(img_host.data(), M);
sycl::buffer<Tin, 1> ker_buffer(ker_host.data(), N);
sycl::buffer<Tin, 2> out_buffer(out_gpu.data(), sycl::range<2> {
O_row, O_col });
//Profiling GPU
// Initialize property list with profiling information
sycl::property_list propList {
sycl::property::queue::enable_profiling() };
// Build the command queue (constructed to handle event profling)
sycl::queue gpuQueue = cl::sycl::queue(sycl::gpu_selector(),
propList);
// print out the device information used for the kernel code
std::cout << "Device: "
<< gpuQueue.get_device().get_info<sycl::info::device::name>()
<< std::endl;
std::cout << "Compute Units: "
<< gpuQueue.get_device().get_info<
sycl::info::device::max_compute_units>()
<< std::endl;
auto start_overall = std::chrono::system_clock::now();
auto event = gpuQueue.submit(
[&](sycl::handler &h) {
//local copy of fun
auto f = fun;
sycl::accessor img_accessor(img_buffer, h,
sycl::read_only);
sycl::accessor ker_accessor(ker_buffer, h,
sycl::read_only);
sycl::accessor out_accessor(out_buffer, h,
sycl::write_only);
h.parallel_for(sycl::range<2> { O_row, O_col },
[=](sycl::id<2> index) {
int row = index[0];
int col = index[1];
out_accessor[row][col] = f(
slice_matrix(img_accessor, O_row,
O_col, filt_row, filt_col),
ker_accessor);
});
});
event.wait();
auto end_overall = std::chrono::system_clock::now();
cl_ulong submit_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_submit>();
cl_ulong start_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_start>();
cl_ulong end_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_end>();
auto submission_time = (start_time - submit_time) / 1000000.0f;
std::cout << "Submit Time: " << submission_time << " ms"
<< std::endl;
auto execution_time = (end_time - start_time) / 1000000.0f;
std::cout << "Execution Time: " << execution_time << " ms"
<< std::endl;
auto execution_overall = std::chrono::duration_cast<
std::chrono::milliseconds>(end_overall - start_overall);
std::cout << "Overall Execution Time: " << execution_overall.count()
<< " ms" << std::endl;
}
;
return out;
}
};
这是我的 slice_matrix:
/**
 * Copies the r x c sub-matrix of mat whose top-left element is mat[i][j].
 *
 * mat  source matrix, indexed [row][col]; taken by const reference so the
 *      whole matrix is not copied on every call.
 * i, j row and column of the top-left element of the slice.
 * r, c number of rows and columns to copy.
 * Returns a new r x c matrix (empty when r <= 0).
 *
 * Preconditions (not checked): rows i .. i+r-1 exist in mat and each of them
 * has at least j + c elements; c >= 0.
 */
template<class T>
std::vector<std::vector<T>> slice_matrix(const std::vector<std::vector<T>> &mat,
		int i, int j, int r, int c) {
	std::vector<std::vector<T>> out;
	if (r > 0) {
		out.reserve(r);
	}
	for (int k = 0; k < r; k++) {
		const auto first = mat[i + k].begin() + j;
		// Build each row directly from the source range instead of
		// zero-filling it first and overwriting it afterwards.
		out.emplace_back(first, first + c);
	}
	return out;
}
问题出在 SYCL 部分的 parallel_for 内部:
out_accessor[row][col] = f(
slice_matrix(img_accessor, O_row,
O_col, filt_row, filt_col),
ker_accessor);
});
程序向我显示了一个错误:
no matching function for call to 'slice_matrix'
我试过把 slice_matrix 放进 Map 类里,但没有任何变化。我也考虑过 SYCL 的限制(“本规范定义的 SYCL 设备代码不支持虚函数调用”),
于是定义了 slice_matrix 的一个本地副本,但仍然得到同样的错误。
我不明白如何解决这个错误。
您传给 slice_matrix 的是 sycl::accessor 类型的对象,但 slice_matrix 的签名是:
// Function that slices a specific part of the matrix
template<class T>
std::vector<std::vector<T>> slice_matrix(std::vector<std::vector<T>> mat, int i, int j, int r, int c)
所以签名不匹配...
您需要一个接受访问器(accessor)对象而不是 vector 的 slice_matrix 版本。另外请注意,SYCL 设备代码中不能动态分配内存,因此这个版本也不能像原来那样返回 std::vector。
我正在尝试用 SYCL 和 oneAPI 实现二维互相关。我的想法是编写一种 Map 骨架(skeleton):它把 oneAPI 调用封装起来,通过参数指定目标设备类型(CPU 或 GPU/加速器),从而对使用者隐藏硬件目标选择的细节。
这是我的 Map 类:
//Definition of Map Skeleton
template<class Tin, class Tout, class Function>
class Map {
private:
Function fun;
public:
Map() {
}
Map(Function f) :
fun(f) {
}
//Overriding () operator
std::vector<std::vector<Tout>> operator()(bool use_tbb,
std::vector<std::vector<Tin>> &img,
std::vector<std::vector<Tin>> &ker) {
int img_row = img.size();
int img_col = img[0].size();
int filt_row = ker.size();
int filt_col = ker[0].size();
int out_row = img_row - filt_row;
int out_col = img_col - filt_col;
std::vector<std::vector<Tout>> out;
if (use_tbb) {
uTimer *timer = new uTimer("Executing Code On CPU");
tbb::parallel_for(
tbb::blocked_range2d<int, int>(0, out_row, 0, out_col),
[&](tbb::blocked_range2d<int, int> &t) {
for (int n = t.rows().begin(); n < t.rows().end();
++n) {
for (int m = t.cols().begin(); m < t.cols().end();
++m) {
out[n][m] = fun(
slice_matrix(img, n, m, filt_row,
filt_col), ker);
}
}
});
timer->~uTimer();
return out;
} else {
/*change 2D Matrices to the 1D linear arrays,
*
*and operate on them as contiguous blocks */
size_t M = img_row + img_col;
size_t N = filt_row + filt_col;
//size_t O = out_row + out_col;
size_t O_row = out_row;
size_t O_col = out_col;
std::vector<Tin> img_host;
std::vector<Tin> ker_host;
std::vector<Tout> out_gpu;
/* A 2D std::vector<std::vector<T>>
* does not have elements stored contiguously in the memory.
* Thus I define a vector<T> and operate on them as contiguous blocks.*/
//Define Buffer for
sycl::buffer<Tin, 1> img_buffer(img_host.data(), M);
sycl::buffer<Tin, 1> ker_buffer(ker_host.data(), N);
sycl::buffer<Tin, 2> out_buffer(out_gpu.data(), sycl::range<2> {
O_row, O_col });
//Profiling GPU
// Initialize property list with profiling information
sycl::property_list propList {
sycl::property::queue::enable_profiling() };
// Build the command queue (constructed to handle event profling)
sycl::queue gpuQueue = cl::sycl::queue(sycl::gpu_selector(),
propList);
// print out the device information used for the kernel code
std::cout << "Device: "
<< gpuQueue.get_device().get_info<sycl::info::device::name>()
<< std::endl;
std::cout << "Compute Units: "
<< gpuQueue.get_device().get_info<
sycl::info::device::max_compute_units>()
<< std::endl;
auto start_overall = std::chrono::system_clock::now();
auto event = gpuQueue.submit(
[&](sycl::handler &h) {
//local copy of fun
auto f = fun;
sycl::accessor img_accessor(img_buffer, h,
sycl::read_only);
sycl::accessor ker_accessor(ker_buffer, h,
sycl::read_only);
sycl::accessor out_accessor(out_buffer, h,
sycl::write_only);
h.parallel_for(sycl::range<2> { O_row, O_col },
[=](sycl::id<2> index) {
int row = index[0];
int col = index[1];
out_accessor[row][col] = f(
slice_matrix(img_accessor, O_row,
O_col, filt_row, filt_col),
ker_accessor);
});
});
event.wait();
auto end_overall = std::chrono::system_clock::now();
cl_ulong submit_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_submit>();
cl_ulong start_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_start>();
cl_ulong end_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_end>();
auto submission_time = (start_time - submit_time) / 1000000.0f;
std::cout << "Submit Time: " << submission_time << " ms"
<< std::endl;
auto execution_time = (end_time - start_time) / 1000000.0f;
std::cout << "Execution Time: " << execution_time << " ms"
<< std::endl;
auto execution_overall = std::chrono::duration_cast<
std::chrono::milliseconds>(end_overall - start_overall);
std::cout << "Overall Execution Time: " << execution_overall.count()
<< " ms" << std::endl;
}
;
return out;
}
};
这是我的 slice_matrix:
/**
 * Copies the r x c sub-matrix of mat whose top-left element is mat[i][j].
 *
 * mat  source matrix, indexed [row][col]; taken by const reference so the
 *      whole matrix is not copied on every call.
 * i, j row and column of the top-left element of the slice.
 * r, c number of rows and columns to copy.
 * Returns a new r x c matrix (empty when r <= 0).
 *
 * Preconditions (not checked): rows i .. i+r-1 exist in mat and each of them
 * has at least j + c elements; c >= 0.
 */
template<class T>
std::vector<std::vector<T>> slice_matrix(const std::vector<std::vector<T>> &mat,
		int i, int j, int r, int c) {
	std::vector<std::vector<T>> out;
	if (r > 0) {
		out.reserve(r);
	}
	for (int k = 0; k < r; k++) {
		const auto first = mat[i + k].begin() + j;
		// Build each row directly from the source range instead of
		// zero-filling it first and overwriting it afterwards.
		out.emplace_back(first, first + c);
	}
	return out;
}
问题出在 SYCL 部分的 parallel_for 内部:
out_accessor[row][col] = f(
slice_matrix(img_accessor, O_row,
O_col, filt_row, filt_col),
ker_accessor);
});
程序向我显示了一个错误:
no matching function for call to 'slice_matrix'
我试过把 slice_matrix 放进 Map 类里,但没有任何变化。我也考虑过 SYCL 的限制(“本规范定义的 SYCL 设备代码不支持虚函数调用”),于是定义了 slice_matrix 的一个本地副本,但仍然得到同样的错误。
我不明白如何解决这个错误。
您传给 slice_matrix 的是 sycl::accessor 类型的对象,但 slice_matrix 的签名是:
// Function that slices a specific part of the matrix
template<class T>
std::vector<std::vector<T>> slice_matrix(std::vector<std::vector<T>> mat, int i, int j, int r, int c)
所以签名不匹配...
您需要一个接受访问器(accessor)对象而不是 vector 的 slice_matrix 版本。另外请注意,SYCL 设备代码中不能动态分配内存,因此这个版本也不能像原来那样返回 std::vector。