将 oclMat 传入函数会导致运行时间出现巨大差异
sending oclMat to function creates huge difference in runtime
我写了一个有 3 个输入的函数(掩码):
- inputOCL - 一个 oclMat
- comparisonValue - 双精度值
- method - 确定比较方法的 int 变量
对于我的示例,我选择了 method=1,它代表 CMP_GT,测试 inputOCL>comparisonValue element-wise。
该函数的目的是将 inputOCL 中不满足给定比较条件(comparison)的所有元素清零。
这是 masking 函数:
// Zeroes every element of inputOCL that fails the element-wise test
// `inputOCL <method> comparisonValue`; elements that pass are kept.
//
// inputOCL        - matrix to mask; taken by value, all writes go through this copy.
//                   NOTE(review): oclMat copies presumably share the device buffer,
//                   which is how the caller would see the result - confirm.
// comparisonValue - scalar that every element is compared against.
// method          - comparison selector, 0-->5 corresponding to
//                   (==, >, >=, <, <=, !=); e.g. 1 is CMP_GT.
void masking(cv::ocl::oclMat inputOCL, double comparisonValue, int method){
    // Constant matrix of the input's size and type, filled with the threshold.
    const cv::Scalar threshold(comparisonValue);
    cv::ocl::oclMat valueOCL(inputOCL.size(), inputOCL.type());
    valueOCL.setTo(threshold);

    // Comparison result, converted to the input's type so the two can be multiplied.
    cv::ocl::oclMat logicalOCL;
    cv::ocl::compare(inputOCL, valueOCL, logicalOCL, method);
    logicalOCL.convertTo(logicalOCL, inputOCL.type());

    // input *= mask, then scale by 1/255 (compare() marks matches with 255 per the OpenCV docs).
    cv::ocl::multiply(logicalOCL, inputOCL, inputOCL);
    const double unitScale = 1 / 255.0;
    cv::ocl::multiply(unitScale, inputOCL, inputOCL);
    // valueOCL and logicalOCL are locals, so they are destroyed here, on return.
}
对该函数计时时,我发现通过函数运行与直接运行同样的计算,两者的运行时间差异很大。所用代码如下:
// Benchmark driver: runs the masking computation once through masking() and once
// inline, printing the elapsed wall-clock time of each variant.
int main(int argc, char** argv){
double value1 = 1.23456789012345;
double value2 = 1.23456789012344;
// initialize matrix
cv::Mat I(5000, 5000, CV_64F, cv::Scalar(value1));
// copy input to GPU
cv::ocl::oclMat inputOCL(I);
// 1 selects CMP_GT, i.e. the element-wise test inputOCL > value2
int method = 1;
static double start_TIMER;
// computation done in function
// Timed region 1: the whole call. masking() builds its two temporary oclMats
// internally and they are destroyed when it returns, so both their construction
// and their destruction fall inside this region.
// NOTE(review): this is also the first use of the ocl routines in the program; any
// one-time setup cost would be charged to this region - confirm.
start_TIMER = cv::getTickCount();
masking(inputOCL, value2, method);
std::cout << "\nFunction runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
// direct computation
// Timed region 2: the same statements inline, reusing inputOCL as region 1 left it.
// valueOCL and logicalOCL are constructed inside the region but live until the end
// of main, so their destruction is NOT part of this measurement.
start_TIMER = cv::getTickCount();
cv::ocl::oclMat valueOCL(inputOCL.size(), inputOCL.type());
valueOCL.setTo(cv::Scalar(value2));
cv::ocl::oclMat logicalOCL;
cv::ocl::compare(inputOCL, valueOCL, logicalOCL, method);
logicalOCL.convertTo(logicalOCL, inputOCL.type());
cv::ocl::multiply(logicalOCL, inputOCL, inputOCL);
cv::ocl::multiply(1 / 255.0, inputOCL, inputOCL);
std::cout << "\nDirect runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
}
运行时间可以在这张截图中看到:
为什么运行时间相差这么大?
我要感谢 asarsakov(提醒我注意销毁 oclMat 的问题)和 DarkZeros(指出我忘记销毁函数中的第二个临时 oclMat)。
但是,这还不是完整的解决方案。似乎要让 'direct' 和 'function' 得到相同的结果,唯一的办法是在传递 oclMat 时使用 cv::ocl::oclMat& 而不是 cv::ocl::oclMat。
请参阅以下代码(完整代码,包括函数在内)以了解产生相同结果的最终解决方案。通过更改 main 开头的布尔变量,我们可以控制计算方式(直接或通过函数),并控制是否在计时区间内释放 oclMat。
#include "opencv2/ocl/ocl.hpp"
#include <conio.h>
// Zeroes, in place, every element of inputOCL that fails the element-wise test
// `inputOCL <method> comparisonValue`; elements that pass are kept.
//
// inputOCL        - matrix to mask; overwritten with the result.
// valueOCL        - caller-owned scratch matrix; filled with comparisonValue.
//                   NOTE(review): presumably expected to match inputOCL's size and
//                   type, as the caller in this file allocates it that way - confirm.
// logicalOCL      - caller-owned scratch matrix; receives the comparison result and
//                   is then converted to inputOCL's type.
// comparisonValue - scalar that every element is compared against.
// method          - comparison selector, 0-->5 corresponding to
//                   (==, >, >=, <, <=, !=); e.g. 1 is CMP_GT.
void masking(cv::ocl::oclMat &inputOCL, cv::ocl::oclMat &valueOCL, cv::ocl::oclMat &logicalOCL, double comparisonValue, int method){
    // Fill the scratch matrix with the threshold.
    const cv::Scalar threshold(comparisonValue);
    valueOCL.setTo(threshold);

    // Comparison result, converted to the input's type so the two can be multiplied.
    cv::ocl::compare(inputOCL, valueOCL, logicalOCL, method);
    logicalOCL.convertTo(logicalOCL, inputOCL.type());

    // input *= mask, then scale by 1/255 (compare() marks matches with 255 per the OpenCV docs).
    cv::ocl::multiply(logicalOCL, inputOCL, inputOCL);
    const double unitScale = 1 / 255.0;
    cv::ocl::multiply(unitScale, inputOCL, inputOCL);
}
// Benchmark driver: times the masking computation either inline ("direct") or via
// masking(), selected by the `direct` flag. The `release` flag controls whether the
// temporary oclMats are released before the timer is read.
int main(int argc, char** argv){
bool direct = 1; // 1 for direct, 0 for function
bool release = 1; // 1 with releasing temporary oclMat, 0 without releasing them
// initialize data
// 1 selects CMP_GT, i.e. the element-wise test input > value2
int method = 1;
static double start_TIMER;
double value1 = 1.23456789012345;
double value2 = 1.23456789012344;
cv::Mat I(5000, 5000, CV_64F, cv::Scalar(value1));
if (direct){
// direct computation
// The input upload and the construction of both temporaries happen before the
// timer starts; logicalOCL1 is declared empty here.
cv::ocl::oclMat inputOCL1(I);
cv::ocl::oclMat valueOCL1(inputOCL1.size(), inputOCL1.type());
cv::ocl::oclMat logicalOCL1;
start_TIMER = cv::getTickCount();
valueOCL1.setTo(cv::Scalar(value2));
cv::ocl::compare(inputOCL1, valueOCL1, logicalOCL1, method);
logicalOCL1.convertTo(logicalOCL1, inputOCL1.type());
cv::ocl::multiply(logicalOCL1, inputOCL1, inputOCL1);
cv::ocl::multiply(1 / 255.0, inputOCL1, inputOCL1);
// Optional release of the temporaries, done before the timer is read so it is timed.
if (release){ valueOCL1.release(); logicalOCL1.release(); }
std::cout << "\nDirect runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
}
if (!direct){
// computation done in function
// Same setup as the direct branch: upload and temporaries are created outside
// the timed region and handed to masking() by reference.
cv::ocl::oclMat inputOCL2(I);
cv::ocl::oclMat valueOCL2(inputOCL2.size(), inputOCL2.type());
cv::ocl::oclMat logicalOCL2;
start_TIMER = cv::getTickCount();
masking(inputOCL2, valueOCL2, logicalOCL2, value2, method);
// Optional release of the temporaries, done before the timer is read so it is timed.
if (release){ valueOCL2.release(); logicalOCL2.release(); }
std::cout << "\nFunction runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
}
printf("\nPress any key to exit...");
// Block until a key is pressed.
_getch();
return 0;
}
我写了一个有 3 个输入的函数(掩码):
- inputOCL - 一个 oclMat
- comparisonValue - 双精度值
- method - 确定比较方法的 int 变量
对于我的示例,我选择了 method=1,它代表 CMP_GT,测试 inputOCL>comparisonValue element-wise。
该函数的目的是将 inputOCL 中不满足给定比较条件(comparison)的所有元素清零。
这是 masking 函数:
// Zeroes every element of inputOCL that fails the element-wise test
// `inputOCL <method> comparisonValue`; elements that pass are kept.
//
// inputOCL        - matrix to mask; taken by value, all writes go through this copy.
//                   NOTE(review): oclMat copies presumably share the device buffer,
//                   which is how the caller would see the result - confirm.
// comparisonValue - scalar that every element is compared against.
// method          - comparison selector, 0-->5 corresponding to
//                   (==, >, >=, <, <=, !=); e.g. 1 is CMP_GT.
void masking(cv::ocl::oclMat inputOCL, double comparisonValue, int method){
    // Constant matrix of the input's size and type, filled with the threshold.
    const cv::Scalar threshold(comparisonValue);
    cv::ocl::oclMat valueOCL(inputOCL.size(), inputOCL.type());
    valueOCL.setTo(threshold);

    // Comparison result, converted to the input's type so the two can be multiplied.
    cv::ocl::oclMat logicalOCL;
    cv::ocl::compare(inputOCL, valueOCL, logicalOCL, method);
    logicalOCL.convertTo(logicalOCL, inputOCL.type());

    // input *= mask, then scale by 1/255 (compare() marks matches with 255 per the OpenCV docs).
    cv::ocl::multiply(logicalOCL, inputOCL, inputOCL);
    const double unitScale = 1 / 255.0;
    cv::ocl::multiply(unitScale, inputOCL, inputOCL);
    // valueOCL and logicalOCL are locals, so they are destroyed here, on return.
}
对该函数计时时,我发现通过函数运行与直接运行同样的计算,两者的运行时间差异很大。所用代码如下:
// Benchmark driver: runs the masking computation once through masking() and once
// inline, printing the elapsed wall-clock time of each variant.
int main(int argc, char** argv){
double value1 = 1.23456789012345;
double value2 = 1.23456789012344;
// initialize matrix
cv::Mat I(5000, 5000, CV_64F, cv::Scalar(value1));
// copy input to GPU
cv::ocl::oclMat inputOCL(I);
// 1 selects CMP_GT, i.e. the element-wise test inputOCL > value2
int method = 1;
static double start_TIMER;
// computation done in function
// Timed region 1: the whole call. masking() builds its two temporary oclMats
// internally and they are destroyed when it returns, so both their construction
// and their destruction fall inside this region.
// NOTE(review): this is also the first use of the ocl routines in the program; any
// one-time setup cost would be charged to this region - confirm.
start_TIMER = cv::getTickCount();
masking(inputOCL, value2, method);
std::cout << "\nFunction runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
// direct computation
// Timed region 2: the same statements inline, reusing inputOCL as region 1 left it.
// valueOCL and logicalOCL are constructed inside the region but live until the end
// of main, so their destruction is NOT part of this measurement.
start_TIMER = cv::getTickCount();
cv::ocl::oclMat valueOCL(inputOCL.size(), inputOCL.type());
valueOCL.setTo(cv::Scalar(value2));
cv::ocl::oclMat logicalOCL;
cv::ocl::compare(inputOCL, valueOCL, logicalOCL, method);
logicalOCL.convertTo(logicalOCL, inputOCL.type());
cv::ocl::multiply(logicalOCL, inputOCL, inputOCL);
cv::ocl::multiply(1 / 255.0, inputOCL, inputOCL);
std::cout << "\nDirect runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
}
运行时间可以在这张截图中看到:
为什么运行时间相差这么大?
我要感谢 asarsakov(提醒我注意销毁 oclMat 的问题)和 DarkZeros(指出我忘记销毁函数中的第二个临时 oclMat)。
但是,这还不是完整的解决方案。似乎要让 'direct' 和 'function' 得到相同的结果,唯一的办法是在传递 oclMat 时使用 cv::ocl::oclMat& 而不是 cv::ocl::oclMat。
请参阅以下代码(完整代码,包括函数在内)以了解产生相同结果的最终解决方案。通过更改 main 开头的布尔变量,我们可以控制计算方式(直接或通过函数),并控制是否在计时区间内释放 oclMat。
#include "opencv2/ocl/ocl.hpp"
#include <conio.h>
// Zeroes, in place, every element of inputOCL that fails the element-wise test
// `inputOCL <method> comparisonValue`; elements that pass are kept.
//
// inputOCL        - matrix to mask; overwritten with the result.
// valueOCL        - caller-owned scratch matrix; filled with comparisonValue.
//                   NOTE(review): presumably expected to match inputOCL's size and
//                   type, as the caller in this file allocates it that way - confirm.
// logicalOCL      - caller-owned scratch matrix; receives the comparison result and
//                   is then converted to inputOCL's type.
// comparisonValue - scalar that every element is compared against.
// method          - comparison selector, 0-->5 corresponding to
//                   (==, >, >=, <, <=, !=); e.g. 1 is CMP_GT.
void masking(cv::ocl::oclMat &inputOCL, cv::ocl::oclMat &valueOCL, cv::ocl::oclMat &logicalOCL, double comparisonValue, int method){
    // Fill the scratch matrix with the threshold.
    const cv::Scalar threshold(comparisonValue);
    valueOCL.setTo(threshold);

    // Comparison result, converted to the input's type so the two can be multiplied.
    cv::ocl::compare(inputOCL, valueOCL, logicalOCL, method);
    logicalOCL.convertTo(logicalOCL, inputOCL.type());

    // input *= mask, then scale by 1/255 (compare() marks matches with 255 per the OpenCV docs).
    cv::ocl::multiply(logicalOCL, inputOCL, inputOCL);
    const double unitScale = 1 / 255.0;
    cv::ocl::multiply(unitScale, inputOCL, inputOCL);
}
// Benchmark driver: times the masking computation either inline ("direct") or via
// masking(), selected by the `direct` flag. The `release` flag controls whether the
// temporary oclMats are released before the timer is read.
int main(int argc, char** argv){
bool direct = 1; // 1 for direct, 0 for function
bool release = 1; // 1 with releasing temporary oclMat, 0 without releasing them
// initialize data
// 1 selects CMP_GT, i.e. the element-wise test input > value2
int method = 1;
static double start_TIMER;
double value1 = 1.23456789012345;
double value2 = 1.23456789012344;
cv::Mat I(5000, 5000, CV_64F, cv::Scalar(value1));
if (direct){
// direct computation
// The input upload and the construction of both temporaries happen before the
// timer starts; logicalOCL1 is declared empty here.
cv::ocl::oclMat inputOCL1(I);
cv::ocl::oclMat valueOCL1(inputOCL1.size(), inputOCL1.type());
cv::ocl::oclMat logicalOCL1;
start_TIMER = cv::getTickCount();
valueOCL1.setTo(cv::Scalar(value2));
cv::ocl::compare(inputOCL1, valueOCL1, logicalOCL1, method);
logicalOCL1.convertTo(logicalOCL1, inputOCL1.type());
cv::ocl::multiply(logicalOCL1, inputOCL1, inputOCL1);
cv::ocl::multiply(1 / 255.0, inputOCL1, inputOCL1);
// Optional release of the temporaries, done before the timer is read so it is timed.
if (release){ valueOCL1.release(); logicalOCL1.release(); }
std::cout << "\nDirect runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
}
if (!direct){
// computation done in function
// Same setup as the direct branch: upload and temporaries are created outside
// the timed region and handed to masking() by reference.
cv::ocl::oclMat inputOCL2(I);
cv::ocl::oclMat valueOCL2(inputOCL2.size(), inputOCL2.type());
cv::ocl::oclMat logicalOCL2;
start_TIMER = cv::getTickCount();
masking(inputOCL2, valueOCL2, logicalOCL2, value2, method);
// Optional release of the temporaries, done before the timer is read so it is timed.
if (release){ valueOCL2.release(); logicalOCL2.release(); }
std::cout << "\nFunction runtime = " << ((double)(cv::getTickCount() - start_TIMER)) / cv::getTickFrequency() << " Seconds\n";
}
printf("\nPress any key to exit...");
// Block until a key is pressed.
_getch();
return 0;
}