本征:优化越界矩阵块/条目周围的切片
Eigen: Optimizing Out of Bounds Matrix Blocks/ Slices Around Entry
作为个人项目的一部分,我想优化以下功能,使其尽快运行。任何性能提升都很重要,因为程序其余部分的性能取决于它。
目前,我正在使用 Eigen 的 block() 函数,但由于负数和越界索引不是有效参数,我正在使用一些额外的代码来仅 block() 必要的数据。
这是我要优化的功能;它尝试在给定的矩阵条目周围提取子矩阵。
目标是然后散列结果矩阵(使用 boost hash_combine 的变体)并在散列 table.
中查找它
要优化的函数
#include <iostream>
#include <Eigen/Dense>
using namespace Eigen;
/// Returns a subsection from a matrix given the center and radius of the section. Fills space outside base matrix bounds with 0.
/// \param baseMatrix: The matrix from which to take a section. Entires are integers, either 0 or 1.
/// \param centerRow: The row serving as the center of the subsection.
/// \param centerCol: The column serving as the center of the subsection.
/// \param radius: The radius of the subsection, including the center entry.
/// \return A matrix of the same type as the input matrix of size radius*2-1, representing the slice of the input matrix around the provided center coordinate.
Eigen::MatrixXi GetMatrixSection(const MatrixXi &baseMatrix, int centerRow, int centerCol, int radius)
{
//== Constraints ==
// baseMatrix, both dimensions can be between 1 and maxInt, but will generally be in range [3, 25] matrix is usually but not always square
// 0 <= centerRow <= baseMatrix.rows()-1
// 0 <= centerCol <= baseMatrix.cols()-1
// 1 <= radius <= max(baseMatrix.rows(), baseMatrix.cols())
// This specific implementation of the function allows for radius of any size > 0
// Create Base Matrix to fill
int nSize = radius * 2 - 1; // Size of resulting matrix
MatrixXi result = Eigen::MatrixXi().Constant(nSize, nSize, 0);
// Get indices of the top-left corner for the block operation
int lowerRowBound = centerRow - (radius-1);
int lowerColBound = centerCol - (radius-1);
// Get the top-left corner of baseMatrix
int upperLeftCopyableRow = std::max(0, lowerRowBound);
int upperLeftCopyableCol = std::max(0, lowerColBound);
// Determine how many rows we need to take from the baseMatrix
int numCopyableRows = std::min((int)baseMatrix.rows()-upperLeftCopyableRow, std::min(0, lowerRowBound)+nSize);
int numCopyableCols = std::min((int)baseMatrix.cols()-upperLeftCopyableCol, std::min(0, lowerColBound)+nSize);
if(numCopyableRows <= 0 || numCopyableCols <= 0) return result; // if it is impossible to copy anything from result, don't try
// Copy all data we can from the baseMatrix
MatrixXi copiedBlock = baseMatrix.block(upperLeftCopyableRow, upperLeftCopyableCol, numCopyableRows, numCopyableCols);
// Copy the data from baseMatrix into resulting matrix
result.block(upperLeftCopyableRow-lowerRowBound, upperLeftCopyableCol-lowerColBound,
(int)copiedBlock.rows(), (int)copiedBlock.cols()) = copiedBlock;
// Return resulting matrix
return result;
}
我正在使用以下代码来测试上述功能的效率。
函数的时间代码
int TestGetMatrixSection(MatrixXi matrixToTest, int trials=1)
{
volatile int result = 0;
for(int t = 0; t < trials; ++t) {
for (int i = 1; i <= std::max(matrixToTest.rows(), matrixToTest.cols()); ++i) {
for (int j = 0; j < matrixToTest.rows(); ++j) {
for (int k = 0; k < matrixToTest.cols(); ++k) {
// std::cout << GetMatrixSection(matrixToTest, j, k, i) << "/n/n"; // printout
result += GetMatrixSection(matrixToTest, j, k, i).cols();
}
}
}
}
return result;
}
int main()
{
MatrixXi m = Eigen::MatrixXi(4, 5);
m<< 1, 0, 0, 0, 1,
1, 1, 0, 0, 1,
1, 0, 1, 1, 0,
1, 0, 0, 1, 0;
auto startTime = std::chrono::steady_clock::now();
TestGetMatrixSection(m, 10000);
std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-startTime).count() << " milliseconds\n";
return 0;
}
目前该函数可以正常工作,但我担心性能会成为一个问题,因为它会被调用数百万次。
预期输出
Example 3x3 matrix
[[1, 0, 1],
[1, 1, 0],
[0, 1, 0]]
row index 1, col index 1, radius 2
[[1, 0, 1],
[1, 1, 0],
[0, 1, 0]]
row index 0, col index 0, radius 2
[[0, 0, 0],
[0, 1, 0],
[0, 1, 1]]
row index 2, col index 2, radius 2
[[1, 0, 0],
[1, 0, 0],
[0, 0, 0]]
row index 0, col index 0, radius 3
[[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 1, 0, 1],
[0, 0, 1, 1, 0],
[0, 0, 0, 1, 0]]
假设 baseMatrix
的大小不变,并且您知道 radius
的合理上限 我建议将该矩阵存储到一个更大的矩阵中,该矩阵在每一侧都扩展了 0 个条目.实现这一点的一种可能性:
template<class S>
struct ExtendedMatrix{
typedef Eigen::Matrix<S,Eigen::Dynamic, Eigen::Dynamic> MatS;
typedef Eigen::Ref<MatS> Ref;
typedef Eigen::Ref<MatS const> RefC;
// read/write reference to baseMatrix:
Ref getBaseMatrix() {
return storage.block(maxR-1, maxR-1, storage.rows()-2*maxR+1, storage.cols()-2*maxR+1);
}
// read-only reference to baseMatrix:
RefC getBaseMatrix() const {
return storage.block(maxR-1, maxR-1, storage.rows()-2*maxR+1, storage.cols()-2*maxR+1);
}
// constructor. Takes an initial matrix and a maximum radius.
template<class Derived>
ExtendedMatrix(Eigen::MatrixBase<Derived> const& base, int maxRadius)
: maxR(maxRadius), storage(base.rows()+2*maxR-1, base.cols()+2*maxR-1)
{
storage.setZero();
getBaseMatrix() = base;
}
// method to get a (read-only) block of size 2radius-1 around a given coordinate
// entries outside the original matrix are filled with 0s.
RefC getMatrixSection(int centerRow, int centerCol, int radius) const
{
assert(radius <= maxR);
int offset = maxR-radius;
return storage.block(centerRow+offset, centerCol+offset, 2*radius-1, 2*radius-1);
}
protected:
int maxR;
MatS storage;
};
这样使用:
int TestGetMatrixSection(Eigen::MatrixXi const & matrixToTest, int trials=1)
{
int maxR = std::max(matrixToTest.rows(), matrixToTest.cols());
ExtendedMatrix<int> eMat(matrixToTest, maxR);
int result = 0;
for(int t = 0; t < trials; ++t) {
// eMat.getBaseMatrix().setRandom(); // or change individual entries ...
for (int i = 1; i <= std::max(matrixToTest.rows(), matrixToTest.cols()); ++i) {
for (int j = 0; j < matrixToTest.rows(); ++j) {
for (int k = 0; k < matrixToTest.cols(); ++k) {
// std::cout << GetMatrixSection(matrixToTest, j, k, i) << "/n/n"; // printout
result += eMat.getMatrixSection(j, k, i).cols();
}
}
}
}
return result;
}
作为个人项目的一部分,我想优化以下功能,使其尽快运行。任何性能提升都很重要,因为程序其余部分的性能取决于它。
目前,我正在使用 Eigen 的 block() 函数,但由于负数和越界索引不是有效参数,我正在使用一些额外的代码来仅 block() 必要的数据。
这是我要优化的功能;它尝试在给定的矩阵条目周围提取子矩阵。 目标是然后散列结果矩阵(使用 boost hash_combine 的变体)并在散列 table.
中查找它要优化的函数
#include <iostream>
#include <Eigen/Dense>
using namespace Eigen;
/// Returns a subsection from a matrix given the center and radius of the section. Fills space outside base matrix bounds with 0.
/// \param baseMatrix: The matrix from which to take a section. Entires are integers, either 0 or 1.
/// \param centerRow: The row serving as the center of the subsection.
/// \param centerCol: The column serving as the center of the subsection.
/// \param radius: The radius of the subsection, including the center entry.
/// \return A matrix of the same type as the input matrix of size radius*2-1, representing the slice of the input matrix around the provided center coordinate.
Eigen::MatrixXi GetMatrixSection(const MatrixXi &baseMatrix, int centerRow, int centerCol, int radius)
{
//== Constraints ==
// baseMatrix, both dimensions can be between 1 and maxInt, but will generally be in range [3, 25] matrix is usually but not always square
// 0 <= centerRow <= baseMatrix.rows()-1
// 0 <= centerCol <= baseMatrix.cols()-1
// 1 <= radius <= max(baseMatrix.rows(), baseMatrix.cols())
// This specific implementation of the function allows for radius of any size > 0
// Create Base Matrix to fill
int nSize = radius * 2 - 1; // Size of resulting matrix
MatrixXi result = Eigen::MatrixXi().Constant(nSize, nSize, 0);
// Get indices of the top-left corner for the block operation
int lowerRowBound = centerRow - (radius-1);
int lowerColBound = centerCol - (radius-1);
// Get the top-left corner of baseMatrix
int upperLeftCopyableRow = std::max(0, lowerRowBound);
int upperLeftCopyableCol = std::max(0, lowerColBound);
// Determine how many rows we need to take from the baseMatrix
int numCopyableRows = std::min((int)baseMatrix.rows()-upperLeftCopyableRow, std::min(0, lowerRowBound)+nSize);
int numCopyableCols = std::min((int)baseMatrix.cols()-upperLeftCopyableCol, std::min(0, lowerColBound)+nSize);
if(numCopyableRows <= 0 || numCopyableCols <= 0) return result; // if it is impossible to copy anything from result, don't try
// Copy all data we can from the baseMatrix
MatrixXi copiedBlock = baseMatrix.block(upperLeftCopyableRow, upperLeftCopyableCol, numCopyableRows, numCopyableCols);
// Copy the data from baseMatrix into resulting matrix
result.block(upperLeftCopyableRow-lowerRowBound, upperLeftCopyableCol-lowerColBound,
(int)copiedBlock.rows(), (int)copiedBlock.cols()) = copiedBlock;
// Return resulting matrix
return result;
}
我正在使用以下代码来测试上述功能的效率。
函数的时间代码
int TestGetMatrixSection(MatrixXi matrixToTest, int trials=1)
{
volatile int result = 0;
for(int t = 0; t < trials; ++t) {
for (int i = 1; i <= std::max(matrixToTest.rows(), matrixToTest.cols()); ++i) {
for (int j = 0; j < matrixToTest.rows(); ++j) {
for (int k = 0; k < matrixToTest.cols(); ++k) {
// std::cout << GetMatrixSection(matrixToTest, j, k, i) << "/n/n"; // printout
result += GetMatrixSection(matrixToTest, j, k, i).cols();
}
}
}
}
return result;
}
int main()
{
MatrixXi m = Eigen::MatrixXi(4, 5);
m<< 1, 0, 0, 0, 1,
1, 1, 0, 0, 1,
1, 0, 1, 1, 0,
1, 0, 0, 1, 0;
auto startTime = std::chrono::steady_clock::now();
TestGetMatrixSection(m, 10000);
std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-startTime).count() << " milliseconds\n";
return 0;
}
目前该函数可以正常工作,但我担心性能会成为一个问题,因为它会被调用数百万次。
预期输出
Example 3x3 matrix
[[1, 0, 1],
[1, 1, 0],
[0, 1, 0]]
row index 1, col index 1, radius 2
[[1, 0, 1],
[1, 1, 0],
[0, 1, 0]]
row index 0, col index 0, radius 2
[[0, 0, 0],
[0, 1, 0],
[0, 1, 1]]
row index 2, col index 2, radius 2
[[1, 0, 0],
[1, 0, 0],
[0, 0, 0]]
row index 0, col index 0, radius 3
[[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 1, 0, 1],
[0, 0, 1, 1, 0],
[0, 0, 0, 1, 0]]
假设 baseMatrix
的大小不变,并且您知道 radius
的合理上限 我建议将该矩阵存储到一个更大的矩阵中,该矩阵在每一侧都扩展了 0 个条目.实现这一点的一种可能性:
template<class S>
struct ExtendedMatrix{
typedef Eigen::Matrix<S,Eigen::Dynamic, Eigen::Dynamic> MatS;
typedef Eigen::Ref<MatS> Ref;
typedef Eigen::Ref<MatS const> RefC;
// read/write reference to baseMatrix:
Ref getBaseMatrix() {
return storage.block(maxR-1, maxR-1, storage.rows()-2*maxR+1, storage.cols()-2*maxR+1);
}
// read-only reference to baseMatrix:
RefC getBaseMatrix() const {
return storage.block(maxR-1, maxR-1, storage.rows()-2*maxR+1, storage.cols()-2*maxR+1);
}
// constructor. Takes an initial matrix and a maximum radius.
template<class Derived>
ExtendedMatrix(Eigen::MatrixBase<Derived> const& base, int maxRadius)
: maxR(maxRadius), storage(base.rows()+2*maxR-1, base.cols()+2*maxR-1)
{
storage.setZero();
getBaseMatrix() = base;
}
// method to get a (read-only) block of size 2radius-1 around a given coordinate
// entries outside the original matrix are filled with 0s.
RefC getMatrixSection(int centerRow, int centerCol, int radius) const
{
assert(radius <= maxR);
int offset = maxR-radius;
return storage.block(centerRow+offset, centerCol+offset, 2*radius-1, 2*radius-1);
}
protected:
int maxR;
MatS storage;
};
这样使用:
int TestGetMatrixSection(Eigen::MatrixXi const & matrixToTest, int trials=1)
{
int maxR = std::max(matrixToTest.rows(), matrixToTest.cols());
ExtendedMatrix<int> eMat(matrixToTest, maxR);
int result = 0;
for(int t = 0; t < trials; ++t) {
// eMat.getBaseMatrix().setRandom(); // or change individual entries ...
for (int i = 1; i <= std::max(matrixToTest.rows(), matrixToTest.cols()); ++i) {
for (int j = 0; j < matrixToTest.rows(); ++j) {
for (int k = 0; k < matrixToTest.cols(); ++k) {
// std::cout << GetMatrixSection(matrixToTest, j, k, i) << "/n/n"; // printout
result += eMat.getMatrixSection(j, k, i).cols();
}
}
}
}
return result;
}