Eigen dense matrix * dense vector multiplication 应该比 GSL 慢 7 倍吗？

Question

的答案让我期望 Eigen 中的乘积（对于具有 1/4 非消失项的矩阵）稠密矩阵 * 稠密向量应该跑赢稀疏矩阵*密集向量。

我不仅看到相反的情况，而且两者的表现分别比 GSL 高出 7 倍和 4 倍。

我是不是用错了Eigen？我是在粗心地计时吗？我很吃惊。

我的编译选项是：

ludi@ludi-M17xR4:~/Desktop/tests$ g++ -o eigenfill.x eigenfill.cc -L/usr/local/lib -lgsl -lgslcblas && ./eigenfill.x

我的代码是：

#include <iostream>
#include <stdio.h>
#include <stdlib.h>     
#include <Eigen/Sparse>
#include <Eigen/Dense>
#include <gsl/gsl_matrix.h>
#include <sys/time.h>
#include <gsl/gsl_blas.h>
#define helix 100
#define rows helix*helix
#define cols rows
#define filling rows/4
#define REPS 10


using namespace Eigen;

/*-- DECLARATIONES --*/
int FillSparseMatrix(SparseMatrix<double> & mat);
int FillDenseMatrices(MatrixXd & Mat, gsl_matrix *testmat);
double vee(int i, int j);
int set_vectors_randomly(gsl_vector * v2, VectorXd v1);

int main()
{
int rep;
    struct timeval tval_before, tval_after, tval_result;

gsl_matrix *testmat     = gsl_matrix_calloc(rows, cols);
gsl_vector *v2      =gsl_vector_calloc(cols);
gsl_vector *prod    =gsl_vector_calloc(cols);

SparseMatrix<double> mat(rows,cols);         // default is column major
MatrixXd Mat(rows,cols);         // default is column major
VectorXd v1(cols), vv1(cols);

FillSparseMatrix(mat);
FillDenseMatrices(Mat, testmat);
    printf("\n/*--- --- --- ---*/\n");
for(rep=0;rep<REPS;rep++)
{
set_vectors_randomly(v2, v1);

    gettimeofday(&tval_before, NULL);       
vv1 = mat*v1;
    gettimeofday(&tval_after, NULL);
    timersub(&tval_after, &tval_before, &tval_result);

    printf("Time for one product, SPARSE EIGEN / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);
    gettimeofday(&tval_before, NULL);       
gsl_blas_dgemv( CblasNoTrans,1.0, testmat, v2, 0.0, prod);
    gettimeofday(&tval_after, NULL);
    timersub(&tval_after, &tval_before, &tval_result);
    printf("Time for one product, GSL / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);

    gettimeofday(&tval_before, NULL);       
vv1 = Mat*v1;
    gettimeofday(&tval_after, NULL);
    timersub(&tval_after, &tval_before, &tval_result);
    printf("Time for one product, DENSE EIGEN / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);
    printf("/*--- --- --- ---*/\n\n");


  //std::cout << mat << std::endl;
}
gsl_matrix_free(testmat);   
printf("--- --- --->DONE\n");
return(0);
}

/*-- --*/
int FillSparseMatrix(SparseMatrix<double> &mat)
{
int i, j;
Eigen::VectorXd Vres;
mat.reserve(Eigen::VectorXi::Constant(cols,filling));

printf("Filling Sparse Matrix ...");
    for(i=0;i<rows;i++)
    {
        if(i%2500==0){printf("i= %i\n", i);}
    for(j=0;j<cols;j++)
        {
        if (vee(i,j) != 0){mat.insert(i,j) = vee(i,j);    /*alternative: mat.coeffRef(i,j) += v_ij;*/ }
        }

    }

return(0);
}
/*-- --*/

/*-- --*/
int FillDenseMatrices(MatrixXd &Mat, gsl_matrix * testmat)
{
int i, j;
Eigen::VectorXd Vres;
double aux;
printf("Filling Dense Matrix ...");
    for(i=0;i<rows;i++)
    {
        if(i%2500==0){printf("i= %i\n", i);}
    for(j=0;j<cols;j++)
        {
        aux = vee(i,j);
        if (aux != 0)
        {
        Mat(i,j) = aux;    
        gsl_matrix_set(testmat, i, j, aux);
        }
        }

    }
return(0);
}
/*-- --*/

double vee(int i, int j)
{
    double result = 0.0;

    if(i%4 == 0){result =1.0;}

    return result;
}
/*-- --*/
int set_vectors_randomly(gsl_vector * v2, VectorXd v1){
printf("Setting vectors rendomly anew ...\n");
for (int j=0;j<cols;j++) 
{
double r=drand48();
v1(j) =r;
gsl_vector_set(v2, j, r);

}
return(0);
}
/*-- --*/

Answer 1

使用 Eigen，在没有编译器优化的情况下编译时性能很差。有几种方法可以显着提高性能：

启用优化（g++ 中的 -O2 或 -O3）性能可以提高两到三个数量级。
通过在包含 Eigen 库之前定义 NDEBUG 可以获得额外的（较小的）加速。这会禁用边界检查，因此在启用此功能之前请确保没有问题。
Eigen can also take advantage of vectorization（从 3.2.6 开始的 SSE 和从 3.3 开始的 AVX，以及 PowerPC 和 ARM）导致进一步加速。只需在编译器中启用相关标志即可。
Enabling OMP 也可以导致加速。同样，在您的编译器中启用相关标志，Eigen 将完成剩下的工作。

Answer 2

其实Spasematrix默认是columnmajor，不适合做vector的product。使用 Spasematrix，你会发现它更快。

Eigen dense matrix * dense vector multiplication 应该比 GSL 慢 7 倍吗？

Should Eigen dense matrix * dense vector multiplication be 7 times slower than GSL?

c++

eigen