重复调用 CwiseUnaryView - 最好复制到矩阵?
Repeated calls to CwiseUnaryView - better to copy to a matrix?
正如标题所说,我有一个自定义的 UnaryView 函数,它在不同的操作中被多次调用(主要是与其他矩阵相乘)。例如:
MatrixXd out = mat1.cview() * mat2;
MatrixXd out2 = mat1.cview() * mat3.transpose();
首先将自定义视图复制到一个单独的矩阵中并改用它会更快吗?例如:
MatrixXd mat1_dbl = mat1.cview();
MatrixXd out = mat1_dbl * mat2;
MatrixXd out2 = mat1_dbl * mat3.transpose();
基本上,重复使用 UnaryView 是否比复制到矩阵并改用它慢?
我本该先自己做基准测试。Google Benchmark 的结果表明,先复制到矩阵会更快(每次迭代约 139.5 ms 对 147.4 ms,约快 5%):
2019-04-09 20:55:55
Running ./CView
Run on (16 X 4053.06 MHz CPU s)
CPU Caches:
L1 Data 32K (x8)
L1 Instruction 64K (x8)
L2 Unified 512K (x8)
L3 Unified 8192K (x2)
--------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------
UnaryView_Repeat 147390919 ns 147385796 ns 5
UnaryView_Copy 139456051 ns 139451409 ns 5
测试:
#include <stan/math/prim/mat/fun/Eigen.hpp>
#include <stan/math/fwd/mat.hpp>
#include <stan/math/fwd/core.hpp>
#include <benchmark/benchmark.h>
/// Benchmark: evaluate an expression that reads `m_fd1.val_()` three times
/// directly, without first materialising it into a plain `MatrixXd`.
///
/// NOTE(review): `val_()` / `d_()` are presumably the CwiseUnaryView
/// accessors discussed in the surrounding text (value and tangent parts of
/// the fvar matrix) — confirm against the Stan Math headers.
///
/// @param state Google Benchmark state object that drives the timed loop.
static void UnaryView_Repeat(benchmark::State& state) {
  using Eigen::MatrixXd;
  using stan::math::matrix_fd;

  // Inputs are built once, outside the timed loop.
  matrix_fd m_fd1(1000, 1000);
  m_fd1.val_() = MatrixXd::Random(1000, 1000);
  m_fd1.d_() = MatrixXd::Random(1000, 1000);
  MatrixXd m_d2 = MatrixXd::Random(1000, 1000);

  for (auto _ : state) {
    MatrixXd out(1000, 1000);
    // The view is re-read in each of the three terms.
    out = m_fd1.val_() * m_d2
          + m_fd1.val_().transpose() * m_d2
          + m_fd1.val_().array().exp().matrix();
    // Make the result observable so the optimiser cannot discard the
    // computation whose cost is being measured.
    benchmark::DoNotOptimize(out);
  }
}
BENCHMARK(UnaryView_Repeat);
/// Benchmark: copy `m_fd1.val_()` into a plain `MatrixXd` once per iteration
/// and use that copy in all three terms of the expression.
///
/// The copy is deliberately made inside the timed loop so that its cost is
/// included in the measurement.
///
/// NOTE(review): `val_()` / `d_()` are presumably the CwiseUnaryView
/// accessors discussed in the surrounding text (value and tangent parts of
/// the fvar matrix) — confirm against the Stan Math headers.
///
/// @param state Google Benchmark state object that drives the timed loop.
static void UnaryView_Copy(benchmark::State& state) {
  using Eigen::MatrixXd;
  using stan::math::matrix_fd;

  // Inputs are built once, outside the timed loop.
  matrix_fd m_fd1(1000, 1000);
  m_fd1.val_() = MatrixXd::Random(1000, 1000);
  m_fd1.d_() = MatrixXd::Random(1000, 1000);
  MatrixXd m_d2 = MatrixXd::Random(1000, 1000);

  for (auto _ : state) {
    MatrixXd out(1000, 1000);
    // Materialise the view once, then reuse the dense copy below.
    MatrixXd m_fd1_val = m_fd1.val_();
    out = m_fd1_val * m_d2
          + m_fd1_val.transpose() * m_d2
          + m_fd1_val.array().exp().matrix();
    // Make the result observable so the optimiser cannot discard the
    // computation whose cost is being measured.
    benchmark::DoNotOptimize(out);
  }
}
BENCHMARK(UnaryView_Copy);
// Google Benchmark macro that supplies the program entry point and runs the
// benchmarks registered above with BENCHMARK(...).
BENCHMARK_MAIN();
正如标题所说,我有一个自定义的 UnaryView 函数,它在不同的操作中被多次调用(主要是与其他矩阵相乘)。例如:
MatrixXd out = mat1.cview() * mat2;
MatrixXd out2 = mat1.cview() * mat3.transpose();
首先将自定义视图复制到一个单独的矩阵中并改用它会更快吗?例如:
MatrixXd mat1_dbl = mat1.cview();
MatrixXd out = mat1_dbl * mat2;
MatrixXd out2 = mat1_dbl * mat3.transpose();
基本上,重复使用 UnaryView 是否比复制到矩阵并改用它慢?
我本该先自己做基准测试。Google Benchmark 的结果表明,先复制到矩阵会更快(每次迭代约 139.5 ms 对 147.4 ms,约快 5%):
2019-04-09 20:55:55
Running ./CView
Run on (16 X 4053.06 MHz CPU s)
CPU Caches:
L1 Data 32K (x8)
L1 Instruction 64K (x8)
L2 Unified 512K (x8)
L3 Unified 8192K (x2)
--------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------
UnaryView_Repeat 147390919 ns 147385796 ns 5
UnaryView_Copy 139456051 ns 139451409 ns 5
测试:
#include <stan/math/prim/mat/fun/Eigen.hpp>
#include <stan/math/fwd/mat.hpp>
#include <stan/math/fwd/core.hpp>
#include <benchmark/benchmark.h>
/// Benchmark: evaluate an expression that reads `m_fd1.val_()` three times
/// directly, without first materialising it into a plain `MatrixXd`.
///
/// NOTE(review): `val_()` / `d_()` are presumably the CwiseUnaryView
/// accessors discussed in the surrounding text (value and tangent parts of
/// the fvar matrix) — confirm against the Stan Math headers.
///
/// @param state Google Benchmark state object that drives the timed loop.
static void UnaryView_Repeat(benchmark::State& state) {
  using Eigen::MatrixXd;
  using stan::math::matrix_fd;

  // Inputs are built once, outside the timed loop.
  matrix_fd m_fd1(1000, 1000);
  m_fd1.val_() = MatrixXd::Random(1000, 1000);
  m_fd1.d_() = MatrixXd::Random(1000, 1000);
  MatrixXd m_d2 = MatrixXd::Random(1000, 1000);

  for (auto _ : state) {
    MatrixXd out(1000, 1000);
    // The view is re-read in each of the three terms.
    out = m_fd1.val_() * m_d2
          + m_fd1.val_().transpose() * m_d2
          + m_fd1.val_().array().exp().matrix();
    // Make the result observable so the optimiser cannot discard the
    // computation whose cost is being measured.
    benchmark::DoNotOptimize(out);
  }
}
BENCHMARK(UnaryView_Repeat);
/// Benchmark: copy `m_fd1.val_()` into a plain `MatrixXd` once per iteration
/// and use that copy in all three terms of the expression.
///
/// The copy is deliberately made inside the timed loop so that its cost is
/// included in the measurement.
///
/// NOTE(review): `val_()` / `d_()` are presumably the CwiseUnaryView
/// accessors discussed in the surrounding text (value and tangent parts of
/// the fvar matrix) — confirm against the Stan Math headers.
///
/// @param state Google Benchmark state object that drives the timed loop.
static void UnaryView_Copy(benchmark::State& state) {
  using Eigen::MatrixXd;
  using stan::math::matrix_fd;

  // Inputs are built once, outside the timed loop.
  matrix_fd m_fd1(1000, 1000);
  m_fd1.val_() = MatrixXd::Random(1000, 1000);
  m_fd1.d_() = MatrixXd::Random(1000, 1000);
  MatrixXd m_d2 = MatrixXd::Random(1000, 1000);

  for (auto _ : state) {
    MatrixXd out(1000, 1000);
    // Materialise the view once, then reuse the dense copy below.
    MatrixXd m_fd1_val = m_fd1.val_();
    out = m_fd1_val * m_d2
          + m_fd1_val.transpose() * m_d2
          + m_fd1_val.array().exp().matrix();
    // Make the result observable so the optimiser cannot discard the
    // computation whose cost is being measured.
    benchmark::DoNotOptimize(out);
  }
}
BENCHMARK(UnaryView_Copy);
// Google Benchmark macro that supplies the program entry point and runs the
// benchmarks registered above with BENCHMARK(...).
BENCHMARK_MAIN();