将 Matrix::sparseMatrix 传递给 Rcpp
passing Matrix::sparseMatrix to Rcpp
所以我对将稀疏矩阵从 R 传递到 C++ 的推荐方法感到非常困惑。我的印象是 sp_mat 是正确的参数类型,如下面的代码
testCode = '
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::export]]
void testFun(arma::sp_mat F){
Rcpp::Rcout << "F has " << F.n_rows << " rows" << std::endl;
}'
Rcpp::sourceCpp(code = testCode)
n = 70000
M = Matrix::sparseMatrix(i=c(n), j=c(n), x=c(1))
testFun(M)
但是,运行 此代码会生成以下错误:
error: SpMat::init(): requested size is too large
Error in testFun(M) : SpMat::init(): requested size is too large
Calls: testFun -> .Call
Execution halted
我在 https://gallery.rcpp.org/articles/armadillo-sparse-matrix/ 中看到了示例,但我不确定它是否表示每次我们将稀疏矩阵传递给 c++ 时我们都应该使用那里提供的函数?感谢您的澄清!
好的,我想我找到了答案。如果元素总数大于存储元素数量的变量的大小,基本上犰狳会抛出此错误,如下所示:
https://gitlab.com/conradsnicta/armadillo-code/-/blob/9.900.x/include/armadillo_bits/SpMat_meat.hpp
之前有人意识到了这一点,并在这里提供了解决方案:
如果您重新访问基本 example in the Rcpp Gallery 并设置一个或两个稀疏矩阵 object,很明显 j
的高值会导致p
插槽(检查从 sparseMatrix
返回的 object)。
所以这是一个更简单的示例,它具有(仍然相当高)i
值但较低的 j
。我想你应该可以从这里拿走它:
代码
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
using namespace Rcpp ;
// [[Rcpp::export]]
void convertSparse(S4 mat) {
// obtain dim, i, p. x from S4 object
IntegerVector dims = mat.slot("Dim");
arma::urowvec i = Rcpp::as<arma::urowvec>(mat.slot("i"));
arma::urowvec p = Rcpp::as<arma::urowvec>(mat.slot("p"));
arma::vec x = Rcpp::as<arma::vec>(mat.slot("x"));
int nrow = dims[0], ncol = dims[1];
// use Armadillo sparse matrix constructor
arma::sp_mat res(i, p, x, nrow, ncol);
Rcout << "SpMat res:\n" << res << std::endl;
}
// [[Rcpp::export]]
void convertSparse2(S4 mat) { // slight improvement with two non-nested loops
IntegerVector dims = mat.slot("Dim");
arma::urowvec i = Rcpp::as<arma::urowvec>(mat.slot("i"));
arma::urowvec p = Rcpp::as<arma::urowvec>(mat.slot("p"));
arma::vec x = Rcpp::as<arma::vec>(mat.slot("x"));
int nrow = dims[0], ncol = dims[1];
arma::sp_mat res(nrow, ncol);
// create space for values, and copy
arma::access::rw(res.values) = arma::memory::acquire_chunked<double>(x.size() + 1);
arma::arrayops::copy(arma::access::rwp(res.values), x.begin(), x.size() + 1);
// create space for row_indices, and copy
arma::access::rw(res.row_indices) = arma::memory::acquire_chunked<arma::uword>(i.size() + 1);
arma::arrayops::copy(arma::access::rwp(res.row_indices), i.begin(), i.size() + 1);
// create space for col_ptrs, and copy
arma::access::rw(res.col_ptrs) = arma::memory::acquire<arma::uword>(p.size() + 2);
arma::arrayops::copy(arma::access::rwp(res.col_ptrs), p.begin(), p.size() + 1);
// important: set the sentinel as well
arma::access::rwp(res.col_ptrs)[p.size()+1] = std::numeric_limits<arma::uword>::max();
// set the number of non-zero elements
arma::access::rw(res.n_nonzero) = x.size();
Rcout << "SpMat res:\n" << res << std::endl;
}
/*** R
suppressMessages({
library(methods)
library(Matrix)
})
i <- c(1,3:6)
j <- c(2,9,6:8)
x <- 5 * (1:5)
A <- sparseMatrix(i, j, x = x)
print(A)
convertSparse(A)
i <- 56789
j <- 87
x <- 42
B <- sparseMatrix(i, j, x=x)
#print(B)
convertSparse(B)
convertSparse2(B)
*/
输出
R> Rcpp::sourceCpp("~/git/Whosebug/60838958/answer.cpp")
R> suppressMessages({library(methods); library(Matrix)})
R> i <- c(1,3:6)
R> j <- c(2,9,6:8)
R> x <- 5 * (1:5)
R> A <- sparseMatrix(i, j, x = x)
R> print(A)
6 x 9 sparse Matrix of class "dgCMatrix"
[1,] . 5 . . . . . . .
[2,] . . . . . . . . .
[3,] . . . . . . . . 10
[4,] . . . . . 15 . . .
[5,] . . . . . . 20 . .
[6,] . . . . . . . 25 .
R> convertSparse(A)
SpMat res:
[matrix size: 6x9; n_nonzero: 5; density: 9.26%]
(0, 1) 5.0000
(3, 5) 15.0000
(4, 6) 20.0000
(5, 7) 25.0000
(2, 8) 10.0000
R> i <- 56789
R> j <- 87
R> x <- 42
R> B <- sparseMatrix(i, j, x=x)
R> #print(B)
R> convertSparse(B)
SpMat res:
[matrix size: 56789x87; n_nonzero: 1; density: 2.02e-05%]
(56788, 86) 42.0000
R> convertSparse2(B)
SpMat res:
[matrix size: 56789x87; n_nonzero: 1; density: 2.02e-05%]
(56788, 86) 42.0000
R>
编辑 确实不错,提醒一下。如果我们添加
#define ARMA_64BIT_WORD 1
在包含 RcppArmadillo.h
header 之前,然后 i
和 j
都很大,一切正常。下面输出的尾部。
更新的输出(只是尾端)
R> i <- 56789
R> j <- 87654
R> x <- 42
R> B <- sparseMatrix(i, j, x=x)
R> #print(B)
R> convertSparse(B)
SpMat res:
[matrix size: 56789x87654; n_nonzero: 1; density: 2.01e-08%]
(56788, 87653) 42.0000
R> convertSparse2(B)
SpMat res:
[matrix size: 56789x87654; n_nonzero: 1; density: 2.01e-08%]
(56788, 87653) 42.0000
R>
所以我对将稀疏矩阵从 R 传递到 C++ 的推荐方法感到非常困惑。我的印象是 sp_mat 是正确的参数类型,如下面的代码
testCode = '
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::export]]
void testFun(arma::sp_mat F){
Rcpp::Rcout << "F has " << F.n_rows << " rows" << std::endl;
}'
Rcpp::sourceCpp(code = testCode)
n = 70000
M = Matrix::sparseMatrix(i=c(n), j=c(n), x=c(1))
testFun(M)
但是,运行 此代码会生成以下错误:
error: SpMat::init(): requested size is too large
Error in testFun(M) : SpMat::init(): requested size is too large
Calls: testFun -> .Call
Execution halted
我在 https://gallery.rcpp.org/articles/armadillo-sparse-matrix/ 中看到了示例,但我不确定它是否表示每次我们将稀疏矩阵传递给 c++ 时我们都应该使用那里提供的函数?感谢您的澄清!
好的,我想我找到了答案。如果元素总数大于存储元素数量的变量的大小,基本上犰狳会抛出此错误,如下所示: https://gitlab.com/conradsnicta/armadillo-code/-/blob/9.900.x/include/armadillo_bits/SpMat_meat.hpp
之前有人意识到了这一点,并在这里提供了解决方案:
如果您重新访问基本 example in the Rcpp Gallery 并设置一个或两个稀疏矩阵 object,很明显 j
的高值会导致p
插槽(检查从 sparseMatrix
返回的 object)。
所以这是一个更简单的示例,它具有(仍然相当高)i
值但较低的 j
。我想你应该可以从这里拿走它:
代码
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
using namespace Rcpp ;
// [[Rcpp::export]]
void convertSparse(S4 mat) {
// obtain dim, i, p. x from S4 object
IntegerVector dims = mat.slot("Dim");
arma::urowvec i = Rcpp::as<arma::urowvec>(mat.slot("i"));
arma::urowvec p = Rcpp::as<arma::urowvec>(mat.slot("p"));
arma::vec x = Rcpp::as<arma::vec>(mat.slot("x"));
int nrow = dims[0], ncol = dims[1];
// use Armadillo sparse matrix constructor
arma::sp_mat res(i, p, x, nrow, ncol);
Rcout << "SpMat res:\n" << res << std::endl;
}
// [[Rcpp::export]]
void convertSparse2(S4 mat) { // slight improvement with two non-nested loops
IntegerVector dims = mat.slot("Dim");
arma::urowvec i = Rcpp::as<arma::urowvec>(mat.slot("i"));
arma::urowvec p = Rcpp::as<arma::urowvec>(mat.slot("p"));
arma::vec x = Rcpp::as<arma::vec>(mat.slot("x"));
int nrow = dims[0], ncol = dims[1];
arma::sp_mat res(nrow, ncol);
// create space for values, and copy
arma::access::rw(res.values) = arma::memory::acquire_chunked<double>(x.size() + 1);
arma::arrayops::copy(arma::access::rwp(res.values), x.begin(), x.size() + 1);
// create space for row_indices, and copy
arma::access::rw(res.row_indices) = arma::memory::acquire_chunked<arma::uword>(i.size() + 1);
arma::arrayops::copy(arma::access::rwp(res.row_indices), i.begin(), i.size() + 1);
// create space for col_ptrs, and copy
arma::access::rw(res.col_ptrs) = arma::memory::acquire<arma::uword>(p.size() + 2);
arma::arrayops::copy(arma::access::rwp(res.col_ptrs), p.begin(), p.size() + 1);
// important: set the sentinel as well
arma::access::rwp(res.col_ptrs)[p.size()+1] = std::numeric_limits<arma::uword>::max();
// set the number of non-zero elements
arma::access::rw(res.n_nonzero) = x.size();
Rcout << "SpMat res:\n" << res << std::endl;
}
/*** R
suppressMessages({
library(methods)
library(Matrix)
})
i <- c(1,3:6)
j <- c(2,9,6:8)
x <- 5 * (1:5)
A <- sparseMatrix(i, j, x = x)
print(A)
convertSparse(A)
i <- 56789
j <- 87
x <- 42
B <- sparseMatrix(i, j, x=x)
#print(B)
convertSparse(B)
convertSparse2(B)
*/
输出
R> Rcpp::sourceCpp("~/git/Whosebug/60838958/answer.cpp")
R> suppressMessages({library(methods); library(Matrix)})
R> i <- c(1,3:6)
R> j <- c(2,9,6:8)
R> x <- 5 * (1:5)
R> A <- sparseMatrix(i, j, x = x)
R> print(A)
6 x 9 sparse Matrix of class "dgCMatrix"
[1,] . 5 . . . . . . .
[2,] . . . . . . . . .
[3,] . . . . . . . . 10
[4,] . . . . . 15 . . .
[5,] . . . . . . 20 . .
[6,] . . . . . . . 25 .
R> convertSparse(A)
SpMat res:
[matrix size: 6x9; n_nonzero: 5; density: 9.26%]
(0, 1) 5.0000
(3, 5) 15.0000
(4, 6) 20.0000
(5, 7) 25.0000
(2, 8) 10.0000
R> i <- 56789
R> j <- 87
R> x <- 42
R> B <- sparseMatrix(i, j, x=x)
R> #print(B)
R> convertSparse(B)
SpMat res:
[matrix size: 56789x87; n_nonzero: 1; density: 2.02e-05%]
(56788, 86) 42.0000
R> convertSparse2(B)
SpMat res:
[matrix size: 56789x87; n_nonzero: 1; density: 2.02e-05%]
(56788, 86) 42.0000
R>
编辑 确实不错,提醒一下。如果我们添加
#define ARMA_64BIT_WORD 1
在包含 RcppArmadillo.h
header 之前,然后 i
和 j
都很大,一切正常。下面输出的尾部。
更新的输出(只是尾端)
R> i <- 56789
R> j <- 87654
R> x <- 42
R> B <- sparseMatrix(i, j, x=x)
R> #print(B)
R> convertSparse(B)
SpMat res:
[matrix size: 56789x87654; n_nonzero: 1; density: 2.01e-08%]
(56788, 87653) 42.0000
R> convertSparse2(B)
SpMat res:
[matrix size: 56789x87654; n_nonzero: 1; density: 2.01e-08%]
(56788, 87653) 42.0000
R>