如何创建子矩阵
How can I create submatrices
我知道我可以从已经创建的矩阵中提取子矩阵,但我希望能够先创建子矩阵,然后组合创建的子矩阵以形成更大的矩阵以节省 space 和时间。例如,在我的示例中,我希望能够为具有 NAs (1-10) 的 ID 和不具有 NAs(11-20) 的 ID 创建一个子矩阵,然后将这两个矩阵组合在一起形成一个更大的矩阵,但我没有得到它, 如果有人可以建议我的代码中应该包含什么,我将对有 NA 和没有 NA 的情况进行相同的计算。
P.S:我也希望能够在将它们合并到一个奇异矩阵(20x20)之前分别保存这些子矩阵
dorm<-function(data)
{
library(Matrix)
n<-max(as.numeric(fam[,"ID"]))
t<-min(as.numeric(fam[,"ID"]))
A <- sparseMatrix(i = n, j=n, x=n)
while(t <=n) {
for( t in t:n ){
s <- max(fam[t,"dad"],fam[t,"mum"])
d <- min(fam[t,"dad"],fam[t,"mum"])
if( !is.na(s) ){
if( !is.na(d) ){
A[t,t] = 2-0.5^(fam[t,"GEN"]-1)+0.5^(fam[t,"GEN"])*A[fam[t,"dad"],fam[t,"mum"]]
tmp = 0.5 * (A[1:(t-1),s] + A[1:(t-1),d])
A[t, 1:(t-1)] = tmp
A[1:(t-1), t] = tmp
} else {
A[t,t] = 2-0.5^(fam[t,"GEN"]-1)
tmp = 0.5 * A[1:(t-1),s]
A[t, 1:(t-1)] = tmp
A[1:(t-1), t] = tmp
}
} else {
A[t,t] = 2-0.5^(fam[t,"GEN"]-1)
}
message(" MatbyGEN: ", t)
}
return(A)
}
}
fam <- structure(list(ID = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 18L, 15L, 16L, 17L, 20L, 19L), dad = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 4L, 6L, 4L, 10L,
12L, 13L, 13L, 14L), mum = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 2L, 3L, 2L, 5L, 11L, 11L, 5L, 3L, 7L, 2L), GEN = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L)), class = "data.frame", row.names = c(NA, -20L))
A <- dorm(fam)
这是一个rcpp解决方案。它在大型数据集上快了约 50 倍(1 秒对 50 秒):
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
using namespace Rcpp;
using namespace arma;
// [[Rcpp::export]]
sp_mat rcpp_dorm_sp(IntegerVector ID, IntegerVector dad, IntegerVector mum, IntegerVector gen){
int n;
int s; int d;
double tmp;
sp_mat A(dad.size(), dad.size());
A.diag().ones();
n = max(ID);
for(int t = 0; t < n; t++){
s = std::max(dad[t], mum[t]);
d = std::min(dad[t], mum[t]);
A(t,t) = 2-pow(0.5, gen[t] - 1);
if ((s>0) & (d>0) ) {
A(t,t) += pow(0.5, gen[t])*A(dad[t]-1,mum[t]-1);
for(int j = 0; j < t; j++){
tmp = 0.5 * (A(j, dad[t]-1) + A(j, mum[t]-1));
if (tmp > 0){
A(t,j) = tmp;
A(j,t) = tmp;
}
}
} else if ((s>0) & (d==0)) {
for(int j = 0; j < t; j++){
tmp = 0.5 * A(j, s-1);
if (tmp > 0){
A(t,j) = tmp;
A(j,t) = tmp;
}
}
}
}
return(A);
}
以及 R
部分:
fam_mid <- structure(list(ID = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 18L, 15L, 16L, 17L, 20L, 19L),
dad = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 4L, 6L, 4L, 10L,
12L, 13L, 13L, 14L),
mum = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2L, 3L, 2L, 5L, 11L, 11L, 5L, 3L, 7L, 2L)
, GEN = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L)), class = "data.frame", row.names = c(NA, -20L))
rcpp_dorm_sp(fam_cpp$ID, fam_cpp$dad, fam_cpp$mum, fam_cpp$GEN)
为了使 Cole 的书面函数变得稀疏,我不得不使用 A[t, vec]<- 0.5 * Matrix::rowSums(cbind(A[vec,fam[t,"dad"]],A[vec,fam[t,"mum"]]), na.rm=T)
来修复它
感谢到目前为止,我们无法创建子矩阵,但我们认为我们做得更好
我知道我可以从已经创建的矩阵中提取子矩阵,但我希望能够先创建子矩阵,然后组合创建的子矩阵以形成更大的矩阵以节省 space 和时间。例如,在我的示例中,我希望能够为具有 NAs (1-10) 的 ID 和不具有 NAs(11-20) 的 ID 创建一个子矩阵,然后将这两个矩阵组合在一起形成一个更大的矩阵,但我没有得到它, 如果有人可以建议我的代码中应该包含什么,我将对有 NA 和没有 NA 的情况进行相同的计算。
P.S:我也希望能够在将它们合并到一个奇异矩阵(20x20)之前分别保存这些子矩阵
dorm<-function(data)
{
library(Matrix)
n<-max(as.numeric(fam[,"ID"]))
t<-min(as.numeric(fam[,"ID"]))
A <- sparseMatrix(i = n, j=n, x=n)
while(t <=n) {
for( t in t:n ){
s <- max(fam[t,"dad"],fam[t,"mum"])
d <- min(fam[t,"dad"],fam[t,"mum"])
if( !is.na(s) ){
if( !is.na(d) ){
A[t,t] = 2-0.5^(fam[t,"GEN"]-1)+0.5^(fam[t,"GEN"])*A[fam[t,"dad"],fam[t,"mum"]]
tmp = 0.5 * (A[1:(t-1),s] + A[1:(t-1),d])
A[t, 1:(t-1)] = tmp
A[1:(t-1), t] = tmp
} else {
A[t,t] = 2-0.5^(fam[t,"GEN"]-1)
tmp = 0.5 * A[1:(t-1),s]
A[t, 1:(t-1)] = tmp
A[1:(t-1), t] = tmp
}
} else {
A[t,t] = 2-0.5^(fam[t,"GEN"]-1)
}
message(" MatbyGEN: ", t)
}
return(A)
}
}
fam <- structure(list(ID = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 18L, 15L, 16L, 17L, 20L, 19L), dad = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 4L, 6L, 4L, 10L,
12L, 13L, 13L, 14L), mum = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 2L, 3L, 2L, 5L, 11L, 11L, 5L, 3L, 7L, 2L), GEN = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L)), class = "data.frame", row.names = c(NA, -20L))
A <- dorm(fam)
这是一个rcpp解决方案。它在大型数据集上快了约 50 倍(1 秒对 50 秒):
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
using namespace Rcpp;
using namespace arma;
// [[Rcpp::export]]
sp_mat rcpp_dorm_sp(IntegerVector ID, IntegerVector dad, IntegerVector mum, IntegerVector gen){
int n;
int s; int d;
double tmp;
sp_mat A(dad.size(), dad.size());
A.diag().ones();
n = max(ID);
for(int t = 0; t < n; t++){
s = std::max(dad[t], mum[t]);
d = std::min(dad[t], mum[t]);
A(t,t) = 2-pow(0.5, gen[t] - 1);
if ((s>0) & (d>0) ) {
A(t,t) += pow(0.5, gen[t])*A(dad[t]-1,mum[t]-1);
for(int j = 0; j < t; j++){
tmp = 0.5 * (A(j, dad[t]-1) + A(j, mum[t]-1));
if (tmp > 0){
A(t,j) = tmp;
A(j,t) = tmp;
}
}
} else if ((s>0) & (d==0)) {
for(int j = 0; j < t; j++){
tmp = 0.5 * A(j, s-1);
if (tmp > 0){
A(t,j) = tmp;
A(j,t) = tmp;
}
}
}
}
return(A);
}
以及 R
部分:
fam_mid <- structure(list(ID = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 18L, 15L, 16L, 17L, 20L, 19L),
dad = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 4L, 6L, 4L, 10L,
12L, 13L, 13L, 14L),
mum = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2L, 3L, 2L, 5L, 11L, 11L, 5L, 3L, 7L, 2L)
, GEN = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L)), class = "data.frame", row.names = c(NA, -20L))
rcpp_dorm_sp(fam_cpp$ID, fam_cpp$dad, fam_cpp$mum, fam_cpp$GEN)
为了使 Cole 的书面函数变得稀疏,我不得不使用 A[t, vec]<- 0.5 * Matrix::rowSums(cbind(A[vec,fam[t,"dad"]],A[vec,fam[t,"mum"]]), na.rm=T)
感谢到目前为止,我们无法创建子矩阵,但我们认为我们做得更好