你如何使用cblas_dgemm做一个向量外积?
How do you use cblas_dgemm to do a vector outer product?
我正在尝试将列向量与行向量相乘。我可以使用 dgemm 吗?
也就是说D = A * B
其中 D 是矩阵,A 是列向量,B 是行向量。
我遵循了此处找到的文档 https://software.intel.com/en-us/node/520775。我似乎无法获得 cblas_dgemm
的正确参数
这是我的尝试。在我的例子中 m = nRows, n = nCols, k = 1
问题似乎出在lda、ldb 和ldc 上。我分别定义为nCols, k, nRows.
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <mkl.h>
#define nCols 5
#define nRows 20
#define k 1
void PrintMatrix(double* pMatrix, const size_t nR, const size_t nC, const CBLAS_ORDER Order) {
unsigned int i, j;
if (Order == CblasRowMajor)
{
for (i = 0; i < nR; i++)
{
for (j = 0; j < nC; j++)
{
printf("%f \t ", pMatrix[i * nC + j]); // !!!
}
printf("\n"); // !!!
}
}
else
{
for (i = 0; i < nR; i++) {
for (j = 0; j < nC; j++) {
printf("%f \t ", pMatrix[i + j* nR ]); // !!!
}
printf("\n"); // !!!
}
}
printf("\n"); // !!!
}
int main(void) {
double A[] = { 8, 4, 7, 3, 5, 1, 1, 3, 2, 1, 2, 3, 2, 0, 1, 1 , 2, 3, 4, 1};
double B[] = { -1, 2, -1, 1, 2 };
double alpha = 1.0, beta = 0.0;
int i, lda, ldb, ldc;
double *C, *D;
D = (double*) malloc(nRows * nCols * sizeof(double));
C = (double*) malloc(nRows * nCols * sizeof(double));
for (i = 0; i < nRows*nCols; i++)
D[i] = 0.0;
for (i = 0; i < nRows*nCols; i++)
C[i] = 0.0;
lda = nCols;
ldb = k;
ldc = nRows;
cblas_dger(CblasRowMajor, nRows, nCols, alpha, A, 1, B, 1, C, nCols);
PrintMatrix(C, nRows, nCols,CblasRowMajor);
cblas_dgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, nRows, nCols, k, alpha, A, lda, B, ldb, beta, D, ldc);
PrintMatrix(D, nRows, nCols, CblasRowMajor);
free(D);
free(C);
return 0;
}
简短的回答是,是的,您可以使用 dgemm
进行 rank-1 更新。 dger
当然是建议的,因为希望能更好地优化这个操作。
至于使用 cblas_dgemm
。如您所知,领先维度的定义是:
lda: The size of the first dimension of matrix A
您尝试执行的操作是:
D(20x5) = A(20x1) * B(1x5)
您正在使用 CblasRowMajor
,因此前导维度是所有矩阵的列数(有关解释,请参见 )。含义:
lda = 1;
ldb = 5;
ldc = 5;
以下代码有效。我切换到 column-major 是因为在 Fortran BLAS 文档的上下文中更容易理解前导维度问题。很抱歉,我无法完全按照提出的方式解决您的问题
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <mkl.h>
void PrintMatrix(double* pMatrix, const size_t nR, const size_t nC, const CBLAS_ORDER Order) {
unsigned int i, j;
if (Order == CblasRowMajor) {
for (i = 0; i < nR; i++) {
for (j = 0; j < nC; j++) {
printf("%f \t ", pMatrix[i * nC + j]); // !!!
}
printf("\n"); // !!!
}
} else {
for (i = 0; i < nR; i++) {
for (j = 0; j < nC; j++) {
printf("%f \t ", pMatrix[i + j* nR ]); // !!!
}
printf("\n"); // !!!
}
}
printf("\n"); // !!!
}
int main(void)
{
const int m = 20;
const int n = 5;
const int k = 1;
double A[] = { 8, 4, 7, 3, 5, 1, 1, 3, 2, 1, 2, 3, 2, 0, 1, 1, 2, 3, 4, 1};
double B[] = { -1, 2, -1, 1, 2 };
double alpha = 1.0, beta = 0.0;
double * C = (double*) malloc(m * n * sizeof(double));
double * D = (double*) malloc(m * n * sizeof(double));
for (int i = 0; i < m*n; i++) C[i] = 0.0;
for (int i = 0; i < m*n; i++) D[i] = 0.0;
int lda = 20;
int ldb = 1;
int ldc = 20;
cblas_dger(CblasColMajor, m, n, alpha, A, 1, B, 1, C, ldc);
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
m, n, k,
alpha, A, lda,
B, ldb, beta,
D, ldc);
PrintMatrix(C, m, n, CblasRowMajor);
PrintMatrix(D, m, n, CblasRowMajor);
free(D);
free(C);
return 0;
}
我正在尝试将列向量与行向量相乘。我可以使用 dgemm 吗?
也就是说D = A * B 其中 D 是矩阵,A 是列向量,B 是行向量。
我遵循了此处找到的文档 https://software.intel.com/en-us/node/520775。我似乎无法获得 cblas_dgemm
的正确参数这是我的尝试。在我的例子中 m = nRows, n = nCols, k = 1
问题似乎出在lda、ldb 和ldc 上。我分别定义为nCols, k, nRows.
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <mkl.h>
#define nCols 5
#define nRows 20
#define k 1
void PrintMatrix(double* pMatrix, const size_t nR, const size_t nC, const CBLAS_ORDER Order) {
unsigned int i, j;
if (Order == CblasRowMajor)
{
for (i = 0; i < nR; i++)
{
for (j = 0; j < nC; j++)
{
printf("%f \t ", pMatrix[i * nC + j]); // !!!
}
printf("\n"); // !!!
}
}
else
{
for (i = 0; i < nR; i++) {
for (j = 0; j < nC; j++) {
printf("%f \t ", pMatrix[i + j* nR ]); // !!!
}
printf("\n"); // !!!
}
}
printf("\n"); // !!!
}
int main(void) {
double A[] = { 8, 4, 7, 3, 5, 1, 1, 3, 2, 1, 2, 3, 2, 0, 1, 1 , 2, 3, 4, 1};
double B[] = { -1, 2, -1, 1, 2 };
double alpha = 1.0, beta = 0.0;
int i, lda, ldb, ldc;
double *C, *D;
D = (double*) malloc(nRows * nCols * sizeof(double));
C = (double*) malloc(nRows * nCols * sizeof(double));
for (i = 0; i < nRows*nCols; i++)
D[i] = 0.0;
for (i = 0; i < nRows*nCols; i++)
C[i] = 0.0;
lda = nCols;
ldb = k;
ldc = nRows;
cblas_dger(CblasRowMajor, nRows, nCols, alpha, A, 1, B, 1, C, nCols);
PrintMatrix(C, nRows, nCols,CblasRowMajor);
cblas_dgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, nRows, nCols, k, alpha, A, lda, B, ldb, beta, D, ldc);
PrintMatrix(D, nRows, nCols, CblasRowMajor);
free(D);
free(C);
return 0;
}
简短的回答是,是的,您可以使用 dgemm
进行 rank-1 更新。 dger
当然是建议的,因为希望能更好地优化这个操作。
至于使用 cblas_dgemm
。如您所知,领先维度的定义是:
lda: The size of the first dimension of matrix A
您尝试执行的操作是: D(20x5) = A(20x1) * B(1x5)
您正在使用 CblasRowMajor
,因此前导维度是所有矩阵的列数(有关解释,请参见
lda = 1;
ldb = 5;
ldc = 5;
以下代码有效。我切换到 column-major 是因为在 Fortran BLAS 文档的上下文中更容易理解前导维度问题。很抱歉,我无法完全按照提出的方式解决您的问题
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <mkl.h>
void PrintMatrix(double* pMatrix, const size_t nR, const size_t nC, const CBLAS_ORDER Order) {
unsigned int i, j;
if (Order == CblasRowMajor) {
for (i = 0; i < nR; i++) {
for (j = 0; j < nC; j++) {
printf("%f \t ", pMatrix[i * nC + j]); // !!!
}
printf("\n"); // !!!
}
} else {
for (i = 0; i < nR; i++) {
for (j = 0; j < nC; j++) {
printf("%f \t ", pMatrix[i + j* nR ]); // !!!
}
printf("\n"); // !!!
}
}
printf("\n"); // !!!
}
int main(void)
{
const int m = 20;
const int n = 5;
const int k = 1;
double A[] = { 8, 4, 7, 3, 5, 1, 1, 3, 2, 1, 2, 3, 2, 0, 1, 1, 2, 3, 4, 1};
double B[] = { -1, 2, -1, 1, 2 };
double alpha = 1.0, beta = 0.0;
double * C = (double*) malloc(m * n * sizeof(double));
double * D = (double*) malloc(m * n * sizeof(double));
for (int i = 0; i < m*n; i++) C[i] = 0.0;
for (int i = 0; i < m*n; i++) D[i] = 0.0;
int lda = 20;
int ldb = 1;
int ldc = 20;
cblas_dger(CblasColMajor, m, n, alpha, A, 1, B, 1, C, ldc);
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
m, n, k,
alpha, A, lda,
B, ldb, beta,
D, ldc);
PrintMatrix(C, m, n, CblasRowMajor);
PrintMatrix(D, m, n, CblasRowMajor);
free(D);
free(C);
return 0;
}