为什么我会在这里出现堆栈溢出?
Why Do I Get A Stack Overflow Here?
我正在使用 SSE
来实现矩阵乘法,当 运行 代码时我得到了 Stack Overflow Exception
。而 stack overflow exception
在 chktsk.asm
; Find next lower page and probe
cs20:
sub eax, _PAGESIZE_ ; decrease by PAGESIZE
test dword ptr [eax],eax ; probe page.
jmp short cs10
_chkstk endp
end
很难发现哪里错了,我的代码是:
main.cpp
#include "sse_matrix.h"
int main(int argc, char* argv[])
{
float left[size] = {0};
float right[size] = {0};
float result[size] = {0};
// initialize value
for (int i = 0; i < dim; i ++)
{
for (int j = 0; j < dim; j ++)
{
left[i*dim + j] = j;
right[i*dim + j] = j;
}
}
//calculate the result
SSE_Matrix_Multiply(left, right, result);
/*for (int i = 0; i < dim; i ++)
{
for (int j = 0; j < dim; j ++)
{
cout << result[i * dim + j] << " ";
}
cout << endl;
}*/
system("pause");
return 0;
}
包含文件:
#ifndef __SSE_MATRIX_H
#define __SSE_MATRIX_H
#include <iostream>
using std::cin;
using std::cout;
using std::endl;
#define dim 512
#define size dim * dim
struct Matrix_Info
{
float *A;
int ax, ay;
float *B;
int bx, by;
float *C;
int cx, cy;
int m;
int n;
};
void Transpose_Matrix_SSE(float * matrix)
{
__m128 row1 = _mm_loadu_ps(&matrix[0*4]);
__m128 row2 = _mm_loadu_ps(&matrix[1*4]);
__m128 row3 = _mm_loadu_ps(&matrix[2*4]);
__m128 row4 = _mm_loadu_ps(&matrix[3*4]);
_MM_TRANSPOSE4_PS(row1, row2, row3, row4);
_mm_storeu_ps(&matrix[0*4], row1);
_mm_storeu_ps(&matrix[1*4], row2);
_mm_storeu_ps(&matrix[2*4], row3);
_mm_storeu_ps(&matrix[3*4], row4);
}
float * Shuffle_Matrix_Multiply(float * left, float * right)
{
__m128 _t1, _t2, _sum;
_sum = _mm_setzero_ps(); // set all value of _sum to zero
float _result[size] = {0};
float _res[4] = {0};
for (int i = 0; i < 4; i ++)
{
for (int j = 0; j < 4; j ++)
{
_t1 = _mm_loadu_ps(left + i * 4);
_t2 = _mm_loadu_ps(right + j * 4);
_sum = _mm_mul_ps(_t1, _t2);
_mm_storeu_ps(_res, _sum);
_result[i * 4 + j] = _res[0] + _res[1] + _res[2] + _res[3];
}
}
return _result;
}
float * SSE_4_Matrix(struct Matrix_Info * my_info)
{
int m = my_info->m;
int n = my_info->n;
int ax = my_info->ax;
int ay = my_info->ay;
int bx = my_info->bx;
int by = my_info->by;
//1. split Matrix A and Matrix B
float * _a = new float[16];
float * _b = new float[16];
for (int i = 0; i < m; i ++)
{
for (int j = 0; j < m; j ++)
{
_a[i*m + j] = *(my_info->A + (i + ax) * n + j + ay);
_b[i*m + j] = *(my_info->B + (i + bx) * n + j + by);
}
}
//2. transpose Matrix B
Transpose_Matrix_SSE(_b);
//3. calculate result and return a float pointer
return Shuffle_Matrix_Multiply(_a, _b);
}
int Matrix_Multiply(struct Matrix_Info * my_info)
{
int m = my_info->m;
int n = my_info->n;
int cx = my_info->cx;
int cy = my_info->cy;
for (int i = 0; i < m; i ++)
{
for (int j = 0; j < m; j ++)
{
*(my_info->C + (i + cx) * n + j + cy) += SSE_4_Matrix(my_info)[i*m + j];
}
}
return 0;
}
void SSE_Matrix_Multiply(float *left, float *right, float *result)
{
struct Matrix_Info my_info;
my_info.A = left;
my_info.B = right;
my_info.C = result;
my_info.n = dim;
my_info.m = 4;
// Matrix A row:i, column:j
for (int i = 0; i < dim; i += 4)
{
for (int j = 0; j < dim; j += 4)
{
// Matrix B row:j column:k
for (int k = 0; k < dim; k += 4)
{
my_info.ax = i;
my_info.ay = j;
my_info.bx = j;
my_info.by = k;
my_info.cx = i;
my_info.cy = k;
Matrix_Multiply(&my_info);
}
}
}
}
#endif
当include文件中的dim
(矩阵的维数)为4, 8, 16, 32, 64, 128 and 256
时,不会出现Stack Overflow Exception
。但是,当 dim
为 512 或更大时,将出现 Stack Overflow Exception
。
我的OS是Windows 10
,IDE
是Visual Studio 2012
。
真正让我感到困惑的是,当我在main.cpp
中的一个断点在#include "sse_matrix.h"
,然后运行时,出现了Stack Overflow Exception
。我认为我的代码中没有语法错误或逻辑错误。但我不知道如何解决它。
在 #define dim 512
之后,您 #define
将 size
变为 dim * dim
,因此 size
为 262,144。然后你把 float _result[size]
放在堆栈上。大多数堆栈都小于 262,144 * sizeof(float)
.
正如@πìντα ῥεῖ 所说,您可能想要更像:
std::vector<float> _result(size, 0);
这样你的 262,144 * sizeof(float)
内存块就被放在堆上并为你管理(这比你自己分配和 de-allocating 内存要好得多 - 甚至比智能指针更好因为 std::vector
会为您适应尺寸的变化。
正如@Basile Starynkevitch 指出的那样,您不希望:
#define size dim * dim
因为 dim * dim
文本将简单地替换 size
并且很容易导致语法错误或更糟。
更好的是:
#define size (dim * dim)
更好的是:
constexpr size_t dim = 512;
constexpr size_t size = dim * dim;
我正在使用 SSE
来实现矩阵乘法,当 运行 代码时我得到了 Stack Overflow Exception
。而 stack overflow exception
在 chktsk.asm
; Find next lower page and probe
cs20:
sub eax, _PAGESIZE_ ; decrease by PAGESIZE
test dword ptr [eax],eax ; probe page.
jmp short cs10
_chkstk endp
end
很难发现哪里错了,我的代码是:
main.cpp
#include "sse_matrix.h"
int main(int argc, char* argv[])
{
float left[size] = {0};
float right[size] = {0};
float result[size] = {0};
// initialize value
for (int i = 0; i < dim; i ++)
{
for (int j = 0; j < dim; j ++)
{
left[i*dim + j] = j;
right[i*dim + j] = j;
}
}
//calculate the result
SSE_Matrix_Multiply(left, right, result);
/*for (int i = 0; i < dim; i ++)
{
for (int j = 0; j < dim; j ++)
{
cout << result[i * dim + j] << " ";
}
cout << endl;
}*/
system("pause");
return 0;
}
包含文件:
#ifndef __SSE_MATRIX_H
#define __SSE_MATRIX_H
#include <iostream>
using std::cin;
using std::cout;
using std::endl;
#define dim 512
#define size dim * dim
struct Matrix_Info
{
float *A;
int ax, ay;
float *B;
int bx, by;
float *C;
int cx, cy;
int m;
int n;
};
void Transpose_Matrix_SSE(float * matrix)
{
__m128 row1 = _mm_loadu_ps(&matrix[0*4]);
__m128 row2 = _mm_loadu_ps(&matrix[1*4]);
__m128 row3 = _mm_loadu_ps(&matrix[2*4]);
__m128 row4 = _mm_loadu_ps(&matrix[3*4]);
_MM_TRANSPOSE4_PS(row1, row2, row3, row4);
_mm_storeu_ps(&matrix[0*4], row1);
_mm_storeu_ps(&matrix[1*4], row2);
_mm_storeu_ps(&matrix[2*4], row3);
_mm_storeu_ps(&matrix[3*4], row4);
}
float * Shuffle_Matrix_Multiply(float * left, float * right)
{
__m128 _t1, _t2, _sum;
_sum = _mm_setzero_ps(); // set all value of _sum to zero
float _result[size] = {0};
float _res[4] = {0};
for (int i = 0; i < 4; i ++)
{
for (int j = 0; j < 4; j ++)
{
_t1 = _mm_loadu_ps(left + i * 4);
_t2 = _mm_loadu_ps(right + j * 4);
_sum = _mm_mul_ps(_t1, _t2);
_mm_storeu_ps(_res, _sum);
_result[i * 4 + j] = _res[0] + _res[1] + _res[2] + _res[3];
}
}
return _result;
}
float * SSE_4_Matrix(struct Matrix_Info * my_info)
{
int m = my_info->m;
int n = my_info->n;
int ax = my_info->ax;
int ay = my_info->ay;
int bx = my_info->bx;
int by = my_info->by;
//1. split Matrix A and Matrix B
float * _a = new float[16];
float * _b = new float[16];
for (int i = 0; i < m; i ++)
{
for (int j = 0; j < m; j ++)
{
_a[i*m + j] = *(my_info->A + (i + ax) * n + j + ay);
_b[i*m + j] = *(my_info->B + (i + bx) * n + j + by);
}
}
//2. transpose Matrix B
Transpose_Matrix_SSE(_b);
//3. calculate result and return a float pointer
return Shuffle_Matrix_Multiply(_a, _b);
}
int Matrix_Multiply(struct Matrix_Info * my_info)
{
int m = my_info->m;
int n = my_info->n;
int cx = my_info->cx;
int cy = my_info->cy;
for (int i = 0; i < m; i ++)
{
for (int j = 0; j < m; j ++)
{
*(my_info->C + (i + cx) * n + j + cy) += SSE_4_Matrix(my_info)[i*m + j];
}
}
return 0;
}
void SSE_Matrix_Multiply(float *left, float *right, float *result)
{
struct Matrix_Info my_info;
my_info.A = left;
my_info.B = right;
my_info.C = result;
my_info.n = dim;
my_info.m = 4;
// Matrix A row:i, column:j
for (int i = 0; i < dim; i += 4)
{
for (int j = 0; j < dim; j += 4)
{
// Matrix B row:j column:k
for (int k = 0; k < dim; k += 4)
{
my_info.ax = i;
my_info.ay = j;
my_info.bx = j;
my_info.by = k;
my_info.cx = i;
my_info.cy = k;
Matrix_Multiply(&my_info);
}
}
}
}
#endif
当include文件中的dim
(矩阵的维数)为4, 8, 16, 32, 64, 128 and 256
时,不会出现Stack Overflow Exception
。但是,当 dim
为 512 或更大时,将出现 Stack Overflow Exception
。
我的OS是Windows 10
,IDE
是Visual Studio 2012
。
真正让我感到困惑的是,当我在main.cpp
中的一个断点在#include "sse_matrix.h"
,然后运行时,出现了Stack Overflow Exception
。我认为我的代码中没有语法错误或逻辑错误。但我不知道如何解决它。
在 #define dim 512
之后,您 #define
将 size
变为 dim * dim
,因此 size
为 262,144。然后你把 float _result[size]
放在堆栈上。大多数堆栈都小于 262,144 * sizeof(float)
.
正如@πìντα ῥεῖ 所说,您可能想要更像:
std::vector<float> _result(size, 0);
这样你的 262,144 * sizeof(float)
内存块就被放在堆上并为你管理(这比你自己分配和 de-allocating 内存要好得多 - 甚至比智能指针更好因为 std::vector
会为您适应尺寸的变化。
正如@Basile Starynkevitch 指出的那样,您不希望:
#define size dim * dim
因为 dim * dim
文本将简单地替换 size
并且很容易导致语法错误或更糟。
更好的是:
#define size (dim * dim)
更好的是:
constexpr size_t dim = 512;
constexpr size_t size = dim * dim;