是一个 Fortran 子例程,带有指定大小的虚拟参数线程安全数组
is a Fortran subroutine with a dummy argument specified size array thread safe
以下代码在 gfortran 中编译,带有关于 large_array
大于堆栈变量限制的警告,指出数组将被移动到静态内存,因此不是线程安全的:
subroutine stack_size_warning
implicit none
real :: large_array(65536)
print *, large_array
end subroutine stack_size_warning
但是这个子例程编译时没有错误或警告,我可以用大于 65536 的 n 值调用它而不会出现问题,至少在简单的情况下是这样。
subroutine no_warning(n)
implicit none
integer :: n
real :: automatic_array(n)
print *, automatic_array
end subroutine no_warning
第二个数组线程安全吗?在第二个子例程中为 automatic_array
分配的内存在哪里?是否在每次调用时分配和释放内存使其比在堆栈上或预分配数组作为虚拟参数传入时更慢?
我编写了以下程序来测试 3 个场景,一个子例程在堆栈上有一个小数组,另一个子例程有一个超过堆栈限制的大数组,因此存储在静态内存中,第三个是一个伪参数指定例程中定义的数组的大小。
这是那个程序:
program main
implicit none
call small
call large
call automatic(65536)
end program main
subroutine small
implicit none
real :: small_array(10)
small_array=1.
print *, small_array
end subroutine small
subroutine large
implicit none
real :: large_array(65536)
large_array=1.
print *, large_array
end subroutine large
subroutine automatic(n)
implicit none
integer :: n
real :: automatic_array(n)
automatic_array=1.
print *, automatic_array
end subroutine automatic
使用 steve
的建议,我用树转储编译如下:
gfortran array_dim_test.f90 -o array_dim_test -fdump-tree-original
完整转储在最后,但总结一下我所看到的,automatic
子例程有一个 try/finally 块。在 try 块中,调用 malloc 分配内存,在 finally 块中,释放内存。所以我猜这个内存是在每次调用子程序时在堆上分配和释放的。这在直觉上是有意义的,因为程序如何知道如何处理这个仅存在于子例程中的数组,其大小在对子例程的调用中定义,但有趣的是在树转储中看到显式调用。这看起来是 thread-safe,但如果使用相同的数组大小参数多次调用此例程,则可能也不是最有效的事情,每次调用都分配和释放内存。
这里是树转储:
__attribute__((fn spec (". w ")))
void automatic (integer(kind=4) & restrict n)
{
void * restrict D.3964;
integer(kind=8) ubound.0;
integer(kind=8) size.1;
real(kind=4)[0:D.3961] * restrict automatic_array;
integer(kind=8) D.3961;
bitsizetype D.3962;
sizetype D.3963;
try
{
ubound.0 = (integer(kind=8)) *n;
size.1 = NON_LVALUE_EXPR <ubound.0>;
size.1 = MAX_EXPR <size.1, 0>;
D.3961 = size.1 + -1;
D.3962 = (bitsizetype) (sizetype) NON_LVALUE_EXPR <size.1> * 32;
D.3963 = (sizetype) NON_LVALUE_EXPR <size.1> * 4;
D.3964 = (void * restrict) __builtin_malloc (MAX_EXPR <(unsigned long) (size.1 * 4), 1>);
automatic_array = (real(kind=4)[0:D.3961] * restrict) D.3964;
{
integer(kind=8) D.3940;
D.3940 = ubound.0;
{
integer(kind=8) S.2;
S.2 = 1;
while (1)
{
if (S.2 > D.3940) goto L.1;
(*automatic_array)[S.2 + -1] = 1.0e+0;
S.2 = S.2 + 1;
}
L.1:;
}
}
{
struct __st_parameter_dt dt_parm.3;
dt_parm.3.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
dt_parm.3.common.line = 27;
dt_parm.3.common.flags = 128;
dt_parm.3.common.unit = 6;
_gfortran_st_write (&dt_parm.3);
{
integer(kind=8) D.3944;
struct array01_real(kind=4) parm.4;
D.3944 = ubound.0;
parm.4.span = 4;
parm.4.dtype = {.elem_len=4, .rank=1, .type=3};
parm.4.dim[0].lbound = 1;
parm.4.dim[0].ubound = D.3944;
parm.4.dim[0].stride = 1;
parm.4.data = (void *) &(*automatic_array)[0];
parm.4.offset = -1;
_gfortran_transfer_array_write (&dt_parm.3, &parm.4, 4, 0);
}
_gfortran_st_write_done (&dt_parm.3);
}
}
finally
{
__builtin_free ((void *) automatic_array);
}
}
__attribute__((fn spec (". ")))
void large ()
{
static real(kind=4) large_array[65536];
{
integer(kind=8) S.5;
S.5 = 1;
while (1)
{
if (S.5 > 65536) goto L.2;
large_array[S.5 + -1] = 1.0e+0;
S.5 = S.5 + 1;
}
L.2:;
}
{
struct __st_parameter_dt dt_parm.6;
dt_parm.6.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
dt_parm.6.common.line = 19;
dt_parm.6.common.flags = 128;
dt_parm.6.common.unit = 6;
_gfortran_st_write (&dt_parm.6);
{
struct array01_real(kind=4) parm.7;
parm.7.span = 4;
parm.7.dtype = {.elem_len=4, .rank=1, .type=3};
parm.7.dim[0].lbound = 1;
parm.7.dim[0].ubound = 65536;
parm.7.dim[0].stride = 1;
parm.7.data = (void *) &large_array[0];
parm.7.offset = -1;
_gfortran_transfer_array_write (&dt_parm.6, &parm.7, 4, 0);
}
_gfortran_st_write_done (&dt_parm.6);
}
}
__attribute__((fn spec (". ")))
void small ()
{
real(kind=4) small_array[10];
{
integer(kind=8) S.8;
S.8 = 1;
while (1)
{
if (S.8 > 10) goto L.3;
small_array[S.8 + -1] = 1.0e+0;
S.8 = S.8 + 1;
}
L.3:;
}
{
struct __st_parameter_dt dt_parm.9;
dt_parm.9.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
dt_parm.9.common.line = 12;
dt_parm.9.common.flags = 128;
dt_parm.9.common.unit = 6;
_gfortran_st_write (&dt_parm.9);
{
struct array01_real(kind=4) parm.10;
parm.10.span = 4;
parm.10.dtype = {.elem_len=4, .rank=1, .type=3};
parm.10.dim[0].lbound = 1;
parm.10.dim[0].ubound = 10;
parm.10.dim[0].stride = 1;
parm.10.data = (void *) &small_array[0];
parm.10.offset = -1;
_gfortran_transfer_array_write (&dt_parm.9, &parm.10, 4, 0);
}
_gfortran_st_write_done (&dt_parm.9);
}
}
__attribute__((fn spec (". ")))
void MAIN__ ()
{
small ();
large ();
{
static integer(kind=4) C.3993 = 65536;
automatic (&C.3993);
}
}
__attribute__((externally_visible))
integer(kind=4) main (integer(kind=4) argc, character(kind=1) * * argv)
{
static integer(kind=4) options.11[7] = {2116, 4095, 0, 1, 1, 0, 31};
_gfortran_set_args (argc, argv);
_gfortran_set_options (7, &options.11[0]);
MAIN__ ();
return 0;
}
以下代码在 gfortran 中编译,带有关于 large_array
大于堆栈变量限制的警告,指出数组将被移动到静态内存,因此不是线程安全的:
subroutine stack_size_warning
implicit none
real :: large_array(65536)
print *, large_array
end subroutine stack_size_warning
但是这个子例程编译时没有错误或警告,我可以用大于 65536 的 n 值调用它而不会出现问题,至少在简单的情况下是这样。
subroutine no_warning(n)
implicit none
integer :: n
real :: automatic_array(n)
print *, automatic_array
end subroutine no_warning
第二个数组线程安全吗?在第二个子例程中为 automatic_array
分配的内存在哪里?是否在每次调用时分配和释放内存使其比在堆栈上或预分配数组作为虚拟参数传入时更慢?
我编写了以下程序来测试 3 个场景,一个子例程在堆栈上有一个小数组,另一个子例程有一个超过堆栈限制的大数组,因此存储在静态内存中,第三个是一个伪参数指定例程中定义的数组的大小。
这是那个程序:
program main
implicit none
call small
call large
call automatic(65536)
end program main
subroutine small
implicit none
real :: small_array(10)
small_array=1.
print *, small_array
end subroutine small
subroutine large
implicit none
real :: large_array(65536)
large_array=1.
print *, large_array
end subroutine large
subroutine automatic(n)
implicit none
integer :: n
real :: automatic_array(n)
automatic_array=1.
print *, automatic_array
end subroutine automatic
使用 steve
的建议,我用树转储编译如下:
gfortran array_dim_test.f90 -o array_dim_test -fdump-tree-original
完整转储在最后,但总结一下我所看到的,automatic
子例程有一个 try/finally 块。在 try 块中,调用 malloc 分配内存,在 finally 块中,释放内存。所以我猜这个内存是在每次调用子程序时在堆上分配和释放的。这在直觉上是有意义的,因为程序如何知道如何处理这个仅存在于子例程中的数组,其大小在对子例程的调用中定义,但有趣的是在树转储中看到显式调用。这看起来是 thread-safe,但如果使用相同的数组大小参数多次调用此例程,则可能也不是最有效的事情,每次调用都分配和释放内存。
这里是树转储:
__attribute__((fn spec (". w ")))
void automatic (integer(kind=4) & restrict n)
{
void * restrict D.3964;
integer(kind=8) ubound.0;
integer(kind=8) size.1;
real(kind=4)[0:D.3961] * restrict automatic_array;
integer(kind=8) D.3961;
bitsizetype D.3962;
sizetype D.3963;
try
{
ubound.0 = (integer(kind=8)) *n;
size.1 = NON_LVALUE_EXPR <ubound.0>;
size.1 = MAX_EXPR <size.1, 0>;
D.3961 = size.1 + -1;
D.3962 = (bitsizetype) (sizetype) NON_LVALUE_EXPR <size.1> * 32;
D.3963 = (sizetype) NON_LVALUE_EXPR <size.1> * 4;
D.3964 = (void * restrict) __builtin_malloc (MAX_EXPR <(unsigned long) (size.1 * 4), 1>);
automatic_array = (real(kind=4)[0:D.3961] * restrict) D.3964;
{
integer(kind=8) D.3940;
D.3940 = ubound.0;
{
integer(kind=8) S.2;
S.2 = 1;
while (1)
{
if (S.2 > D.3940) goto L.1;
(*automatic_array)[S.2 + -1] = 1.0e+0;
S.2 = S.2 + 1;
}
L.1:;
}
}
{
struct __st_parameter_dt dt_parm.3;
dt_parm.3.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
dt_parm.3.common.line = 27;
dt_parm.3.common.flags = 128;
dt_parm.3.common.unit = 6;
_gfortran_st_write (&dt_parm.3);
{
integer(kind=8) D.3944;
struct array01_real(kind=4) parm.4;
D.3944 = ubound.0;
parm.4.span = 4;
parm.4.dtype = {.elem_len=4, .rank=1, .type=3};
parm.4.dim[0].lbound = 1;
parm.4.dim[0].ubound = D.3944;
parm.4.dim[0].stride = 1;
parm.4.data = (void *) &(*automatic_array)[0];
parm.4.offset = -1;
_gfortran_transfer_array_write (&dt_parm.3, &parm.4, 4, 0);
}
_gfortran_st_write_done (&dt_parm.3);
}
}
finally
{
__builtin_free ((void *) automatic_array);
}
}
__attribute__((fn spec (". ")))
void large ()
{
static real(kind=4) large_array[65536];
{
integer(kind=8) S.5;
S.5 = 1;
while (1)
{
if (S.5 > 65536) goto L.2;
large_array[S.5 + -1] = 1.0e+0;
S.5 = S.5 + 1;
}
L.2:;
}
{
struct __st_parameter_dt dt_parm.6;
dt_parm.6.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
dt_parm.6.common.line = 19;
dt_parm.6.common.flags = 128;
dt_parm.6.common.unit = 6;
_gfortran_st_write (&dt_parm.6);
{
struct array01_real(kind=4) parm.7;
parm.7.span = 4;
parm.7.dtype = {.elem_len=4, .rank=1, .type=3};
parm.7.dim[0].lbound = 1;
parm.7.dim[0].ubound = 65536;
parm.7.dim[0].stride = 1;
parm.7.data = (void *) &large_array[0];
parm.7.offset = -1;
_gfortran_transfer_array_write (&dt_parm.6, &parm.7, 4, 0);
}
_gfortran_st_write_done (&dt_parm.6);
}
}
__attribute__((fn spec (". ")))
void small ()
{
real(kind=4) small_array[10];
{
integer(kind=8) S.8;
S.8 = 1;
while (1)
{
if (S.8 > 10) goto L.3;
small_array[S.8 + -1] = 1.0e+0;
S.8 = S.8 + 1;
}
L.3:;
}
{
struct __st_parameter_dt dt_parm.9;
dt_parm.9.common.filename = &"array_dim_test.f90"[1]{lb: 1 sz: 1};
dt_parm.9.common.line = 12;
dt_parm.9.common.flags = 128;
dt_parm.9.common.unit = 6;
_gfortran_st_write (&dt_parm.9);
{
struct array01_real(kind=4) parm.10;
parm.10.span = 4;
parm.10.dtype = {.elem_len=4, .rank=1, .type=3};
parm.10.dim[0].lbound = 1;
parm.10.dim[0].ubound = 10;
parm.10.dim[0].stride = 1;
parm.10.data = (void *) &small_array[0];
parm.10.offset = -1;
_gfortran_transfer_array_write (&dt_parm.9, &parm.10, 4, 0);
}
_gfortran_st_write_done (&dt_parm.9);
}
}
__attribute__((fn spec (". ")))
void MAIN__ ()
{
small ();
large ();
{
static integer(kind=4) C.3993 = 65536;
automatic (&C.3993);
}
}
__attribute__((externally_visible))
integer(kind=4) main (integer(kind=4) argc, character(kind=1) * * argv)
{
static integer(kind=4) options.11[7] = {2116, 4095, 0, 1, 1, 0, 31};
_gfortran_set_args (argc, argv);
_gfortran_set_options (7, &options.11[0]);
MAIN__ ();
return 0;
}