NEON Cortex-A8 的内联汇编示例
In-Assembly example for NEON Cortex-A8
我正在尝试为 Cortex-A8 的 NEON 编译一段示例汇编代码,以便在 BeagleBone Black (BBB) 开发板上运行生成的二进制文件。我在 Eclipse 中使用下面列出的 GCC 编译器和汇编器:
GCC : arm-linux-gnueabi-gcc
ASSEMBLER : arm-linux-gnueabi-as
编译示例时出现以下错误;我尝试过的每个示例都会出现类似的错误:
Description Path Resource Location Type
SP not allowed in register list -- `ldmia r12,{r4-r11,r13,lr}' EXAMPLE_NEON line 61, external location: /tmp/ccTXrczs.s C/C++ Problem
我使用的代码
/************************
* neon.c
*
***************************/
#include <stdio.h>
/*
 * Input and output vectors for neontest().
 *
 * Each array carries its own aligned(16): the attribute applies only to the
 * declaration it is attached to, and the asm in neontest() accesses all three
 * buffers with ":128" alignment qualifiers.
 */
__attribute__((aligned (16)))
unsigned short int data1[8];
__attribute__((aligned (16)))
unsigned short int data2[8];
__attribute__((aligned (16)))
unsigned short int out[8];
/* Buffer the asm in neontest() stores core registers into and reloads from. */
void* neontest_save_buffer[16];
void
neontest(unsigned short int *a, unsigned short int *b,
unsigned short int* q)
{
__asm__(
" movw r12, #:lower16:neontest_save_buffer\n\t"
" movt r12, #:upper16:neontest_save_buffer\n\t"
" stmia r12, {r4-r11, r13, lr} @ save registers\n\t"
" vld1.16 {q1}, [r0:128]\n\t"
" vld1.16 {q2}, [r1:128]\n\t"
" vadd.i16 q0, q1, q2\n\t"
" vst1.32 {q0}, [r2:128]\n\t"
" movw r12, #:lower16:neontest_save_buffer\n\t"
" movt r12, #:upper16:neontest_save_buffer\n\t"
" ldmia r12, {r4-r11, r13, lr} @ reload all registers and return\n\t"
"finish:\n\t"
);
}
/*
 * Test driver: fills data1 with 0, 10, ..., 70 and data2 with 5s, clears out,
 * runs neontest() on them and prints the eight results on one line.
 */
int
main(void)
{
    int idx;
    int last = 7;    /* index of the final element, printed with a newline */

    idx = 0;
    while (idx <= last) {
        data1[idx] = idx * 10;
        data2[idx] = 5;
        out[idx] = 0;
        ++idx;
    }

    neontest(data1, data2, out);

    printf("output is: ");
    /* All but the last value are followed by a comma separator. */
    for (idx = 0; idx != last; ++idx) {
        printf("%d, ", out[idx]);
    }
    printf("%d\n", out[last]);

    return 0;
}
您似乎在使用 Thumb-2(32 位 Thumb)模式,在该模式下 sp 不能出现在寄存器列表中
(来自 [1])。
如果您没有设置新的堆栈,为什么还需要保存堆栈指针?
只需尝试从 stm 和 ldm 指令的寄存器列表中删除 r13。
编译器默认使用 Thumb 模式;在命令行中添加“-marm”即可在 ARM 模式下编译代码:
arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -marm neon.c
您也可以调整代码,让编译器负责寄存器的保存/恢复,这样代码就可以同时针对 ARM 和 Thumb-2 指令集编译:
#include <stdio.h>
/*
 * Input and output vectors for neontest().
 *
 * Each array carries its own aligned(16): the attribute applies only to the
 * declaration it is attached to, and the asm in neontest() accesses all three
 * buffers with ":128" alignment qualifiers.
 */
__attribute__((aligned (16)))
unsigned short int data1[8];
__attribute__((aligned (16)))
unsigned short int data2[8];
__attribute__((aligned (16)))
unsigned short int out[8];
void
neontest(unsigned short int *a, unsigned short int *b,
unsigned short int* q)
{
__asm volatile (
" vld1.16 {q1}, [%[a]:128]\n\t"
" vld1.16 {q2}, [%[b]:128]\n\t"
" vadd.i16 q0, q1, q2\n\t"
" vst1.32 {q0}, [%[q]:128]\n\t"
: [q] "+r" (q)
: [a] "r" (a), [b] "r" (b)
: "q0", "q1", "q2"
);
}
/*
 * Test driver: fills data1 with 0, 10, ..., 70 and data2 with 5s, clears out,
 * runs neontest() on them and prints the eight results on one line.
 */
int
main(void)
{
    int idx;
    int last = 7;    /* index of the final element, printed with a newline */

    idx = 0;
    while (idx <= last) {
        data1[idx] = idx * 10;
        data2[idx] = 5;
        out[idx] = 0;
        ++idx;
    }

    neontest(data1, data2, out);

    printf("output is: ");
    /* All but the last value are followed by a comma separator. */
    for (idx = 0; idx != last; ++idx) {
        printf("%d, ", out[idx]);
    }
    printf("%d\n", out[last]);

    return 0;
}
arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -marm neon2.c
arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -mthumb neon2.c
我正在尝试为 Cortex-A8 的 NEON 编译一段示例汇编代码,以便在 BeagleBone Black (BBB) 开发板上运行生成的二进制文件。我在 Eclipse 中使用下面列出的 GCC 编译器和汇编器:
GCC : arm-linux-gnueabi-gcc
ASSEMBLER : arm-linux-gnueabi-as
编译示例时出现以下错误;我尝试过的每个示例都会出现类似的错误:
Description Path Resource Location Type
SP not allowed in register list -- `ldmia r12,{r4-r11,r13,lr}' EXAMPLE_NEON line 61, external location: /tmp/ccTXrczs.s C/C++ Problem
我使用的代码
/************************
* neon.c
*
***************************/
#include <stdio.h>
/*
 * Input and output vectors for neontest().
 *
 * Each array carries its own aligned(16): the attribute applies only to the
 * declaration it is attached to, and the asm in neontest() accesses all three
 * buffers with ":128" alignment qualifiers.
 */
__attribute__((aligned (16)))
unsigned short int data1[8];
__attribute__((aligned (16)))
unsigned short int data2[8];
__attribute__((aligned (16)))
unsigned short int out[8];
/* Buffer the asm in neontest() stores core registers into and reloads from. */
void* neontest_save_buffer[16];
void
neontest(unsigned short int *a, unsigned short int *b,
unsigned short int* q)
{
__asm__(
" movw r12, #:lower16:neontest_save_buffer\n\t"
" movt r12, #:upper16:neontest_save_buffer\n\t"
" stmia r12, {r4-r11, r13, lr} @ save registers\n\t"
" vld1.16 {q1}, [r0:128]\n\t"
" vld1.16 {q2}, [r1:128]\n\t"
" vadd.i16 q0, q1, q2\n\t"
" vst1.32 {q0}, [r2:128]\n\t"
" movw r12, #:lower16:neontest_save_buffer\n\t"
" movt r12, #:upper16:neontest_save_buffer\n\t"
" ldmia r12, {r4-r11, r13, lr} @ reload all registers and return\n\t"
"finish:\n\t"
);
}
/*
 * Test driver: fills data1 with 0, 10, ..., 70 and data2 with 5s, clears out,
 * runs neontest() on them and prints the eight results on one line.
 */
int
main(void)
{
    int idx;
    int last = 7;    /* index of the final element, printed with a newline */

    idx = 0;
    while (idx <= last) {
        data1[idx] = idx * 10;
        data2[idx] = 5;
        out[idx] = 0;
        ++idx;
    }

    neontest(data1, data2, out);

    printf("output is: ");
    /* All but the last value are followed by a comma separator. */
    for (idx = 0; idx != last; ++idx) {
        printf("%d, ", out[idx]);
    }
    printf("%d\n", out[last]);

    return 0;
}
您似乎在使用 Thumb-2(32 位 Thumb)模式,在该模式下 sp 不能出现在寄存器列表中
(来自 [1])。
如果您没有设置新的堆栈,为什么还需要保存堆栈指针?
只需尝试从 stm 和 ldm 指令的寄存器列表中删除 r13。
编译器默认使用 Thumb 模式;在命令行中添加“-marm”即可在 ARM 模式下编译代码:
arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -marm neon.c
您也可以调整代码,让编译器负责寄存器的保存/恢复,这样代码就可以同时针对 ARM 和 Thumb-2 指令集编译:
#include <stdio.h>
/*
 * Input and output vectors for neontest().
 *
 * Each array carries its own aligned(16): the attribute applies only to the
 * declaration it is attached to, and the asm in neontest() accesses all three
 * buffers with ":128" alignment qualifiers.
 */
__attribute__((aligned (16)))
unsigned short int data1[8];
__attribute__((aligned (16)))
unsigned short int data2[8];
__attribute__((aligned (16)))
unsigned short int out[8];
void
neontest(unsigned short int *a, unsigned short int *b,
unsigned short int* q)
{
__asm volatile (
" vld1.16 {q1}, [%[a]:128]\n\t"
" vld1.16 {q2}, [%[b]:128]\n\t"
" vadd.i16 q0, q1, q2\n\t"
" vst1.32 {q0}, [%[q]:128]\n\t"
: [q] "+r" (q)
: [a] "r" (a), [b] "r" (b)
: "q0", "q1", "q2"
);
}
/*
 * Test driver: fills data1 with 0, 10, ..., 70 and data2 with 5s, clears out,
 * runs neontest() on them and prints the eight results on one line.
 */
int
main(void)
{
    int idx;
    int last = 7;    /* index of the final element, printed with a newline */

    idx = 0;
    while (idx <= last) {
        data1[idx] = idx * 10;
        data2[idx] = 5;
        out[idx] = 0;
        ++idx;
    }

    neontest(data1, data2, out);

    printf("output is: ");
    /* All but the last value are followed by a comma separator. */
    for (idx = 0; idx != last; ++idx) {
        printf("%d, ", out[idx]);
    }
    printf("%d\n", out[last]);

    return 0;
}
arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -marm neon2.c
arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -mthumb neon2.c