Hi
I added a piece of code to a multimedia application.
It reverses the data in an array.
It works, but when I compile with -O2 or -O3 the result is wrong, and with -O2 or -O3 the vmov produces an illegal instruction.
I don't understand...
Is this a GCC bug?
asm volatile
(
"pld [%0, #0xFFF];\n\t\
vldm %0!,{d0-d3};\n\t\
vswp.32 d0,d3;\n\t\
vswp.32 d1,d2;\n\t\
VREV64.32 q0, q0;\n\t\
VREV64.32 q1, q1;\n\t\
vstm %1!,{d0-d3};"
:
:"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr)
:"d0", "d1","d2","d3","d4","d5","d6","d7"
);
vldm %1!,{d4-d7};\n\t\
VMUL.I32 d0,d0,d4;\n\t\
VMUL.I32 d1,d1,d5;\n\t\
VMUL.I32 d2,d2,d6;\n\t\
VMUL.I32 d3,d3,d7;\n\t\
VADD.I32 d0,d0,d1;\n\t\
VADD.I32 d2,d2,d3;\n\t\
VADD.I32 d0,d0,d2;\n\t\
VMOV %2,%3,d0;\n\t\
add %3,%3,%2;\n\t\
str %3,[%4]"
Hi Ching Hsiung Yu,
It seems to depend on the compiler version. GCC 4.3.1 compiled the code successfully. However, GCC 4.9.3 could not recognize the NEON instructions, even though the corresponding assembler shipped with GCC 4.9.3 does recognize them. So you had better write this code in pure assembly rather than inline assembly, as jebsbauer says.
Best regards,
Yasuhiko Koumoto.
It seems strange that GCC cannot use NEON...
Of course, this applies only to the inline assembler. I used your source code for the assembly test.
Here are the procedures.
$ cat temp.c main() { float *data; float *coeff; float tmp,sum; float *ptr; asm volatile ( "pld [%0, #0xFFF];\n\t\ vldm %0!,{d0-d3};\n\t\ vswp.32 d0,d3;\n\t\ vswp.32 d1,d2;\n\t\ VREV64.32 q0, q0;\n\t\ VREV64.32 q1, q1;\n\t\ vstm %1!,{d0-d3};" : :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr) :"d0", "d1","d2","d3","d4","d5","d6","d7" ); asm volatile ( "pld [%0, #0xFFF];\n\t\ vldm %0!,{d0-d3};\n\t\ vldm %1!,{d4-d7};\n\t\ VMUL.I32 d0,d0,d4;\n\t\ VMUL.I32 d1,d1,d5;\n\t\ VMUL.I32 d2,d2,d6;\n\t\ VMUL.I32 d3,d3,d7;\n\t\ VADD.I32 d0,d0,d1;\n\t\ VADD.I32 d2,d2,d3;\n\t\ VADD.I32 d0,d0,d2;\n\t\ VMOV %2,%3,d0;\n\t\ add %3,%3,%2;\n\t\ str %3,[%4]" : :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr) :"d0", "d1","d2","d3","d4","d5","d6","d7" ); } $ arm-elf-gcc -o temp.out -O3 -mfpu=neon temp.c -nostdlib /cygdrive/d/arm-tools/bin/../lib/gcc/arm-elf/4.3.1/../../../../arm-elf/bin/ld: warning: cannot find entry symbol _start; defaulting to 00008000 $ arm-elf-objdump.exe -D temp.out temp.out: file format elf32-littlearm Disassembly of section .text: 00008000 <main>: 8000: e52d4004 push {r4} ; (str r4, [sp, #-4]!) 
8004: e3a0c000 mov ip, #0 ; 0x0 8008: e3a04000 mov r4, #0 ; 0x0 800c: f5d4ffff pld [r4, #4095] 8010: ecb40b08 vldmia r4!, {d0-d3} 8014: f3b20003 vswp d0, d3 8018: f3b21002 vswp d1, d2 801c: f3b80040 vrev64.32 q0, q0 8020: f3b82042 vrev64.32 q1, q1 8024: eca40b08 vstmia r4!, {d0-d3} 8028: f5d4ffff pld [r4, #4095] 802c: ecb40b08 vldmia r4!, {d0-d3} 8030: ecb44b08 vldmia r4!, {d4-d7} 8034: f2200914 vmul.i32 d0, d0, d4 8038: f2211915 vmul.i32 d1, d1, d5 803c: f2222916 vmul.i32 d2, d2, d6 8040: f2233917 vmul.i32 d3, d3, d7 8044: f2200801 vadd.i32 d0, d0, d1 8048: f2222803 vadd.i32 d2, d2, d3 804c: f2200802 vadd.i32 d0, d0, d2 8050: ec5ccb10 vmov ip, ip, d0 8054: e08cc00c add ip, ip, ip 8058: e584c000 str ip, [r4] 805c: e8bd0010 pop {r4} 8060: e12fff1e bx lr Disassembly of section .comment: 00000000 <.comment>: 0: 43434700 movtmi r4, #14080 ; 0x3700 4: 4728203a undefined 8: 2029554e eorcs r5, r9, lr, asr #10 c: 2e332e34 mrccs 14, 1, r2, cr3, cr4, {1} 10: Address 0x00000010 is out of bounds. Disassembly of section .ARM.attributes: 00000000 <_stack-0x80000>: 0: 00000f41 andeq r0, r0, r1, asr #30 4: 61656100 cmnvs r5, r0, lsl #2 8: 01006962 tsteq r0, r2, ror #18 c: 00000005 andeq r0, r0, r5 $ arm-elf-gcc -v Using built-in specs. Target: arm-elf Configured with: ../gcc-4.3.1/configure --target=arm-elf --with-gmp=/usr/local/gmp-4.2.2 --with-mpfr=/usr/local/mpfr-2.3.1 --prefix=/usr/local/arm-tools --enable-languages=c --disable-libssp Thread model: single gcc version 4.3.1 (GCC)
Are there any strange parts?