Hi
I added a piece of inline assembly to some multimedia code.
It reverses the order of the data in an array.
It works without optimization, but when I compile with -O2 or -O3 the result is wrong, and the vmov generated at -O2/-O3 is an illegal instruction.
I don't understand...
Is this a gcc bug??
/*
 * Reverse one block of eight 32-bit elements: load 32 bytes from `data`
 * into d0-d3, reverse the element order, and store the 32 bytes to `coeff`.
 *
 * Operand/clobber notes:
 *  - `data` and `coeff` are input-only operands, so the template must not
 *    change the registers that hold them. The write-back forms ("%0!",
 *    "%1!") did exactly that, so they are replaced by the plain forms; the
 *    C variables keep their values, which is what the compiler assumes.
 *  - "memory" tells the compiler that the asm reads and writes memory
 *    through the two pointers, so it must not cache or reorder accesses to
 *    that memory around this statement.
 *  - Only operands the template references are listed, and only the
 *    registers it actually touches (d0-d3) are clobbered.
 */
asm volatile
(
    "pld    [%0, #0xFFF]\n\t"       /* prefetch hint, offset kept as written  */
    "vldm   %0, {d0-d3}\n\t"        /* d0..d3 = e0 e1 | e2 e3 | e4 e5 | e6 e7 */
    "vswp   d0, d3\n\t"             /* reverse the order of the D registers   */
    "vswp   d1, d2\n\t"             /* d0..d3 = e6 e7 | e4 e5 | e2 e3 | e0 e1 */
    "vrev64.32 q0, q0\n\t"          /* swap the two words inside each D reg   */
    "vrev64.32 q1, q1\n\t"          /* d0..d3 = e7 e6 | e5 e4 | e3 e2 | e1 e0 */
    "vstm   %1, {d0-d3}"            /* write the reversed block to coeff      */
    : /* no outputs */
    : "r"(data), "r"(coeff)
    : "d0", "d1", "d2", "d3", "memory"
);
vldm %1!,{d4-d7};\n\t\
VMUL.I32 d0,d0,d4;\n\t\
VMUL.I32 d1,d1,d5;\n\t\
VMUL.I32 d2,d2,d6;\n\t\
VMUL.I32 d3,d3,d7;\n\t\
VADD.I32 d0,d0,d1;\n\t\
VADD.I32 d2,d2,d3;\n\t\
VADD.I32 d0,d0,d2;\n\t\
VMOV %2,%3,d0;\n\t\
add %3,%3,%2;\n\t\
str %3,[%4]"
It would be strange if gcc could not handle NEON instructions.
To be clear, I am only talking about inline assembly here. I compiled and assembled your source code as posted.
Here is the procedure I followed.
$ cat temp.c main() { float *data; float *coeff; float tmp,sum; float *ptr; asm volatile ( "pld [%0, #0xFFF];\n\t\ vldm %0!,{d0-d3};\n\t\ vswp.32 d0,d3;\n\t\ vswp.32 d1,d2;\n\t\ VREV64.32 q0, q0;\n\t\ VREV64.32 q1, q1;\n\t\ vstm %1!,{d0-d3};" : :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr) :"d0", "d1","d2","d3","d4","d5","d6","d7" ); asm volatile ( "pld [%0, #0xFFF];\n\t\ vldm %0!,{d0-d3};\n\t\ vldm %1!,{d4-d7};\n\t\ VMUL.I32 d0,d0,d4;\n\t\ VMUL.I32 d1,d1,d5;\n\t\ VMUL.I32 d2,d2,d6;\n\t\ VMUL.I32 d3,d3,d7;\n\t\ VADD.I32 d0,d0,d1;\n\t\ VADD.I32 d2,d2,d3;\n\t\ VADD.I32 d0,d0,d2;\n\t\ VMOV %2,%3,d0;\n\t\ add %3,%3,%2;\n\t\ str %3,[%4]" : :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr) :"d0", "d1","d2","d3","d4","d5","d6","d7" ); } $ arm-elf-gcc -o temp.out -O3 -mfpu=neon temp.c -nostdlib /cygdrive/d/arm-tools/bin/../lib/gcc/arm-elf/4.3.1/../../../../arm-elf/bin/ld: warning: cannot find entry symbol _start; defaulting to 00008000 $ arm-elf-objdump.exe -D temp.out temp.out: file format elf32-littlearm Disassembly of section .text: 00008000 <main>: 8000: e52d4004 push {r4} ; (str r4, [sp, #-4]!) 
8004: e3a0c000 mov ip, #0 ; 0x0 8008: e3a04000 mov r4, #0 ; 0x0 800c: f5d4ffff pld [r4, #4095] 8010: ecb40b08 vldmia r4!, {d0-d3} 8014: f3b20003 vswp d0, d3 8018: f3b21002 vswp d1, d2 801c: f3b80040 vrev64.32 q0, q0 8020: f3b82042 vrev64.32 q1, q1 8024: eca40b08 vstmia r4!, {d0-d3} 8028: f5d4ffff pld [r4, #4095] 802c: ecb40b08 vldmia r4!, {d0-d3} 8030: ecb44b08 vldmia r4!, {d4-d7} 8034: f2200914 vmul.i32 d0, d0, d4 8038: f2211915 vmul.i32 d1, d1, d5 803c: f2222916 vmul.i32 d2, d2, d6 8040: f2233917 vmul.i32 d3, d3, d7 8044: f2200801 vadd.i32 d0, d0, d1 8048: f2222803 vadd.i32 d2, d2, d3 804c: f2200802 vadd.i32 d0, d0, d2 8050: ec5ccb10 vmov ip, ip, d0 8054: e08cc00c add ip, ip, ip 8058: e584c000 str ip, [r4] 805c: e8bd0010 pop {r4} 8060: e12fff1e bx lr Disassembly of section .comment: 00000000 <.comment>: 0: 43434700 movtmi r4, #14080 ; 0x3700 4: 4728203a undefined 8: 2029554e eorcs r5, r9, lr, asr #10 c: 2e332e34 mrccs 14, 1, r2, cr3, cr4, {1} 10: Address 0x00000010 is out of bounds. Disassembly of section .ARM.attributes: 00000000 <_stack-0x80000>: 0: 00000f41 andeq r0, r0, r1, asr #30 4: 61656100 cmnvs r5, r0, lsl #2 8: 01006962 tsteq r0, r2, ror #18 c: 00000005 andeq r0, r0, r5 $ arm-elf-gcc -v Using built-in specs. Target: arm-elf Configured with: ../gcc-4.3.1/configure --target=arm-elf --with-gmp=/usr/local/gmp-4.2.2 --with-mpfr=/usr/local/mpfr-2.3.1 --prefix=/usr/local/arm-tools --enable-languages=c --disable-libssp Thread model: single gcc version 4.3.1 (GCC)
Do you see anything strange in this output?
Best regards,
Yasuhiko Koumoto.