This discussion has been locked.
You can no longer post new replies to this discussion. If you have a question you can start a new discussion

gcc with neon

Hi

I added a piece of code for multimedia processing.

It reverses the data in an array.

It works without optimization, but when I add -O2 or -O3 the result is wrong, and the vmov generated with -O2 or -O3 is an illegal instruction.

I don't understand...

Is this a gcc bug??

asm volatile

  (

  "pld [%0, #0xFFF];\n\t\

  vldm %0!,{d0-d3};\n\t\

  vswp.32 d0,d3;\n\t\

  vswp.32 d1,d2;\n\t\

  VREV64.32 q0, q0;\n\t\

  VREV64.32 q1, q1;\n\t\

  vstm %1!,{d0-d3};"

  :

  :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr)

  :"d0", "d1","d2","d3","d4","d5","d6","d7"

  );

asm volatile

  (

  "pld [%0, #0xFFF];\n\t\

  vldm %0!,{d0-d3};\n\t\

  vldm %1!,{d4-d7};\n\t\

  VMUL.I32 d0,d0,d4;\n\t\

  VMUL.I32 d1,d1,d5;\n\t\

  VMUL.I32 d2,d2,d6;\n\t\

  VMUL.I32 d3,d3,d7;\n\t\

  VADD.I32 d0,d0,d1;\n\t\

  VADD.I32 d2,d2,d3;\n\t\

  VADD.I32 d0,d0,d2;\n\t\

  VMOV %2,%3,d0;\n\t\

  add %3,%3,%2;\n\t\

  str %3,[%4]"

  :

  :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr)

  :"d0", "d1","d2","d3","d4","d5","d6","d7"

  );

Parents
  • Of course, it is the story of the inline assembler. I used your source code for assembling.

    Here are the procedures.

    $ cat temp.c
    /*
     * Minimal test program that wraps the two posted inline-assembly
     * statements so they can be compiled and disassembled.
     */
    main()
    {
    /* None of these locals is initialized before the asm statements use them. */
    float *data;
    float *coeff;
    float tmp,sum;
    float *ptr;
    /*
     * Loads d0-d3 from the address in %0 (data), swaps d0<->d3 and d1<->d2,
     * reverses the two 32-bit words inside each doubleword, and stores d0-d3
     * to the address in %1 (coeff). Net effect: the eight 32-bit words are
     * written out in reverse order.
     * %2-%4 (tmp, sum, ptr) are listed as operands but not referenced here.
     * NOTE(review): the `!` writeback updates %0 and %1, yet both are
     * declared as input-only "r" operands -- check against the GCC
     * extended-asm rules.
     */
    asm volatile
      (
      "pld [%0, #0xFFF];\n\t\
      vldm %0!,{d0-d3};\n\t\
      vswp.32 d0,d3;\n\t\
      vswp.32 d1,d2;\n\t\
      VREV64.32 q0, q0;\n\t\
      VREV64.32 q1, q1;\n\t\
      vstm %1!,{d0-d3};"
      :
      :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr)
      :"d0", "d1","d2","d3","d4","d5","d6","d7"
      );
    /*
     * Loads d0-d3 from %0 (data) and d4-d7 from %1 (coeff), multiplies them
     * lane by lane as 32-bit integers, accumulates the products into d0,
     * moves the two 32-bit halves of d0 into %2 and %3, adds them, and
     * stores the sum through %4 (ptr).
     * NOTE(review): %2 (tmp) and %3 (sum) are written by the asm but are
     * declared as inputs; in the -O3 listing that follows, both were given
     * the same register (vmov ip, ip, d0).
     */
    asm volatile
      (
      "pld [%0, #0xFFF];\n\t\
      vldm %0!,{d0-d3};\n\t\
      vldm %1!,{d4-d7};\n\t\
      VMUL.I32 d0,d0,d4;\n\t\
      VMUL.I32 d1,d1,d5;\n\t\
      VMUL.I32 d2,d2,d6;\n\t\
      VMUL.I32 d3,d3,d7;\n\t\
      VADD.I32 d0,d0,d1;\n\t\
      VADD.I32 d2,d2,d3;\n\t\
      VADD.I32 d0,d0,d2;\n\t\
      VMOV %2,%3,d0;\n\t\
      add %3,%3,%2;\n\t\
      str %3,[%4]"
      :
      :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr)
      :"d0", "d1","d2","d3","d4","d5","d6","d7"
      );
    
    }
    $ arm-elf-gcc -o temp.out -O3 -mfpu=neon temp.c -nostdlib
    /cygdrive/d/arm-tools/bin/../lib/gcc/arm-elf/4.3.1/../../../../arm-elf/bin/ld: warning: cannot find entry symbol _start; defaulting to 00008000
    $ arm-elf-objdump.exe -D temp.out
    temp.out:    file format elf32-littlearm
    Disassembly of section .text:
    00008000 <main>:
        8000:      e52d4004        push    {r4}            ; (str r4, [sp, #-4]!)
        8004:      e3a0c000        mov    ip, #0  ; 0x0
        8008:      e3a04000        mov    r4, #0  ; 0x0
        800c:      f5d4ffff        pld    [r4, #4095]
        8010:      ecb40b08        vldmia  r4!, {d0-d3}
        8014:      f3b20003        vswp    d0, d3
        8018:      f3b21002        vswp    d1, d2
        801c:      f3b80040        vrev64.32      q0, q0
        8020:      f3b82042        vrev64.32      q1, q1
        8024:      eca40b08        vstmia  r4!, {d0-d3}
        8028:      f5d4ffff        pld    [r4, #4095]
        802c:      ecb40b08        vldmia  r4!, {d0-d3}
        8030:      ecb44b08        vldmia  r4!, {d4-d7}
        8034:      f2200914        vmul.i32        d0, d0, d4
        8038:      f2211915        vmul.i32        d1, d1, d5
        803c:      f2222916        vmul.i32        d2, d2, d6
        8040:      f2233917        vmul.i32        d3, d3, d7
        8044:      f2200801        vadd.i32        d0, d0, d1
        8048:      f2222803        vadd.i32        d2, d2, d3
        804c:      f2200802        vadd.i32        d0, d0, d2
        8050:      ec5ccb10        vmov    ip, ip, d0
        8054:      e08cc00c        add    ip, ip, ip
        8058:      e584c000        str    ip, [r4]
        805c:      e8bd0010        pop    {r4}
        8060:      e12fff1e        bx      lr
    Disassembly of section .comment:
    00000000 <.comment>:
      0:  43434700        movtmi  r4, #14080      ; 0x3700
      4:  4728203a        undefined
      8:  2029554e        eorcs  r5, r9, lr, asr #10
      c:  2e332e34        mrccs  14, 1, r2, cr3, cr4, {1}
      10:  Address 0x00000010 is out of bounds.
    Disassembly of section .ARM.attributes:
    00000000 <_stack-0x80000>:
      0:  00000f41        andeq  r0, r0, r1, asr #30
      4:  61656100        cmnvs  r5, r0, lsl #2
      8:  01006962        tsteq  r0, r2, ror #18
      c:  00000005        andeq  r0, r0, r5
    $ arm-elf-gcc -v
    Using built-in specs.
    Target: arm-elf
    Configured with: ../gcc-4.3.1/configure --target=arm-elf --with-gmp=/usr/local/gmp-4.2.2 --with-mpfr=/usr/local/mpfr-2.3.1 --prefix=/usr/local/arm-tools --enable-languages=c --disable-libssp
    Thread model: single
    gcc version 4.3.1 (GCC)
    

    Are there any strange parts?

    Best regards,

    Yasuhiko Koumoto.

Reply
  • Of course, it is the story of the inline assembler. I used your source code for assembling.

    Here are the procedures.

    $ cat temp.c
    /*
     * Minimal test program that wraps the two posted inline-assembly
     * statements so they can be compiled and disassembled.
     */
    main()
    {
    /* None of these locals is initialized before the asm statements use them. */
    float *data;
    float *coeff;
    float tmp,sum;
    float *ptr;
    /*
     * Loads d0-d3 from the address in %0 (data), swaps d0<->d3 and d1<->d2,
     * reverses the two 32-bit words inside each doubleword, and stores d0-d3
     * to the address in %1 (coeff). Net effect: the eight 32-bit words are
     * written out in reverse order.
     * %2-%4 (tmp, sum, ptr) are listed as operands but not referenced here.
     * NOTE(review): the `!` writeback updates %0 and %1, yet both are
     * declared as input-only "r" operands -- check against the GCC
     * extended-asm rules.
     */
    asm volatile
      (
      "pld [%0, #0xFFF];\n\t\
      vldm %0!,{d0-d3};\n\t\
      vswp.32 d0,d3;\n\t\
      vswp.32 d1,d2;\n\t\
      VREV64.32 q0, q0;\n\t\
      VREV64.32 q1, q1;\n\t\
      vstm %1!,{d0-d3};"
      :
      :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr)
      :"d0", "d1","d2","d3","d4","d5","d6","d7"
      );
    /*
     * Loads d0-d3 from %0 (data) and d4-d7 from %1 (coeff), multiplies them
     * lane by lane as 32-bit integers, accumulates the products into d0,
     * moves the two 32-bit halves of d0 into %2 and %3, adds them, and
     * stores the sum through %4 (ptr).
     * NOTE(review): %2 (tmp) and %3 (sum) are written by the asm but are
     * declared as inputs; in the -O3 listing that follows, both were given
     * the same register (vmov ip, ip, d0).
     */
    asm volatile
      (
      "pld [%0, #0xFFF];\n\t\
      vldm %0!,{d0-d3};\n\t\
      vldm %1!,{d4-d7};\n\t\
      VMUL.I32 d0,d0,d4;\n\t\
      VMUL.I32 d1,d1,d5;\n\t\
      VMUL.I32 d2,d2,d6;\n\t\
      VMUL.I32 d3,d3,d7;\n\t\
      VADD.I32 d0,d0,d1;\n\t\
      VADD.I32 d2,d2,d3;\n\t\
      VADD.I32 d0,d0,d2;\n\t\
      VMOV %2,%3,d0;\n\t\
      add %3,%3,%2;\n\t\
      str %3,[%4]"
      :
      :"r"(data),"r"(coeff),"r"(tmp),"r"(sum),"r"(ptr)
      :"d0", "d1","d2","d3","d4","d5","d6","d7"
      );
    
    }
    $ arm-elf-gcc -o temp.out -O3 -mfpu=neon temp.c -nostdlib
    /cygdrive/d/arm-tools/bin/../lib/gcc/arm-elf/4.3.1/../../../../arm-elf/bin/ld: warning: cannot find entry symbol _start; defaulting to 00008000
    $ arm-elf-objdump.exe -D temp.out
    temp.out:    file format elf32-littlearm
    Disassembly of section .text:
    00008000 <main>:
        8000:      e52d4004        push    {r4}            ; (str r4, [sp, #-4]!)
        8004:      e3a0c000        mov    ip, #0  ; 0x0
        8008:      e3a04000        mov    r4, #0  ; 0x0
        800c:      f5d4ffff        pld    [r4, #4095]
        8010:      ecb40b08        vldmia  r4!, {d0-d3}
        8014:      f3b20003        vswp    d0, d3
        8018:      f3b21002        vswp    d1, d2
        801c:      f3b80040        vrev64.32      q0, q0
        8020:      f3b82042        vrev64.32      q1, q1
        8024:      eca40b08        vstmia  r4!, {d0-d3}
        8028:      f5d4ffff        pld    [r4, #4095]
        802c:      ecb40b08        vldmia  r4!, {d0-d3}
        8030:      ecb44b08        vldmia  r4!, {d4-d7}
        8034:      f2200914        vmul.i32        d0, d0, d4
        8038:      f2211915        vmul.i32        d1, d1, d5
        803c:      f2222916        vmul.i32        d2, d2, d6
        8040:      f2233917        vmul.i32        d3, d3, d7
        8044:      f2200801        vadd.i32        d0, d0, d1
        8048:      f2222803        vadd.i32        d2, d2, d3
        804c:      f2200802        vadd.i32        d0, d0, d2
        8050:      ec5ccb10        vmov    ip, ip, d0
        8054:      e08cc00c        add    ip, ip, ip
        8058:      e584c000        str    ip, [r4]
        805c:      e8bd0010        pop    {r4}
        8060:      e12fff1e        bx      lr
    Disassembly of section .comment:
    00000000 <.comment>:
      0:  43434700        movtmi  r4, #14080      ; 0x3700
      4:  4728203a        undefined
      8:  2029554e        eorcs  r5, r9, lr, asr #10
      c:  2e332e34        mrccs  14, 1, r2, cr3, cr4, {1}
      10:  Address 0x00000010 is out of bounds.
    Disassembly of section .ARM.attributes:
    00000000 <_stack-0x80000>:
      0:  00000f41        andeq  r0, r0, r1, asr #30
      4:  61656100        cmnvs  r5, r0, lsl #2
      8:  01006962        tsteq  r0, r2, ror #18
      c:  00000005        andeq  r0, r0, r5
    $ arm-elf-gcc -v
    Using built-in specs.
    Target: arm-elf
    Configured with: ../gcc-4.3.1/configure --target=arm-elf --with-gmp=/usr/local/gmp-4.2.2 --with-mpfr=/usr/local/mpfr-2.3.1 --prefix=/usr/local/arm-tools --enable-languages=c --disable-libssp
    Thread model: single
    gcc version 4.3.1 (GCC)
    

    Are there any strange parts?

    Best regards,

    Yasuhiko Koumoto.

Children
No data