Can RealView compiler generate SMLAL/UMLAL instructions?

Is there any way the RealView compiler can take advantage of the multiply-and-accumulate instructions of the Cortex M3?

I wrote a simple MAC loop and the compiler didn't generate any SMLAL or UMLAL instructions, which was disappointing.

Thanks,
Andrew Queisser
HP

Parents
  • Yes, the compiler does generate SMLAL/UMLAL instructions.

    Simple test code (UMLAL):

    /*
     * Multiply-accumulate test: walks the arrays a and b in step and returns
     * the sum of the element-wise products, accumulated in an
     * unsigned long long.
     *
     *   a, b - pointers to the first elements of the two input arrays
     *   cnt  - number of element pairs to process
     *
     * NOTE(review): cnt is not range-checked; a negative value would keep the
     * loop running past the intended element count. Callers presumably pass
     * cnt >= 0 -- confirm.
     */
    unsigned long long mac_test (unsigned long *a, unsigned long *b, int cnt) {
      unsigned long long res = 0;   /* running sum of products */
    
      while (cnt--) {
        /* Both operands are widened before the multiply so the product is
           computed in unsigned long long rather than in unsigned long. */
        res += (unsigned long long)*a++ * (unsigned long long)*b++;
      }
      return (res);
    }
    

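    Compiler output (note: this particular listing shows a UMULL followed by ADDS/ADCS for the accumulate, rather than a single UMLAL):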

                      mac_test PROC
    ;;;1      unsigned long long mac_test (unsigned long *a, unsigned long *b, int cnt) {
    000000  b570              PUSH     {r4-r6,lr}
    000002  4603              MOV      r3,r0
    000004  460c              MOV      r4,r1
    000006  2000              MOVS     r0,#0
    000008  4601              MOV      r1,r0
    ;;;2        unsigned long long res = 0;
    ;;;3
    ;;;4        while (cnt--) {
    00000a  e005              B        |L1.24|
                      |L1.12|
    ;;;5          res += (unsigned long long)*a++ * (unsigned long long)*b++;
    00000c  cb20              LDM      r3!,{r5}
    00000e  cc40              LDM      r4!,{r6}
    000010  fba56506          UMULL    r6,r5,r5,r6
    000014  1830              ADDS     r0,r6,r0
    000016  4169              ADCS     r1,r1,r5
                      |L1.24|
    000018  1e52              SUBS     r2,r2,#1              ;4
    00001a  d2f7              BCS      |L1.12|
    ;;;6        }
    ;;;7        return (res);
    ;;;8      }
    00001c  bd70              POP      {r4-r6,pc}
                              ENDP
    

Reply
  • Yes, the compiler does generate SMLAL/UMLAL instructions.

    Simple test code (UMLAL):

    /*
     * Multiply-accumulate test: walks the arrays a and b in step and returns
     * the sum of the element-wise products, accumulated in an
     * unsigned long long.
     *
     *   a, b - pointers to the first elements of the two input arrays
     *   cnt  - number of element pairs to process
     *
     * NOTE(review): cnt is not range-checked; a negative value would keep the
     * loop running past the intended element count. Callers presumably pass
     * cnt >= 0 -- confirm.
     */
    unsigned long long mac_test (unsigned long *a, unsigned long *b, int cnt) {
      unsigned long long res = 0;   /* running sum of products */
    
      while (cnt--) {
        /* Both operands are widened before the multiply so the product is
           computed in unsigned long long rather than in unsigned long. */
        res += (unsigned long long)*a++ * (unsigned long long)*b++;
      }
      return (res);
    }
    

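    Compiler output (note: this particular listing shows a UMULL followed by ADDS/ADCS for the accumulate, rather than a single UMLAL):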

                      mac_test PROC
    ;;;1      unsigned long long mac_test (unsigned long *a, unsigned long *b, int cnt) {
    000000  b570              PUSH     {r4-r6,lr}
    000002  4603              MOV      r3,r0
    000004  460c              MOV      r4,r1
    000006  2000              MOVS     r0,#0
    000008  4601              MOV      r1,r0
    ;;;2        unsigned long long res = 0;
    ;;;3
    ;;;4        while (cnt--) {
    00000a  e005              B        |L1.24|
                      |L1.12|
    ;;;5          res += (unsigned long long)*a++ * (unsigned long long)*b++;
    00000c  cb20              LDM      r3!,{r5}
    00000e  cc40              LDM      r4!,{r6}
    000010  fba56506          UMULL    r6,r5,r5,r6
    000014  1830              ADDS     r0,r6,r0
    000016  4169              ADCS     r1,r1,r5
                      |L1.24|
    000018  1e52              SUBS     r2,r2,#1              ;4
    00001a  d2f7              BCS      |L1.12|
    ;;;6        }
    ;;;7        return (res);
    ;;;8      }
    00001c  bd70              POP      {r4-r6,pc}
                              ENDP
    

Children
More questions in this forum