sample = *samp2_p++; \
re = __SMLALD(ortho_p[COS2_O], sample, re); \
im = __SMLALD(ortho_p[SIN2_O], sample, im); \
ortho_p++;
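For context, here is a minimal compilable sketch of how a step like this might sit inside the correlation loop. Only the three statements from the macro body above are original; the loop structure, the function and parameter names, and the COS2_O/SIN2_O values are assumptions for illustration, and __SMLALD is taken to be the CMSIS intrinsic quoted further down.

#include <stdint.h>
#include "core_cm4.h"      /* assumed source of the __SMLALD intrinsic */

#define COS2_O 0           /* assumed: offset of the packed cos pair   */
#define SIN2_O 1           /* assumed: offset of the packed sin pair   */

/* Sketch: correlate packed 16-bit sample pairs against a packed
 * cos/sin table, accumulating real/imaginary parts in 64 bits. */
static void correlate(const uint32_t *samp2_p, const uint32_t *ortho_p,
                      int n, uint64_t *re_out, uint64_t *im_out)
{
    uint64_t re = 0, im = 0;
    uint32_t sample;

    while (n-- > 0) {
        sample = *samp2_p++;
        re = __SMLALD(ortho_p[COS2_O], sample, re);
        im = __SMLALD(ortho_p[SIN2_O], sample, im);
        ortho_p++;
    }
    *re_out = re;
    *im_out = im;
}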
a94:	4682      	mov	sl, r0
a96:	f10c 0304 	add.w	r3, ip, #4
a9a:	f8d4 0100 	ldr.w	r0, [r4, #256]	; 0x100
a9e:	f8dc c000 	ldr.w	ip, [ip]
aa2:	fbc0 a8cc 	smlald	sl, r8, r0, ip
aa6:	2000      	movs	r0, #0
aa8:	9022      	str	r0, [sp, #136]	; 0x88
aaa:	f8cd 808c 	str.w	r8, [sp, #140]	; 0x8c
aae:	e9dd 8922 	ldrd	r8, r9, [sp, #136]	; 0x88
ab2:	f854 0b04 	ldr.w	r0, [r4], #4
ab6:	ea48 080a 	orr.w	r8, r8, sl
aba:	fbc0 21cc 	smlald	r2, r1, r0, ip
abe:	9125      	str	r1, [sp, #148]	; 0x94
ac0:	2100      	movs	r1, #0
ac2:	9124      	str	r1, [sp, #144]	; 0x90
ac4:	e9dd 0124 	ldrd	r0, r1, [sp, #144]	; 0x90
ac8:	ea40 0002 	orr.w	r0, r0, r2
acc:	468a      	mov	sl, r1
ace:	4684      	mov	ip, r0
ad0:	464a      	mov	r2, r9
typedef union llreg_u {
    uint32_t w32[2];
    uint64_t w64;
} llreg_t;

__attribute__( ( always_inline ) ) __STATIC_INLINE uint64_t __SMLALD (uint32_t op1, uint32_t op2, uint64_t result)
{
    llreg_t llr;
    llr.w64 = result;

    __ASM volatile ("smlald %0, %1, %2, %3"
                    : "=r" (llr.w32[0]), "=r" (llr.w32[1])
                    : "r" (op1), "r" (op2), "0" (llr.w32[0]), "1" (llr.w32[1]) );

    return (llr.w64);
}
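One workaround worth sketching (this is our own suggestion, not part of the original post) is to hand GCC the accumulator as a single 64-bit "+r" operand instead of splitting it into two 32-bit halves through the union. GCC's ARM operand modifiers %Q and %R then select the low and high registers of the pair:

#include <stdint.h>

/* Sketch of an alternative __SMLALD wrapper for GCC, little-endian
 * assumed (%Q = low word, %R = high word; they swap on big-endian).
 * The single "+r" 64-bit operand lets the register allocator keep the
 * accumulator in one register pair across the instruction. */
static inline uint64_t smlald_q_r(uint32_t op1, uint32_t op2, uint64_t acc)
{
    __asm__ volatile ("smlald %Q0, %R0, %1, %2"
                      : "+r" (acc)
                      : "r" (op1), "r" (op2));
    return acc;
}

Because the accumulator stays one operand, GCC never has to glue two independent 32-bit outputs back together, which appears to be where the str/ldrd/orr shuffling in the first listing above comes from.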
a96:	f109 0504 	add.w	r5, r9, #4
a9a:	f8d9 c000 	ldr.w	ip, [r9]
a9e:	f8d3 2100 	ldr.w	r2, [r3, #256]	; 0x100
aa2:	fbc2 40cc 	smlald	r4, r0, r2, ip
aa6:	9a01      	ldr	r2, [sp, #4]
aa8:	f853 9b04 	ldr.w	r9, [r3], #4
aac:	fbc9 12cc 	smlald	r1, r2, r9, ip
We've seen issues with GCC not properly handling the M4 intrinsics. In many cases the compiler not only fails to pick the proper instructions, but also runs out of registers, so you pay additional overhead saving and restoring values on the stack. We've found that the Keil compiler does a much better job with the intrinsics.
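As a point of comparison (again our own sketch, not from the original code), the same dual multiply-accumulate can be written in plain C, which frees the compiler to allocate registers however it likes, at the cost that it may not emit smlald at all:

#include <stdint.h>

/* Portable equivalent of SMLALD: multiply the bottom and top signed
 * 16-bit halfwords of the two packed operands and add both products
 * into the 64-bit accumulator. */
static inline int64_t smlald_c(uint32_t op1, uint32_t op2, int64_t acc)
{
    acc += (int64_t)(int16_t)(op1 & 0xFFFFu) * (int16_t)(op2 & 0xFFFFu);
    acc += (int64_t)(int16_t)(op1 >> 16)     * (int16_t)(op2 >> 16);
    return acc;
}

On a hot loop it is worth checking the generated assembly either way; depending on the GCC version, the portable version can end up faster than the intrinsic simply because it avoids the forced register pairing shown in the first listing.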