Hi, I am trying to find out exactly where the soft floating-point code used in EABI builds for a Cortex-M0+ comes from. When I compile a program that uses floating point with gcc_eabi, it generates code to carry out the operation (code included).
I downloaded the source for gcc_eabi and have been trying to find out where this code comes from, but I'm getting very conflicting information online.
Can someone who uses GCC on these devices please let me know where exactly this code (attached) is coming from when GCC sees a floating point multiply?
Thank you!
000001b4 <__aeabi_fmul>: 1b4: b5f0 push {r4, r5, r6, r7, lr} 1b6: 4657 mov r7, sl 1b8: 464e mov r6, r9 1ba: 4645 mov r5, r8 1bc: 0043 lsls r3, r0, #1 1be: b4e0 push {r5, r6, r7} 1c0: 0246 lsls r6, r0, #9 1c2: 4688 mov r8, r1 1c4: 0a76 lsrs r6, r6, #9 1c6: 0e1f lsrs r7, r3, #24 1c8: 0fc4 lsrs r4, r0, #31 1ca: 2f00 cmp r7, #0 1cc: d047 beq.n 25e <__aeabi_fmul+0xaa> 1ce: 2fff cmp r7, #255 ; 0xff 1d0: d025 beq.n 21e <__aeabi_fmul+0x6a> 1d2: 2300 movs r3, #0 1d4: 2580 movs r5, #128 ; 0x80 1d6: 469a mov sl, r3 1d8: 4699 mov r9, r3 1da: 00f6 lsls r6, r6, #3 1dc: 04ed lsls r5, r5, #19 1de: 432e orrs r6, r5 1e0: 3f7f subs r7, #127 ; 0x7f 1e2: 4643 mov r3, r8 1e4: 4642 mov r2, r8 1e6: 025d lsls r5, r3, #9 1e8: 0fd2 lsrs r2, r2, #31 1ea: 005b lsls r3, r3, #1 1ec: 0a6d lsrs r5, r5, #9 1ee: 0e1b lsrs r3, r3, #24 1f0: 4690 mov r8, r2 1f2: d040 beq.n 276 <__aeabi_fmul+0xc2> 1f4: 2bff cmp r3, #255 ; 0xff 1f6: d039 beq.n 26c <__aeabi_fmul+0xb8> 1f8: 2280 movs r2, #128 ; 0x80 1fa: 2000 movs r0, #0 1fc: 00ed lsls r5, r5, #3 1fe: 04d2 lsls r2, r2, #19 200: 4315 orrs r5, r2 202: 3b7f subs r3, #127 ; 0x7f 204: 18fb adds r3, r7, r3 206: 4642 mov r2, r8 208: 4657 mov r7, sl 20a: 1c59 adds r1, r3, #1 20c: 4062 eors r2, r4 20e: 468c mov ip, r1 210: 4307 orrs r7, r0 212: 2f0f cmp r7, #15 214: d85c bhi.n 2d0 <__aeabi_fmul+0x11c> 216: 496f ldr r1, [pc, #444] ; (3d4 <__aeabi_fmul+0x220>) 218: 00bf lsls r7, r7, #2 21a: 59c9 ldr r1, [r1, r7] 21c: 468f mov pc, r1 21e: 2e00 cmp r6, #0 220: d145 bne.n 2ae <__aeabi_fmul+0xfa> 222: 2308 movs r3, #8 224: 469a mov sl, r3 226: 3b06 subs r3, #6 228: 4699 mov r9, r3 22a: e7da b.n 1e2 <__aeabi_fmul+0x2e> 22c: 4642 mov r2, r8 22e: 2802 cmp r0, #2 230: d02d beq.n 28e <__aeabi_fmul+0xda> 232: 2803 cmp r0, #3 234: d100 bne.n 238 <__aeabi_fmul+0x84> 236: e0c3 b.n 3c0 <__aeabi_fmul+0x20c> 238: 2801 cmp r0, #1 23a: d000 beq.n 23e <__aeabi_fmul+0x8a> 23c: e0a2 b.n 384 <__aeabi_fmul+0x1d0> 23e: 2500 movs r5, #0 240: 2600 movs r6, #0 242: 4002 ands r2, r0 244: b2d4 
uxtb r4, r2 246: 0276 lsls r6, r6, #9 248: 05ed lsls r5, r5, #23 24a: 0a76 lsrs r6, r6, #9 24c: 432e orrs r6, r5 24e: 07e4 lsls r4, r4, #31 250: 4326 orrs r6, r4 252: 0030 movs r0, r6 254: bc1c pop {r2, r3, r4} 256: 4690 mov r8, r2 258: 4699 mov r9, r3 25a: 46a2 mov sl, r4 25c: bdf0 pop {r4, r5, r6, r7, pc} 25e: 2e00 cmp r6, #0 260: d11a bne.n 298 <__aeabi_fmul+0xe4> 262: 2304 movs r3, #4 264: 469a mov sl, r3 266: 3b03 subs r3, #3 268: 4699 mov r9, r3 26a: e7ba b.n 1e2 <__aeabi_fmul+0x2e> 26c: 002a movs r2, r5 26e: 1e51 subs r1, r2, #1 270: 418a sbcs r2, r1 272: 1c90 adds r0, r2, #2 274: e7c6 b.n 204 <__aeabi_fmul+0x50> 276: 2001 movs r0, #1 278: 2d00 cmp r5, #0 27a: d0c3 beq.n 204 <__aeabi_fmul+0x50> 27c: 0028 movs r0, r5 27e: f000 f8ad bl 3dc <__clzsi2> 282: 1f43 subs r3, r0, #5 284: 3076 adds r0, #118 ; 0x76 286: 409d lsls r5, r3 288: 4243 negs r3, r0 28a: 2000 movs r0, #0 28c: e7ba b.n 204 <__aeabi_fmul+0x50> 28e: 2401 movs r4, #1 290: 25ff movs r5, #255 ; 0xff 292: 4014 ands r4, r2 294: 2600 movs r6, #0 296: e7d6 b.n 246 <__aeabi_fmul+0x92> 298: 0030 movs r0, r6 29a: f000 f89f bl 3dc <__clzsi2> 29e: 1f43 subs r3, r0, #5 2a0: 409e lsls r6, r3 2a2: 2300 movs r3, #0 2a4: 3076 adds r0, #118 ; 0x76 2a6: 4247 negs r7, r0 2a8: 469a mov sl, r3 2aa: 4699 mov r9, r3 2ac: e799 b.n 1e2 <__aeabi_fmul+0x2e> 2ae: 230c movs r3, #12 2b0: 469a mov sl, r3 2b2: 3b09 subs r3, #9 2b4: 4699 mov r9, r3 2b6: e794 b.n 1e2 <__aeabi_fmul+0x2e> 2b8: 2680 movs r6, #128 ; 0x80 2ba: 2400 movs r4, #0 2bc: 03f6 lsls r6, r6, #15 2be: 25ff movs r5, #255 ; 0xff 2c0: e7c1 b.n 246 <__aeabi_fmul+0x92> 2c2: 0035 movs r5, r6 2c4: 4648 mov r0, r9 2c6: e7b2 b.n 22e <__aeabi_fmul+0x7a> 2c8: 0035 movs r5, r6 2ca: 0022 movs r2, r4 2cc: 4648 mov r0, r9 2ce: e7ae b.n 22e <__aeabi_fmul+0x7a> 2d0: 0429 lsls r1, r5, #16 2d2: 0c09 lsrs r1, r1, #16 2d4: 0008 movs r0, r1 2d6: 0c37 lsrs r7, r6, #16 2d8: 0436 lsls r6, r6, #16 2da: 0c36 lsrs r6, r6, #16 2dc: 0c2c lsrs r4, r5, #16 2de: 4379 muls r1, r7 2e0: 4370 muls 
r0, r6 2e2: 4367 muls r7, r4 2e4: 4374 muls r4, r6 2e6: 0c06 lsrs r6, r0, #16 2e8: 1864 adds r4, r4, r1 2ea: 1936 adds r6, r6, r4 2ec: 42b1 cmp r1, r6 2ee: d903 bls.n 2f8 <__aeabi_fmul+0x144> 2f0: 2180 movs r1, #128 ; 0x80 2f2: 0249 lsls r1, r1, #9 2f4: 4688 mov r8, r1 2f6: 4447 add r7, r8 2f8: 0400 lsls r0, r0, #16 2fa: 0c00 lsrs r0, r0, #16 2fc: 0431 lsls r1, r6, #16 2fe: 1809 adds r1, r1, r0 300: 018d lsls r5, r1, #6 302: 1e68 subs r0, r5, #1 304: 4185 sbcs r5, r0 306: 0e89 lsrs r1, r1, #26 308: 4329 orrs r1, r5 30a: 0c35 lsrs r5, r6, #16 30c: 19ed adds r5, r5, r7 30e: 01ad lsls r5, r5, #6 310: 430d orrs r5, r1 312: 0129 lsls r1, r5, #4 314: d504 bpl.n 320 <__aeabi_fmul+0x16c> 316: 2301 movs r3, #1 318: 0869 lsrs r1, r5, #1 31a: 401d ands r5, r3 31c: 4663 mov r3, ip 31e: 430d orrs r5, r1 320: 0019 movs r1, r3 322: 317f adds r1, #127 ; 0x7f 324: 2900 cmp r1, #0 326: dd25 ble.n 374 <__aeabi_fmul+0x1c0> 328: 0768 lsls r0, r5, #29 32a: d004 beq.n 336 <__aeabi_fmul+0x182> 32c: 200f movs r0, #15 32e: 4028 ands r0, r5 330: 2804 cmp r0, #4 332: d000 beq.n 336 <__aeabi_fmul+0x182> 334: 3504 adds r5, #4 336: 0128 lsls r0, r5, #4 338: d503 bpl.n 342 <__aeabi_fmul+0x18e> 33a: 4927 ldr r1, [pc, #156] ; (3d8 <__aeabi_fmul+0x224>) 33c: 3380 adds r3, #128 ; 0x80 33e: 400d ands r5, r1 340: 0019 movs r1, r3 342: 29fe cmp r1, #254 ; 0xfe 344: dca3 bgt.n 28e <__aeabi_fmul+0xda> 346: 2401 movs r4, #1 348: 01ad lsls r5, r5, #6 34a: 0a6e lsrs r6, r5, #9 34c: 4014 ands r4, r2 34e: b2cd uxtb r5, r1 350: e779 b.n 246 <__aeabi_fmul+0x92> 352: 2080 movs r0, #128 ; 0x80 354: 03c0 lsls r0, r0, #15 356: 4206 tst r6, r0 358: d007 beq.n 36a <__aeabi_fmul+0x1b6> 35a: 4205 tst r5, r0 35c: d105 bne.n 36a <__aeabi_fmul+0x1b6> 35e: 4328 orrs r0, r5 360: 0246 lsls r6, r0, #9 362: 0a76 lsrs r6, r6, #9 364: 4644 mov r4, r8 366: 25ff movs r5, #255 ; 0xff 368: e76d b.n 246 <__aeabi_fmul+0x92> 36a: 4306 orrs r6, r0 36c: 0276 lsls r6, r6, #9 36e: 0a76 lsrs r6, r6, #9 370: 25ff movs r5, #255 ; 0xff 372: 
e768 b.n 246 <__aeabi_fmul+0x92> 374: 2401 movs r4, #1 376: 1a61 subs r1, r4, r1 378: 291b cmp r1, #27 37a: dd05 ble.n 388 <__aeabi_fmul+0x1d4> 37c: 4014 ands r4, r2 37e: 2500 movs r5, #0 380: 2600 movs r6, #0 382: e760 b.n 246 <__aeabi_fmul+0x92> 384: 4663 mov r3, ip 386: e7cb b.n 320 <__aeabi_fmul+0x16c> 388: 002e movs r6, r5 38a: 2320 movs r3, #32 38c: 40ce lsrs r6, r1 38e: 1a59 subs r1, r3, r1 390: 408d lsls r5, r1 392: 1e6b subs r3, r5, #1 394: 419d sbcs r5, r3 396: 432e orrs r6, r5 398: 0773 lsls r3, r6, #29 39a: d004 beq.n 3a6 <__aeabi_fmul+0x1f2> 39c: 230f movs r3, #15 39e: 4033 ands r3, r6 3a0: 2b04 cmp r3, #4 3a2: d000 beq.n 3a6 <__aeabi_fmul+0x1f2> 3a4: 3604 adds r6, #4 3a6: 0173 lsls r3, r6, #5 3a8: d504 bpl.n 3b4 <__aeabi_fmul+0x200> 3aa: 2401 movs r4, #1 3ac: 2501 movs r5, #1 3ae: 4014 ands r4, r2 3b0: 2600 movs r6, #0 3b2: e748 b.n 246 <__aeabi_fmul+0x92> 3b4: 2401 movs r4, #1 3b6: 01b6 lsls r6, r6, #6 3b8: 0a76 lsrs r6, r6, #9 3ba: 4014 ands r4, r2 3bc: 2500 movs r5, #0 3be: e742 b.n 246 <__aeabi_fmul+0x92> 3c0: 2680 movs r6, #128 ; 0x80 3c2: 2401 movs r4, #1 3c4: 03f6 lsls r6, r6, #15 3c6: 432e orrs r6, r5 3c8: 0276 lsls r6, r6, #9 3ca: 0a76 lsrs r6, r6, #9 3cc: 4014 ands r4, r2 3ce: 25ff movs r5, #255 ; 0xff 3d0: e739 b.n 246 <__aeabi_fmul+0x92> 3d2: 46c0 nop ; (mov r8, r8) 3d4: 00000418 .word 0x00000418 3d8: f7ffffff .word 0xf7ffffff
On ARMv6-M only, it actually comes from libgcc/soft-fp/mul{sf,df,tf}3.c. This is because there is no optimized assembler implementation of softfp multiply for ARMv6-M; see the comment in config/arm/t-elf. For non-ARMv6-M cores, it comes from config/arm/ieee754-sf.S (included from lib1funcs.S, which is multilib-built for each softfp API with the appropriate API macro defined).
If you run the linker with --trace (for example, by passing -Wl,--trace to gcc), you can see where the code is being pulled in from:
E.g., for Cortex-M4:
$ arm-none-eabi-gcc -O2 -mfloat-abi=soft -mcpu=cortex-m4 sf.c -mthumb -specs=rdimon.specs -Wl,--trace
/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/../../../../arm-none-eabi/bin/ld: mode armelf
/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/armv7e-m/crti.o
/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/armv7e-m/crtbegin.o
/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/../../../../arm-none-eabi/lib/armv7e-m/rdimon-crt0.o
/tmp/ccKbhmmj.o
(/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/armv7e-m/libgcc.a)_arm_muldivsf3.o
And for Cortex-M0:
$ arm-none-eabi-gcc -O2 -mfloat-abi=soft -mcpu=cortex-m0 sf.c -mthumb -specs=rdimon.specs -Wl,--trace
/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/../../../../arm-none-eabi/bin/ld: mode armelf
/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/armv6-m/crti.o
/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/armv6-m/crtbegin.o
/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/../../../../arm-none-eabi/lib/armv6-m/rdimon-crt0.o
/tmp/ccgwAn4T.o
(/data/embedded/release-builds/gcc-arm-none-eabi-6_2-2016q4/bin/../lib/gcc/arm-none-eabi/6.2.1/armv6-m/libgcc.a)mulsf3.o