This discussion has been locked.
You can no longer post new replies to this discussion. If you have a question you can start a new discussion

__swp compiler optimization error

Hi,

I wanted to simplify my life, so instead of writing an asm function I used the __swp intrinsic provided by the Keil compiler.

After some time I realized that the program behaves incorrectly in the release build. It was pretty difficult to find the problem. Finally I discovered that __swp is translated differently depending on the optimization level! Unfortunately, when -O3 and time optimization (-Otime) are on, __swp translates to the SWPB instruction (instead of SWP), regardless of the type of the first parameter, so everything above the byte range is lost!

#include <stdint.h>

volatile uint32_t lock = 0x12345678;  /* 32-bit word exchanged via __swp; every byte is nonzero and distinct */
volatile uint32_t t1   = 0x11111111;  /* second word in the rotation; a byte-only swap would leave a mixed pattern */
volatile uint32_t t2   = 0x22222222;  /* third word in the rotation */

int main(void)  /* minimal repro: rotates lock, t1 and t2 through the __swp intrinsic forever */
{
    for (;;)    /* never exits; each pass performs three exchanges */
    {
        t1   = __swp(t1,   &lock);  /* write t1 into lock and keep lock's previous value in t1 */
        t2   = __swp(t2,   &t1  );  /* same exchange between t2 and t1 */
        lock = __swp(lock, &t2  );  /* same exchange between lock and t2; all operands are uint32_t, so a word-wide SWP (not SWPB) is expected */
    }
}

Working (-O0):

; generated by ARM C/C++ Compiler with , RVCT4.0 [Build 524] for uVision
; commandline ArmCC [--debug -c --asm --interleave -o.\obj\swap.o --depend=.\obj\swap.d --device=DARMP --apcs=interwork -O0 -IC:\apps\Keil\ARM\INC\Philips --omf_browse=.\obj\swap.crf swap.c]
                          ARM

                          AREA ||.text||, CODE, READONLY, ALIGN=2

                  main PROC
;;;6
;;;7      int main(void)
000000  e1a00000          MOV      r0,r0
                  |L1.4|
;;;8      {
;;;9          for (;;)
;;;10         {
;;;11             t1   = __swp(t1,   &lock);
000004  e59f1044          LDR      r1,|L1.80|
000008  e5911000          LDR      r1,[r1,#0]  ; t1
00000c  e59f2040          LDR      r2,|L1.84|
000010  e1020091          SWP      r0,r1,[r2]
000014  e59f1034          LDR      r1,|L1.80|
000018  e5810000          STR      r0,[r1,#0]  ; t1
;;;12             t2   = __swp(t2,   &t1  );
00001c  e59f1034          LDR      r1,|L1.88|
000020  e5912000          LDR      r2,[r1,#0]  ; t2
000024  e59f1024          LDR      r1,|L1.80|
000028  e1010092          SWP      r0,r2,[r1]
00002c  e59f1024          LDR      r1,|L1.88|
000030  e5810000          STR      r0,[r1,#0]  ; t2
;;;13             lock = __swp(lock, &t2  );
000034  e59f1018          LDR      r1,|L1.84|
000038  e5912000          LDR      r2,[r1,#0]  ; lock
00003c  e59f1014          LDR      r1,|L1.88|
000040  e1010092          SWP      r0,r2,[r1]
000044  e59f1008          LDR      r1,|L1.84|
000048  e5810000          STR      r0,[r1,#0]  ; lock
00004c  eaffffec          B        |L1.4|
;;;14         }
;;;15     }
                          ENDP

                  |L1.80|
                          DCD      ||t1||
                  |L1.84|
                          DCD      lock
                  |L1.88|
                          DCD      ||t2||

                          AREA ||.data||, DATA, ALIGN=2

                  lock
                          DCD      0x12345678
                  ||t1||
                          DCD      0x11111111
                  ||t2||
                          DCD      0x22222222

                  __ARM_use_no_argv EQU 0

Optimized (-O3 -Otime), broken:

; generated by ARM C/C++ Compiler with , RVCT4.0 [Build 524] for uVision
; commandline ArmCC [--debug -c --asm --interleave -o.\obj\swap.o --depend=.\obj\swap.d --device=DARMP --apcs=interwork -O3 -Otime -IC:\apps\Keil\ARM\INC\Philips --omf_browse=.\obj\swap.crf swap.c]
                          ARM

                          AREA ||.text||, CODE, READONLY, ALIGN=2

                  main PROC
;;;6
;;;7      int main(void)
000000  e59f0030          LDR      r0,|L1.56|
;;;8      {
;;;9          for (;;)
;;;10         {
;;;11             t1   = __swp(t1,   &lock);
000004  e2802000          ADD      r2,r0,#0
;;;12             t2   = __swp(t2,   &t1  );
000008  e2823004          ADD      r3,r2,#4
;;;13             lock = __swp(lock, &t2  );
00000c  e283c004          ADD      r12,r3,#4
                  |L1.16|
000010  e5901004          LDR      r1,[r0,#4]            ;11  ; t1
000014  e1421091          SWPB     r1,r1,[r2]            ;11
000018  e5801004          STR      r1,[r0,#4]            ;11  ; t1
00001c  e5901008          LDR      r1,[r0,#8]            ;12  ; t2
000020  e1431091          SWPB     r1,r1,[r3]            ;12
000024  e5801008          STR      r1,[r0,#8]            ;12  ; t2
000028  e5901000          LDR      r1,[r0,#0]  ; lock
00002c  e14c1091          SWPB     r1,r1,[r12]
000030  e5801000          STR      r1,[r0,#0]  ; lock
000034  eafffff5          B        |L1.16|
;;;14         }
;;;15     }
                          ENDP

                  |L1.56|
                          DCD      ||.data||

                          AREA ||.data||, DATA, ALIGN=2

                  lock
                          DCD      0x12345678
                  ||t1||
                          DCD      0x11111111
                  ||t2||
                          DCD      0x22222222

                  __ARM_use_no_argv EQU 0
IDE-Version:
µVision V4.00 Beta 3.1
Copyright (c) Keil Elektronik GmbH / Keil Software, Inc. 1995 - 2009


Tool Version Numbers:
Toolchain: RealView MDK-ARM Version: 3.50
Toolchain Path: BIN40\
C Compiler: Armcc.Exe V4.0.0.524
Assembler: Armasm.Exe V4.0.0.524
Linker/Locator: ArmLink.Exe V4.0.0.524
Librarian: ArmAr.Exe V4.0.0.524
Hex Converter: FromElf.Exe V4.0.0.524
CPU DLL: SARM.DLL V3.50
Dialog DLL: DARMP.DLL V1.44
Target DLL: BIN\UL2ARM.DLL V1.47
Dialog DLL: ARMP.DLL V1.44