We are running a survey to help us improve the experience for all of our members. If you see the survey appear, please take the time to tell us about your experience if you can.
Hi,
I wanted to simplify my life, so instead of writing an asm function I used the __swp intrinsic provided by the Keil compiler.
After some time I realized that the program behaves incorrectly (when making a release build). It was pretty difficult to find the problem. Finally I discovered that __swp is translated differently depending on the optimization level! Unfortunately, when -O3 and time optimization (-Otime) are on, __swp translates to the SWPB instruction (instead of SWP), regardless of the first parameter type, so anything above the byte range is lost!
/*
 * Minimal test case: three 32-bit words are passed through the __swp
 * compiler intrinsic, one after another, inside an endless loop.
 */
#include <stdint.h>

/* Every initial value has bits set above the low 8 bits. */
volatile uint32_t lock = 0x12345678;
volatile uint32_t t1 = 0x11111111;
volatile uint32_t t2 = 0x22222222;

int main(void)
{
    for (;;)
    {
        /* Each call passes the current value of one variable and the address
           of another; the result is stored back into the first variable. */
        t1 = __swp(t1, &lock);
        t2 = __swp(t2, &t1 );
        lock = __swp(lock, &t2 );
    }
}
Working:
; generated by ARM C/C++ Compiler with , RVCT4.0 [Build 524] for uVision ; commandline ArmCC [--debug -c --asm --interleave -o.\obj\swap.o --depend=.\obj\swap.d --device=DARMP --apcs=interwork -O0 -IC:\apps\Keil\ARM\INC\Philips --omf_browse=.\obj\swap.crf swap.c] ARM AREA ||.text||, CODE, READONLY, ALIGN=2 main PROC ;;;6 ;;;7 int main(void) 000000 e1a00000 MOV r0,r0 |L1.4| ;;;8 { ;;;9 for (;;) ;;;10 { ;;;11 t1 = __swp(t1, &lock); 000004 e59f1044 LDR r1,|L1.80| 000008 e5911000 LDR r1,[r1,#0] ; t1 00000c e59f2040 LDR r2,|L1.84| 000010 e1020091 SWP r0,r1,[r2] 000014 e59f1034 LDR r1,|L1.80| 000018 e5810000 STR r0,[r1,#0] ; t1 ;;;12 t2 = __swp(t2, &t1 ); 00001c e59f1034 LDR r1,|L1.88| 000020 e5912000 LDR r2,[r1,#0] ; t2 000024 e59f1024 LDR r1,|L1.80| 000028 e1010092 SWP r0,r2,[r1] 00002c e59f1024 LDR r1,|L1.88| 000030 e5810000 STR r0,[r1,#0] ; t2 ;;;13 lock = __swp(lock, &t2 ); 000034 e59f1018 LDR r1,|L1.84| 000038 e5912000 LDR r2,[r1,#0] ; lock 00003c e59f1014 LDR r1,|L1.88| 000040 e1010092 SWP r0,r2,[r1] 000044 e59f1008 LDR r1,|L1.84| 000048 e5810000 STR r0,[r1,#0] ; lock 00004c eaffffec B |L1.4| ;;;14 } ;;;15 } ENDP |L1.80| DCD ||t1|| |L1.84| DCD lock |L1.88| DCD ||t2|| AREA ||.data||, DATA, ALIGN=2 lock DCD 0x12345678 ||t1|| DCD 0x11111111 ||t2|| DCD 0x22222222 __ARM_use_no_argv EQU 0
Optimized:
; generated by ARM C/C++ Compiler with , RVCT4.0 [Build 524] for uVision ; commandline ArmCC [--debug -c --asm --interleave -o.\obj\swap.o --depend=.\obj\swap.d --device=DARMP --apcs=interwork -O3 -Otime -IC:\apps\Keil\ARM\INC\Philips --omf_browse=.\obj\swap.crf swap.c] ARM AREA ||.text||, CODE, READONLY, ALIGN=2 main PROC ;;;6 ;;;7 int main(void) 000000 e59f0030 LDR r0,|L1.56| ;;;8 { ;;;9 for (;;) ;;;10 { ;;;11 t1 = __swp(t1, &lock); 000004 e2802000 ADD r2,r0,#0 ;;;12 t2 = __swp(t2, &t1 ); 000008 e2823004 ADD r3,r2,#4 ;;;13 lock = __swp(lock, &t2 ); 00000c e283c004 ADD r12,r3,#4 |L1.16| 000010 e5901004 LDR r1,[r0,#4] ;11 ; t1 000014 e1421091 SWPB r1,r1,[r2] ;11 000018 e5801004 STR r1,[r0,#4] ;11 ; t1 00001c e5901008 LDR r1,[r0,#8] ;12 ; t2 000020 e1431091 SWPB r1,r1,[r3] ;12 000024 e5801008 STR r1,[r0,#8] ;12 ; t2 000028 e5901000 LDR r1,[r0,#0] ; lock 00002c e14c1091 SWPB r1,r1,[r12] 000030 e5801000 STR r1,[r0,#0] ; lock 000034 eafffff5 B |L1.16| ;;;14 } ;;;15 } ENDP |L1.56| DCD ||.data|| AREA ||.data||, DATA, ALIGN=2 lock DCD 0x12345678 ||t1|| DCD 0x11111111 ||t2|| DCD 0x22222222 __ARM_use_no_argv EQU 0
IDE-Version: µVision V4.00 Beta 3.1 Copyright (c) Keil Elektronik GmbH / Keil Software, Inc. 1995 - 2009 Tool Version Numbers: Toolchain: RealView MDK-ARM Version: 3.50 Toolchain Path: BIN40\ C Compiler: Armcc.Exe V4.0.0.524 Assembler: Armasm.Exe V4.0.0.524 Linker/Locator: ArmLink.Exe V4.0.0.524 Librarian: ArmAr.Exe V4.0.0.524 Hex Converter: FromElf.Exe V4.0.0.524 CPU DLL: SARM.DLL V3.50 Dialog DLL: DARMP.DLL V1.44 Target DLL: BIN\UL2ARM.DLL V1.47 Dialog DLL: ARMP.DLL V1.44
Mike,
Yes, I've tried both of the following:
(volatile uint32_t *) (volatile int *)
I also tried to "prolong" (widen) the first parameter's type via casting (for variables as well as for constants).