Asking this here in case it is an ARM issue and not specifically an STM32 issue...
We have a project running on the STM32F411 via the standard discovery board :www.st.com/.../32f411ediscovery.htmlIt has been verified that with the STLink port plugged in the BOOT0 pin is at GND and the NRST pin is at 3V. We are running into the apparently very common issue that code which was running perfectly fine suddenly only works in debug mode. Code has been added to light up LD3 (the orange LED) which connects to pin PD13 on the processor. By placing this code as high up the execution chain as possible we see that the device hangs almost immediately as soon as the clock configuration code is executing. The code running is now as follows:// from stm32.h/* modify bitfield */#define _BMD(reg, msk, val) (reg) = (((reg) & ~(msk)) | (val))/* set bitfield */#define _BST(reg, bits) (reg) = ((reg) | (bits))/* clear bitfield */#define _BCL(reg, bits) (reg) = ((reg) & ~(bits))/* wait until bitfield set */#define _WBS(reg, bits) while(((reg) & (bits)) == 0)/* wait until bitfield clear */#define _WBC(reg, bits) while(((reg) & (bits)) != 0)/* wait for bitfield value */#define _WVL(reg, msk, val) while(((reg) & (msk)) != (val))/* bit value */#define _BV(bit) (0x01 << (bit))Our code:void LD3_Init(void) { _BST(RCC->AHB1ENR, RCC_AHB1ENR_GPIODEN); _BMD(GPIOD->MODER, (0x03 << 26), (0x01 << 26)); _BCL(GPIOD->OTYPER, (0x01 << 13));} // end LD3_Initvoid LD3_ON(void) { _BST(GPIOD->ODR, (0x01 << 13));} // end LD3_ONint main (void) { volatile uint32_t reg_value; // Original source assumes default RCC register setting of 0x0000_XX81 // which enables the HSI clock, but better perhaps to explicitly set it _BCL(RCC->CR, RCC_CR_PLLON); _BCL(RCC->CR, RCC_CR_HSEON); _BST(RCC->CR, RCC_CR_HSION); _WBS(RCC->CR, RCC_CR_HSIRDY); do { _BST(RCC->APB1ENR, RCC_APB1ENR_PWREN); reg_value = RCC->APB1ENR; (void)reg_value; } while (0x00); /* set flash latency 2WS */ _BMD(FLASH->ACR, FLASH_ACR_LATENCY, FLASH_ACR_LATENCY_2WS); /* setting up PLL 16MHz HSI, VCO=144MHz, PLLP = 72MHz PLLQ = 48MHz */ 
_BMD(RCC->PLLCFGR, RCC_PLLCFGR_PLLM | RCC_PLLCFGR_PLLN | RCC_PLLCFGR_PLLSRC | RCC_PLLCFGR_PLLQ | RCC_PLLCFGR_PLLP, _VAL2FLD(RCC_PLLCFGR_PLLM, 8) | _VAL2FLD(RCC_PLLCFGR_PLLN, 72) | _VAL2FLD(RCC_PLLCFGR_PLLQ, 3)); // Original driver also fails to set the APB1 Prescaler as the APB1 clock must run // at or below 50MHz RCC->CFGR &= ~((uint32_t)(0x07 << 0x0A)); RCC->CFGR |= ((uint32_t)(0x04 << 0x0A)); /* enabling PLL */ _BST(RCC->CR, RCC_CR_PLLON); _WBS(RCC->CR, RCC_CR_PLLRDY); /* switching to PLL */ _BMD(RCC->CFGR, RCC_CFGR_SW, RCC_CFGR_SW_PLL); _WVL(RCC->CFGR, RCC_CFGR_SWS, RCC_CFGR_SWS_PLL); // wait for the clock to stabilize reg_value = 0x0000FFFF; while (reg_value) { --reg_value; }; LD3_Init(); LD3_ON(); // lots of other stuff...} // The rest of the code is too much to post, but this suffices to show the problem. When stepping through the debugger, the LED turns on, but won't do so in Release mode nor run in Debug mode without the debugger stepping the code (and of course, the LED is just an obvious test output, none of the rest of our code runs either). This issue usually relates to incorrect HW settings on the external pins but that is not the case here, and we have numerous examples of this same code to configure the clocks running in other cases perfectly without issue. Any suggestions as to what to look at would be appreciated.
// from stm32.h
/*
 * Register bit-manipulation helpers.
 *
 * The assignment helpers (_BMD/_BST/_BCL) expand to one fully parenthesised
 * assignment expression. They evaluate `reg` twice (one read, one write), so
 * never pass an argument with side effects.
 *
 * The wait helpers (_WBS/_WBC/_WVL) expand to a bare `while (...)` header on
 * purpose: the caller supplies the loop body, usually just `;`.
 */
/* modify bitfield: clear the bits in msk, then OR in val */
#define _BMD(reg, msk, val) ((reg) = (((reg) & ~(msk)) | (val)))
/* set bitfield */
#define _BST(reg, bits) ((reg) = ((reg) | (bits)))
/* clear bitfield */
#define _BCL(reg, bits) ((reg) = ((reg) & ~(bits)))
/* wait until bitfield set (any of the bits) */
#define _WBS(reg, bits) while(((reg) & (bits)) == 0)
/* wait until bitfield clear (all of the bits) */
#define _WBC(reg, bits) while(((reg) & (bits)) != 0)
/* wait for bitfield value */
#define _WVL(reg, msk, val) while(((reg) & (msk)) != (val))
/* bit value; unsigned so that bit 31 is well defined and never sign-extends */
#define _BV(bit) (1UL << (bit))
/*
 * Configure PD13 (LD3, the orange LED on the discovery board) as a
 * push-pull general-purpose output.
 */
void LD3_Init(void) {
    volatile uint32_t readback;

    /* enable the GPIOD peripheral clock */
    _BST(RCC->AHB1ENR, RCC_AHB1ENR_GPIODEN);
    /*
     * Read the enable register back before touching GPIOD: the clock enable
     * needs a short delay to take effect, and a register access issued
     * immediately after the enable write may be lost. Same pattern as the
     * PWREN enable in main().
     */
    readback = RCC->AHB1ENR;
    (void)readback;

    /* MODER bits [27:26] = 01: pin 13 is a general-purpose output */
    _BMD(GPIOD->MODER, (0x03 << 26), (0x01 << 26));
    /* OTYPER bit 13 = 0: push-pull */
    _BCL(GPIOD->OTYPER, (0x01 << 13));
} // end LD3_Init
/*
 * Switch LD3 on by setting bit 13 of the GPIOD output data register.
 * This is a read-modify-write of ODR, not an atomic bit set.
 */
void LD3_ON(void) {
    GPIOD->ODR |= (0x01 << 13);
} // end LD3_ON
/*
 * Bring the system clock up to 72 MHz from the PLL (fed by the 16 MHz HSI),
 * then light LD3 as a "clock setup finished" marker.
 *
 * The sequence is written so that it is also valid when the clock tree is
 * not in its reset state (for example after a debugger or software reset):
 * the core is moved onto HSI first, and only then are the PLL and HSE
 * switched off and the PLL reprogrammed.
 */
int main (void) {
    volatile uint32_t reg_value;

    // Original source assumes default RCC register setting of 0x0000_XX81
    // which enables the HSI clock, but better perhaps to explicitly set it
    _BST(RCC->CR, RCC_CR_HSION);
    _WBS(RCC->CR, RCC_CR_HSIRDY);

    // Select HSI as system clock (SW = 00) and wait until the switch is
    // reported (SWS = 00). PLLON/HSEON cannot be cleared while that source
    // is driving the system clock, so this has to come first.
    _BCL(RCC->CFGR, RCC_CFGR_SW);
    _WBC(RCC->CFGR, RCC_CFGR_SWS);

    // Stop the PLL and wait until it has really unlocked: PLLCFGR may only
    // be written while the PLL is disabled.
    _BCL(RCC->CR, RCC_CR_PLLON);
    _WBC(RCC->CR, RCC_CR_PLLRDY);
    _BCL(RCC->CR, RCC_CR_HSEON);

    // Enable the PWR interface clock; the read-back makes sure the enable
    // has taken effect before execution continues.
    _BST(RCC->APB1ENR, RCC_APB1ENR_PWREN);
    reg_value = RCC->APB1ENR;
    (void)reg_value;

    /* set flash latency 2WS and confirm it was accepted before speeding up */
    _BMD(FLASH->ACR, FLASH_ACR_LATENCY, FLASH_ACR_LATENCY_2WS);
    _WVL(FLASH->ACR, FLASH_ACR_LATENCY, FLASH_ACR_LATENCY_2WS);

    /* setting up PLL 16MHz HSI, VCO=144MHz, PLLP = 72MHz PLLQ = 48MHz */
    /* PLLSRC and PLLP are cleared and left at 0: HSI source, PLLP divide by 2 */
    _BMD(RCC->PLLCFGR,
         RCC_PLLCFGR_PLLM | RCC_PLLCFGR_PLLN | RCC_PLLCFGR_PLLSRC | RCC_PLLCFGR_PLLQ | RCC_PLLCFGR_PLLP,
         _VAL2FLD(RCC_PLLCFGR_PLLM, 8) | _VAL2FLD(RCC_PLLCFGR_PLLN, 72) | _VAL2FLD(RCC_PLLCFGR_PLLQ, 3));

    // Original driver also fails to set the APB1 Prescaler as the APB1 clock must run
    // at or below 50MHz
    // CFGR bits [12:10] = 100: APB1 = HCLK / 2. Done as one read-modify-write
    // so the field never passes through an intermediate value.
    _BMD(RCC->CFGR, ((uint32_t)0x07 << 0x0A), ((uint32_t)0x04 << 0x0A));

    /* enabling PLL */
    _BST(RCC->CR, RCC_CR_PLLON);
    _WBS(RCC->CR, RCC_CR_PLLRDY);

    /* switching to PLL */
    _BMD(RCC->CFGR, RCC_CFGR_SW, RCC_CFGR_SW_PLL);
    _WVL(RCC->CFGR, RCC_CFGR_SWS, RCC_CFGR_SWS_PLL);

    // wait for the clock to stabilize
    // (reg_value is volatile, so this countdown is not optimised away)
    reg_value = 0x0000FFFF;
    while (reg_value) {
        --reg_value;
    }

    LD3_Init();
    LD3_ON();
    // lots of other stuff...
} // end main
Hi Christopher,
The fact that this works while stepping tells me that the code is functionally correct, but likely the writes are not taking effect instantly.
You may wish to populate the code with barrier instructions (__isb(0xf);) to force the code to wait until the writes complete.
See also the below apps note. It is slightly out of date, but the concepts still apply.
https://developer.arm.com/documentation/dai0321/latest/
See also the ACLE specification, section 7:
We added in barrier commands for both the instruction and memory pipelines but it did not change anything. We also checked the disassembly listings for both the debug and build versions of the code and they were identical.
volatile uint32_t reg_value; _BCL(RCC->CR, RCC_CR_PLLON); __asm__("isb"); __asm__("dsb"); _BCL(RCC->CR, RCC_CR_HSEON); __asm__("isb"); __asm__("dsb"); _BST(RCC->CR, RCC_CR_HSION); __asm__("isb"); __asm__("dsb"); _WBS(RCC->CR, RCC_CR_HSIRDY); __asm__("isb"); __asm__("dsb"); do { _BST(RCC->APB1ENR, RCC_APB1ENR_PWREN); reg_value = RCC->APB1ENR; (void)reg_value; } while (0x00); __asm__("isb"); __asm__("dsb"); /* set flash latency 2WS */ _BMD(FLASH->ACR, FLASH_ACR_LATENCY, FLASH_ACR_LATENCY_2WS); __asm__("isb"); __asm__("dsb"); do { reg_value = FLASH->ACR; } while ((reg_value & FLASH_ACR_LATENCY_2WS) != FLASH_ACR_LATENCY_2WS); _BMD(RCC->PLLCFGR, RCC_PLLCFGR_PLLM | RCC_PLLCFGR_PLLN | RCC_PLLCFGR_PLLSRC | RCC_PLLCFGR_PLLQ | RCC_PLLCFGR_PLLP, _VAL2FLD(RCC_PLLCFGR_PLLM, 8) | _VAL2FLD(RCC_PLLCFGR_PLLN, 72) | _VAL2FLD(RCC_PLLCFGR_PLLQ, 3)); __asm__("isb"); __asm__("dsb"); RCC->CFGR &= ~((uint32_t)(0x07 << 0x0A)); RCC->CFGR |= ((uint32_t)(0x04 << 0x0A)); __asm__("isb"); __asm__("dsb"); /* enabling PLL */ _BST(RCC->CR, RCC_CR_PLLON); __asm__("isb"); __asm__("dsb"); _WBS(RCC->CR, RCC_CR_PLLRDY); __asm__("isb"); __asm__("dsb"); /* switching to PLL */ _BMD(RCC->CFGR, RCC_CFGR_SW, RCC_CFGR_SW_PLL); __asm__("isb"); __asm__("dsb"); _WVL(RCC->CFGR, RCC_CFGR_SWS, RCC_CFGR_SWS_PLL); __asm__("isb"); __asm__("dsb"); // wait for the clock to stabilize reg_value = 0x0000FFFF; while (reg_value) { --reg_value; }; LD3_Init(); LD3_ON();
__asm__("isb");
__asm__("dsb");
reg_value = FLASH->ACR;
} while ((reg_value & FLASH_ACR_LATENCY_2WS) != FLASH_ACR_LATENCY_2WS);
The barriers are definitely being included:
_BCL(RCC->CR, RCC_CR_PLLON); 4: f643 0100 movw r1, #14336 ; 0x3800 8: f2c4 0102 movt r1, #16386 ; 0x4002 c: 6808 ldr r0, [r1, #0] e: f020 7080 bic.w r0, r0, #16777216 ; 0x1000000 12: 6008 str r0, [r1, #0] __asm__("isb"); 14: f3bf 8f6f isb sy __asm__("dsb"); 18: f3bf 8f4f dsb sy
4: f643 0100 movw r1, #14336 ; 0x3800
8: f2c4 0102 movt r1, #16386 ; 0x4002
c: 6808 ldr r0, [r1, #0]
e: f020 7080 bic.w r0, r0, #16777216 ; 0x1000000
12: 6008 str r0, [r1, #0]
14: f3bf 8f6f isb sy
18: f3bf 8f4f dsb sy
I'm not sure what else the problem may be. Is this code generated by CubeMX? You may need to ask on an STM32 forum.
The code is being generated by Segger Embedded Studio. Using an LED as the test in Release mode to see where the code stops executing, it was traced down to this function that is part of the Segger Embedded Studio boilerplate code for setting up the C runtime environment:
START_FUNC _start // // Call linker init functions which in turn performs the following: // * Perform segment init // * Perform heap init (if used) // * Call constructors of global Objects (if any exist) // ldr R4, =__SEGGER_init_table__ // Set table pointer to start of initialization tableL(RunInit): ldr R0, [R4] // Get next initialization function from table adds R4, R4, #4 // Increment table pointer to point to function arguments blx R0 // Call initialization function b L(RunInit)MARK_FUNC __SEGGER_init_doneMARK_FUNC __startup_complete // // Time to call main(), the application entry point. //#ifndef FULL_LIBRARY // // In a real embedded application ("Free-standing environment"), // main() does not get any arguments, // which means it is not necessary to init R0 and R1. // bl APP_ENTRY_POINT // Call to application entry point (usually main())END_FUNC _start
// _start: C runtime entry. Walks the linker-generated initialization table,
// calling each listed init function, then calls the application entry point.
START_FUNC _start
//
// Call linker init functions which in turn performs the following:
// * Perform segment init
// * Perform heap init (if used)
// * Call constructors of global Objects (if any exist)
ldr R4, =__SEGGER_init_table__ // Set table pointer to start of initialization table
// R4 is the running table cursor. The init functions shown elsewhere in this
// listing (__SEGGER_init_zero, __SEGGER_init_copy) read their arguments
// through R4 and advance it past them before returning.
L(RunInit):
ldr R0, [R4] // Get next initialization function from table
adds R4, R4, #4 // Increment table pointer to point to function arguments
blx R0 // Call initialization function
// NOTE(review): this loop has no exit test of its own. Presumably the last
// table entry transfers control to __SEGGER_init_done instead of returning
// here - confirm against the generated init table.
b L(RunInit)
MARK_FUNC __SEGGER_init_done
MARK_FUNC __startup_complete
// Time to call main(), the application entry point.
// NOTE(review): the #endif matching the #ifndef below (and any FULL_LIBRARY
// branch) is not present in this excerpt.
#ifndef FULL_LIBRARY
// In a real embedded application ("Free-standing environment"),
// main() does not get any arguments,
// which means it is not necessary to init R0 and R1.
bl APP_ENTRY_POINT // Call to application entry point (usually main())
END_FUNC _start
With the LED code at the start of this function, it turns on, but not if it's at the end (again, no issues though when running a debug session). The RunInit process calls these two functions in order:
<__SEGGER_init_zero> 6820 ldr r0, [r4] 6861 ldr r1, [r4, #4] 3408 adds r4, #8 2200 movs r2, #0 E000 b 0x08014016 5442 strb r2, [r0, r1] 3901 subs r1, #1 D5FC bpl 0x08014014 4770 bx lr<__SEGGER_init_copy> 6820 ldr r0, [r4] 6861 ldr r1, [r4, #4] 68A2 ldr r2, [r4, #8] 340C adds r4, #12 E001 b 0x08013D0E 5C8B ldrb r3, [r1, r2] 5483 strb r3, [r0, r2] 3A01 subs r2, #1 D5FB bpl 0x08013D0A 4770 bx lr
<__SEGGER_init_zero>
6820 ldr r0, [r4]
6861 ldr r1, [r4, #4]
3408 adds r4, #8
2200 movs r2, #0
E000 b 0x08014016
5442 strb r2, [r0, r1]
3901 subs r1, #1
D5FC bpl 0x08014014
4770 bx lr
<__SEGGER_init_copy>
68A2 ldr r2, [r4, #8]
340C adds r4, #12
E001 b 0x08013D0E
5C8B ldrb r3, [r1, r2]
5483 strb r3, [r0, r2]
3A01 subs r2, #1
D5FB bpl 0x08013D0A
__SEGGER_init_zero is invoked many, many times - so many that I gave up trying to step through it and just ran to a breakpoint at __SEGGER_init_copy. By skipping this code I can get the LED to turn on, but of course nothing else in the code runs properly since the runtime environment is not correctly initialized.
Sorry I don't have either the board nor this toolchain available to help further.
The code shown is performing the equivalent of Arm tools' Scatterloading function.
Is there a linker option to disable zero-initialization?
There wasn't an option in the project settings per se, but I did manage to disable it by editing the linker script. That didn't help though as it just causes new hard faults to appear later in the code that weren't there before.