Gccarmnoneeabi 10.3.1 exhibited unexpected compilation behavior

When I attempted to develop my MCU using the gccarm toolchain, gcc exhibited an unexpected behavior that resulted in a bug I spent the entire evening searching for.
Here is the function source code:

// Task body: polls the SHT2x sensor forever and publishes the latest
// temperature/humidity pair to the shared SHT_msg under xMutex.
// pvParameters must point to a SHT2x_Struct providing SDA_Pin and SCL_Pin.
// This function never returns (infinite loop with no exit).
void SHT2x_Read(void *pvParameters){
    SHT2x_Struct *sht2x = (SHT2x_Struct *)pvParameters;
    // Automatic (non-static) TwoWire object built from the configured pins;
    // being a local, it occupies space in this function's own stack frame.
    TwoWire SHT20_Wire = TwoWire(sht2x->SDA_Pin, sht2x->SCL_Pin);
    SHT2x sht2x_sensor;
    // The sensor receives a pointer to the local SHT20_Wire; since the loop
    // below never exits, the local stays in scope for as long as it is used.
    sht2x_sensor.begin(&SHT20_Wire);

    while(1){
        // Take a reading first, then fetch both values before touching the mutex.
        sht2x_sensor.read();
        float SHT_Temp = sht2x_sensor.getTemperature(); 
        float SHT_Humi = sht2x_sensor.getHumidity();
        
        // Wait for xMutex with a timeout argument of 1000; if it is not obtained,
        // this sample is dropped and SHT_msg is left unchanged.
        if(xSemaphoreTake(xMutex, 1000) == pdTRUE){
            //do something
            SHT_msg.SHT_Humi = SHT_Humi;
            SHT_msg.SHT_Temp = SHT_Temp;
            xSemaphoreGive(xMutex);
        }
        // Pause before the next reading (delay argument of 1000).
        vTaskDelay(1000);
    }
}

When I didn’t add the ‘static’ keyword to the variable SHT20_Wire, the compiler produced an unexpected behavior.
This is the assembly code generated during compilation without the ‘static’ keyword.

0x08006f54: 30 b5           	push	{r4, r5, lr}
0x08006f56: c3 b0           	sub	sp, #268	; 0x10c
0x08006f58: 42 78           	ldrb	r2, [r0, #1]
0x08006f5a: 01 78           	ldrb	r1, [r0, #0]
0x08006f5c: 09 a8           	add	r0, sp, #36	; 0x24
0x08006f5e: 01 f0 93 fc     	bl	0x8008888 <_ZN7TwoWireC2Emm>
0x08006f62: 68 46           	mov	r0, sp
0x08006f64: 03 f0 ca fc     	bl	0x800a8fc <_ZN5SHT2xC2Ev>
0x08006f68: 09 a9           	add	r1, sp, #36	; 0x24
0x08006f6a: 68 46           	mov	r0, sp
0x08006f6c: 03 f0 a6 fd     	bl	0x800aabc <_ZN5SHT2x5beginEP7TwoWire>
0x08006f70: 0d e0           	b.n	0x8006f8e <_Z10SHT2x_ReadPv+58>
0x08006f72: 11 4b           	ldr	r3, [pc, #68]	; (0x8006fb8 <_Z10SHT2x_ReadPv+100>)
0x08006f74: 5d 60           	str	r5, [r3, #4]
0x08006f76: 1c 60           	str	r4, [r3, #0]
0x08006f78: 00 23           	movs	r3, #0
0x08006f7a: 1a 46           	mov	r2, r3
0x08006f7c: 19 46           	mov	r1, r3
0x08006f7e: 0f 48           	ldr	r0, [pc, #60]	; (0x8006fbc <_Z10SHT2x_ReadPv+104>)
0x08006f80: 00 68           	ldr	r0, [r0, #0]
0x08006f82: fe f7 46 fa     	bl	0x8005412 <xQueueGenericSend>
0x08006f86: 4f f4 7a 70     	mov.w	r0, #1000	; 0x3e8
0x08006f8a: ff f7 a3 f8     	bl	0x80060d4 <vTaskDelay>
0x08006f8e: 68 46           	mov	r0, sp
0x08006f90: 03 f0 28 fe     	bl	0x800abe4 <_ZN5SHT2x4readEv>
0x08006f94: 68 46           	mov	r0, sp
0x08006f96: 03 f0 17 fd     	bl	0x800a9c8 <_ZN5SHT2x14getTemperatureEv>
0x08006f9a: 04 46           	mov	r4, r0
0x08006f9c: 68 46           	mov	r0, sp
0x08006f9e: 03 f0 2f fd     	bl	0x800aa00 <_ZN5SHT2x11getHumidityEv>
0x08006fa2: 05 46           	mov	r5, r0
0x08006fa4: 4f f4 7a 71     	mov.w	r1, #1000	; 0x3e8
0x08006fa8: 04 4b           	ldr	r3, [pc, #16]	; (0x8006fbc <_Z10SHT2x_ReadPv+104>)
0x08006faa: 18 68           	ldr	r0, [r3, #0]
0x08006fac: fe f7 22 fc     	bl	0x80057f4 <xQueueSemaphoreTake>
0x08006fb0: 01 28           	cmp	r0, #1
0x08006fb2: de d0           	beq.n	0x8006f72 <_Z10SHT2x_ReadPv+30>
0x08006fb4: e7 e7           	b.n	0x8006f86 <_Z10SHT2x_ReadPv+50>
0x08006fb6: 00 bf           	nop
0x08006fb8: 80 04           	lsls	r0, r0, #18
0x08006fba: 00 20           	movs	r0, #0
0x08006fbc: 7c 04           	lsls	r4, r7, #17
0x08006fbe: 00 20           	movs	r0, #0

And this is the assembly code generated when the ‘static’ keyword is added.

0x08006f90: 30 b5           	push	{r4, r5, lr}
0x08006f92: 8b b0           	sub	sp, #44	; 0x2c
0x08006f94: 1d 4b           	ldr	r3, [pc, #116]	; (0x800700c <_Z10SHT2x_ReadPv+124>)
0x08006f96: 1b 68           	ldr	r3, [r3, #0]
0x08006f98: 13 f0 01 0f     	tst.w	r3, #1
0x08006f9c: 07 d0           	beq.n	0x8006fae <_Z10SHT2x_ReadPv+30>
0x08006f9e: 01 a8           	add	r0, sp, #4
0x08006fa0: 03 f0 dc fc     	bl	0x800a95c <_ZN5SHT2xC2Ev>
0x08006fa4: 1a 49           	ldr	r1, [pc, #104]	; (0x8007010 <_Z10SHT2x_ReadPv+128>)
0x08006fa6: 01 a8           	add	r0, sp, #4
0x08006fa8: 03 f0 b8 fd     	bl	0x800ab1c <_ZN5SHT2x5beginEP7TwoWire>
0x08006fac: 19 e0           	b.n	0x8006fe2 <_Z10SHT2x_ReadPv+82>
0x08006fae: 42 78           	ldrb	r2, [r0, #1]
0x08006fb0: 01 78           	ldrb	r1, [r0, #0]
0x08006fb2: 17 48           	ldr	r0, [pc, #92]	; (0x8007010 <_Z10SHT2x_ReadPv+128>)
0x08006fb4: 01 f0 98 fc     	bl	0x80088e8 <_ZN7TwoWireC2Emm>
0x08006fb8: 14 4b           	ldr	r3, [pc, #80]	; (0x800700c <_Z10SHT2x_ReadPv+124>)
0x08006fba: 01 22           	movs	r2, #1
0x08006fbc: 1a 60           	str	r2, [r3, #0]
0x08006fbe: 15 48           	ldr	r0, [pc, #84]	; (0x8007014 <_Z10SHT2x_ReadPv+132>)
0x08006fc0: 03 f0 6e fe     	bl	0x800aca0 <atexit>
0x08006fc4: eb e7           	b.n	0x8006f9e <_Z10SHT2x_ReadPv+14>
0x08006fc6: 14 4b           	ldr	r3, [pc, #80]	; (0x8007018 <_Z10SHT2x_ReadPv+136>)
0x08006fc8: 5d 60           	str	r5, [r3, #4]
0x08006fca: 1c 60           	str	r4, [r3, #0]
0x08006fcc: 00 23           	movs	r3, #0
0x08006fce: 1a 46           	mov	r2, r3
0x08006fd0: 19 46           	mov	r1, r3
0x08006fd2: 12 48           	ldr	r0, [pc, #72]	; (0x800701c <_Z10SHT2x_ReadPv+140>)
0x08006fd4: 00 68           	ldr	r0, [r0, #0]
0x08006fd6: fe f7 1c fa     	bl	0x8005412 <xQueueGenericSend>
0x08006fda: 4f f4 7a 70     	mov.w	r0, #1000	; 0x3e8
0x08006fde: ff f7 79 f8     	bl	0x80060d4 <vTaskDelay>
0x08006fe2: 01 a8           	add	r0, sp, #4
0x08006fe4: 03 f0 2e fe     	bl	0x800ac44 <_ZN5SHT2x4readEv>
0x08006fe8: 01 a8           	add	r0, sp, #4
0x08006fea: 03 f0 1d fd     	bl	0x800aa28 <_ZN5SHT2x14getTemperatureEv>
0x08006fee: 04 46           	mov	r4, r0
0x08006ff0: 01 a8           	add	r0, sp, #4
0x08006ff2: 03 f0 35 fd     	bl	0x800aa60 <_ZN5SHT2x11getHumidityEv>
0x08006ff6: 05 46           	mov	r5, r0
0x08006ff8: 4f f4 7a 71     	mov.w	r1, #1000	; 0x3e8
0x08006ffc: 07 4b           	ldr	r3, [pc, #28]	; (0x800701c <_Z10SHT2x_ReadPv+140>)
0x08006ffe: 18 68           	ldr	r0, [r3, #0]
0x08007000: fe f7 f8 fb     	bl	0x80057f4 <xQueueSemaphoreTake>
0x08007004: 01 28           	cmp	r0, #1
0x08007006: de d0           	beq.n	0x8006fc6 <_Z10SHT2x_ReadPv+54>
0x08007008: e7 e7           	b.n	0x8006fda <_Z10SHT2x_ReadPv+74>
0x0800700a: 00 bf           	nop
0x0800700c: 8c 04           	lsls	r4, r1, #18
0x0800700e: 00 20           	movs	r0, #0
0x08007010: 90 04           	lsls	r0, r2, #18
0x08007012: 00 20           	movs	r0, #0
0x08007014: 31 6f           	ldr	r1, [r6, #112]	; 0x70
0x08007016: 00 08           	lsrs	r0, r0, #32
0x08007018: 84 04           	lsls	r4, r0, #18
0x0800701a: 00 20           	movs	r0, #0
0x0800701c: 80 04           	lsls	r0, r0, #18
0x0800701e: 00 20           	movs	r0, #0

I’m not an expert in assembly, but I can see that when the ‘static’ keyword is not added, the address of the variable SHT20_Wire is not stored in the stack. This resulted in unexpected errors in my program.
My platform: STM32F103ZET6
platformio.ini:

; PlatformIO Project Configuration File
;
;   Build options: build flags, source filter
;   Upload options: custom upload port, speed and extra flags
;   Library options: dependencies, extra library storages
;   Advanced options: extra scripting
;
; Please visit documentation for the other options and examples
; https://docs.platformio.org/page/projectconf.html

[env:genericSTM32F103ZE]
platform = ststm32
board = genericSTM32F103ZE
framework = arduino
upload_protocol = cmsis-dap
upload_speed = 1000000
debug_tool = cmsis-dap
lib_deps = 
	stm32duino/STM32duino FreeRTOS@^10.3.2
	olikraus/U8g2@^2.35.4
	adafruit/Adafruit BusIO@^1.14.1
	robtillaart/SHT2x@^0.3.0

I consulted a friend who has a better understanding of gcc, and he mentioned that according to gcc standards, any variable whose address is taken should not be optimized to the extent of not storing its address in the stack. I was quite puzzled by this as well. I’m not sure why gccarm would make such an optimization. Perhaps it’s due to my version being too low? Or maybe it’s an optimization point documented in the gcc documentation? I hope someone can provide an answer to my question.
Thanks :grinning:

No.

You have

In the non-static version’s assembly code

0x08006f54: 30 b5           	push	{r4, r5, lr}       ; save registers and return address onto the stack
0x08006f56: c3 b0           	sub	sp, #268	; 0x10c  ; create the stack frame (268 bytes) 
0x08006f58: 42 78           	ldrb	r2, [r0, #1]     ; r0 = first param (sht2x). Load byte at offset r0 + 1 into r2 = second constructor argument (SCL_Pin)
0x08006f5a: 01 78           	ldrb	r1, [r0, #0]    ; load byte at (r0 + 0) into r1 = first constructor argument (SDA_Pin)
0x08006f5c: 09 a8           	add	r0, sp, #36	; 0x24  ; r0 = first param to the function ('this') = stack pointer + 36. THIS IS THE ADDRESS OF SHT20_WIRE
0x08006f5e: 01 f0 93 fc     	bl	0x8008888 <_ZN7TwoWireC2Emm> ; constructor call of TwoWire() on SHT20_Wire 
0x08006f62: 68 46           	mov	r0, sp  ; prepare "this" parameter for SHT2x constructor call (sht2x_sensor)
0x08006f64: 03 f0 ca fc     	bl	0x800a8fc <_ZN5SHT2xC2Ev> ; constructor call SHT2x() on sht2x_sensor
0x08006f68: 09 a9           	add	r1, sp, #36	; 0x24  ; first (regular) argument of begin(): address of SHT20_Wire (SP + 36)
0x08006f6a: 68 46           	mov	r0, sp  ; "this" parameter = SP = address of sht2x_sensor
0x08006f6c: 03 f0 a6 fd     	bl	0x800aabc <_ZN5SHT2x5beginEP7TwoWire> ; call sht2x_sensor.begin(&SHT20_Wire);

That is per-se valid assembly code. The rest of it, is too. You see how every call to a function

is prefixed with mov r0, sp, which is the address of sht2x_sensor. And in those places where the address of SHT20_Wire is needed (SP + 36), it is also loaded correctly: first into r0, so that it becomes the this parameter of the TwoWire constructor, and later into r1, so that it becomes the first (regular) parameter of the begin() call.

What may be happening is that you have a stack overflow that causes memory corruption. In the static version, the stack frame of this function is only 44 bytes, compared to 268 bytes in the non-static version, because SHT20_Wire then no longer lives on the task's stack. Calling into constructors and functions which in turn allocate their own stack frames and call other functions, which again allocate stack, might make the function use more stack memory than it was given by xTaskCreate().

  1. Have you tried increasing the stack memory given to the FreeRTOS task by upping the usStackDepth parameter value?
  2. Have you enabled task stack overflow checks in your FreeRTOS config.h?

I apologize for any misunderstanding caused by my limited knowledge of assembly language.
Regarding your first question, I also doubted whether it was due to insufficient memory. When allocating memory, I allocated up to 4096 bytes at a time, but the issue still persists.
Regarding your second question, I attempted to enable FreeRTOS stack overflow checking, but even when debugging with breakpoints, it did not enter the vApplicationStackOverflowHook function.
This may be due to improper configuration of the stack overflow checking that I enabled. Below is the method I used to enable stack overflow checking:
1:define configCHECK_FOR_STACK_OVERFLOW as 1
image
2:define TaskHandle_t and pass it during task creation
image


3:and then check it in the vApplicationStackOverflowHook function

thanks for your reply

Can you set the macro to 2 instead of 1? FreeRTOS - stacks and stack overflow checking

I would not rely on checking the xTask value, just while(1) {} so that you can catch it.

OK, I tried setting the configCHECK_FOR_STACK_OVERFLOW macro to 2.
image
Then I modified the content of the vApplicationStackOverflowHook function.


Then I checked for stack overflow, however, in debug mode, the program did not stop at the breakpoint inside this function, and the LED light did not flash rapidly.
Additionally, I attempted to continuously increase the amount of memory allocated to the SHT2x task until it reached a point where there was insufficient memory to allocate (16384 bytes), but the previous issues persisted throughout.
This is the method I implemented in the program to check if tasks fail to allocate due to insufficient memory.
1: Create a BaseType_t variable to receive the return value of the xTaskCreate function.

2:Then check if the allocation failure was due to insufficient memory. If it is, then jump to a while(1) loop that would not be executed under normal circumstances.

Can you explain more about the behavior of the program when the bug is present?

Of course, when the bug occurs, it seems that all processes have stopped, and the program appears to be stuck in an infinite loop. When I entered debug mode to investigate, I found that the program stopped at the second-to-last statement of the xTimerCreateTimerTask function in the timers.c file, which is configASSERT( xReturn ); . Then, I checked the value of the variable xReturn and found that it was <optimized out> .


print xReturn

Does the problem go away when you use a lower toolchain version?

platform_packages = 
   toolchain-gccarmnoneeabi@~1.70201.0

in the platformio.ini for 7.2.1.

Sorry, the package “stm32duino” I’m using doesn’t seem to support very low versions. Instead,I used platform_packages = toolchain-gccarmnoneeabi@1.80201.181220 for 8.2.1 as an alternative. After successfully compiling and running, the behavior and debugging results are still the same as before.
The program still stops at this statement.