GCC does not optimize out a struct copy of an uninitialized static const

First off, I am developing for a microcontroller, so RAM and ROM usage are priorities.

I understand that this may read like a bug report, or as not specific enough. If I do not get any answers here, I will file it as a bug report.

I like to use static const structs to initialize stack structures to their defaults. In most cases, the default struct is all zeros. I prefer to do this with static const structs rather than memset (memset or struct assignment, static const assignment).

My current toolchain is arm-none-eabi-gcc-4_7_3, compiling for a Cortex M4 target with -Os optimization.

I noticed the following: GCC generates different code if I explicitly initialize my static const struct to zero than if I don't ( static const struct foo; vs static const struct foo = {0}; ). In particular, it allocates the uninitialized static const structs in memory and performs copy operations from them.

Here is a sample code:

 struct foo {int foo; int bar;}; struct bar {int bar[20];}; static const struct foo foo1_init, foo2_init = {0}; static const struct bar bar1_init, bar2_init = {0}; extern struct foo foo1, foo2; extern struct bar bar1, bar2; void init_foo1(void) { foo1 = foo1_init; } void init_foo2(void) { foo2 = foo2_init; } void init_bar1(void) { bar1 = bar1_init; } void init_bar2(void) { bar2 = bar2_init; } 

Compiled, this leads to the following assembly listing (reordered and trimmed for brevity):

  396 .section .bss.foo1_init,"aw",%nobits 397 .align 2 398 .set .LANCHOR0,. + 0 401 foo1_init: 402 0000 00000000 .space 8 402 00000000 40 .L2: 41 0010 00000000 .word .LANCHOR0 42 0014 00000000 .word foo1 55: **** foo1 = foo1_init; 32 .loc 1 55 0 33 0000 034A ldr r2, .L2 34 0002 044B ldr r3, .L2+4 35 0004 92E80300 ldmia r2, {r0, r1} 36 0008 83E80300 stmia r3, {r0, r1} 67 .L5: 68 000c 00000000 .word foo2 60: **** foo2 = foo2_init; 60 0000 024B ldr r3, .L5 61 0002 0022 movs r2, #0 62 0004 1A60 str r2, [r3, #0] 63 0006 5A60 str r2, [r3, #4] 389 .section .bss.bar1_init,"aw",%nobits 390 .align 2 391 .set .LANCHOR1,. + 0 394 bar1_init: 395 0000 00000000 .space 80 395 00000000 395 00000000 395 00000000 395 00000000 98 .L8: 99 0010 00000000 .word .LANCHOR1 100 0014 00000000 .word bar1 65: **** bar1 = bar1_init; 89 .loc 1 65 0 90 0002 0349 ldr r1, .L8 91 0004 0348 ldr r0, .L8+4 92 0006 5022 movs r2, #80 93 0008 FFF7FEFF bl memcpy 130 .L11: 131 0010 00000000 .word bar2 70: **** bar2 = bar2_init; 121 .loc 1 70 0 122 0002 0021 movs r1, #0 123 0004 5022 movs r2, #80 124 0006 0248 ldr r0, .L11 125 0008 FFF7FEFF bl memset 

We can see that for foo2 = foo2_init and bar2 = bar2_init the compiler has optimized the copies away, storing zeros directly to foo2 and calling memset for bar2.

We can see that for foo1 = foo1_init and bar1 = bar1_init the compiler performs explicit copies: it loads into and stores from registers for foo1, and calls memcpy for bar1.

I have a few questions:

  • Is this the expected GCC behavior? I would expect uninitialized static const structs to follow the same path inside GCC as zero-initialized static const structs, and therefore produce the same output.
  • Does this happen with other versions of ARM GCC? I have no other versions at hand, and all of the online C-to-assembly compilers are actually C++ compilers.
  • Does this happen for other GCC target architectures? Again, I have no other versions at hand.
+7
optimization with gcc struct
source share
1 answer

I tested on amd64 and, to my surprise, this appears to be consistent behavior (though I don't know whether it is a bug). gcc places foo1_init and bar1_init in the common data segment, i.e. the segment of values that the operating system zero-initializes (.bss). foo2_init and bar2_init are placed in the read-only segment (.rodata), as if they were non-zero initialized values. This can be seen using -O0. Since you are not using an OS, the section that an OS would normally zero-initialize is instead initialized manually by gcc and/or the linker, and the values are then copied from it. gcc optimizes the .rodata values by emitting a direct memset and eliminating the dead *2_init variables. clang, however, optimizes both cases.

Here follows the output of gcc (-O0):

  .file "defs.c" .local foo1_init .comm foo1_init,8,8 .section .rodata .align 8 .type foo2_init, @object .size foo2_init, 8 foo2_init: .zero 8 .local bar1_init .comm bar1_init,80,32 .align 32 .type bar2_init, @object .size bar2_init, 80 bar2_init: .zero 80 .text .globl init_foo1 .type init_foo1, @function init_foo1: .LFB0: .cfi_startproc pushq %rbp .cfi_def_cfa_offset 16 .cfi_offset 6, -16 movq %rsp, %rbp .cfi_def_cfa_register 6 movq foo1_init(%rip), %rax movq %rax, foo1(%rip) nop popq %rbp .cfi_def_cfa 7, 8 ret .cfi_endproc .LFE0: .size init_foo1, .-init_foo1 .globl init_foo2 .type init_foo2, @function init_foo2: .LFB1: .cfi_startproc pushq %rbp .cfi_def_cfa_offset 16 .cfi_offset 6, -16 movq %rsp, %rbp .cfi_def_cfa_register 6 movq $0, foo2(%rip) nop popq %rbp .cfi_def_cfa 7, 8 ret .cfi_endproc .LFE1: .size init_foo2, .-init_foo2 .globl init_bar1 .type init_bar1, @function init_bar1: .LFB2: .cfi_startproc pushq %rbp .cfi_def_cfa_offset 16 .cfi_offset 6, -16 movq %rsp, %rbp .cfi_def_cfa_register 6 movq bar1_init(%rip), %rax movq %rax, bar1(%rip) movq bar1_init+8(%rip), %rax movq %rax, bar1+8(%rip) movq bar1_init+16(%rip), %rax movq %rax, bar1+16(%rip) movq bar1_init+24(%rip), %rax movq %rax, bar1+24(%rip) movq bar1_init+32(%rip), %rax movq %rax, bar1+32(%rip) movq bar1_init+40(%rip), %rax movq %rax, bar1+40(%rip) movq bar1_init+48(%rip), %rax movq %rax, bar1+48(%rip) movq bar1_init+56(%rip), %rax movq %rax, bar1+56(%rip) movq bar1_init+64(%rip), %rax movq %rax, bar1+64(%rip) movq bar1_init+72(%rip), %rax movq %rax, bar1+72(%rip) nop popq %rbp .cfi_def_cfa 7, 8 ret .cfi_endproc .LFE2: .size init_bar1, .-init_bar1 .globl init_bar2 .type init_bar2, @function init_bar2: .LFB3: .cfi_startproc pushq %rbp .cfi_def_cfa_offset 16 .cfi_offset 6, -16 movq %rsp, %rbp .cfi_def_cfa_register 6 movl $bar2, %eax movl $80, %ecx movl $0, %esi movq %rsi, (%rax) movl %ecx, %edx addq %rax, %rdx addq $8, %rdx movq %rsi, -16(%rdx) leaq 8(%rax), %rdx andq $-8, %rdx subq %rdx, %rax addl 
%eax, %ecx andl $-8, %ecx movl %ecx, %eax shrl $3, %eax movl %eax, %ecx movq %rdx, %rdi movq %rsi, %rax rep stosq nop popq %rbp .cfi_def_cfa 7, 8 ret .cfi_endproc .LFE3: .size init_bar2, .-init_bar2 .ident "GCC: (GNU) 6.3.1 20170306" .section .note.GNU-stack,"",@progbits 
-1
source share

All Articles