Global const pointers are resolved by the optimizer - but global references are not - why?

What is the big difference between a global const pointer and a global reference for the VS2010 optimizer? Why is the reference not resolved?

typedef unsigned char byte_t; typedef unsigned short word_t; struct byte_reg_t { byte_t low; byte_t high; }; union word_reg_t { word_t value; byte_reg_t part; }; word_reg_t r16; byte_t& low_ref = r16.part.low; byte_t* const low_ptr = &r16.part.low; #define SPLIT() _asm nop; int main() { low_ref = 4; SPLIT() byte_t a = r16.part.low; SPLIT() byte_t b = low_ref; SPLIT() byte_t c = *low_ptr; SPLIT() return a+b+c; } 

Compiled in Release mode with assembly output, you get these results:

 ;byte_t a = r16.part.low;
 mov cl, BYTE PTR ?r16@@3Tword_reg_t@@A
 ;byte_t b = low_ref;
 mov edx, DWORD PTR ?low_ref@@3AAEA ; low_ref
 mov dl, BYTE PTR [edx]
 ;byte_t c = *low_ptr;
 mov al, BYTE PTR ?r16@@3Tword_reg_t@@A

unmodified disassembly

 .text:00401000 _main proc near ; CODE XREF: __tmainCRTStartup+11D
 .text:00401000 mov eax, ?low_ref@@3AAEA ; uchar & low_ref
 .text:00401005 mov byte ptr [eax], 4
 .text:00401008 nop
 .text:00401009 mov cl, ?r16@@3Tword_reg_t@@A ; word_reg_t r16
 .text:0040100F nop
 .text:00401010 mov edx, ?low_ref@@3AAEA ; uchar & low_ref
 .text:00401016 mov dl, [edx]
 .text:00401018 nop
 .text:00401019 mov al, ?r16@@3Tword_reg_t@@A ; word_reg_t r16
 .text:0040101E nop
 .text:0040101F movzx eax, al
 .text:00401022 movzx edx, dl
 .text:00401025 movzx ecx, cl
 .text:00401028 add eax, edx
 .text:0040102A add eax, ecx
 .text:0040102C retn
 .text:0040102C _main endp
 .data:00403374 ?r16@@3Tword_reg_t@@A db ? ; DATA XREF: _main+9
 .data:00403374 ; _main+19
 .data:00403375 align 4
 .data:00403018 ; unsigned char & low_ref
 .data:00403018 ?low_ref@@3AAEA dd offset ?r16@@3Tword_reg_t@@A ; DATA XREF: _main
 .data:00403018 ; _main+10
 .data:00403018 ; word_reg_t r16

I tested several variants (returning the value from a function, etc.) - the access is never resolved when low_ref is used.

  • is the optimizer stupid?
  • unusual case for optimization?
  • some restriction in the C/C++ standard?

UPDATE

This seems like an unusual case for optimization - thanks Michael Burr

It works if the reference is at function scope - or inside a class or struct instantiated at function scope (but it is still strange that the optimizer resolves the const pointer but not the reference, which is 100% identical).

UPDATE 2

It's even weirder - if you switch from byte_t to int, then both get resolved: the const pointer and the reference.

  • global const ptr to global byte_t var: resolved
  • global const ptr to global int var: resolved
  • global reference to global byte_t var: NOT RESOLVED
  • global reference to global int var: resolved
  • local reference to global byte_t var: resolved
  • local reference to global int var: resolved

So there is a slight difference in how the optimizer treats const pointers and references, depending on the scope of the reference ... and the referenced type ... sometimes :)

UPDATE 3

simpler test code - verified using VS2010 and clang 3.1

 typedef unsigned char byte_t; typedef unsigned int dword_t; //for msvc #define SPLIT() _asm nop _asm nop; //for clang //#define SPLIT() asm("nop"); asm("nop"); byte_t byte; dword_t dword; byte_t& global_ref_byte = byte; dword_t& global_ref_dword = dword; byte_t* const global_ptrc_byte = &byte; dword_t* const global_ptrc_dword = &dword; int main(int argc, char** argv) { byte_t& local_ref_byte = byte; dword_t& local_ref_dword = dword; dword_t random = (dword_t)argv; byte = (byte_t)random; dword = (dword_t)random; SPLIT() byte_t a = global_ref_byte; SPLIT() dword_t b = global_ref_dword; SPLIT() byte_t c = *global_ptrc_byte; SPLIT() dword_t d = *global_ptrc_dword; SPLIT() byte_t e = local_ref_byte; SPLIT() dword_t f = local_ref_dword; SPLIT() dword_t result = a+b+c+d+e+f; return result; } 

VS2010 disassembly

 .text:00401000 ; int __cdecl main(int argc, const char **argv, const char **envp) .text:00401000 _main proc near ; CODE XREF: ___tmainCRTStartup+11D .text:00401000 .text:00401000 argc = dword ptr 8 .text:00401000 argv = dword ptr 0Ch .text:00401000 envp = dword ptr 10h .text:00401000 .text:00401000 push ebp .text:00401001 mov ebp, esp .text:00401003 mov eax, [ebp+argv] .text:00401006 push ebx .text:00401007 push esi .text:00401008 push edi .text:00401009 mov byte_403374, al .text:0040100E mov dword_403378, eax .text:00401013 nop .text:00401014 nop .text:00401015 mov eax, off_40301C .text:0040101A mov al, [eax] .text:0040101C nop .text:0040101D nop .text:0040101E mov ecx, dword_403378 .text:00401024 nop .text:00401025 nop .text:00401026 mov dl, byte_403374 .text:0040102C nop .text:0040102D nop .text:0040102E mov esi, dword_403378 .text:00401034 nop .text:00401035 nop .text:00401036 mov bl, byte_403374 .text:0040103C nop .text:0040103D nop .text:0040103E mov edi, dword_403378 .text:00401044 nop .text:00401045 nop .text:00401046 movzx edx, dl .text:00401049 movzx ebx, bl .text:0040104C add edx, edi .text:0040104E movzx eax, al .text:00401051 add edx, ebx .text:00401053 add eax, edx .text:00401055 pop edi .text:00401056 add eax, esi .text:00401058 pop esi .text:00401059 add eax, ecx .text:0040105B pop ebx .text:0040105C pop ebp .text:0040105D retn .text:0040105D _main endp 

clang 3.1 disassembly

 .text:004012E0 sub_4012E0 proc near ; CODE XREF: sub_401020+91 .text:004012E0 .text:004012E0 arg_4 = dword ptr 0Ch .text:004012E0 .text:004012E0 push ebp .text:004012E1 mov ebp, esp .text:004012E3 call sub_4014F0 .text:004012E8 mov eax, [ebp+arg_4] .text:004012EB mov byte_402000, al .text:004012F0 mov dword_402004, eax .text:004012F5 nop .text:004012F6 nop .text:004012F7 movzx eax, byte_402000 .text:004012FE nop .text:004012FF nop .text:00401300 add eax, dword_402004 .text:00401306 nop .text:00401307 nop .text:00401308 movzx ecx, byte_402000 .text:0040130F add ecx, eax .text:00401311 nop .text:00401312 nop .text:00401313 add ecx, dword_402004 .text:00401319 nop .text:0040131A nop .text:0040131B movzx eax, byte_402000 .text:00401322 add eax, ecx .text:00401324 nop .text:00401325 nop .text:00401326 add eax, dword_402004 .text:0040132C nop .text:0040132D nop .text:0040132E pop ebp .text:0040132F retn .text:0040132F sub_4012E0 endp 

Without the nops, both optimizers produce better code, but clang's is still better.

VS2010 (more code due to unresolved byte reference)

 .text:00401003 mov eax, [ebp+argv] .text:00401006 movzx ecx, al .text:00401009 lea edx, [eax+eax*2] .text:0040100C mov byte_403374, al .text:00401011 mov dword_403378, eax .text:00401016 lea eax, [edx+ecx*2] .text:00401019 mov ecx, off_40301C .text:0040101F movzx edx, byte ptr [ecx] .text:00401022 add eax, edx 

clang 3.1:

 .text:004012E8 mov eax, [ebp+arg_4] .text:004012EB mov byte_402000, al .text:004012F0 mov dword_402004, eax .text:004012F5 movzx ecx, al .text:004012F8 add ecx, eax .text:004012FA lea eax, [ecx+ecx*2] 
+7
source share
1 answer

Here is what I believe is happening. The reference is treated like a global non-const pointer. You can see this if you remove the const from low_ptr.

You can also see that if you move the reference so that it is local to the function, the compiler has no trouble optimizing the access through it.

I would guess that since global references are quite rare (a statistic I admit I just made up), little effort has gone into optimizing them.

+5
source

All Articles