What is the big difference between a global pointer and a global reference for the VS2010 optimizer? Why is the reference not resolved?
// Byte- and word-sized integer types used by the repro.
typedef unsigned char byte_t;
typedef unsigned short word_t;

// Two single-byte members; `low` is declared before `high`.
struct byte_reg_t
{
    byte_t low;
    byte_t high;
};

// Overlays one word_t value with a byte_reg_t, so the same storage can be
// accessed as a whole word or as its individual bytes.
union word_reg_t
{
    word_t value;
    byte_reg_t part;
};

// The global object that every access below targets.
word_reg_t r16;

// Two global aliases for the same byte (r16.part.low):
// one as a reference, one as a const pointer.
byte_t& low_ref = r16.part.low;
byte_t* const low_ptr = &r16.part.low;

// Expands to an MSVC inline-assembly `nop`; it shows up in the generated code
// between the statements it separates, which makes the disassembly easier to
// map back to the source lines.
#define SPLIT() _asm nop;

// Writes 4 through the reference, then reads the same byte back three ways
// (direct member access, reference, const pointer) and returns the sum, so
// none of the reads is unused.
int main()
{
    low_ref = 4;              // store through the global reference
    SPLIT()
    byte_t a = r16.part.low;  // read via direct member access
    SPLIT()
    byte_t b = low_ref;       // read via the global reference
    SPLIT()
    byte_t c = *low_ptr;      // read via the global const pointer
    SPLIT()
    return a+b+c;
}
Compiled in Release mode with assembly output, you get these results:
unmodified disassembly
.text:00401000 _main proc near ; CODE XREF: __tmainCRTStartup+11D
.text:00401000     mov eax, ?low_ref@@3AAEA ; uchar & low_ref
.text:00401005     mov byte ptr [eax], 4
.text:00401008     nop
.text:00401009     mov cl, ?r16@@3Tword_reg_t@@A ; word_reg_t r16
.text:0040100F     nop
.text:00401010     mov edx, ?low_ref@@3AAEA ; uchar & low_ref
.text:00401016     mov dl, [edx]
.text:00401018     nop
.text:00401019     mov al, ?r16@@3Tword_reg_t@@A ; word_reg_t r16
.text:0040101E     nop
.text:0040101F     movzx eax, al
.text:00401022     movzx edx, dl
.text:00401025     movzx ecx, cl
.text:00401028     add eax, edx
.text:0040102A     add eax, ecx
.text:0040102C     retn
.text:0040102C _main endp
.data:00403374 ?r16@@3Tword_reg_t@@A db ? ; DATA XREF: _main+9
.data:00403374                            ; _main+19
.data:00403375     align 4
.data:00403018 ; unsigned char & low_ref
.data:00403018 ?low_ref@@3AAEA dd offset ?r16@@3Tword_reg_t@@A ; DATA XREF: _main
.data:00403018                            ; _main+10
.data:00403018                            ; word_reg_t r16
I tried several variants (returning the value from a function, etc.) - the access is never resolved when low_ref is used
- is the optimizer stupid?
- unusual case for optimization?
- some restriction in the C/C++ standard?
UPDATE
This seems like an unusual case for optimization - thanks Michael Burr
It works if the reference is in function scope - or inside a class or struct instantiated in function scope (but it's still strange that the optimizer resolves the const ptr but not the reference, although the two are 100% identical).
UPDATE 2
It's even weirder - if you switch from byte_t to int, then both get resolved: the const ptr and the reference
- global const ptr to global byte_t var: resolved
- global const ptr to global int var: resolved
- global reference to global byte_t var: NOT RESOLVED
- global reference to global int var: resolved
- global reference to local byte_t var: resolved
- global reference to local int var: resolved
So there is a slight difference in how the optimizer treats const pointers and references, depending on the reference's scope ..... and the referenced type ... sometimes :)
UPDATE 3
simpler test code - verified using VS2010 and clang 3.1
// Integer typedefs for the simpler test: a byte-sized type and an
// unsigned-int type. NOTE(review): only these two declarations of the test
// program are visible here; the remainder of the code appears to be missing.
typedef unsigned char byte_t;
typedef unsigned int dword_t;
VS2010 disassembly
.text:00401000 ; int __cdecl main(int argc, const char **argv, const char **envp) .text:00401000 _main proc near ; CODE XREF: ___tmainCRTStartup+11D .text:00401000 .text:00401000 argc = dword ptr 8 .text:00401000 argv = dword ptr 0Ch .text:00401000 envp = dword ptr 10h .text:00401000 .text:00401000 push ebp .text:00401001 mov ebp, esp .text:00401003 mov eax, [ebp+argv] .text:00401006 push ebx .text:00401007 push esi .text:00401008 push edi .text:00401009 mov byte_403374, al .text:0040100E mov dword_403378, eax .text:00401013 nop .text:00401014 nop .text:00401015 mov eax, off_40301C .text:0040101A mov al, [eax] .text:0040101C nop .text:0040101D nop .text:0040101E mov ecx, dword_403378 .text:00401024 nop .text:00401025 nop .text:00401026 mov dl, byte_403374 .text:0040102C nop .text:0040102D nop .text:0040102E mov esi, dword_403378 .text:00401034 nop .text:00401035 nop .text:00401036 mov bl, byte_403374 .text:0040103C nop .text:0040103D nop .text:0040103E mov edi, dword_403378 .text:00401044 nop .text:00401045 nop .text:00401046 movzx edx, dl .text:00401049 movzx ebx, bl .text:0040104C add edx, edi .text:0040104E movzx eax, al .text:00401051 add edx, ebx .text:00401053 add eax, edx .text:00401055 pop edi .text:00401056 add eax, esi .text:00401058 pop esi .text:00401059 add eax, ecx .text:0040105B pop ebx .text:0040105C pop ebp .text:0040105D retn .text:0040105D _main endp
clang 3.1 disassembly
.text:004012E0 sub_4012E0 proc near ; CODE XREF: sub_401020+91 .text:004012E0 .text:004012E0 arg_4 = dword ptr 0Ch .text:004012E0 .text:004012E0 push ebp .text:004012E1 mov ebp, esp .text:004012E3 call sub_4014F0 .text:004012E8 mov eax, [ebp+arg_4] .text:004012EB mov byte_402000, al .text:004012F0 mov dword_402004, eax .text:004012F5 nop .text:004012F6 nop .text:004012F7 movzx eax, byte_402000 .text:004012FE nop .text:004012FF nop .text:00401300 add eax, dword_402004 .text:00401306 nop .text:00401307 nop .text:00401308 movzx ecx, byte_402000 .text:0040130F add ecx, eax .text:00401311 nop .text:00401312 nop .text:00401313 add ecx, dword_402004 .text:00401319 nop .text:0040131A nop .text:0040131B movzx eax, byte_402000 .text:00401322 add eax, ecx .text:00401324 nop .text:00401325 nop .text:00401326 add eax, dword_402004 .text:0040132C nop .text:0040132D nop .text:0040132E pop ebp .text:0040132F retn .text:0040132F sub_4012E0 endp
without nops, both optimizers can create better code, but clang is even better
VS2010 (more code due to unresolved byte reference)
.text:00401003 mov eax, [ebp+argv] .text:00401006 movzx ecx, al .text:00401009 lea edx, [eax+eax*2] .text:0040100C mov byte_403374, al .text:00401011 mov dword_403378, eax .text:00401016 lea eax, [edx+ecx*2] .text:00401019 mov ecx, off_40301C .text:0040101F movzx edx, byte ptr [ecx] .text:00401022 add eax, edx
clang 3.1:
.text:004012E8 mov eax, [ebp+arg_4] .text:004012EB mov byte_402000, al .text:004012F0 mov dword_402004, eax .text:004012F5 movzx ecx, al .text:004012F8 add ecx, eax .text:004012FA lea eax, [ecx+ecx*2]