Why is the modulus of short integers not correct in Release mode?

The modulus of short integers is incorrect. This is really strange and has already cost me two days. I narrowed down the problematic code as follows (as much as possible):

/*
 * Minimal reproducer: prints j % MSize for every (i, j) pair of a nested loop
 * over short integers. The statement order and variable scoping are kept
 * exactly as posted, since the reported problem depends on them.
 */
#include <stdio.h>
#include <stdlib.h>

/*
 * Walks i over [1, Height - 2] and j over [1, Width - 2], printing one line
 * per (i, j) pair with i, k, j, MSize and j % MSize.
 *
 * Width, Height: exclusive upper bounds (minus one) for the j and i loops.
 * MSize: modulus divisor; must be non-zero because it is used in j % MSize.
 *
 * Always returns 0.
 */
int foo(short Width, short Height, short MSize)
{
    /* k counts the iterations where j is a multiple of MSize;
       pos counts every inner-loop iteration. */
    short i = 0, k = 0, pos = 0;
    short j = 0;

    /* This loop has no body; it only advances j before the nested loops
       reuse the same variable. */
    for(j = 1; j < Width - 1; j = j + 1) {/* a blank loop */}

    for(i = 1; i < Height - 1; i = i + 1) {
        /* j is the function-scope variable declared above, reset to 1 here. */
        for(j = 1; j < Width - 1; j = j + 1) {
            if((j % MSize) == 0) {
                k = k + 1;
            }
            /* The last argument recomputes j % MSize; this is the value to
               compare between build configurations. */
            printf("i=%d, k=%d, j=%d, MSize=%d, j mod MSize=%d\n", (int)i, (int)k, (int)j, (int)MSize, (int)(j % MSize));
            /* Report on stderr once 1024 or more iterations have run. */
            if (pos >= 1024) {
                fprintf(stderr, "pos = %d, over 1024\n", (int)pos);
            }
            pos = pos + 1;
        }
    }
    return 0;
}

/* Runs the reproducer with Width = 32, Height = 32, MSize = 8
   (30 x 30 = 900 inner iterations); argc and argv are unused. */
int main(int argc, char* argv[])
{
    foo(32, 32, 8);
    return 0;
}

When compiled in Debug mode, the above code works fine and the result of j % MSize is correct. However, when compiled in Release mode, the result of j % MSize is always 7, which is nonsense (tested in Visual Studio 2005/2012/2013). There are no memory operations, so it should not be caused by a corrupted stack. Does anybody know what is going on?

The output I see is (slightly edited):

 j=10, MSize=8, j mod MSize=7 j=11, MSize=8, j mod MSize=7 j=12, MSize=8, j mod MSize=7 j=13, MSize=8, j mod MSize=7 j=14, MSize=8, j mod MSize=7 j=15, MSize=8, j mod MSize=7 j=16, MSize=8, j mod MSize=7 j=17, MSize=8, j mod MSize=7 j=18, MSize=8, j mod MSize=7 j=19, MSize=8, j mod MSize=7 j=20, MSize=8, j mod MSize=7 j=21, MSize=8, j mod MSize=7 j=22, MSize=8, j mod MSize=7 j=23, MSize=8, j mod MSize=7 j=24, MSize=8, j mod MSize=7 j=25, MSize=8, j mod MSize=7 j=26, MSize=8, j mod MSize=7 j=27, MSize=8, j mod MSize=7 

The following is a build log:

  1>Project "E:\Code\workspace\C\GeneralC\SNDFeatureExtract\SNDFeatureExtract.vcxproj" on node 2 (Build target(s)). 1>ClCompile: D:\Program Files\Microsoft Visual Studio 11.0\VC\bin\CL.exe /c /Zi /nologo /W3 /WX- /sdl /O2 /Oi /Oy- /GL /D WIN32 /D NDEBUG /D _CONSOLE /D _MBCS /Gm- /EHsc /MT /GS /Gy /fp:precise /Zc:wchar_t /Zc:forScope /Fo"Release\\" /Fd"Release\vc110.pdb" /Gd /TP /analyze- /errorReport:prompt WeirdBug.cpp WeirdBug.cpp Link: D:\Program Files\Microsoft Visual Studio 11.0\VC\bin\link.exe /ERRORREPORT:PROMPT /OUT:"E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.exe" /INCREMENTAL:NO /NOLOGO kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /MANIFEST /MANIFESTUAC:"level='asInvoker' uiAccess='false'" /manifest:embed /DEBUG /PDB:"E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.pdb" /SUBSYSTEM:CONSOLE /OPT:REF /OPT:ICF /LTCG /TLBID:1 /DYNAMICBASE /NXCOMPAT /IMPLIB:"E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.lib" /MACHINE:X86 /SAFESEH Release\WeirdBug.obj Generating code Finished generating code SNDFeatureExtract.vcxproj -> E:\Code\workspace\C\GeneralC\Release\SNDFeatureExtract.exe 1>Done Building Project "E:\Code\workspace\C\GeneralC\SNDFeatureExtract\SNDFeatureExtract.vcxproj" (Build target(s)). 

Below is the disassembly shown by Visual Studio:

  short i = 0, k = 0, pos = 0; short j = 0; for(j = 1; j < Width - 1; j = j + 1) 00801014 mov edi,1FF983C8h 00801019 jl foo+12h (0801012h) {/* a blank loop */} for(i = 1; i < Height - 1; i = i + 1) { 0080101B mov edx,1 00801020 mov dword ptr [ebp-4],1 00801027 mov dword ptr [ebp-8],edx 0080102A and ecx,80000007h 00801030 jns foo+37h (0801037h) 00801032 dec ecx 00801033 or ecx,0FFFFFFF8h 00801036 inc ecx 00801037 mov dword ptr [ebp-0Ch],ecx 0080103A lea ebx,[ebx] 00801040 mov eax,1 for(j = 1; j < Width - 1; j = j + 1) { 00801045 mov ebx,eax if((j % MSize) == 0) { 00801047 test ecx,ecx 00801049 jne foo+4Ch (080104Ch) k = k + 1; 0080104B inc edi } printf_s("i=%d, k=%d, j=%d, MSize=%d, j mod MSize=%d\n", (int)i, (int)k, (int)j, (int)MSize, (int)(j % MSize)); 0080104C push ecx 0080104D push 8 0080104F push eax 00801050 movsx eax,di 00801053 push eax 00801054 push edx 00801055 push 80CD30h 0080105A call printf_s (0801266h) if (pos >= 1024) { 0080105F mov eax,400h 00801064 add esp,18h 00801067 cmp si,ax 0080106A jl foo+86h (0801086h) fprintf_s(stderr, "pos = %d, over 1024\n", (int)pos); 0080106C movsx eax,si fprintf_s(stderr, "pos = %d, over 1024\n", (int)pos); 0080106F push eax 00801070 push 80CD5Ch 00801075 call __iob_func (0801175h) 0080107A add eax,40h 0080107D push eax 0080107E call fprintf_s (080127Ch) 00801083 add esp,0Ch for(j = 1; j < Width - 1; j = j + 1) { 00801086 mov ecx,dword ptr [ebp-0Ch] 00801089 mov edx,dword ptr [ebp-8] } pos = pos + 1; 0080108C inc ebx 0080108D movsx eax,bx 00801090 inc esi 00801091 cmp eax,1Fh 00801094 jl foo+47h (0801047h) {/* a blank loop */} for(i = 1; i < Height - 1; i = i + 1) { 00801096 mov eax,dword ptr [ebp-4] 00801099 inc eax 0080109A movsx edx,ax 0080109D mov dword ptr [ebp-4],eax 008010A0 mov dword ptr [ebp-8],edx 008010A3 cmp edx,1Fh 008010A6 jl foo+40h (0801040h) } } return 0; 008010A8 pop edi 008010A9 pop esi 008010AA xor eax,eax 008010AC pop ebx } 008010AD mov esp,ebp 008010AF pop ebp 008010B0 ret 
+6
source share
3 answers

This is caused by a compiler optimization, and it has something to do with the empty loop. But I'm not quite sure where exactly the problem is.

To just solve the issue, declare j as:

  volatile short j; 

and all will be well. The reason is that the program will then read j from memory every time, rather than from a register.

I debugged the assembly code and found that the program computes j % MSize once and stores it in memory immediately after the empty loop; each time before calling printf, it simply loads that stored value from memory instead of recomputing it.

 mov ecx,dword ptr [ebp-10h] // j % MSize @ memory push ecx // j % MSize mov ecx,dword ptr [ebp-0Ch] push 8 // MSize push eax // j movsx eax,word ptr [IdxY] movsx esi,di push esi // k push eax // IdxY push ecx // i // push static string and calling printf 

But by adding volatile, it will act like:

 mov dx,word ptr [j] movsx eax,dx // j and eax,80000007h // j % 8 push eax // push other vars and calling printf 

This recomputes the modulus and pushes it onto the stack for printf. So this looks like a compiler bug, because the compiler should read the current value of j from memory even without the volatile qualifier.

Since I cannot add comments right now :( ... I found that the bug depends on the /Ox-style optimization flags and the /GL flag. You have to choose one of the following:

 /O1 /O2 /Ox 

One of the options above must be combined with /GL to see the problem.

My IDE is Visual Studio 2010 10.0.40219.1 SP1Rel

+8
source

I do not see a problem

 $ gcc modulus.c $ ./a.out Width = 32, Height = 32, MSize = 8, Dim =16, sizeof(short)=2 i=1, IdxY=0, k=0, j=1, MSize=8, j mod MSize=1 i=1, IdxY=0, k=0, j=2, MSize=8, j mod MSize=2 i=1, IdxY=0, k=0, j=3, MSize=8, j mod MSize=3 i=1, IdxY=0, k=0, j=4, MSize=8, j mod MSize=4 i=1, IdxY=0, k=0, j=5, MSize=8, j mod MSize=5 i=1, IdxY=0, k=0, j=6, MSize=8, j mod MSize=6 i=1, IdxY=0, k=0, j=7, MSize=8, j mod MSize=7 i=1, IdxY=0, k=1, j=8, MSize=8, j mod MSize=0 

Am I missing something?

+2
source

In addition to the other answers already provided, I want to point out that you can prevent such errors with better coding practices (although in this case it is not your mistake, but probably a compiler bug).

Prefer to declare loop variables within the scope of the loop. Or, more generally: declare variables only in the scope where they are used.

If you change the second for -loop to this:

 for(short j = 1; j < Width - 1; j = j + 1) { 

so that j is declared in the scope of the for-loop, the compiler should treat j as a new variable that has nothing to do with the previous empty loop. It is therefore less prone to over-optimization that reuses previously computed values or memory locations. This small change fixes the bug in VS2013, and I find it much cleaner than using volatile.

+1
source

All Articles