I tried the following gcc 4.4.5 code on Linux and gcc-llvm on Mac OSX (Xcode 4.2.1) and this . The following is the source and the generated parsing of the corresponding functions. (Added: compiled with gcc -O2 main.c )
#include <stdio.h> __attribute__((noinline)) static void g(long num) { long m, n; printf("%p %ld\n", &m, n); return g(num-1); } __attribute__((noinline)) static void h(long num) { long m, n; printf("%ld %ld\n", m, n); return h(num-1); } __attribute__((noinline)) static void f(long * num) { scanf("%ld", num); g(*num); h(*num); return f(num); } int main(void) { printf("int:%lu long:%lu unsigned:%lu\n", sizeof(int), sizeof(long), sizeof(unsigned)); long num; f(&num); return 0; }
08048430 <g>: 8048430: 55 push %ebp 8048431: 89 e5 mov %esp,%ebp 8048433: 53 push %ebx 8048434: 89 c3 mov %eax,%ebx 8048436: 83 ec 24 sub $0x24,%esp 8048439: 8d 45 f4 lea -0xc(%ebp),%eax 804843c: c7 44 24 08 00 00 00 movl $0x0,0x8(%esp) 8048443: 00 8048444: 89 44 24 04 mov %eax,0x4(%esp) 8048448: c7 04 24 d0 85 04 08 movl $0x80485d0,(%esp) 804844f: e8 f0 fe ff ff call 8048344 < printf@plt > 8048454: 8d 43 ff lea -0x1(%ebx),%eax 8048457: e8 d4 ff ff ff call 8048430 <g> 804845c: 83 c4 24 add $0x24,%esp 804845f: 5b pop %ebx 8048460: 5d pop %ebp 8048461: c3 ret 8048462: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi 8048469: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi 08048470 <h>: 8048470: 55 push %ebp 8048471: 89 e5 mov %esp,%ebp 8048473: 83 ec 18 sub $0x18,%esp 8048476: 66 90 xchg %ax,%ax 8048478: c7 44 24 08 00 00 00 movl $0x0,0x8(%esp) 804847f: 00 8048480: c7 44 24 04 00 00 00 movl $0x0,0x4(%esp) 8048487: 00 8048488: c7 04 24 d8 85 04 08 movl $0x80485d8,(%esp) 804848f: e8 b0 fe ff ff call 8048344 < printf@plt > 8048494: eb e2 jmp 8048478 <h+0x8> 8048496: 8d 76 00 lea 0x0(%esi),%esi 8048499: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi 080484a0 <f>: 80484a0: 55 push %ebp 80484a1: 89 e5 mov %esp,%ebp 80484a3: 53 push %ebx 80484a4: 89 c3 mov %eax,%ebx 80484a6: 83 ec 14 sub $0x14,%esp 80484a9: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi 80484b0: 89 5c 24 04 mov %ebx,0x4(%esp) 80484b4: c7 04 24 e1 85 04 08 movl $0x80485e1,(%esp) 80484bb: e8 94 fe ff ff call 8048354 < __isoc99_scanf@plt > 80484c0: 8b 03 mov (%ebx),%eax 80484c2: e8 69 ff ff ff call 8048430 <g> 80484c7: 8b 03 mov (%ebx),%eax 80484c9: e8 a2 ff ff ff call 8048470 <h> 80484ce: eb e0 jmp 80484b0 <f+0x10>
We see that g() and h() are basically identical, except for the & (address of) operator next to the argument m of printf() (and the irrelevant %ld and %p ). However, h() optimized for the tail call, while g() is not. Why?