How to de-obfuscate the ctk.c code with the 2001 IOCCC winner?

I saw ctk.c obfuscated code, but how can I start obfuscating it?

#include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/time.h> #include <signal.h> #define m(b)a=b;z=*a;while(*++a){y=*a;*a=z;z=y;} #define h(u)G=u<<3;printf("\e[%uq",l[u]) #define c(n,s)case n:s;continue char x[]="((((((((((((((((((((((",w[]= "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";char r[]={92,124,47},l[]={2,3,1 ,0};char*T[]={" |"," |","%\\|/%"," %%%",""};char d=1,p=40,o=40,k=0,*a,y,z,g= -1,G,X,**P=&T[4],f=0;unsigned int s=0;void u(int i){int n;printf( "\233;%uH\233L%c\233;%uH%c\233;%uH%s\23322;% uH@ \23323;%uH \n",*x-*w,r[d],*x+*w ,r[d],X,*P,p+=k,o);if(abs(px[21])>=w[21])exit(0);if(g!=G){struct itimerval t= {0,0,0,0};g+=((g<G)<<1)-1;t.it_interval.tv_usec=t.it_value.tv_usec=72000/((g>> 3)+1);setitimer(0,&t,0);f&&printf("\e[10;%u]",g+24);}f&&putchar(7);s+=(9-w[21] )*((g>>3)+1);o=p;m(x);m(w);(n=rand())&255||--*w||++*w;if(!(**P&&P++||n&7936)){ while(abs((X=rand()%76)-*x+2)-*w<6);++X;P=T;}(n=rand()&31)<3&&(d=n);!d&&--*x<= *w&&(++*x,++d)||d==2&&++*x+*w>79&&(--*x,--d);signal(i,u);}void e(){signal(14, SIG_IGN);printf("\e[0q\ecScore: %u\n",s);system("stty echo -cbreak");}int main (int C,char**V){atexit(e);(C<2||*V[1]!=113)&&(f=(C=*(int*)getenv("TERM"))==( int)0x756E696C||C==(int)0x6C696E75);srand(getpid());system("stty -echo cbreak" );h(0);u(14);for(;;)switch(getchar()){case 113:return 0;case 91:case 98:c(44,k =-1);case 32:case 110:c(46,k=0);case 93:case 109:c(47,k=1);c(49,h(0));c(50,h(1 ));c(51,h(2));c(52,h(3));}} 

http://www.ioccc.org/2001/ctk.hint :

 This is a game based on an Apple ][ Print Shop Companion easter egg named 'DRIVER', in which the goal is to drive as fast as you can down a long twisty highway without running off the road. Use ',./', '[ ]', or 'bnm' to go left, straight, and right respectively. Use '1234' to switch gears. 'q' quits. The faster you go and the thinner the road is, the more points you get. Most of the obfuscation is in the nonsensical if statements among other things. It works best on the Linux console: you get engine sound (!) and the * Lock keyboard lights tell you what gear you're in (none lit=4th). The 'q' argument (no leading '-') will silence the sound. It won't work on a terminal smaller than 80x24, but it works fine with more (try it in an XTerm with the "Unreadable" font and the window maximized vertically!). 
+6
source share
1 answer

1st step

Using:

 sed -e'/#include/d' ctk.c | gcc -E - | sed -e's/;/;\n/g' -e's/}/}\n/g' -e '/^#/d' | indent 

I managed to create the following result, which, although not perfect, already seems to be read much better:

 char x[] = "((((((((((((((((((((((", w[] = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; char r[] = { 92, 124, 47 } , l[] = { 2, 3, 1, 0} ; char *T[] = { " |", " |", "%\\|/%", " %%%", "" } ; char d = 1, p = 40, o = 40, k = 0, *a, y, z, g = -1, G, X, **P = &T[4], f = 0; unsigned int s = 0; void u (int i) { int n; printf ("\233; %uH\233L%c\233; %uH%c\233; %uH%s\23322; % uH@ \23323; %uH \n", *x - *w, r[d], *x + *w, r[d], X, *P, p += k, o); if (abs (p - x[21]) >= w[21]) exit (0); if (g != G) { struct itimerval t = { 0, 0, 0, 0 } ; g += ((g < G) << 1) - 1; t.it_interval.tv_usec = t.it_value.tv_usec = 72000 / ((g >> 3) + 1); setitimer (0, &t, 0); f && printf ("\e[10; %u]", g + 24); } f && putchar (7); s += (9 - w[21]) * ((g >> 3) + 1); o = p; a = x; z = *a; while (*++a) { y = *a; *a = z; z = y; } ; a = w; z = *a; while (*++a) { y = *a; *a = z; z = y; } ; (n = rand ()) & 255 || --*w || ++*w; if (!(**P && P++ || n & 7936)) { while (abs ((X = rand () % 76) - *x + 2) - *w < 6); ++X; P = T; } (n = rand () & 31) < 3 && (d = n); !d && --*x <= *w && (++*x, ++d) || d == 2 && ++*x + *w > 79 && (--*x, --d); signal (i, u); } void e () { signal (14, SIG_IGN); printf ("\e[0q\ecScore: %u\n", s); system ("stty echo -cbreak"); } int main (int C, char **V) { atexit (e); (C < 2 || *V[1] != 113) && (f = (C = *(int *) getenv ("TERM")) == (int) 0x756E696C || C == (int) 0x6C696E75); srand (getpid ()); system ("stty -echo cbreak"); G = 0 << 3; printf ("\e[%uq", l[0]); u (14); for (;;) switch (getchar ()) { case 113: return 0; case 91: case 98: case 44: k = -1; continue; case 32: case 110: case 46: k = 0; continue; case 93: case 109: case 47: k = 1; continue; case 49: G = 0 << 3; printf ("\e[%uq", l[0]); continue; case 50: G = 1 << 3; printf ("\e[%uq", l[1]); continue; case 51: G = 2 << 3; printf ("\e[%uq", l[2]); continue; case 52: G = 3 << 3; printf ("\e[%uq", l[3]); continue; } } 

... and now?

I do not think that at this moment it will be possible to automate the process, since the term "more" readable or "less" readable from now on may depend on the specific preferences of the reader.

One step that can be performed is to remove the escape sequences from the strings and place them somewhere separately. As it turned out, everything

 char l[] = {2, 3, 1, 0} 

has no other purpose than to use the main loop in escape sequences:

 printf ("\e[%uq", l[0]); 

etc. Having looked at their meaning:

 ESC [ 0 q: clear all LEDs ESC [ 1 q: set Scroll Lock LED ESC [ 2 q: set Num Lock LED ESC [ 3 q: set Caps Lock LED 

depending on your taste, you can exchange them using a macro or calling a function more meaningful to you, for example, clear_all_LEDs , etc.

I strongly doubt that the car will agree that this is a simplification. As it turned out, the entire main loop seems to work with keys entered by the user, so probably turning numbers into their corresponding characters can increase readability, for example, when replacing:

 case 113: return 0; case 91: case 98: case 44: k = -1; // ... case 49: G = 0 << 3; printf ("\e[%uq", l[0]); 

with something like:

 case 'q': return 0; case '[': case 'b': case ',': k = -1; // ... case '1': G = 0 << 3; set_Num_Lock_LED (); 

Oh - and although we are already on it, why don’t we want to change the name from this rather strange G to gear . Again, I strongly doubt that an automated process would find renaming from G to gear better than renaming it to butterfly . Well, perhaps this is not even the case.

When decorating names, perhaps this function referenced by one u is another candidate:

 u (14); 

with a more meaningful name update possible. And since we already included <signal.h> , why don't we de-code the code further by replacing 14 with SIGALRM as follows:

 upadate (SIGALRM); 

As you can see, the "deobfas" here requires the exact opposite step of the previous one. Replace the extension with a macro this time. How will the machine try to decide which one is more useful?

Another place where we might want to replace a bare number with something else is in the update function:

 f && putchar (7); 

Why not replace 7 with \a , because in the end it will be the same. Maybe we should change only the bare f with something more "significant."

Again I will vote butterfly again, but would like to name it play_sound :

 if (play_sound) putchar ('\a'); 

may be the more readable version we're looking for. Of course, we must not forget to replace f in all other places. The one who right at the beginning of our main function turned out to be such a criminal:

Holy mess

 int main (int C, char **V) { atexit (e); (C < 2 || *V[1] != 113) && (f = (C = *(int *) getenv ("TERM")) == (int) 0x756E696C || C == (int) 0x6C696E75); 

If you happily rename f to play_sound and e to - no, there is still no butterfly , this time I’ll rather call: - end we notice that the signature of the function seems a bit strange in terms of naming conventions: argc instead of C and argv instead of V looks more conditional. Thus, giving us:

 int main (int argc, char* argv[]) { atexit (end); (argc < 2 || *argv[1] != 113) && (playsound = (argc = *(int *) getenv ("TERM")) == (int) 0x756E696C || argc == (int) 0x6C696E75); 

Since this is not a beauty yet, we ask our standard guy and he tells us that it’s pretty good to replace

 (A || B) && (C) 

with

 if (A || B) { C } 

and

 E = (x=F)==H || x==I 

with

 x = F; if (x==H || x==I) A=1; else A=0;` 

So maybe it should be a more readable version of the whole code:

 if (argc < 2 || *argv[1] != 'q') { argc = *(int*) getenv ("TERM"); if (argc == (int) 0x756E69 || argc == (int) 0x6C696E75)) play_sound = 1; /* skip the else brach here as play_sound is alredy initialized to 0 */ } 

Now another guy appears and starts telling us that depending on something called endianness , you need to look for the strange numbers 0x6C696E75 and 0x756E69 if they are stored in memory (when interpreting raw byte values ​​as an ASCII code) just look at "linu" or "unil" . One of them is “unil” on one type of architecture and “linu” is another, and the other vice versa - on another architecture with varying accuracy.

So, take a closer look at what's happening here:

  • we get a pointer to a string from getenv ("TERM"), which we inherit from a pointer to int before dereferencing, thereby citing the bit pattern stored in the string location as int.
  • Next, we compare this value with what we would get if we did the same with "unil" or "linu" stored in this particular place.

We probably just want to check if the TERM environment variable is set to "linux", so our deobfuscated version might want to perform string comparisons here.

Since, on the other hand, we cannot be sure that also allowing terminals with names starting with "unil" to play sound may be a special feature of this software, so I decided it was probably better to leave it untouched.

Now what?

When renaming and recoding variable names and values, these weird char arrays may be our next victims. The following mess doesn't look very good:

 char x[] = "((((((((((((((((((((((", w[] = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; char r[] = { 92, 124, 47 }; 

Therefore, perhaps they could be changed to:

 char x_offset[] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 0 }; char width[] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0 }; const char border[] = "\\|/"; 

As you can see, I just chose to switch the way of describing the values ​​between x as a string constant in x, written as an array, since this value of the values ​​stored here seemed a little clearer to me.

While, on the other hand, I changed the way that the path r written only in the exact opposite direction, since again it seemed to me much more understandable.

When looking for all of these references to x , w and r , you can use the time to rename p and o to - sorry again butterfly - pos and old_pos when renaming s to score .

Change, for example:

  s += (9 - w[21]) * ((g >> 3) + 1); o = p; a = x; z = *a; while (*++a) { y = *a; *a = z; z = y; } ; a = w; z = *a; while (*++a) { y = *a; *a = z; z = y; } ; 

in

  /* update score */ score += (9 - width[NEXT_LINE]) * ((g >> 3) + 1); old_pos = pos; /* shift x_offset */ a = x_offset; z = *a; while (*++a) { y = *a; *a = z; z = y; }; /* shift width */ a = width; z = *a; while (*++a) { y = *a; *a = z; z = y; }; 

Besides the ability to turn it into some other cycle, there are not many possibilities for both displacement functions, therefore, perhaps adding the appropriate comment is the maximum you can do. Removing magic number 21 might be another NEXT_LINE idea, it seemed not the worst choice here.

The single character variable G still doesn't look too good. But renaming it to something like update_interval , there is also the ability to eliminate another strange terminal escape sequence:

  if (g != G) { struct itimerval t = { 0, 0, 0, 0 } ; g += ((g < G) << 1) - 1; t.it_interval.tv_usec = t.it_value.tv_usec = 72000 / ((g >> 3) + 1); setitimer (0, &t, 0); f && printf ("\e[10; %u]", g + 24); } 

Maybe it looks a bit more confusing than:

  /* update simulation speed */ if (update_interval != gear) { struct itimerval t = { 0, 0, 0, 0 } ; update_interval += ((update_interval < gear) << 1) - 1; t.it_interval.tv_usec = t.it_value.tv_usec = 72000 / ((update_interval >> 3) + 1); setitimer (0, &t, 0); if (play_sound) change_bell_frequency (update_interval + 24); } 

Recent fixes

Although the code should look much more readable, there are still a few unpleasant parts:

 !d && --*x <= *w && (++*x, ++d) || d == 2 && ++*x + *w > 79 && (--*x, --d); 

Choosing a different (hopefully) more meaningful name for d and breaking the operator's priority, you can get something like:

  if (curve == CURVE_LEFT) { --*x_offset; if (*x_offset < *width) { ++*x_offset; curve = CURVE_NONE; } } else if (curve == CURVE_RIGHT) { ++*x_offset; if (*x_offset + *width > 79) { --*x_offsett; curve = CURVE_NONE; } } 

instead, add the appropriate macros for all of these CURVE_... s.

Now there are those x , p and T tags that can be changed. Since this makes its purpose also slightly better visible in the code, I decided to reverse the order of the T lines, which I renamed to tree , which undoubtedly means that the calculation should also be fixed. In general, this is from:

 char *T[] = { " |", " |", "%\\|/%", " %%%", "" }; char X, **P = &T[4]; // ... if (!(**P && P++ || n & 7936)) { while (abs ((X = rand () % 76) - *x + 2) - *w < 6); ++X; P = T; } 

Sort of:

 char *tree[] = { "", " %%%", "%\\|/%", " |", " |", }; char **tree_line = tree; char tree_position; // ... /* update tree line pointer */ if (!(**tree_line && tree_line-- || n & 7936)) { /* find the right spot to grow */ while (abs ((tree_position = rand () % 76) - *x_offset + 2) - *width < 6) ; ++tree_position; tree_line = &tree[4]; } 

Saving the best part to the end

Although the code already seems a lot prettier to me, now another part is missing. This is the one that makes the whole conclusion. On this line, I say:

  printf ("\233;%uH\233L%c\233;%uH%c\233;%uH%s\23322;% uH@ \23323;%uH \n", *x - *w, r[d], *x + *w, r[d], X, *P, p += k, o); 

This, besides being quite difficult to read, was even confusing for the computer to produce any useful result.

I tried a lot of different things working in other terminal emulators, changing terminal settings and switching locales back and forth without success.

Thus, in addition to the fact that this kind of obfuscation seemed more perfect, since it even seems to be confusing my computer, I still cannot tell what kind of trick the author intended here.

The octal code \233 has the same bit pattern as the escape character ( \033 ), with the 8th bit added, which is probably somehow related to the effect that was intended here. Unfortunately, as I said, this did not work for me.

Fortunately, the escape sequences still seemed simple enough to guess, so I came up with the following replacement:

pos + = move_x,

  /* draw street */ printf ("\e[1;%uH" "\e[L" "%c" "\e[1;%uH" "%c", *x_offset - *width, border[curve], *x_offset + *width, border[curve]); /* draw tree */ printf ("\e[1;%uH" "%s", tree_position, *tree_line); /* redraw car */ printf ("\e[22;%uH" "@" "\e[23;%uH" " " "\n", pos, old_pos); 

Bringing the drawings into separate (hopefully) make them a little more readable. The actual line and the previous line are still hardcoded here, as in the original version. Perhaps extracting from them, as shown below, will even improve readability:

  /* draw street */ printf ("\e[1;%uH" "\e[L" "%c" "\e[1;%uH" "%c", *x_offset - *width, border[curve], *x_offset + *width, border[curve]); /* draw tree */ printf ("\e[1;%uH" "%s", tree_position, *tree_line); /* redraw car */ printf ("\e[%u;%uH" "@" "\e[%u;%uH" " " "\n", NEXT_LINE +1, pos, NEXT_LINE +2, old_pos); 

This finally led me to the first usable version, which I then “tested” a lot. Although probably not a 100% state of the art, it still seems very exciting.

Last words

Here is the final version without the phobia with which I came. As you will see, I did not implement the LED configuration function and the clear screen function, but it is not necessary to find the necessary escape sequences scattered throughout the confusing version. In fact, I have already mentioned LED sequences in this post. One to clear the screen is "\ e [0q". Happy hack.

 #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/time.h> #include <signal.h> #define NEXT_LINE 21 #define CURVE_LEFT 0 #define CURVE_NONE 1 #define CURVE_RIGHT 2 char x_offset[] = { 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 0 }; char width[] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0 }; const char border[] = "\\|/"; void change_bell_frequency () {} void clear_screen () {} void clear_all_LEDs () {} void set_Num_Lock_LED () {} void set_Scroll_lock_LED () {} void set_Caps_Lock_LED () {} char *tree[] = { "", " %%%", "%\\|/%", " |", " |", }; char **tree_line = tree; char tree_position; char curve = CURVE_NONE; char *a, y, z; char move_x = 0; char update_interval = -1; char pos = 40; char old_pos = 40; char play_sound = 0; char gear; unsigned int score = 0; void move (char x, char y) { printf ("\e[%u;%uH", x, y); } void insert () { printf ("\e[L"); } void update (int i) { int n; pos += move_x, /* draw street */ printf ("\e[1;%uH" "\e[L" "%c" "\e[1;%uH" "%c", *x_offset - *width, border[curve], *x_offset + *width, border[curve]); /* draw tree */ printf ("\e[1;%uH" "%s", tree_position, *tree_line); /* redraw car */ printf ("\e[%u;%uH" "@" "\e[%u;%uH" " " "\n", NEXT_LINE + 1, pos, NEXT_LINE +2, old_pos); /* did we leave the road ? */ if (abs (pos - x_offset[NEXT_LINE]) >= width[NEXT_LINE]) exit (0); /* update simulation speed */ if (update_interval != gear) { struct itimerval t = { 0, 0, 0, 0 } ; update_interval += ((update_interval < gear) << 1) - 1; t.it_interval.tv_usec = t.it_value.tv_usec = 72000 / ((update_interval >> 3) + 1); setitimer (0, &t, 0); if (play_sound) change_bell_frequency (update_interval + 24); } /* play sound */ if (play_sound) putchar ('\a'); /* update score */ score += (9 - width[NEXT_LINE]) * ((update_interval >> 3) + 1); old_pos = pos; /* shift x_offset */ a = x_offset; z = *a; while (*++a) { y = *a; *a = z; z = y; }; /* shift width */ a = width; z = *a; while (*++a) { y = *a; *a = z; z = y; }; /* generate new road */ n = rand (); if (!(n & 255) && *width > 1) --*width; /* set tree line pointer */ if (!(**tree_line && tree_line-- || n & 7936)) { /* find the right spot to grow */ while (abs ((tree_position = rand () % 76) - *x_offset + 2) - *width < 6) ; ++tree_position; tree_line = &tree[4]; } /* new offset */ n = rand () & 31; if (n < 3) curve = n; if (curve == CURVE_LEFT) { --*x_offset; if (*x_offset <= *width) { ++*x_offset; curve = CURVE_NONE; } } else if (curve == CURVE_RIGHT) { ++*x_offset; if (*x_offset + *width > 79) { --*x_offset; curve = CURVE_NONE; } } signal (SIGALRM, update); } void end () { signal (SIGALRM, SIG_IGN); clear_all_LEDs (); clear_screen (); printf ("Score: %u\n", score); system ("stty echo -cbreak"); } int main (int argc, char **argv) { atexit (end); if (argc < 2 || *argv[1] != 'q') { argc = *(int*) getenv ("TERM"); if (argc == (int) 0x6C696E75 || argc == (int) 0x756E696C) play_sound = 1; } srand (getpid ()); system ("stty -echo cbreak"); gear = 0 << 3; clear_all_LEDs (); update (14); for (;;) switch (getchar ()) { case 'q': return 0; case '[': case 'b': case ',': move_x = -1; continue; case ' ': case 'n': case '.': move_x = 0; continue; case ']': case 'm': case '/': move_x = 1; continue; case '1': gear = 0 << 3; set_Num_Lock_LED (); continue; case '2': gear = 1 << 3; set_Caps_Lock_LED (); continue; case '3': gear = 2 << 3; set_Scroll_lock_LED (); continue; case '4': gear = 3 << 3; clear_all_LEDs (); continue; } } 
+19
source

All Articles