It is enough to print a binary tree in C (and other imperative languages)

(An initial poster and, rather, a new one in programming, so be patient!)

I'm interested in how an efficient general algorithm for printing formatted binary trees (in the CLI environment) and C. Here is some code that I wrote for fun (this is a very simplified version of the original and part of a larger program that supports many BST operations, but it should compile just fine):

#include <stdbool.h> // C99, boolean type support #include <stdio.h> #include <stdlib.h> #include <math.h> #define DATATYPE_IS_DOUBLE #define NDEBUG // disable assertions #include <assert.h> #define WCHARBUF_LINES 20 // def: 20 #define WCHARBUF_COLMS 800 // def: 80 (using a huge number, like 500, is a good idea, // in order to prevent a buffer overflow :) #define RECOMMENDED_CONS_WIDTH 150 #define RECOMMENDED_CONS_WIDTHQ "150" // use the same value, quoted /* Preprocessor directives depending on DATATYPE_IS_* : */ #if defined DATATYPE_IS_INT || defined DATATYPE_IS_LONG #define DTYPE long int #define DTYPE_STRING "INTEGER" #define DTYPE_PRINTF "%*.*ld" #undef DATATYPE_IS_CHAR #elif defined DATATYPE_IS_FLOAT #define DTYPE float #define DTYPE_STRING "FLOAT" #define DTYPE_PRINTF "%*.*f" #undef DATATYPE_IS_CHAR #elif defined DATATYPE_IS_DOUBLE #define DTYPE double #define DTYPE_STRING "DOUBLE" #define DTYPE_PRINTF "%*.*lf" #undef DATATYPE_IS_CHAR #elif defined DATATYPE_IS_CHAR #define DTYPE char #define DTYPE_STRING "CHARACTER" #define DTYPE_PRINTF "%*.*c" /* using the "precision" sub-specifier ( .* ) with a */ /* character will produce a harmless compiler warning */ #else #error "DATATYPE_IS_* preprocessor directive undefined!" #endif typedef struct node_struct { DTYPE data; struct node_struct *left; struct node_struct *right; /* int height; // useful for AVL trees */ } node; typedef struct { node *root; bool IsAVL; // useful for AVL trees long size; } tree; static inline DTYPE get_largest(node *n){ if (n == NULL) return (DTYPE)0; for(; n->right != NULL; n=n->right); return n->data; } static int subtreeheight(node *ST){ if (ST == NULL) return -1; int height_left = subtreeheight(ST->left); int height_right = subtreeheight(ST->right); return (height_left > height_right) ? (height_left + 1) : (height_right + 1); } void prettyprint_tree(tree *T){ if (T == NULL) // if T empty, abort return; #ifndef DATATYPE_IS_CHAR /* then DTYPE is a numeric type */ /* compute spaces, find width: */ int width, i, j; DTYPE max = get_largest(T->root); width = (max < 10) ? 1 : (max < 100) ? 2 : (max < 1000) ? 3 : (max < 10000) ? 4 : (max < 100000) ? 5 : (max < 1000000) ? 6 : (max < 10000000) ? 7 : (max < 100000000) ? 8 : (max < 1000000000) ? 9 : 10; assert (max < 10000000000); width += 2; // needed for prettier results #if defined DATATYPE_IS_FLOAT || defined DATATYPE_IS_DOUBLE width += 2; // because of the decimals! (1 decimal is printed by default...) #endif // float or double int spacesafter = width / 2; int spacesbefore = spacesafter + 1; //int spacesbefore = ceil(width / 2.0); #else /* character input */ int i, j, width = 3, spacesbefore = 2, spacesafter = 1; #endif // #ifndef DATATYPE_IS_CHAR /* start wchar_t printing, using a 2D character array with swprintf() : */ struct columninfo{ // auxiliary structure bool visited; int col; }; wchar_t wcharbuf[WCHARBUF_LINES][WCHARBUF_COLMS]; int line=0; struct columninfo eachline[WCHARBUF_LINES]; for (i=0; i<WCHARBUF_LINES; ++i){ // initialization for (j=0; j<WCHARBUF_COLMS; ++j) wcharbuf[i][j] = (wchar_t)' '; eachline[i].visited = false; eachline[i].col = 0; } int height = subtreeheight(T->root); void recur_swprintf(node *ST, int cur_line, const wchar_t *nullstr){ // nested function, // GCC extension! float offset = width * pow(2, height - cur_line); ++cur_line; if (eachline[cur_line].visited == false) { eachline[cur_line].col = (int) (offset / 2); eachline[cur_line].visited = true; } else{ eachline[cur_line].col += (int) offset; if (eachline[cur_line].col + width > WCHARBUF_COLMS) swprintf(wcharbuf[cur_line], L" BUFFER OVERFLOW DETECTED! "); } if (ST == NULL){ swprintf(wcharbuf[cur_line] + eachline[cur_line].col, L"%*.*s", 0, width, nullstr); if (cur_line <= height){ /* use spaces instead of the nullstr for all the "children" of a NULL node */ recur_swprintf(NULL, cur_line, L" "); recur_swprintf(NULL, cur_line, L" "); } else return; } else{ recur_swprintf(ST->left, cur_line, nullstr); recur_swprintf(ST->right, cur_line, nullstr); swprintf(wcharbuf[cur_line] + eachline[cur_line].col - 1, L"("DTYPE_PRINTF"", spacesbefore, 1, ST->data); //swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 1, L")"); swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 2, L")"); } } void call_recur(tree *tr){ // nested function, GCC extension! (wraps recur_swprintf()) recur_swprintf(tr->root, -1, L"NULL"); } call_recur(T); /* Omit empty columns: */ int omit_cols(void){ // nested function, GCC extension! int col; for (col=0; col<RECOMMENDED_CONS_WIDTH; ++col) for (line=0; line <= height+1; ++line) if (wcharbuf[line][col] != ' ' && wcharbuf[line][col] != '\0') return col; return 0; } /* Use fputwc to transfer the character array to the screen: */ j = omit_cols() - 2; j = (j < 0) ? 0 : j; for (line=0; line <= height+1; ++line){ // assumes RECOMMENDED_CONS_WIDTH console window! fputwc('\n', stdout); // optional blanc line for (i=j; i<j+RECOMMENDED_CONS_WIDTH && i<WCHARBUF_COLMS; ++i) fputwc(wcharbuf[line][i], stdout); fputwc('\n', stdout); } } 

(also loaded into the pastebin service to preserve syntax highlighting)

It works pretty well, although automatic width adjustment might be better. The magic of the preprocessor is a little stupid (or even ugly) and is not related to the algorithm, but allows you to use different types of data in the nodes of the tree (I saw that this is a chance to experiment a little with the preprocessor - remember, I'm new!).

The main program should call

  system ("mode con: cols =" RECOMMENDED_CONS_WIDTHQ "lines = 2000");

before calling prettyprint_tree () when running inside cmd.exe.

Output Example:

                                                   (106.0)


                       (102.0) (109.0)


         (101.5) NULL (107.0) (115.0)


   NULL NULL (106.1) NULL (113.0) NULL


                                                       NULL NULL NULL NULL

Ideally, the output would be like this (the reason I use the wprintf () family of functions can print Unicode characters anyway):

  (107.0)
          ┌─────┴─────┐
      (106.1) NULL
    ┌───┴───┐
   Null null

So my questions are:

  • What do you think of this code? (Coding style suggestions are also very welcome!)
  • Can it be expanded in an elegant way to include line drawing characters? (Unfortunately, I do not think so.)
  • Any other algorithms in C or other imperative languages ​​(or imperative pseudocode)?
  • A bit unrelated: what's your opinion on nested functions (GNU non-portable extension)? I believe that this is an elegant way to write recursive parts of a function without having to provide all local variables as arguments (and also useful as a method to hide the implementation), but this may be my Pascal by :-) I'm interested in the opinion of more experienced coders.

Thank you in advance for your answers!

PS. This question is not a duplicate of this .


change Jonathan Leffler wrote a great excellent answer , which is likely to become an “accepted answer” in a few days (if only someone says something equally amazing!). I decided to answer here instead of comments due to space limitations.

  • The above code is actually part of a larger homework project (implementing BST operations in a shared library + CLI application using this library). However, the "prettyprint" function was not part of the requirements; I just decided to add myself.
  • I also added the function "convert to AVL without rotation", which used "arraystr" as an intermediate view ;-) I forgot that it was not used here. I edited the code to remove it. In addition, a member of the bool IsAVL structure is nothing but unused; just not used in this particular function. I had to copy / paste the code from different files and make a lot of changes to present the code above. This is a problem that I don’t know how to solve. I would love to publish the whole program, but it is too large and commented on in my native language (not in English!).
  • The entire project was about 1,600 LOCs (including comments) with several build goals (debug / release / static-linking), and it compiled purely with -Wall and -Wextra . Assertions and debug messages were automatically enabled / disabled depending on the purpose of the build. I also thought that function prototypes are not needed for nested functions, because all nested functions do not implement any external interface by definition - GCC, of ​​course, did not complain here. I do not know why there are so many warnings in OSX :(
  • I am using GCC 4.4.1 on Windows 7.
  • Although I write and test this program on Windows, I am actually a Linux user ... However, I cannot stand vim , and instead I use nano (inside the GNU screen) or gedit (shoot me) ! Anyway, I prefer the K & R brace style :)
  • Portability doesn't really matter, for Linux GCC users, it’s pretty much de facto ... The fact that it also works well under Windows is a great bonus.
  • I do not use VCS, perhaps I should. I want to try, but they all seem too complicated for my needs, and I don’t know how to choose them :-)
  • You are definitely right in checking for depth overflow, fortunately it is very easy to add.
  • Thanks for the L' ' tip!
  • I find your suggestion (encapsulating "the entire drawing code so that the image on the screen and related information are in the same structure") is extremely interesting ... but I really do not understand what you mean by "encapsulation". Could you provide 3 or 4 lines of (pseudo) code showing a possible function declaration and / or a possible function call?

This is my first big-ish (and non-trivial) program, and I am very grateful for your advice.


edit # 2: Here is the implementation of the “quick and dirty” method mentioned here .
( edit # 3: I decided to split it into a separate answer , as it is a valid answer to the OP.)

Many answers are mentioned by Graphviz . I already knew about this (many Linux applications are associated with it), but I thought it would be redundant for a 10K CLK executable. However, I will keep this in mind in the future. It seems healthy.

+6
c language-agnostic pretty-print binary-tree
source share
5 answers

You need to decide if your code should be portable. If you ever need to use a compiler other than GCC, nested functions are deadly for your portability purpose. I would not use them, but my portability goals may differ from yours.

Missing code <wchar.h> ; it compiles quite cleanly without it - GCC complained about the lack of prototypes for your non-static functions and for swprintf() and fputwc() ), but the addition of <wchar.h> generates a lot of serious warnings related to swprintf() ; they actually diagnose the error.

 gcc -O -I/Users/jleffler/inc -std=c99 -Wall -Wextra -Wmissing-prototypes \ -Wstrict-prototypes -Wold-style-definition -c tree.c tree.c:88:6: warning: no previous prototype for 'prettyprint_tree' tree.c: In function 'prettyprint_tree': tree.c:143:10: warning: no previous prototype for 'recur_swprintf' tree.c: In function 'recur_swprintf': tree.c:156:17: warning: passing argument 2 of 'swprintf' makes integer from pointer without a cast /usr/include/wchar.h:135:5: note: expected 'size_t' but argument is of type 'int *' tree.c:156:17: error: too few arguments to function 'swprintf' /usr/include/wchar.h:135:5: note: declared here tree.c:160:13: warning: passing argument 2 of 'swprintf' makes integer from pointer without a cast /usr/include/wchar.h:135:5: note: expected 'size_t' but argument is of type 'int *' tree.c:174:22: warning: passing argument 2 of 'swprintf' makes integer from pointer without a cast /usr/include/wchar.h:135:5: note: expected 'size_t' but argument is of type 'int *' tree.c:174:22: warning: passing argument 3 of 'swprintf' makes pointer from integer without a cast /usr/include/wchar.h:135:5: note: expected 'const wchar_t * restrict' but argument is of type 'int' tree.c:177:13: warning: passing argument 2 of 'swprintf' makes integer from pointer without a cast /usr/include/wchar.h:135:5: note: expected 'size_t' but argument is of type 'int *' tree.c:177:13: error: too few arguments to function 'swprintf' /usr/include/wchar.h:135:5: note: declared here tree.c: In function 'prettyprint_tree': tree.c:181:10: warning: no previous prototype for 'call_recur' tree.c:188:9: warning: no previous prototype for 'omit_cols' 

(This is GCC 4.5.2 on MacOS X 10.6.5.)

  • Look at the interface on swprintf() ; it looks more like snprintf() than sprintf() (which is A Good Thing ™!).

The general idea is interesting. I suggest choosing one view when sending the code for analysis and clear everything that is not related to code analysis. For example, the arraystr type arraystr defined but not used - you do not want people like me to get cheap snapshots in your code. Similarly with unused structural elements; don’t even leave them as comments, even if you want to save them in your VCS code (although why?). You use version control system (VCS), right? And this rhetorical question is if you are not using VCS, start using it now before losing something that you value.

Think about it, you want to avoid actions such as requiring the main program to execute the hidden system() command - your code should take care of such problems (perhaps with the initialization function and, possibly, the finalizer function to undo the changes made to the terminal settings) .

Another reason to dislike nested functions: I can't figure out how to get a function declaration in place. Credible alternatives did not seem to work, but I did not go and read their GCC manual.

  • You check for column overflow; you do not check for depth overflow. If you create a tree that is too deep, your code will crash and burn.

Minor thread: you can tell people who don’t use “vi” or “vim” for editing - they don’t put the open function shape in column 1. In “vi”, the opening brace in column 1 gives you a simple way to start the function from any point inside it ('[[' to go back, ']]' to go to the beginning of the next function).

Do not turn off claims.

Include the main program and related test data - this means that people can test your code, not just compile it.

Use wide character constants instead of translating:

 wcharbuf[i][j] = (wchar_t)' '; 

 wcharbuf[i][j] = L' '; 

Your code creates a large image on the screen (20 lines of 800 columns in the code) and fills the data for printing. This is a smart way to do it. With caution, you can arrange the processing of line drawing characters. However, I think you will need to rethink the basic drawing algorithms. You will probably want to encapsulate the entire drawing code so that the image on the screen and the information associated with it are in the same structure, which can be transmitted via a link (pointer) to a function. You will have a set of functions for drawing various bits in the positions indicated by your tree. You will have a function to draw the data value in the appropriate position; You will have the function of drawing lines in the appropriate positions. You probably would not have nested functions - in my opinion, it is much harder to read the code when the function is nested inside another. Creating static functions is good; make nested functions in static (not nested) functions. Give them the context they need - hence the encapsulation of the image on the screen.

  • Overall good start; a lot of good ideas. Much remains to be done.

Request for encapsulation information ...

You can use a structure such as:

 typedef struct columninfo Colinfo; typedef struct Image { wchar_t image[WCHARBUF_LINES][WCHARBUF_COLUMNS]; Colinfo eachline[WCHARBUF_LINES]; } Image; Image image; 

You may find it convenient and / or reasonable to add some additional members; which will appear during implementation. Then you can create a function:

 void format_node(Image *image, int line, int column, DTYPE value) { ... } 

You can also do some of the constants, for example, spaces in the enumeration values:

 enum { spacesafter = 2 }; 

Then they can be used by any of the functions.

+4
source share

Coding style . The prettyprint_tree() function juggles too many calculations and data to make it easy to read. The initialization and printing of the image buffer can, for example, be placed in separate functions and in the calculation of width . I'm sure you can write a formula with log to replace

 width = (max < 10) ? 1 : (max < 100) ? 2 : (max < 1000) ? 3 : ... 

calculation.

I'm not used to reading nested functions and C, which makes code scanning much more difficult. If you do not share your code with other or non-ideological reasons for linking the code to GCC, I would not use these extensions.

Algorithm For a quick and dirty pretty printer written in C, I would never use your layout style. Compared to your algorithm, there is no problem to write a workaround in order to print

  a / \ bc 

but

  c a b 

and I don't mind tilting my head. For something prettier than that, I'd rather choose

 digraph g { a -> b; a -> c; } 

and leave it dot for formatting.

+2
source share

This code should work from: http://www.ihas1337code.com/2010/09/how-to-pretty-print-binary-tree.html

  #include <fstream> #include <iostream> #include <deque> #include <iomanip> #include <sstream> #include <string> #include <cmath> using namespace std; struct BinaryTree { BinaryTree *left, *right; int data; BinaryTree(int val) : left(NULL), right(NULL), data(val) { } }; // Find the maximum height of the binary tree int maxHeight(BinaryTree *p) { if (!p) return 0; int leftHeight = maxHeight(p->left); int rightHeight = maxHeight(p->right); return (leftHeight > rightHeight) ? leftHeight + 1: rightHeight + 1; } // Convert an integer value to string string intToString(int val) { ostringstream ss; ss << val; return ss.str(); } // Print the arm branches (eg, / \ ) on a line void printBranches(int branchLen, int nodeSpaceLen, int startLen, int nodesInThisLevel, const deque<BinaryTree*>& nodesQueue, ostream& out) { deque<BinaryTree*>::const_iterator iter = nodesQueue.begin(); for (int i = 0; i < nodesInThisLevel / 2; i++) { out << ((i == 0) ? setw(startLen-1) : setw(nodeSpaceLen-2)) << "" << ((*iter++) ? "/" : " "); out << setw(2*branchLen+2) << "" << ((*iter++) ? "\\" : " "); } out << endl; } // Print the branches and node (eg, ___10___ ) void printNodes(int branchLen, int nodeSpaceLen, int startLen, int nodesInThisLevel, const deque<BinaryTree*>& nodesQueue, ostream& out) { deque<BinaryTree*>::const_iterator iter = nodesQueue.begin(); for (int i = 0; i < nodesInThisLevel; i++, iter++) { out << ((i == 0) ? setw(startLen) : setw(nodeSpaceLen)) << "" << ((*iter && (*iter)->left) ? setfill('_') : setfill(' ')); out << setw(branchLen+2) << ((*iter) ? intToString((*iter)->data) : ""); out << ((*iter && (*iter)->right) ? setfill('_') : setfill(' ')) << setw(branchLen) << "" << setfill(' '); } out << endl; } // Print the leaves only (just for the bottom row) void printLeaves(int indentSpace, int level, int nodesInThisLevel, const deque<BinaryTree*>& nodesQueue, ostream& out) { deque<BinaryTree*>::const_iterator iter = nodesQueue.begin(); for (int i = 0; i < nodesInThisLevel; i++, iter++) { out << ((i == 0) ? setw(indentSpace+2) : setw(2*level+2)) << ((*iter) ? intToString((*iter)->data) : ""); } out << endl; } // Pretty formatting of a binary tree to the output stream // @ param // level Control how wide you want the tree to sparse (eg, level 1 has the minimum space between nodes, while level 2 has a larger space between nodes) // indentSpace Change this to add some indent space to the left (eg, indentSpace of 0 means the lowest level of the left node will stick to the left margin) void printPretty(BinaryTree *root, int level, int indentSpace, ostream& out) { int h = maxHeight(root); int nodesInThisLevel = 1; int branchLen = 2*((int)pow(2.0,h)-1) - (3-level)*(int)pow(2.0,h-1); // eq of the length of branch for each node of each level int nodeSpaceLen = 2 + (level+1)*(int)pow(2.0,h); // distance between left neighbor node right arm and right neighbor node left arm int startLen = branchLen + (3-level) + indentSpace; // starting space to the first node to print of each level (for the left most node of each level only) deque<BinaryTree*> nodesQueue; nodesQueue.push_back(root); for (int r = 1; r < h; r++) { printBranches(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue, out); branchLen = branchLen/2 - 1; nodeSpaceLen = nodeSpaceLen/2 + 1; startLen = branchLen + (3-level) + indentSpace; printNodes(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue, out); for (int i = 0; i < nodesInThisLevel; i++) { BinaryTree *currNode = nodesQueue.front(); nodesQueue.pop_front(); if (currNode) { nodesQueue.push_back(currNode->left); nodesQueue.push_back(currNode->right); } else { nodesQueue.push_back(NULL); nodesQueue.push_back(NULL); } } nodesInThisLevel *= 2; } printBranches(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue, out); printLeaves(indentSpace, level, nodesInThisLevel, nodesQueue, out); } int main() { BinaryTree *root = new BinaryTree(30); root->left = new BinaryTree(20); root->right = new BinaryTree(40); root->left->left = new BinaryTree(10); root->left->right = new BinaryTree(25); root->right->left = new BinaryTree(35); root->right->right = new BinaryTree(50); root->left->left->left = new BinaryTree(5); root->left->left->right = new BinaryTree(15); root->left->right->right = new BinaryTree(28); root->right->right->left = new BinaryTree(41); cout << "Tree pretty print with level=1 and indentSpace=0\n\n"; // Output to console printPretty(root, 1, 0, cout); cout << "\n\nTree pretty print with level=5 and indentSpace=3,\noutput to file \"tree_pretty.txt\".\n\n"; // Create a file and output to that file ofstream fout("tree_pretty.txt"); // Now print a tree that more spread out to the file printPretty(root, 5, 0, fout); return 0; } 
+2
source share

Perhaps you can take a look at the Bresenham line algorithm , which may be useful to you

0
source share

Here is the C implementation of the "quick and dirty" method mentioned here. It does not get much faster and / or messier:

 void shittyprint_tree(tree *T){ // Supposed to be quick'n'dirty! // When DTYPE is "char", width is a bit larger than needed. if (T == NULL) return; const int width = ceil(log10(get_largest(T->root)+0.01)) + 2; const wchar_t* sp64 = L" "; void nested(node *ST, int spaces){ // GCC extension if (ST == NULL){ wprintf(L"\n"); // Can be commented to disable the extra blanc line. return; } nested(ST->right, spaces + width); wprintf(L"%*.*s("DTYPE_PRINTF")\n", 0, spaces, sp64, 1, 1, ST->data); nested(ST->left, spaces + width); } nested(T->root, 2); } 

Example output (using the same tree as before):

             (115.0)

                  (113.0)

        (109.0)

             (107.0)

                  (106.1)

   (106.0)

        (102.0)

             (101.5)

I can’t say that it meets my initial requirements ...

0
source share

All Articles