Personally, I write my own lexer and parser (LL) by hand. Here is a very abbreviated example. It is in C++, but hopefully you can adapt it. It relies on a PARSE_HIGHER macro (not shown in the excerpt below) that makes it easy to insert operators at different precedence levels without significant code changes.
// routine to scan over whitespace/comments void ScanWhite(const char* &pc){ while(true){ if(0); else if (WHITESPACE(*pc)) pc++; else if (pc[0]=='/' && pc[1]=='/'){ while(*pc && *pc++ != '\n'); } else break; } } // routine to lex an identifier bool SeeId(const char* &pc, string sId){ ScanWhite(pc); const char* pc0 = pc; if (alpha(*pc)){ sId = ""; while(alphanum(*pc)) sId += (*pc++); return true; } pc = pc0; return false; } // routine to lex a number bool SeeNum(const char* &pc, double &dNum){ ScanWhite(pc); const char* pc0 = pc; if (digit(*pc)){ dNum = 0; while(digit(*pc)) dNum = dNum * 10 + (*pc++ - '0'); if (*pc == '.'){ double divisor = 1, frac = 0; while(digit(*pc)){ divisor *= 0.1; frac += (*pc++ - '0') * divisor; } dNum += frac; } return true; } pc = pc0; return false; } // routine to lex some constant word bool SeeWord(const char* &pc, const char* sWord){ ScanWhite(pc); const char* pc0 = pc; int len = strlen(sWord); if (strncmp(pc, sWord, len)==0 && !alphanum(pc[len])){ pc += len; return true; } pc = pc0; return false; } // routine to lex a single character like an operator bool SeeChar(const char* &pc, const char c){ ScanWhite(pc); const char* pc0 = pc; if (*pc == c){ pc++; return true; } pc = pc0; return false; } // primitive expression parser void ParsePrimitiveExpr(const char* &pc, CNode* &p){ double dNum; char sId[100]; if (0); else if (SeeNum(pc, dNum)){ p = new CNode(dNum); } else if (SeeId(pc, sId)){ // see if its a function call if (SeeChar(pc, '(')){ p = MakeNewFunctionCallNode(sId); while(!SeeChar(pc, ')')){ CNode* p1 = null; ParseExpression(pc, p1); AddArgumentExprToFunctionCallNode(p, p1); SeeChar(pc, ','); /* optional comma separator */ } } // otherwise its just a variable reference else { p = new CNode(sId); } } // handle embedded expressions else if (SeeChar(pc, '(')){ ParseExpression(pc, p); if (!SeeChar(pc, ')')) /* deal with syntax error */ } }
Here is some added Pascal-style statement syntax:
void ParseStatement(const char* &pc){ char sId[100]; if(0); else if (SeeWord(pc, "begin")){ while(!SeeWord(pc, "end")){ ParseStatement(pc); SeeChar(pc, ';'); } } else if (SeeWord(pc, "while")){ CNode* p1 = null; ParseExpression(pc, p1); ParseStatement(pc); /* semantics for while statement */ } else if (SeeWord(pc, "if")){ CNode* p1 = null; ParseExpression(pc, p1); ParseStatement(pc); if (SeeWord(pc, "else")){ ParseStatement(pc); } /* semantics for if statement */ } else if (SeeWord(pc, "for")){ /* you do it */ } // handle assignments and subroutine calls else if (SeeId(pc, sId)){ if(0); else if (SeeChar(pc, '=')){ CNode* p1 = null; ParseExpression(pc, p1); /* semantics for assignment statement */ } else if (SeeChar(pc, '(')){ CNode* p = MakeNewFunctionCallNode(sId); while(!SeeChar(pc, ')')){ CNode* p1 = null; ParseExpression(pc, p1); AddArgumentExprToFunctionCallNode(p, p1); SeeChar(pc, ','); /* optional comma separator */ } } else { /* we have a 1-word statement, which is OK in pascal */ } } else { /* syntax error */ } }
It still needs syntax for array indexing, variable declarations, and function definitions, but I hope it is clear how to add those.