//***************************************************************** // // $file: parser.cpp $ // $author: Martin Fouilleul $ // $date: 25/12/2017 $ // //***************************************************************** /* ------------------------------------------------------------------------------ This software is available under 2 licenses -- choose whichever you prefer. ------------------------------------------------------------------------------ ALTERNATIVE A - MIT License Copyright (c) 2017 Martin Fouilleul Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------ ALTERNATIVE B - Public Domain (www.unlicense.org) This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. 
In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------ */ #include #include #include #include //------------------------------------------------------------------------------------ // structs and globals //------------------------------------------------------------------------------------ static FILE* input; static FILE* output; enum { NONE, ENDOFCODE, ENDF, KEYWORD, BUILTIN_TYPE, IDENTIFIER, CONSTANT, STRING_LITERAL, LPAR, RPAR, LBRACKET, RBRACKET, CLBRACKET, CRBRACKET, PTR_OP, OP_ASSIGN, OPERATOR, INC_DEC_OP, DOT, CONDITION_OP, COMMA, COLON, SEMICOLON, ELLIPSIS }; enum { SIZEOF = 128, NEQUAL, EQUAL, AND, OR, LSHIFT, RSHIFT, LEQUAL, GEQUAL, NEGEQUAL, CONST, VOLATILE, ENUM, UNION, STRUCT, TYPE_QUALIFIER, STORAGE_CLASS_SPECIFIER, CASE, DEFAULT, IF, ELSE, SWITCH, WHILE, DO, FOR, GOTO, CONTINUE, BREAK, RETURN }; const int BUILTIN_TYPES_COUNT = 8; const char* BUILTIN_TYPES[BUILTIN_TYPES_COUNT] = { "void", "char", "short", "int", "long", "float", "double", "unsigned" }; struct c_keyword { const char* str; int category; }; const int KEYWORDS_COUNT = 23; const c_keyword KEYWORDS[KEYWORDS_COUNT] = { 
{"auto", 0 }, {"break", BREAK}, {"case", CASE}, {"const", TYPE_QUALIFIER}, {"continue", CONTINUE}, {"default", DEFAULT}, {"do", DO}, {"else", ELSE}, {"enum", ENUM}, {"extern", STORAGE_CLASS_SPECIFIER}, {"for", FOR}, {"goto", GOTO}, {"if", IF}, {"register", STORAGE_CLASS_SPECIFIER}, {"return", RETURN}, {"sizeof", SIZEOF}, {"static", STORAGE_CLASS_SPECIFIER}, {"struct", STRUCT}, {"switch", SWITCH}, {"typedef", STORAGE_CLASS_SPECIFIER}, {"union", UNION}, {"volatile", TYPE_QUALIFIER}, {"while", WHILE}}; struct c_token { int type; int c; char* text; }; c_token LOOK_AHEAD_TOKEN_0; c_token LOOK_AHEAD_TOKEN_1; //------------------------------------------------------------------------------------ // Lexer functions //------------------------------------------------------------------------------------ char* TrailingSpaces() { int count = 0; int size = 256; char* buffer = (char*)malloc(size); char c = getc(input); while((c == ' ' || c == '\t' || c == '\n')) { buffer[count] = c; count++; c = getc(input); if(count >= size) { size += 256; buffer = (char*)realloc(buffer, size); } } buffer[count] = '\0'; ungetc(c, input); return(buffer); } c_token GetNextIdentifierOrKeyword() { int size = 256; int count = 0; char* buffer = (char*)malloc(size); char c; while(true) { c = getc(input); if(!isalpha(c) && !isdigit(c) && c != '_') { ungetc(c, input); buffer[count] = '\0'; char* sp = TrailingSpaces(); c_token t; t.type = IDENTIFIER; t.c = 0; for(int i=0;i= size) { size += 256; buffer = (char*)realloc(buffer, size); } } } c_token GetNextConstant() { //TODO(martin): hande exponent notation // and type size specifier int size = 256; char* buffer = (char*)malloc(size); int count = 0; char c; while(true) { c = getc(input); if(!isdigit(c) && c != '.') { ungetc(c, input); buffer[count] = '\0'; char* sp = TrailingSpaces(); c_token t; t.type = CONSTANT; t.text = (char*)malloc(count+strlen(sp)+1); strcpy(t.text, buffer); strcat(t.text, sp); free(sp); free(buffer); return(t); } buffer[count] = c; 
count++; if(count >= size) { size += 256; buffer = (char*)realloc(buffer, 256); } } } c_token GetNextStringLiteral() { int size = 256; char* buffer = (char*)malloc(size); int count = 0; char c = getc(input); buffer[0] = c; count++; while(true) { c = getc(input); if(c == '\"') //TODO(martin): handle escaped quotes in string literal !! { buffer[count] = c; count++; buffer[count] = '\0'; char* sp = TrailingSpaces(); c_token t; t.type = STRING_LITERAL; t.text = (char*)malloc(count+strlen(sp)+1); strcpy(t.text, buffer); strcat(t.text, sp); free(sp); free(buffer); return(t); } buffer[count] = c; count++; if(count >= size) { size += 256; buffer = (char*)realloc(buffer, size); } } } c_token GetNextToken(); c_token GetNextOneLineCommentToken() { fprintf(output, "/"); char c; while((c = getc(input)) != EOF) { switch(c) { case '<': fprintf(output, "<"); break; case '>': fprintf(output, ">"); break; case '&': fprintf(output, "&"); break; default: putc(c, output); break; } if(c == '\n') { break; } } fprintf(output, ""); while((c = getc(input)) == ' ' || c == '\t' || c == '\n' ) { putc(c, output); } ungetc(c, input); return(GetNextToken()); } c_token GetNextMultilineCommentToken() { fprintf(output, "/"); char c; char last = 0; while((c = getc(input)) != EOF) { switch(c) { case '<': fprintf(output, "<"); break; case '>': fprintf(output, ">"); break; case '&': fprintf(output, "&"); break; default: putc(c, output); break; } if(c == '/' && last == '*') { break; } last = c; } fprintf(output, ""); while((c = getc(input)) == ' ' || c == '\t' || c == '\n' ) { putc(c, output); } ungetc(c, input); return(GetNextToken()); } c_token GetNextToken() { char c = getc(input); if(c==EOF) { c_token t; t.type = ENDF; t.text = 0; return(t); } else if(isalpha(c) || c=='_') { ungetc(c, input); return GetNextIdentifierOrKeyword(); } else if(isdigit(c)) { ungetc(c, input); return GetNextConstant(); } else { c_token t; t.type = NONE; t.text = 0; const char* lex = ""; switch(c) { case '\"': ungetc(c, 
input); return GetNextStringLiteral(); case '(': { t.type = LPAR; lex = "("; break; } case ')': { t.type = RPAR; lex = ")"; break; } case '[': { char buff[7]; for(int i=0;i<6;i++) { buff[i] = getc(input); } buff[6] = '\0'; if(!strcmp(buff, "/code]")) { t.type = ENDOFCODE; return(t); } else { for(int i=5;i>=0;i--) { ungetc(buff[i], input); } } t.type = LBRACKET; lex = "["; break; } case ']': { t.type = RBRACKET; lex = "]"; break; } case '{': { t.type = CLBRACKET; lex = "{"; break; } case '}': { t.type = CRBRACKET; lex = "}"; break; } case '-': { char n = getc(input); if(n == '-') { t.type = INC_DEC_OP; lex = "--"; } else if(n == '>') { t.type = PTR_OP; lex = "->"; } else if(n == '=') { t.type = OP_ASSIGN; lex = "-="; } else { ungetc(n, input); t.type = OPERATOR; t.c = c; lex = "-"; } break; } case '+': { char n = getc(input); if(n == '+') { t.type = INC_DEC_OP; lex = "++"; } else if(n == '=') { t.type = OP_ASSIGN; lex = "+="; } else { ungetc(n, input); t.type = OPERATOR; t.c = c; lex = "+"; } break; } case '*': { char n = getc(input); if(n == '=') { t.type = OP_ASSIGN; lex = "*="; } else { ungetc(n, input); t.type = OPERATOR; t.c = c; lex = "*"; } break; } case '/': { char n = getc(input); if(n == '/') { ungetc(n, input); return(GetNextOneLineCommentToken()); } else if(n == '*') { ungetc(n, input); return(GetNextMultilineCommentToken()); } else if(n == '=') { t.type = OP_ASSIGN; lex = "/="; } else { ungetc(n, input); t.type = OPERATOR; t.c = c; lex = "/"; } break; } case '%': { char n = getc(input); if(n == '=') { t.type = OP_ASSIGN; lex = "%="; } else { ungetc(n, input); t.type = OPERATOR; t.c = c; lex = "%"; } break; } case '<': { char n = getc(input); if(n == '=') { t.type = OPERATOR; t.c = LEQUAL; lex = "<="; } else if(n == '<') { t.type = OPERATOR; t.c = LSHIFT; lex = "<<"; } else { ungetc(n, input); t.type = OPERATOR; t.c = c; lex = "<"; } break; } case '>': { char n = getc(input); if(n == '=') { t.type = OPERATOR; t.c = GEQUAL; lex = ">="; } else if(n == '>') 
{ t.type = OPERATOR; t.c = RSHIFT; lex = ">>"; } else { ungetc(n, input); t.type = OPERATOR; t.c = c; lex = ">"; } break; } case '=': { char n = getc(input); if(n == '=') { t.type = OPERATOR; t.c = EQUAL; lex = "=="; } else { ungetc(n, input); t.type = EQUAL; t.c = c; lex = "="; } break; } case '&': { char n = getc(input); if(n == '=') { t.type = OP_ASSIGN; t.c = '&'; lex = "&="; } else if(n == '&') { t.type = OPERATOR; t.c = AND; lex = "&&"; } else { ungetc(n, input); t.type = OPERATOR; t.c = c; lex = "&"; } break; } case '|': { char n = getc(input); if(n == '=') { t.type = OP_ASSIGN; t.c = '|'; lex = "|="; } else if(n == '|') { t.type = OPERATOR; t.c = OR; lex = "||"; } else { ungetc(n, input); t.type = OPERATOR; t.c = c; lex = "|"; } break; } case '^': { char n = getc(input); if(n == '^') { t.type = OP_ASSIGN; t.c = '^'; lex = "^="; } else { ungetc(n, input); t.type = OPERATOR; t.c = '^'; lex = "^"; } break; } case '!': { char n = getc(input); if(n == '=') { t.type = OPERATOR; t.c = NEQUAL; lex = "!="; } else { ungetc(n, input); t.type = OPERATOR; t.c = '!'; lex = "!"; } break; } case '~': { char n = getc(input); if(n == '=') { t.type = OP_ASSIGN; t.c = NEGEQUAL; lex = "~="; } else { ungetc(n, input); t.type = OPERATOR; t.c = '~'; lex = "~"; } break; } case '.': { char c1 = getc(input); char c2 = getc(input); if(c1 == '.' 
&& c2 == '.') { t.type = ELLIPSIS; lex = "..."; } else { t.type = DOT; lex = "."; ungetc(c2, input); ungetc(c1, input); } break; } case '?': { t.type = CONDITION_OP; lex = "?"; break; } case ':': { t.type = COLON; lex = ":"; break; } case ',': { t.type = COMMA; lex = ","; break; } case ';': { t.type = SEMICOLON; lex = ";"; break; } default: { t.type = NONE; char* sp = TrailingSpaces(); t.text = (char*)malloc(strlen(sp)+2); t.text[0] = c; strcpy(t.text+1, sp); free(sp); return(t); } } char* sp = TrailingSpaces(); t.text = (char*)malloc(strlen(sp)+strlen(lex)+1); strcpy(t.text, lex); strcat(t.text, sp); free(sp); return(t); } } //------------------------------------------------------------------------------------ // Parser Helper and output functions //------------------------------------------------------------------------------------ void InitParser() { LOOK_AHEAD_TOKEN_0 = GetNextToken(); LOOK_AHEAD_TOKEN_1.type = NONE; LOOK_AHEAD_TOKEN_1.text = 0; } c_token* LookAhead(int level = 0) { if(level) { if(LOOK_AHEAD_TOKEN_1.type == NONE) { LOOK_AHEAD_TOKEN_1 = GetNextToken(); } return(&LOOK_AHEAD_TOKEN_1); } else { return(&LOOK_AHEAD_TOKEN_0); } } void Accept(c_token* t) { if(t->text) { int len = strlen(t->text); for(int i=0;itext[i]) { case '<': fprintf(output, "<"); break; case '>': fprintf(output, ">"); break; case '&': fprintf(output, "&"); break; default: putc(t->text[i], output); } } free(t->text); } if(LOOK_AHEAD_TOKEN_1.type != NONE) { LOOK_AHEAD_TOKEN_0 = LOOK_AHEAD_TOKEN_1; LOOK_AHEAD_TOKEN_1.type = NONE; LOOK_AHEAD_TOKEN_1.text = 0; } else { LOOK_AHEAD_TOKEN_0 = GetNextToken(); } } void AcceptHighlight(const char* style, c_token* t) { if(t->text) { fprintf(output, "", style); int len = strlen(t->text); for(int i=0;itext[i]) { case '<': fprintf(output, "<"); break; case '>': fprintf(output, ">"); break; case '&': fprintf(output, "&"); break; default: putc(t->text[i], output); } } fprintf(output, ""); free(t->text); } if(LOOK_AHEAD_TOKEN_1.type != NONE) { 
LOOK_AHEAD_TOKEN_0 = LOOK_AHEAD_TOKEN_1;
		LOOK_AHEAD_TOKEN_1.type = NONE;
		LOOK_AHEAD_TOKEN_1.text = 0;
	}
	else
	{
		LOOK_AHEAD_TOKEN_0 = GetNextToken();
	}
}

// Returns true when t is an OPERATOR token whose code is one of the binary operators
// listed below ('~' and '!' are not in the list).
bool IsBinaryOp(c_token* t)
{
	if(t->type != OPERATOR)
	{
		return(false);
	}
	if( t->c == '+' || t->c == '-' || t->c == '*' || t->c == '/' || t->c == '%' || t->c == '<' || t->c == '>' || t->c == '&' || t->c == '|' || t->c == '^' || t->c == NEQUAL || t->c == EQUAL || t->c == AND || t->c == OR || t->c == LSHIFT || t->c == RSHIFT || t->c == LEQUAL || t->c == GEQUAL)
	{
		return(true);
	}
	else
	{
		return(false);
	}
}

//------------------------------------------------------------------------------------
// Parser production rules
//------------------------------------------------------------------------------------
// Every Parse* function returns 0 on success and -1 on a syntax error. Tokens are written
// to the output as they are accepted.
//
// NOTE: LookAhead() with no argument always returns the address of the same global slot,
//       and Accept()/AcceptHighlight() overwrite that slot. A `t` obtained from LookAhead()
//       therefore shows the *next* token after each accept, even where the code does not
//       call LookAhead() again.

int ParseExpression();
int ParseConditionalExpression();
int ParseAssignmentExpression();
int ParseDirectDeclarator();
int ParsePostfixedDeclarator();
int ParsePointer();
int ParseDeclarator();
int ParseAbstractDeclarator();
int ParseInitializer();
int ParseInitializerList();
int ParseSpecifiersQualifiers();
int ParseStructDeclaratorList();
int ParseStructDeclarator();
int ParseStructDeclarationList();
int ParseEnumSpecifier();
int ParseStructDeclaration();
int ParseTypeSpecifier();
int ParseDeclarationSpecifiers();
int ParseInitDeclaratorList();
int ParseInitDeclarator();
int ParseParameterList();

// Parses declaration specifiers followed by an optional list of init-declarators
// (everything in a declaration except the final ';').
int ParseDeclarationHead()
{
	if(ParseDeclarationSpecifiers()) { return(-1); }
	c_token* t = LookAhead();
	// only these tokens can start a declarator; anything else means "specifiers only"
	if(t->type == IDENTIFIER || t->type == LPAR || (t->type == OPERATOR && t->c == '*'))
	{
		if(ParseInitDeclaratorList()) { return(-1); }
	}
	return(0);
}

// Parses a declaration head followed by ';'.
int ParseDeclaration()
{
	if(ParseDeclarationHead()) { return(-1); }
	c_token* t = LookAhead();
	if(t->type != SEMICOLON) { return(-1); }
	Accept(t);
	return(0);
}

// Parses a sequence of storage class specifiers, type qualifiers and type specifiers.
// An identifier in specifier position is taken to be a type name and ends the sequence.
int ParseDeclarationSpecifiers()
{
	c_token* t = LookAhead();
	if( t->type == IDENTIFIER)
	{
		return(ParseTypeSpecifier());
	}
	else if(t->type == BUILTIN_TYPE)
	{
		if(ParseTypeSpecifier()) { return(-1); }
		// t now shows the token after the type (see the note at the top of this section).
		// An identifier is not in this list: after a builtin type it is the declared name.
		if( t->type == BUILTIN_TYPE || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM )) || (t->type == KEYWORD && t->c == STORAGE_CLASS_SPECIFIER) || (t->type == KEYWORD && t->c == TYPE_QUALIFIER))
		{
			return(ParseDeclarationSpecifiers());
		}
		return(0);
	}
	else if(t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM ))
	{
		if(ParseTypeSpecifier()) { return(-1); }
		//NOTE(review): unlike the builtin type case, an identifier continues the sequence here,
		//              so in "struct foo x" the name x is consumed and highlighted as a type.
		//              That looks intended for "typedef struct ... name;", but it also means no
		//              declarator is left for "struct foo x = ...;" -- confirm the intent.
		if( t->type == BUILTIN_TYPE || t->type == IDENTIFIER || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM )) || (t->type == KEYWORD && t->c == STORAGE_CLASS_SPECIFIER) || (t->type == KEYWORD && t->c == TYPE_QUALIFIER))
		{
			return(ParseDeclarationSpecifiers());
		}
		return(0);
	}
	else if( t->type == KEYWORD && (t->c == STORAGE_CLASS_SPECIFIER || t->c == TYPE_QUALIFIER))
	{
		AcceptHighlight("keyword", t);
		// after a qualifier or storage class, an identifier is a (typedef'ed) type name
		if( t->type == BUILTIN_TYPE || t->type == IDENTIFIER || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM )) || (t->type == KEYWORD && t->c == STORAGE_CLASS_SPECIFIER) || (t->type == KEYWORD && t->c == TYPE_QUALIFIER))
		{
			return(ParseDeclarationSpecifiers());
		}
		return(0);
	}
	else
	{
		return(-1);
	}
}

// Parses what follows the 'struct' / 'union' keyword: an optional tag (highlighted as a
// type) and an optional '{' declaration list '}'. Succeeds when neither is present.
int ParseStructOrUnionSpecifier()
{
	c_token* t = LookAhead();
	switch(t->type)
	{
		case IDENTIFIER:
			AcceptHighlight("type", t);
			t = LookAhead();
			if(t->type == CLBRACKET)
			{
				Accept(t);
				if(ParseStructDeclarationList()) { return(-1); }
				t = LookAhead();
				if(t->type != CRBRACKET) { return(-1); }
				Accept(t);
				return(0);
			}
			return(0);

		case CLBRACKET:
			Accept(t);
			if(ParseStructDeclarationList()) { return(-1); }
			t = LookAhead();
			if(t->type != CRBRACKET) { return(-1); }
			Accept(t);
			return(0);

		default:
			return(0);
	}
}

// Parses a single type specifier: a builtin type, a struct/union/enum specifier, or an
// identifier (highlighted as a type).
int ParseTypeSpecifier()
{
	c_token* t = LookAhead();
	switch(t->type)
	{
		case BUILTIN_TYPE:
			AcceptHighlight("type", t);
			return(0);

		case KEYWORD:
			if(t->c == STRUCT || t->c == UNION)
			{
				AcceptHighlight("keyword", t);
				return(ParseStructOrUnionSpecifier());
			}
			else if(t->c == ENUM)
			{
				// the 'enum' keyword itself is accepted by ParseEnumSpecifier()
				return(ParseEnumSpecifier());
			}
			else
			{
				return(-1);
			}

		case IDENTIFIER:
			AcceptHighlight("type", t);
			return(0);

		default:
			return(-1);
	}
}

// Parses one or more member declarations, up to (not including) the closing '}'.
// At least one declaration is required, so an empty "{}" body is an error.
int ParseStructDeclarationList()
{
	if(ParseStructDeclaration()) { return(-1); }
	c_token* t = LookAhead();
	/*
	if( t->type == BUILTIN_TYPE || t->type == IDENTIFIER || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM )) || (t->type == KEYWORD && t->c == TYPE_QUALIFIER))
	*/
	if(t->type != CRBRACKET)
	{
		return(ParseStructDeclarationList());
	}
	return(0);
}

// Parses one member declaration: specifiers, a declarator list and ';'.
int ParseStructDeclaration()
{
	if(ParseDeclarationSpecifiers()) { return(-1); }
	if(ParseStructDeclaratorList()) { return(-1); }
	c_token* t = LookAhead();
	if(t->type != SEMICOLON) { return(-1); }
	Accept(t);
	return(0);
}

// Same as ParseDeclarationSpecifiers() but without storage class specifiers.
// Called by ParseTypeName().
int ParseSpecifiersQualifiers()
{
	c_token* t = LookAhead();
	if(t->type == IDENTIFIER)
	{
		return(ParseTypeSpecifier());
	}
	if( t->type == BUILTIN_TYPE)
	{
		if(ParseTypeSpecifier()) { return(-1); }
		if( t->type == BUILTIN_TYPE || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM || t->c == TYPE_QUALIFIER)))
		{
			return(ParseSpecifiersQualifiers());
		}
		return(0);
	}
	else if((t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM)))
	{
		if(ParseTypeSpecifier()) { return(-1); }
		if( t->type == BUILTIN_TYPE || t->type == IDENTIFIER || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM || t->c == TYPE_QUALIFIER)))
		{
			return(ParseSpecifiersQualifiers());
		}
		return(0);
	}
	else if( t->type == KEYWORD && t->c == TYPE_QUALIFIER)
	{
		AcceptHighlight("keyword", t);
		if( t->type == BUILTIN_TYPE || t->type == IDENTIFIER || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM || t->c == TYPE_QUALIFIER)))
		{
			return(ParseSpecifiersQualifiers());
		}
		return(0);
	}
	else
	{
		return(-1);
	}
}

// Parses a comma separated list of member declarators.
int ParseStructDeclaratorList()
{
	if(ParseStructDeclarator()) { return(-1); }
	c_token* t;
	while((t = LookAhead())->type == COMMA)
	{
		Accept(t);
		// the recursive call consumes the rest of the list, so the loop runs at most once
		if(ParseStructDeclaratorList()) { return(-1); }
	}
	return(0);
}

// Parses one member declarator.
int ParseStructDeclarator()
{
	//NOTE(martin): we don't do ':' declarators..
return(ParseDeclarator()); } int ParseEnumerator(); int ParseEnumeratorList(); int ParseEnumSpecifier() { c_token* t = LookAhead(); if(t->type != KEYWORD || t->c != ENUM) { return(-1); } AcceptHighlight("keyword", t); t = LookAhead(); if(t->type == IDENTIFIER) { Accept(t); t = LookAhead(); if(t->type == CLBRACKET) { return(ParseEnumeratorList()); } return(0); } else { return(ParseEnumeratorList()); } } int ParseEnumeratorList() { c_token* t = LookAhead(); if(t->type == CLBRACKET) { Accept(t); t = LookAhead(); if(ParseEnumerator()) { return(-1); } while((t = LookAhead())->type == COMMA) { Accept(t); t = LookAhead(); if(ParseEnumerator()) { return(-1); } } t = LookAhead(); if(t->type != CRBRACKET) { return(-1); } Accept(t); return(0); } else { return(-1); } } int ParseEnumerator() { c_token* t = LookAhead(); if(t->type != IDENTIFIER) { return(-1); } Accept(t); t = LookAhead(); if(t->type == EQUAL) { Accept(t); return(ParseConditionalExpression()); } return(0); } int ParseInitDeclaratorList() { if(ParseInitDeclarator()) { return(-1); } c_token* t; while((t = LookAhead())->type == COMMA) { Accept(t); if(ParseInitDeclarator()) { return(-1); } } return(0); } int ParseInitDeclarator() { if(ParseDeclarator()) { return(-1); } c_token* t = LookAhead(); if(t->type == EQUAL) { Accept(t); return(ParseInitializer()); } return(0); } int ParseInitializer() { c_token* t = LookAhead(); if(t->type == CLBRACKET) { Accept(t); if(ParseInitializerList()) { return(-1); } t = LookAhead(); if(t->type != CRBRACKET) { return(-1); } Accept(t); return(0); } else { return(ParseAssignmentExpression()); } } int ParseInitializerList() { if(ParseInitializer()) { return(-1); } c_token* t = LookAhead(); if(t->type == COMMA) { Accept(t); return(ParseInitializerList()); } return(0); } int ParseDeclarator() { //dummy c_token* t = LookAhead(); if(t->type == OPERATOR && t->c == '*') { if(ParsePointer()) { return(-1); } return(ParsePostfixedDeclarator()); } else if(t->type == LPAR || t->type == IDENTIFIER 
|| t->type == LBRACKET) { return(ParsePostfixedDeclarator()); } else { return(-1); } } int ParsePointer() { c_token* t = LookAhead(); if(t->type != OPERATOR || t->c != '*') { printf("expected pointer\n"); return(-1); } Accept(t); while(t->type == OPERATOR && t->c == '*' ) { Accept(t); t = LookAhead(); } if(t->type == KEYWORD && t->c == TYPE_QUALIFIER) { while(t->type == KEYWORD && t->c == TYPE_QUALIFIER) { AcceptHighlight("keyword", t); } t = LookAhead(); if(t->type == OPERATOR && t->c == '*') { return(ParsePointer()); } } return(0); } int ParsePostfixedDeclarator() { if(ParseDirectDeclarator()) { printf("Error in parse array declarator : expected direct declarator\n"); return(-1); } c_token* t; while((t = LookAhead())->type == LBRACKET || t->type == LPAR) { if(t->type == LBRACKET) { Accept(t); t = LookAhead(); if(t->type != RBRACKET) { if(ParseConditionalExpression()) { return(-1); } t = LookAhead(); if(t->type != RBRACKET) { return(-1); } Accept(t); } else { Accept(t); } } else { Accept(t); t = LookAhead(); if(t->type != RPAR) { if(ParseParameterList()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } Accept(t); } else { Accept(t); } } } return(0); } int ParseDirectDeclarator() { c_token* t = LookAhead(); switch(t->type) { case IDENTIFIER: { //HACK to know if we must consider this as a function name... 
char c = getc(input); ungetc(c, input); if(c == '(') { AcceptHighlight("function-name", t); } else { Accept(t); } return(0); } case LPAR: { //NEED TO DECIDE BETWEEN DECLARATOR OR PARAMETER TYPE LIST Accept(t); t = LookAhead(); if(t->type != RPAR) { if(t->type == KEYWORD || t->type == BUILTIN_TYPE) { if(ParseParameterList()) { return(-1); } } else if(t->type == IDENTIFIER) { //We need a look ahead + 1 to decide between declarator and parametertypelist // '(' or '[' or ')' -> declarator // otherwise, parameter_type_list c_token* t1 = LookAhead(1); if(t1->type == LPAR || t1->type == LBRACKET || t1->type == RPAR) { if(ParseDeclarator()) { return(-1); } } else if(ParseParameterList()) { return(-1); } } else if(ParseDeclarator()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } } Accept(t); return(0); } case LBRACKET: { Accept(t); t = LookAhead(); if(t->type != RBRACKET) { if(ParseConditionalExpression()) { return(-1); } t = LookAhead(); if(t->type != RBRACKET) { return(-1); } } Accept(t); return(0); } default: return(0); // direct declarator is optionnal... 
} } //------------------------------------------------------------------------------------ // Functions //------------------------------------------------------------------------------------ int ParseDirectAbstractDeclarator(); int ParsePostfixedAbstractDeclarator(); int ParseParameterDeclaration(); int ParseDirectFunctionDeclarator(); int ParseFunctionDeclarator(); /* int ParseFunctionDeclaration() { if(ParseDeclarationSpecifiers()) { return(-1); } return(ParseFunctionDeclarator()); } */ int ParseFunctionDeclarator() { c_token* t = LookAhead(); if(t->type == OPERATOR && t->c == '*') { if(ParsePointer()) { return(-1); } } return(ParseDirectFunctionDeclarator()); } int ParseDirectFunctionDeclarator() { c_token* t = LookAhead(); if(t->type != IDENTIFIER) { return(-1); } AcceptHighlight("function-name", t); t = LookAhead(); if(t->type != LPAR) { return(-1); } Accept(t); t = LookAhead(); if(t->type != RPAR) { if(ParseParameterList()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } } Accept(t); return(0); } int ParseParameterList() { if(ParseParameterDeclaration()) { return(-1); } c_token* t; while((t = LookAhead())->type == COMMA) { Accept(t); t = LookAhead(); if(t->type == ELLIPSIS) { Accept(t); return(0); } if(ParseParameterList()) { return(-1); } } return(0); } int ParseParameterDeclaration() { if(ParseDeclarationSpecifiers()) { return(-1); } c_token* t = LookAhead(); if(t->type == LPAR || t->type == LBRACKET || t->type == IDENTIFIER || (t->type == OPERATOR && t->c == '*')) { return(ParseDeclarator()); } else { return(-1); } }; int ParseAbstractDeclarator() { c_token* t = LookAhead(); if(t->type == OPERATOR && t->c == '*') { if(ParsePointer()) { return(-1); } t = LookAhead(); if(t->type == LPAR || t->type == LBRACKET) { if(ParsePostfixedAbstractDeclarator()) { return(-1); } } return(0); } else { return(ParsePostfixedAbstractDeclarator()); } } int ParsePostfixedAbstractDeclarator() { if(ParseDirectAbstractDeclarator()) { return(-1); } c_token* t; 
while((t = LookAhead())->type == LBRACKET || t->type == LPAR) { if(t->type == LBRACKET) { Accept(t); if(t->type != RBRACKET) { if(ParseConditionalExpression()) { return(-1); } t = LookAhead(); if(t->type != RBRACKET) { return(-1); } } Accept(t); } else { Accept(t); if(ParseParameterList()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } Accept(t); } } return(0); } int ParseDirectAbstractDeclarator() { c_token* t = LookAhead(); switch(t->type) { case LPAR: Accept(t); t = LookAhead(); if(t->type == LPAR || t->type == LBRACKET || (t->type == OPERATOR && t->c == '*')) { if(ParseAbstractDeclarator()) { return(-1); } } else { if(ParseParameterList()) { return(-1); } } t = LookAhead(); if(t->type != RPAR) { return(-1); } Accept(t); return(0); case LBRACKET: Accept(t); t = LookAhead(); if(t->type != RBRACKET) { if(ParseConditionalExpression()) { return(-1); } t = LookAhead(); if(t->type != RBRACKET) { return(-1); } } Accept(t); return(0); default: return(-1); } } //------------------------------------------------------------------------------------ // Expression //------------------------------------------------------------------------------------ int ParsePrimaryExpression(); int ParsePostfixExpression(); int ParseUnaryExpression(); int ParseBinaryExpression(); int ParseConditionalExpression(); int ParseAssignmentExpression(); int ParseExpression() { if(ParseAssignmentExpression()) { return(-1); } c_token* t; while((t = LookAhead())->type == COMMA) { Accept(t); if(ParseAssignmentExpression()) { return(-1); } } return(0); } int ParseAssignmentTailOption() { c_token* t = LookAhead(); switch(t->type) { case OP_ASSIGN: case EQUAL: { Accept(t); return(ParseAssignmentExpression()); } case OPERATOR: { if(IsBinaryOp(t)) { Accept(t); if(ParseBinaryExpression()) { return(-1); } while((t = LookAhead())->type == CONDITION_OP) { Accept(t); if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != COLON) { return(-1); } Accept(t); 
if(ParseConditionalExpression()) { return(-1); } } return(0); } else { return(-1); } } case CONDITION_OP: { Accept(t); if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != COLON) { return(-1); } Accept(t); if(ParseConditionalExpression()) { return(-1); } } default: return(0); } } int ParseAssignmentExpression() { if(ParseUnaryExpression()) { return(-1); } return(ParseAssignmentTailOption()); } int ParseConditionalExpression() { if(ParseBinaryExpression()) { return(-1); } c_token* t = LookAhead(); if(t->type == CONDITION_OP) { Accept(t); if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != COLON) { return(-1); } Accept(t); return(ParseConditionalExpression()); } return(0); } int ParseBinaryExpression() { if(ParseUnaryExpression()) { return(-1); } c_token* t = LookAhead(); while(IsBinaryOp(t)) { Accept(t); if(ParseBinaryExpression()) { return(-1); } } return(0); } int ParseCastExpression() { //NOTE(martin): we are not implementing cast for now !! return(ParseUnaryExpression()); } int ParseTypeName() { if(ParseSpecifiersQualifiers()) { return(-1); } c_token* t = LookAhead(); if(t->type != RPAR) // type_name is always used in parentheses { return(ParseAbstractDeclarator()); } return(0); } int ParseUnaryExpression() { c_token* t = LookAhead(); switch(t->type) { case INC_DEC_OP: { Accept(t); return(ParseUnaryExpression()); } case KEYWORD: { if(t->c != SIZEOF) { return(-1); } AcceptHighlight("keyword", t); t = LookAhead(); if(t->type != LPAR) { return(-1); } Accept(t); if(ParseTypeName()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } Accept(t); return(0); } case OPERATOR: { if(t->c == '&' || t->c == '*' || t->c == '+' || t->c == '-' || t->c == '~' || t->c == '!') { Accept(t); return(ParseCastExpression()); } else { return(-1); } } default: return(ParsePostfixExpression()); } } int ParseArgumentExpressionList() { fprintf(output, ""); if(ParseAssignmentExpression()) { return(-1); } c_token* t;; while((t = 
LookAhead())->type == COMMA) { Accept(t); if(ParseArgumentExpressionList()) { return(-1); } } fprintf(output, ""); return(0); } int ParsePostfixTailOption() { c_token* t = LookAhead(); switch(t->type) { case LBRACKET : { Accept(t); if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type == RBRACKET) { Accept(t); return(0); } else { return(-1); } } case LPAR : { Accept(t); t = LookAhead(); if(t->type == RPAR) { Accept(t); return(0); } else { ParseArgumentExpressionList(); t = LookAhead(); if(t->type == RPAR) { Accept(t); return(0); } else { return(-1); } } } case DOT : { Accept(t); t = LookAhead(); if(t->type == IDENTIFIER) { Accept(t); return(0); } else { return(-1); } } case PTR_OP : { Accept(t); t = LookAhead(); if(t->type == IDENTIFIER) { Accept(t); return(0); } else { return(-1); } } case INC_DEC_OP : { Accept(t); return(0); } default: return(-1); } } int ParsePostfixExpression() { if(ParsePrimaryExpression()) { return(-1); } c_token* t; while((t = LookAhead())->type == LBRACKET || t->type == LPAR || t->type == DOT || t->type == PTR_OP || t->type == INC_DEC_OP) { if(ParsePostfixTailOption()) { return(-1); } } return(0); } int ParsePrimaryExpression() { c_token* t = LookAhead(); switch(t->type) { case IDENTIFIER: { //Look ahead +1 to know if we must consider this as a function name... 
c_token* t1 = LookAhead(1); if(t1->type == LPAR) { AcceptHighlight("function-name", t); } else { Accept(t); } } return(0); case CONSTANT: AcceptHighlight("constant", t); return(0); case STRING_LITERAL: AcceptHighlight("string", t); return(0); case LPAR: { Accept(t); if(ParseExpression()) { return(-1); // error } t = LookAhead(); if(t->type == RPAR) { Accept(t); return(0); } else { return(-1); } } default: fprintf(stderr, "error in primary expression\n"); return(-1); } } //------------------------------------------------------------------------------------ // Statements //------------------------------------------------------------------------------------ int ParseStatement(); int ParseExpressionStatement() { c_token* t = LookAhead(); fprintf(output, ""); if(t->type == SEMICOLON) { Accept(t); fprintf(output, ""); return(0); } else { if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != SEMICOLON) { return(-1); } Accept(t); fprintf(output, ""); return(0); } } int ParseLabeledStatement() { c_token* t = LookAhead(); switch(t->type) { case KEYWORD: if(t->c == CASE) { AcceptHighlight("keyword", t); if(ParseConditionalExpression()) { return(-1); } } else if(t->c == DEFAULT) { AcceptHighlight("keyword", t); } else { return(-1); } break; case IDENTIFIER: AcceptHighlight("label", t); t = LookAhead(); break; default: return(-1); } t = LookAhead(); if(t->type != COLON) { return(-1); } Accept(t); return(ParseStatement()); } int ParseSelectionStatement() { c_token* t = LookAhead(); if(t->type != KEYWORD) { return(-1); } if(t->c == IF) { AcceptHighlight("keyword", t); t = LookAhead(); if(t->type != LPAR) { return(-1); } Accept(t); if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } Accept(t); if(ParseStatement()) { return(-1); } t = LookAhead(); if(t->type == KEYWORD && t->c == ELSE) { AcceptHighlight("keyword", t); return(ParseStatement()); } return(0); } else if(t->c == SWITCH) { AcceptHighlight("keyword", t); t = 
LookAhead(); if(t->type != LPAR) { return(-1); } Accept(t); if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } Accept(t); return(ParseStatement()); } else { return(-1); } } int ParseExpressionDeclaration() { c_token* t = LookAhead(); switch(t->type) { case KEYWORD: switch(t->c) { case SIZEOF: return(ParseExpressionStatement()); case STORAGE_CLASS_SPECIFIER: case TYPE_QUALIFIER: case STRUCT: case UNION: case ENUM: return(ParseDeclaration()); default: return(-1); } case CONSTANT: case STRING_LITERAL: case LPAR: case OPERATOR: return(ParseExpressionStatement()); case BUILTIN_TYPE: return(ParseDeclaration()); case IDENTIFIER: { //Here we need a look ahead + 1 to make a decision... c_token* t1 = LookAhead(1); if( t1->type == IDENTIFIER || (t1->type == KEYWORD && ( t1->c == TYPE_QUALIFIER || t1->c == STORAGE_CLASS_SPECIFIER || t1->c == STRUCT || t1->c == UNION || t1->c == ENUM))) { return(ParseDeclaration()); } else { return(ParseExpressionStatement()); } } default: return(-1); } } int ParseIterationStatement() { c_token* t = LookAhead(); if(t->type != KEYWORD) { return(-1); } switch(t->c) { case WHILE: AcceptHighlight("keyword", t); t = LookAhead(); if(t->type != LPAR) { return(-1); } Accept(t); if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } Accept(t); return(ParseStatement()); case DO: AcceptHighlight("keyword", t); if(ParseStatement()) { return(-1); } t = LookAhead(); if(t->type != KEYWORD || t->c != WHILE) { return(-1); } AcceptHighlight("keyword", t); t = LookAhead(); if(t->type != LPAR) { return(-1); } Accept(t); if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } Accept(t); t = LookAhead(); if(t->type != SEMICOLON) { return(-1); } Accept(t); return(0); case FOR: AcceptHighlight("keyword", t); t = LookAhead(); if(t->type != LPAR) { return(-1); } Accept(t); if(ParseExpressionDeclaration()) { return(-1); } if(ParseExpressionStatement()) { 
return(-1); } if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != RPAR) { return(-1); } Accept(t); return(ParseStatement()); default: return(-1); } } int ParseJumpStatement() { c_token* t = LookAhead(); if(t->type != KEYWORD) { return(-1); } switch(t->c) { case GOTO: AcceptHighlight("keyword", t); t = LookAhead(); if(t->type != IDENTIFIER) { return(-1); } AcceptHighlight("label", t); t = LookAhead(); if(t->type != SEMICOLON) { return(-1); } Accept(t); return(0); case CONTINUE: case BREAK: AcceptHighlight("keyword", t); t = LookAhead(); if(t->type != SEMICOLON) { return(-1); } Accept(t); return(0); case RETURN: AcceptHighlight("keyword", t); t = LookAhead(); if(t->type != SEMICOLON) { if(ParseExpression()) { return(-1); } t = LookAhead(); if(t->type != SEMICOLON) { return(-1); } Accept(t); return(0); } Accept(t); return(0); default: return(-1); } } int ParseStatementDeclaration() { c_token* t = LookAhead(); switch(t->type) { case KEYWORD: switch(t->c) { case CASE: case DEFAULT: case SIZEOF: case IF: case SWITCH: case WHILE: case DO: case FOR: case GOTO: case CONTINUE: case BREAK: case RETURN: return(ParseStatement()); case STORAGE_CLASS_SPECIFIER: case TYPE_QUALIFIER: case STRUCT: case UNION: case ENUM: return(ParseDeclaration()); default: return(-1); } case CONSTANT: case STRING_LITERAL: case CLBRACKET: case LPAR: case OPERATOR: case SEMICOLON: return(ParseStatement()); case BUILTIN_TYPE: return(ParseDeclaration()); case IDENTIFIER: { //Here we need a look ahead + 1 to make a decision... c_token* t1 = LookAhead(1); if( t1->type == IDENTIFIER || (t1->type == KEYWORD && ( t1->c == TYPE_QUALIFIER || t1->c == STORAGE_CLASS_SPECIFIER || t1->c == STRUCT || t1->c == UNION || t1->c == ENUM)) || (t1->type == OPERATOR && t1->c == '*')) // we arbitrarily prioritize pointer declaration over expression...) 
{ return(ParseDeclaration()); } else { return(ParseStatement()); } } default: return(-1); } } int ParseCompoundStatement() { c_token* t = LookAhead(); if(t->type != CLBRACKET) { return(-1); } Accept(t); while((t = LookAhead())->type != CRBRACKET) { if(ParseStatementDeclaration()) { return(-1); } } t = LookAhead(); if(t->type != CRBRACKET) { return(-1); } Accept(t); return(0); } int ParseStatement() { //dummy c_token* t = LookAhead(); if(t->type == KEYWORD) { if(t->c == CASE || t->c == DEFAULT) { return(ParseLabeledStatement()); } else if(t->c == IF || t->c == SWITCH) { return(ParseSelectionStatement()); } else if(t->c == WHILE || t->c == DO || t->c == FOR) { return(ParseIterationStatement()); } else if(t->c == GOTO || t->c == CONTINUE || t->c == BREAK || t->c == RETURN) { return(ParseJumpStatement()); } else { return(-1); } } else if(t->type == IDENTIFIER) { //NOTE(martin): here we need a lookahead + 1 to make our decision char c = getc(input); ungetc(c, input); if(c == ':') { return(ParseLabeledStatement()); } else { return(ParseExpressionStatement()); } } else if(t->type == CLBRACKET) { return(ParseCompoundStatement()); } else { return(ParseExpressionStatement()); } } //------------------------------------------------------------------------------------ // Code and code snippets //------------------------------------------------------------------------------------ int ParseDeclarationOrFunction() { if(ParseDeclarationHead()) { return(-1); } c_token* t = LookAhead(); if(t->type == SEMICOLON) { Accept(t); return(0); } else if(t->type == CLBRACKET) { return(ParseStatement()); } else { return(-1); } } int ParseCodeSnippet() { c_token* t = LookAhead(); switch(t->type) { case KEYWORD: switch(t->c) { case CASE: case DEFAULT: case SIZEOF: case IF: case SWITCH: case WHILE: case DO: case FOR: case GOTO: case CONTINUE: case BREAK: case RETURN: return(ParseStatement()); case STORAGE_CLASS_SPECIFIER: case TYPE_QUALIFIER: case STRUCT: case UNION: case ENUM: 
return(ParseDeclarationOrFunction()); default: return(-1); } case CONSTANT: case STRING_LITERAL: case CLBRACKET: case LPAR: case OPERATOR: case SEMICOLON: return(ParseStatement()); case BUILTIN_TYPE: return(ParseDeclarationOrFunction()); case IDENTIFIER: { //Here we need a look ahead + 1 to make a decision... c_token* t1 = LookAhead(1); if( t1->type == IDENTIFIER || (t1->type == KEYWORD && ( t1->c == TYPE_QUALIFIER || t1->c == STORAGE_CLASS_SPECIFIER || t1->c == STRUCT || t1->c == UNION || t1->c == ENUM)) || (t1->type == OPERATOR && t1->c == '*')) // we arbitrarily prioritize pointer declaration over expression... { return(ParseDeclarationOrFunction()); } else { return(ParseStatement()); } } default: return(-1); } } int ParseCodeSection() { c_token* t; while((t = LookAhead())->type != ENDF && t->type != ENDOFCODE) { if(ParseCodeSnippet()) { return(-1); } } return(0); } int HighlightCode(FILE* in, FILE* out) { //TODO(martin): change for a buffer input input = in; output = out; InitParser(); c_token* t; while((t = LookAhead())->type != ENDF && t->type != ENDOFCODE) { if(ParseCodeSection() != 0) { t = LookAhead(); fprintf(stderr, "-- A syntax error occured at token \'%s\' (%i)\n", t->text, t->type); if(t->type == ENDOFCODE) { return(-1); } #if 0 AcceptHighlight("syntax-error", t); #else Accept(t); #endif } } return(0); }