commit 02fe867dfbe435bd352ecc9dddc7c52ecfe446f4 Author: Martin Fouilleul Date: Wed Nov 16 07:21:31 2022 -0800 init commit diff --git a/c_parser.cpp b/c_parser.cpp new file mode 100644 index 0000000..1b8c465 --- /dev/null +++ b/c_parser.cpp @@ -0,0 +1,2933 @@ +//***************************************************************** +// +// $file: parser.cpp $ +// $author: Martin Fouilleul $ +// $date: 25/12/2017 $ +// +//***************************************************************** + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Martin Fouilleul +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+------------------------------------------------------------------------------ +*/ + +#include +#include +#include +#include + +//------------------------------------------------------------------------------------ +// structs and globals +//------------------------------------------------------------------------------------ + +static FILE* input; +static FILE* output; + +enum { NONE, + ENDOFCODE, + ENDF, + KEYWORD, + BUILTIN_TYPE, + IDENTIFIER, + CONSTANT, + STRING_LITERAL, + LPAR, + RPAR, + LBRACKET, + RBRACKET, + CLBRACKET, + CRBRACKET, + PTR_OP, + OP_ASSIGN, + OPERATOR, + INC_DEC_OP, + DOT, + CONDITION_OP, + COMMA, + COLON, + SEMICOLON, + ELLIPSIS }; + +enum { SIZEOF = 128, + NEQUAL, + EQUAL, + AND, + OR, + LSHIFT, + RSHIFT, + LEQUAL, + GEQUAL, + NEGEQUAL, + CONST, + VOLATILE, + ENUM, + UNION, + STRUCT, + TYPE_QUALIFIER, + STORAGE_CLASS_SPECIFIER, + CASE, + DEFAULT, + IF, + ELSE, + SWITCH, + WHILE, + DO, + FOR, + GOTO, + CONTINUE, + BREAK, + RETURN }; + +const int BUILTIN_TYPES_COUNT = 8; +const char* BUILTIN_TYPES[BUILTIN_TYPES_COUNT] = { "void", + "char", + "short", + "int", + "long", + "float", + "double", + "unsigned" }; + +struct c_keyword +{ + const char* str; + int category; +}; + +const int KEYWORDS_COUNT = 23; +const c_keyword KEYWORDS[KEYWORDS_COUNT] = { {"auto", 0 }, + {"break", BREAK}, + {"case", CASE}, + {"const", TYPE_QUALIFIER}, + {"continue", CONTINUE}, + {"default", DEFAULT}, + {"do", DO}, + {"else", ELSE}, + {"enum", ENUM}, + {"extern", STORAGE_CLASS_SPECIFIER}, + {"for", FOR}, + {"goto", GOTO}, + {"if", IF}, + {"register", STORAGE_CLASS_SPECIFIER}, + {"return", RETURN}, + {"sizeof", SIZEOF}, + {"static", STORAGE_CLASS_SPECIFIER}, + {"struct", STRUCT}, + {"switch", SWITCH}, + {"typedef", STORAGE_CLASS_SPECIFIER}, + {"union", UNION}, + {"volatile", TYPE_QUALIFIER}, + {"while", WHILE}}; + +struct c_token +{ + int type; + int c; + char* text; +}; + +c_token LOOK_AHEAD_TOKEN_0; +c_token LOOK_AHEAD_TOKEN_1; + 
+//------------------------------------------------------------------------------------ +// Lexer functions +//------------------------------------------------------------------------------------ + +char* TrailingSpaces() +{ + int count = 0; + int size = 256; + + char* buffer = (char*)malloc(size); + + char c = getc(input); + while((c == ' ' || c == '\t' || c == '\n')) + { + buffer[count] = c; + count++; + c = getc(input); + + if(count >= size) + { + size += 256; + buffer = (char*)realloc(buffer, size); + } + } + buffer[count] = '\0'; + + ungetc(c, input); + return(buffer); +} + +c_token GetNextIdentifierOrKeyword() +{ + int size = 256; + int count = 0; + char* buffer = (char*)malloc(size); + + char c; + while(true) + { + c = getc(input); + if(!isalpha(c) && !isdigit(c) && c != '_') + { + ungetc(c, input); + + buffer[count] = '\0'; + char* sp = TrailingSpaces(); + + c_token t; + + t.type = IDENTIFIER; + t.c = 0; + + for(int i=0;i= size) + { + size += 256; + buffer = (char*)realloc(buffer, size); + } + } +} + +c_token GetNextConstant() +{ + //TODO(martin): handle exponent notation + // and type size specifier + + int size = 256; + char* buffer = (char*)malloc(size); + int count = 0; + char c; + while(true) + { + c = getc(input); + if(!isdigit(c) && c != '.') + { + ungetc(c, input); + + buffer[count] = '\0'; + char* sp = TrailingSpaces(); + + c_token t; + t.type = CONSTANT; + t.text = (char*)malloc(count+strlen(sp)+1); + + strcpy(t.text, buffer); + strcat(t.text, sp); + free(sp); + free(buffer); + return(t); + } + buffer[count] = c; + count++; + if(count >= size) + { + size += 256; + buffer = (char*)realloc(buffer, size); /* grow to the enlarged capacity; a fixed 256 would let long constants overrun the buffer */ + } + } +} + +c_token GetNextStringLiteral() +{ + int size = 256; + char* buffer = (char*)malloc(size); + int count = 0; + + char c = getc(input); + buffer[0] = c; + count++; + + while(true) + { + c = getc(input); + if(c == '\"') //TODO(martin): handle escaped quotes in string literal !! 
+ { + buffer[count] = c; + count++; + buffer[count] = '\0'; + + char* sp = TrailingSpaces(); + + c_token t; + t.type = STRING_LITERAL; + t.text = (char*)malloc(count+strlen(sp)+1); + + strcpy(t.text, buffer); + strcat(t.text, sp); + free(sp); + free(buffer); + return(t); + } + buffer[count] = c; + count++; + if(count+1 >= size) /* keep one spare byte: the closing quote and the terminating NUL are both stored after this check */ + { + size += 256; + buffer = (char*)realloc(buffer, size); + } + } +} +c_token GetNextToken(); + +c_token GetNextOneLineCommentToken() +{ + fprintf(output, "/"); + + int c; /* int, so the EOF returned by getc stays distinguishable from a data byte */ + while((c = getc(input)) != EOF) + { + switch(c) + { + case '<': + fprintf(output, "<"); + break; + case '>': + fprintf(output, ">"); + break; + case '&': + fprintf(output, "&"); + break; + default: + putc(c, output); + break; + } + if(c == '\n') + { + break; + } + } + fprintf(output, ""); + while((c = getc(input)) == ' ' || c == '\t' || c == '\n' ) + { + putc(c, output); + } + ungetc(c, input); + return(GetNextToken()); +} + +c_token GetNextMultilineCommentToken() +{ + fprintf(output, "/"); + + int c; /* int, so the EOF returned by getc stays distinguishable from a data byte */ + char last = 0; + while((c = getc(input)) != EOF) + { + switch(c) + { + case '<': + fprintf(output, "<"); + break; + case '>': + fprintf(output, ">"); + break; + case '&': + fprintf(output, "&"); + break; + default: + putc(c, output); + break; + } + if(c == '/' && last == '*') + { + break; + } + last = c; + } + fprintf(output, ""); + while((c = getc(input)) == ' ' || c == '\t' || c == '\n' ) + { + putc(c, output); + } + ungetc(c, input); + return(GetNextToken()); +} + +c_token GetNextToken() +{ + int c = getc(input); /* int, so the EOF test below is reliable whatever the signedness of char */ + + if(c==EOF) + { + c_token t; + t.type = ENDF; + t.text = 0; + return(t); + } + else if(isalpha(c) || c=='_') + { + ungetc(c, input); + return GetNextIdentifierOrKeyword(); + } + else if(isdigit(c)) + { + ungetc(c, input); + return GetNextConstant(); + } + else + { + c_token t; + t.type = NONE; + t.text = 0; + + const char* lex = ""; + + switch(c) + { + case '\"': + ungetc(c, input); + return GetNextStringLiteral(); + case '(': + { + t.type = LPAR; + lex 
= "("; + break; + } + case ')': + { + t.type = RPAR; + lex = ")"; + break; + } + case '[': + { + char buff[7]; + for(int i=0;i<6;i++) + { + buff[i] = getc(input); + } + buff[6] = '\0'; + if(!strcmp(buff, "/code]")) + { + t.type = ENDOFCODE; + return(t); + } + else + { + for(int i=5;i>=0;i--) + { + ungetc(buff[i], input); + } + } + + t.type = LBRACKET; + lex = "["; + break; + } + case ']': + { + t.type = RBRACKET; + lex = "]"; + break; + } + + case '{': + { + t.type = CLBRACKET; + lex = "{"; + break; + } + case '}': + { + t.type = CRBRACKET; + lex = "}"; + break; + } + + case '-': + { + char n = getc(input); + if(n == '-') + { + t.type = INC_DEC_OP; + lex = "--"; + } + else if(n == '>') + { + t.type = PTR_OP; + lex = "->"; + } + else if(n == '=') + { + t.type = OP_ASSIGN; + lex = "-="; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = c; + lex = "-"; + } + break; + } + case '+': + { + char n = getc(input); + if(n == '+') + { + t.type = INC_DEC_OP; + lex = "++"; + } + else if(n == '=') + { + t.type = OP_ASSIGN; + lex = "+="; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = c; + lex = "+"; + } + break; + } + case '*': + { + char n = getc(input); + if(n == '=') + { + t.type = OP_ASSIGN; + lex = "*="; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = c; + lex = "*"; + } + break; + } + case '/': + { + char n = getc(input); + if(n == '/') + { + ungetc(n, input); + return(GetNextOneLineCommentToken()); + } + else if(n == '*') + { + ungetc(n, input); + return(GetNextMultilineCommentToken()); + } + else if(n == '=') + { + t.type = OP_ASSIGN; + lex = "/="; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = c; + lex = "/"; + } + break; + } + case '%': + { + char n = getc(input); + if(n == '=') + { + t.type = OP_ASSIGN; + lex = "%="; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = c; + lex = "%"; + } + break; + } + + case '<': + { + char n = getc(input); + if(n == '=') + { + t.type = OPERATOR; + t.c = 
LEQUAL; + lex = "<="; + } + else if(n == '<') + { + t.type = OPERATOR; + t.c = LSHIFT; + lex = "<<"; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = c; + lex = "<"; + } + break; + } + + case '>': + { + char n = getc(input); + if(n == '=') + { + t.type = OPERATOR; + t.c = GEQUAL; + lex = ">="; + } + else if(n == '>') + { + t.type = OPERATOR; + t.c = RSHIFT; + lex = ">>"; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = c; + lex = ">"; + } + break; + } + + + case '=': + { + char n = getc(input); + if(n == '=') + { + t.type = OPERATOR; + t.c = EQUAL; + lex = "=="; + } + else + { + ungetc(n, input); + t.type = EQUAL; + t.c = c; + lex = "="; + } + break; + } + + case '&': + { + char n = getc(input); + if(n == '=') + { + t.type = OP_ASSIGN; + t.c = '&'; + lex = "&="; + } + else if(n == '&') + { + t.type = OPERATOR; + t.c = AND; + lex = "&&"; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = c; + lex = "&"; + } + break; + } + + case '|': + { + char n = getc(input); + if(n == '=') + { + t.type = OP_ASSIGN; + t.c = '|'; + lex = "|="; + } + else if(n == '|') + { + t.type = OPERATOR; + t.c = OR; + lex = "||"; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = c; + lex = "|"; + } + break; + } + + case '^': + { + char n = getc(input); + if(n == '=') /* "^=" is the compound XOR assignment; a second '^' is pushed back and lexed on its own */ + { + t.type = OP_ASSIGN; + t.c = '^'; + lex = "^="; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = '^'; + lex = "^"; + } + break; + } + + case '!': + { + char n = getc(input); + if(n == '=') + { + t.type = OPERATOR; + t.c = NEQUAL; + lex = "!="; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = '!'; + lex = "!"; + } + break; + } + + case '~': + { + char n = getc(input); + if(n == '=') + { + t.type = OP_ASSIGN; + t.c = NEGEQUAL; + lex = "~="; + } + else + { + ungetc(n, input); + t.type = OPERATOR; + t.c = '~'; + lex = "~"; + } + break; + } + + case '.': + { + char c1 = getc(input); + char c2 = getc(input); + + if(c1 == '.' 
&& c2 == '.') + { + t.type = ELLIPSIS; + lex = "..."; + } + else + { + t.type = DOT; + lex = "."; + ungetc(c2, input); + ungetc(c1, input); + } + break; + } + + case '?': + { + t.type = CONDITION_OP; + lex = "?"; + break; + } + + case ':': + { + t.type = COLON; + lex = ":"; + break; + } + + case ',': + { + t.type = COMMA; + lex = ","; + break; + } + + case ';': + { + t.type = SEMICOLON; + lex = ";"; + break; + } + + default: + { + t.type = NONE; + char* sp = TrailingSpaces(); + t.text = (char*)malloc(strlen(sp)+2); + t.text[0] = c; + strcpy(t.text+1, sp); + free(sp); + return(t); + } + } + + char* sp = TrailingSpaces(); + t.text = (char*)malloc(strlen(sp)+strlen(lex)+1); + strcpy(t.text, lex); + strcat(t.text, sp); + free(sp); + return(t); + } +} + +//------------------------------------------------------------------------------------ +// Parser Helper and output functions +//------------------------------------------------------------------------------------ + +void InitParser() +{ + LOOK_AHEAD_TOKEN_0 = GetNextToken(); + LOOK_AHEAD_TOKEN_1.type = NONE; + LOOK_AHEAD_TOKEN_1.text = 0; +} + +c_token* LookAhead(int level = 0) +{ + if(level) + { + if(LOOK_AHEAD_TOKEN_1.type == NONE) + { + LOOK_AHEAD_TOKEN_1 = GetNextToken(); + } + return(&LOOK_AHEAD_TOKEN_1); + } + else + { + return(&LOOK_AHEAD_TOKEN_0); + } +} + +void Accept(c_token* t) +{ + if(t->text) + { + int len = strlen(t->text); + for(int i=0;itext[i]) + { + case '<': + fprintf(output, "<"); + break; + case '>': + fprintf(output, ">"); + break; + case '&': + fprintf(output, "&"); + break; + default: + putc(t->text[i], output); + + } + } + free(t->text); + } + if(LOOK_AHEAD_TOKEN_1.type != NONE) + { + LOOK_AHEAD_TOKEN_0 = LOOK_AHEAD_TOKEN_1; + LOOK_AHEAD_TOKEN_1.type = NONE; + LOOK_AHEAD_TOKEN_1.text = 0; + } + else + { + LOOK_AHEAD_TOKEN_0 = GetNextToken(); + } +} + +void AcceptHighlight(const char* style, c_token* t) +{ + if(t->text) + { + fprintf(output, "", style); + + int len = strlen(t->text); + for(int 
i=0;itext[i]) + { + case '<': + fprintf(output, "<"); + break; + case '>': + fprintf(output, ">"); + break; + case '&': + fprintf(output, "&"); + break; + default: + putc(t->text[i], output); + + } + } + fprintf(output, ""); + free(t->text); + } + if(LOOK_AHEAD_TOKEN_1.type != NONE) + { + LOOK_AHEAD_TOKEN_0 = LOOK_AHEAD_TOKEN_1; + LOOK_AHEAD_TOKEN_1.type = NONE; + LOOK_AHEAD_TOKEN_1.text = 0; + } + else + { + LOOK_AHEAD_TOKEN_0 = GetNextToken(); + } +} + +bool IsBinaryOp(c_token* t) +{ + if(t->type != OPERATOR) + { + return(false); + } + if( t->c == '+' + || t->c == '-' + || t->c == '*' + || t->c == '/' + || t->c == '%' + || t->c == '<' + || t->c == '>' + || t->c == '&' + || t->c == '|' + || t->c == '^' + || t->c == NEQUAL + || t->c == EQUAL + || t->c == AND + || t->c == OR + || t->c == LSHIFT + || t->c == RSHIFT + || t->c == LEQUAL + || t->c == GEQUAL) + { + return(true); + } + else + { + return(false); + } +} + +//------------------------------------------------------------------------------------ +// Parser production rules +//------------------------------------------------------------------------------------ + +int ParseExpression(); +int ParseConditionalExpression(); +int ParseAssignmentExpression(); + + +int ParseDirectDeclarator(); +int ParsePostfixedDeclarator(); +int ParsePointer(); +int ParseDeclarator(); +int ParseAbstractDeclarator(); +int ParseInitializer(); +int ParseInitializerList(); + +int ParseSpecifiersQualifiers(); +int ParseStructDeclaratorList(); +int ParseStructDeclarator(); +int ParseStructDeclarationList(); +int ParseEnumSpecifier(); +int ParseStructDeclaration(); +int ParseTypeSpecifier(); + +int ParseDeclarationSpecifiers(); +int ParseInitDeclaratorList(); +int ParseInitDeclarator(); + + +int ParseParameterList(); + + +int ParseDeclarationHead() +{ + if(ParseDeclarationSpecifiers()) + { + return(-1); + } + c_token* t = LookAhead(); + if(t->type == IDENTIFIER || t->type == LPAR || (t->type == OPERATOR && t->c == '*')) + { + 
if(ParseInitDeclaratorList()) + { + return(-1); + } + } + return(0); +} + +int ParseDeclaration() +{ + if(ParseDeclarationHead()) + { + return(-1); + } + c_token* t = LookAhead(); + if(t->type != SEMICOLON) + { + return(-1); + } + Accept(t); + return(0); +} + +int ParseDeclarationSpecifiers() +{ + c_token* t = LookAhead(); + + if( t->type == IDENTIFIER) + { + return(ParseTypeSpecifier()); + } + else if(t->type == BUILTIN_TYPE) + { + if(ParseTypeSpecifier()) + { + return(-1); + } + + if( t->type == BUILTIN_TYPE + || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM )) + || (t->type == KEYWORD && t->c == STORAGE_CLASS_SPECIFIER) + || (t->type == KEYWORD && t->c == TYPE_QUALIFIER)) + { + return(ParseDeclarationSpecifiers()); + } + return(0); + } + else if(t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM )) + { + if(ParseTypeSpecifier()) + { + return(-1); + } + + if( t->type == BUILTIN_TYPE + || t->type == IDENTIFIER + || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM )) + || (t->type == KEYWORD && t->c == STORAGE_CLASS_SPECIFIER) + || (t->type == KEYWORD && t->c == TYPE_QUALIFIER)) + { + return(ParseDeclarationSpecifiers()); + } + return(0); + } + else if( t->type == KEYWORD && (t->c == STORAGE_CLASS_SPECIFIER || t->c == TYPE_QUALIFIER)) + { + AcceptHighlight("keyword", t); + + if( t->type == BUILTIN_TYPE + || t->type == IDENTIFIER + || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM )) + || (t->type == KEYWORD && t->c == STORAGE_CLASS_SPECIFIER) + || (t->type == KEYWORD && t->c == TYPE_QUALIFIER)) + { + return(ParseDeclarationSpecifiers()); + } + return(0); + } + else + { + return(-1); + } +} + +int ParseStructOrUnionSpecifier() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case IDENTIFIER: + AcceptHighlight("type", t); + t = LookAhead(); + if(t->type == CLBRACKET) + { + Accept(t); + if(ParseStructDeclarationList()) + { + return(-1); + } + t = LookAhead(); + 
if(t->type != CRBRACKET) + { + return(-1); + } + Accept(t); + return(0); + } + return(0); + + case CLBRACKET: + Accept(t); + if(ParseStructDeclarationList()) + { + return(-1); + } + t = LookAhead(); + if(t->type != CRBRACKET) + { + return(-1); + } + Accept(t); + return(0); + + default: + return(0); + } +} + +int ParseTypeSpecifier() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case BUILTIN_TYPE: + AcceptHighlight("type", t); + return(0); + + case KEYWORD: + if(t->c == STRUCT || t->c == UNION) + { + AcceptHighlight("keyword", t); + return(ParseStructOrUnionSpecifier()); + } + else if(t->c == ENUM) + { + return(ParseEnumSpecifier()); + } + else + { + return(-1); + } + + case IDENTIFIER: + AcceptHighlight("type", t); + return(0); + + default: + return(-1); + } +} + +int ParseStructDeclarationList() +{ + if(ParseStructDeclaration()) + { + return(-1); + } + + c_token* t = LookAhead(); +/* if( t->type == BUILTIN_TYPE + || t->type == IDENTIFIER + || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM )) + || (t->type == KEYWORD && t->c == TYPE_QUALIFIER)) +*/ if(t->type != CRBRACKET) + { + return(ParseStructDeclarationList()); + } + return(0); +} + +int ParseStructDeclaration() +{ + if(ParseDeclarationSpecifiers()) + { + return(-1); + } + if(ParseStructDeclaratorList()) + { + return(-1); + } + c_token* t = LookAhead(); + if(t->type != SEMICOLON) + { + return(-1); + } + Accept(t); + return(0); +} + +int ParseSpecifiersQualifiers() +{ + c_token* t = LookAhead(); + if(t->type == IDENTIFIER) + { + return(ParseTypeSpecifier()); + + } + if( t->type == BUILTIN_TYPE) + { + if(ParseTypeSpecifier()) + { + return(-1); + } + + if( t->type == BUILTIN_TYPE + || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM || t->c == TYPE_QUALIFIER))) + { + return(ParseSpecifiersQualifiers()); + } + return(0); + } + else if((t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM))) + { + if(ParseTypeSpecifier()) + { + return(-1); + 
} + + if( t->type == BUILTIN_TYPE + || t->type == IDENTIFIER + || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM || t->c == TYPE_QUALIFIER))) + { + return(ParseSpecifiersQualifiers()); + } + return(0); + } + else if( t->type == KEYWORD && t->c == TYPE_QUALIFIER) + { + AcceptHighlight("keyword", t); + + if( t->type == BUILTIN_TYPE + || t->type == IDENTIFIER + || (t->type == KEYWORD && (t->c == STRUCT || t->c == UNION || t->c == ENUM || t->c == TYPE_QUALIFIER))) + { + return(ParseSpecifiersQualifiers()); + } + return(0); + } + else + { + return(-1); + } +} + +int ParseStructDeclaratorList() +{ + if(ParseStructDeclarator()) + { + return(-1); + } + c_token* t; + while((t = LookAhead())->type == COMMA) + { + Accept(t); + if(ParseStructDeclaratorList()) + { + return(-1); + } + } + return(0); +} + +int ParseStructDeclarator() +{ + //NOTE(martin): we don't do ':' declarators.. + return(ParseDeclarator()); +} + +int ParseEnumerator(); +int ParseEnumeratorList(); + +int ParseEnumSpecifier() +{ + c_token* t = LookAhead(); + if(t->type != KEYWORD || t->c != ENUM) + { + return(-1); + } + AcceptHighlight("keyword", t); + + t = LookAhead(); + if(t->type == IDENTIFIER) + { + Accept(t); + t = LookAhead(); + if(t->type == CLBRACKET) + { + return(ParseEnumeratorList()); + } + return(0); + } + else + { + return(ParseEnumeratorList()); + } +} + +int ParseEnumeratorList() +{ + c_token* t = LookAhead(); + if(t->type == CLBRACKET) + { + Accept(t); + t = LookAhead(); + if(ParseEnumerator()) + { + return(-1); + } + while((t = LookAhead())->type == COMMA) + { + Accept(t); + t = LookAhead(); + if(ParseEnumerator()) + { + return(-1); + } + } + t = LookAhead(); + if(t->type != CRBRACKET) + { + return(-1); + } + Accept(t); + return(0); + } + else + { + return(-1); + } +} + +int ParseEnumerator() +{ + c_token* t = LookAhead(); + if(t->type != IDENTIFIER) + { + return(-1); + } + Accept(t); + t = LookAhead(); + if(t->type == EQUAL) + { + Accept(t); + 
return(ParseConditionalExpression()); + } + return(0); +} + +int ParseInitDeclaratorList() +{ + if(ParseInitDeclarator()) + { + return(-1); + } + c_token* t; + while((t = LookAhead())->type == COMMA) + { + Accept(t); + if(ParseInitDeclarator()) + { + return(-1); + } + } + return(0); +} + +int ParseInitDeclarator() +{ + if(ParseDeclarator()) + { + return(-1); + } + c_token* t = LookAhead(); + if(t->type == EQUAL) + { + Accept(t); + return(ParseInitializer()); + } + return(0); +} + +int ParseInitializer() +{ + c_token* t = LookAhead(); + if(t->type == CLBRACKET) + { + Accept(t); + if(ParseInitializerList()) + { + return(-1); + } + t = LookAhead(); + if(t->type != CRBRACKET) + { + return(-1); + } + Accept(t); + return(0); + } + else + { + return(ParseAssignmentExpression()); + } +} + +int ParseInitializerList() +{ + if(ParseInitializer()) + { + return(-1); + } + c_token* t = LookAhead(); + if(t->type == COMMA) + { + Accept(t); + return(ParseInitializerList()); + } + return(0); +} + +int ParseDeclarator() +{ + //dummy + c_token* t = LookAhead(); + if(t->type == OPERATOR && t->c == '*') + { + if(ParsePointer()) + { + return(-1); + } + return(ParsePostfixedDeclarator()); + } + else if(t->type == LPAR || t->type == IDENTIFIER || t->type == LBRACKET) + { + return(ParsePostfixedDeclarator()); + } + else + { + return(-1); + } +} + +int ParsePointer() +{ + c_token* t = LookAhead(); + if(t->type != OPERATOR || t->c != '*') + { + printf("expected pointer\n"); + return(-1); + } + Accept(t); + while(t->type == OPERATOR && t->c == '*' ) + { + Accept(t); + t = LookAhead(); + } + + if(t->type == KEYWORD && t->c == TYPE_QUALIFIER) + { + while(t->type == KEYWORD && t->c == TYPE_QUALIFIER) + { + AcceptHighlight("keyword", t); + } + t = LookAhead(); + if(t->type == OPERATOR && t->c == '*') + { + return(ParsePointer()); + } + } + return(0); +} + +int ParsePostfixedDeclarator() +{ + if(ParseDirectDeclarator()) + { + printf("Error in parse array declarator : expected direct declarator\n"); 
+ return(-1); + } + + c_token* t; + while((t = LookAhead())->type == LBRACKET || t->type == LPAR) + { + if(t->type == LBRACKET) + { + Accept(t); + t = LookAhead(); + if(t->type != RBRACKET) + { + if(ParseConditionalExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RBRACKET) + { + return(-1); + } + Accept(t); + } + else + { + Accept(t); + } + } + else + { + Accept(t); + t = LookAhead(); + if(t->type != RPAR) + { + if(ParseParameterList()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + Accept(t); + } + else + { + Accept(t); + } + } + } + return(0); +} + +int ParseDirectDeclarator() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case IDENTIFIER: + { + //HACK to know if we must consider this as a function name... + char c = getc(input); + ungetc(c, input); + + if(c == '(') + { + AcceptHighlight("function-name", t); + } + else + { + Accept(t); + } + return(0); + } + case LPAR: + { + //NEED TO DECIDE BETWEEN DECLARATOR OR PARAMETER TYPE LIST + Accept(t); + t = LookAhead(); + if(t->type != RPAR) + { + if(t->type == KEYWORD || t->type == BUILTIN_TYPE) + { + if(ParseParameterList()) + { + return(-1); + } + } + else if(t->type == IDENTIFIER) + { + //We need a look ahead + 1 to decide between declarator and parametertypelist + // '(' or '[' or ')' -> declarator + // otherwise, parameter_type_list + + c_token* t1 = LookAhead(1); + if(t1->type == LPAR || t1->type == LBRACKET || t1->type == RPAR) + { + if(ParseDeclarator()) + { + return(-1); + } + } + else if(ParseParameterList()) + { + return(-1); + } + } + else if(ParseDeclarator()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + } + Accept(t); + return(0); + } + case LBRACKET: + { + Accept(t); + t = LookAhead(); + if(t->type != RBRACKET) + { + if(ParseConditionalExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RBRACKET) + { + return(-1); + } + } + Accept(t); + return(0); + } + default: + return(0); 
// direct declarator is optionnal... + } +} +//------------------------------------------------------------------------------------ +// Functions +//------------------------------------------------------------------------------------ + +int ParseDirectAbstractDeclarator(); +int ParsePostfixedAbstractDeclarator(); +int ParseParameterDeclaration(); +int ParseDirectFunctionDeclarator(); +int ParseFunctionDeclarator(); + +/* +int ParseFunctionDeclaration() +{ + if(ParseDeclarationSpecifiers()) + { + return(-1); + } + return(ParseFunctionDeclarator()); +} +*/ + +int ParseFunctionDeclarator() +{ + c_token* t = LookAhead(); + if(t->type == OPERATOR && t->c == '*') + { + if(ParsePointer()) + { + return(-1); + } + } + return(ParseDirectFunctionDeclarator()); +} + +int ParseDirectFunctionDeclarator() +{ + c_token* t = LookAhead(); + if(t->type != IDENTIFIER) + { + return(-1); + } + AcceptHighlight("function-name", t); + t = LookAhead(); + if(t->type != LPAR) + { + return(-1); + } + Accept(t); + + t = LookAhead(); + if(t->type != RPAR) + { + if(ParseParameterList()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + } + Accept(t); + return(0); +} + +int ParseParameterList() +{ + if(ParseParameterDeclaration()) + { + return(-1); + } + c_token* t; + + while((t = LookAhead())->type == COMMA) + { + Accept(t); + t = LookAhead(); + + if(t->type == ELLIPSIS) + { + Accept(t); + return(0); + } + if(ParseParameterList()) + { + return(-1); + } + } + return(0); +} + +int ParseParameterDeclaration() +{ + if(ParseDeclarationSpecifiers()) + { + return(-1); + } + c_token* t = LookAhead(); + if(t->type == LPAR || t->type == LBRACKET || t->type == IDENTIFIER || (t->type == OPERATOR && t->c == '*')) + { + return(ParseDeclarator()); + } + else + { + return(-1); + } +}; + +int ParseAbstractDeclarator() +{ + c_token* t = LookAhead(); + if(t->type == OPERATOR && t->c == '*') + { + if(ParsePointer()) + { + return(-1); + } + t = LookAhead(); + if(t->type == LPAR || 
t->type == LBRACKET) + { + if(ParsePostfixedAbstractDeclarator()) + { + return(-1); + } + } + return(0); + } + else + { + return(ParsePostfixedAbstractDeclarator()); + } +} + +int ParsePostfixedAbstractDeclarator() +{ + if(ParseDirectAbstractDeclarator()) + { + return(-1); + } + c_token* t; + while((t = LookAhead())->type == LBRACKET || t->type == LPAR) + { + if(t->type == LBRACKET) + { + Accept(t); + if(t->type != RBRACKET) + { + if(ParseConditionalExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RBRACKET) + { + return(-1); + } + } + Accept(t); + } + else + { + Accept(t); + if(ParseParameterList()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + Accept(t); + } + } + return(0); +} + +int ParseDirectAbstractDeclarator() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case LPAR: + Accept(t); + t = LookAhead(); + if(t->type == LPAR || t->type == LBRACKET || (t->type == OPERATOR && t->c == '*')) + { + if(ParseAbstractDeclarator()) + { + return(-1); + } + } + else + { + if(ParseParameterList()) + { + return(-1); + } + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + Accept(t); + return(0); + + case LBRACKET: + Accept(t); + t = LookAhead(); + if(t->type != RBRACKET) + { + if(ParseConditionalExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RBRACKET) + { + return(-1); + } + } + Accept(t); + return(0); + + default: + return(-1); + } +} + +//------------------------------------------------------------------------------------ +// Expression +//------------------------------------------------------------------------------------ + +int ParsePrimaryExpression(); +int ParsePostfixExpression(); +int ParseUnaryExpression(); +int ParseBinaryExpression(); +int ParseConditionalExpression(); +int ParseAssignmentExpression(); + +int ParseExpression() +{ + if(ParseAssignmentExpression()) + { + return(-1); + } + c_token* t; + while((t = LookAhead())->type == COMMA) + { + Accept(t); 
+ if(ParseAssignmentExpression()) + { + return(-1); + } + } + return(0); +} + +int ParseAssignmentTailOption() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case OP_ASSIGN: case EQUAL: + { + Accept(t); + return(ParseAssignmentExpression()); + } + case OPERATOR: + { + if(IsBinaryOp(t)) + { + Accept(t); + if(ParseBinaryExpression()) + { + return(-1); + } + while((t = LookAhead())->type == CONDITION_OP) + { + Accept(t); + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != COLON) + { + return(-1); + } + Accept(t); + if(ParseConditionalExpression()) + { + return(-1); + } + } + return(0); + } + else + { + return(-1); + } + } + case CONDITION_OP: + { + Accept(t); + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != COLON) + { + return(-1); + } + Accept(t); + if(ParseConditionalExpression()) + { + return(-1); + } + } + default: + return(0); + } +} + +int ParseAssignmentExpression() +{ + if(ParseUnaryExpression()) + { + return(-1); + } + return(ParseAssignmentTailOption()); +} + +int ParseConditionalExpression() +{ + if(ParseBinaryExpression()) + { + return(-1); + } + c_token* t = LookAhead(); + if(t->type == CONDITION_OP) + { + Accept(t); + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != COLON) + { + return(-1); + } + Accept(t); + return(ParseConditionalExpression()); + } + return(0); +} + +int ParseBinaryExpression() +{ + if(ParseUnaryExpression()) + { + return(-1); + } + c_token* t = LookAhead(); + while(IsBinaryOp(t)) + { + Accept(t); + if(ParseBinaryExpression()) + { + return(-1); + } + } + return(0); +} + +int ParseCastExpression() +{ + //NOTE(martin): we are not implementing cast for now !! 
+ + return(ParseUnaryExpression()); +} + + +int ParseTypeName() +{ + if(ParseSpecifiersQualifiers()) + { + return(-1); + } + c_token* t = LookAhead(); + + if(t->type != RPAR) // type_name is always used in parentheses + { + return(ParseAbstractDeclarator()); + } + return(0); +} + + +int ParseUnaryExpression() +{ + c_token* t = LookAhead(); + + switch(t->type) + { + case INC_DEC_OP: + { + Accept(t); + return(ParseUnaryExpression()); + } + case KEYWORD: + { + if(t->c != SIZEOF) + { + return(-1); + } + AcceptHighlight("keyword", t); + t = LookAhead(); + if(t->type != LPAR) + { + return(-1); + } + Accept(t); + if(ParseTypeName()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + Accept(t); + return(0); + } + case OPERATOR: + { + if(t->c == '&' || t->c == '*' || t->c == '+' || t->c == '-' || t->c == '~' || t->c == '!') + { + Accept(t); + return(ParseCastExpression()); + } + else + { + return(-1); + } + } + default: + return(ParsePostfixExpression()); + } +} + +int ParseArgumentExpressionList() +{ + fprintf(output, ""); + if(ParseAssignmentExpression()) + { + return(-1); + } + c_token* t;; + while((t = LookAhead())->type == COMMA) + { + Accept(t); + if(ParseArgumentExpressionList()) + { + return(-1); + } + } + fprintf(output, ""); + return(0); +} + +int ParsePostfixTailOption() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case LBRACKET : + { + Accept(t); + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type == RBRACKET) + { + Accept(t); + return(0); + } + else + { + return(-1); + } + } + case LPAR : + { + Accept(t); + + t = LookAhead(); + if(t->type == RPAR) + { + Accept(t); + return(0); + } + else + { + ParseArgumentExpressionList(); + t = LookAhead(); + if(t->type == RPAR) + { + Accept(t); + return(0); + } + else + { + return(-1); + } + } + } + case DOT : + { + Accept(t); + t = LookAhead(); + if(t->type == IDENTIFIER) + { + Accept(t); + return(0); + } + else + { + return(-1); + } + } + case PTR_OP : 
+ { + + Accept(t); + t = LookAhead(); + if(t->type == IDENTIFIER) + { + Accept(t); + return(0); + } + else + { + return(-1); + } + } + case INC_DEC_OP : + { + Accept(t); + return(0); + } + default: + return(-1); + } + +} + +int ParsePostfixExpression() +{ + if(ParsePrimaryExpression()) + { + return(-1); + } + c_token* t; + while((t = LookAhead())->type == LBRACKET || t->type == LPAR || t->type == DOT || t->type == PTR_OP || t->type == INC_DEC_OP) + { + if(ParsePostfixTailOption()) + { + return(-1); + } + } + return(0); +} + +int ParsePrimaryExpression() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case IDENTIFIER: + { + //Look ahead +1 to know if we must consider this as a function name... + c_token* t1 = LookAhead(1); + + if(t1->type == LPAR) + { + AcceptHighlight("function-name", t); + } + else + { + Accept(t); + } + } + return(0); + + case CONSTANT: + AcceptHighlight("constant", t); + return(0); + + case STRING_LITERAL: + AcceptHighlight("string", t); + return(0); + + case LPAR: + { + Accept(t); + if(ParseExpression()) + { + return(-1); // error + } + t = LookAhead(); + if(t->type == RPAR) + { + Accept(t); + return(0); + } + else + { + return(-1); + } + } + + default: + fprintf(stderr, "error in primary expression\n"); + return(-1); + } +} + +//------------------------------------------------------------------------------------ +// Statements +//------------------------------------------------------------------------------------ + +int ParseStatement(); + +int ParseExpressionStatement() +{ + c_token* t = LookAhead(); + + fprintf(output, ""); + + if(t->type == SEMICOLON) + { + Accept(t); + fprintf(output, ""); + return(0); + } + else + { + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != SEMICOLON) + { + return(-1); + } + Accept(t); + fprintf(output, ""); + return(0); + } +} + +int ParseLabeledStatement() +{ + c_token* t = LookAhead(); + + switch(t->type) + { + case KEYWORD: + if(t->c == CASE) + { + 
AcceptHighlight("keyword", t); + if(ParseConditionalExpression()) + { + return(-1); + } + } + else if(t->c == DEFAULT) + { + AcceptHighlight("keyword", t); + } + else + { + return(-1); + } + break; + case IDENTIFIER: + AcceptHighlight("label", t); + t = LookAhead(); + break; + default: + return(-1); + } + t = LookAhead(); + if(t->type != COLON) + { + return(-1); + } + Accept(t); + return(ParseStatement()); +} + +int ParseSelectionStatement() +{ + c_token* t = LookAhead(); + if(t->type != KEYWORD) + { + return(-1); + } + if(t->c == IF) + { + AcceptHighlight("keyword", t); + t = LookAhead(); + if(t->type != LPAR) + { + return(-1); + } + Accept(t); + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + Accept(t); + if(ParseStatement()) + { + return(-1); + } + t = LookAhead(); + if(t->type == KEYWORD && t->c == ELSE) + { + AcceptHighlight("keyword", t); + return(ParseStatement()); + } + return(0); + } + else if(t->c == SWITCH) + { + AcceptHighlight("keyword", t); + t = LookAhead(); + if(t->type != LPAR) + { + return(-1); + } + Accept(t); + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + Accept(t); + return(ParseStatement()); + } + else + { + return(-1); + } +} + +int ParseExpressionDeclaration() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case KEYWORD: + switch(t->c) + { + case SIZEOF: + return(ParseExpressionStatement()); + + case STORAGE_CLASS_SPECIFIER: + case TYPE_QUALIFIER: + case STRUCT: + case UNION: + case ENUM: + return(ParseDeclaration()); + + default: + return(-1); + } + case CONSTANT: case STRING_LITERAL: case LPAR: case OPERATOR: + return(ParseExpressionStatement()); + + case BUILTIN_TYPE: + return(ParseDeclaration()); + + case IDENTIFIER: + { + //Here we need a look ahead + 1 to make a decision... 
+ c_token* t1 = LookAhead(1); + + if( t1->type == IDENTIFIER + || (t1->type == KEYWORD + && ( t1->c == TYPE_QUALIFIER + || t1->c == STORAGE_CLASS_SPECIFIER + || t1->c == STRUCT + || t1->c == UNION + || t1->c == ENUM))) + { + return(ParseDeclaration()); + } + else + { + return(ParseExpressionStatement()); + } + } + default: + return(-1); + } +} + +int ParseIterationStatement() +{ + c_token* t = LookAhead(); + if(t->type != KEYWORD) + { + return(-1); + } + switch(t->c) + { + case WHILE: + AcceptHighlight("keyword", t); + t = LookAhead(); + if(t->type != LPAR) + { + return(-1); + } + Accept(t); + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + Accept(t); + return(ParseStatement()); + + case DO: + AcceptHighlight("keyword", t); + if(ParseStatement()) + { + return(-1); + } + t = LookAhead(); + if(t->type != KEYWORD || t->c != WHILE) + { + return(-1); + } + AcceptHighlight("keyword", t); + t = LookAhead(); + if(t->type != LPAR) + { + return(-1); + } + Accept(t); + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + Accept(t); + t = LookAhead(); + if(t->type != SEMICOLON) + { + return(-1); + } + Accept(t); + return(0); + + case FOR: + AcceptHighlight("keyword", t); + t = LookAhead(); + if(t->type != LPAR) + { + return(-1); + } + Accept(t); + if(ParseExpressionDeclaration()) + { + return(-1); + } + if(ParseExpressionStatement()) + { + return(-1); + } + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != RPAR) + { + return(-1); + } + Accept(t); + return(ParseStatement()); + + default: + return(-1); + } +} + +int ParseJumpStatement() +{ + c_token* t = LookAhead(); + if(t->type != KEYWORD) + { + return(-1); + } + switch(t->c) + { + case GOTO: + AcceptHighlight("keyword", t); + t = LookAhead(); + if(t->type != IDENTIFIER) + { + return(-1); + } + AcceptHighlight("label", t); + t = LookAhead(); + if(t->type != SEMICOLON) + { + 
return(-1); + } + Accept(t); + return(0); + + case CONTINUE: case BREAK: + AcceptHighlight("keyword", t); + t = LookAhead(); + if(t->type != SEMICOLON) + { + return(-1); + } + Accept(t); + return(0); + + case RETURN: + AcceptHighlight("keyword", t); + t = LookAhead(); + if(t->type != SEMICOLON) + { + if(ParseExpression()) + { + return(-1); + } + t = LookAhead(); + if(t->type != SEMICOLON) + { + return(-1); + } + Accept(t); + return(0); + } + Accept(t); + return(0); + + default: + return(-1); + } +} + +int ParseStatementDeclaration() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case KEYWORD: + switch(t->c) + { + case CASE: + case DEFAULT: + case SIZEOF: + case IF: + case SWITCH: + case WHILE: + case DO: + case FOR: + case GOTO: + case CONTINUE: + case BREAK: + case RETURN: + return(ParseStatement()); + + case STORAGE_CLASS_SPECIFIER: + case TYPE_QUALIFIER: + case STRUCT: + case UNION: + case ENUM: + return(ParseDeclaration()); + + default: + return(-1); + } + case CONSTANT: case STRING_LITERAL: case CLBRACKET: case LPAR: case OPERATOR: case SEMICOLON: + return(ParseStatement()); + + case BUILTIN_TYPE: + return(ParseDeclaration()); + + case IDENTIFIER: + { + //Here we need a look ahead + 1 to make a decision... + c_token* t1 = LookAhead(1); + + if( t1->type == IDENTIFIER + || (t1->type == KEYWORD + && ( t1->c == TYPE_QUALIFIER + || t1->c == STORAGE_CLASS_SPECIFIER + || t1->c == STRUCT + || t1->c == UNION + || t1->c == ENUM)) + || (t1->type == OPERATOR && t1->c == '*')) // we arbitrarily prioritize pointer declaration over expression...) 
+ { + return(ParseDeclaration()); + + } + else + { + return(ParseStatement()); + } + } + default: + return(-1); + } +} + +int ParseCompoundStatement() +{ + c_token* t = LookAhead(); + if(t->type != CLBRACKET) + { + return(-1); + } + Accept(t); + + while((t = LookAhead())->type != CRBRACKET) + { + if(ParseStatementDeclaration()) + { + return(-1); + } + } + + t = LookAhead(); + if(t->type != CRBRACKET) + { + return(-1); + } + Accept(t); + return(0); +} + +int ParseStatement() +{ + //dummy + c_token* t = LookAhead(); + + if(t->type == KEYWORD) + { + if(t->c == CASE || t->c == DEFAULT) + { + return(ParseLabeledStatement()); + } + else if(t->c == IF || t->c == SWITCH) + { + return(ParseSelectionStatement()); + } + else if(t->c == WHILE || t->c == DO || t->c == FOR) + { + return(ParseIterationStatement()); + } + else if(t->c == GOTO || t->c == CONTINUE || t->c == BREAK || t->c == RETURN) + { + return(ParseJumpStatement()); + } + else + { + return(-1); + } + } + else if(t->type == IDENTIFIER) + { + //NOTE(martin): here we need a lookahead + 1 to make our decision + char c = getc(input); + ungetc(c, input); + + if(c == ':') + { + return(ParseLabeledStatement()); + } + else + { + return(ParseExpressionStatement()); + } + } + else if(t->type == CLBRACKET) + { + return(ParseCompoundStatement()); + } + else + { + return(ParseExpressionStatement()); + } +} + + +//------------------------------------------------------------------------------------ +// Code and code snippets +//------------------------------------------------------------------------------------ + +int ParseDeclarationOrFunction() +{ + if(ParseDeclarationHead()) + { + return(-1); + } + c_token* t = LookAhead(); + + if(t->type == SEMICOLON) + { + Accept(t); + return(0); + } + else if(t->type == CLBRACKET) + { + return(ParseStatement()); + } + else + { + return(-1); + } +} + +int ParseCodeSnippet() +{ + c_token* t = LookAhead(); + switch(t->type) + { + case KEYWORD: + switch(t->c) + { + case CASE: + case DEFAULT: + 
case SIZEOF: + case IF: + case SWITCH: + case WHILE: + case DO: + case FOR: + case GOTO: + case CONTINUE: + case BREAK: + case RETURN: + return(ParseStatement()); + + case STORAGE_CLASS_SPECIFIER: + case TYPE_QUALIFIER: + case STRUCT: + case UNION: + case ENUM: + return(ParseDeclarationOrFunction()); + + default: + return(-1); + } + case CONSTANT: case STRING_LITERAL: case CLBRACKET: case LPAR: case OPERATOR: case SEMICOLON: + return(ParseStatement()); + + case BUILTIN_TYPE: + return(ParseDeclarationOrFunction()); + + case IDENTIFIER: + { + //Here we need a look ahead + 1 to make a decision... + c_token* t1 = LookAhead(1); + + if( t1->type == IDENTIFIER + || (t1->type == KEYWORD + && ( t1->c == TYPE_QUALIFIER + || t1->c == STORAGE_CLASS_SPECIFIER + || t1->c == STRUCT + || t1->c == UNION + || t1->c == ENUM)) + || (t1->type == OPERATOR && t1->c == '*')) // we arbitrarily prioritize pointer declaration over expression... + { + return(ParseDeclarationOrFunction()); + } + else + { + return(ParseStatement()); + } + } + default: + return(-1); + } +} + +int ParseCodeSection() +{ + c_token* t; + while((t = LookAhead())->type != ENDF && t->type != ENDOFCODE) + { + if(ParseCodeSnippet()) + { + return(-1); + } + } + return(0); +} + +int HighlightCode(FILE* in, FILE* out) +{ + //TODO(martin): change for a buffer input + input = in; + output = out; + + InitParser(); + + c_token* t; + while((t = LookAhead())->type != ENDF && t->type != ENDOFCODE) + { + if(ParseCodeSection() != 0) + { + t = LookAhead(); + fprintf(stderr, "-- A syntax error occured at token \'%s\' (%i)\n", t->text, t->type); + + if(t->type == ENDOFCODE) + { + return(-1); + } + + #if 0 + AcceptHighlight("syntax-error", t); + #else + Accept(t); + #endif + } + } + return(0); +} diff --git a/example/css/styles.css b/example/css/styles.css new file mode 100644 index 0000000..28ac427 --- /dev/null +++ b/example/css/styles.css @@ -0,0 +1,230 @@ +@font-face +{ + font-family: Lato; + font-style: normal; + font-weight: 
normal; + src: url(../fonts/Lato.woff); +} + +@font-face +{ + font-family: Maths; + font-style: italic; + font-weight: normal; + src: url(../fonts/cmunti.otf); +} + +@font-face +{ + font-family: Maths; + font-style: normal; + font-weight: normal; + src: url(../fonts/cmunui.otf); +} + +@font-face +{ + font-family: latin-modern-maths; + font-style: normal; + font-weight: normal; + src: url(../fonts/latinmodern-math.otf); +} + +/*------------------------------------------------- + Body and misc styles +--------------------------------------------------*/ + +body +{ + display:block; + background-size: 1800px 1197px; + margin-left:20px; + + font-family: Lato, Helvetica, sans-serif; + font-size : 18px; +} + +a +{ + font-weight: bold; + text-decoration: none; + color: #555555; +} + +a:hover +{ + font-weight : bold; + color: #AAAAAA; +} + +a img +{ + border: none; +} + +img +{ + -ms-interpolation-mode: bicubic; +} + +/*------------------------------------------------- + Maths styles +--------------------------------------------------*/ + +sup, sub { + vertical-align: baseline; + position: relative; + top: -0.4em; +} +sub { + top: 0.4em; +} + +.regular +{ + font-style: normal; +} + +.maths +{ + font-family: Maths; + font-style: italic; + font-size: 18px; +} + +.maths .sqrt-symbol +{ + font-family: Maths; + font-style: normal; + font-size:1.37em; +} + +.maths .sqrt-arg +{ + margin-left:-0.1em; + border-top: 0.1em solid black; +} + +.maths .fraction +{ + display:inline-block; + vertical-align:-0.8em; + text-align: center; +} + +.maths .fraction .numerator +{ + display:block; +} + +.maths .fraction .denominator +{ + display:block; + border-top: 1px solid; +} + +.maths .n-sum-product +{ + display:inline-block; + vertical-align:-1.2em; + text-align: center; +} + +.maths .range-min, .maths .range-max +{ + display:block; + font-size:0.8em; +} + +.maths .range-max +{ + line-height: 0.7em; +} + +.maths .n-op-symbol +{ + font-family: latin-modern-maths; + font-size:1.8em; + 
font-style:normal; + display:block; +} + +.maths .op, .maths .rel, .maths .symbol +{ + font-family: latin-modern-maths; + font-style: normal; +} + +.maths .vector +{ + font-family: latin-modern-maths; + font-style: italic; +} + +.maths .digit, .maths .delim +{ + font-style: normal; +} + +/*------------------------------------------------- + Code styles +--------------------------------------------------*/ + +.code +{ + font-family: Courier; + white-space: pre; +} + +div.code, p.code +{ + background-color: #CCCCCC; + margin-top: 20px; + margin-bottom: 20px; + padding-top: 10px; + padding-bottom: 10px; + padding-left: 10px; + padding-right: 10px; + overflow: scroll; + line-height: 125%; +} + +.code .keyword +{ + color:#AA00AA; + font-weight: bold; +} + +.code .type +{ + color:#CC5500; +/* font-weight: bold;*/ +} + +.code .preproc +{ + color:#0055AA; + font-weight: bold; +} + +.code .function-name +{ + color:#2222FF; +} + +/* keywords, types and preprocessor words nested inside a comment take the comment colour */ +.code .comment, .code .comment .keyword, .code .comment .type, .code .comment .preproc +{ + color:#007700; + font-weight: normal; +} + +.code .string +{ + color: #AA0000; +} + +.code .syntax-error +{ + color:#FF0000; + font-style:italic; +} diff --git a/example/doc.groucho.txt b/example/doc.groucho.txt new file mode 100644 index 0000000..21fbdd9 --- /dev/null +++ b/example/doc.groucho.txt @@ -0,0 +1,319 @@ +[html] + + + + + + +[/html] += Groucho = + +(A clunky tool I made to mar[m]\r\Chi\r[/m] up blog posts) + +------------------------------------------------\ +== Intro == + +Groucho is a little tool to generate an HTML file from a text containing simplified and non-intrusive markup codes. + +It was written in order to ease writing blog posts for my website. Its goal is to allow to write content along with formatting instructions, aiming at the least friction, thus avoiding the need to do a second pass manually adding html tags or using a more complex editing tool, a process that is repetitive and tedious, and makes the source text less readable. 
+ +As the content I'm writing on these posts is mainly technical, Groucho is built around three main goals\s: + +- Easy presentation of normal content, including section titles, paragraphs, lists, images, links. +- Formatting of basic maths or physics formulas, supporting maths symbols and notations. +- Syntax highlighting of C source code. + +Some examples : + +[c]*this is some text*[/c] \twill print in bold : *this is some text*. +[c]/this is some text/[/c] \twill print in italics : /this is some text/. +[c]_this is some text_[/c] \twill print underlined : _this is some text_. + +[c][m]x_+ = \frac{{-b+\sqrt{b^2-4ac}}{2a}}[/m][/c] \twill print this well-known maths formula\s: + +[m]x_+ = \frac{{-b+\sqrt{b^2-4ac}}{2a}}[/m] + + +=== Download and Usage === + +The source code of Groucho is available under a Public domain or MIT license (choose whichever you prefer). The fonts provided for the example document are covered by their own licenses (X11 license for CMU and the GUST font license for latin-modern-maths), which you can find in the font directory. + +You can download the sources [url=https://www.forkingpaths.garden/download.php?id=15]HERE[/url]. Please keep in mind though that this was written as a quick side-project for my personal use, and hence is far from production quality. It is made available online only in the interest of the reader's curiosity. It may or may not be updated in the future. + +After downloading and extracting the archive, you should be able to compile in the source directory with\s: + +[code=none]cc -o groucho *.cpp[/code] +\ +To use it you can run : + +[code=none]./groucho in out[/code] +\ +Where in and out are your input and output files. If no files are specified groucho will get its input from the standard input and output to the standard output. You can test groucho by running it on the sample [c]doc.groucho.txt[/c] file. It should output the present quick documentation. 
+ +------------------------------------------------\ +== Basic principles == + +If not otherwise mentioned, a character in the input will produce the same character on the output. Some special characters and sequences will be interpreted as symbols and insert html entities in the html output. Some others will be interpreted as markups and insert html tags in the output stream. These tags contain [c]class[/c] attributes that are used by the browser, along with a css stylesheet, to style their content. A sample stylesheet is available with groucho, but it should be customizable at will (well, with css, you never know). + +The symbols, the markups and their interpretations depends on the current mode of the interpreter which can be one of the four following modes\s: + +- Text mode which is the default and doesn't require a markup. +- HTML mode, which is used to insert raw html. +- Maths mode, which is used to present simple mathematical formulas. +- Code mode, which is used to present syntax-highlighted C code. + +By default, groucho will operate in text mode. You can switch to one of the other modes by using block tags, like [c][maths]...[/maths][/c], and all text inserted between these two tags will be interpreted in this mode. + + +------------------------------------------------\ +== HTML mode == + +All text placed between the tags [c][html][/c] and [c][/html][/c] will be interpreted as HTML and will be outputed as-is. +To produce the basic tags needed for an HTML document, you should insert the following block at the beginning of your input file\s: + +[c] +[html] + + + + + + +[/html] +[/c] + +Where you can also specify your CSS style sheet. 
To close those tags you should insert the following block at the end of your input file\s: + +[c] +[html] + + +[/html] +[/c] + + +------------------------------------------------\ +== Markups common to Text and Maths mode == + +Text is the default mode, which is used when the source content is not contained between a pair of matching block markups. + +Maths mode can be used by enclosing text between the markup tags [c][maths][/c] and [c][/maths][/c], which will interpret its content as maths, and also place the output between [c]
[/c] html tags. Alternatively, if one wishes to insert maths into the same line as normal text, the markup tags [c][m][/c] and [c][/m][/c] can be used, which will place the output between [c][/c] html tags. + +Text and maths mode share some of their markups, so we will present them together and then present their differences. + +=== HTML entities === + +Some characters used by html tags will be replaced by html entities : + +- [c]<[/c] will be replaced by [c]&lt;[/c] in maths mode, and in text mode if it is not followed by an alphabetic character, by a [c]/[/c] or by a [c]![/c]. Otherwise, it will be interpreted as the beginning of an html tag and the following text until the next matching [c]>[/c] will be output as-is. +- [c]>[/c] will be replaced by [c]&gt;[/c]. +- [c]&[/c] will be replaced by [c]&amp;[/c]. + +=== Escape sequences === + +If a character is preceded by a [c]\[/c], it will be interpreted as follows : + +- A [c]\n[/c] will be interpreted as an intentional line break and produce a [c]
[/c] tag. +- An end of line preceded by [c]\[/c] will be interpreted as a source-only line break and will be suppressed in the output. +- A [c]\t[/c] will output two em spaces [c] &emsp[/c]. +- A [c]\s[/c] will output a non breakable space [c] [/c]. + +In addition to that, there's a list of letters or words that will be interpreted as a special symbol or markup when preceded by [c]\[/c]\s: + +- Symbols and special letters. These symbols will likely be more useful in maths sections and not all fonts support them, but they're available in text mode as well\s: + - [c]\star[/c] will output a [m]\star[/m] + - [c]\lowstar[/c] will output a [m]\lowstar[/m] + - [c]\div[/c] will output a [m]\div[/m] + - [c]\minus[/c] will output a [m]\minus[/m] + - [c]\cdot[/c] will output a [m]\cdot[/m] + - [c]\times[/c] will output a [m]\times[/m] + - [c]\otimes[/c] will output a [m]\otimes[/m] + - [c]\leq[/c] will output a [m]\leq[/m] + - [c]\geq[/c] will output a [m]\geq[/m] + - [c]\larr[/c] will output a [m]\larr[/m] + - [c]\rarr[/c] will output a [m]\rarr[/m] + - [c]\inf[/c] will output the symbol [m]\inf[/m] + +- Greek letters: + - [c]\alpha[/c], [c]\beta[/c], [c]\gamma[/c], [c]\delta[/c], etc. : [m]\r\alpha \beta \gamma \delta\r[/m] + - [c]\Alpha[/c], [c]\Beta[/c], [c]\Gamma[/c], [c]\Delta[/c], etc. : [m]\r\Alpha \Beta \Gamma \Delta\r[/m] + +- Font style markups, availables in both modes, change the style of text enclosed between a matching pair of them\s: + - [c]\b[/c] for \bbold\b. + - [c]\i[/c] for \iitalics\i. + - [c]\u[/c] for \uunderlined\u + - [c]\r[/c] is used to render text in roman in a section that would otherwise be in italics. It can be useful for function in maths blocks, as in [m]a = \rlog\r(b)[/m] + +- A single letter not matched by one of the above escape sequences will be output as is and not be interpreted as part of a markup. 
+ + + +------------------------------------------------\ +== Text mode == + +==== Additional markups ==== + +In text mode, a section title can be created by enclosing text in a matching pair of [c]=[/c] sequences. The section level corresponds to the number of [c]=[/c] signs\s: + +[c]===== Exemple title =====[/c] in the input will produce the html [c]
Exemple title
[/c], resulting in\s: +===== Exemple title ===== + +A separating line can be created by a sequence of five or more hyphens : [c]-----[/c] will output the html tag [c]
[/c] resulting in\s: +----- + +Paragraphs can be created by simply having two line breaks in a row. + +The use of \\b, \\i and \\u markups can alternatively be replaced by (respectively) \*, \/, and \_. + +The following character sequences can be used to create symbols\s: + +- [c]--[/c] : two hyphens in a row will create a dash\s: -- +- [c]-->[/c] : one or more hyphens followed by a right angled bracket will produce a right arrow\s: --> +- [c]<--[/c] : a left angled bracket followed by one or more hyphens will produce a left arrow\s: <-- +- [c]"[/c] : matching pairs of double quotes will be replaced by [c]“[/c] and [c]”[/c] as in "quoted". + +==== Lists ==== + +A number of tabs followed by a hyphen, followed by a space, will be interpreted as a list item. The number of leading tabs determines the list depth when creating nested lists\s: + +[c]- item 1 +- item 2 + - sub item 2.1 + - sub item 2.2 +- item 3 +[/c] + +will produce the following list : + +- item 1 +- item 2 + - sub item 2.1 + - sub item 2.2 +- item 3 + +=== External content === + +==== Links ==== +The markup tags [c][url=][/url][/c] can be used to create a link. You specify the URL of your link by adding it after the [c]=[/c] sign. The text of the link goes between the two markup tags. +For instance, [c][url=https://www.forkingpaths.garden]Home page[/url][/c] will create the following link : [url=https://www.forkingpaths.garden]Home page[/url] that will land you on Forking Paths's website. + + +==== Images ==== +An image can be created with the tag [c][img=][/c]. Similarly to the URL tag, you specify the URL of your image after the [c]=[/c] sign. 
Hence, the following tag [c][img=https://www.forkingpaths.garden/img/logo_black.png][/c] will display Forking Paths' logo\s: + +[img=https://www.forkingpaths.garden/img/logo_black.png] + + +------------------------------------------------\ +== Maths mode == + +By default, operators and capital letters will be printed in roman with a math-specific font, while lower-case letters will be printed in italics. You can always reverse that by using \\r and \\i markups. + +=== Default symbol interpretation === + +In maths mode, the following (non-escaped) sequences are recognized as symbol equivalents\s: + +- [c]-[/c] is equivalent to [c]\minus[/c] +- [c]*[/c] is equivalent to [c]\times[/c] +- [c]/[/c] is equivalent to [c]\div[/c] +- [c]<=[/c] is equivalent to [c]\leq[/c] +- [c]>=[/c] is equivalent to [c]\geq[/c] + +=== Exponents and indices === + +A character preceded by a [c]_[/c] will be printed as an index, as in [m]u_k[/m]. You can use a pair of curled brackets after the underscode to have a longer index, ie. this code : [c]C_{(i,j)}[/c] will produce this result : [m]C_{(i,j)}[/m]. + +Similarly, the character [c]^[/c] is used for exponent, as in [m]\re\r^{\thin\rj(2\pi\rf\thint+\phi)}[/m]. + +=== Vectors === + +[c]\vec[/c] : the character following this escape markup will be rendered with a vector arrow above it, as in [m]\vecv[/m]. + +=== Square roots === + +[c]\sqrt[/c] : a single character, or a sequence of characters contained in curly brackets, following this escape markup, will be printed as the arguments of a square root. + +[m]\sqrt{a*(1-n)^2}[/m] + +More precisely, this markup outputs the following HTML\s: + +[c] + YourExpressionHere; [/c] + +The square root character is rendered as an HTML entity and the trailing top line of the square root is simulated by the top border of the [c]sqrt-arg[/c] span. 
Because HTML\/CSS layout is such a disaster zone, it is extremely likely that the square root and the top bar won't join and you will have to adjust the stylesheet to make it happen. There's not much we can do about it, until MathML is largely supported, except resorting to TeX generated images and the like... + +=== Fraction === + +You can use the markup [c]\frac{{numerator}{denominator}}[/c] to generate a fraction. For instance, + +[c]y = \frac{{x^2+1}{2}}[/c] + +will generate the following equation : + +[m]y = \frac{{x^2+1}{2}}[/m] + +As stated above, depending on your font, you might want to adjust the [c]text-alignment[/c] property of the [c]fraction[/c] class to align the fraction line with the equal sign. + +=== Sums and Products === + +\nsum and \nproduct can be used to create indexed sums or products. You need to specify the maximum index and the minimum index enclosed in curled brackets, like this\s: + +[c]x = \nsum{{p}{k=1}}u_k[/c] + +which produces this result : + +[m]x = \nsum{{p}{k=1}}u_k[/m] + +------------------------------------------------\ +== Code mode == + +All text placed between the tags [c][html][/c] and [c][/html][/c] will be interpreted as C code, syntax highlighted and placed between the html tags [c]
[/c]. Alternatively, if one whishes to insert code into the same line as normal text, the markup tags [c][c][/c] and [\/c] can be used, which will place the output between the html tags [c][/c]. Inline code won't be syntax highlighted though. + +You can specify that you don't want the syntax highlighter to be run on a code block by using the opening [c][code=none][/c] mark instead of [c][code][/c] + +The CSS styling of code section uses [c]white-space: pre;[/c] to preserve indentation and line breaks. It also uses a monospace font by default. The syntax highlighter performs a simplified syntactical analysis of the code and encloses elements in [c][/c] tags, with one of the following classes\s: + +- [c]keyword[/c] is used for C keywords, except for built-in types. +- [c]type[/c] is used for built-in or user-defined types. +- [c]function-name[/c] is used to highlight functions in declarations and expressions. +- [c]comment[/c] is used to highlight comments. +- [c]label[/c] is used for labels and [c]goto[/c] instructions. + +The syntax highlighter may fail on some cases, either because the syntax is in fact incorrect or because the code uses some unrecognized dialect, or because the parser stumbles upon a macro, or simply because it lacks context. As a matter of fact, C not being completely context free, and to avoid complicating the parser for such a simple task, the parser sometimes falls back to some (hopefully) reasonable assumption on what's going on. If it was a wrong guess, it will try to recover and continue to output non highlighted code until it can resynchronize, often after the next semicolon or at the begining of the next compound statement. 
+ +Here is an exemple of a code snippet : + +[code]for(int i=0;ivalue, a->unit); +} +[/code] + +------------------------------------------------\ +== Planned features == + +This is a list of features that will eventually be added along the way, as I need them and\/or find time to implement them\s: + +- Output some useful info to stderr on errors. +- Clean-up the scanner code and reduce redundancies between modes. +- Add markups to control over font size, text justification, etc. +- Automatic non breakable space before colons. +- Tables +- Big parentheses for enclosing fractions +- Exponents that should work with those big parentheses +- More maths and logic symbols +- TeX mode for more involved equations. It would allow to insert TeX code that would generate an image and a corresponding [c][/c] tag. +- Add cast-expressions to the C parser, recognize /some/ C++ features, etc... +- Other langages for syntax highlighting : eg. bash scripts, Objective-C ? + + + + +[html] + + +[/html] diff --git a/example/doc.html b/example/doc.html new file mode 100644 index 0000000..a5ef132 --- /dev/null +++ b/example/doc.html @@ -0,0 +1,422 @@ + + + + + + + +

Groucho

+

+(A clunky tool I made to marΧ up blog posts) +

+

+


+ +

+

Intro

+

+Groucho is a little tool to generate an HTML file from a text containing simplified and non-intrusive markup codes. +

+

+It was written in order to ease writing blog posts for my website. Its goal is to allow to write content along with formatting instructions, aiming at the least friction, thus avoiding the need to do a second pass manually adding html tags or using a more complex editing tool, a process that is repetitive and tedious, and makes the source text less readable. +

+

+As the content I'm writing in these posts is mainly technical, Groucho is built around three main goals : +

+ +
    +
  • Easy presentation of normal content, including section titles, paragraphs, lists, images, links.
  • +
  • Formatting of basic maths or physics formulas, supporting maths symbols and notations.
  • +
  • Syntax highlighting of C source code.
  • +
+

+Some examples : +

+

+*this is some text*   will print in bold : this is some text.
+/this is some text/   will print in italics : this is some text.
+_this is some text_   will print underlined : this is some text. +

+

+[m]x_+ = \frac{{-b+\sqrt{b^2-4ac}}{2a}}[/m]   will print this well-known maths formula : +

+

+x +   =   − b +  b2 − 4ac 2a +

+
+

Download and Usage

+

+The source code of Groucho is available under a Public domain or MIT license (choose whichever you prefer). The fonts provided for the example document are covered by their own licenses (X11 license for CMU and the GUST font license for latin-modern-maths), which you can find in the font directory. +

+

+You can download the sources HERE +. Please keep in mind though that this was written as a quick side-project for my personal use, and hence is far from production quality. It is made available online only in the interest of the reader's curiosity. It may or may not be updated in the future. +

+

+After downloading and extracting the archive, you should be able to compile in the source directory with : +

+
cc -o groucho *.cpp
+

+To use it you can run : +

+
./groucho in out
+

+Where in and out are your input and output files. If no files are specified groucho will get its input from the standard input and output to the standard output. You can test groucho by running it on the sample doc.groucho.txt file. It should output the present quick documentation. +

+

+


+ +

+

Basic principles

+

+If not otherwise mentioned, a character in the input will produce the same character on the output. Some special characters and sequences will be interpreted as symbols and insert html entities in the html output. Some others will be interpreted as markups and insert html tags in the output stream. These tags contain class attributes that are used by the browser, along with a css stylesheet, to style their content. A sample stylesheet is available with groucho, but it should be customizable at will (well, with css, you never know). +

+

+The symbols, the markups and their interpretations depend on the current mode of the interpreter which can be one of the four following modes : +

+ +
    +
  • Text mode which is the default and doesn't require a markup.
  • +
  • HTML mode, which is used to insert raw html.
  • +
  • Maths mode, which is used to present simple mathematical formulas.
  • +
  • Code mode, which is used to present syntax-highlighted C code.
  • +
+

+By default, groucho will operate in text mode. You can switch to one of the other modes by using block tags, like [maths]...[/maths], and all text inserted between these two tags will be interpreted in this mode. +

+
+

+


+ +

+

HTML mode

+

+All text placed between the tags [html] and [/html] will be interpreted as HTML and will be output as-is.
+To produce the basic tags needed for an HTML document, you should insert the following block at the beginning of your input file : +

+

+ +[html] +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +<link rel="stylesheet" href="css/styles.css"/> +</head> +<body> +[/html] + +

+

+Where you can also specify your CSS style sheet. To close those tags you should insert the following block at the end of your input file : +

+

+ +[html] +</body> +</html> +[/html] + +

+
+

+


+ +

+

Markups common to Text and Maths mode

+

+Text is the default mode, which is used when the source content is not contained between a pair of matching block markups. +

+

+Maths mode can be used by enclosing text between the markup tags [maths] and [/maths], which will interpret its content as maths, and also place the output between <div class="maths"></div> html tags. Alternatively, if one wishes to insert maths into the same line as normal text, the markup tags [m] and [/m] can be used, which will place the output between <span class="maths"></span> html tags. +

+

+Text and maths mode share some of their markups, so we will present them together and then present their differences. +

+

HTML entities

+

+Some characters used by html tags will be replaced by html entities : +

+ +
    +
  • < will be replaced by &lt; in maths mode, and in text mode if it is not followed by an alphabetic character, by a / or by a !. Otherwise, it will be interpreted as the beginning of an html tag and the following text until the next matching > will be output as-is.
  • +
  • > will be replaced by &gt;.
  • +
  • & will be replaced by &amp;.
  • +
+

Escape sequences

+

+If a character is preceded by a \, it will be interpreted as follows : +

+ +
    +
  • A \n will be interpreted as an intentional line break and produce a <br> tag.
  • +
  • An end of line preceded by \ will be interpreted as a source-only line break and will be suppressed in the output.
  • +
  • A \t will output two em spaces &emsp;&emsp;.
  • +
  • A \s will output a non breakable space &nbsp;.
  • +
+

+In addition to that, there's a list of letters or words that will be interpreted as a special symbol or markup when preceded by \ : +

+ +
    +
  • Symbols and special letters. These symbols will likely be more useful in maths sections and not all fonts support them, but they're available in text mode as well : +
      +
    • \star will output a  * 
    • +
    • \lowstar will output a  ∗ 
    • +
    • \div will output a  ÷ 
    • +
    • \minus will output a  − 
    • +
    • \cdot will output a  · 
    • +
    • \times will output a  × 
    • +
    • \otimes will output a  ⊗ 
    • +
    • \leq will output a
    • +
    • \geq will output a
    • +
    • \larr will output a
    • +
    • \rarr will output a
    • +
    • \inf will output the symbol
    • +
    +
  • +
+ +
    +
  • Greek letters: +
      +
    • \alpha, \beta, \gamma, \delta, etc. : α β γ δ
    • +
    • \Alpha, \Beta, \Gamma, \Delta, etc. : Α Β Γ Δ
    • +
    +
  • +
+ +
    +
  • Font style markups, available in both modes, change the style of text enclosed between a matching pair of them : +
      +
    • \b for bold.
    • +
    • \i for italics.
    • +
    • \u for underlined
    • +
    • \r is used to render text in roman in a section that would otherwise be in italics. It can be useful for function in maths blocks, as in a  =  log(b)
    • +
    +
  • +
+ +
    +
  • A single letter not matched by one of the above escape sequences will be output as is and not be interpreted as part of a markup.
  • +
+
+
+

+


+ +

+

Text mode

+

Additional markups

+

+In text mode, a section title can be created by enclosing text in a matching pair of = sequences. The section level corresponds to the number of = signs : +

+

+===== Exemple title ===== in the input will produce the html <h5>Exemple title</h5>, resulting in :
+ +

+
Exemple title
+

+A separating line can be created by a sequence of five or more hyphens : ----- will output the html tag <hr/> resulting in :
+


+

+

+Paragraphs can be created by simply having two line breaks in a row. +

+

+The use of \b, \i and \u markups can alternatively be replaced by (respectively) *, /, and _. +

+

+The following character sequences can be used to create symbols : +

+ +
    +
  • -- : two hyphens in a row will create a dash : —
  • +
  • --> : one or more hyphens followed by a right angled bracket will produce a right arrow : →
  • +
  • <-- : a left angled bracket followed by one or more hyphens will produce a left arrow : ←
  • +
  • " : matching pairs of double quotes will be replaced by &ldquo; and &rdquo; as in “quoted”.
  • +
+

Lists

+

+A number of tabs followed by a hyphen, followed by a space, will be interpreted as a list item. The number of leading tabs determines the list depth when creating nested lists : +

+

+- item 1 +- item 2 + - sub item 2.1 + - sub item 2.2 +- item 3 + +

+

+will produce the following list : +

+ +
    +
  • item 1
  • +
  • item 2 +
      +
    • sub item 2.1
    • +
    • sub item 2.2
    • +
    +
  • +
  • item 3
  • +
+

External content

+

Links

+

+The markup tags [url=][/url] can be used to create a link. You specify the URL of your link by adding it after the = sign. The text of the link goes between the two markup tags.
+For instance, [url=https://www.forkingpaths.garden]Home page[/url] will create the following link : Home page + that will land you on Forking Paths's website. +

+
+

Images

+

+An image can be created with the tag [img=]. Similarly to the URL tag, you specify the URL to your image after the = sign. Hence, the following tag [img=https://www.forkingpaths.garden/img/logo_black.png] will display Forking Paths' logo : +

+

+ +

+
+

+


+ +

+

Maths mode

+

+By default, operators and capital letters will be printed in roman with a math-specific font, while lower-case letters will be printed in italics. You can always reverse that by using \r and \i markups. +

+

Default symbol interpretation

+

+In maths mode, the following (non-escaped) sequences are recognized as symbol equivalents : +

+ +
    +
  • - is equivalent to \minus
  • +
  • * is equivalent to \times
  • +
  • / is equivalent to \div
  • +
  • <= is equivalent to \leq
  • +
  • >= is equivalent to \geq
  • +
+

Exponents and indices

+

+A character preceded by a _ will be printed as an index, as in uk. You can use a pair of curly brackets after the underscore to have a longer index, i.e. this code : C_{(i,j)} will produce this result : C(i,j). +

+

+Similarly, the character ^ is used for exponent, as in ej(2πf t + φ). +

+

Vectors

+

+\vec : the character following this escape markup will be rendered with a vector arrow above it, as in v. +

+

Square roots

+

+\sqrt : a single character, or a sequence of characters contained in curly brackets, following this escape markup, will be printed as the arguments of a square root. +

+

+ a × (1 − n)2  +

+

+More precisely, this markup outputs the following HTML : +

+

+<span class="sqrt-symbol">&radic;</span> +<span class="sqrt-arg">&thinsp;YourExpressionHere;&nbsp;</span> +

+

+The square root character is rendered as an HTML entity and the trailing top line of the square root is simulated by the top border of the sqrt-arg span. Because HTML/CSS layout is such a disaster zone, it is extremely likely that the square root and the top bar won't join and you will have to adjust the stylesheet to make it happen. There's not much we can do about it, until MathML is largely supported, except resorting to TeX generated images and the like... +

+

Fraction

+

+You can use the markup \frac{{numerator}{denominator}} to generate a fraction. For instance, +

+

+y = \frac{{x^2+1}{2}} +

+

+will generate the following equation : +

+

+y  =  x2 + 12 +

+

+As stated above, depending on your font, you might want to adjust the text-alignment property of the fraction class to align the fraction line with the equal sign. +

+

Sums and Products

+

+\nsum and \nproduct can be used to create indexed sums or products. You need to specify the maximum index and the minimum index enclosed in curly brackets, like this : +

+

+x = \nsum{{p}{k=1}}u_k +

+

+which produces this result : +

+

+x  =  pk = 1 uk +

+

+


+ +

+

Code mode

+

+All text placed between the tags [code] and [/code] will be interpreted as C code, syntax highlighted and placed between the html tags <div class="code"></div>. Alternatively, if one wishes to insert code into the same line as normal text, the markup tags [c] and [/c] can be used, which will place the output between the html tags <span class="code"></span>. Inline code won't be syntax highlighted though. +

+

+You can specify that you don't want the syntax highlighter to be run on a code block by using the opening [code=none] mark instead of [code] +

+

+The CSS styling of code section uses white-space: pre; to preserve indentation and line breaks. It also uses a monospace font by default. The syntax highlighter performs a simplified syntactical analysis of the code and encloses elements in <span></span> tags, with one of the following classes : +

+ +
    +
  • keyword is used for C keywords, except for built-in types.
  • +
  • type is used for built-in or user-defined types.
  • +
  • function-name is used to highlight functions in declarations and expressions.
  • +
  • comment is used to highlight comments.
  • +
  • label is used for labels and goto instructions.
  • +
+

+The syntax highlighter may fail in some cases, either because the syntax is in fact incorrect or because the code uses some unrecognized dialect, or because the parser stumbles upon a macro, or simply because it lacks context. As a matter of fact, C not being completely context free, and to avoid complicating the parser for such a simple task, the parser sometimes falls back to some (hopefully) reasonable assumption on what's going on. If it was a wrong guess, it will try to recover and continue to output non highlighted code until it can resynchronize, often after the next semicolon or at the beginning of the next compound statement. +

+

+Here is an example of a code snippet : +

+
for(int i=0;i<count;i++) +{ + // Get all my items by name in the hash table and print their value + + int hash = HashFunction(names[i]); + my_struct* a = GetItem(hash); + printf("item %s = %i %s\n", names[i], a->value, a->unit); +} +
+
+

+


+ +

+

Planned features

+

+This is a list of features that will eventually be added along the way, as I need them and/or find time to implement them : +

+ +
    +
  • Output some useful info to stderr on errors.
  • +
  • Clean-up the scanner code and reduce redundancies between modes.
  • +
  • Add markups to control font size, text justification, etc.
  • +
  • Automatic non breakable space before colons.
  • +
  • Tables
  • +
  • Big parentheses for enclosing fractions
  • +
  • Exponents that should work with those big parentheses
  • +
  • More maths and logic symbols
  • +
  • TeX mode for more involved equations. It would allow to insert TeX code that would generate an image and a corresponding <img> tag.
  • +
  • Add cast-expressions to the C parser, recognize some C++ features, etc...
  • +
  • Other languages for syntax highlighting : e.g. bash scripts, Objective-C ?
  • +
+
+
+
+ + + diff --git a/example/fonts/CM-Unicode-Font-Licence b/example/fonts/CM-Unicode-Font-Licence new file mode 100644 index 0000000..776ed82 --- /dev/null +++ b/example/fonts/CM-Unicode-Font-Licence @@ -0,0 +1,39 @@ +X11 License + +Andrey V. Panov (C) 2005 + +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, and/or sell copies of the Software, and to permit persons to +whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies of +the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF +THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS +INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT +OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall +not be used in advertising or otherwise to promote the sale, use or +other dealings in this Software without prior written authorization of +the copyright holder. + +As a special exception, if you create a document which uses these fonts, +and embed these fonts or unaltered portions of these fonts into the +document, these fonts does not by itself cause the resulting document to +be covered by the X11 License. 
This exception does not however +invalidate any other reasons why the document might be covered by the +X11 License. If you modify these fonts, you may extend this exception to +your version of the fonts, but you are not obligated to do so. If you do +not wish to do so, delete this exception statement from your version. diff --git a/example/fonts/GUST-FONT-LICENSE b/example/fonts/GUST-FONT-LICENSE new file mode 100644 index 0000000..306d614 --- /dev/null +++ b/example/fonts/GUST-FONT-LICENSE @@ -0,0 +1,29 @@ +% This is version 1.0, dated 22 June 2009, of the GUST Font License. +% (GUST is the Polish TeX Users Group, http://www.gust.org.pl) +% +% For the most recent version of this license see +% http://www.gust.org.pl/fonts/licenses/GUST-FONT-LICENSE.txt +% or +% http://tug.org/fonts/licenses/GUST-FONT-LICENSE.txt +% +% This work may be distributed and/or modified under the conditions +% of the LaTeX Project Public License, either version 1.3c of this +% license or (at your option) any later version. +% +% Please also observe the following clause: +% 1) it is requested, but not legally required, that derived works be +% distributed only after changing the names of the fonts comprising this +% work and given in an accompanying "manifest", and that the +% files comprising the Work, as listed in the manifest, also be given +% new names. Any exceptions to this request are also given in the +% manifest. +% +% We recommend the manifest be given in a separate file named +% MANIFEST-.txt, where is some unique identification +% of the font family. If a separate "readme" file accompanies the Work, +% we recommend a name of the form README-.txt. +% +% The latest version of the LaTeX Project Public License is in +% http://www.latex-project.org/lppl.txt and version 1.3c or later +% is part of all distributions of LaTeX version 2006/05/20 or later. 
+ diff --git a/example/fonts/Lato.woff b/example/fonts/Lato.woff new file mode 100644 index 0000000..01b13d8 Binary files /dev/null and b/example/fonts/Lato.woff differ diff --git a/example/fonts/OFL-LICENSE b/example/fonts/OFL-LICENSE new file mode 100644 index 0000000..84d123e --- /dev/null +++ b/example/fonts/OFL-LICENSE @@ -0,0 +1,86 @@ +SIL OPEN FONT LICENSE + +Version 1.1 - 26 February 2007 + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font creation +efforts of academic and linguistic communities, and to provide a free and +open framework in which fonts may be shared and improved in partnership +with others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. The +fonts, including any derivative works, can be bundled, embedded, +redistributed and/or sold with any software provided that any reserved +names are not used by derivative works. The fonts and derivatives, +however, cannot be released under any other type of license. The +requirement for fonts to remain under this license does not apply +to any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright +Holder(s) under this license and clearly marked as such. This may +include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the +copyright statement(s). + +"Original Version" refers to the collection of Font Software components as +distributed by the Copyright Holder(s). + +"Modified Version" refers to any derivative made by adding to, deleting, +or substituting — in part or in whole — any of the components of the +Original Version, by changing formats or by porting the Font Software to a +new environment. 
+ +"Author" refers to any designer, engineer, programmer, technical +writer or other person who contributed to the Font Software. + +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Font Software, to use, study, copy, merge, embed, modify, +redistribute, and sell modified and unmodified copies of the Font +Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, +in Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, +redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. These can be +included either as stand-alone text files, human-readable headers or +in the appropriate machine-readable metadata fields within text or +binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font +Name(s) unless explicit written permission is granted by the corresponding +Copyright Holder. This restriction only applies to the primary font name as +presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any +Modified Version, except to acknowledge the contribution(s) of the +Copyright Holder(s) and the Author(s) or with their explicit written +permission. + +5) The Font Software, modified or unmodified, in part or in whole, +must be distributed entirely under this license, and must not be +distributed under any other license. The requirement for fonts to +remain under this license does not apply to any document created +using the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are +not met. 
+ +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE +COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL +DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM +OTHER DEALINGS IN THE FONT SOFTWARE. \ No newline at end of file diff --git a/example/fonts/cmunti.otf b/example/fonts/cmunti.otf new file mode 100644 index 0000000..56f0f37 Binary files /dev/null and b/example/fonts/cmunti.otf differ diff --git a/example/fonts/cmunui.otf b/example/fonts/cmunui.otf new file mode 100644 index 0000000..736ef60 Binary files /dev/null and b/example/fonts/cmunui.otf differ diff --git a/example/fonts/latinmodern-math.otf b/example/fonts/latinmodern-math.otf new file mode 100755 index 0000000..0e4642e Binary files /dev/null and b/example/fonts/latinmodern-math.otf differ diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..bd2b2e2 --- /dev/null +++ b/main.cpp @@ -0,0 +1,1698 @@ +//***************************************************************** +// +// $file: main.cpp $ +// $author: Martin Fouilleul $ +// $date: 23/12/2017 $ +// +//***************************************************************** + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Martin Fouilleul +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ + +#include +#include + +#include"scanner.h" + +int section = 0; +int enterCount = 0; + +bool underline = false; +bool bold = false; +bool italic = false; +bool regular = false; +bool paragraph = false; +bool ldquo = false; + +FILE* input = 0; +FILE* output = 0; + + +int HighlightCode(FILE* in, FILE* out); + +void EvalMarkup(token* t); +void EvalTag(token* t); +void EvalEsc(token* t); + + +void EatSpaceUntilEndl(FILE* input, token* t) +{ + GetNextToken(input, t); + while(t->type == CHAR && (t->c == ' ' || t->c == '\t')) + { + GetNextToken(input, t); + } + if(t->type == ENDL) + { + GetNextToken(input, t); + } +} + +void EvalCharInCodeMode(token* t) +{ + //NOTE(martin): replaces special characters with html entities + + switch(t->c) + { + case '<': + fprintf(output, "<"); + break; + + case '>': + fprintf(output, ">"); + break; + + case '&': + fprintf(output, "&"); + break; + + default: + putc(t->c, output); + break; + } +} + +void EvalChar(token* t) +{ + //NOTE(martin): replaces special characters with html entities + + switch(t->c) + { + case '<': + fprintf(output, "<"); + break; + + case '>': + fprintf(output, ">"); + break; + + case '&': + fprintf(output, "&"); + break; + + case '"': + if(!ldquo) + { + fprintf(output, "“"); + ldquo = true; + } + else + { + fprintf(output, "”"); + ldquo = false; + 
} + break; + + case '\t': + fprintf(output, "\t"); + break; + + default: + putc(t->c, output); + break; + } +} + +void EvalSymbol(token* t) +{ + switch(t->symbol.code) + { + case MULTIPLY: + fprintf(output, "×"); + break; + case DIVIDE: + fprintf(output, "÷"); + break; + case SUBSTRACT: + fprintf(output, "−"); + break; + case STAR: + fprintf(output, "*"); + break; + case LOSTAR: + fprintf(output, "∗"); + break; + case CDOT: + fprintf(output, "·"); + break; + case LEQ: + fprintf(output, "≤"); + break; + case GEQ: + fprintf(output, "≥"); + break; + case INF: + fprintf(output, "∞"); + break; + case OTIMES: + fprintf(output, "⊗"); + break; + case LARR: + fprintf(output, "←"); + break; + case RARR: + fprintf(output, "→"); + break; + case EMDASH: + fprintf(output, "—"); + break; + case LINE: + fprintf(output, "
"); + break; + case THINSPACE: + fprintf(output, " "); + break; + case ALPHA: + fprintf(output, "α"); + break; + case BETA: + fprintf(output, "β"); + break; + case GAMMA: + fprintf(output, "γ"); + break; + case DELTA: + fprintf(output, "δ"); + break; + case EPSILON: + fprintf(output, "ε"); + break; + case ZETA: + fprintf(output, "ζ"); + break; + case ETA: + fprintf(output, "η"); + break; + case THETA: + fprintf(output, "θ"); + break; + case IOTA: + fprintf(output, "ι"); + break; + case KAPPA: + fprintf(output, "κ"); + break; + case LAMBDA: + fprintf(output, "λ"); + break; + case MU: + fprintf(output, "μ"); + break; + case NU: + fprintf(output, "ν"); + break; + case XI: + fprintf(output, "ξ"); + break; + case OMICRON: + fprintf(output, "ο"); + break; + case PI: + fprintf(output, "π"); + break; + case RHO: + fprintf(output, "ρ"); + break; + case SIGMA: + fprintf(output, "σ"); + break; + case TAU: + fprintf(output, "τ"); + break; + case UPSILON: + fprintf(output, "υ"); + break; + case PHI: + fprintf(output, "φ"); + break; + case CHI: + fprintf(output, "χ"); + break; + case PSI: + fprintf(output, "ψ"); + break; + case OMEGA: + fprintf(output, "ω"); + break; + case ALPHA_UPPER: + fprintf(output, "Α"); + break; + case BETA_UPPER: + fprintf(output, "Β"); + break; + case GAMMA_UPPER: + fprintf(output, "Γ"); + break; + case DELTA_UPPER: + fprintf(output, "Δ"); + break; + case EPSILON_UPPER: + fprintf(output, "Ε"); + break; + case ZETA_UPPER: + fprintf(output, "Ζ"); + break; + case ETA_UPPER: + fprintf(output, "Η"); + break; + case THETA_UPPER: + fprintf(output, "Θ"); + break; + case IOTA_UPPER: + fprintf(output, "Ι"); + break; + case KAPPA_UPPER: + fprintf(output, "Κ"); + break; + case LAMBDA_UPPER: + fprintf(output, "Λ"); + break; + case MU_UPPER: + fprintf(output, "Μ"); + break; + case NU_UPPER: + fprintf(output, "Ν"); + break; + case XI_UPPER: + fprintf(output, "Ξ"); + break; + case OMICRON_UPPER: + fprintf(output, "Ο"); + break; + case PI_UPPER: + fprintf(output, "Π"); 
+ break; + case RHO_UPPER: + fprintf(output, "Ρ"); + break; + case SIGMA_UPPER: + fprintf(output, "Σ"); + break; + case TAU_UPPER: + fprintf(output, "Τ"); + break; + case UPSILON_UPPER: + fprintf(output, "Υ"); + break; + case PHI_UPPER: + fprintf(output, "Φ"); + break; + case CHI_UPPER: + fprintf(output, "Χ"); + break; + case PSI_UPPER: + fprintf(output, "Ψ"); + break; + case OMEGA_UPPER: + fprintf(output, "Ω"); + break; + } +} + +void EvalURL(token* t) +{ + fprintf(output, "", t->markup.text); + GetNextToken(input, t); + while(t->type != ENDF) + { + switch(t->type) + { + case ESC: + EvalEsc(t); + break; + case MARKUP: + if( t->markup.type == CODE_BLOCK_START + || t->markup.type == MATHS_BLOCK_START + || t->markup.type == HTML_BLOCK_START + || t->markup.type == LIST) + { + //error + return; + } + else if(t->markup.type == URL_STOP) + { + fprintf(output, "\n"); + return; + } + else + { + EvalMarkup(t); + } + break; + case HTML_TAG: + EvalTag(t); + break; + case ENDL: + fprintf(output, "
\n"); + break; + + case SYMBOL: + EvalSymbol(t); + break; + + case CHAR: default: + EvalChar(t); + break; + } + GetNextToken(input, t); + } + //error +} + +void EvalImage(token* t) +{ + fprintf(output, "", t->markup.text); +} + +void EvalMathsChar(token* t) +{ + //NOTE(martin): replaces special characters with html entities + + switch(t->c) + { + case '<': + fprintf(output, "<"); + break; + + case '>': + fprintf(output, ">"); + break; + + case '&': + fprintf(output, "&"); + break; + + case '\t': + fprintf(output, "  "); + break; + + case '+': case '/' : case '*': case '!': case '=': + fprintf(output, " %c ", t->c); + break; + + case '(': case ')' : case '[' : case ']': case ',': case ';': + fprintf(output, "%c", t->c); + break; + + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + fprintf(output, "%c", t->c); + break; + + default: + if(t->c >= 'A' && t->c <= 'Z' && !italic) + { + fprintf(output, "%c", t->c); + } + else + { + putc(t->c, output); + } + break; + } +} + +void EvalMathsSymbol(token* t) +{ + switch(t->symbol.code) + { + case MULTIPLY: + fprintf(output, " × "); + break; + case DIVIDE: + fprintf(output, " ÷ "); + break; + case SUBSTRACT: + fprintf(output, " − "); + break; + case STAR: + fprintf(output, " * "); + break; + case LOSTAR: + fprintf(output, " ∗ "); + break; + case CDOT: + fprintf(output, " · "); + break; + case LEQ: + fprintf(output, ""); + break; + case GEQ: + fprintf(output, ""); + break; + case INF: + fprintf(output, ""); + break; + case OTIMES: + fprintf(output, " ⊗ "); + break; + case LARR: + fprintf(output, ""); + break; + case RARR: + fprintf(output, ""); + break; + case EMDASH: + fprintf(output, "—"); + break; + case LINE: + fprintf(output, "
"); + break; + case THINSPACE: + fprintf(output, " "); + break; + case ALPHA: + fprintf(output, "α"); + break; + case BETA: + fprintf(output, "β"); + break; + case GAMMA: + fprintf(output, "γ"); + break; + case DELTA: + fprintf(output, "δ"); + break; + case EPSILON: + fprintf(output, "ε"); + break; + case ZETA: + fprintf(output, "ζ"); + break; + case ETA: + fprintf(output, "η"); + break; + case THETA: + fprintf(output, "θ"); + break; + case IOTA: + fprintf(output, "ι"); + break; + case KAPPA: + fprintf(output, "κ"); + break; + case LAMBDA: + fprintf(output, "λ"); + break; + case MU: + fprintf(output, "μ"); + break; + case NU: + fprintf(output, "ν"); + break; + case XI: + fprintf(output, "ξ"); + break; + case OMICRON: + fprintf(output, "ο"); + break; + case PI: + fprintf(output, "π"); + break; + case RHO: + fprintf(output, "ρ"); + break; + case SIGMA: + fprintf(output, "σ"); + break; + case TAU: + fprintf(output, "τ"); + break; + case UPSILON: + fprintf(output, "υ"); + break; + case PHI: + fprintf(output, "φ"); + break; + case CHI: + fprintf(output, "χ"); + break; + case PSI: + fprintf(output, "ψ"); + break; + case OMEGA: + fprintf(output, "ω"); + break; + + case ALPHA_UPPER: + fprintf(output, "Α"); + break; + case BETA_UPPER: + fprintf(output, "Β"); + break; + case GAMMA_UPPER: + fprintf(output, "Γ"); + break; + case DELTA_UPPER: + fprintf(output, "Δ"); + break; + case EPSILON_UPPER: + fprintf(output, "Ε"); + break; + case ZETA_UPPER: + fprintf(output, "Ζ"); + break; + case ETA_UPPER: + fprintf(output, "Η"); + break; + case THETA_UPPER: + fprintf(output, "Θ"); + break; + case IOTA_UPPER: + fprintf(output, "Ι"); + break; + case KAPPA_UPPER: + fprintf(output, "Κ"); + break; + case LAMBDA_UPPER: + fprintf(output, "Λ"); + break; + case MU_UPPER: + fprintf(output, "Μ"); + break; + case NU_UPPER: + fprintf(output, "Ν"); + break; + case XI_UPPER: + fprintf(output, "Ξ"); + break; + case OMICRON_UPPER: + fprintf(output, "Ο"); + break; + case PI_UPPER: + fprintf(output, 
"Π"); + break; + case RHO_UPPER: + fprintf(output, "Ρ"); + break; + case SIGMA_UPPER: + fprintf(output, "Σ"); + break; + case TAU_UPPER: + fprintf(output, "Τ"); + break; + case UPSILON_UPPER: + fprintf(output, "Υ"); + break; + case PHI_UPPER: + fprintf(output, "Φ"); + break; + case CHI_UPPER: + fprintf(output, "Χ"); + break; + case PSI_UPPER: + fprintf(output, "Ψ"); + break; + case OMEGA_UPPER: + fprintf(output, "Ω"); + break; + } +} + +void EvalEsc(token* t) +{ + switch(t->c) + { + case '\n': + fprintf(output, "\n"); + break; + + case 'n': + fprintf(output, "
\n"); + break; + + case 't': + fprintf(output, "  "); + break; + + case 's': case ' ': + fprintf(output, " "); + break; + case '&': + putc('&', output); + break; + default: + EvalChar(t); + break; + } +} + +void EvalSection(token* t) +{ + int sectionLevel = t->markup.value; + + fprintf(output, "", sectionLevel); + + GetNextToken(input, t); + while(t->type != ENDF) + { + switch(t->type) + { + case ESC: + EvalEsc(t); + break; + case MARKUP: + if( t->markup.type == CODE_BLOCK_START + || t->markup.type == MATHS_BLOCK_START + || t->markup.type == HTML_BLOCK_START + || t->markup.type == LIST) + { + //error + return; + } + else if(t->markup.type == SECTION) + { + if(t->markup.value != sectionLevel) + { + //error + } + fprintf(output, "\n", sectionLevel); + return; + } + else + { + EvalMarkup(t); + } + break; + case HTML_TAG: + EvalTag(t); + break; + case ENDL: + fprintf(output, "
\n"); + break; + + case SYMBOL: + EvalSymbol(t); + break; + + case CHAR: default: + EvalChar(t); + break; + } + GetNextToken(input, t); + } +} + +void EvalBold() +{ + if(!bold) + { + fprintf(output, ""); + bold = true; + } + else + { + fprintf(output, ""); + bold = false; + } +} +void EvalItalic() +{ + if(!italic) + { + fprintf(output, ""); + italic = true; + } + else + { + fprintf(output, ""); + italic = false; + } +} + +void EvalUnderline() +{ + if(!underline) + { + fprintf(output, ""); + underline = true; + } + else + { + fprintf(output, ""); + underline = false; + } +} + +void EvalRegular() +{ + if(!regular) + { + fprintf(output, ""); + regular = true; + } + else + { + fprintf(output, ""); + regular = false; + } +} + +void EvalCode(token* t, bool inlineCode) +{ + if(inlineCode || (t->markup.text && !strcmp(t->markup.text, "none"))) + { + fprintf(output, "<%s class=\"code\">", inlineCode ? "span" : "div"); + + GetNextToken(input, t); + + bool exit = false; + while(t->type != ENDF && !exit) + { + switch(t->type) + { + case MARKUP: + { + if(t->markup.type != CODE_INLINE_STOP) + { + //error; + } + exit = true; + } + break; + case CHAR: + { + EvalCharInCodeMode(t); + } + break; + default: + //error + exit = true; + break; + } + if(!exit) + { + GetNextToken(input, t); + } + } + fprintf(output, "", inlineCode ? "span" : "div"); + } + else + { + fprintf(output, "
"); + HighlightCode(input, output); + fprintf(output, "
\n"); + ResetScanMode(); + } +} + +void EvalMaths(bool inlineMaths) +{ + if(inlineMaths) + { + fprintf(output, ""); + } + else + { + fprintf(output, "
\n"); + } + + token t; + EatSpaceUntilEndl(input, &t); + + while(t.type != ENDF) + { + if(t.type == MARKUP) + { + if( (!inlineMaths && t.markup.type == MATHS_BLOCK_STOP) + || (inlineMaths && t.markup.type == MATHS_INLINE_STOP)) + { + //ok + break; + } + else if( t.markup.type == MATHS_BLOCK_START + || t.markup.type == MATHS_INLINE_START + || t.markup.type == CODE_BLOCK_START + || t.markup.type == CODE_INLINE_START + || t.markup.type == HTML_BLOCK_START + || t.markup.type == CODE_BLOCK_STOP + || t.markup.type == CODE_INLINE_STOP + || t.markup.type == LIST) + { + //error + break; + } + } + switch(t.type) + { + case ESC: + EvalEsc(&t); + break; + case MARKUP: + EvalMarkup(&t); + break; + case HTML_TAG: + EvalTag(&t); + break; + case ENDL: + fprintf(output, "
\n"); + break; + case SYMBOL: + EvalMathsSymbol(&t); + break; + case CHAR: default: + EvalMathsChar(&t); + break; + } + GetNextToken(input, &t); + } + + if(inlineMaths) + { + fprintf(output, ""); + } + else + { + fprintf(output, "
\n"); + } +} + +void EvalHTMLBlock() +{ + token t; + GetNextToken(input, &t); + + bool exit = false; + while(t.type != ENDF && !exit) + { + switch(t.type) + { + case MARKUP: + { + if(t.markup.type != CODE_BLOCK_STOP) + { + //error; + } + exit = true; + } + break; + case CHAR: + putc(t.c, output); + break; + default: + //error + exit = true; + break; + } + GetNextToken(input, &t); + } +} + +void Indent(int level) +{ + for(int i=0;i\n"); + + listLevel++; + + Indent(listLevel); + fprintf(output, "
  • "); + if(listLevel != destLevel) + { + fprintf(output, "\n"); + } + } + + token t; + GetNextToken(input, &t); + bool exit = false; + while(t.type != ENDF && !exit) + { + if(t.type == MARKUP && t.markup.type == LIST) + { + break; + } + switch(t.type) + { + case ESC: + EvalEsc(&t); + break; + case MARKUP: + EvalMarkup(&t); + break; + case HTML_TAG: + EvalTag(&t); + break; + case ENDL: + { + GetNextToken(input, &t); + if(t.type == ENDL) + { + exit = true; + } + else + { + if(t.type != MARKUP || t.markup.type != LIST) + { + fprintf(output, "
    \n"); + } + continue; + } + } + break; + case SYMBOL: + EvalSymbol(&t); + break; + case CHAR: default: + EvalChar(&t); + break; + } + if(!exit) + { + GetNextToken(input, &t); + } + } + + if(t.type == MARKUP && t.markup.type == LIST) + { + if(t.markup.value >= listLevel) + { + if(t.markup.value == listLevel) + { + fprintf(output, "
  • \n"); + Indent(listLevel); + fprintf(output, "
  • "); + } + EvalList(listLevel, t.markup.value); + return; + } + else + { + fprintf(output, "
  • \n"); + while(listLevel > t.markup.value ) + { + listLevel--; + Indent(listLevel); + fprintf(output, "\n"); + Indent(listLevel); + fprintf(output, "\n"); + } + if(listLevel) + { + Indent(listLevel); + fprintf(output, "
  • "); + EvalList(listLevel, t.markup.value); + } + else + { + fprintf(output, "\n"); + } + } + } + else + { + fprintf(output, "
  • \n"); + while(listLevel > 1) + { + listLevel--; + Indent(listLevel); + fprintf(output, "\n"); + Indent(listLevel); + fprintf(output, "\n"); + } + fprintf(output, "\n"); + } +} + +void EvalLongSuperscript(token* t) +{ + fprintf(output, ""); + GetNextToken(input, t); + while(t->type != ENDF) + { + switch(t->type) + { + case ESC: + EvalEsc(t); + break; + case MARKUP: + if( t->markup.type == CODE_BLOCK_START + || t->markup.type == MATHS_BLOCK_START + || t->markup.type == HTML_BLOCK_START + || t->markup.type == LIST + || t->markup.type == SECTION) + { + //error + return; + } + else + { + EvalMarkup(t); + } + break; + case HTML_TAG: + EvalTag(t); + break; + + case SYMBOL: + EvalMathsSymbol(t); + break; + case CHAR: + if(t->c == '}') + { + fprintf(output, ""); + return; + } + EvalMathsChar(t); + break; + + default: + //error + return; + } + GetNextToken(input, t); + } +} +void EvalSuperscript(token* t) +{ + GetNextToken(input, t); + if(t->type == CHAR && t->c == '{') + { + EvalLongSuperscript(t); + return; + } + else + { + fprintf(output, " "); + if(t->type == CHAR) + { + EvalMathsChar(t); + } + else if(t->type == SYMBOL) + { + EvalMathsSymbol(t); + } + else if(t->type == ESC) + { + EvalEsc(t); + } + else + { + //error + } + fprintf(output, ""); + } +} + +void EvalLongSubscript(token* t) +{ + fprintf(output, ""); + GetNextToken(input, t); + while(t->type != ENDF) + { + switch(t->type) + { + case ESC: + EvalEsc(t); + break; + case MARKUP: + if( t->markup.type == CODE_BLOCK_START + || t->markup.type == MATHS_BLOCK_START + || t->markup.type == HTML_BLOCK_START + || t->markup.type == LIST + || t->markup.type == SECTION) + { + //error + return; + } + else + { + EvalMarkup(t); + } + break; + case HTML_TAG: + EvalTag(t); + break; + + case SYMBOL: + EvalMathsSymbol(t); + case CHAR: + if(t->c == '}') + { + fprintf(output, ""); + return; + } + EvalMathsChar(t); + break; + + default: + //error + return; + } + GetNextToken(input, t); + } +} +void EvalSubscript(token* t) +{ + 
GetNextToken(input, t); + if(t->type == CHAR && t->c == '{') + { + EvalLongSubscript(t); + return; + } + else + { + fprintf(output, ""); + if(t->type == CHAR) + { + EvalMathsChar(t); + } + else if(t->type == SYMBOL) + { + EvalMathsSymbol(t); + } + else if(t->type == ESC) + { + EvalEsc(t); + } + else + { + //error + } + fprintf(output, ""); + } +} + +void EvalVector(token* t) +{ + GetNextToken(input, t); + + if(t->type == CHAR) + { + EvalMathsChar(t); + } + else if(t->type == SYMBOL) + { + EvalMathsSymbol(t); + } + else if(t->type == ESC) + { + EvalEsc(t); + } + else + { + //error + } + fprintf(output, ""); +} + +void EvalSqrt(token* t) +{ + fprintf(output, " "); + + GetNextToken(input, t); + if(t->type == CHAR && t->c == '{') + { + GetNextToken(input, t); + bool exit = false; + while(t->type != ENDF && !exit) + { + switch(t->type) + { + case ESC: + EvalEsc(t); + break; + case MARKUP: + if( t->markup.type == CODE_BLOCK_START + || t->markup.type == MATHS_BLOCK_START + || t->markup.type == HTML_BLOCK_START + || t->markup.type == LIST + || t->markup.type == SECTION) + { + //error + return; + } + else + { + EvalMarkup(t); + } + break; + case HTML_TAG: + EvalTag(t); + break; + + case SYMBOL: + EvalMathsSymbol(t); + case CHAR: + if(t->c == '}') + { + exit = true; + break; + } + EvalMathsChar(t); + break; + + default: + //error + return; + } + if(!exit) + { + GetNextToken(input, t); + } + } + + } + else + { + if(t->type == CHAR) + { + EvalMathsChar(t); + } + else if(t->type == SYMBOL) + { + EvalMathsSymbol(t); + } + else if(t->type == ESC) + { + EvalEsc(t); + } + else + { + //error + } + } + fprintf(output, " "); +} + +void GetMathArg(token* t) +{ + GetNextToken(input, t); + bool exit = false; + while(t->type != ENDF && !exit) + { + switch(t->type) + { + case ESC: + EvalEsc(t); + break; + case MARKUP: + if( t->markup.type == CODE_BLOCK_START + || t->markup.type == MATHS_BLOCK_START + || t->markup.type == HTML_BLOCK_START + || t->markup.type == LIST + || t->markup.type == 
SECTION) + { + //error + return; + } + else + { + EvalMarkup(t); + } + break; + case HTML_TAG: + EvalTag(t); + break; + + case SYMBOL: + EvalMathsSymbol(t); + case CHAR: + if(t->c == '}') + { + exit = true; + break; + } + EvalMathsChar(t); + break; + + default: + //error + return; + } + if(!exit) + { + GetNextToken(input, t); + } + } +} + +void EvalSumProd(token* t, bool sum) +{ + fprintf(output, ""); + + GetNextToken(input, t); + if(t->type == CHAR && t->c == '{') + { + GetNextToken(input, t); + + if(t->type == CHAR && t->c == '{') + { + fprintf(output, ""); + GetMathArg(t); + fprintf(output, ""); + } + else + { + //error + } + + fprintf(output, "%s", sum ? "∑" : "∏"); + + GetNextToken(input, t); + if(t->type == CHAR && t->c == '{') + { + fprintf(output, ""); + GetMathArg(t); + fprintf(output, ""); + } + else + { + //error + } + + GetNextToken(input, t); + if(t->type != CHAR || t->c == '}') + { + //error + } + } + else + { + //error + } + fprintf(output, " "); +} + + +void EvalFrac(token* t) +{ + fprintf(output, ""); + + GetNextToken(input, t); + if(t->type == CHAR && t->c == '{') + { + GetNextToken(input, t); + + if(t->type == CHAR && t->c == '{') + { + fprintf(output, ""); + GetMathArg(t); + fprintf(output, ""); + } + else + { + //error + } + GetNextToken(input, t); + if(t->type == CHAR && t->c == '{') + { + fprintf(output, ""); + GetMathArg(t); + fprintf(output, ""); + } + else + { + //error + } + + GetNextToken(input, t); + if(t->type != CHAR || t->c == '}') + { + //error + } + } + else + { + //error + } + fprintf(output, ""); +} + +void EvalMarkup(token* t) +{ + markup_tag* markup = &t->markup; + + switch(markup->type) + { + case MATHS_INLINE_START: + EvalMaths(true); + break; + case MATHS_BLOCK_START: + EvalMaths(false); + break; + case CODE_INLINE_START: + EvalCode(t, true); + break; + case CODE_BLOCK_START: + EvalCode(t, false); + break; + case HTML_BLOCK_START: + EvalHTMLBlock(); + break; + case URL_START: + EvalURL(t); + break; + case IMG: + 
EvalImage(t); + break; + case SECTION: + EvalSection(t); + break; + case UNDERLINE: + EvalUnderline(); + break; + case BOLD: + EvalBold(); + break; + case ITALIC: + EvalItalic(); + break; + case REGULAR: + EvalRegular(); + break; + case LIST: + EvalList(0, 1); + break; + case SUBSCRIPT: + EvalSubscript(t); + break; + case SUPERSCRIPT: + EvalSuperscript(t); + break; + case VECTOR: + EvalVector(t); + break; + case SQRT: + EvalSqrt(t); + break; + case FRACTION: + EvalFrac(t); + break; + case NSUM: + EvalSumProd(t, true); + break; + case NPRODUCT: + EvalSumProd(t, false); + break; + + default: + //error + break; + } +} + +void EvalTag(token* t) +{ + fprintf(output, "%s", t->html.text); +} + +void EvalParagraph(token* t) +{ + fprintf(output, "

    \n"); + + while(t->type != ENDF) + { + switch(t->type) + { + case ESC: + EvalEsc(t); + break; + case MARKUP: + if( t->markup.type == CODE_BLOCK_START + || t->markup.type == MATHS_BLOCK_START + || t->markup.type == HTML_BLOCK_START + || t->markup.type == SECTION + || t->markup.type == LIST) + { + fprintf(output, "\n

    \n"); + return; + } + EvalMarkup(t); + break; + case HTML_TAG: + EvalTag(t); + break; + case ENDL: + GetNextToken(input, t); + if(t->type == ENDL) + { + fprintf(output, "\n

    \n"); + GetNextToken(input, t); + return; + } + else + { + fprintf(output, "
    \n"); + } + continue; + + case SYMBOL: + EvalSymbol(t); + break; + case CHAR: default: + EvalChar(t); + break; + } + GetNextToken(input, t); + } + fprintf(output, "\n

    \n"); +} + +int main(int argc, char** argv) +{ + if(argc > 1) + { + input = fopen(argv[1], "r"); + } + if(argc > 2) + { + output = fopen(argv[2], "w"); + } + if(!input) + { + input = stdin; + } + if(!output) + { + output = stdout; + } + + token t; + GetNextToken(input, &t); + + while(t.type != ENDF) + { + switch(t.type) + { + case MARKUP: + if( t.markup.type == CODE_BLOCK_START + || t.markup.type == MATHS_BLOCK_START + || t.markup.type == HTML_BLOCK_START) + { + EvalMarkup(&t); + + //NOTE(martin): we eat the first newline after blocks + EatSpaceUntilEndl(input, &t); + continue; + } + else if(t.markup.type == SECTION) + { + EvalMarkup(&t); + + //NOTE(martin): we eat the first newline after blocks + EatSpaceUntilEndl(input, &t); + EatSpaceUntilEndl(input, &t); + continue; + } + else if(t.markup.type == LIST) + { + EvalMarkup(&t); + //NOTE(martin): we eat the first newline after blocks + break; + } + else + { + EvalParagraph(&t); + continue; + } + break; + case HTML_TAG: + EvalTag(&t); + //NOTE(martin): we eat the first newline after a block + EatSpaceUntilEndl(input, &t); + continue; + + break; + case ENDL: + fprintf(output, "
    \n"); + break; + case ESC: + if(t.c == '\n') + { + EvalEsc(&t); + break; + } + else + { + EvalParagraph(&t); + } + break; + case SYMBOL: + case CHAR: default: + EvalParagraph(&t); + continue; + break; + } + GetNextToken(input, &t); + } + + if(input != stdin) + { + fclose(input); + } + if(output != stdout) + { + fclose(output); + } + return(0); +} diff --git a/scanner.cpp b/scanner.cpp new file mode 100644 index 0000000..57ea8a4 --- /dev/null +++ b/scanner.cpp @@ -0,0 +1,858 @@ +//***************************************************************** +// +// $file: scanner.cpp $ +// $author: Martin Fouilleul $ +// $date: 24/12/2017 $ +// +//***************************************************************** + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Martin Fouilleul +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+------------------------------------------------------------------------------ +*/ + +#include +#include + +#include"scanner.h" + +static int mode = MODE_TEXT; + +const int MAX_LOOK_AHEAD = 15; + +bool LookAhead(FILE* input, const char* string) +{ + char buffer[MAX_LOOK_AHEAD+1]; + int len = strlen(string); + int count = 0; + + + while(count=0;i--) + { + ungetc(buffer[i], input); + } + return(false); + } + else + { + return(true); + } +} + +char Peek(FILE* input) +{ + char c = getc(input); + ungetc(c, input); + return(c); +} + +void CharacterToken(FILE* input, token* t) +{ + t->type = CHAR; + t->c = getc(input); +} + +void MarkupCharToken(markup_type type, token* t) +{ + t->type = MARKUP; + t->markup.type = type; + t->markup.value = 0; +} + +void SymbolToken(symbol_code code, token* t) +{ + t->type = SYMBOL; + t->symbol.code = code; +} + + +void EndlToken(FILE* input, token* t) +{ + getc(input); + t->type = ENDL; +} + +void AttributedMarkupToken(FILE* input, markup_type type, token* t) +{ + t->type = MARKUP; + t->markup.type = type; + int count = 0; + char c; + while(((c = getc(input)) != EOF) && count < MAX_MARKUP_TAG_LENGTH) + { + if(c==']') + { + t->markup.text[count] = '\0'; + return; + } + t->markup.text[count] = c; + count++; + } + //error + return; +} + +void BlockToken(FILE* input, token* t) +{ + t->type = MARKUP; + + if( LookAhead(input, "[c]")) + { + t->markup.type = CODE_INLINE_START; + mode = MODE_CODE_INLINE; + } + else if(LookAhead(input, "[m]")) + { + t->markup.type = MATHS_INLINE_START; + mode = MODE_MATHS_INLINE; + } + else if(LookAhead(input, "[/c]")) + { + t->markup.type = CODE_INLINE_STOP; + mode = MODE_TEXT; + } + else if(LookAhead(input, "[/m]")) + { + t->markup.type = MATHS_INLINE_STOP; + mode = MODE_TEXT; + } + else if(LookAhead(input, "[code=")) + { + AttributedMarkupToken(input, CODE_BLOCK_START, t); + mode = MODE_CODE_BLOCK; + } + else if(LookAhead(input, "[code]")) + { + t->markup.type = CODE_BLOCK_START; + mode = MODE_CODE_BLOCK; + } + 
else if(LookAhead(input, "[html]")) + { + t->markup.type = HTML_BLOCK_START; + mode = MODE_HTML; + } + else if(LookAhead(input, "[/code]")) + { + t->markup.type = CODE_BLOCK_STOP; + mode = MODE_TEXT; + } + else if(LookAhead(input, "[/html]")) + { + t->markup.type = HTML_BLOCK_STOP; + mode = MODE_TEXT; + } + else if(LookAhead(input, "[maths]")) + { + t->markup.type = MATHS_BLOCK_START; + mode = MODE_MATHS_BLOCK; + } + else if(LookAhead(input, "[/maths]")) + { + t->markup.type = MATHS_BLOCK_STOP; + mode = MODE_TEXT; + } + else if(LookAhead(input, "[url=")) + { + AttributedMarkupToken(input, URL_START, t); + } + else if(LookAhead(input, "[/url]")) + { + t->markup.type = URL_STOP; + } + else if(LookAhead(input, "[img=")) + { + AttributedMarkupToken(input, IMG, t); + } + else + { + //TODO(martin): should issue a warning ? + CharacterToken(input, t); + } +} + +void RepeatableMarkupToken(FILE* input, char m, markup_type type, token* t) +{ + char c; + int count = 0; + while((c = getc(input)) == m) + { + count++; + } + ungetc(c, input); + + t->type = MARKUP; + t->markup.type = type; + t->markup.value = count; +} + +void LessToken(FILE* input, token* t) +{ + t->html.text[0] = getc(input); + char c = Peek(input); + if( c == '!' 
+ || (c >= 'A' && c <= 'Z') + || (c >= 'a' && c <= 'z') + || c == '/') + { + bool quotes = false; + bool dquotes = false; + int count = 1; + while(((c = getc(input)) != EOF) && (count < MAX_HTML_TAG_LENGTH)) + { + t->html.text[count] = c; + + if(c == '>' && !quotes && !dquotes) + { + count++; + break; + } + else if(c == '\'' && !dquotes) + { + quotes = !quotes; + } + else if(c == '\"' && !quotes) + { + dquotes = !dquotes; + } + count++; + } + if(count >= MAX_HTML_TAG_LENGTH || c == EOF) + { + //issue an error + } + else + { + t->type = HTML_TAG; + t->html.text[count] = '\0'; + } + } + else if(c == '-') + { + while(c == '-') + { + c = getc(input); + } + ungetc(c, input); + t->type = SYMBOL; + t->symbol.code = LARR; + + } + else + { + ungetc(t->html.text[0], input); + CharacterToken(input, t); + } +} + +void EscapeToken(FILE* input, token* t) +{ + getc(input); + t->type = SYMBOL; + + if(LookAhead(input, "star")) + { + t->symbol.code = STAR; + } + else if(LookAhead(input, "lowstar")) + { + t->symbol.code = LOSTAR; + } + else if(LookAhead(input, "minus")) + { + SymbolToken(SUBSTRACT, t); + } + else if(LookAhead(input, "div")) + { + t->symbol.code = DIVIDE; + } + else if(LookAhead(input, "cdot")) + { + t->symbol.code = CDOT; + } + else if(LookAhead(input, "times")) + { + t->symbol.code = MULTIPLY; + } + else if(LookAhead(input, "otimes")) + { + t->symbol.code = OTIMES; + } + else if(LookAhead(input, "leq")) + { + t->symbol.code = LEQ; + } + else if(LookAhead(input, "geq")) + { + t->symbol.code = GEQ; + } + else if(LookAhead(input, "inf")) + { + t->symbol.code = INF; + } + else if(LookAhead(input, "larr")) + { + t->symbol.code = LARR; + } + else if(LookAhead(input, "rarr")) + { + t->symbol.code = RARR; + } + else if(LookAhead(input, "vec")) + { + MarkupCharToken(VECTOR, t); + } + else if(LookAhead(input, "sqrt")) + { + MarkupCharToken(SQRT, t); + } + else if(LookAhead(input, "frac")) + { + MarkupCharToken(FRACTION, t); + } + else if(LookAhead(input, "nsum")) + { + 
MarkupCharToken(NSUM, t); + } + else if(LookAhead(input, "nprod")) + { + MarkupCharToken(NPRODUCT, t); + } + else if(LookAhead(input, "alpha")) + { + t->symbol.code = ALPHA; + } + else if(LookAhead(input, "beta")) + { + t->symbol.code = BETA; + } + else if(LookAhead(input, "gamma")) + { + t->symbol.code = GAMMA; + } + else if(LookAhead(input, "delta")) + { + t->symbol.code = DELTA; + } + else if(LookAhead(input, "epsilon")) + { + t->symbol.code = EPSILON; + } + else if(LookAhead(input, "zeta")) + { + t->symbol.code = ZETA; + } + else if(LookAhead(input, "eta")) + { + t->symbol.code = ETA; + } + else if(LookAhead(input, "theta")) + { + t->symbol.code = THETA; + } + else if(LookAhead(input, "iota")) + { + t->symbol.code = IOTA; + } + else if(LookAhead(input, "kappa")) + { + t->symbol.code = KAPPA; + } + else if(LookAhead(input, "lambda")) + { + t->symbol.code = LAMBDA; + } + else if(LookAhead(input, "mu")) + { + t->symbol.code = MU; + } + else if(LookAhead(input, "nu")) + { + t->symbol.code = NU; + } + else if(LookAhead(input, "xi")) + { + t->symbol.code = XI; + } + else if(LookAhead(input, "omicron")) + { + t->symbol.code = OMICRON; + } + else if(LookAhead(input, "pi")) + { + t->symbol.code = PI; + } + else if(LookAhead(input, "rho")) + { + t->symbol.code = RHO; + } + else if(LookAhead(input, "sigma")) + { + t->symbol.code = SIGMA; + } + else if(LookAhead(input, "tau")) + { + t->symbol.code = TAU; + } + else if(LookAhead(input, "upsilon")) + { + t->symbol.code = UPSILON; + } + else if(LookAhead(input, "phi")) + { + t->symbol.code = PHI; + } + else if(LookAhead(input, "chi")) + { + t->symbol.code = CHI; + } + else if(LookAhead(input, "psi")) + { + t->symbol.code = PSI; + } + else if(LookAhead(input, "omega")) + { + t->symbol.code = OMEGA; + } + else if(LookAhead(input, "Alpha")) + { + t->symbol.code = ALPHA_UPPER; + } + else if(LookAhead(input, "Beta")) + { + t->symbol.code = BETA_UPPER; + } + else if(LookAhead(input, "Gamma")) + { + t->symbol.code = GAMMA_UPPER; + } 
+ else if(LookAhead(input, "Delta")) + { + t->symbol.code = DELTA_UPPER; + } + else if(LookAhead(input, "Epsilon")) + { + t->symbol.code = EPSILON_UPPER; + } + else if(LookAhead(input, "Zeta")) + { + t->symbol.code = ZETA_UPPER; + } + else if(LookAhead(input, "Eta")) + { + t->symbol.code = ETA_UPPER; + } + else if(LookAhead(input, "Theta")) + { + t->symbol.code = THETA_UPPER; + } + else if(LookAhead(input, "Iota")) + { + t->symbol.code = IOTA_UPPER; + } + else if(LookAhead(input, "Kappa")) + { + t->symbol.code = KAPPA_UPPER; + } + else if(LookAhead(input, "Lambda")) + { + t->symbol.code = LAMBDA_UPPER; + } + else if(LookAhead(input, "Mu")) + { + t->symbol.code = MU_UPPER; + } + else if(LookAhead(input, "Nu")) + { + t->symbol.code = NU_UPPER; + } + else if(LookAhead(input, "Xi")) + { + t->symbol.code = XI_UPPER; + } + else if(LookAhead(input, "Omicron")) + { + t->symbol.code = OMICRON_UPPER; + } + else if(LookAhead(input, "Pi")) + { + t->symbol.code = PI_UPPER; + } + else if(LookAhead(input, "Rho")) + { + t->symbol.code = RHO_UPPER; + } + else if(LookAhead(input, "Sigma")) + { + t->symbol.code = SIGMA_UPPER; + } + else if(LookAhead(input, "Tau")) + { + t->symbol.code = TAU_UPPER; + } + else if(LookAhead(input, "Upsilon")) + { + t->symbol.code = UPSILON_UPPER; + } + else if(LookAhead(input, "Phi")) + { + t->symbol.code = PHI_UPPER; + } + else if(LookAhead(input, "Chi")) + { + t->symbol.code = CHI_UPPER; + } + else if(LookAhead(input, "Psi")) + { + t->symbol.code = PSI_UPPER; + } + else if(LookAhead(input, "Omega")) + { + t->symbol.code = OMEGA_UPPER; + } + else if(LookAhead(input, "thin")) + { + t->symbol.code = THINSPACE; + } + else if(LookAhead(input, "b")) + { + MarkupCharToken(BOLD, t); + } + else if(LookAhead(input, "i")) + { + MarkupCharToken(ITALIC, t); + } + else if(LookAhead(input, "u")) + { + MarkupCharToken(UNDERLINE, t); + } + else if(LookAhead(input, "r")) + { + MarkupCharToken(REGULAR, t); + } + else + { + t->type = ESC; + t->c = getc(input); + } +} + 
+void GetNextTokenInHTMLMode(FILE* input, token* t) +{ + char c = Peek(input); + if(c==EOF) + { + t->type = ENDF; + return; + } + if(c == '[') + { + t->type = MARKUP; + if(LookAhead(input, "[/html]")) + { + t->markup.type = CODE_INLINE_STOP; + mode = MODE_TEXT; + } + else + { + CharacterToken(input, t); + } + } + else + { + CharacterToken(input, t); + } +} + +void GetNextTokenInCodeMode(FILE* input, token* t) +{ + char c = Peek(input); + if(c==EOF) + { + t->type = ENDF; + return; + } + if(c == '[') + { + t->type = MARKUP; + if(LookAhead(input, "[/c]")) + { + t->markup.type = CODE_INLINE_STOP; + mode = MODE_TEXT; + } + else if(LookAhead(input, "[/code]")) + { + t->markup.type = CODE_BLOCK_STOP; + mode = MODE_TEXT; + } + else + { + CharacterToken(input, t); + } + } + else + { + CharacterToken(input, t); + } +} + +void GetNextTokenInMathsMode(FILE* input, token* t) +{ + char c = Peek(input); + if(c==EOF) + { + t->type = ENDF; + return; + } + switch(c) + { + case ':': + getc(input); + MarkupCharToken(BOLD, t); + break; + case '*': + getc(input); + SymbolToken(MULTIPLY, t); + break; + case '-': + getc(input); + SymbolToken(SUBSTRACT, t); + break; + case '_': + getc(input); + MarkupCharToken(SUBSCRIPT, t); + break; + case '^': + getc(input); + MarkupCharToken(SUPERSCRIPT, t); + break; + case '\n': + EndlToken(input, t); + break; + case '[': + BlockToken(input, t); + break; + case '<': + getc(input); + c = Peek(input); + if(c == '=') + { + getc(input); + SymbolToken(LEQ, t); + } + else + { + ungetc('<', input); + LessToken(input, t); + } + break; + case '>': + getc(input); + c = Peek(input); + if(c == '=') + { + getc(input); + SymbolToken(GEQ, t); + } + else + { + ungetc('>', input); + CharacterToken(input, t); + } + break; + case '\\': + EscapeToken(input, t); + break; + default: + CharacterToken(input, t); + break; + } +} + +void RepeatableHyphen(FILE* input, token* t) +{ + char c; + int count = 1; + while((c = getc(input)) == '-') + { + count++; + } + + t->type = 
SYMBOL; + if(c == '>') + { + t->symbol.code = RARR; + } + else + { + if(count <= 3) + { + t->symbol.code = EMDASH; + } + else + { + t->symbol.code = LINE; + } + ungetc(c, input); + } +} + +void LookAheadForList(FILE* input, token* t) +{ + char buffer[8]; + bool plain = false; + int count = 0; + while(count < 8) + { + char c = getc(input); + buffer[count] = c; + count++; + + if(c == '-') + { + if(LookAhead(input, " ")) + { + t->type = MARKUP; + t->markup.type = LIST; + t->markup.value = count; + return; + } + else if(count==1 && LookAhead(input, "-")) + { + RepeatableHyphen(input, t); + return; + } + break; + } + else if(c != '\t') + { + break; + } + } + + for(int i=count-1;i>0;i--) + { + ungetc(buffer[i], input); + } + t->type = CHAR; + t->c = buffer[0]; +} + +void GetNextTokenInTextMode(FILE* input, token* t) +{ + char c = Peek(input); + if(c==EOF) + { + t->type = ENDF; + return; + } + switch(c) + { + case '=': + RepeatableMarkupToken(input, '=', SECTION, t); + break; + case '\t': case '-': + LookAheadForList(input, t); + break; + case '_': + getc(input); + MarkupCharToken(UNDERLINE, t); + break; + case '*': + getc(input); + MarkupCharToken(BOLD, t); + break; + case '/': + getc(input); + MarkupCharToken(ITALIC, t); + break; + case '\n': + EndlToken(input, t); + break; + case '[': + BlockToken(input, t); + break; + case '<': + LessToken(input, t); + break; + case '\\': + return EscapeToken(input, t); + default: + CharacterToken(input, t); + break; + } +} + +void GetNextToken(FILE* input, token* t) +{ + //NOTE(martin): finds the next token from input, effectively consumming characters + // if no token is found, those 'lookahead' characters are pushed back to the stream + + switch(mode) + { + case MODE_HTML: + return(GetNextTokenInHTMLMode(input, t)); + + case MODE_CODE_INLINE: case MODE_CODE_BLOCK: + return(GetNextTokenInCodeMode(input, t)); + + case MODE_MATHS_INLINE: case MODE_MATHS_BLOCK: + return(GetNextTokenInMathsMode(input, t)); + + case MODE_TEXT: default: + 
GetNextTokenInTextMode(input, t); + } +} + +void ResetScanMode() +{ + mode = MODE_TEXT; +} diff --git a/scanner.h b/scanner.h new file mode 100644 index 0000000..7919626 --- /dev/null +++ b/scanner.h @@ -0,0 +1,190 @@ +//***************************************************************** +// +// $file: scanner.h $ +// $author: Martin Fouilleul $ +// $date: 24/12/2017 $ +// +//***************************************************************** + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Martin Fouilleul +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. 
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+------------------------------------------------------------------------------ +*/ + + +#ifndef __SCANNER_H_ +#define __SCANNER_H_ + +enum{MODE_TEXT, MODE_MATHS_INLINE, MODE_MATHS_BLOCK, MODE_CODE_INLINE, MODE_CODE_BLOCK, MODE_HTML}; + +const int MAX_HTML_TAG_LENGTH = 1023; +const int MAX_MARKUP_TAG_LENGTH = 1023; + +struct html_tag +{ + char text[MAX_HTML_TAG_LENGTH+1]; +}; + +typedef enum {MATHS_INLINE_START, + MATHS_INLINE_STOP, + MATHS_BLOCK_START, + MATHS_BLOCK_STOP, + CODE_INLINE_START, + CODE_INLINE_STOP, + CODE_BLOCK_START, + CODE_BLOCK_STOP, + HTML_BLOCK_START, + HTML_BLOCK_STOP, + URL_START, + URL_STOP, + IMG, + SECTION, + UNDERLINE, + BOLD, + ITALIC, + REGULAR, + LIST, + SUBSCRIPT, + SUPERSCRIPT, + VECTOR, + SQRT, + FRACTION, + NSUM, + NPRODUCT } markup_type; + +struct markup_tag +{ + markup_type type; + union + { + int value; + char text[MAX_MARKUP_TAG_LENGTH+1]; + }; +}; + +typedef enum { MULTIPLY, + DIVIDE, + SUBSTRACT, + STAR, + LOSTAR, + CDOT, + OTIMES, + LEQ, + GEQ, + INF, + LARR, + RARR, + EMDASH, + LINE, + THINSPACE, + ALPHA, + BETA, + GAMMA, + DELTA, + EPSILON, + ZETA, + ETA, + THETA, + IOTA, + KAPPA, + LAMBDA, + MU, + NU, + XI, + OMICRON, + PI, + RHO, + SIGMA, + TAU, + UPSILON, + PHI, + CHI, + PSI, + OMEGA, + ALPHA_UPPER, + BETA_UPPER, + GAMMA_UPPER, + DELTA_UPPER, + EPSILON_UPPER, + ZETA_UPPER, + ETA_UPPER, + THETA_UPPER, + IOTA_UPPER, + KAPPA_UPPER, + LAMBDA_UPPER, + MU_UPPER, + NU_UPPER, + XI_UPPER, + OMICRON_UPPER, + PI_UPPER, + RHO_UPPER, + SIGMA_UPPER, + TAU_UPPER, + UPSILON_UPPER, + PHI_UPPER, + CHI_UPPER, + PSI_UPPER, + OMEGA_UPPER } symbol_code; + +struct markup_symbol +{ + symbol_code code; +}; + +typedef enum { CHAR, SYMBOL, ESC, ENDL, ENDF, MARKUP, HTML_TAG} token_type; + +struct token +{ + token_type type; + int line; + int col; + union + { + char c; + html_tag html; + markup_tag markup; + markup_symbol symbol; + }; +}; + +void GetNextToken(FILE* input, token* t); +void ResetScanMode(); + +#endif //__SCANNER_H_