/* parse.c -- LRL Bootstrap Parser Copyright © 2020-2021 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "bootstrap.h" #include "hash.h" #include #include static struct Type *parse_type(struct Type *type); static struct Expr *parse_expr(struct Block *block); static int parse_optional_block(struct Block **blockptr, struct Block *base, enum TokenType blocktype); static struct Block *parse_ctlblock(struct Block *block, struct Block *base); static struct Block *parse_block(struct Block *block, struct Block *base); static struct Block *parse_block_internal(struct Block *block); static struct Block *parse_case_block(struct Block *block); static int require_block_start(void); /* We don't care about multi-threading support etc. in the bootstrap parser, so we can use global variables */ static FILE *file; static struct Token tok; static int reuse_token = 0; static struct LoopInfo *current_loop; static unsigned long current_gotoid; static struct FunctionState current_func; /* special characters */ #define Er -1 #define SP -2 /* spacing character */ #define NL -3 /* newline */ #define CN -4 /* comment */ /* standard characters. some are not used */ #define XM -5 /* exclamation mark */ #define DQ String #define DL -1 /* dollar sign */ #define PC -1 /* percent sign */ #define AM -1 /* ampersand */ #define SQ -1 /* single quote */ #define LP LParen #define RP RParen #define AS Asterisk #define PL Plus #define CA Comma #define MI Minus #define DT Dot #define SL Slash #define DG Number #define CL Colon #define SC Semicolon #define LT Less #define EQ Assign #define GT Greater #define QU Question #define AT -1 /* at */ #define AZ Identifier #define LS LSquare #define BS -1 /* backslash */ #define RS RSquare #define CF -1 /* circumflex */ #define BT -1 /* backtick */ #define LC LCurly #define PI -1 /* pipe */ #define RC RCurly #define TI -1 /* tilde */ static signed char char2type[128] = { /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ /* 00-0F */ Er,Er,Er,Er,Er,Er,Er,Er,Er,SP,NL,Er,Er,SP,Er,Er, /* 10-1F */ Er,Er,Er,Er,Er,Er,Er,Er,Er,Er,Er,Er,Er,Er,Er,Er, /* 20-2F */ SP,XM,DQ,CN,DL,PC,AM,SQ,LP,RP,AS,PL,CA,MI,DT,SL, /* 30-3F */ DG,DG,DG,DG,DG,DG,DG,DG,DG,DG,CL,SC,LT,EQ,GT,QU, /* 40-4F */ AT,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ, /* 50-5F */ AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,LS,BS,RS,CF,AZ, /* 60-6F */ BT,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ, /* 70-7F */ AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,AZ,LC,PI,RC,TI,Er, }; /*#undef XM*/ #undef DQ #undef DL #undef PC #undef AM #undef SQ #undef LP #undef RP #undef AS #undef PL #undef CA #undef MI #undef DT #undef SL #undef DG #undef CL #undef SC #undef LT #undef EQ #undef GT #undef QU #undef AT #undef AZ #undef LS #undef BK #undef RS #undef CF #undef BT #undef LC #undef PI #undef RC #undef TL #define BUFFER \ char *s = tok.data; \ const char *endp = tok.data + MAX_TOKLEN -1; \ tok.size = 0; #define BUFFER_APPEND \ if (s == endp) error_tok(&tok, "Too long"); \ else { *(s++) = (char)ch; tok.size++; } #define BUFFER_END \ *s = '\0'; #define IS_DIGIT(ch) ((ch) >= '0' && (ch) <= '9') #define IS_HEX(ch) \ (((ch) >= '0' && (ch) <= '9') || \ ((ch) >= 'a' && (ch) <= 'f') || \ ((ch) >= 'A' && (ch) <= 'F')) static int parse_token(void) { if (reuse_token) { reuse_token = 0; return 1; } /* Get first token character */ for (;;) { int ch = getc(file); if (ch == EOF) return 0; tok.column++; if (ch > 126) { error_tok(&tok, "Invalid character"); continue; } tokstart: tok.type = (enum TokenType)char2type[ch]; switch ((int)tok.type) { case Er: error_tok(&tok, "Invalid character"); break; case SP: break; /* do nothing */ case NL: tok.column = 0; tok.line++; break; case CN: do { ch = getc(file); if (ch == EOF) return 0; } while (ch != 10); goto tokstart; case String: { BUFFER for (;;) { ch = getc(file); if (ch == EOF) goto unexpected_eof; tok.column++; if (ch == '"') break; if (ch == '\\') { ch = getc(file); tok.column++; switch (ch) { case EOF: goto unexpected_eof; case '\\': case '\"': break; case 'n': ch = '\n'; break; case 'r': ch = '\r'; break; case '0': ch = '\0'; break; case 'x': { /* Hex escape */ unsigned char res = 0; int i; for (i = 0; i < 2; i++) { ch = getc(file); if (ch == EOF) goto unexpected_eof; tok.column++; if (IS_DIGIT(ch)) res |= (unsigned char)ch - '0'; else if (IS_HEX(ch)) res |= ((unsigned char)ch|0x20) - 'a' + 10; else { error_tok(&tok, "Invalid digit"); break; } res <<= 4; } ch = res; break; } /* TODO unicode escape (converted to UTF-8 byte sequence) */ default: error_tok(&tok, "Invalid escape"); break; } } BUFFER_APPEND } BUFFER_END break; } case Number: { BUFFER if (ch == '0') { ch = getc(file); if (ch == EOF) goto number_done; tok.column++; if (ch == 'x') { /* Hexadecimal */ ch = getc(file); if (ch == EOF) goto unexpected_eof; tok.column++; if (!IS_HEX(ch)) { error_tok(&tok, "Expected digit"); goto number_done_ungetc; } do { ch |= 0x20; /* to lowercase */ BUFFER_APPEND do { ch = getc(file); if (ch == EOF) goto number_done; tok.column++; } while (ch == '_'); } while (IS_HEX(ch)); goto number_done_ungetc; } else if (ch == 'b') { /* Binary */ /* TODO */ } } /* Decimal */ do { BUFFER_APPEND do { ch = getc(file); if (ch == EOF) goto number_done; tok.column++; } while (ch == '_'); /* TODO Floating point: Fractional part and exponent */ } while (IS_DIGIT(ch)); number_done_ungetc: if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { error_tok(&tok, "Letter in number"); } if (ch != EOF) { ungetc(ch, file); tok.column--; } number_done: BUFFER_END tok.type = Number; return 1; } case Identifier: { unsigned int hash = 0; BUFFER do { BUFFER_APPEND hash = HASH(hash, ch); ch = getc(file); tok.column++; if (ch == EOF) break; if (ch > 126) { error_tok(&tok, "Invalid character"); break; } tok.type = (enum TokenType)char2type[ch]; } while (tok.type == Identifier || tok.type == Number); BUFFER_END if (ch == ':') { tok.type = GotoTarget; } else { tok.type = Identifier; if (ch != EOF) { ungetc(ch, file); tok.column--; } } if (tok.size <= MAX_KEYWORD_LEN) { switch (hash) { case HASH4('d','a','t','a'): if (!strcmp(tok.data, "data")) tok.type = KW_Data; break; case HASH4('f','u','n','c'): if (!strcmp(tok.data, "func")) tok.type = KW_Func; break; case HASH4('t','y','p','e'): if (!strcmp(tok.data, "type")) tok.type = KW_Type; break; case HASH3('r','e','f'): if (!strcmp(tok.data, "ref")) tok.type = KW_Ref; break; case HASH4('a','d','d','r'): if (!strcmp(tok.data, "addr")) tok.type = KW_Addr; break; case HASH9('m','e','r','g','e','d','r','e','f'): if (!strcmp(tok.data, "mergedref")) tok.type = KW_MergedRef; break; case HASH3('v','a','r'): if (!strcmp(tok.data, "var")) tok.type = KW_Var; break; case HASH9('w','r','i','t','e','o','n','l','y'): if (!strcmp(tok.data, "writeonly")) tok.type = KW_WriteOnly; break; case HASH3('o','w','n'): if (!strcmp(tok.data, "own")) tok.type = KW_Own; break; /* Can we replace with with only alias/shared? */ /*case HASH6('r','a','l','i','a','s'): if (!strcmp(tok.data, "ralias")) tok.type = KW_RAlias; break; case HASH6('w','a','l','i','a','s'): if (!strcmp(tok.data, "walias")) tok.type = KW_WAlias; break; case HASH7('r','w','a','l','i','a','s'): if (!strcmp(tok.data, "rwalias")) tok.type = KW_RWAlias; break; case HASH7('r','s','h','a','r','e','d'): if (!strcmp(tok.data, "rshared")) tok.type = KW_RShared; break; case HASH7('w','s','h','a','r','e','d'): if (!strcmp(tok.data, "wshared")) tok.type = KW_WShared; break; case HASH8('r','w','s','h','a','r','e','d'): if (!strcmp(tok.data, "rwshared")) tok.type = KW_RWShared; break;*/ case HASH7('a','l','i','a','s','e','d'): if (!strcmp(tok.data, "aliased")) tok.type = KW_Aliased; break; case HASH8('t','h','r','e','a','d','e','d'): if (!strcmp(tok.data, "threaded")) tok.type = KW_Threaded; break; case HASH8('n','o','r','e','t','u','r','n'): if (!strcmp(tok.data, "noreturn")) tok.type = KW_NoReturn; break; case HASH6('s','t','r','u','c','t'): if (!strcmp(tok.data, "struct")) tok.type = KW_Struct; break; case HASH4('e','n','u','m'): if (!strcmp(tok.data, "enum")) tok.type = KW_Enum; break; case HASH4('n','o','n','e'): if (!strcmp(tok.data, "none")) tok.type = KW_None; break; case HASH5('u','n','d','e','f'): if (!strcmp(tok.data, "undef")) tok.type = KW_Undef; break; case HASH2('i','f'): if (!strcmp(tok.data, "if")) tok.type = KW_If; break; case HASH4('e','l','s','e'): if (!strcmp(tok.data, "else")) tok.type = KW_Else; break; case HASH5('w','h','i','l','e'): if (!strcmp(tok.data, "while")) tok.type = KW_While; break; case HASH2('d','o'): if (!strcmp(tok.data, "do")) tok.type = KW_Do; break; case HASH3('f','o','r'): if (!strcmp(tok.data, "for")) tok.type = KW_For; break; case HASH2('i','n'): if (!strcmp(tok.data, "in")) tok.type = KW_In; break; case HASH7('l','o','o','p','e','n','d'): if (!strcmp(tok.data, "loopend")) tok.type = KW_LoopEnd; break; case HASH9('l','o','o','p','e','m','p','t','y'): if (!strcmp(tok.data, "loopempty")) tok.type = KW_LoopEmpty; break; case HASH6('s','w','i','t','c','h'): if (!strcmp(tok.data, "switch")) tok.type = KW_Switch; break; case HASH4('c','a','s','e'): if (!strcmp(tok.data, "case")) tok.type = KW_Case; break; case HASH4('w','i','t','h'): if (!strcmp(tok.data, "with")) tok.type = KW_With; break; case HASH7('d','e','f','a','u','l','t'): if (!strcmp(tok.data, "default")) tok.type = KW_Default; break; case HASH6('a','s','s','e','r','t'): if (!strcmp(tok.data, "assert")) tok.type = KW_Assert; break; case HASH5('b','r','e','a','k'): if (!strcmp(tok.data, "break")) tok.type = KW_Break; break; case HASH8('c','o','n','t','i','n','u','e'): if (!strcmp(tok.data, "continue")) tok.type = KW_Continue; break; case HASH4('g','o','t','o'): if (!strcmp(tok.data, "goto")) tok.type = KW_Goto; break; case HASH6('r','e','t','u','r','n'): if (!strcmp(tok.data, "return")) tok.type = KW_Return; break; case HASH4('v','o','i','d'): if (!strcmp(tok.data, "void")) tok.type = KW_Void; break; case HASH4('b','o','o','l'): if (!strcmp(tok.data, "bool")) tok.type = KW_Bool; break; case HASH5('u','s','i','z','e'): if (!strcmp(tok.data, "usize")) tok.type = KW_USize; break; case HASH5('s','s','i','z','e'): if (!strcmp(tok.data, "ssize")) tok.type = KW_SSize; break; /*case HASH5('u','o','f','f','s'): if (!strcmp(tok.data, "uoffs")) tok.type = KW_UOffs; break;*/ case HASH8('f','i','l','e','o','f','f','s'): if (!strcmp(tok.data, "fileoffs")) tok.type = KW_FileOffs; break; case HASH4('c','h','a','r'): if (!strcmp(tok.data, "char")) tok.type = KW_Char; break; case HASH4('b','y','t','e'): if (!strcmp(tok.data, "byte")) tok.type = KW_Byte; break; case HASH5('s','h','o','r','t'): if (!strcmp(tok.data, "short")) tok.type = KW_Short; break; case HASH6('u','s','h','o','r','t'): if (!strcmp(tok.data, "ushort")) tok.type = KW_UShort; break; case HASH7('w','u','s','h','o','r','t'): if (!strcmp(tok.data, "wushort")) tok.type = KW_WUShort; break; case HASH3('i','n','t'): if (!strcmp(tok.data, "int")) tok.type = KW_Int; break; case HASH4('u','i','n','t'): if (!strcmp(tok.data, "uint")) tok.type = KW_UInt; break; case HASH5('w','u','i','n','t'): if (!strcmp(tok.data, "wuint")) tok.type = KW_WUInt; break; case HASH4('l','o','n','g'): if (!strcmp(tok.data, "long")) tok.type = KW_Long; break; case HASH5('u','l','o','n','g'): if (!strcmp(tok.data, "ulong")) tok.type = KW_ULong; break; case HASH6('w','u','l','o','n','g'): if (!strcmp(tok.data, "wulong")) tok.type = KW_WULong; break; case HASH5('i','n','t','3','2'): if (!strcmp(tok.data, "int32")) tok.type = KW_Int32; break; case HASH6('u','i','n','t','3','2'): if (!strcmp(tok.data, "uint32")) tok.type = KW_UInt32; break; case HASH7('w','u','i','n','t','3','2'): if (!strcmp(tok.data, "wuint32")) tok.type = KW_WUInt32; break; case HASH3('n','o','t'): if (!strcmp(tok.data, "not")) tok.type = KW_Not; break; case HASH3('a','n','d'): if (!strcmp(tok.data, "and")) tok.type = KW_And; break; case HASH2('o','r'): if (!strcmp(tok.data, "or")) tok.type = KW_Or; break; } } tok.hash = hash; return 1; } case Plus: case Asterisk: case Minus: case Slash: case Less: case Assign: case Greater: /* Check for +=, *=, >=, ==, etc. */ ch = getc(file); if (ch == '=') { tok.column++; tok.type += ASSIGN_2CHAR_DELTA; } else if (ch != EOF) { ungetc(ch, file); } return 1; case XM: /* Exclamation mark. Check for != */ ch = getc(file); if (ch == '!') { tok.column++; tok.type = NotEqual; return 1; } else if (ch != EOF) { ungetc(ch, file); } error_tok(&tok, "Invalid character"); break; case LParen: case RParen: case Comma: case Dot: case Colon: case Semicolon: case Question: case LSquare: case RSquare: case LCurly: case RCurly: return 1; /* Values that cannot appear here */ /*case PlusAssign: case MinusAssign: case MultiplyAssign: case DivideAssign: case LessEqual: case Equal: case GreaterEqual: case NotEqual:*/ default: internal_error(1); } /*goto tokstart;*/ } unexpected_eof: error_tok(&tok, "Unexpected end of file"); return -1; } static int parse_token_noeof() { int status = parse_token(); if (status > 0) return 1; if (!status) error_tok(&tok, "Unexpected end of file"); return 0; } static int parse_expected(enum TokenType toktype) { if (!parse_token_noeof()) return 0; if (tok.type != toktype) goto error_unexpected; return 1; error_unexpected: error_tok(&tok, "Unexpected token"); return 0; } static struct Ident *parse_ident(unsigned deftype) { if (!parse_expected(Identifier)) return NULL; return ident_insert(deftype, &tok); } static struct Ident *parse_decl(unsigned deftype) { struct Type typetmp; struct Type *type = parse_type(&typetmp); struct Ident *ident; if (!type) return NULL; ident = parse_ident(deftype); if (!ident) return NULL; memcpy(&ident->decl.type, &typetmp, sizeof(struct Type)); /* TODO type states */ return ident; } /* Rounds up to a power of 2. Works with numbers up to 2**16 */ static unsigned int round_up_pow2(unsigned int n) { --n; n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; ++n; return n; } /** Parses members/parameters in a struct or function */ static struct FieldOrParamList *parse_members(struct FieldOrParamList *list, enum TokenType end_token) { struct FieldOrParam *member, **nextptr; size_t hashmask; if (!parse_token_noeof()) return NULL; reuse_token = 1; if (!list) list = aalloc(sizeof(struct FieldOrParamList), sizeof(void*)); /* First, we parse (and count) the members */ list->count = 0; list->first = NULL; nextptr = &list->first; while (tok.type != end_token) { member = aalloc(sizeof(*member), sizeof(void*)); member->list_next = NULL; member->bucket_next = NULL; if (!parse_type(&member->type)) return NULL; if (!parse_expected(Identifier)) return NULL; member->hash = tok.hash; member->name = aalloc(tok.size+1, sizeof(char)); memcpy(member->name, tok.data, tok.size+1); if (++list->count >= 4096) { error_tok(&tok, end_token == RCurly ? "Too many struct fields" : "Too many function parameters"); return NULL; } *nextptr = member; nextptr = &member->list_next; if (!parse_token_noeof()) return NULL; if (end_token == RCurly) { /* Parsing a struct, using newlines for separation */ } else { /* Parsing a function parameter list, using commas for separation */ if (tok.type == Comma) continue; if (tok.type != end_token) goto error_unexpected; } reuse_token = 1; } if (!parse_expected(end_token)) return NULL; /* Second, we build a hashtable, and also check for duplicates */ list->num_buckets = round_up_pow2(list->count); list->buckets = aalloc(sizeof(void*)*list->num_buckets, sizeof(void*)); memset(list->buckets, 0, sizeof(void*)*list->num_buckets); hashmask = list->num_buckets-1; for (member = list->first; member; member = member->list_next) { size_t bucket = member->hash & hashmask; struct FieldOrParam *other = list->buckets[bucket]; for (; other; other = other->bucket_next) { if (!strcmp(other->name, member->name)) { error_tok(&tok, end_token == RCurly ? "Duplicate field names" : "Duplicate parameter names"); return NULL; } } member->bucket_next = list->buckets[bucket]; list->buckets[bucket] = member; } return list; error_unexpected: error_tok(&tok, "Unexpected token"); return 0; } static struct Type *parse_functype(struct Type *type) { struct FunctionType *functype; if (!parse_expected(LParen)) return NULL; functype = aalloc(sizeof(*functype), sizeof(void*)); if (!parse_members(&functype->params, RParen)) return NULL; if (!parse_type(&functype->returntype)) return NULL; if (!type) type = aalloc(sizeof(*type), sizeof(void*)); type->type = T_FUNC; type->kind.func = functype; return type; } /** * Parses a type. The caller is responsible for setting the deftype field. * * @param outertype Pre-allocated type to use, or NULL to allocate * @return Parsed type, or NULL on error */ static struct Type *parse_type(struct Type *outertype) { unsigned qualifiers = 0; struct Type *type; if (!outertype) outertype = aalloc(sizeof(*outertype), sizeof(void*)); type = outertype; for (;;) { struct Type *innertype; if (!parse_token_noeof()) return NULL; switch (tok.type) { case KW_Ref: /* Pointer */ type->type = T_REF; type->quals = qualifiers; innertype = aalloc(sizeof(*innertype), sizeof(void*)); innertype->deftype = D_NESTEDTYPE; type->kind.nested = innertype; type = innertype; break; case LSquare: { /* Array */ type->type = T_ARRAY; type->quals = qualifiers; if (!parse_token_noeof()) return NULL; if (tok.type == RSquare) { type->misc = M_UNKNOWN_ARR_LENGTH; innertype = aalloc(sizeof(*innertype), sizeof(void*)); innertype->deftype = D_NESTEDTYPE; type->kind.nested = innertype; } else { /*unsigned int length;*/ struct Expr *lengthexpr; struct ArrayType *arrtype; reuse_token = 1; lengthexpr = parse_expr(NULL); if (!parse_expected(RSquare)) return NULL; /* TODO constexpr and short lengths (stored in type->misc) */ arrtype = aalloc(sizeof(*arrtype), sizeof(void*)); arrtype->lengthexpr = lengthexpr; type->kind.arr = arrtype; innertype = &arrtype->elemtype; innertype->deftype = D_NESTEDTYPE; } type = innertype; break; } case KW_Struct: { /* Struct */ struct FieldOrParamList *fields; if (!parse_expected(LCurly)) return NULL; type->type = T_STRUCT; type->quals = qualifiers; fields = parse_members(NULL, RCurly); if (!fields) return NULL; type->kind.fields = fields; goto done; } case KW_Enum: { /* Enum */ /* TODO "final" enums that cannot have more values added? that would allow for optimizing enums to use less space automatically (e.g. 1 bit for bool instead of 32) */ struct EnumType *enumtype; struct EnumValue **inspoint; if (!parse_token_noeof()) return NULL; enumtype = aalloc(sizeof(*enumtype), sizeof(void*)); if (!enumtype) return NULL; type->type = T_ENUM; type->quals = qualifiers; type->kind.enu = enumtype; if (tok.type != LCurly) { reuse_token = 1; enumtype->base = parse_type(NULL); if (!parse_expected(LCurly)) return NULL; } else { enumtype->base = NULL; } if (!parse_token_noeof()) return NULL; enumtype->values = NULL; inspoint = &enumtype->values; for (;;) { struct EnumValue *ev; if (tok.type == RCurly) break; if (tok.type != Identifier) { error_tok(&tok, "Expected identifier for enum value"); return NULL; } ev = aalloc(sizeof(*ev), sizeof(void*)); if (!ev) return NULL; /* Each value is: identifier[=expr] */ ev->hash = tok.hash; ev->name = aalloc(tok.size+1, sizeof(char)); memcpy(ev->name, tok.data, tok.size+1); /* already null terminated */ if (!parse_token_noeof()) return NULL; if (tok.type == Assign) { ev->value = parse_expr(NULL); if (!ev->value) return NULL; if (!parse_token_noeof()) return NULL; } else { ev->value = NULL; } *inspoint = ev; inspoint = &ev->next; } goto done; } case KW_Func: /* Function */ /* TODO automatically wrap in ref type? */ if (!parse_functype(type)) return NULL; type->quals = qualifiers; goto done; case Question: /* Optional type */ type->type = T_OPTIONAL; type->quals = qualifiers; innertype = aalloc(sizeof(*innertype), sizeof(void*)); innertype->deftype = D_NESTEDTYPE; type->kind.nested = innertype; type = innertype; break; case Identifier: { /* Identifier reference */ struct Ident *ident; /* TODO which qualifiers should be allowed, and when? */ if (!qualifiers && type == outertype) error_tok(&tok, "Missing type qualifiers"); ident = ident_insert(0, &tok); if (!ident) return NULL; type->type = T_IDENT; type->quals = qualifiers; type->kind.ident = ident; /* TODO type parameters? */ goto done; } case KW_Void: case KW_Bool: case KW_USize: case KW_SSize: case KW_FileOffs: case KW_Char: case KW_Byte: case KW_Short: case KW_UShort: case KW_WUShort: case KW_Int: case KW_UInt: case KW_WUInt: case KW_Long: case KW_ULong: case KW_WULong: case KW_Int32: case KW_UInt32: case KW_WUInt32: /* Elementary types */ type->type = T_ELMNTRY; type->quals = qualifiers; type->misc = KEYWORD_TO_BUILTIN(tok.type); /* ID of elementary type */ goto done; /* TODO add a default qualifier: stack/data/include...? or require struct/enum? */ case KW_Var: qualifiers |= Q_VAR; break; case KW_WriteOnly: qualifiers |= Q_WRONLY; break; case KW_Aliased: qualifiers |= Q_ALIASED; break; case KW_Threaded: qualifiers |= Q_THREADED; break; case KW_Own: qualifiers |= Q_OWN; break; case KW_Data: /* This is required when including a struct as a field */ qualifiers |= Q_VAR; /* suppress "Missing qualifiers" error */ break; default: error_tok(&tok, "Unexpected token where type was expected"); } } done: return outertype; } static int skip_expression(void) { /* TODO */ return 0; } struct OpInfo { unsigned int precedence : 8; unsigned int right_assoc : 1; unsigned int no_mixing : 1; enum { Binary, Ternary, Postfix, Prefix } type; }; #define CALL_ARRINDEX 17 /*static OpInfo get_opinfo(enum TokenType type, enum RPNContext context)*/ static struct OpInfo get_opinfo(const struct RPNEntry *rpn) { struct OpInfo op; /* TODO go through these */ /* TODO we want to have "user centric" precedence levels, where non-obvious cases require parentheses */ switch (rpn->stage.rpn.tokentype) { case Dot: op.precedence = 18; op.right_assoc = 0; op.no_mixing = 0; op.type = Binary; break; /*case Deref:*/ case Question: op.precedence = 18; op.right_assoc = 0; op.no_mixing = 0; op.type = Postfix; break; /*case AddrOf: case LRL_Op_EnumBase: case LRL_Op_SizeOf: case LRL_Op_MinSizeOf: case LRL_Op_OffsetOf: case LRL_Op_AlignOf: op.precedence = 16; op.right_assoc = 1; op.type = Prefix; break;*/ /*case LRL_Op_Compl: op.precedence = 15; op.right_assoc = 1; op.type = Prefix; break;*/ case Asterisk: case Slash: /*case Modulo:*/ op.precedence = 14; op.right_assoc = 0; op.no_mixing = 0; op.type = Binary; break; case Plus: case Minus: if (rpn->stage.rpn.context == RC_UNARY) { op.precedence = 15; op.right_assoc = 1; op.no_mixing = 0; op.type = Prefix; } else { op.precedence = 13; op.right_assoc = 0; op.no_mixing = 0; op.type = Binary; } break; /*case KW_ShL: case KW_ShR: op.precedence = 12; op.right_assoc = 0; op.type = Binary; break; case KW_BitAnd: op.precedence = 11; op.right_assoc = 0; op.type = Binary; break; case KW_BitXor: op.precedence = 10; op.right_assoc = 0; op.type = Binary; break; case KW_BitOr: op.precedence = 9; op.right_assoc = 0; op.type = Binary; break; case KW_MakeOpt: op.precedence = 8; op.right_assoc = 1; op.type = Prefix; break;*/ /*case KW_As: case KW_TypeAssert: op.precedence = 7; op.right_assoc = 0; op.type = Postfix; break;*/ case Equal: case NotEqual: case Less: case LessEqual: case Greater: case GreaterEqual: op.precedence = 6; op.right_assoc = 0; op.no_mixing = 0; op.type = Binary; break; case KW_Not: op.precedence = 4; op.right_assoc = 0; op.no_mixing = 1; op.type = Prefix; break; case KW_And: case KW_Or: /*case LRL_Op_LXor:*/ op.precedence = 4; op.right_assoc = 0; op.no_mixing = 1; op.type = Binary; break; /*case CondThen: case CondElse: op.precedence = 2; op.right_assoc = 1; op.type = Ternary; break;*/ case Assign: case PlusAssign: case MinusAssign: case MultiplyAssign: case DivideAssign: /*case ShiftLAssign: case ShiftRAssign:*/ op.precedence = 1; op.right_assoc = 1; op.no_mixing = 0; op.type = Binary; break; default: op.precedence = 0; op.right_assoc = 0; op.no_mixing = 0; op.type = Binary; break; } return op; } static const unsigned char token2op[] = { -1, -1, /* Number */ -1, /* Identifier */ -1, /* Goto identifier */ -1, /* String */ -1, /* LParen */ -1, /* RParen */ -1, /* LSquare */ -1, /* RSquare */ -1, /* LCurly */ -1, /* RCurly */ OP_ADD, /* Plus */ OP_SUB, /* Minus */ OP_MUL, /* Asterisk */ OP_DIV, /* Slash */ OP_LESS, /* Less */ OP_ASSIGN, /* Assign */ OP_GREATER, /* Greater */ -1, /* Comma */ OP_MEMBER, /* Dot */ -1, /* Question */ /* TODO */ -1, /* Colon */ -1, /* Semicolon */ OP_ADDASSIGN, /* PlusAssign */ OP_SUBASSIGN, /* MinusAssign */ OP_MULASSIGN, /* MultiplyAssign */ OP_DIVASSIGN, /* DivideAssign */ OP_LEQ, /* LessEqual */ OP_EQ,/* Equal */ OP_GEQ, /* GreaterEqual */ OP_NOTEQ, /* NotEqual */ OP_NOT, OP_AND, OP_OR }; /** * Converts a stack of tokens in Reverse Polish Notation to an AST substree */ static struct Expr *rpn_to_ast(struct RPNEntry *node) { /* TODO preserve RPN also? e.g. in expr->tmp.rpnnext */ struct OpInfo op; struct Expr *expr = NULL; struct RPNEntry *nextnode; if (!node) return NULL; do { nextnode = node->stage.rpn.next; op = get_opinfo(node); if (!op.precedence) { /* Terminals (identifiers and literals) */ struct RPNEntry entry = *node; node->stage.expr.op = token2op[entry.stage.rpn.tokentype]; switch ((int)entry.stage.rpn.tokentype) { case Identifier: if (entry.stage.rpn.context == RC_LOCALIDENT) { node->stage.expr.type = E_LOCALIDENT; node->stage.expr.a.localident = entry.stage.rpn.data.localident; } else { node->stage.expr.type = E_GLOBALIDENT; node->stage.expr.a.ident = entry.stage.rpn.data.ident; } break; case KW_Undef: node->stage.expr.type = E_UNDEF; break; case KW_None: node->stage.expr.type = E_NONE; break; case Number: node->stage.expr.type = E_NUMBER; goto add_data; case String: node->stage.expr.type = E_STRING; add_data: { struct LiteralValue *str; size_t length = entry.stage.rpn.length; str = aalloc(sizeof(struct LiteralValue), sizeof(void*)); str->length = length; str->data = aalloc(length+1, 1); /* Include null terminator */ memcpy(str->data, entry.stage.rpn.data.pointer, length+1); node->stage.expr.a.value = str; break; } case LParen: node->stage.expr.type = entry.stage.rpn.context == RC_ARGLIST ? E_ARRAY : E_INDEX; goto add_elems; case LSquare: node->stage.expr.type = entry.stage.rpn.context == RC_ARGLIST ? E_STRUCT : E_CALL; add_elems: { /* Function call, array index, array value or struct value */ size_t length = entry.stage.rpn.length; struct ExprList *list = aalloc(sizeof(struct ExprList), sizeof(void*)); struct ExprElement *elem; if (entry.stage.rpn.context != RC_ARGLIST) { /* Pop array or function */ if (!expr) internal_error(8); node->stage.expr.b.expr = expr; expr = expr->tmp.rpnnext; } list->length = length; elem = &list->first; if (!length) break; for (;;) { struct ExprElement *nextelem; if (!expr) internal_error(2); elem->expr = expr; expr = expr->tmp.rpnnext; if (!--length) { elem->next = NULL; break; } nextelem = aalloc(sizeof(struct ExprElement), sizeof(void*)); elem->next = nextelem; elem = nextelem; } node->stage.expr.a.exprlist = list; break; } case KW_Type: /* pseudo operend for types? */ /* TODO */ break; default: fprintf(stderr, "unexpected token type %d\n", entry.stage.rpn.tokentype); /* TODO can this happen? if so, report error */ internal_error(3); } node->stage.expr.tmp.rpnnext = expr; expr = &node->stage.expr; } else if (op.type == Binary) { unsigned opnum = token2op[node->stage.rpn.tokentype]; struct Expr *exprnext; if (!expr || !expr->tmp.rpnnext) internal_error(4); exprnext = expr->tmp.rpnnext; node->stage.expr.tmp.rpnnext = exprnext->tmp.rpnnext; node->stage.expr.b.expr = expr; node->stage.expr.a.expr = exprnext; node->stage.expr.type = E_BINARYOP; node->stage.expr.op = opnum; /* FIXME this will also trigger when there are parentheses */ /*if (op.no_mixing && (expr->type == E_BINARYOP && expr->op != opnum) || (exprnext->type == E_BINARYOP && exprnext->op != opnum)) { error_tok(&tok, "Invalid mixing of different operations. Use parentheses"); return NULL; }*/ expr = &node->stage.expr; } else if (op.type == Ternary) { /* TODO */ } else { /* Postfix or prefix operators */ unsigned opnum = token2op[node->stage.rpn.tokentype]; if (!expr) internal_error(6); node->stage.expr.tmp.rpnnext = expr->tmp.rpnnext; node->stage.expr.a.expr = expr; node->stage.expr.type = E_UNARYOP; node->stage.expr.op = opnum; expr = &node->stage.expr; } node = nextnode; } while (node); /*if (node) { TODO how to detect? error_tok(&tok, "Incomplete expression"); return NULL; }*/ return expr; } static struct RPNEntry *reverse_rpn_list(struct RPNEntry *first) { struct RPNEntry *current; struct RPNEntry *previous; if (!first) return NULL; previous = first; current = first->stage.rpn.next; first->stage.rpn.next = NULL; while (current) { struct RPNEntry *next = current->stage.rpn.next; current->stage.rpn.next = previous; previous = current; current = next; } do { struct RPNEntry *e = previous; fprintf(stderr, "reversed:"); while (e) { fprintf(stderr, " %d", e->stage.rpn.tokentype); e= e->stage.rpn.next; } fputc('\n', stderr); } while (0); return previous; } #define OPSTACK_TO_OUT do { \ struct RPNEntry *tmp = opstack; \ opstack = tmp->stage.rpn.next; \ tmp->stage.rpn.next = out; \ out = tmp; \ } while (0) static struct Expr *parse_expr(struct Block *block) { /* TODO error recovery? */ int operator_expected = 0; int last_line; struct RPNEntry *opstack = NULL; struct RPNEntry *out = NULL; for (;;) { struct RPNEntry *entry; if (!parse_token_noeof()) return NULL; switch ((int)tok.type) { case Number: case Identifier: case String: case KW_None: case KW_Undef: if (operator_expected) { if (tok.line != last_line && tok.type == Identifier) { reuse_token = 1; goto finished; } error_tok(&tok, "Operator expected"); return NULL; } entry = aalloc(sizeof(struct RPNEntry), sizeof(void*)); entry->stage.rpn.tokentype = tok.type; entry->stage.rpn.context = RC_TERMINAL; entry->stage.rpn.next = out; if (tok.type == String || tok.type == Number) { char *data = aalloc(tok.size+1, 1); /* Include null terminator */ memcpy(data, tok.data, tok.size+1); entry->stage.rpn.length = tok.size; entry->stage.rpn.data.pointer = data; } else if (tok.type == Identifier) { struct LocalIdent *localident = localident_find(block, &tok); fprintf(stderr, "%sident (%d:%d: %.*s)\n", localident?"local":"global", tok.line, tok.column, tok.size, tok.data); if (localident) { entry->stage.rpn.context = RC_LOCALIDENT; entry->stage.rpn.data.localident = localident; } else { entry->stage.rpn.data.ident = ident_insert(0, &tok); } } out = entry; operator_expected = 1; /* TODO type parameters */ break; /* Argument and element list (exprlist) parsing. exprlists are stored like this on the out stack: identifier value1 value2 value3 ( TERM TERM TERM TERM CALL -- -- -- -- 3 */ case LParen: case LSquare: if (operator_expected && tok.line != last_line) { /* "function(" and "array[" may not be broken apart, so assume these are separate statements */ reuse_token = 1; goto finished; } /* Move operators with higher precedence to output stack */ while (opstack) { struct OpInfo st = get_opinfo(opstack); if (st.precedence <= CALL_ARRINDEX) break; OPSTACK_TO_OUT; } entry = aalloc(sizeof(struct RPNEntry), sizeof(void*)); entry->stage.rpn.tokentype = tok.type; entry->stage.rpn.context = (operator_expected ? RC_ARGLIST : /* Function call or array index */ (tok.type == LSquare ? RC_ELEMLIST : /* Array literal */ RC_GROUPING)); /* Grouping or struct literal (until a comma is parsed) */ entry->stage.rpn.next = opstack; /* Check if the argument list is empty */ if (!parse_token_noeof()) return NULL; reuse_token = 1; entry->stage.rpn.length = (tok.type == RParen || tok.type == RSquare ? 0 : 1); opstack = entry; operator_expected = 0; break; case Comma: case RParen: case RSquare: { int popped_ops = 0; int found = 0; /* Pop everything off the stack until a ( or [ is found */ while (opstack) { if (opstack->stage.rpn.tokentype == LParen || opstack->stage.rpn.tokentype == LSquare) { found = 1; break; } OPSTACK_TO_OUT; popped_ops = 1; } if (!operator_expected && popped_ops) { error_tok(&tok, "Missing operand"); return NULL; } if (!found) { reuse_token = 1; goto finished; } if (tok.type == Comma) { /* TODO check for repeated comma */ if (opstack->stage.rpn.context == RC_GROUPING) { opstack->stage.rpn.context = RC_ELEMLIST; } if (!parse_token_noeof()) return NULL; reuse_token = 1; if (tok.type != RParen) { /* Comma, but not a trailing one */ opstack->stage.rpn.length++; } else if (opstack->stage.rpn.context == RC_ARGLIST) { /* Trailing commas are not allowed in arglists or array index expressions */ error_tok(&tok, "Trailing comma not allowed in " "argument list or index"); break; } operator_expected = 0; } else { struct RPNEntry *tmp; /* Pop the left parenthesis off the stack */ tmp = opstack; opstack = tmp->stage.rpn.next; /* Grouping parantheses are not pushed to the out stack */ if (tmp->stage.rpn.context != RC_GROUPING || tmp->stage.rpn.tokentype != LParen || tmp->stage.rpn.length != 1) { tmp->stage.rpn.next = out; /*fprintf(stderr, "pushing parenthesis %d %d %d\n", tmp->stage.rpn.context != RC_GROUPING, tmp->stage.rpn.tokentype != LParen, tmp->stage.rpn.length != 1);*/ out = tmp; } operator_expected = 1; } break; } /* TODO conditional operator (ternary operator) */ case LCurly: case RCurly: case KW_Data: case KW_Func: case KW_Type: case KW_Void: case KW_Bool: case KW_USize: case KW_SSize: case KW_FileOffs: case KW_Char: case KW_Byte: case KW_Short: case KW_UShort: case KW_WUShort: case KW_Int: case KW_UInt: case KW_WUInt: case KW_Long: case KW_ULong: case KW_WULong: case KW_Int32: case KW_UInt32: case KW_WUInt32: case KW_Ref: case KW_Addr: case KW_MergedRef: case KW_NoReturn: case KW_Struct: case KW_Enum: case KW_Var: case KW_WriteOnly: case KW_Own: case KW_Aliased: case KW_Threaded: case KW_If: case KW_Else: case KW_While: case KW_Do: case KW_For: /* KW_In could be used as an operator also */ case KW_LoopEnd: case KW_LoopEmpty: case KW_Switch: case KW_Case: case KW_With: case KW_Default: case KW_Assert: case KW_Break: case KW_Continue: case KW_Goto: case KW_Return: reuse_token = 1; goto finished; default: { struct OpInfo op; entry = aalloc(sizeof(struct RPNEntry), sizeof(void*)); entry->stage.rpn.tokentype = tok.type; entry->stage.rpn.context = operator_expected ? RC_OP : RC_UNARY; /* FIXME add param to get_opinfo? */ op = get_opinfo(entry); if (!op.precedence) { error_tok(&tok, "Unexpected token in expression"); return NULL; } if (op.type == Prefix) { entry->stage.rpn.context = RC_UNARY; entry->stage.rpn.next = opstack; opstack = entry; operator_expected = 0; break; } if (!operator_expected) { error_tok(&tok, "Operator not expected here"); return NULL; } /* Binary or postfix operator */ while (opstack) { /*struct OpInfo st = get_opinfo(st_type, st_entry->context);*/ struct OpInfo st = get_opinfo(opstack); if (!st.precedence) break; if (st.no_mixing && op.precedence == st.precedence && opstack->stage.rpn.tokentype != tok.type) { error_tok(&tok, "Ambiguous mixing of different operations. Use parentheses"); } /* if ((!st.right_assoc && op.precedence <= st.precedence) || (st.right_assoc && op.precedence < st.precedence)) { if (!(!st.right_assoc && op.precedence <= st.precedence) && !(st.right_assoc && op.precedence < st.precedence)) {*/ if (!(!st.right_assoc && op.precedence <= st.precedence) && !(st.right_assoc && op.precedence < st.precedence)) { break; } OPSTACK_TO_OUT; } if (op.type == Postfix) { entry->stage.rpn.context = RC_UNARY; entry->stage.rpn.next = out; out = entry; operator_expected = 1; break; } entry->stage.rpn.context = RC_OP; entry->stage.rpn.next = opstack; opstack = entry; operator_expected = 0; break; } } last_line = tok.line; } finished: /* Move remaining tokens on the operator stack to the output stack */ while (opstack) { if (opstack->stage.rpn.tokentype == LParen) { error_tok(&tok, "Unclosed paranthesis"); return NULL; } OPSTACK_TO_OUT; } if (!out || !operator_expected) { error_tok(&tok, "Incomplete expression"); return NULL; } fprintf(stderr, "end of expr: %d (%d:%d[%d])\n", tok.type, tok.line, tok.column, reuse_token); /* Read from stack in reverse order, and build the ASTExpr */ out = reverse_rpn_list(out); return rpn_to_ast(out); } static int skip_funcbody(int startline, int startcol) { int status, level = 0; for (;;) { status = parse_token(); if (status <= 0) goto error_unbalanced; if (tok.type == LCurly) level++; else if (tok.type == RCurly) { if (!level--) return 1; } } error_unbalanced: if (!status) { error_linecol(startline, startcol, "Missing/unbalanced ending }"); } return 0; } /** * Parses a statement. * * @param stmt Pre-allocated statement, or NULL to allocate. * @param block Current statement block. Used for identifier * creation and lookup. * @return Pointer to statement, or NULL on error. */ static struct Stmt *parse_stmt(struct Stmt *stmt, struct Block *block) { if (!parse_token_noeof()) return NULL; if (!stmt) stmt = aalloc(sizeof(struct Stmt), sizeof(void*)); switch (tok.type) { case LCurly: { struct Block *nested; stmt->type = S_BLOCK; reuse_token = 1; nested = parse_block(NULL, block); if (!nested) return NULL; stmt->kind.block = nested; break; } case KW_Ref: case LSquare: case KW_Struct: case KW_Enum: case Question: case KW_Void: case KW_Bool: case KW_USize: case KW_SSize: case KW_FileOffs: case KW_Char: case KW_Byte: case KW_Short: case KW_UShort: case KW_WUShort: case KW_Int: case KW_UInt: case KW_WUInt: case KW_Long: case KW_ULong: case KW_WULong: case KW_Int32: case KW_UInt32: case KW_WUInt32: case KW_Var: case KW_WriteOnly: case KW_Aliased: case KW_Threaded: case KW_Own: { /* Start of a type (and a variable declaration) */ struct LocalIdent *localvar = aalloc(sizeof(struct LocalIdent), sizeof(void*)); reuse_token = 1; if (!parse_type(&localvar->type)) return NULL; if (!parse_expected(Identifier)) return NULL; if (!localident_insert(block, localvar, &tok)) return NULL; /* Initial value */ if (!parse_token_noeof()) return NULL; if (tok.type == Assign) { localvar->initval = parse_expr(block); } else { localvar->initval = NULL; reuse_token = 1; } stmt->type = S_DECL; stmt->kind.localvar = localvar; break; } case Identifier: case LParen: { /* Start of an expression */ struct Expr *expr; reuse_token = 1; expr = parse_expr(block); if (!expr) return NULL; stmt->type = S_EXPR; stmt->kind.expr = expr; break; } case KW_If: { struct CtlIf *ctl = aalloc(sizeof(struct CtlIf), sizeof(void*)); if (!(ctl->cond = parse_expr(block))) return NULL; if (!parse_ctlblock(&ctl->true_block, block)) return NULL; if (!parse_token_noeof()) return NULL; if (tok.type == KW_Else) { struct Block *false_block; if (!parse_token_noeof()) return NULL; reuse_token = 1; if (tok.type == KW_If) { /* else may be followed by if */ /* XXX or replace "else if" with elseif/elif/etc? */ false_block = parse_block(NULL, block); } else { false_block = parse_ctlblock(NULL, block); } if (!false_block) return NULL; ctl->false_block = false_block; } else { ctl->false_block = NULL; reuse_token = 1; } stmt->type = S_IF; stmt->kind.ifstm = ctl; break; } case KW_While: { struct CtlWhile *ctl = aalloc(sizeof(struct CtlWhile), sizeof(void*)); ctl->loopinfo.gotoid = ++current_gotoid; ctl->loopinfo.base = current_loop; ctl->loopinfo.has_break = 0; current_loop = &ctl->loopinfo; if (!(ctl->cond = parse_expr(block))) return NULL; if (!parse_ctlblock(&ctl->block, block)) return NULL; current_loop = ctl->loopinfo.base; if (!parse_optional_block(&ctl->end, block, KW_LoopEnd) || !parse_optional_block(&ctl->empty, block, KW_LoopEmpty)) return NULL; stmt->type = S_WHILE; stmt->kind.whilestm = ctl; break; } case KW_Do: { struct CtlWhile *ctl = aalloc(sizeof(struct CtlWhile), sizeof(void*)); ctl->loopinfo.gotoid = ++current_gotoid; ctl->loopinfo.base = current_loop; ctl->loopinfo.has_break = 0; current_loop = &ctl->loopinfo; if (!parse_ctlblock(&ctl->block, block)) return NULL; if (!parse_expected(KW_While)) return NULL; if (!(ctl->cond = parse_expr(block))) return NULL; current_loop = ctl->loopinfo.base; if (!parse_optional_block(&ctl->end, block, KW_LoopEnd)) return NULL; stmt->type = S_DOWHILE; stmt->kind.whilestm = ctl; break; } case KW_For: { /* XXX should we have different syntax for different types of loops? */ struct CtlFor *ctl = aalloc(sizeof(struct CtlFor), sizeof(void*)); ctl->loopinfo.gotoid = ++current_gotoid; ctl->loopinfo.base = current_loop; ctl->loopinfo.has_break = 0; current_loop = &ctl->loopinfo; ctl->block.base = block; ctl->block.idents = NULL; /* Loop identifier */ if (!parse_type(&ctl->loopvar.type)) return NULL; if (!parse_expected(Identifier)) return NULL; if (!localident_insert(&ctl->block, &ctl->loopvar, &tok)) return NULL; /* Loop expression */ if (!parse_expected(KW_In)) return NULL; ctl->loopexpr = parse_expr(&ctl->block); if (!ctl->loopexpr) return NULL; /* Loop block */ if (!require_block_start()) return NULL; if (!parse_token_noeof()) return NULL; /* LCurly or first token */ if (!parse_block_internal(&ctl->block)) return NULL; current_loop = ctl->loopinfo.base; if (!parse_optional_block(&ctl->end, block, KW_LoopEnd) || !parse_optional_block(&ctl->empty, block, KW_LoopEmpty)) return NULL; stmt->type = S_FOR; stmt->kind.forstm = ctl; break; } case KW_Switch: { struct CtlSwitch *ctl = aalloc(sizeof(struct CtlSwitch), sizeof(void*)); struct SwitchCase **caseptr; int seen_default = 0; if (!(ctl->cond = parse_expr(block))) return NULL; if (!parse_expected(LCurly)) return NULL; ctl->cases = NULL; caseptr = &ctl->cases; if (!parse_token_noeof()) return NULL; for (;;) { struct SwitchCase *cas; struct CaseValue *cv; if (tok.type == RCurly) break; if (tok.type != KW_Case && tok.type != KW_Default) { error_tok(&tok, "Expected \"case\" or \"default\" keyword."); return NULL; } cas = aalloc(sizeof(struct SwitchCase), sizeof(void*)); cas->next = NULL; cv = &cas->values; for (;;) { struct CaseValue *nextcv; if (tok.type == KW_Case) { cv->expr = parse_expr(block); } else if (tok.type == KW_Default) { cv->expr = NULL; seen_default = 1; } else internal_error(10); if (!parse_token_noeof()) return NULL; if (tok.type == KW_With) { if (!(cv->withblock = parse_block(NULL, block))) return NULL; if (!parse_token_noeof()) return NULL; } else { cv->withblock = NULL; } /* Look ahead at next token */ if (tok.type != KW_Case && tok.type != KW_Default) { reuse_token = 1; break; } if (seen_default) { error_tok(&tok, "No cases can come after \"default\"."); return NULL; } nextcv = aalloc(sizeof(struct CaseValue), sizeof(void*)); cv->next = nextcv; cv = nextcv; } if (!parse_case_block(&cas->block)) return NULL; *caseptr = cas; if (!parse_token_noeof()) return NULL; if (tok.type == RCurly) break; else if (seen_default) { error_tok(&tok, "Expected } after \"default\" case."); return NULL; } caseptr = &cas->next; } break; } case KW_Assert: { struct Expr *expr = parse_expr(block); if (!expr) return NULL; stmt->type = S_ASSERT; stmt->kind.expr = expr; break; } case KW_Break: if (!current_loop) { error_tok(&tok, "No loop here"); return NULL; } stmt->type = S_BREAK; stmt->kind.loopinfo = current_loop; current_loop->has_break = 1; break; case KW_Continue: if (!current_loop) { error_tok(&tok, "No loop here"); return NULL; } stmt->type = S_CONT; stmt->kind.loopinfo = current_loop; break; case KW_Goto: if (!parse_expected(Identifier)) { error_tok(&tok, "\"goto\" must be followed by an identifier"); return NULL; } stmt->type = S_GOTO; stmt->kind.gotoident = gotoident(¤t_func, &tok); break; case GotoTarget: stmt->type = S_LABEL; stmt->kind.gotoident = gotoident(¤t_func, &tok); stmt->kind.gotoident->declared = 1; break; case KW_Return: { struct Expr *expr = parse_expr(block); if (!expr) return NULL; stmt->type = S_RETURN; stmt->kind.expr = expr; break; } default: error_tok(&tok, "Expected valid start of statement"); return NULL; } return stmt; } static int parse_optional_block(struct Block **blockptr, struct Block *base, enum TokenType blocktype) { struct Block *block; if (!parse_token_noeof()) return 0; if (tok.type != blocktype) { *blockptr = NULL; reuse_token = 1; return 1; } block = aalloc(sizeof(struct Block), sizeof(void*)); if (!block) return 0; *blockptr = block; return parse_ctlblock(block, base) != NULL; } static int require_block_start(void) { if (!parse_token_noeof()) return 0; reuse_token = 1; if (tok.type != LCurly && tok.type != KW_Return && tok.type != KW_Break && tok.type != KW_Continue && tok.type != KW_Goto) { error_tok(&tok, "Expected { or return/break/continue/goto"); return 0; } return 1; } static struct Block *parse_ctlblock(struct Block *block, struct Block *base) { if (!require_block_start()) return NULL; return parse_block(block, base); } static struct Block *parse_block(struct Block *block, struct Block *base) { if (!parse_token_noeof()) return NULL; if (!block) block = aalloc(sizeof(struct Block), sizeof(void*)); block->base = base; block->idents = NULL; return parse_block_internal(block); } static struct Block *parse_block_internal(struct Block *block) { if (tok.type == LCurly) { struct Stmt *preallocd = &block->stmt; struct Stmt **nextptr = NULL; block->stmt.next = NULL; for (;;) { struct Stmt *thisstmt; if (!parse_token_noeof()) return NULL; if (tok.type == RCurly) break; reuse_token = 1; thisstmt = parse_stmt(preallocd, block); if (!thisstmt) return NULL; if (nextptr) *nextptr = thisstmt; nextptr = &thisstmt->next; preallocd = NULL; } if (nextptr) *nextptr = NULL; if (preallocd) { block->stmt.type = S_NOP; } fprintf(stderr, "exiting from parse_block\n"); } else { reuse_token = 1; block->stmt.next = NULL; if (!parse_stmt(&block->stmt, block)) return NULL; } return block; } static struct Block *parse_case_block(struct Block *block) { struct Stmt *preallocd = &block->stmt; struct Stmt **nextptr = NULL; if (tok.type == LCurly) { error_tok(&tok, "Curly braces are not needed in case blocks."); } block->stmt.next = NULL; for (;;) { struct Stmt *thisstmt; if (!parse_token_noeof()) return NULL; if (tok.type == RCurly || tok.type == KW_Case || tok.type == KW_Default) break; reuse_token = 1; thisstmt = parse_stmt(preallocd, block); if (!thisstmt) return NULL; if (nextptr) *nextptr = thisstmt; nextptr = &thisstmt->next; preallocd = NULL; } if (nextptr) *nextptr = NULL; if (preallocd) { block->stmt.type = S_NOP; } reuse_token = 1; return block; } /** * Parses a file. * * @param decls_only Only parse top level declarations * @return 1 if successful, 0 if not. */ int parse_file(FILE *sourcefile, const int decls_only) { /* FIXME When decls_only is 0 we should not parse decls again (or at least not create the decls twice) - We could solve this by having an interface and implementation part, like Turbo Pascal based languages. - Or should we have headers, like C? - Or a keyword (like "public" or "pub") to export declarations to other files. Non-public declarations are created in the decls_only phase. */ int status; file = sourcefile; tok.line = 1; tok.column = 0; if (!decls_only) { arena_new(); identarena_new(); } for (;;) { status = parse_token(); if (status <= 0) break; toplevel_token: fprintf(stderr, "toplevel %d [%d]>%.*s<\n", tok.type, tok.type == Identifier ? tok.size : 0, tok.type == Identifier ? tok.size : 0, tok.data); switch (tok.type) { case KW_Type: { struct Ident *ident = parse_ident(D_TYPEDEF); if (!ident) return 0; /* Parse "= type" */ status = parse_token(); if (status <= 0) break; if (tok.type != Assign) goto toplevel_token; if (!parse_type(&ident->decl.type)) return 0; break; } /* TODO separate toplevel entry type for constants and macros? */ case KW_Data: { struct Ident *ident = parse_decl(D_DATADEF); if (!ident) return 0; /* Parse "= value" */ status = parse_token(); if (status <= 0) break; /*ident->decl.kind.initval = NULL;*/ if (tok.type != Assign) goto toplevel_token; if (decls_only) { if (!skip_expression()) return 0; } else { /*ident->decl.kind.initval = parse_expr(NULL);*/ /* FIXME should not allow things that cannot be used in toplevel exprs. should this include the address-of operation? perhaps there should be an option in the module config file, that defaults to off. */ struct Expr *initval = parse_expr(NULL); if (initval) { if (!output_initval(ident, initval)) return 0; } } goto toplevel_token; } case KW_Func: { struct Ident *ident = parse_ident(D_FUNCDEF); if (!ident) return 0; if (!parse_functype(&ident->decl.type)) return 0; ident->decl.type.quals = 0; /* FIXME will overwrite if we add qualifier parsing to function types */ /* Parse "{ body }" */ status = parse_token(); if (status <= 0) break; if (tok.type != LCurly) goto toplevel_token; if (decls_only) { if (!skip_funcbody(tok.line, tok.column)) return 0; } else { struct Block *funcbody; arena_new(); reuse_token = 1; current_loop = NULL; current_gotoid = 0; current_func.gotoidents = NULL; /*ident->decl.kind.funcbody = parse_block(NULL, NULL);*/ funcbody = parse_block(NULL, NULL); if (funcbody) { /* TODO verify goto. jumps into for loops should not be allowed */ if (!output_funcbody(ident, funcbody)) return 0; } arena_free(); } break; } default: error_tok(&tok, "Unexpected token at top level"); return 0; } } if (!decls_only) { arena_free(); identarena_free(); } return !status ? 1 : 0; /* 0 is an expected EOF, -1 is error */ }