/* parser.c -- Builds an AST from a token stream. Copyright © 2011-2016 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include "parser.h" #include "context_private.h" #include "misc.h" #include "string.h" typedef enum { RE_TERMINAL, /* no arguments follow because it's a terminal symbol */ RE_OP, /* number of arguments depends on the operator type */ RE_UNARY, /* a single argument follows (used for unary +/-) */ RE_ARGLIST, /* function call or array index access */ RE_GROUPING, /* grouping, e.g. (x+x)*2 */ RE_ELEMLIST /* structs and arrays */ } REContext; typedef struct { const LRLToken *token; REContext context; size_t num_args; } RPNEntry; static LRLASTExpr *parse_expr(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens); static LRLASTType *parse_type(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens); static LRLASTExpr *rpn_to_ast(LRLCtx *ctx, const LRLToken *operator, LRLIdent *scope, const RPNEntry *out_stack, size_t *out_size); static int is_declaration(LRLCtx *ctx, const LRLToken *tokens, const LRLToken *start); #define set_def_node(ident, def) do { \ if ((ident) && !(ident)->def_node) { \ (ident)->def_node = (def); \ } \ } while (0) static int expected(LRLCtx *ctx, const LRLToken **tokens, LRLTokenType expected_token) { if ((*tokens)->type != expected_token) { const char *tokstr; lrl_err_set_token(ctx, *tokens, 0); switch ((int)expected_token) { case LRL_Sym_LParen: tokstr = "("; break; case LRL_Sym_RParen: tokstr = ")"; break; case LRL_Sym_LSquare: tokstr = "["; break; case LRL_Sym_RSquare: tokstr = "]"; break; case LRL_Sym_LCurly: tokstr = "{"; break; case LRL_Sym_RCurly: tokstr = "}"; break; case LRL_Sym_Semicolon: tokstr = ";"; break; case LRL_Sym_Comma: tokstr = ","; break; case LRL_Sym_NamespaceSep: tokstr = ":"; break; case LRL_Op_Assign: tokstr = "="; break; case LRL_KW_Enum: tokstr = "enum"; break; case LRL_KW_Bits: tokstr = "bits"; break; case LRL_KW_While: tokstr = "while"; break; case LRL_KW_In: tokstr = "in"; break; case LRL_TT_String: tokstr = "(a string)"; break; default: tokstr = NULL; } if (tokstr) { lrl_err_set_char_range(ctx, tokstr, strlen(tokstr), 1); } lrl_err_finish(ctx, LRL_Err_UnexpectedToken); return 0; } (*tokens)++; return 1; } /* The "skip_" functions are mainly used for error recovery, but also for looking ahead during parsing. The token pointer is placed after the final closing parenthesis/semicolon. */ /** * Skips through a parenthesis. The token pointer should point to the token * right after the opening parenthesis. Returns 1 if OK, 0 if an error was * displayed. */ static int skip_parens(LRLCtx *ctx, const LRLToken **tokens) { const LRLToken *token = *tokens + 1; size_t depth = 1; do { const LRLTokenType type = token->type; if (!type) { lrl_err_set_token(ctx, token, 0); lrl_err_set_token(ctx, *tokens, 1); lrl_err_finish(ctx, LRL_Err_UnclosedParenthesis); *tokens = token; return 0; } if (lrl_is_paren(type)) { if (lrl_is_start_paren(type)) { depth++; } else { depth--; } } else if (type == LRL_KW_Typedef) { /* Break and continue parsing at top level */ lrl_err_token(ctx, LRL_Err_UnexpectedTokenInDefOrStmt, token); *tokens = token; return 0; } token++; } while (depth); *tokens = token; return 1; } #define skip_statement_skip_paren(ctx, t) skip_statement(ctx, t, 0) /** * Skips all tokens until the next semicolon, end of open parenthesis or * something that cannot appear inside a statement (e.g. a code block or * a typedef). Tokens in parentheses are also skipped. * * Returns 0 if we should try to break out to the top level. */ static int skip_statement(LRLCtx *ctx, const LRLToken **tokens, LRLTokenType dont_skip_type) { const LRLToken *token = *tokens; int ret = 1; while (token->type && token->type != LRL_Sym_Semicolon && token->type != LRL_Sym_Comma) { const LRLTokenType type = token->type; if (lrl_is_paren(type)) { /* Parentheses are handled specially */ if (!lrl_is_start_paren(type)) { if (type != dont_skip_type) { token++; } break; } skip_parens(ctx, &token); /* Stop if we reached a code block, and it's not an "else" block */ if (type == LRL_Sym_LCurly && token->type && token->type != LRL_KW_Else) { ret = 0; break; } } else if (type == LRL_KW_Typedef) { ret = 0; break; } else { token++; } } if ((token->type == LRL_Sym_Semicolon || token->type == LRL_Sym_Comma) && token->type != dont_skip_type) { token++; } *tokens = token; return ret; } static int expect_end_of_statement(LRLCtx *ctx, const LRLToken **tokens) { if (expected(ctx, tokens, LRL_Sym_Semicolon)) { return 1; } else { skip_statement_skip_paren(ctx, tokens); return 0; } } static int skip_ident(LRLCtx *ctx, const LRLToken **tokens) { const LRLToken *token = *tokens; int ok = 0; /* "here" is also a valid identifier */ if (token->type == LRL_KW_Here) { token++; if (token->type != LRL_Sym_NamespaceSep) { ok = 1; goto end; } token++; } /* Process sequence of namespace and identifier tokens: aaa:bbb:ccc ... */ for (;;) { /* First (and odd) tokens must be identifiers */ if (token->type != LRL_TT_Ident) { if (*tokens == token) { lrl_err_token(ctx, LRL_Err_ExpectedIdentifier, *tokens); } else { lrl_err_token(ctx, LRL_Err_ColonWithoutSubident, token-1); } goto end; } token++; /* Second (and even) tokens must be separators */ if (token->type != LRL_Sym_NamespaceSep) break; token++; } ok = 1; end: *tokens = token; return ok; } static int is_valid_ident(LRLCtx *ctx, const LRLToken *tokens) { return skip_ident(ctx, &tokens); } #define is_ident(token) ((token)->type == LRL_TT_Ident || \ (token)->type == LRL_KW_Here) /** "do nothing" scope */ static const LRLIdent discarding_scope; /** * Parses an identifier and moves the *tokens pointer forward. * newident may optionally point to a pre-allocated identifier to use, * otherwise it's allocated. */ static LRLIdent *parse_ident(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens, LRLIdentOperation op, LRLIdent *newident) { const LRLToken *first = *tokens; if (op == LRL_Ident_CreateMember) { /* May only contain a plain identifier, without any namespaces */ if (first->type != LRL_TT_Ident) { lrl_err_token(ctx, LRL_Err_ExpectedIdentifier, first); return NULL; } else if (first[1].type == LRL_Sym_NamespaceSep) { lrl_err_token(ctx, LRL_Err_NamespaceNotAllowed, first+1); /* most likely the user meant ";". continue */ } ++*tokens; } else if (!skip_ident(ctx, tokens)) { return NULL; } return scope == &discarding_scope ? scope : lrl_ident_get(ctx, scope, first, op, newident); } static void defer_ident(LRLCtx *ctx, LRLIdent *scope, LRLIdentRef *identref, const LRLToken **tokens) { const LRLToken *first = *tokens; identref->ident = NULL; /* Deferred */ identref->first_token = first; identref->scope = scope; identref->next = LRL_IDENTREF_NEW; if (skip_ident(ctx, tokens) && scope != &discarding_scope) { lrl_ident_defer(ctx, identref); } else { identref->ident = LRL_IDENT_MISSING; } } static void defer_uses(LRLCtx *ctx, LRLIdent *scope, LRLIdentRef *identref, const LRLToken **tokens) { const LRLToken *first = *tokens; identref->ident = NULL; /* Deferred */ identref->first_token = first; identref->scope = scope; identref->next = LRL_IDENTREF_NEW; if (skip_ident(ctx, tokens)) { lrl_ident_defer_uses(ctx, identref); } else { identref->ident = LRL_IDENT_MISSING; } } /** * Returns the scope for the function body that the given scope is inside of. */ static LRLIdent *get_function_scope(LRLIdent *scope) { while (scope->scope && (scope->scope->flags & LRL_IdFl_FunctionBody) == 0) { scope = (LRLIdent*)scope->scope; } return scope; } static LRLASTExpr *make_undef_expr(const LRLToken *token) { LRLASTExpr *expr = malloc(sizeof(LRLASTExpr)); expr->ast_type = LRL_AST_Value_Undefined; expr->from = expr->to = token; memset(&expr->typeref, 0, sizeof(LRLTypeRef)); return expr; } static LRLASTType *make_private_type(const LRLToken *from, const LRLToken *to) { LRLASTType *type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Private; type->from = from; type->to = to; type->quals = 0; type->unique_id = LRL_UNIQUEID_UNSET; return type; } static LRLTypeQualifiers parse_qualifiers(LRLCtx *ctx, const LRLToken **tokens) { const LRLToken *token = *tokens; LRLTypeQualifiers quals = 0; for (;; token++) { LRLTypeQualifiers addquals; if (!token->type) { lrl_err_token(ctx, LRL_Err_ExpectedType, token); break; } if (token->type == LRL_KW_Incomplete) { lrl_err_token(ctx, LRL_Err_IncompleteKeywordNotOnTypedef, token); continue; } if (token->type < LRL_FirstQual || token->type > LRL_LastQual) break; /* Convert and add to bitmask */ addquals = 1 << (token->type - LRL_FirstQual); if (quals & addquals) { lrl_err_token(ctx, LRL_Err_RepeatedKeyword, token); } quals |= addquals; } *tokens = token; return quals; } /** * If the given scope is the hidden scope of a typedef (used for type params * and struct members), this function returns the visible typedef scope * (used for enum members). */ static LRLIdent *typedef_get_visible_scope(LRLIdent *scope) { if (scope->flags & LRL_IdFl_TypedefAnon) { return (LRLIdent*)scope->scope; } else if (scope->flags & LRL_IdFl_Statement) { /* Do not add enum identifiers directly in statement scopes */ return lrl_ident_create_priv_scope(scope); } return scope; } static LRLASTType *parse_enum(LRLCtx *ctx, LRLIdent *scope, LRLASTType *base_type, const LRLToken **tokens) { const LRLToken *token = *tokens; LRLIdent *defscope; LRLASTType *type = NULL; LRLASTDefList *list_first = NULL, *list_last = NULL; if (!expected(ctx, &token, LRL_KW_Enum)) return NULL; defscope = typedef_get_visible_scope(scope); type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Enum; type->kind.enu.scope = defscope; type->from = token-1; type->kind.enu.base_type = base_type; type->unique_id = 0; /* Read values */ if (!expected(ctx, &token, LRL_Sym_LParen)) goto end; while (token->type && token->type != LRL_Sym_RParen) { LRLASTDefList *entry; LRLIdent *ident; /* Read identifier */ ident = parse_ident(ctx, defscope, &token, LRL_Ident_CreateMember, NULL); if (!ident) goto recover; if (ident != &discarding_scope) { ident->flags |= LRL_IdFl_EnumValue; } /* Add to list */ linked_append(&entry, &list_first, &list_last); entry->def.ast_type = LRL_AST_Def_Data; entry->def.kind.data.type = type; /* e.g. type of "false" is "bool" */ entry->def.kind.data.flags = 0; entry->def.kind.data.ident = ident; entry->def.kind.data.value = NULL; if (ident != &discarding_scope) { set_def_node(ident, (LRLASTDefOrStmt*)&entry->def); } /* Read value (if any) */ if (token->type == LRL_Op_Assign) { LRLASTExpr *value; token++; value = parse_expr(ctx, scope, &token); if (!value) goto recover; entry->def.kind.data.value = value; } else { /* Auto increment. Computed later */ } /* Check for end of enum */ if (token->type == LRL_Sym_RParen) break; /* (a, b) */ expected(ctx, &token, LRL_Sym_Comma); continue; recover: if (!skip_statement(ctx, &token, LRL_Sym_RParen)) break; } expected(ctx, &token, LRL_Sym_RParen); end: type->to = token-1; type->kind.enu.values = list_first; *tokens = token; return type; } /** * Parses a struct type. If is_funcparams is set, then the struct is * allowed to end with "...*" to indicate a variadic function */ static LRLASTType *parse_struct(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens, int is_funcparams) { const LRLToken *token = *tokens; LRLASTType *type = NULL; LRLASTDefList *list_first = NULL, *list_last = NULL; if (!expected(ctx, &token, LRL_Sym_LParen)) return NULL; type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Struct; type->kind.struc.scope = scope; type->kind.struc.flags = 0; type->from = token-1; type->unique_id = LRL_UNIQUEID_UNSET; while (token->type && token->type != LRL_Sym_RParen) { LRLASTDefList *entry; LRLASTType *member_type; LRLIdent *ident; if (scope != &discarding_scope) { ident = calloc(1, sizeof(LRLIdent)); ident->scope = scope; ident->flags |= LRL_IdFl_StructMember; } else { ident = (LRLIdent*)&discarding_scope; } /* Read type */ member_type = parse_type(ctx, ident, &token); if (!member_type) goto recover; if (member_type->quals & LRL_Qual_Var) { lrl_err_type(ctx, LRL_Err_VarInsideType, member_type); } /* Add to list */ linked_append(&entry, &list_first, &list_last); entry->def.ast_type = LRL_AST_Def_Data; entry->def.kind.data.ident = NULL; entry->def.kind.data.flags = 0; entry->def.kind.data.type = member_type; entry->def.kind.data.value = NULL; if (is_ident(token)) { /* Read identifier */ ident = parse_ident(ctx, scope, &token, LRL_Ident_CreateMember, ident); if (ident != &discarding_scope) { entry->def.kind.data.ident = ident; set_def_node(ident, (LRLASTDefOrStmt*)&entry->def); } } /* Check for end of struct */ if (token->type == LRL_Sym_RParen) break; /* (int a, int b) */ expected(ctx, &token, LRL_Sym_Comma); /* "...*" = C-style varargs */ if (is_funcparams && token->type == LRL_Sym_CVarArg) { type->kind.struc.flags |= LRL_SF_CVarArg; token++; if (token->type != LRL_Sym_RParen) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); } } continue; recover: if (!skip_statement(ctx, &token, LRL_Sym_RParen)) break; } expected(ctx, &token, LRL_Sym_RParen); type->to = token-1; type->kind.struc.members = list_first; *tokens = token; return type; } static LRLASTType *parse_bitfield(LRLCtx *ctx, LRLIdent *scope, LRLASTType *base_type, const LRLToken **tokens) { const LRLToken *token = *tokens; LRLASTType *type = NULL; LRLASTDefList *list_first = NULL, *list_last = NULL; if (!expected(ctx, &token, LRL_KW_Bits)) return NULL; if (!expected(ctx, &token, LRL_Sym_LParen)) return NULL; scope = lrl_ident_create_priv_scope(scope); type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Bitfield; type->kind.bitfield.scope = scope; type->from = token-1; type->kind.bitfield.base_type = base_type; type->unique_id = 0; /* Read values */ while (token->type && token->type != LRL_Sym_RParen) { LRLASTDefList *entry; LRLASTExpr *num_bits; LRLASTType *member_type; LRLIdent *ident; if (scope != &discarding_scope) { ident = calloc(1, sizeof(LRLIdent)); ident->scope = scope; } else { ident = (LRLIdent*)&discarding_scope; } /* Syntaxes: x, => 1 bits boolean x, 2 bits x, => 2 bits base_type x, 2 bits int x, => 2 bits int x, */ if (token->type == LRL_TT_Ident && (token[1].type == LRL_Sym_Comma || token[1].type == LRL_Sym_RParen)) { /* x, --> Bits = 1, Type = bool (defaults) */ num_bits = NULL; member_type = NULL; } else { /* X bits [type] y, --> Bits = X, Type = type or NULL */ num_bits = parse_expr(ctx, ident, &token); if (!num_bits) goto recover; if (!expected(ctx, &token, LRL_KW_Bits)) goto recover; member_type = (is_declaration(ctx, token, token) > 0 ? parse_type(ctx, ident, &token) : NULL); if (member_type && member_type->quals & LRL_Qual_Var) { lrl_err_type(ctx, LRL_Err_VarInsideType, member_type); } } ident = parse_ident(ctx, scope, &token, LRL_Ident_CreateMember, ident); /* Add to list */ linked_append(&entry, &list_first, &list_last); entry->def.ast_type = LRL_AST_Def_Data; entry->def.kind.data.ident = ident; entry->def.kind.data.flags = 0; entry->def.kind.data.type = member_type; entry->def.kind.data.value = num_bits; if (ident != &discarding_scope) { entry->def.kind.data.ident = ident; set_def_node(ident, (LRLASTDefOrStmt*)&entry->def); } /* Check for end */ if (token->type == LRL_Sym_RParen) break; expected(ctx, &token, LRL_Sym_Comma); continue; recover: if (!skip_statement(ctx, &token, LRL_Sym_RParen)) break; } expected(ctx, &token, LRL_Sym_RParen); type->to = token-1; type->kind.bitfield.members = list_first; *tokens = token; return type; } /** * Parses the #[] part of an array type. The array element type should * be passed in the elemtype parameter. */ static LRLASTType *parse_array_type(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens, LRLASTType *elemtype) { const LRLToken *token = *tokens; LRLASTType *root = NULL; /* leftmost dimension (row) */ LRLASTType *leaf = NULL; /* rightmost dimension (column) */ if (!expected(ctx, &token, LRL_Sym_LSquare)) return NULL; for (;;) { LRLASTType *array; LRLASTExpr *length = NULL; const LRLToken *first_token = token; if (!token->type || token->type == LRL_Sym_Semicolon) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); break; } else if (token->type == LRL_Sym_RSquare || token->type == LRL_Sym_Comma) { /* No array length specified, like [] or [,] (should be [undefined] or [undefined,undefined]) */ lrl_err_token(ctx, LRL_Err_EmptyArrayLength, token); length = make_undef_expr(token); } else { /* Read array length */ length = parse_expr(ctx, scope, &token); } /* Add array dimension (in reverse order) */ array = malloc(sizeof(LRLASTType)); array->ast_type = LRL_AST_Type_Array; array->from = first_token-2; /* including surrounding #[ , and ] */ array->to = token; array->quals = elemtype->quals; array->unique_id = 0; array->kind.array.type = elemtype; array->kind.array.length = length; if (leaf) leaf->kind.array.type = array; leaf = array; if (!root) root = array; /* Check for end */ if (token->type == LRL_Sym_RSquare) break; if (!expected(ctx, &token, LRL_Sym_Comma)) break; } expected(ctx, &token, LRL_Sym_RSquare); *tokens = token; return root; } static LRLASTTypeList *parse_type_params(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens) { const LRLToken *token = *tokens; LRLASTTypeList *list_first = NULL, *list_last = NULL; if (!expected(ctx, &token, LRL_Sym_LSquare)) return NULL; if (token->type == LRL_Sym_RSquare) { lrl_err_token(ctx, LRL_Err_EmptyTypeParamList, token); } while (token->type && token->type != LRL_Sym_RSquare) { LRLASTTypeList *entry; /* Read type */ LRLASTType *parameter = parse_type(ctx, scope, &token); if (!parameter) goto recover; /* TODO only type parameters that are used inside pointers etc. may have a "var" qualifier. Otherwise it isn't meaningful (and could likely be used to circumvent var/const checks) */ /* Add to list */ linked_append(&entry, &list_first, &list_last); entry->type = parameter; /* Check for end of type params */ if (token->type == LRL_Sym_RSquare) break; /* [a] as opposed to [a,] */ expected(ctx, &token, LRL_Sym_Comma); continue; recover: if (!skip_statement(ctx, &token, LRL_Sym_RSquare)) break; } expected(ctx, &token, LRL_Sym_RSquare); *tokens = token; return list_first; } static LRLASTDefList *parse_type_names(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens) { const LRLToken *token = *tokens; LRLASTDefList *list_first = NULL, *list_last = NULL; if (!expected(ctx, &token, LRL_Sym_LSquare)) return NULL; if (token->type == LRL_Sym_RSquare) { lrl_err_token(ctx, LRL_Err_EmptyTypeParamList, token); } while (token->type && token->type != LRL_Sym_RSquare) { LRLASTDefList *entry; LRLASTType *type = NULL; const LRLToken *first_token = token; /* TODO covariance/contravariance? */ /* Read type identifier */ /* TODO can reuse typedef parsing */ LRLIdent *ident = parse_ident(ctx, scope, &token, LRL_Ident_CreateMember, NULL); if (!ident) goto recover; ident->flags |= LRL_IdFl_TypedefParam; if (token->type == LRL_Op_Assign) { token++; type = parse_type(ctx, ident, &token); /* TODO should have a type qualifier check */ } if (!type) { type = make_private_type(first_token, token-1); } type->quals |= LRL_InternQual_TypeParam; /* Add to list */ linked_append(&entry, &list_first, &list_last); entry->def.ast_type = LRL_AST_Def_Type; entry->def.kind.type.ident = ident; entry->def.kind.type.flags = 0; entry->def.kind.type.typenames = NULL; entry->def.kind.type.type = type; if (ident != &discarding_scope) { set_def_node(ident, (LRLASTDefOrStmt*)&entry->def); } /* Check for end of typename list */ if (token->type == LRL_Sym_RSquare) break; /* [a] as opposed to [a,] */ expected(ctx, &token, LRL_Sym_Comma); continue; recover: if (!skip_statement(ctx, &token, LRL_Sym_RSquare)) break; } expected(ctx, &token, LRL_Sym_RSquare); *tokens = token; return list_first; } static LRLASTType *parse_type(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens) { const LRLToken *token = *tokens; LRLASTType *type = NULL; const LRLToken *outer_first_token = token; LRLTypeQualifiers quals; LRLPointerFlags pointerflags; int noreturn_kw; /* Read type qualifiers */ quals = parse_qualifiers(ctx, &token); if (!token->type) goto reported_error; if (token->type == LRL_Sym_LParen) { /* Struct type */ LRLIdent *memberscope = (scope != &discarding_scope ? lrl_ident_create_priv_scope(scope) : (LRLIdent*)&discarding_scope); type = parse_struct(ctx, memberscope, &token, 0); } else if (token->type == LRL_KW_Union) { /* Union type */ LRLIdent *memberscope = (scope != &discarding_scope ? lrl_ident_create_priv_scope(scope) : (LRLIdent*)&discarding_scope); token++; /* skip keyword */ type = parse_struct(ctx, memberscope, &token, 0); if (!type) goto reported_error; /* missing "(" */ type->ast_type = LRL_AST_Type_Union; } else if (token->type == LRL_KW_Enum) { /* Enum with default base type (i.e. count) */ type = parse_enum(ctx, scope, lrl_builtin_get_type(LRL_BT_count), &token); if (!type) goto reported_error; } else if (token->type == LRL_KW_Bits) { /* Bitfield type without type (=unaligned bitfield) */ type = parse_bitfield(ctx, scope, NULL, &token); if (!type) goto reported_error; } else if (is_ident(token)) { /* Reference to a type */ type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Ident; defer_ident(ctx, scope, &type->kind.identref, &token); } else if (token->type == LRL_KW_Private) { /* Private type (= nothing is known about it) */ type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Private; token++; } else if (token->type == LRL_KW_Any) { /* Wildcard type */ type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Any; token++; } else if (token->type == LRL_KW_NoReturn) { /* noreturn function */ if (token[1].type != LRL_Sym_LParen) { token++; lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); goto reported_error; } type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Struct; type->from = outer_first_token; type->to = token; type->quals = 0; type->unique_id = 0; type->kind.struc.members = NULL; type->kind.struc.scope = NULL; type->kind.struc.flags = 0; if (quals != 0) { lrl_err_token(ctx, LRL_Err_QualifierOnNoReturn, token); } noreturn_kw = 1; token++; goto function_type; } else { /* Invalid type */ lrl_err_token(ctx, LRL_Err_ExpectedType, token); goto reported_error; } type->from = outer_first_token; type->to = token-1; type->quals = quals; type->unique_id = 0; /* Check pointers, arrays, optional modifiers... */ for (;;) { const LRLToken *first_token = token; if (!token->type) goto end; /* Pointers can have type qualifiers before them */ quals = parse_qualifiers(ctx, &token); if (!token->type) goto reported_error; switch ((int)token->type) { case LRL_Op_Deref: pointerflags = 0; goto has_ptrflags; case LRL_Sym_FlexiPointer: pointerflags = LRL_PF_Flexible; goto has_ptrflags; case LRL_Sym_RawPointer: pointerflags = LRL_PF_Raw; goto has_ptrflags; case LRL_Sym_RawFlexiPointer: pointerflags = LRL_PF_Flexible | LRL_PF_Raw; goto has_ptrflags; has_ptrflags: /* Pointer type */ if (type->quals & LRL_Qual_Const) { lrl_err_token(ctx, LRL_Err_ConstOnPointer, token); } { LRLASTType *ptrtype = malloc(sizeof(LRLASTType)); ptrtype->ast_type = LRL_AST_Type_Pointer; ptrtype->from = first_token; ptrtype->to = token; ptrtype->unique_id = 0; ptrtype->kind.pointer.type = type; ptrtype->kind.pointer.flags = pointerflags; type = ptrtype; token++; break; } case LRL_Op_OptionalValue: /* Optional value */ { LRLASTType *optional = malloc(sizeof(LRLASTType)); optional->ast_type = LRL_AST_Type_Optional; optional->from = first_token; optional->to = token; optional->unique_id = 0; optional->kind.pointer.type = type; /* Qualifiers are inherited from the element type */ if (quals != 0) { lrl_err_token(ctx, LRL_Err_QualifierOnOptional, token); } quals = type->quals; type = optional; token++; break; } case LRL_Sym_ArrayIndex: /* Array type */ { LRLASTType *array; /* Qualifiers are inherited from the element type */ if (quals != 0) { lrl_err_token(ctx, LRL_Err_QualifierOnArray, token); } quals = type->quals; token++; array = parse_array_type(ctx, scope, &token, type); if (!array) goto reported_error; type = array; break; } case LRL_Sym_LSquare: /* Type parameter */ { LRLASTType *parametric; /* The preceding type must be an identifier */ if (type->ast_type != LRL_AST_Type_Ident) { lrl_err_token(ctx, LRL_Err_TypeParameterNotOnIdent, token); goto reported_error; } /* Qualifiers are inherited from the unparametrized type */ if (quals != 0) { lrl_err_token(ctx, LRL_Err_QualifierOnTypeParamList, token); } quals = type->quals; parametric = malloc(sizeof(LRLASTType)); parametric->ast_type = LRL_AST_Type_Parametric; parametric->from = first_token; parametric->unique_id = 0; parametric->kind.parametric.type = type; parametric->kind.parametric.params = parse_type_params( ctx, scope, &token); parametric->to = token-1; type = parametric; break; } case LRL_Sym_LParen: /* Function type */ noreturn_kw = 0; function_type: { LRLASTType *functype; LRLIdent *argscope = (scope != &discarding_scope ? lrl_ident_create_priv_scope(scope) : (LRLIdent*)&discarding_scope); /* Qualifiers are not allowed on the input parameter or return parameter lists. */ if (quals != 0 || type->quals != 0) { lrl_err_token(ctx, LRL_Err_QualifierOnFunction, token); quals = 0; type->quals = 0; } functype = malloc(sizeof(LRLASTType)); functype->ast_type = LRL_AST_Type_Function; functype->from = outer_first_token; functype->unique_id = 0; functype->kind.function.flags = noreturn_kw ? LRL_FF_NoReturn : 0; functype->kind.function.ret = type; functype->kind.function.args = parse_struct( ctx, argscope, &token, 1); functype->kind.function.args->quals = LRL_InternQual_NotApplicable; functype->to = token-1; type = functype; break; } case LRL_KW_Enum: /* Enumeration type */ if (quals != 0) { lrl_err_token(ctx, LRL_Err_QualifierOnEnumList, token); } quals = type->quals; type = parse_enum(ctx, scope, type, &token); break; case LRL_KW_Bits: /* Bitfield type */ if (quals != 0) { lrl_err_token(ctx, LRL_Err_QualifierOnBitsList, token); } quals = type->quals; type = parse_bitfield(ctx, scope, type, &token); break; default: /* Reached the end */ if (quals != 0) { lrl_err_token(ctx, LRL_Err_NoTypeAfterQualifier, token); } goto end; } if (!type) goto reported_error; type->quals = quals; } reported_error: /* Free the type and return NULL */ /* TODO */ type = NULL; end: if (type) { type->to = token-1; } *tokens = token; return type; } static int skip_type(LRLCtx *ctx, const LRLToken **tokens) { return parse_type(ctx, (LRLIdent*)&discarding_scope, tokens) != NULL; } typedef struct { unsigned char precedence; unsigned char right_assoc; enum { Binary, Ternary, Postfix, Prefix } type; } OpInfo; #define LRL_CALL_AND_ARRIND_PRECEDENCE 17 static OpInfo get_opinfo(LRLTokenType type, REContext context) { /* operator precedence: highest . -> ^ ? [] () #[] @ enumbase makeopt sizeof etc. + - compl (unary) * / mod + - (binary) << >> bitand bitxor bitor as makeopt == != < <= > >= not and or xor then-else += -= /= *= <<= >>= lowest = */ OpInfo info; switch (type) { /* function calls, type parameters and array indices are handled in rpn_to_ast() */ case LRL_Op_Member: case LRL_Op_FunctionMember: info.precedence = 18; info.right_assoc = 0; info.type = Binary; break; case LRL_Op_Deref: case LRL_Op_OptionalValue: info.precedence = 18; info.right_assoc = 0; info.type = Postfix; break; /* #[] (LRL_Sym_ArrayIndex) is 17 */ case LRL_Op_AddrOf: case LRL_Op_EnumBase: case LRL_Op_SizeOf: case LRL_Op_MinSizeOf: case LRL_Op_OffsetOf: case LRL_Op_AlignOf: /* TODO add a "type x" operator so these operations can be used on types and not only on expressions */ info.precedence = 16; info.right_assoc = 1; info.type = Prefix; break; case LRL_Op_Compl: info.precedence = 15; info.right_assoc = 1; info.type = Prefix; break; case LRL_Op_Times: case LRL_Op_Divide: case LRL_Op_Modulo: info.precedence = 14; info.right_assoc = 0; info.type = Binary; break; case LRL_Op_Plus: case LRL_Op_Minus: if (context == RE_UNARY) { info.precedence = 15; info.right_assoc = 1; info.type = Prefix; } else { info.precedence = 13; info.right_assoc = 0; info.type = Binary; } break; case LRL_Op_ShiftL: case LRL_Op_ShiftR: info.precedence = 12; info.right_assoc = 0; info.type = Binary; break; case LRL_Op_BitAnd: info.precedence = 11; info.right_assoc = 0; info.type = Binary; break; case LRL_Op_BitXor: info.precedence = 10; info.right_assoc = 0; info.type = Binary; break; case LRL_Op_BitOr: info.precedence = 9; info.right_assoc = 0; info.type = Binary; break; case LRL_Op_MakeOpt: info.precedence = 8; info.right_assoc = 1; info.type = Prefix; break; case LRL_KW_As: case LRL_KW_TypeAssert: info.precedence = 7; info.right_assoc = 0; info.type = Postfix; break; case LRL_Op_Equal: case LRL_Op_NotEqual: case LRL_Op_Less: case LRL_Op_LessEqual: case LRL_Op_Greater: case LRL_Op_GreaterEqual: info.precedence = 6; info.right_assoc = 0; info.type = Binary; break; case LRL_Op_LNot: info.precedence = 5; info.right_assoc = 0; info.type = Prefix; break; case LRL_Op_LAnd: info.precedence = 4; info.right_assoc = 0; info.type = Binary; break; case LRL_Op_LOr: case LRL_Op_LXor: info.precedence = 3; info.right_assoc = 0; info.type = Binary; break; case LRL_Op_Then: case LRL_KW_Else: info.precedence = 2; info.right_assoc = 1; info.type = Ternary; break; case LRL_Op_Assign: case LRL_Op_PlusAssign: case LRL_Op_MinusAssign: case LRL_Op_TimesAssign: case LRL_Op_DivideAssign: case LRL_Op_ShiftLAssign: case LRL_Op_ShiftRAssign: info.precedence = 1; info.right_assoc = 1; info.type = Binary; break; case LRL_Sym_ArrayIndex: LRL_case_except_tt_ops default: /* Not parsed using the Shunting-yard algorithm */ info.precedence = 0; info.right_assoc = 0; info.type = Binary; } return info; } /** * Like rpn_to_ast() but handles .member operands. */ static const LRLToken *rpn_member_to_ast(LRLCtx *ctx, const RPNEntry *out_stack, size_t *out_size) { const RPNEntry *entry; const LRLToken *token; if (*out_size == 0) return NULL; /* Pop last token */ entry = &out_stack[--*out_size]; token = entry->token; if (token->type != LRL_TT_Ident) { lrl_err_token(ctx, LRL_Err_MemberIsNotAnIdent, token); return NULL; } return token; } /** * Reads argument and element lists from an RPN stack. * * entry is the start bracket, with the argument count. */ static void rpn_read_exprlist(LRLCtx *ctx, LRLIdent *scope, const RPNEntry *entry, const RPNEntry *out_stack, size_t *out_size, LRLASTExprList *list) { size_t argcount = entry->num_args, i; list->num_args = argcount; list->values = malloc(argcount*sizeof(LRLASTExpr*)); for (i = argcount; i > 0; i--) { list->values[i-1] = rpn_to_ast(ctx, NULL, scope, out_stack, out_size); } } /** * Sets the from/to tokens on an expr */ static void set_expr_range(LRLASTExpr *expr, const LRLASTExpr *fromexpr, const LRLASTExpr *toexpr) { if (fromexpr && fromexpr->from < expr->from) expr->from = fromexpr->from; if (toexpr && toexpr->to > expr->to) expr->to = toexpr->to; } /** * Converts a stack of tokens in Reverse Polish Notation to an AST substree */ static LRLASTExpr *rpn_to_ast(LRLCtx *ctx, const LRLToken *operator, LRLIdent *scope, const RPNEntry *out_stack, size_t *out_size) { LRLASTExpr *expr; const RPNEntry *entry; const LRLToken *token; LRLTokenType toktype; OpInfo op; if (*out_size == 0) { if (operator) { lrl_err_token(ctx, LRL_Err_MissingOperand, operator); } return NULL; } /* Pop last token */ entry = &out_stack[--*out_size]; token = entry->token; toktype = token->type; op = get_opinfo(toktype, entry->context); expr = malloc(sizeof(LRLASTExpr)); /* from/to are used when displaying the expr, e.g. in error messages */ expr->from = token; expr->to = token; memset(&expr->typeref, 0, sizeof(LRLTypeRef)); if (!op.precedence) { /* Values */ switch ((int)toktype) { case LRL_TT_Ident: case LRL_KW_Here: expr->ast_type = LRL_AST_Value_Ident; /* Parse identifier */ defer_ident(ctx, scope, &expr->kind.ident.identref, &token); /* Parse type parameters */ expr->kind.ident.type_params = (token->type == LRL_Sym_LSquare ? parse_type_params(ctx, scope, &token) : NULL); expr->to = token-1; break; case LRL_Sym_NamespaceSep: expr->ast_type = LRL_AST_Value_TypeIdent; expr->kind.typeident.identref.ident = NULL; /* looked up later */ expr->kind.typeident.identref.first_token = token+1; expr->kind.typeident.identref.scope = NULL; /* of type. assigned later */ expr->kind.typeident.identref.next = LRL_IDENTREF_NEW; token++; if (!skip_ident(ctx, &token)) { expr->kind.typeident.identref.ident = LRL_IDENT_MISSING; } /* Parse type parameters */ expr->kind.typeident.type_params = (token->type == LRL_Sym_LSquare ? parse_type_params(ctx, scope, &token) : NULL); expr->to = token-1; break; case LRL_TT_Undefined: expr->ast_type = LRL_AST_Value_Undefined; break; case LRL_TT_None: expr->ast_type = LRL_AST_Value_None; break; case LRL_TT_NaN: expr->ast_type = LRL_AST_Value_NaN; break; case LRL_TT_Inf: expr->ast_type = LRL_AST_Value_Inf; break; case LRL_TT_Integer: case LRL_TT_Real: case LRL_TT_String: expr->ast_type = LRL_AST_Value_Scalar; expr->kind.scalar.token = token; expr->kind.scalar.is_negative = 0; /* it's set from unary op */ break; case LRL_Sym_LSquare: case LRL_Sym_LParen: { /* Function call, array index, array value or struct value */ LRLASTExpr **values; size_t numargs; /* Read arguments */ rpn_read_exprlist(ctx, scope, entry, out_stack, out_size, &expr->kind.call.args); values = expr->kind.call.args.values; numargs = expr->kind.call.args.num_args; /* Include function or array also */ if (entry->context == RE_ARGLIST) expr->from--; if (numargs && values[numargs-1] && values[numargs-1]->to) { /* Include elements up to the end parenthesis */ expr->to = values[numargs-1]->to+1; if (expr->to->type == LRL_Sym_Comma) expr->to++; } else { /* Include end parenthesis */ expr->to++; } if (entry->context == RE_ARGLIST) { if (toktype == LRL_Sym_LParen) { expr->ast_type = LRL_AST_Expr_Call; expr->kind.call.function = rpn_to_ast(ctx, token, scope, out_stack, out_size); } else { /* Array index */ /* Convert the exprlist into multiple exprs */ size_t i; expr->kind.index.array = rpn_to_ast(ctx, token, scope, out_stack, out_size); if (!numargs) { lrl_err_token(ctx, LRL_Err_MissingArrayIndex, token); expr->ast_type = LRL_AST_Expr_ArrayIndex; expr->kind.index.index = NULL; } else { for (i = 0; i < numargs; i++) { if (i != 0) { LRLASTExpr *inner = expr; expr = malloc(sizeof(LRLASTExpr)); expr->from = token; expr->to = token; memset(&expr->typeref, 0, sizeof(LRLTypeRef)); expr->kind.index.array = inner; } expr->ast_type = LRL_AST_Expr_ArrayIndex; expr->kind.index.index = values[i]; } } free(values); } } else { if (toktype == LRL_Sym_LParen) { expr->ast_type = LRL_AST_Value_Struct; } else { expr->ast_type = LRL_AST_Value_Array; } } break; } default: lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); goto error; } } else if (op.type == Binary) { /* Binary operators */ if (toktype == LRL_Op_Member) { expr->ast_type = LRL_AST_Expr_Member; expr->kind.member.token = rpn_member_to_ast(ctx, out_stack, out_size); expr->kind.member.struc = rpn_to_ast(ctx, token, scope, out_stack, out_size); expr->kind.member.ident = NULL; set_expr_range(expr, expr->kind.member.struc, NULL); if (expr->kind.member.token) { expr->to = expr->kind.member.token; } } else if (toktype == LRL_Op_FunctionMember) { expr->ast_type = LRL_AST_Expr_FuncMember; expr->kind.member.token = rpn_member_to_ast(ctx, out_stack, out_size); expr->kind.member.struc = rpn_to_ast(ctx, token, scope, out_stack, out_size); expr->kind.member.ident = NULL; set_expr_range(expr, expr->kind.member.struc, NULL); if (expr->kind.member.token) { expr->to = expr->kind.member.token; } } else { expr->ast_type = LRL_AST_Expr_BinaryOp; expr->kind.binary_op.token_type = toktype; expr->kind.binary_op.operand2 = rpn_to_ast(ctx, token, scope, out_stack, out_size); expr->kind.binary_op.operand1 = rpn_to_ast(ctx, token, scope, out_stack, out_size); set_expr_range(expr, expr->kind.binary_op.operand1, expr->kind.binary_op.operand2); } } else if (op.type == Ternary) { /* "then-else" operator */ expr->ast_type = LRL_AST_Expr_Conditional; expr->kind.conditional.falseexpr = rpn_to_ast(ctx, token, scope, out_stack, out_size); expr->kind.conditional.trueexpr = rpn_to_ast(ctx, token, scope, out_stack, out_size); expr->kind.conditional.condexpr = rpn_to_ast(ctx, token, scope, out_stack, out_size); set_expr_range(expr, expr->kind.conditional.condexpr, expr->kind.conditional.falseexpr); } else { /* Unary operators */ if (toktype == LRL_KW_As) { const LRLToken *typetok = entry->token+1; LRLASTType *type; expr->ast_type = LRL_AST_Expr_As; type = parse_type(ctx, scope, &typetok); if (!type) { /* Recover from error */ type = make_private_type(typetok, typetok); } else if (type->quals & LRL_NonInternal_Quals) { lrl_err_type(ctx, LRL_Err_QualifierNotAllowed, type); } expr->kind.asexpr.type = type; expr->kind.asexpr.expr = rpn_to_ast(ctx, token, scope, out_stack, out_size); set_expr_range(expr, expr->kind.asexpr.expr, NULL); if (expr->kind.asexpr.type) { /* FIXME never true. and expr->to should = expr->from (or the given token? does that work in rpn_to_ast?) on parse error */ expr->to = expr->kind.asexpr.type->to; } } else if (toktype == LRL_KW_TypeAssert) { const LRLToken *typetok = entry->token+1; LRLASTType *type; expr->ast_type = LRL_AST_Expr_TypeAssert; type = parse_type(ctx, scope, &typetok); if (!type) { /* This is an error, but it will be handled by the verifier */ } else if (type->quals & LRL_NonInternal_Quals) { lrl_err_type(ctx, LRL_Err_QualifierNotAllowed, type); } expr->kind.typeassert.type = type; expr->kind.typeassert.expr = rpn_to_ast(ctx, token, scope, out_stack, out_size); set_expr_range(expr, expr->kind.typeassert.expr, NULL); if (expr->kind.typeassert.type) { expr->to = expr->kind.typeassert.type->to; } } else { expr->ast_type = LRL_AST_Expr_UnaryOp; /* prefix or postfix */ expr->kind.unary_op.token_type = toktype; expr->kind.unary_op.operand = rpn_to_ast(ctx, token, scope, out_stack, out_size); /* Special handling of negative literals, e.g. -128 */ if (toktype == LRL_Op_Minus && expr->kind.unary_op.operand && expr->kind.unary_op.operand->ast_type == LRL_AST_Value_Scalar) { expr->kind.unary_op.operand->kind.scalar.is_negative = 1; } /* Extend the range of tokens, either to the left or to the right */ set_expr_range(expr, expr->kind.unary_op.operand, expr->kind.unary_op.operand); } } return expr; error: free(expr); return NULL; } /** * Modified version of the "Shunting-yard algorithm" by Edsger Dijkstra. * https://en.wikipedia.org/wiki/Shunting-yard_algorithm */ static LRLASTExpr *parse_expr(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens) { const LRLToken *token = *tokens; LRLASTExpr *expr; RPNEntry entry; int operator_expected = 0; /* Operator stack */ size_t op_size, op_capacity; RPNEntry *op_stack; /* Output stack */ size_t out_size, out_capacity; RPNEntry *out_stack; init_list(&op_stack, &op_size, &op_capacity, 16); init_list(&out_stack, &out_size, &out_capacity, 16); while (token->type) { int found; LRLTokenType type = token->type; OpInfo op; switch ((int)type) { case LRL_TT_Ident: case LRL_KW_Here: case LRL_Sym_NamespaceSep: /* :typeident */ case LRL_TT_Integer: case LRL_TT_Real: case LRL_TT_String: case LRL_TT_Undefined: case LRL_TT_None: case LRL_TT_NaN: case LRL_TT_Inf: if (operator_expected) { lrl_err_token(ctx, LRL_Err_OperatorExpected, token); break; } /* Terminal tokens are pushed directly to the output stack */ entry.token = token; entry.context = RE_TERMINAL; list_push(&out_stack, &out_size, &out_capacity, entry); operator_expected = 1; if (type == LRL_Sym_NamespaceSep) { token++; if (token->type != LRL_TT_Ident) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); expr = NULL; goto cleanup; } } if (type == LRL_TT_Ident || type == LRL_KW_Here || type == LRL_Sym_NamespaceSep) { /* Skip any following subidentifiers */ while (token[1].type == LRL_Sym_NamespaceSep) { token++; if (token[1].type != LRL_TT_Ident) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, &token[1]); break; } token++; } if (token[1].type == LRL_Sym_LSquare) { /* Skip type parameters (re-parsed later) */ token++; parse_type_params(ctx, scope, &token); goto no_add; } } break; /* Argument and element list (exprlist) parsing. exprlists are stored like this on the out stack: identifier value1 value2 value3 ( TERM TERM TERM TERM CALL -- -- -- -- 3 */ case LRL_Sym_ArrayIndex: if (!operator_expected) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); break; } if (token[1].type != LRL_Sym_LSquare) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, &token[1]); break; } token++; /* fall through */ case LRL_Sym_LParen: case LRL_Sym_LSquare: { LRLTokenType next_type = token[1].type; if (operator_expected && type == LRL_Sym_LSquare) { /* Type parameters are allowed on identifiers only */ lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); break; } /* Pop operators with higher precedence, e.g. member operator */ while (op_size > 0) { const RPNEntry *st_entry = &op_stack[op_size-1]; LRLTokenType st_type = st_entry->token->type; OpInfo st = get_opinfo(st_type, st_entry->context); if (!st.precedence) break; if ((!st.right_assoc && LRL_CALL_AND_ARRIND_PRECEDENCE <= st.precedence) || (st.right_assoc && LRL_CALL_AND_ARRIND_PRECEDENCE < st.precedence)) { /* Move to the output stack */ list_push(&out_stack, &out_size, &out_capacity, *st_entry); op_size--; continue; } break; } /* Check if the argument list is empty */ entry.num_args = (lrl_is_paren(next_type) && !lrl_is_start_paren(next_type) ? 0 : 1); entry.context = (operator_expected ? RE_ARGLIST : /* Function call or array index */ (type == LRL_Sym_LSquare ? RE_ELEMLIST : /* Array literal */ RE_GROUPING)); /* Grouping or struct literal (until a comma is parsed) */ entry.token = token; list_push(&op_stack, &op_size, &op_capacity, entry); operator_expected = 0; break; } case LRL_Sym_Comma: case LRL_Sym_RParen: case LRL_Sym_RSquare: { LRLTokenType next_type = token[1].type; int popped_ops = 0; /* Pop everything off the stack until a ( or [ is found */ found = 0; while (op_size > 0) { const RPNEntry *st_entry = &op_stack[op_size-1]; LRLTokenType st_type = st_entry->token->type; if (lrl_is_paren(st_type) && lrl_is_start_paren(st_type)) { found = 1; break; } list_push(&out_stack, &out_size, &out_capacity, *st_entry); op_size--; popped_ops = 1; } /* This check is equivalent to checking if this is neither an empty list nor a completed operation. */ if (!operator_expected && popped_ops) { /* Missing right operand */ lrl_err_token(ctx, LRL_Err_MissingOperand, token); } /* Check for end of expression */ if (!found) goto finished; operator_expected = (type != LRL_Sym_Comma); if (type != LRL_Sym_Comma) { /* Pop the left parenthesis of the stack */ op_size--; /* Check that this wasn't just a grouping parenthesis */ if (op_stack[op_size].context != RE_GROUPING || op_stack[op_size].token->type != LRL_Sym_LParen || op_stack[op_size].num_args != 1) { /* Push the start parenthesis to the out stack */ list_push(&out_stack, &out_size, &out_capacity, op_stack[op_size]); } } else { /* Comma */ LRLTokenType prev_type = token[-1].type; if (prev_type == LRL_Sym_Comma || (lrl_is_paren(prev_type) && lrl_is_start_paren(prev_type))) { lrl_err_token(ctx, LRL_Err_EmptyExprInList, token); break; } if (op_stack[op_size-1].context == RE_GROUPING) { op_stack[op_size-1].context = RE_ELEMLIST; } if (!lrl_is_paren(next_type) || lrl_is_start_paren(next_type)) { /* Comma, but not a trailing one */ op_stack[op_size-1].num_args++; } else if (op_stack[op_size-1].context == RE_ARGLIST) { /* Trailing commas are not allowed in arglists or array index expressions */ lrl_err_token(ctx, LRL_Err_TrailingCommaInArgList, token); break; } } break; } case LRL_KW_Else: /* Last part of conditional "then-else" operator */ if (!operator_expected) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); break; } operator_expected = 0; /* Pop everything off the stack up to and including "then" */ found = 0; while (op_size > 0) { const RPNEntry *st_entry = &op_stack[op_size-1]; LRLTokenType st_type = st_entry->token->type; op_size--; if (st_type == LRL_Op_Then) { /* deleted from stack */ found = 1; break; } list_push(&out_stack, &out_size, &out_capacity, *st_entry); } /* Check for end of expression */ if (!found) goto finished; /* Push ternary "else" operator to the operator stack */ entry.token = token; entry.context = RE_OP; list_push(&op_stack, &op_size, &op_capacity, entry); break; case LRL_Sym_Semicolon: case LRL_Sym_LCurly: case LRL_KW_Bits: case LRL_KW_Break: case LRL_KW_Continue: case LRL_KW_Goto: case LRL_KW_SkipTo: case LRL_KW_RepeatFrom: case LRL_KW_Return: case LRL_KW_Case: case LRL_KW_Unreachable: case LRL_KW_With: goto finished; default: op = get_opinfo(type, operator_expected ? RE_OP : RE_UNARY); if (!op.precedence) { /* Error */ lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); break; } /* Prefix operator? */ if (op.type == Prefix) { entry.token = token; entry.context = RE_UNARY; list_push(&op_stack, &op_size, &op_capacity, entry); operator_expected = 0; break; } if (!operator_expected) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); break; } /* Binary or postfix operator */ while (op_size > 0) { const RPNEntry *st_entry = &op_stack[op_size-1]; LRLTokenType st_type = st_entry->token->type; OpInfo st = get_opinfo(st_type, st_entry->context); if (!st.precedence) break; if ((!st.right_assoc && op.precedence <= st.precedence) || (st.right_assoc && op.precedence < st.precedence)) { /* Move to the output stack */ list_push(&out_stack, &out_size, &out_capacity, *st_entry); op_size--; continue; } break; } if (op.type == Postfix) { entry.token = token; entry.context = RE_UNARY; list_push(&out_stack, &out_size, &out_capacity, entry); operator_expected = 1; if (type == LRL_KW_As || type == LRL_KW_TypeAssert) { /* Skip through the following type */ token++; skip_type(ctx, &token); continue; /* we are at the next token now */ } break; } /* Push to the operator stack */ entry.token = token; entry.context = RE_OP; list_push(&op_stack, &op_size, &op_capacity, entry); operator_expected = 0; } token++; no_add: ; } finished: /* Move remaining tokens on the operator stack to the output stack */ while (op_size > 0) { const RPNEntry *st_entry = &op_stack[op_size-1]; LRLTokenType st_type = st_entry->token->type; if (lrl_is_paren(st_type)) { /* Error - Mismatched parenthesis */ lrl_err_token(ctx, LRL_Err_UnexpectedToken, st_entry->token); break; } list_push(&out_stack, &out_size, &out_capacity, *st_entry); op_size--; } if (out_size == 0 || !operator_expected) { lrl_err_token(ctx, LRL_Err_IncompleteExpression, token); expr = NULL; goto cleanup; } /* Read from stack in reverse order, and build the ASTExpr */ expr = rpn_to_ast(ctx, NULL, scope, out_stack, &out_size); if (out_size != 0) { lrl_err_token(ctx, LRL_Err_IncompleteExpression, out_stack[out_size-1].token); } cleanup: free(out_stack); free(op_stack); *tokens = token; return expr; } /** * Looks ahead at the token sequence, and returns: * -1 on error * 0 if it should be parsed as a normal expression * 1 if it should be parsed as a variable declaration */ static int is_declaration(LRLCtx *ctx, const LRLToken *tokens, const LRLToken *start) { /* An expression cannot contain two operands followed by each other. Declarations always consist of a type (an operand) and an identifier (also and operand). By determining if the first operand could be a type, and whether the second operand is an identifier, we can check if a statement is a declaration. */ /* Skip type qualifiers */ int expected_decl = parse_qualifiers(ctx, &tokens); /* The "leaf" type comes first and is either an identifier, a struct or an enumeration. */ if (is_ident(tokens)) { /* Possibly identifier type */ skip_ident(ctx, &tokens); } else if (tokens->type == LRL_Sym_LParen) { /* Possibly struct type */ if (!skip_parens(ctx, &tokens)) return -1; } else if (tokens->type == LRL_KW_Union || tokens->type == LRL_KW_Enum || tokens->type == LRL_KW_Bits) { /* Union/Enum type */ return 1; } else if (tokens->type == LRL_KW_Private || tokens->type == LRL_KW_Any) { return 1; } else if (tokens->type == LRL_KW_NoReturn) { /* Function type */ return 1; } else { /* Doesn't start with a type */ goto is_expression; } /* Go through pointer/optional specifiers, function parameters, etc. */ for (;;) { LRLTokenType ttype; if (!tokens->type) break; /* Only types can have qualifiers */ if (parse_qualifiers(ctx, &tokens)) { expected_decl = 1; } ttype = tokens->type; if (!tokens->type) break; /* Only types can be used as a base for enumerations/bitfields */ if (ttype == LRL_KW_Enum || ttype == LRL_KW_Bits) return 1; /* Check if we found the name of the variable */ if (is_ident(tokens)) return 1; /* Skip "#" in array index */ if (ttype == LRL_Sym_ArrayIndex && tokens[1].type == LRL_Sym_LSquare) { tokens++; ttype = tokens->type; } if (ttype == LRL_Sym_LParen || ttype == LRL_Sym_LSquare) { /* Skip brackets (function, type parameters or array) */ if (!skip_parens(ctx, &tokens)) return -1; } else if (ttype == LRL_Op_Deref || ttype == LRL_Op_OptionalValue || (ttype >= LRL_Sym_FlexiPointer && ttype <= LRL_Sym_RawFlexiPointer)) { /* Skip pointer or optional type */ tokens++; } else { /* Not a type */ break; } } is_expression: if (expected_decl) { /* Error */ lrl_err_set_token_range(ctx, start, tokens, 0); lrl_err_finish(ctx, LRL_Err_InvalidDeclaration); return -1; } return 0; } static void check_no_semicolon_before_body(LRLCtx *ctx, const LRLToken **tokens) { if ((*tokens)->type == LRL_Sym_Semicolon) { /* Should not have a semicolon before the if body! */ lrl_err_token(ctx, LRL_Err_SemicolonBeforeBody, *tokens); ++*tokens; } } static LRLDefFlags parse_typedef_flags(LRLCtx *ctx, const LRLToken **tokens) { LRLDefFlags flags = 0; const LRLToken *token = *tokens; for (;;) { LRLDefFlags addflag; if (token->type == LRL_KW_Incomplete) { addflag = LRL_DeFl_Incomplete; } else if (token->type == LRL_KW_Alias) { addflag = LRL_DeFl_Alias; } else { break; } if (flags & addflag) { lrl_err_token(ctx, LRL_Err_RepeatedKeyword, token); } flags |= addflag; token++; } *tokens = token; return flags; } static int parse_typedef(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens, LRLASTDefOrStmt *defstmt) { LRLASTDefType *deftype = &defstmt->def.kind.type; LRLIdent *ident, *anon; if (!defstmt) fail("parsetypedef_nostmt"); ++*tokens; deftype->flags = parse_typedef_flags(ctx, tokens); /* Read identifier */ ident = parse_ident(ctx, scope, tokens, LRL_Ident_Create, NULL); if (!ident) return 0; ident->flags |= LRL_IdFl_Typedef | (scope->flags & LRL_IdFl_Statement); deftype->ident = ident; set_def_node(ident, defstmt); /* Create anonymous namespace */ anon = lrl_ident_create_priv_scope(ident); anon->flags |= LRL_IdFl_TypedefAnon; /* Read type parameter definitions (if any) */ deftype->typenames = ((*tokens)->type == LRL_Sym_LSquare ? parse_type_names(ctx, ident, tokens) : NULL); if (!expected(ctx, tokens, LRL_Op_Assign)) return 0; /* Read definition */ deftype->type = parse_type(ctx, anon, tokens); if (!deftype->type) return 0; if (deftype->type->quals & LRL_Qual_Var) { lrl_err_type(ctx, LRL_Err_TypedefVarIsDefault, deftype->type); } expected(ctx, tokens, LRL_Sym_Semicolon); return 1; } typedef enum { STF_NoSemicolonEnd = 0x1 } StmtFlags; static LRLASTStmt *parse_statement(LRLCtx *ctx, const LRLIdent *outer_scope, LRLASTStmt *last_loop, StmtFlags flags, const LRLToken **tokens) { const LRLToken *token = *tokens; const LRLToken *start = token; LRLTokenType toktype = token->type; LRLIdent *scope; LRLASTStmt *stmt = malloc(sizeof(LRLASTStmt)); stmt->token = token; /* Prune empty statement scopes (except "{...}") in the identifier tree */ if (!outer_scope->contents.size && (outer_scope->flags & LRL_IdFl_Statement) != 0) { if (outer_scope->scope) outer_scope = outer_scope->scope; } /* Statement scopes are nested */ memset(&stmt->scope, 0, sizeof(stmt->scope)); stmt->scope.flags = LRL_IdFl_Statement; /* except in {} */ stmt->scope.scope = outer_scope; scope = &stmt->scope; switch ((int)toktype) { case LRL_Sym_LCurly: { /* Multiple statements grouped by { } */ LRLASTStmtList *list_first = NULL, *list_last = NULL; stmt->scope.flags = 0; token++; while (token->type && token->type != LRL_Sym_RCurly) { LRLASTStmt *inner = parse_statement(ctx, scope, last_loop, 0, &token); if (inner) { /* Add to list */ LRLASTStmtList *entry; linked_append(&entry, &list_first, &list_last); entry->statement = inner; /* Make the scope of the last statement accessible to subsequent statements */ scope = &inner->scope; if (inner->scope.scope == NULL) fail("parsestmt_nullscope"); } } stmt->ast_type = LRL_AST_Stmt_Compound; stmt->kind.compound = list_first; if (!token->type) { lrl_err_token(ctx, LRL_Err_UnclosedParenthesis, *tokens); } else { token++; } break; } case LRL_Sym_Semicolon: /* Error - Empty statements should be written as { }, not ; */ lrl_err_token(ctx, LRL_Err_SemicolonWithoutStatement, token); stmt = NULL; token++; break; case LRL_KW_If: stmt->ast_type = LRL_AST_Stmt_If; token++; /* Parse boolean expression */ stmt->kind.ifstm.boolexpr = parse_expr(ctx, scope, &token); check_no_semicolon_before_body(ctx, &token); /* Parse if bodies */ stmt->kind.ifstm.body_true = parse_statement(ctx, scope, last_loop, 0, &token); stmt->kind.ifstm.body_false = NULL; if (token->type == LRL_KW_Else) { token++; check_no_semicolon_before_body(ctx, &token); stmt->kind.ifstm.body_false = parse_statement(ctx, scope, last_loop, 0, &token); } break; case LRL_KW_Switch: stmt->ast_type = LRL_AST_Stmt_Switch; token++; /* Parse switch expression */ stmt->kind.switchstm.switchexpr = parse_expr(ctx, scope, &token); /* Parse cases */ stmt->kind.switchstm.num_cases = 0; stmt->kind.switchstm.cases = NULL; stmt->kind.switchstm.defaultstm = NULL; if (!expected(ctx, &token, LRL_Sym_LCurly)) break; while (token->type && token->type != LRL_Sym_RCurly && token->type != LRL_KW_Default) { LRLASTCase *casenode; stmt->kind.switchstm.cases = try_realloc( stmt->kind.switchstm.cases, sizeof(LRLASTCase)*++stmt->kind.switchstm.num_cases); casenode = &stmt->kind.switchstm.cases[stmt->kind.switchstm.num_cases-1]; casenode->num_matchvalues = 0; casenode->matchvalues = NULL; casenode->stmt = NULL; /* Parse comparison values */ if (token->type != LRL_KW_Case) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); goto recover_case; } while (token->type == LRL_KW_Case) { LRLASTCaseMatch *casematch; token++; recover_case: if (!token || token->type == LRL_Sym_RCurly) { break; /* Error */ } casenode->matchvalues = try_realloc(casenode->matchvalues, sizeof(LRLASTCaseMatch)*++casenode->num_matchvalues); casematch = &casenode->matchvalues[casenode->num_matchvalues-1]; casematch->expr = parse_expr(ctx, scope, &token); if (token->type == LRL_KW_With) { token++; casematch->withstm = parse_statement(ctx, scope, last_loop, STF_NoSemicolonEnd, &token); } else { casematch->withstm = NULL; } } /* Parse code block */ check_no_semicolon_before_body(ctx, &token); if (token->type == LRL_Sym_RCurly) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); break; } casenode->stmt = parse_statement(ctx, scope, last_loop, 0, &token); } /* Parse default case */ if (token->type == LRL_KW_Default) { token++; check_no_semicolon_before_body(ctx, &token); stmt->kind.switchstm.defaultstm = parse_statement(ctx, scope, last_loop, 0, &token); } expected(ctx, &token, LRL_Sym_RCurly); break; case LRL_KW_While: stmt->ast_type = LRL_AST_Stmt_While; token++; /* Parse boolean expression */ stmt->kind.whilestm.boolexpr = parse_expr(ctx, scope, &token); check_no_semicolon_before_body(ctx, &token); /* Parse while body */ stmt->kind.whilestm.body = parse_statement(ctx, scope, stmt, 0, &token); break; case LRL_KW_Do: stmt->ast_type = LRL_AST_Stmt_DoWhile; token++; /* Parse do/while body */ if (token->type != LRL_Sym_LCurly) { lrl_err_token(ctx, LRL_Err_DoWhileWithoutBlock, token); } stmt->kind.whilestm.body = parse_statement(ctx, scope, stmt, 0, &token); /* Parse boolean expression */ if (expected(ctx, &token, LRL_KW_While)) { stmt->kind.whilestm.boolexpr = parse_expr(ctx, scope, &token); goto expect_semicolon; } else { stmt->kind.whilestm.boolexpr = NULL; } break; case LRL_KW_For: { LRLIdent *ident, *scope_endempty; LRLASTType *valuetype; stmt->ast_type = LRL_AST_Stmt_For; token++; /* Create a separate scope for the main loop body (which the end/empty bodies can't access) */ scope_endempty = lrl_ident_create_priv_scope(scope); /* Create a private scope for the whole loop, to block access to the loop variable from statements after the loop */ scope = lrl_ident_create_priv_scope(scope); /* Parse value variable */ /* TODO allow nested loops to be written like this? for int x y in [1,2,3], [1,2,3] { ... } for int x, double f in [1,2,3], [1, 1.1, 1.2] { ... } */ valuetype = parse_type(ctx, scope, &token); stmt->kind.forstm.valuedef.ast_type = LRL_AST_Stmt_Decl; stmt->kind.forstm.valuedef.kind.data.flags = 0; stmt->kind.forstm.valuedef.kind.data.type = valuetype; if (valuetype) { if (valuetype->quals & LRL_Qual_Const) { lrl_err_type(ctx, LRL_Err_DataConstIsDefault, valuetype); } else if (valuetype->quals & LRL_NonInternal_Quals) { lrl_err_type(ctx, LRL_Err_QualifierNotAllowed, valuetype); } } ident = parse_ident(ctx, scope, &token, LRL_Ident_Create, NULL); set_def_node(ident, (LRLASTDefOrStmt*)&stmt->kind.forstm.valuedef); stmt->kind.forstm.valuedef.kind.data.ident = ident; stmt->kind.forstm.valuedef.kind.data.value = make_undef_expr(token); /* "in" */ expected(ctx, &token, LRL_KW_In); /* Parse iterable expression */ stmt->kind.forstm.iterexpr = parse_expr(ctx, scope, &token); check_no_semicolon_before_body(ctx, &token); stmt->kind.forstm.temp = NULL; /* used by the verifier */ /* Parse for body */ stmt->kind.forstm.body = parse_statement(ctx, scope, stmt, 0, &token); /* "loopend" body is executed if no "break" was executed */ if (token->type == LRL_KW_LoopEnd) { token++; check_no_semicolon_before_body(ctx, &token); stmt->kind.forstm.body_end = parse_statement(ctx, scope_endempty, last_loop, 0, &token); } else { stmt->kind.forstm.body_end = NULL; } /* "loopempty" body is executed if the loop was empty */ if (token->type == LRL_KW_LoopEmpty) { token++; check_no_semicolon_before_body(ctx, &token); stmt->kind.forstm.body_empty = parse_statement(ctx, scope_endempty, last_loop, 0, &token); } else { stmt->kind.forstm.body_empty = NULL; } break; } case LRL_KW_Goto: stmt->ast_type = LRL_AST_Stmt_Goto; goto parse_goto; case LRL_KW_SkipTo: stmt->ast_type = LRL_AST_Stmt_SkipTo; goto parse_goto; case LRL_KW_RepeatFrom: stmt->ast_type = LRL_AST_Stmt_RepeatFrom; parse_goto: { LRLIdent *funcscope; token++; /* Parse identifier */ if (token->type != LRL_TT_Ident) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); goto recover; } funcscope = get_function_scope(scope); defer_ident(ctx, funcscope, &stmt->kind.gotostm.identref, &token); goto expect_semicolon; } case LRL_KW_Label: { LRLIdent *ident, *funcscope, *dup; char *name; stmt->ast_type = LRL_AST_Stmt_Label; token++; /* Read name token */ if (token->type != LRL_TT_Ident) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); goto recover; } name = lrl_strndup(token->loc.start, token->loc.length); if (!name) goto recover; /* Check for duplicate */ funcscope = get_function_scope(scope); dup = lrl_ident_get_string(funcscope, name); if (dup) { lrl_err_set_token(ctx, token, 0); if (dup->def_token) { lrl_err_set_token(ctx, dup->def_token, 1); } lrl_err_finish(ctx, LRL_Err_DuplicateIdentifier); } /* Insert label into scope */ ident = lrl_ident_insert_string(ctx, funcscope, name); set_def_node(ident, (LRLASTDefOrStmt*)stmt); stmt->kind.labelstm.ident = ident; token++; /* Expect a colon */ /* TODO should require a non-identifier character after the colon, to prevent confusing code e.g. "label a:b;" perhaps we can just check that there's at least one character between the : and the first character in the next token if it's an identifier token? */ expected(ctx, &token, LRL_Sym_NamespaceSep); break; } case LRL_KW_Break: stmt->ast_type = LRL_AST_Stmt_Break; goto breakcont_stmt; case LRL_KW_Continue: stmt->ast_type = LRL_AST_Stmt_Continue; breakcont_stmt: stmt->kind.breakstm.outer_stmt = last_loop; if (!last_loop) { lrl_err_token(ctx, LRL_Err_BreakContinueOutsideLoop, token); } token++; goto expect_semicolon; case LRL_KW_Return: stmt->ast_type = LRL_AST_Stmt_Return; token++; stmt->kind.retstm.retexpr = (token->type != LRL_Sym_Semicolon ? parse_expr(ctx, scope, &token) : NULL); goto expect_semicolon; case LRL_KW_Unreachable: stmt->ast_type = LRL_AST_Stmt_Unreachable; token++; goto expect_semicolon; case LRL_KW_TypeAssert: { LRLIdent *targetscope = calloc(1, sizeof(LRLIdent)); int has_do_body = 0; stmt->ast_type = LRL_AST_Stmt_TypeAssert; stmt->kind.typeassertstm.asserts = NULL; stmt->kind.typeassertstm.body_do = NULL; stmt->kind.typeassertstm.body_else = NULL; targetscope->flags = LRL_IdFl_Statement; /* except in {} */ targetscope->scope = scope; do { LRLTypeAssert *ta = calloc(1, sizeof(LRLTypeAssert)); const LRLToken *identstart; LRLIdent *ident; ta->next = stmt->kind.typeassertstm.asserts; ta->def.ast_type = LRL_AST_Def_Data; identstart = ++token; /* Add identref in parent scope */ ta->refexpr.ast_type = LRL_AST_Value_Ident; ta->refexpr.from = token; defer_ident(ctx, (LRLIdent*)outer_scope, &ta->refexpr.kind.ident.identref, &token); ta->refexpr.to = token-1; /* Create identifier in this scope */ token = identstart; ident = parse_ident(ctx, targetscope, &token, LRL_Ident_Create, NULL); set_def_node(ident, (LRLASTDefOrStmt*)&ta->def); ta->def.kind.data.ident = ident; ta->def.kind.data.flags = LRL_DeFl_Internal_TypeAssert; ta->def.kind.data.value = &ta->refexpr; if (token->type != LRL_KW_Is && token->type != LRL_KW_In) { lrl_err_token(ctx, LRL_Err_TypeAssertIsOrInExpected, token); continue; } if (token->type == LRL_KW_Is) { LRLASTType *type; token++; /* Parse type to cast to */ type = parse_type(ctx, scope, &token); if (type && type->quals & LRL_NonInternal_Quals) { lrl_err_type(ctx, LRL_Err_QualifierNotAllowed, type); } ta->def.kind.data.type = type; /* TODO should be able to cast e.g. (int,int) into (byte,byte) */ } /* TODO change this into a generic condition? e.g. "typeassert x is byte cond (value >= 0 and value <= 15) and y is ..." */ if (token->type == LRL_KW_In) { token++; /* Parse range */ /* TODO */ fail("parsestmt_typeassert_ranges_notimpl"); } if (!ta->def.kind.data.type) continue; stmt->kind.typeassertstm.asserts = ta; } while (token->type == LRL_Op_LAnd); /* Read do and/or else blocks (if any) */ if (token->type != LRL_KW_Do && token->type != LRL_KW_Else) { if ((flags & STF_NoSemicolonEnd) == 0) { expect_end_of_statement(ctx, &token); } } else { if (token->type == LRL_KW_Do) { token++; check_no_semicolon_before_body(ctx, &token); stmt->kind.typeassertstm.body_do = parse_statement( ctx, targetscope, last_loop, 0, &token); has_do_body = 1; } if (token->type == LRL_KW_Else) { token++; check_no_semicolon_before_body(ctx, &token); stmt->kind.typeassertstm.body_else = parse_statement( ctx, scope->scope, last_loop, 0, &token); } } if (!has_do_body) { size_t bi; /* Make target identifiers visible to statements after the typeassert stmt by moving them to the statement scope */ for (bi = 0; bi < targetscope->contents.num_buckets; bi++) { LRLIdent *subident = targetscope->contents.buckets[bi]; for (; subident; subident = subident->next) { subident->scope = &stmt->scope; } } targetscope->scope = outer_scope; stmt->scope = *targetscope; free(targetscope); } break; } case LRL_KW_Assert: stmt->ast_type = LRL_AST_Stmt_Assert; token++; stmt->kind.assertstm.exprstart = token->loc.start; stmt->kind.assertstm.boolexpr = parse_expr(ctx, scope, &token); stmt->kind.assertstm.exprlength = token->loc.start - stmt->kind.assertstm.exprstart; goto expect_semicolon; case LRL_Sym_RParen: case LRL_Sym_RSquare: case LRL_Sym_RCurly: lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); stmt = NULL; if (token->type != LRL_Sym_RCurly) token++; break; case LRL_KW_Typedef: { /* Type definition */ stmt->ast_type = LRL_AST_Stmt_DefType; stmt->kind.deftype.ident = NULL; stmt->kind.deftype.flags = 0; stmt->kind.deftype.type = NULL; stmt->kind.deftype.typenames = NULL; if (!parse_typedef(ctx, scope, &token, (LRLASTDefOrStmt*)stmt)) goto recover; break; } default: { /* An expression or variable declaration */ int is_decl = is_declaration(ctx, token, start); if (is_decl > 0) { /* This is a variable declaration */ /* Type */ LRLIdent *ident; stmt->ast_type = LRL_AST_Stmt_Decl; stmt->kind.data.flags = 0; stmt->kind.data.type = parse_type(ctx, scope, &token); if (!stmt->kind.data.type) goto recover; if (stmt->kind.data.type->quals & LRL_Qual_Const) { lrl_err_type(ctx, LRL_Err_DataConstIsDefault, stmt->kind.data.type); } else if ((stmt->kind.data.type->quals & (LRL_Qual_Var|LRL_Qual_Shared)) == LRL_Qual_Shared) { lrl_err_type(ctx, LRL_Err_LocalSharedWithoutVar, stmt->kind.data.type); } /* Identifier */ if (is_valid_ident(ctx, token)) { lrl_ident_check_duplicates(ctx, scope, token); ident = parse_ident(ctx, scope, &token, LRL_Ident_Create, NULL); set_def_node(ident, (LRLASTDefOrStmt*)stmt); stmt->kind.data.ident = ident; } else { /* Error recovery */ skip_ident(ctx, &token); stmt->kind.data.ident = NULL; ident = NULL; if (token->type != LRL_Op_Assign) { stmt->kind.data.value = make_undef_expr(token); skip_statement_skip_paren(ctx, &token); break; } } if (token->type == LRL_Sym_LSquare) { lrl_err_token(ctx, LRL_Err_DataWithTypeParameters, token); stmt->kind.data.value = make_undef_expr(token); skip_statement_skip_paren(ctx, &token); break; } if (token->type == LRL_Op_Assign) { /* Parse initial value */ token++; stmt->kind.data.value = parse_expr(ctx, ident, &token); } else { /* Use default (undefined) */ stmt->kind.data.value = make_undef_expr(token); } goto expect_semicolon; } else if (!is_decl) { /* This is an expression */ stmt->ast_type = LRL_AST_Stmt_Expr; stmt->kind.expr = parse_expr(ctx, scope, &token); goto expect_semicolon; } else { /* Error (already reported by is_declaration()) */ goto recover; } break; } } end: *tokens = token; return stmt; expect_semicolon: if ((flags & STF_NoSemicolonEnd) == 0) { expect_end_of_statement(ctx, &token); } goto end; recover: skip_statement_skip_paren(ctx, &token); *tokens = token; return NULL; } static void try_add_linkname(LRLCtx *ctx, LRLIdent *ident, const LRLToken *linkname) { if (!linkname) return; if (ident->linkname && !lrl_strings_equal(&ident->linkname->loc, &linkname->loc)) { lrl_err_set_token(ctx, linkname, 0); lrl_err_set_token(ctx, ident->linkname, 1); lrl_err_finish(ctx, LRL_Err_MultipleDifferentLinknames); return; } ident->linkname = linkname; } /** * Parses the top level definitions in a file: Functions, types, * uses statements, static variables, etc. */ static void parse_deflist(LRLCtx *ctx, LRLIdent *scope, const LRLToken **tokens, LRLASTDefList **parsed) { LRLASTDefList *list_first = NULL, *list_last = NULL; const LRLToken *token = *tokens; while (token->type && token->type != LRL_Sym_RCurly) { LRLIdent *ident; LRLASTType *type; LRLASTDefList *def; LRLDefFlags defflags = 0; LRLASTDefList *typenames = NULL; const LRLToken *first_token = token; const LRLToken *linkname = NULL; const LRLToken *type_start; int noreturn_kw; if (token->type == LRL_KW_Deprecated) { defflags |= LRL_DeFl_Deprecated; token++; } /* Parse linkname qualifier (if any) */ if (token->type == LRL_KW_Local) { defflags |= LRL_DeFl_Local; token++; } else if (token->type == LRL_KW_DeclOnly) { /* TODO should be the default in headers should be required if there's no function body/variable value */ defflags |= LRL_DeFl_DeclOnly; token++; } else if (token->type == LRL_KW_Export) { defflags |= LRL_DeFl_Export; token++; } else if (token->type == LRL_KW_Import) { defflags |= LRL_DeFl_Import; token++; } if (token->type == LRL_KW_Linkname) { token++; if (token->type == LRL_TT_String) { linkname = token; token++; } else { lrl_err_token(ctx, LRL_Err_ExpectedLinknameString, token); } /* If there's nothing after it, add the linkname to the current namespace */ if (token->type == LRL_Sym_Semicolon || token->type == LRL_TT_EOF) { try_add_linkname(ctx, scope, linkname); expected(ctx, &token, LRL_Sym_Semicolon); continue; } } /* Type definition? */ if (token->type == LRL_KW_Typedef) { /* * Syntax: * * typedef = "typedef", [ flags ], identifier, "=", * type, ";" ; */ LRLASTDefList *saved = list_last; if (defflags & LRL_LinkageFlags) { lrl_err_token(ctx, LRL_Err_LinkageFlagsNotAllowedHere, token); defflags = 0; } linked_append(&def, &list_first, &list_last); def->def.ast_type = LRL_AST_Def_Type; def->def.kind.type.ident = NULL; def->def.kind.type.flags = 0; def->def.kind.type.type = NULL; def->def.kind.type.typenames = NULL; if (!parse_typedef(ctx, scope, &token, (LRLASTDefOrStmt*)&def->def)) { if (!def->def.kind.type.ident) { /* Remove def it the identifier couldn't be parsed */ if (list_first == list_last) list_first = NULL; free(list_last); list_last = saved; if (saved) saved->next = NULL; goto recover; } if (token->type == LRL_Sym_Semicolon) { token++; /* don't show "extranous semicolon" error also */ } } try_add_linkname(ctx, def->def.kind.type.ident, linkname); continue; } /* Uses statement? */ if (token->type == LRL_KW_Uses) { /* * Syntax: * * typedef = "uses", identifier, [ "as", identifier ], ";" ; */ if (linkname) { lrl_err_token(ctx, LRL_Err_LinknameNotAllowedHere, token); } if (defflags & LRL_LinkageFlags) { lrl_err_token(ctx, LRL_Err_LinkageFlagsNotAllowedHere, token); defflags = 0; } token++; if (token->type != LRL_TT_Ident) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); goto recover; } linked_append(&def, &list_first, &list_last); def->def.ast_type = LRL_AST_Uses; defer_uses(ctx, NULL, &def->def.kind.uses.identref, &token); if (token->type == LRL_KW_As) { token++; if (token->type == LRL_KW_Here) { lrl_err_token(ctx, LRL_Err_HereNotAllowedHere, token); ident = calloc(1, sizeof(LRLIdent)); /* throwaway */ } else { ident = parse_ident(ctx, scope, &token, LRL_Ident_Create, NULL); if (!ident) { ident = calloc(1, sizeof(LRLIdent)); /* throwaway */ } else { ident->flags = LRL_IdFl_Link; set_def_node(ident, (LRLASTDefOrStmt*)&def->def); } } } else { ident = scope; } def->def.kind.uses.identref.scope = ident; expected(ctx, &token, LRL_Sym_Semicolon); continue; } /* Namespace? */ if (token->type == LRL_KW_Namespace) { /* * Syntax: * * namespace = "namespace", identifier, "{", def-list, "}"; */ token++; if (token->type == LRL_KW_Here) { lrl_err_token(ctx, LRL_Err_HereNotAllowedHere, token); } if (defflags & LRL_LinkageFlags) { lrl_err_token(ctx, LRL_Err_LinkageFlagsNotAllowedHere, token); defflags = 0; } ident = parse_ident(ctx, scope, &token, LRL_Ident_Extend, NULL); if (!ident) goto recover; ident->flags |= LRL_IdFl_HasHere; try_add_linkname(ctx, ident, linkname); if (!expected(ctx, &token, LRL_Sym_LCurly)) goto recover; linked_append(&def, &list_first, &list_last); def->def.ast_type = LRL_AST_Namespace; def->def.kind.namespac = calloc(1, sizeof(LRLASTNamespace)); def->def.kind.namespac->ident = ident; parse_deflist(ctx, ident, &token, &def->def.kind.namespac->list); expected(ctx, &token, LRL_Sym_RCurly); continue; } /* Interop (e.g. inclusion of C header file) */ if (token->type == LRL_KW_Interop) { /* * Syntax: * * interop = "interop", identifier, "=", * interop-name-string, interop-config-expr, ";" ; */ if (defflags & LRL_LinkageFlags) { lrl_err_token(ctx, LRL_Err_LinkageFlagsNotAllowedHere, token); defflags = 0; } token++; ident = parse_ident(ctx, scope, &token, LRL_Ident_Extend, NULL); if (!ident) goto recover; ident->flags |= LRL_IdFl_HasHere | LRL_IdFl_Uninitialized; try_add_linkname(ctx, ident, linkname); if (!expected(ctx, &token, LRL_Op_Assign)) goto recover; linked_append(&def, &list_first, &list_last); def->def.ast_type = LRL_AST_Interop; def->def.kind.interop.ident = ident; def->def.kind.interop.name = token; if (!expected(ctx, &token, LRL_TT_String)) { def->def.kind.interop.name = NULL; } def->def.kind.interop.options_expr = parse_expr(ctx, scope, &token); def->def.kind.interop.options_type = NULL; def->def.kind.interop.translated = NULL; set_def_node(ident, (LRLASTDefOrStmt*)&def->def); def->def.kind.interop.next = ctx->interop_list; ctx->interop_list = &def->def.kind.interop; expected(ctx, &token, LRL_Sym_Semicolon); continue; } /* Common error */ if (token->type == LRL_Sym_Semicolon) { lrl_err_token(ctx, LRL_Err_ExtraneousSemicolonInDefList, token); token++; continue; } /* It must be a variable or function */ /* * syntax: * * data = [ "alias" ], type, identifier, [ "=", expression ], ";" ; * function = [ "alias" ], type, identifier, struct-type, * ( ";" | "{", code, "}" ) ; */ if (token->type == LRL_KW_Alias) { defflags |= LRL_DeFl_Alias; token++; } type_start = token; if (type_start->type == LRL_KW_NoReturn) { /* noreturn function */ token++; } else { /* Normal data or function definition */ if (!skip_type(ctx, &token)) goto recover; } ident = parse_ident(ctx, scope, &token, LRL_Ident_Create, NULL); if (!ident) goto recover; try_add_linkname(ctx, ident, linkname); if (!token->type) { lrl_err_token(ctx, LRL_Err_UnexpectedToken, token); break; } noreturn_kw = 0; if (type_start->type == LRL_KW_NoReturn) { /* noreturn function */ type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Struct; type->from = type_start; type->to = type_start; type->quals = 0; type->unique_id = 0; type->kind.struc.members = NULL; type->kind.struc.scope = NULL; type->kind.struc.flags = 0; noreturn_kw = 1; } else { /* Normal data or function definition */ type = parse_type(ctx, ident, &type_start); if (!type) goto recover; } if (type->quals & LRL_Qual_Const) { lrl_err_type(ctx, LRL_Err_DataConstIsDefault, type); } /* Read type parameter definitions (if any) */ if (token->type == LRL_Sym_LSquare) { typenames = parse_type_names(ctx, ident, &token); } if (token->type == LRL_Sym_LParen) { /* Function */ LRLASTType *args_type; LRLASTStmt *code; const LRLToken *last_token; int error = 0; /* Parse argument list */ args_type = parse_struct(ctx, ident, &token, 1); args_type->quals = LRL_InternQual_NotApplicable; /* Parse constraints */ /* TODO */ last_token = token-1; if (token->type == LRL_Sym_LCurly) { /* Parse code block */ if (defflags & (LRL_DeFl_Import|LRL_DeFl_DeclOnly)) { lrl_err_token(ctx, LRL_Err_ImportWithBody, token); } code = parse_statement(ctx, ident, NULL, 0, &token); if (code) { code->scope.flags |= LRL_IdFl_FunctionBody; } } else if (token->type == LRL_Sym_Semicolon) { /* It's a function prototype */ code = NULL; token++; } else { /* Error */ lrl_err_token(ctx, LRL_Err_ExpectedFunctionBody, token); code = NULL; error = 1; } /* Put in definition list */ linked_append(&def, &list_first, &list_last); def->def.ast_type = LRL_AST_Def_Function; def->def.kind.function.flags = defflags; def->def.kind.function.ident = ident; def->def.kind.function.type.ast_type = LRL_AST_Type_Function; def->def.kind.function.type.from = first_token; def->def.kind.function.type.to = last_token; def->def.kind.function.type.quals = LRL_InternQual_NotApplicable; def->def.kind.function.type.kind.function.ret = type; def->def.kind.function.type.kind.function.args = args_type; def->def.kind.function.type.kind.function.flags = noreturn_kw ? LRL_FF_NoReturn : 0; def->def.kind.function.typenames = typenames; def->def.kind.function.code = code; set_def_node(ident, (LRLASTDefOrStmt*)&def->def); if (error) goto recover; else continue; } else if (token->type == LRL_Op_Assign || token->type == LRL_Sym_Semicolon) { /* Data */ LRLASTExpr *expr; if (typenames != NULL) { lrl_err_token(ctx, LRL_Err_DataWithTypeParameters, token); skip_statement(ctx, &token, LRL_Sym_Semicolon); goto recover_datadef; } if (noreturn_kw) { lrl_err_token(ctx, LRL_Err_DataWithNoReturn, token); skip_statement(ctx, &token, LRL_Sym_Semicolon); goto recover_datadef; } if ((defflags & LRL_DeFl_Alias) != 0 && (type->quals & LRL_Qual_Var) != 0) { lrl_err_token(ctx, LRL_Err_VarAlias, first_token); defflags &= ~LRL_DeFl_Alias; } /* Parse constraints */ /* TODO */ /* Check initial value */ if (token->type == LRL_Op_Assign) { /* Parse expression */ if (defflags & (LRL_DeFl_Import|LRL_DeFl_DeclOnly)) { lrl_err_token(ctx, LRL_Err_ImportWithValue, token); } token++; expr = parse_expr(ctx, scope, &token); } else { recover_datadef: /* Use default (undefined) */ expr = make_undef_expr(token); } /* Put in definition list */ linked_append(&def, &list_first, &list_last); def->def.ast_type = LRL_AST_Def_Data; def->def.kind.data.flags = defflags; def->def.kind.data.ident = ident; def->def.kind.data.type = type; def->def.kind.data.value = expr; set_def_node(ident, (LRLASTDefOrStmt*)&def->def); expected(ctx, &token, LRL_Sym_Semicolon); continue; } /* Parse error */ lrl_err_token(ctx, LRL_Err_NotADeclaration, token); /* Fall through */ recover: skip_statement_skip_paren(ctx, &token); } *tokens = token; *parsed = list_first; } LRLASTNamespace *lrl_parse(LRLCtx *ctx, LRLIdent *scope, const LRLToken *tokens) { LRLASTNamespace *root; /* Create root namespace for this file */ root = malloc(sizeof(LRLASTNamespace)); root->ident = scope; root->list = NULL; for (;;) { parse_deflist(ctx, scope, &tokens, &root->list); ctx->has_parsed = 1; if (!tokens->type) break; /* Invalid } detected */ lrl_err_token(ctx, LRL_Err_MismatchedRCurly, tokens); tokens++; } return root; }