/*

  parse.c -- Parsing of a token stream to an AST

  Copyright © 2021-2024 Samuel Lidén Borell

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.

*/
#include "internal.h"
#include "hash.h"
#include "tokencase.h"
#include "ast.h"
/* This file calls assert() and memcpy(), which these headers declare */
#include <assert.h>
#include <string.h>

/* Internal error codes raised by the parser */
#define INTERR_PARSE(errnum) MAKE_INTERR(errnum, INTERRBASE_PARSE)
#define INTERR_BADRPN1 INTERR_PARSE(0x01)
#define INTERR_BADRPN2 INTERR_PARSE(0x02)
#define INTERR_BADRPN3 INTERR_PARSE(0x03)
#define INTERR_BADRPN4 INTERR_PARSE(0x04)
#define INTERR_BADRPNOPER INTERR_PARSE(0x05)
#define INTERR_STACKSTATE INTERR_PARSE(0x06)
#define INTERR_BADTPSENUM INTERR_PARSE(0x07)
#define INTERR_BADEPSENUM INTERR_PARSE(0x08)
#define INTERR_BADGDSENUM INTERR_PARSE(0x09)
#define INTERR_BADSVSENUM INTERR_PARSE(0x0A)

static void parse_slul(struct CSlul *ctx);

/**
 * Parser entry point. Dispatches to the module header parser while
 * ctx->in_moduleheader is set, and to the SLUL source parser otherwise.
 *
 * \param ctx  Compilation context
 */
void cslul_ll_parse(struct CSlul *ctx)
{
    if (ctx->in_moduleheader) {
        parse_moduleheader(ctx);
    } else {
        parse_slul(ctx);
    }
}

/** All SLUL parsing sub-functions (e.g.
parse_type) return these result codes */
enum Result {
    RES_SKIPTOEND = -4, /**< Error occurred. Skip to end of toplevel */
    RES_OUTOFMEM = -3, /**< Out of memory, abort parsing */
    RES_BUFFEREND = -2, /**< End of buffer */
    RES_UNEXPECTEDEOF = -1, /**< EOF error that has been reported already */
    RES_EOF = 0, /**< EOF that caller can choose how to handle */
    RES_OK = 1 /**< Operation was successful (or failed but recovered) */
};
/* True for every negative result code and also for RES_EOF */
#define IS_ERR_OR_EOF(res) UNLIKELY((res) <= 0)
/* True for the negative result codes only (RES_EOF is not an error here) */
#define IS_ERR(res) UNLIKELY((res) < 0)
#define IS_TOPLEVEL_OR_SINCE(tok) \
    (CSLUL_IS_TOPLEVEL(tok) || (tok) == CSLUL_T_KW_Since)
/* Matches the return/continue/break/goto statement keywords */
#define IS_BRACELESS_STMT(tok) ((tok) == CSLUL_T_KW_Return || \
    (tok) == CSLUL_T_KW_Continue || (tok) == CSLUL_T_KW_Break || \
    (tok) == CSLUL_T_KW_Goto)
/* Qualifiers a type starts out with: Q_CLOSED in the CSLUL_P_IMPL phase,
   none otherwise */
#define INITIAL_QUALS(ctx) ((ctx)->phase==CSLUL_P_IMPL ? Q_CLOSED : 0)
/* Flags for parse_ident */
/* PI_DATA, PI_TYPE and PI_FUNC are values of the two-bit field 0x6, so
   PI_FUNC contains the PI_TYPE bit; compare the masked field (as the
   PI_IS_DATA/PI_IS_TYPE/PI_IS_FUNC macros do) instead of testing one bit */
#define PI_DEF 0x01 /**< Definition (if absent: reference) */
#define PI_DATA 0x02 /**< Data */
#define PI_TYPE 0x04 /**< Type */
#define PI_FUNC 0x06 /**< Function */
#define PI_LOCAL 0x20 /**< Context specific local scope */
#define PI_NONIDENT 0x40 /**< Non-identifier.
Used for enum value-trees */ #define PI_IS_DEF(f) (((f) & PI_DEF) != 0) #define PI_IS_LOCAL(f) (((f) & PI_LOCAL) != 0) #define PI_IS_TOPLEVEL(f) (((f) & PI_LOCAL) == 0) #define PI_IS_DATA(f) (((f) & 0x6) == PI_DATA) #define PI_IS_TYPE(f) (((f) & 0x6) == PI_TYPE) #define PI_IS_FUNC(f) (((f) & 0x6) == PI_FUNC) #define PI_IS_NONIDENT(f) (((f) & PI_NONIDENT) == PI_NONIDENT) static enum Result parse_ident(struct CSlul *ctx, struct TreeNode **newident, struct TreeNode **root, unsigned flags); static enum Result parse_lifetime_ident(struct CSlul *ctx, struct IdentDecl **match, struct TreeNode *params_root); static enum Result parse_sinceversions(struct CSlul *ctx, struct ApiRefList **sinceversions, int is_toplevel); static enum Result parse_expr_start(struct CSlul *ctx, struct ExprRoot **rootptr); static enum Result parse_expr(struct CSlul *ctx); static void parse_funcbody_end(struct CSlul *ctx); static struct TreeNode *tree_search_tok(struct CSlul *ctx, struct TreeNode *root) { return tree_search(ctx, root, ctx->tokhash, ctx->toklen, ctx->tokval); } static void insert_toplevel_ident(struct CSlul *ctx, struct TopLevelIdent *tlident) { if (tlident->decl.ident.is_new) { PROTECT_STRUCT(*tlident); PROTECT_STRUCT(tlident->decl); tlident->iface_decl = NULL; tlident->next = ctx->tl.idents_list; ctx->tl.idents_list = tlident; } } static void insert_toplevel_type(struct CSlul *ctx, struct TopLevelType *tltype) { if (tltype->decl.ident.is_new) { tltype->decl.typeidents = NULL; PROTECT_STRUCT(*tltype); PROTECT_STRUCT(tltype->decl); /* tree.c:create_node() fills with a non-zero bit pattern in debug mode */ tltype->decl.type.type = T_INVALID; tltype->iface_decl = NULL; tltype->next = ctx->tl.types_list; ctx->tl.types_list = tltype; } } static void set_sourceline(struct CSlul *ctx, struct TreeNode *ident) { ident->line = ctx->tokline; ident->column = ctx->tokcolumn; ident->filename = ctx->current_filename; } /** * A method on a generic type may be declared before the generic type is * 
declared. In that case, the type parameters will not have been declared * yet. This function queues an undefined type parameter for later checking. * * Returns 1, except on fatal errors in which case it returns 0. */ static int add_seen_typeparam(struct CSlul *ctx, struct TypeDecl **paramdecl_out) { struct TypeDecl *cl = ctx->parser.slul.current_class; struct TypeDecl *decl; enum CSlulErrorCode errcode; assert(cl != NULL); if (IS_IDENT_DEFINED(cl)) { if (LIKELY(cl->type.type == T_GENERICDEF)) { decl = (struct TypeDecl*)tree_search_tok(ctx, cl->type.u.gdef->params_root); if (UNLIKELY(!decl)) { errcode = CSLUL_E_TYPEPARAMNOTFOUND; goto report_error; } } else { errcode = CSLUL_E_NONPARAMETRICTYPE; goto report_error; } } else { if (cl->type.type != T_GENERICSEEN) { assert(cl->type.type == T_INVALID); cl->type.type = T_GENERICSEEN; cl->type.u.genericseen = NULL; } decl = (struct TypeDecl*)tree_insert(ctx, &cl->type.u.genericseen, ctx->tokhash, ctx->toklen, ctx->tokval, NULL, sizeof(struct TypeDecl)); if (!decl) return 0; PROTECT_STRUCT(*decl); decl->type.type = T_INVALID; /* will be linked to the real param */ set_sourceline(ctx, &decl->ident); } *paramdecl_out = decl; return 1; report_error: error_tok(ctx, errcode); *paramdecl_out = NULL; return 1; } static enum Result parse_type_start(struct CSlul *ctx, struct Type *type) { struct TypeStackEntry *entry; int depth = ctx->typedepth+2; if (UNLIKELY(depth >= MAXTYPEDEPTH)) { error_tok(ctx, CSLUL_E_TYPETOODEEP); return RES_SKIPTOEND; } ctx->typedepth = depth; entry = &ctx->typestack[depth]; entry->type = type; entry->state = TPSDone; /* Add boundary entry (for removing all entries on error) */ entry--; entry->state = TPSBoundary; return RES_OK; } /** Parses a type. The parse buffer can end even inside a nested type definition, so this function needs to be able to restart parsing even inside nested types. 
*/
/*
    Implementation notes:

    The function is a goto-based state machine that works on
    ctx->typestack. Each stack entry holds the type being filled in and a
    TPS* state that tells where to continue. Before returning because the
    token source needs more data, the state of the current entry has been
    set, so that the next call can jump back in through the switch at
    "restore_state".

    When a nested type (type parameter, field, parameter, return type,
    enum base type) is to be parsed, a new entry is pushed and control
    goes to "nested_type". When a type is complete, the entry is popped
    and the parent continues via "restore_state", unless the entry below
    is the TPSBoundary marker, in which case the root type is complete.

    Returns:
      RES_OK             The type was parsed
      RES_BUFFEREND      More data is needed. Call again to continue
      RES_UNEXPECTEDEOF  EOF inside the type
      RES_SKIPTOEND      Syntax error (reported). The type stack has been
                         unwound past the boundary entry
      RES_OUTOFMEM       Allocation failure
*/
static enum Result parse_type(struct CSlul *ctx)
{
    enum CSlulToken tok;
    struct TypeStackEntry *stack = &ctx->typestack[ctx->typedepth];
    struct Type *type;
    struct TreeNode *ident;
    struct FieldOrParamEntry *field;
    enum Result res;

    /* A non-zero state means that a previous call stopped in the middle
       of a type. (A fresh entry has state TPSDone, see parse_type_start,
       so TPSDone is presumably 0) */
    if (UNLIKELY(stack->state)) {
      restore_state:
        type = stack->type;
        switch (stack->state) {
        case TPSQuals: goto in_quals;
        case TPSTypeParamLBracket: goto in_typeparam_lbracket;
        case TPSTypeParamComma: goto in_typeparam_comma;
        case TPSArrayExpr: goto in_array_expr;
        case TPSArrayRSquare: goto in_array_rsquare;
        case TPSStructLCurly: goto in_struct_lcurly;
        case TPSStructMemberStart:
        case TPSFuncParamStart: goto in_struct_member_start;
        case TPSStructMemberIdent:
        case TPSFuncParamIdent: goto in_struct_member_ident;
        case TPSStructMemberSince: goto in_struct_member_sincever;
        case TPSFuncLParen: goto in_func_lparen;
        case TPSFuncParamComma: goto in_funcparam_comma;
        case TPSFuncReturnArrow: goto in_funcreturn_arrow;
        case TPSFuncReturnType: goto in_funcreturn_type;
        case TPSFuncLifetimeKW: goto in_func_lifetime_kw;
        case TPSFuncLifetimeIdentA: goto in_func_lifetime_ident_a;
        case TPSFuncLifetimeGreater: goto in_func_lifetime_greater;
        case TPSFuncLifetimeIdentB: goto in_func_lifetime_ident_b;
        case TPSEnumStart: goto in_enum_start;
        case TPSEnumLCurly: goto in_enum_lcurly;
        case TPSEnumValues: goto in_enum_values;
        case TPSEnumIdent: goto in_enum_ident;
        case TPSEnumSince: goto in_enum_since;
        case TPSEnumEquals: goto in_enum_equals;
        case TPSEnumExpr: goto in_enum_expr;
        case TPSOptionalType: goto in_optional_type;
        case TPSSlotIdent: goto in_slot_ident;
        case TPSDone:
        case TPSBoundary:
            /* Internal error. Execution continues at root_type below */
            error_tok(ctx, INTERR_BADTPSENUM);
        }
    }
    type = stack->type;
    goto root_type;

  nested_type:
    ctx->parser.slul.in_toplevel_type = 0;
  root_type:
    /* Check for type qualifiers.
       Module-private types in implementation code are always closed */
    type->quals = INITIAL_QUALS(ctx);
    PROTECT_STRUCT(*type);
    for (;;) {
        unsigned qual;
      in_quals:
        tok = cslul_ll_next_slul_token(ctx);
        if (tok == CSLUL_T_NEEDDATA) goto buffer_end;
        if (tok < CSLUL_T_FirstQual || tok > CSLUL_T_LastQual) break;
        stack->state = TPSQuals;
        /* Each qualifier token corresponds to one bit in type->quals */
        qual = 1 << (tok - CSLUL_T_FirstQual);
        if (UNLIKELY((type->quals & qual) != 0)) {
            /* Already set: either a duplicate, or an explicit "closed"
               on top of the implicit one in implementation code */
            error_tok(ctx, qual==Q_CLOSED && ctx->phase==CSLUL_P_IMPL ?
                    CSLUL_E_BADIMPLQUAL : CSLUL_E_DUPLQUAL);
            continue;
        } else if (UNLIKELY(ctx->parser.slul.in_toplevel_data &&
                            (qual & FORBIDDEN_QUALS_TLDATA) != 0)) {
            error_tok(ctx, CSLUL_E_BADTLDATAQUAL);
            continue;
        } else if (UNLIKELY((qual &
                             ctx->parser.slul.forbidden_quals) != 0)) {
            error_tok(ctx, CSLUL_E_QUALNOTALLOWED);
            continue;
        }
        type->quals |= qual;
        /* TODO restrictions for qualifiers:
            closed
                - only makes sense in interfaces
                  (mainly on structs and enums)
                - *could* make sense on integers etc in type defs,
                  if they default to open (which would imply that their
                  size is unknown by default), i.e. that they can become
                  structs in the future.
            var/writeonly
                - only makes sense on variables (local and global)
            threaded/aliased
                - ???
            (own/arena are types of references and not type qualifiers
                - this makes it impossible to have "own addr" or
                  "arena addr" though
                - "own addr" does not make sense
                - "arena addr" could make sense) */
        /* TODO It is easy to confuse "var ref int" and "ref var int".
           Can it be solved?
            Simple examples:
                var int  var int  var-ref int  var ref> int
                threaded-ref int  threaded ref> int
                var-threaded-ref int  var threaded ref> int
            Complex examples:
                ref var int  ref var-int  ref var int  ref> var int
                ref var threaded int  ref var-threaded-int
                ref var-threaded int  ref> var threaded int
                ref ref int  ref ref-int  ref ref int  ref> ref> int
            "ref>" syntax:
                - If there is only a "ref T", then the > could be omitted
                  (is this exception a good idea? should be very common
                  though)
                    - good: easier to type. this is probably the 80+% case.
                    - bad: harder to spot visually. harder for beginners
                - If there are multiple refs, then there should be a >
                - If there are type qualifiers on the ref, it must have >
                - If there are type qualifiers on the target type, the ref
                  must have >
                - "ref>" could be handled as a special case in the
                  tokenizer (i.e. "ref>" is a keyword)
                  (this works because you cannot use "ref" as an
                  identifier) */
    }
    /* tok is now the first token after the qualifiers */
    type->line = ctx->tokline;
    type->column = ctx->tokcolumn;
    ctx->parser.slul.forbidden_quals = 0;
    switch (tok) {
    case CSLUL_T_EOF:
        goto bad_eof;
    case CSLUL_T_NEEDDATA:
        goto buffer_end;
    case CSLUL_T_UpperIdent: {
        struct Type *identtype;
        struct GenericPrm *gprm;
        struct TypeDecl *decl;
        /* Type parameters should not appear without a "slot" keyword
           before */
        decl = (struct TypeDecl *)tree_search_tok(ctx, ctx->params_root);
        if (UNLIKELY(decl)) {
            message_set_token(ctx, 0, CSLUL_LT_MAIN);
            message_set_ident(ctx, 1, CSLUL_LT_DEFINITION, &decl->ident);
            message_final(ctx, CSLUL_E_TYPEPARAMWITHOUTSLOT);
        }
        /* Find the named top-level type, or insert it if it is new */
        decl = (struct TypeDecl*)tree_insert(ctx, &ctx->tl.types_root,
                ctx->tokhash, ctx->toklen, ctx->tokval,
                NULL, sizeof(struct TopLevelType));
        if (!decl) return RES_OUTOFMEM;
        insert_toplevel_type(ctx, (struct TopLevelType *)decl);
        PROTECT_STRUCT(decl->type);
        /* decl->type.defflags is set to 0 by a memset in tree_insert */
        type->type = T_IDENT;
        type->misc = 0;
        type->u.ident = decl;
        /* Check for type parameters */
      in_typeparam_lbracket:
        stack->state = TPSTypeParamLBracket;
        tok = cslul_ll_next_slul_token(ctx);
        if (tok == CSLUL_T_NEEDDATA) goto buffer_end;
        if (LIKELY(tok != CSLUL_T_Less)) {
            /* Plain identifier type. Hand the token back */
            ctx->reused_token.slul = tok;
            break;
        }
        if (ctx->typedepth >= MAXTYPEDEPTH-1) goto too_deep;
        /* Turn the T_IDENT into a T_GENERICSPEC that wraps a copy
           of the identifier type */
        gprm = aallocp(ctx, sizeof(struct GenericPrm));
        if (!gprm) goto outofmem;
        PROTECT_STRUCT(*gprm);
        identtype = aallocp(ctx, sizeof(struct Type)); /* move ident type */
        if (!identtype) goto outofmem;
        *identtype = *type;
        PROTECT_STRUCT(*identtype);
        gprm->generictype = identtype;
        type->type = T_GENERICSPEC;
        type->misc = 0; /* number of type params */
        type->u.gprm = gprm;
        stack->state = TPSTypeParamComma;
        stack->type = type;
        stack->nextptr.prmentry = NULL;
        for (;;) {
            struct PrmEntry *prmentry;
            if (!stack->nextptr.prmentry) {
                prmentry = &gprm->param; /* First entry is stored in struct */
            } else {
                /* Not first entry */
                prmentry = aallocp(ctx, sizeof(struct PrmEntry));
                if (!prmentry) goto outofmem;
                PROTECT_STRUCT(*prmentry);
                *stack->nextptr.prmentry = prmentry;
            }
            prmentry->next = NULL;
            stack->nextptr.prmentry = &prmentry->next;
            if (UNLIKELY(stack->type->misc++ == MAX_TYPE_PARAMS)) {
                error_tok(ctx, CSLUL_E_TOOMANYTYPEPARAMS);
            }
            /* Push an entry for the type parameter and parse it */
            stack++;
            ctx->typedepth++;
            ctx->generic_param_depth++;
            stack->state = TPSDone;
            stack->type = type = &prmentry->type;
            goto nested_type;
          in_typeparam_comma:
            /* Reached through restore_state once the type parameter
               is complete */
            gprm = type->u.gprm;
            stack->state = TPSTypeParamComma;
            tok = cslul_ll_next_slul_token(ctx);
            if (tok > 0) ctx->generic_param_depth--;
            if (tok == CSLUL_T_Greater) break;
            if (UNLIKELY(tok != CSLUL_T_Comma)) {
                if (tok <= 0) goto buffer_end_noeof;
                /* FIXME different errors:
                    - identifier on same line: probably missing >
                    - beginning of type: probably missing ,
                    - */
                error_prevtok_end(ctx, CSLUL_E_TYPEPARAMNOCOMMA);
                break;
            }
        }
        stack->state = TPSDone;
        break;
    }
    case CSLUL_T_LSquare: {
        /* Array type */
        struct ArrayType *arrtype;
      inner_array:
        type->type = T_ARRAY;
        arrtype = aallocp(ctx, sizeof(struct ArrayType));
        type->u.arr = arrtype;
        if (!arrtype) goto outofmem;
        type->misc = M_LONG_LENGTH;
        PROTECT_STRUCT(*arrtype);
        PROTECT_STRUCT(arrtype->elemtype);
        arrtype->elemtype.type = T_INVALID;
        arrtype->elemtype.defflags = 0;
        arrtype->elemtype.quals = type->quals;
        res = parse_expr_start(ctx, &arrtype->lengthexpr);
        if (IS_ERR(res)) goto err_res_noeof;
        /* The stack entry now tracks the element type. No new entry
           is pushed for it */
        stack->type = type = &arrtype->elemtype;
      in_array_expr:
        stack->state = TPSArrayExpr;
        res = parse_expr(ctx);
        if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
        /* TODO If the length is constant and less than 2^16,
                then we could store it directly in the misc field. */
      in_array_rsquare:
        stack->state = TPSArrayRSquare;
        tok = cslul_ll_next_slul_token(ctx);
        if (tok <= 0) goto buffer_end_noeof;
        if (tok == CSLUL_T_Comma) {
            /* Another dimension: the element type becomes an array */
            PROTECT_STRUCT(*type);
            goto inner_array;
        }
        if (tok != CSLUL_T_RSquare) goto bad_type;
        stack->state = TPSDone;
        goto nested_type;
    }
    case CSLUL_T_KW_Struct: {
        struct IdentDecl *fielddecl;
        if (!ctx->parser.slul.in_toplevel_type) {
            type->quals |= Q_CLOSED;
        }
        type->type = T_INVALID;
        type->misc = (type->quals & Q_CLOSED) != 0 ? M_KNOWN_SIZE : 0;
        if (ctx->typedepth == 2) {
            /* since-versions are forbidden in fields in nested structs,
               but it is possible that a field that makes up a nested
               struct has a since-version, and that needs to be saved */
            ctx->outer_member_sincevers =
                    ctx->previous_member_sinceversions;
            ctx->previous_member_sinceversions = NULL;
        }
      in_struct_lcurly:
        stack->state = TPSStructLCurly;
        tok = cslul_ll_next_slul_token(ctx);
        if (tok <= 0) goto buffer_end_noeof;
        if (tok != CSLUL_T_LCurly) goto bad_type;
        if (ctx->typedepth >= MAXTYPEDEPTH-1) goto too_deep;
        type->type = T_STRUCT;
        type->u.fields = aallocp(ctx, sizeof(struct FieldOrParamList));
        if (!type->u.fields) goto outofmem;
        PROTECT_STRUCT(*type->u.fields);
        type->u.fields->count = 0;
        type->u.fields->first = NULL;
        type->u.fields->fields_root = NULL;
        stack->nextptr.field = &type->u.fields->first;
        stack->state = TPSStructMemberStart;
        /* The code from here up to struct_end is shared between struct
           fields and function parameters. stack->state tells which of
           the two is being parsed */
      in_struct_member_start:
        /* Check for end of fields/params */
        tok = cslul_ll_next_slul_token(ctx);
        if (tok <= 0) goto buffer_end_noeof;
        if (stack->state == TPSStructMemberStart) {
            if (tok == CSLUL_T_RCurly) goto struct_end;
        } else {
            assert(stack->state == TPSFuncParamStart);
            if (tok == CSLUL_T_RParen) goto in_funcreturn_arrow;
        }
        ctx->reused_token.slul = tok;
        field = aallocp(ctx, sizeof(struct FieldOrParamEntry));
        if (!field) goto outofmem;
        PROTECT_STRUCT(*field);
        field->next = NULL;
        field->f.vardef.decl.u.initval = NULL;
        /* Parameters of a T_METHOD are numbered from 1 instead of 0 */
        field->f.vardef.var_id = stack->type->u.fields->count +
            (stack->state==TPSFuncParamStart && type->type==T_METHOD ? 1:0);
        /* Append to the field/parameter list */
        *stack->nextptr.field = field;
        stack->nextptr.field = &field->next;
        stack->type->u.fields->count++;
        if (stack->state == TPSFuncParamStart) {
            ctx->previous_member_sinceversions = NULL;
            stack->state = TPSFuncParamIdent;
        } else {
            assert(stack->state == TPSStructMemberStart);
          in_struct_member_sincever:
            stack->state = TPSStructMemberSince;
            /* NOTE(review): nextptr.field points at the "next" member of
               the newest entry. The cast presumably relies on "next"
               being the first member of struct FieldOrParamEntry
               -- confirm */
            field = (struct FieldOrParamEntry*)stack->nextptr.field;
            res = parse_sinceversions(ctx, &field->f.sinceversions, 0);
            if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
            if (UNLIKELY(field->f.sinceversions &&
                         ctx->typedepth >= 2)) {
                error_sincever(ctx, CSLUL_E_NESTEDSINCE);
            } else if (UNLIKELY(field->f.sinceversions &&
                                (stack->type->quals & Q_CLOSED) != 0)) {
                error_sincever(ctx, CSLUL_E_VERSIONEDCLOSEDTYPE);
            }
            ctx->previous_member_sinceversions = field->f.sinceversions;
            stack->state = TPSStructMemberIdent;
        }
        /* Parse type of field/param */
        stack++;
        ctx->typedepth++;
        stack->state = TPSDone;
        stack->type = type = &field->f.vardef.decl.type;
        goto nested_type;
      in_struct_member_ident: /* or function parameter */
        /* Struct is at stack[0]. (stack[1] is the field's, *innermost*
           type which isn't useful here) */
        fielddecl = &((struct FieldOrParamEntry*)stack->nextptr.field)->f.vardef.decl;
        /* TODO get rid of these ugly casts */
        ident = &fielddecl->ident;
        res = parse_ident(ctx, &ident, &stack->type->u.fields->fields_root,
                          PI_DEF|PI_DATA|PI_LOCAL);
        if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
        assert(ident == &fielddecl->ident);
        set_sourceline(ctx, ident);
        fielddecl->type.defflags = D_DEFINED;
        if (stack->state == TPSFuncParamIdent) {
            fielddecl->type.defflags |= D_LOCAL;
          in_funcparam_comma:
            stack->state = TPSFuncParamComma;
            tok = cslul_ll_next_slul_token(ctx);
            if (tok <= 0) goto buffer_end_noeof;
            if (tok == CSLUL_T_RParen) goto in_funcreturn_arrow;
            if (tok != CSLUL_T_Comma) goto bad_type;
            stack->state = TPSFuncParamStart;
        } else {
            assert(stack->state == TPSStructMemberIdent);
            stack->state = TPSStructMemberStart;
        }
        goto in_struct_member_start;
      struct_end:
        stack->state = TPSDone;
        if (ctx->typedepth == 2) {
            /* Restore what was saved at the "struct" keyword */
            ctx->previous_member_sinceversions =
                    ctx->outer_member_sincevers;
        }
        break;
    }
    case CSLUL_T_KW_FuncRef:
        type->type = T_FUNC;
        type->misc = 0;
        type->u.func = NULL;
      funcref:
        if (ctx->parser.slul.in_toplevel_data &&
                ctx->generic_param_depth == 0) { /* don't report twice */
            error_tok(ctx, CSLUL_E_BADTLDATAREF);
        }
      in_func_lparen:
        stack->state = TPSFuncLParen;
        tok = cslul_ll_next_slul_token(ctx);
        if (tok <= 0) goto buffer_end_noeof;
        if (tok != CSLUL_T_LParen) goto bad_type;
        if (ctx->typedepth >= MAXTYPEDEPTH-1) goto too_deep;
        type->u.func = aallocp(ctx, sizeof(struct FuncType));
        /* NOTE(review): u.func was assigned but u.fields is tested, and
           the parameter list is accessed through u.fields below.
           Presumably both are members of the same union and struct
           FuncType starts with the parameter list -- confirm */
        if (!type->u.fields) goto outofmem;
        PROTECT_STRUCT(*type->u.func);
        type->u.fields->count = 0;
        type->u.fields->first = NULL;
        type->u.fields->fields_root = NULL;
        type->u.func->returntype.defflags = 0;
        type->u.func->returntype.type = T_INVALID;
        stack->nextptr.field = &type->u.fields->first;
        stack->state = TPSFuncParamStart;
        goto in_struct_member_start; /* re-use struct parsing */
      in_funcreturn_arrow:
        /* Parse "-> TYPE" / "noreturn" */
        stack->state = TPSFuncReturnArrow;
        tok = cslul_ll_next_slul_token(ctx);
        if (tok == CSLUL_T_RArrow) {
            /* "-> T" */
            stack->state = TPSFuncReturnType;
          in_funcreturn_type:
            tok = cslul_ll_next_slul_token(ctx);
            if (tok <= 0) goto buffer_end_noeof;
            ctx->reused_token.slul = tok;
            stack->state = TPSFuncLifetimeKW;
            stack++;
            ctx->typedepth++; /* FIXME check for overflow on those? */
            stack->state = TPSDone;
            stack->type = type = &type->u.func->returntype;
            goto nested_type;
        } else if (tok == CSLUL_T_KW_NoReturn) {
            type->misc |= M_NORETURN;
        } else {
            /* Neither "->" nor "noreturn": the function returns nothing.
               tok is passed on to the lifetime check below */
            if (tok == CSLUL_T_NEEDDATA) goto buffer_end;
            type->misc |= M_VOIDRETURN;
            type->u.func->returntype.type = T_INTERNAL;
            type->u.func->returntype.u.internal = IT_Void;
            goto in_func_lifetime_kw_reusetoken;
        }
        /* Function types may be followed by lifetime specifications
           for the parameters.
           The syntax is "lifetime param_a >= param_b" */
      in_func_lifetime_kw:
        tok = cslul_ll_next_slul_token(ctx);
      in_func_lifetime_kw_reusetoken:
        stack->state = TPSFuncLifetimeKW;
        if (tok == CSLUL_T_NEEDDATA) goto buffer_end;
        if (tok != CSLUL_T_KW_Lifetime) {
            /* End of the function type */
            ctx->reused_token.slul = tok;
            stack->state = TPSDone;
            break;
        }
        stack->state = TPSFuncLifetimeIdentA;
      in_func_lifetime_ident_a:
        res = parse_lifetime_ident(ctx, &ctx->parser.slul.lifetime_a,
                                   type->u.fields->fields_root);
        if (IS_ERR(res)) goto err_res_noeof;
        stack->state = TPSFuncLifetimeGreater;
      in_func_lifetime_greater:
        tok = cslul_ll_next_slul_token(ctx);
        if (UNLIKELY(tok != CSLUL_T_GreaterEqual)) {
            if (tok <= 0) goto buffer_end_noeof;
            error_tok(ctx, CSLUL_E_LIFETIMENONGREATERTOK);
            goto ret_skiptoend; /* TODO also skip to end of line */
        }
        stack->state = TPSFuncLifetimeIdentB;
      in_func_lifetime_ident_b:
        res = parse_lifetime_ident(ctx, &ctx->parser.slul.lifetime_b,
                                   type->u.fields->fields_root);
        if (IS_ERR(res)) goto err_res_noeof;
        if (UNLIKELY(ctx->parser.slul.lifetime_a ==
                     ctx->parser.slul.lifetime_b)) {
            error_tok(ctx, CSLUL_E_LIFETIMESAMEPARAM);
            goto in_func_lifetime_kw; /* skip to next, if any */
        }
        /* Wrap the correct parameters/return */
        /* TODO */
        /* Check if there are more lifetime specifiers */
        goto in_func_lifetime_kw;
    case CSLUL_T_KW_Enum:
        /* open enums default to int type.
           closed enums default to the smallest possible type */
        if (!ctx->parser.slul.in_toplevel_type) {
            error_tok(ctx, CSLUL_E_ENUMOUTSIDETYPEDEF);
            goto ret_skiptoend;
        } else if (ctx->current.typedecl->type.type == T_GENERICDEF) {
            error_tok(ctx, CSLUL_E_GENERICENUM);
            goto ret_skiptoend;
        }
        type->type = T_ENUM;
        type->misc = 0;
        {
            struct EnumType *enu;
            enu = aallocp(ctx, sizeof(struct EnumType));
            type->u.enu = enu;
            if (!enu) goto outofmem;
            PROTECT_STRUCT(*enu);
            PROTECT_STRUCT(enu->base);
            enu->values = NULL;
            enu->values_root = NULL;
            enu->base.defflags = 0;
            enu->base.type = T_INVALID;
        }
      in_enum_start:
        stack->state = TPSEnumStart;
        type = stack->type;
        tok = cslul_ll_next_slul_token(ctx);
        if (tok <= 0) goto buffer_end_noeof;
        if (tok == CSLUL_T_LCurly) {
            /* No explicit base type */
            struct Type *base = &type->u.enu->base;
            if ((type->quals & Q_CLOSED) == 0) {
                base->defflags = D_DEFINED;
                base->type = T_ELMNTRY;
                base->quals = Q_CLOSED;
                base->misc = 0;
                base->u.builtin = BT_Int;
            } else {
                /* Optimal type is determined later */
            }
            goto enum_values_start;
        }
        if (ctx->typedepth >= MAXTYPEDEPTH-1) goto too_deep;
        ctx->reused_token.slul = tok;
        /* Parse base type */
        stack->state = TPSEnumLCurly;
        stack->type->misc |= M_EXPLICIT_BASETYPE;
        stack++;
        ctx->typedepth++;
        stack->state = TPSDone;
        stack->type = type = &type->u.enu->base;
        ctx->parser.slul.forbidden_quals = FORBIDDEN_QUALS_ENUM;
        goto nested_type;
      in_enum_lcurly:
        stack->state = TPSEnumLCurly;
        tok = cslul_ll_next_slul_token(ctx);
        if (UNLIKELY(tok != CSLUL_T_LCurly)) {
            if (tok <= 0) goto buffer_end_noeof;
            error_tok(ctx, CSLUL_E_LCURLYEXPECTED);
            goto ret_skiptoend;
        }
        ctx->previous_member_sinceversions = NULL;
      enum_values_start:
        stack->nextptr.enumval = &stack->type->u.enu->values;
        /* NOTE(review): resuming in state TPSEnumValues jumps to the
           label below, which runs "current_enumval = NULL" again. The
           next value then gets previous_enumval == NULL even if values
           were parsed before the buffer ended -- confirm that this is
           intended */
      in_enum_values:
        {
            struct EnumType *enumtype;
            assert(stack->type->type == T_ENUM);
            ctx->parser.slul.current_enumval = NULL;
            for (;;) {
                struct EnumValueEntry *enumval;
                struct IdentDecl *declptr;
                struct TreeNode *nodeptr;
                stack->state = TPSEnumValues;
                tok = cslul_ll_next_slul_token(ctx);
                if (tok <= 0) goto buffer_end_noeof;
                if (tok == CSLUL_T_RCurly) break;
                ctx->reused_token.slul = tok;
                enumval = aallocp(ctx, sizeof(struct EnumValueEntry));
                if (!enumval) goto outofmem;
                PROTECT_STRUCT(*enumval);
                enumval->next = NULL;
                enumval->e.vd.decl.u.initval = NULL;
                enumval->e.vd.decl.type.type = T_INVALID;
                enumval->e.vd.decl.type.defflags = 0;
                enumval->e.vd.decl.ident.state = TNS_INITIAL;
                /* Append to the list of values */
                *stack->nextptr.enumval = enumval;
                stack->nextptr.enumval = &enumval->next;
                ctx->parser.slul.previous_enumval =
                        ctx->parser.slul.current_enumval;
                ctx->parser.slul.current_enumval = enumval;
                /* Since version */
                stack->state = TPSEnumSince;
              in_enum_since:
                /* Locals are reloaded after each resume label, because
                   the label can be entered directly from restore_state */
                enumval = ctx->parser.slul.current_enumval;
                res = parse_sinceversions(ctx,
                        &enumval->e.vd.sinceversions, 0);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
                if (UNLIKELY(enumval->e.vd.sinceversions &&
                             (stack->type->quals & Q_CLOSED) != 0)) {
                    error_sincever(ctx, CSLUL_E_VERSIONEDCLOSEDTYPE);
                }
                ctx->previous_member_sinceversions =
                        enumval->e.vd.sinceversions;
                stack->state = TPSEnumIdent;
              in_enum_ident:
                enumval = ctx->parser.slul.current_enumval;
                enumtype = stack->type->u.enu;
                /* Insert into enum value tree */
                nodeptr = &enumval->e.treenode;
                res = parse_ident(ctx, &nodeptr, &enumtype->values_root,
                                  PI_DEF|PI_DATA|PI_LOCAL|PI_NONIDENT);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
                assert(nodeptr == &enumval->e.treenode);
                set_sourceline(ctx, nodeptr);
                /* Insert .typeident (re-parsing the same token) */
                declptr = &enumval->e.vd.decl;
                ctx->reused_token.slul = CSLUL_T_LowerIdent;
                res = parse_ident(ctx, (struct TreeNode **)&declptr,
                                  &ctx->current.typedecl->typeidents,
                                  PI_DEF|PI_DATA|PI_LOCAL);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
                assert(declptr == &enumval->e.vd.decl);
                set_sourceline(ctx, &declptr->ident);
                /* Give the value the base type of the enum, if known */
                if (enumtype->base.type != 0) {
                    memcpy(&declptr->type, &enumtype->base,
                           sizeof(struct Type));
                }
                declptr->type.defflags = D_DEFINED;
                /* Enum value */
              in_enum_equals:
                enumval = ctx->parser.slul.current_enumval;
                stack->state = TPSEnumEquals;
                tok = cslul_ll_next_slul_token(ctx);
                if (tok <= 0) goto buffer_end_noeof;
                if (UNLIKELY(tok == CSLUL_T_Comma)) {
                    error_tok(ctx, CSLUL_E_ENUMCOMMA);
                    goto in_enum_equals; /* skip */
                }
                if (tok != CSLUL_T_Assign) {
                    /* No explicit value */
                    ctx->reused_token.slul = tok;
                    continue;
                }
                if ((stack->type->misc & M_EXPLICIT_BASETYPE) == 0) {
                    /* TODO also forbid enumbase operations in this case */
                    error_tok(ctx, CSLUL_E_ENUMEQUALSWITHOUTBASE);
                }
                declptr = &enumval->e.vd.decl;
                res = parse_expr_start(ctx, &declptr->u.initval);
                if (IS_ERR(res)) goto err_res_noeof;
              in_enum_expr:
                stack->state = TPSEnumExpr;
                res = parse_expr(ctx);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
            }
            break;
        }
    case CSLUL_T_KW_Own:
        type->misc = M_OWN;
        goto ref;
    case CSLUL_T_KW_Arena:
        type->misc = M_ARENA;
        goto ref;
    case CSLUL_T_KW_Ref:
        type->misc = M_NORMAL;
      ref: {
        struct Type *innertype;
        if (ctx->parser.slul.in_toplevel_data &&
                ctx->generic_param_depth == 0) { /* don't report twice */
            error_tok(ctx, CSLUL_E_BADTLDATAREF);
        }
        type->type = T_REF;
        innertype = aallocp(ctx, sizeof(*innertype));
        type->u.nested = innertype;
        if (!innertype) goto outofmem;
        PROTECT_STRUCT(*innertype);
        innertype->type = T_INVALID;
        innertype->defflags = 0;
        /* The target type re-uses the stack entry of the reference */
        stack->type = type = innertype;
        goto nested_type;
      }
    case CSLUL_T_Question:
        /* Optional type: "?" followed by ref/arena/own/string/slot/
           funcref */
        type->type = T_INVALID;
      in_optional_type:
        stack->state = TPSOptionalType;
        tok = cslul_ll_next_slul_token(ctx);
        if (tok <= 0) goto buffer_end_noeof;
        stack->state = TPSDone;
        if (tok == CSLUL_T_KW_Ref) {
            type->misc = M_NORMAL | M_OPTIONAL;
            goto ref;
        } else if (tok == CSLUL_T_KW_Arena) {
            type->misc = M_ARENA | M_OPTIONAL;
            goto ref;
        } else if (tok == CSLUL_T_KW_Own) {
            type->misc = M_OWN | M_OPTIONAL;
            goto ref;
        } else if (tok == CSLUL_T_KW_String) {
            type->type = T_ELMNTRY;
            type->misc = M_BT_OPTIONAL;
            type->u.builtin = BT_String;
        } else if (tok == CSLUL_T_KW_Slot) {
            type->type = T_SLOT;
            type->misc = PT_OPTIONAL;
            goto slot;
        } else if (tok == CSLUL_T_KW_FuncRef) {
            type->type = T_FUNC;
            type->misc = M_OPTIONAL;
            type->u.func = NULL;
            goto funcref;
        /* TODO allow enums with a "none" item also */
        } else {
            /* Report the error, then parse the type as if the "?"
               had not been there */
            error_tok(ctx, CSLUL_E_OPTIONALNONREF);
            ctx->reused_token.slul = tok;
            goto nested_type;
        }
        break;
    case CSLUL_T_KW_Slot: {
        /* XXX better keyword than "slot"?
            - generic
            - typeparam
            - typeslot
            - genericslot
            - ref_or_int
            - ref_slot/int_slot/arena_slot/own_slot
           Related: Should there be a keyword for parametric values? */
        struct TypeDecl *decl;
        /* TODO this should only be allowed when there are type
           parameters */
        type->type = T_SLOT;
        type->misc = 0;
      slot:
        stack->state = TPSSlotIdent;
      in_slot_ident:
        tok = cslul_ll_next_slul_token(ctx);
        if (UNLIKELY(tok <= 0)) goto buffer_end_noeof;
        stack->state = TPSDone;
        if (UNLIKELY(tok != CSLUL_T_UpperIdent)) {
            error_tok(ctx, CSLUL_E_SLOTNONTYPEPARAM);
            goto ret_skiptoend;
        }
        decl = (struct TypeDecl *)tree_search_tok(ctx, ctx->params_root);
        if (!decl) {
            if (LIKELY(ctx->parser.slul.is_method)) {
                /* The class could have type parameters, but those are
                   not necessarily available at this point.
                   Existence will be checked in exprchk. */
                if (UNLIKELY(!add_seen_typeparam(ctx, &decl))) {
                    return RES_OUTOFMEM;
                }
            } else {
                error_tok(ctx, CSLUL_E_TYPEPARAMNOTFOUND);
            }
        }
        /* decl is NULL here if the type parameter was not found */
        type->u.ident = decl;
        break;
    }
    /* TODO "lifetime" qualifiers would also make sense on structs
        how to avoid ambiguities when structs appear in return values?
        perhaps the lifetime specifiers could be added after all members
        like this (or perhaps after the "}" ?):
            type Something = struct {
                ref Thing a
                ref Thing b
                lifetime a >= b
            } */
    CASE_SIMPLETYPES
        assert(tok >= CSLUL_T_FirstElemType &&
               tok <= CSLUL_T_LastElemType);
        if (ctx->parser.slul.in_toplevel_type &&
                ctx->current.typedecl->type.type == T_GENERICDEF) {
            error_tok(ctx, CSLUL_E_GENERICELEMENTARY);
            goto ret_skiptoend;
        }
        type->type = T_ELMNTRY;
        type->misc = 0;
        type->u.builtin = tok - CSLUL_T_FirstElemType;
        break;
    CASE_EXCEPT_TYPES
    default:
        type->type = T_INVALID;
        assert(tok < CSLUL_T_FirstElemType || tok > CSLUL_T_LastElemType);
        goto bad_type;
    }
    /* The type is complete. Pop its entry, and continue with the
       enclosing type unless the boundary entry has been reached */
    ctx->typedepth--;
    stack--;
    if (stack->state != TPSBoundary) goto restore_state;
    ctx->typedepth--; /* Pop the boundary entry also */
    return RES_OK;

  buffer_end_noeof:
    /* For places where EOF is an error: tok <= 0 is either
       CSLUL_T_EOF or a request for more data */
    if (tok == CSLUL_T_EOF) goto bad_eof;
  buffer_end:
    return RES_BUFFEREND;
  err_res_noeof:
    /* TODO check this */
    /* Maps an error result from a sub-parser to the matching exit */
    switch (res) {
    case RES_OUTOFMEM: goto outofmem;
    case RES_SKIPTOEND: goto ret_skiptoend;
    case RES_UNEXPECTEDEOF:return RES_UNEXPECTEDEOF;
    case RES_BUFFEREND: goto buffer_end;
    case RES_OK: /* Cannot happen */
    case RES_EOF:;
        assert(0);
    }
    /* Fall through */
  bad_eof:
    error_tok(ctx, CSLUL_E_UNEXPECTEDEOF);
    return RES_UNEXPECTEDEOF;
  bad_type: {
        /* Pick the most specific error message for the bad token */
        enum CSlulErrorCode errcode;
        if (tok == CSLUL_T_EOF) goto bad_eof;
        else if (tok == CSLUL_T_Semicolon) errcode = CSLUL_E_BADSEMICOLON;
        else if (stack->state == TPSFuncLParen)
            errcode = CSLUL_E_FUNCLPAREN;
        else if (tok == CSLUL_T_LowerIdent) {
            if (ctx->toklen == 4 && ctx->tokhash == H_VOID) {
                errcode = CSLUL_E_VOIDNOTATYPE;
            } else {
                errcode = CSLUL_E_LOWERCASETYPE;
            }
        } else if (tok == CSLUL_T_KW_Func && ctx->tokcolumn != 1) {
            errcode = CSLUL_E_TYPEFUNCREFNOTFUNC;
        } else errcode = CSLUL_E_BADTYPE;
        error_tok(ctx, errcode);
        ctx->reused_token.slul = tok;
        goto ret_skiptoend;
    }
  too_deep:
    error_tok(ctx, CSLUL_E_TYPETOODEEP);
  ret_skiptoend:
    /* Drop all entries down to and including the boundary entry */
    while (stack->state != TPSBoundary) {
        stack--;
        ctx->typedepth--;
    }
    ctx->typedepth--;
    ctx->generic_param_depth = 0;
    return
RES_SKIPTOEND; outofmem: return RES_OUTOFMEM; } /** * Gets the size of an identifier struct of the given type. * Types and func/data have different ident sizes, and so * do top-level and non-top-level idents. */ static size_t get_declsize(unsigned flags) { static const unsigned char sizes[] = { sizeof(struct TopLevelIdent), /* PI_TYPE=0, PI_LOCAL=0 */ sizeof(struct IdentDecl), /* PI_TYPE=0, PI_LOCAL=1 */ sizeof(struct TopLevelType), /* PI_TYPE=1, PI_LOCAL=0 */ sizeof(struct TypeDecl) /* PI_TYPE=1, PI_LOCAL=1 */ }; /* PI_LOCAL in bit 0, PI_TYPE in bit 1 */ return sizes[flags>>5|(flags&PI_TYPE)>>1]; } /** * Parses an identifier. If successful, the data in identout is updated. * The identifier is allocated if NULL. If there is an existing identifier * it will be returned, unless there is a duplicate declaration. * * \param ctx Compilation context * \param newident IdentDecl or TypeDecl * \param root Root of identifier tree * \param flags PI_* flags */ static enum Result parse_ident(struct CSlul *ctx, struct TreeNode **newident, struct TreeNode **root, unsigned flags) { struct TreeNode *insresult; enum CSlulToken tok; enum CSlulErrorCode errcode; tok = cslul_ll_next_slul_token(ctx); if (UNLIKELY(!CSLUL_IS_IDENT(tok))) { if (tok == CSLUL_T_NEEDDATA) return RES_BUFFEREND; error_sameline(ctx, CSLUL_E_IDENTIFIEREXPECTED); ctx->reused_token.slul = tok; return RES_SKIPTOEND; } else if (UNLIKELY(!PI_IS_TYPE(flags) && tok != CSLUL_T_LowerIdent)) { errcode = CSLUL_E_CAPITALIZEDNONTYPE; goto error_and_skip; } else if (UNLIKELY(PI_IS_TYPE(flags) && tok != CSLUL_T_UpperIdent)) { errcode = CSLUL_E_LOWERCASETYPE; goto error_and_skip; } insresult = tree_insert(ctx, root, ctx->tokhash, ctx->toklen, ctx->tokval, *newident, get_declsize(flags)); *newident = insresult; if (UNLIKELY(insresult == NULL)) return RES_OUTOFMEM; else if (!insresult->is_new) { struct IdentDecl *decl = (struct IdentDecl *)insresult; if (UNLIKELY(PI_IS_DEF(flags) && (PI_IS_LOCAL(flags) || IS_IDENT_DEFINED(decl)))) 
{ errcode = CSLUL_E_IDENTEXISTS; if (ctx->typedepth >= 0) { struct TypeStackEntry *stack = &ctx->typestack[ctx->typedepth]; if (stack->state == TPSStructMemberIdent) { errcode = CSLUL_E_DUPLFIELD; } else if (stack->state == TPSFuncParamIdent) { errcode = CSLUL_E_DUPLFUNCPARAM; } else if (stack->state == TPSEnumIdent) { assert(stack->type->type == T_ENUM); if (root == &stack->type->u.enu->values_root) { errcode = CSLUL_E_DUPLENUMIDENT; } } } message_set_token(ctx, 0, CSLUL_LT_DUPLICATE); message_set_ident(ctx, 1, CSLUL_LT_DUPLICATE, insresult); message_final(ctx, errcode); return RES_SKIPTOEND; } } if (PI_IS_TOPLEVEL(flags)) { if (PI_IS_TYPE(flags)) { insert_toplevel_type(ctx, (struct TopLevelType *)*newident); } else { insert_toplevel_ident(ctx, (struct TopLevelIdent *)*newident); } } else if (insresult->is_new && !PI_IS_NONIDENT(flags)) { /* When using struct-type protection, is is necessary to register the struct. But "non-identifiers" (such as enum value tree nodes) are only TreeNodes, not full-blown identifiers. So casting them to an IdentDecl would be an error */ if (PI_IS_TYPE(flags)) { PROTECT_STRUCT(*(struct TypeDecl *)insresult); } else { PROTECT_STRUCT(*(struct IdentDecl *)insresult); } } return RES_OK; error_and_skip: error_tok(ctx, errcode); ctx->reused_token.slul = tok; return RES_SKIPTOEND; } /** * Parses a parameter name in a lifetime specification. Store the result * in *match (will be NULL for return). 
*
* A lower_case identifier is looked up in params_root. If it is not found,
* an error is reported and RES_SKIPTOEND is returned. If it is found but
* its type is not T_REF, an error is reported but RES_OK is still returned.
*
* \param ctx          Compilation context
* \param match        Receives the matching parameter, or NULL for "return"
* \param params_root  Root of the tree that is searched for the parameter
* \return RES_OK, RES_BUFFEREND (more input data needed) or RES_SKIPTOEND
*/
static enum Result parse_lifetime_ident(struct CSlul *ctx,
                                        struct IdentDecl **match,
                                        struct TreeNode *params_root)
{
    enum CSlulToken tok = cslul_ll_next_slul_token(ctx);
    if (tok == CSLUL_T_LowerIdent) {
        struct IdentDecl *decl;
        struct TreeNode *ident = tree_search_tok(ctx, params_root);
        if (UNLIKELY(!ident)) {
            error_tok(ctx, CSLUL_E_LIFETIMEIDENTNOTFOUND);
            return RES_SKIPTOEND; /* FIXME avoid skipping to end */
        }
        *match = decl = (struct IdentDecl *)ident;
        if (UNLIKELY(decl->type.type != T_REF)) {
            /* Reported, but parsing continues normally */
            error_tok(ctx, CSLUL_E_LIFETIMENONREF);
        }
        return RES_OK;
    } else if (tok == CSLUL_T_KW_Return) {
        *match = NULL;
        return RES_OK;
    } else if (tok == CSLUL_T_NEEDDATA) {
        return RES_BUFFEREND;
    } else {
        error_tok(ctx, CSLUL_E_LIFETIMENONIDENTTOK);
        return RES_SKIPTOEND;
    }
}

/**
 * Looks up the current token as a local identifier. The statement blocks
 * are searched from ctx->current_block and outwards (via block->base),
 * and after that the tree at ctx->current_functype->u.fields->fields_root
 * is searched, if there is a current function type.
 *
 * \return The matching tree node, or NULL if there is none.
 */
static struct TreeNode *local_ident_lookup(struct CSlul *ctx)
{
    struct StmtBlock *block = ctx->current_block;
    /* Check if it is a local variable, starting from the deepest block */
    for (; block; block = block->base) {
        struct TreeNode *ident;
        if (!block->idents) continue;
        ident = tree_search_tok(ctx, block->idents);
        if (ident) return ident;
    }
    /* Also check if it is a function parameter */
    if (ctx->current_functype) {
        return tree_search_tok(ctx,
                ctx->current_functype->u.fields->fields_root);
    }
    return NULL;
}

/**
 * Starts parsing of a new expression. This allocates an ExprRoot, records
 * the source position of the current token in it, and initializes the next
 * entry on ctx->exprstack (ctx->exprdepth is incremented).
 *
 * \param ctx      Compilation context
 * \param rootptr  Receives the new ExprRoot. Set to NULL if out of memory.
 *                 It is left untouched if the expression is too deep.
 * \return RES_OK, RES_SKIPTOEND (nesting too deep, error has been reported)
 *         or RES_OUTOFMEM.
 */
static enum Result parse_expr_start(struct CSlul *ctx,
                                    struct ExprRoot **rootptr)
{
    struct ExprRoot *root;
    struct ExprStackEntry *entry;
    int depth = ctx->exprdepth+1;

    if (UNLIKELY(depth >= MAXEXPRDEPTH)) {
        error_tok(ctx, CSLUL_E_EXPRTOODEEP);
        return RES_SKIPTOEND;
    }
    root = aallocp(ctx, sizeof(struct ExprRoot));
    if (!root) goto outofmem;
    PROTECT_STRUCT(*root);
    root->rpn = NULL;
    root->filename = ctx->current_filename;
    root->line_start = ctx->tokline;
    root->column_start = ctx->tokcolumn;
    root->is_computed = 0;

    /* The depth is only updated once the allocation has succeeded */
    ctx->exprdepth = depth;
    entry = &ctx->exprstack[depth];
    entry->state = XPSDone;
    entry->operator_expected = 0;
    entry->opstack = NULL;
    entry->out = NULL;
    entry->last_tok_line = -1; /* no token has been parsed yet */
    entry->exprroot = root;
    *rootptr = root;
    return RES_OK;
  outofmem:
    *rootptr = NULL;
    return RES_OUTOFMEM;
}

/* Values of OpInfo.type */
#define Binary  0
#define Ternary 1
#define Postfix 2
#define Prefix  3

/** Parsing information about an operator token */
struct OpInfo {
    /** Precedence. Higher binds tighter. 0 means "not an operator". */
    unsigned precedence : 8;
    /** Right associative (checked when popping the operator stack) */
    unsigned right_assoc : 1;
    /** Used for reporting CSLUL_E_AMBIGUOUSOPERMIX when mixed with
        another operator of the same precedence */
    unsigned no_mixing : 1;
    /** Binary, Ternary, Postfix or Prefix */
    unsigned type : 2;
    /** Operation code (OP_*). Copied to ExprNode.op by parse_expr */
    unsigned op : 5;
};

/* Precedence threshold used in parse_expr when a ( or [ is parsed */
#define CALL_ARRINDEX 17

#define NOT_APPL { 0, 0, 0, 0, 0 }
/* Each token type has two entries in opinfos:
    Left/even index = Binary operation
    Right/odd index = Unary operation */
#define NON_OP NOT_APPL, NOT_APPL,
#define PREFIX_NOMIX(pr, o) NOT_APPL, { (pr), 1, 1, Prefix, (o) },
#define POSTFIX(pr, o)      NOT_APPL, { (pr), 0, 0, Postfix, (o) },
#define BINARY(pr, o)       { (pr), 0, 0, Binary, (o) }, NOT_APPL,
#define BINARY_R(pr, o)     { (pr), 1, 0, Binary, (o) }, NOT_APPL,
#define BINARY_NOMIX(pr, o) { (pr), 0, 1, Binary, (o) }, NOT_APPL,
#define BINARY_PREFIX(binpr, bo, prepr, po) \
    { (binpr), 0, 0, Binary, (bo) }, \
    { (prepr), 1, 0, Prefix, (po) },

#define NUM_REAL_OPS CSLUL_T_KW_Data
/* Pseudo token type, that comes directly after the real ones in opinfos */
#define INTERNAL_OP_FIELDINIT (NUM_REAL_OPS)
#define NUM_TOTAL_OPS (NUM_REAL_OPS+1)

/* TODO go through these */
/* TODO we want to have "user centric" precedence levels,
        where non-obvious cases require parentheses */
/* The table is indexed by token type (see get_opinfo), so the order of
   the rows must match the order of the token enum values.
   NOTE(review): the first row has no comment; presumably it is token
   value 0 - confirm against the token enum. */
static const struct OpInfo opinfos[2*NUM_TOTAL_OPS] = {
    NON_OP
    NON_OP /* Integer literal */
    NON_OP /* Floating point literal */
    NON_OP /* UpperCase identifier */
    NON_OP /* lower_case identifier */
    NON_OP /* Goto target */
    NON_OP /* Version */
    NON_OP /* String */
    NON_OP /* Newline (n/a) */
    NON_OP /* Whitespace (n/a) */
    NON_OP /* Comment (n/a) */
    NON_OP /* LParen */
    NON_OP /* RParen */
    NON_OP /* LSquare */
    NON_OP /* RSquare */
    NON_OP /* LCurly */
    NON_OP /* RCurly */
    BINARY_PREFIX(13, OP_ADD, 15, OP_POS) /* Plus */
    BINARY_PREFIX(13, OP_SUB, 15, OP_NEG) /* Minus */
    BINARY(14, OP_MUL) /* Asterisk */
    BINARY(14, OP_DIV) /* Slash */
    BINARY_NOMIX(6, OP_LESS) /* Less */
    BINARY_R(1, OP_ASSIGN) /* Assign */
    BINARY_NOMIX(6, OP_GREATER) /* Greater */
    NON_OP /* Exclamation */
    NON_OP /* Comma */
    POSTFIX(18, OP_MEMBER) /* Dot. Type scope is handled separately */
    POSTFIX(18, OP_OPTIONAL) /* Question */ /* TODO */
    NON_OP /* Colon */
    NON_OP /* Semicolon */
    BINARY_R(1, OP_ADDASSIGN) /* PlusAssign */
    BINARY_R(1, OP_SUBASSIGN) /* MinusAssign */
    BINARY_R(1, OP_MULASSIGN) /* MultiplyAssign */
    BINARY_R(1, OP_DIVASSIGN) /* DivideAssign */
    BINARY_NOMIX(6, OP_LEQ) /* LessEqual */
    BINARY_NOMIX(6, OP_EQ) /* Equal */
    BINARY_NOMIX(6, OP_GEQ) /* GreaterEqual */
    BINARY_NOMIX(6, OP_NOTEQ) /* NotEqual */
    NON_OP /* RightArrow (not used in exprs) */
    PREFIX_NOMIX(4, OP_NOT) /* not */
    BINARY_NOMIX(4, OP_AND) /* and */
    BINARY_NOMIX(4, OP_OR) /* or */
    BINARY(14, OP_MOD) /* mod */
    PREFIX_NOMIX(17, OP_DEREF) /* deref */
    PREFIX_NOMIX(17, OP_REFTO) /* refto */
    BINARY_NOMIX(6, OP_REF_IS) /* ref_is */
    BINARY_NOMIX(6, OP_REF_IS_NOT) /* ref_is_not */
    NON_OP /* none */
    NON_OP /* undef */
    NON_OP /* this */
    NON_OP /* false */
    NON_OP /* true */
    PREFIX_NOMIX(1, 0) /* Internal operation for E_FIELDINIT */
};
#undef NOT_APPL

/* Gets the OpInfo of an RPN node. Nodes with the context RC_UNARY use
   the odd (unary) entry, and all other nodes use the even (binary) entry. */
#define get_opinfo(node) (opinfos[((node)->rpntokentype << 1) | \
                          ((node)->rpncontext == RC_UNARY ? 0x1 : 0x0)])

/**
 * Converts a stack of tokens in Reverse Polish Notation to an AST subtree.
 *
 * The nodes are processed in RPN order (following rpnnext). The local
 * variable "expr" is the top of a stack of finished sub-expressions, that
 * are linked via u.exprnext. Operators and lists pop their operands from
 * that stack, and each processed node is pushed onto it.
 *
 * For nodes inside a function body, ctx->funcbody->num_temporaries is
 * also increased.
 *
 * \param ctx   Compilation context
 * \param node  First node in RPN order. Must not be NULL.
 * \return The last processed node, or NULL on out of memory or on
 *         an internal error (malformed RPN).
 */
static struct ExprNode *rpn_to_ast(struct CSlul *ctx, struct ExprNode *node)
{
    struct OpInfo op;
    struct ExprNode *expr = NULL;
    struct ExprNode *nextnode;

    assert(node);
    do {
        /* Must be read here, before the node is modified */
        nextnode = node->rpnnext;
        op = get_opinfo(node);
        if (!op.precedence) {
            /* Terminals (identifiers and literals) */
            switch ((int)node->rpntokentype) {
            case CSLUL_T_LowerIdent:
            case CSLUL_T_KW_Undef:
            case CSLUL_T_KW_None:
            case CSLUL_T_KW_This:
            case CSLUL_T_KW_False:
            case CSLUL_T_KW_True:
            case CSLUL_T_Integer:
            case CSLUL_T_Float:
            case CSLUL_T_String:
            case CSLUL_T_Dot:
                /* Already processed in parse_expr */
                break;
            case CSLUL_T_LSquare:
                node->exprtype = node->rpncontext == RC_ARGLIST ?
                        E_INDEX : E_ARRAY;
                /* TODO forbid "nested" arr[i][j] syntax when arr[i,j] works,
                        (i.e. when arr is not an array of references) */
                if (ctx->funcbody) {
                    /* Temporary variables:
                       - Index ops: multiplication, sum of products, result
                       - Array literals: address */
                    ctx->funcbody->num_temporaries +=
                            node->exprtype == E_INDEX ? 3 : 1;
                }
                goto add_elems;
            case CSLUL_T_LParen:
                node->exprtype = (node->rpncontext == RC_ARGLIST ?
                        E_CALL : E_STRUCT);
                /* Temporary variables:
                   - Calls: return value
                   - Struct literals: address */
                if (ctx->funcbody) ctx->funcbody->num_temporaries++;
              add_elems: {
                /* Function call, array index, array value or struct value */
                /* "a" is overwritten below, so the count is read first */
                size_t numargs = node->a.rpnargs;
                struct ExprNode **elemptr;
                struct ExprList *list = aallocp(ctx, sizeof(struct ExprList));
                if (!list) goto outofmem;
                PROTECT_STRUCT(*list);
                node->a.exprlist = list;
                list->length = numargs;
                if (!numargs) {
                    list->elems = NULL;
                } else {
                    /* TODO add limit of maximum number of args */
                    if ((list->elems = aallocp(ctx,
                            numargs*sizeof(struct ExprNode *))) == NULL)
                        goto outofmem;
                    /* The last element is at the top of the stack,
                       so the array is filled from the end */
                    elemptr = &list->elems[numargs];
                    do {
                        if (UNLIKELY(!expr)) {
                            internal_error(ctx, INTERR_BADRPN2);
                            return NULL;
                        }
                        *(--elemptr) = expr;
                        expr = expr->u.exprnext;
                    } while (--numargs);
                }
                if (node->rpncontext == RC_ARGLIST) {
                    /* Pop array (to be accessed via index) or function */
                    if (UNLIKELY(!expr)) {
                        internal_error(ctx, INTERR_BADRPN1);
                        return NULL;
                    }
                    node->b.expr = expr;
                    if (node->exprtype == E_INDEX) {
                        expr->is_element_base = 1;
                    } else {
                        expr->is_called = 1;
                    }
                    expr = expr->u.exprnext;
                }
                break; }
            default:
                internal_error(ctx, INTERR_BADRPNOPER);
                return NULL;
            }
            node->u.exprnext = expr;
        } else if (op.type == Binary) {
            struct ExprNode *exprnext;
            int is_assign_op;
            if (UNLIKELY(!expr || !expr->u.exprnext)) {
                internal_error(ctx, INTERR_BADRPN3);
                return NULL;
            }
            /* The top of the stack goes to "b", and the
               operand below it goes to "a" */
            exprnext = expr->u.exprnext;
            node->u.exprnext = exprnext->u.exprnext;
            node->b.expr = expr;
            node->a.expr = exprnext;
            node->exprtype = E_BINARYOP;
            is_assign_op = IS_ASSIGN_OP(node->op);
            exprnext->is_assigned = is_assign_op;
            if (ctx->funcbody) {
                ctx->funcbody->num_temporaries +=
                        (is_assign_op ? 2 : 1);
            }
        } else if (op.type == Ternary) {
            /* TODO */
            /* (there are no Ternary entries in opinfos) */
        } else {
            /* Postfix or prefix operators */
            if (UNLIKELY(!expr)) {
                internal_error(ctx, INTERR_BADRPN4);
                return NULL;
            }
            node->u.exprnext = expr->u.exprnext;
            node->a.expr = expr;
            /* Fields and field initializers have
               already gotten their exprtype in parse_expr */
            if (node->rpntokentype != CSLUL_T_Dot &&
                    node->rpntokentype != INTERNAL_OP_FIELDINIT) {
                node->exprtype = E_UNARYOP;
            }
            if (ctx->funcbody) ctx->funcbody->num_temporaries++;
        }
        /* Push the finished node */
        expr = node;
        node = nextnode;
    } while (node);
    return expr;
  outofmem:
    return NULL;
}

/**
 * Reverses a list of nodes that are linked via rpnnext, in place.
 * The u.exprnext pointers are set to the same values as rpnnext.
 *
 * \param first  First node of the list to reverse. Must not be NULL.
 * \return The first node of the reversed list (i.e. the old last node).
 */
static struct ExprNode *reverse_rpn_list(struct ExprNode *first)
{
    struct ExprNode *current;
    struct ExprNode *previous;

    assert(first);
    previous = first;
    current = first->rpnnext;
    first->rpnnext = NULL;
    first->u.exprnext = NULL;
    while (current) {
        struct ExprNode *next = current->rpnnext;
        current->rpnnext = previous;
        current->u.exprnext = previous;
        previous = current;
        current = next;
    }
    assert(previous != NULL);
    return previous;
}

/* Moves the top node of the operator stack to the output stack.
   Uses the local variables "opstack" and "out" of the caller,
   and opstack must not be NULL. */
#define OPSTACK_TO_OUT do { \
        struct ExprNode *tmp = opstack; \
        opstack = tmp->rpnnext; \
        tmp->rpnnext = out; \
        out = tmp; \
    } while (0)

/**
 * Allocates an expression node. The flags are cleared and the source
 * position is set from the current token (the line is stored relative
 * to root->line_start). Other fields are left for the caller to set.
 *
 * \return The new node, or NULL if out of memory.
 */
static struct ExprNode *new_exprnode(struct CSlul *ctx, struct ExprRoot *root)
{
    struct ExprNode *node = aallocp(ctx, sizeof(struct ExprNode));
    if (!node) return NULL;
    PROTECT_STRUCT(*node);
    node->is_assigned = 0;
    node->is_element_base = 0;
    node->is_called = 0;
    node->line_offset = ctx->tokline - root->line_start;
    node->column = ctx->tokcolumn;
    return node;
}

/**
 * Parses an expression. The operations and values are parsed into a singly
 * linked list, that is then reversed to obtain the RPN (Reverse Polish
 * Notation) of the expression. The RPN is then used to build a tree structure
 * with the final/outer operation as the root node.
 *
 * The algorithm is based on the "Shunting-yard" algorithm by Edsger Dijkstra.
* https://en.wikipedia.org/wiki/Shunting-yard_algorithm */ static enum Result parse_expr(struct CSlul *ctx) { enum CSlulToken tok; struct ExprStackEntry *exprstate = &ctx->exprstack[ctx->exprdepth]; struct ExprRoot *root; struct ExprNode *opstack, *out; int operator_expected, last_tok_line; operator_expected = exprstate->operator_expected; last_tok_line = exprstate->last_tok_line; opstack = exprstate->opstack; out = exprstate->out; root = exprstate->exprroot; if (UNLIKELY(exprstate->state)) { switch (exprstate->state) { case XPSArglistLookahead: goto in_arglist_lookahead; case XPSCommaCheck: goto in_comma_check; case XPSDot: goto in_dot; case XPSMaybeFieldInit: goto in_maybe_field_init; case XPSStringLookahead: goto in_string_lookahead; case XPSDone: error_tok(ctx, INTERR_BADEPSENUM); } } goto first_token; /* don't overwrite last line from previous call */ for (;;) { last_tok_line = ctx->tokline; first_token: tok = cslul_ll_next_slul_token(ctx); reuse_token: switch (tok) { case CSLUL_T_EOF: if (!operator_expected) goto bad_eof; goto finished; case CSLUL_T_NEEDDATA: exprstate->state = XPSDone; goto buffer_end; CASE_LITERALS case CSLUL_T_KW_Undef: { struct ExprNode *node; if (operator_expected) { if (ctx->line != last_tok_line && (tok==CSLUL_T_LowerIdent || tok==CSLUL_T_KW_This)) { /* Terminal token that belongs to the next line */ ctx->reused_token.slul = tok; goto finished; } error_prevtok_end(ctx, CSLUL_E_OPERATOREXPECTED); goto ret_skiptoend; } node = new_exprnode(ctx, root); if (!node) goto outofmem; node->rpntokentype = tok; node->rpncontext = RC_TERMINAL; node->rpnnext = out; if (tok == CSLUL_T_LowerIdent) { struct TreeNode *ident = local_ident_lookup(ctx); if (!ident) { /* Assume it is a reference to a top level identifier */ ident = tree_insert(ctx, &ctx->tl.idents_root, ctx->tokhash, ctx->toklen, ctx->tokval, NULL, sizeof(struct TopLevelIdent)); if (!ident) goto outofmem; insert_toplevel_ident(ctx, (struct TopLevelIdent *)ident); } if (ctx->funcbody) 
ctx->funcbody->num_temporaries++; node->exprtype = E_IDENT; node->a.ident = ident; } else if (tok == CSLUL_T_Integer) { node->exprtype = E_INTEGER; node->a.intval = ctx->parser.slul.number; node->b.intflags = 0; } else if (tok == CSLUL_T_Float) { /* TODO */ } else if (tok == CSLUL_T_String) { struct StringLiteral *strval; size_t len = ctx->toklen; char *data = aalloc_memzdup(ctx, ctx->tokval, len); if (!data) goto outofmem; strval = aallocp(ctx, sizeof(struct StringLiteral)); if (!strval) goto outofmem; PROTECT_STRUCT(*strval); strval->data = data; strval->u.length = len; node->exprtype = E_STRING; node->a.strval = strval; ctx->num_literals++; /* Check if there are multiple string chunks (i.e. a multi-line string) */ ctx->current_expr = node; ctx->last_stringchunk = NULL; last_tok_line = ctx->tokline; in_string_lookahead: node = ctx->current_expr; for (;;) { /* TODO require that chunks come on separate lines? */ /* TODO require that there are no blank lines between chunks? */ tok = cslul_ll_next_slul_token(ctx); if (tok != CSLUL_T_String) { if (tok == CSLUL_T_NEEDDATA) { exprstate->state = XPSStringLookahead; goto buffer_end; } else { out = node; operator_expected = 1; goto reuse_token; } } else { /* String chunk */ struct StringChunk *chunk = aallocp(ctx, sizeof(struct StringChunk)); if (!chunk) goto outofmem; PROTECT_STRUCT(*chunk); data = aalloc_memzdup(ctx, ctx->tokval, ctx->toklen); if (!data) goto outofmem; chunk->data = data; chunk->next = NULL; chunk->length = ctx->toklen; if (ctx->last_stringchunk == NULL) { /* Convert to chunked string */ struct StringChunk *firstchunk = aallocp(ctx, sizeof(struct StringChunk)); if (!firstchunk) goto outofmem; PROTECT_STRUCT(*firstchunk); strval = node->a.strval; firstchunk->data = strval->data; firstchunk->next = chunk; firstchunk->length = strval->u.length; strval->data = NULL; strval->u.chunks = firstchunk; } else { /* Append to already converted string */ ctx->last_stringchunk->next = chunk; } ctx->last_stringchunk = 
chunk; last_tok_line = ctx->tokline; } } } else if (tok == CSLUL_T_KW_None) { node->exprtype = E_NONE; } else if (tok == CSLUL_T_KW_Undef) { node->exprtype = E_UNDEF; } else if (tok == CSLUL_T_KW_False) { node->exprtype = E_BOOL; node->a.intval = 0; } else if (tok == CSLUL_T_KW_True) { node->exprtype = E_BOOL; node->a.intval = 1; } else if (tok == CSLUL_T_KW_This) { struct TypeDecl *thisclass; node->exprtype = E_THIS; thisclass = ctx->parser.slul.current_class; node->a.thisclass = thisclass; if (!thisclass) { error_tok(ctx, CSLUL_E_THISOUTSIDEMETHOD); } } out = node; operator_expected = 1; /* TODO type parameters */ break; } case CSLUL_T_Dot: { struct ExprNode *node; /* This can be: - a struct field: x.ident - a type scope identifier: .ident - a field initialization: (.ident=y) */ if (operator_expected && ctx->line != last_tok_line) { /* Terminal token that belongs to the next line */ ctx->reused_token.slul = tok; goto finished; } in_dot: exprstate->state = XPSDot; tok = cslul_ll_next_slul_token(ctx); if (UNLIKELY(tok != CSLUL_T_LowerIdent)) { if (tok <= 0) goto buffer_end_noeof; error_prevtok_end(ctx, CSLUL_E_DOTWITHOUTIDENT); ctx->reused_token.slul = tok; goto ret_skiptoend; } else if (UNLIKELY(ctx->prev_tok_line != ctx->tokline || ctx->prev_tok_endcol != ctx->tokcolumn)) { error_tok(ctx, CSLUL_E_DOTSPACEIDENT); ctx->reused_token.slul = tok; goto ret_skiptoend; } node = new_exprnode(ctx, root); if (!node) goto outofmem; node->rpntokentype = CSLUL_T_Dot; node->rpnnext = out; node->misc = ctx->toklen; node->a.unbound_hash = ctx->tokhash; node->b.unbound_ident = aalloc_memzdup(ctx, ctx->tokval, ctx->toklen); if (!node->b.unbound_ident) goto outofmem; exprstate->state = XPSDone; if (operator_expected) { /* Struct field */ node->exprtype = E_FIELD; node->rpncontext = RC_UNARY; if (UNLIKELY(!out)) { assert(ctx->has_errors); } else { out->is_element_base = 1; } out = node; operator_expected = 1; } else { /* Type scope identifier OR struct field initializer */ 
node->exprtype = E_TYPEIDENT; node->rpncontext = RC_TERMINAL; last_tok_line = ctx->tokline; ctx->current_expr = node; in_maybe_field_init: node = ctx->current_expr; exprstate->state = XPSMaybeFieldInit; tok = cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_Assign) { /* Struct field initialization */ if (opstack && opstack->rpncontext == RC_GROUPING) { opstack->rpncontext = RC_ELEMLIST; } node->exprtype = E_FIELDINIT; node->rpncontext = RC_UNARY; node->rpntokentype = INTERNAL_OP_FIELDINIT; node->rpnnext = opstack; opstack = node; operator_expected = 0; } else if (tok == CSLUL_T_NEEDDATA) goto buffer_end; else { /* including EOF */ out = node; operator_expected = 1; goto reuse_token; } } break; } /* Argument and element list (exprlist) parsing. exprlists are stored like this on the out stack: identifier value1 value2 value3 ( TERM TERM TERM TERM CALL -- -- -- -- 3 */ case CSLUL_T_LParen: case CSLUL_T_LSquare: { struct ExprNode *node; if (operator_expected && ctx->tokline != last_tok_line) { /* "function(" and "array[" may not be broken apart, so assume this token is the start of a separate statement */ ctx->reused_token.slul = tok; goto finished; } /* Move operators with higher precedence to output stack */ while (opstack) { struct OpInfo st = get_opinfo(opstack); if (st.precedence <= CALL_ARRINDEX) break; OPSTACK_TO_OUT; } node = new_exprnode(ctx, root); if (!node) goto outofmem; node->rpntokentype = tok; node->rpncontext = (operator_expected ? RC_ARGLIST : /* Function call or array index */ (tok == CSLUL_T_LSquare ? 
RC_ELEMLIST : /* Array literal */ RC_GROUPING)); /* Grouping or struct literal (until a comma is parsed) */ node->rpnnext = opstack; opstack = node; last_tok_line = ctx->tokline; /* Check if the argument list is empty */ in_arglist_lookahead: exprstate->state = XPSArglistLookahead; tok = cslul_ll_next_slul_token(ctx); if (tok <= 0) goto buffer_end_noeof; opstack->a.rpnargs = /* opstack==node */ (tok == CSLUL_T_RParen || tok == CSLUL_T_RSquare || tok == CSLUL_T_Comma ? 0 : 1); operator_expected = 0; exprstate->state = XPSDone; goto reuse_token; } case CSLUL_T_Comma: case CSLUL_T_RParen: case CSLUL_T_RSquare: { int popped_ops = 0; int found = 0; /* Pop everything off the stack until a ( or [ is found */ while (opstack) { if (opstack->rpntokentype == CSLUL_T_LParen || opstack->rpntokentype == CSLUL_T_LSquare) { found = 1; break; } OPSTACK_TO_OUT; popped_ops = 1; } if (UNLIKELY(!operator_expected && popped_ops)) { error_prevtok_end(ctx, CSLUL_E_MISSINGOPERAND); goto ret_skiptoend; } if (!found) { ctx->reused_token.slul = tok; goto finished; } if (tok == CSLUL_T_Comma) { if (opstack->rpncontext == RC_GROUPING) { opstack->rpncontext = RC_ELEMLIST; } if (UNLIKELY(!operator_expected)) { error_prevtok_end(ctx, CSLUL_E_MISSINGOPERAND); } in_comma_check: last_tok_line = ctx->tokline; tok = cslul_ll_next_slul_token(ctx); if (UNLIKELY(tok <= 0)) { exprstate->state = XPSCommaCheck; goto buffer_end_noeof; } else if (UNLIKELY(tok == CSLUL_T_Comma)) { error_tok(ctx, CSLUL_E_MISSINGOPERAND); goto in_comma_check; } else if (tok != CSLUL_T_RParen && tok != CSLUL_T_RSquare) { /* Comma, but not a trailing one */ opstack->a.rpnargs++; } else if (UNLIKELY(!operator_expected)) { /* Error already reported */ } else if (UNLIKELY(opstack->rpncontext == RC_ARGLIST)) { /* Trailing commas are not allowed in arglists or array index expressions */ error_prevtok_end(ctx, tok == CSLUL_T_RParen ? 
CSLUL_E_TRAILINGCOMMA : CSLUL_E_TRAILINGCOMMAINDEX); } else if (UNLIKELY(opstack->rpncontext == RC_ELEMLIST && tok == CSLUL_T_RParen && ctx->prev_tok_line == ctx->tokline)) { /* Trailing commas are not needed in struct values */ error_prevtok_end(ctx, CSLUL_E_TRAILINGCOMMASTRUCT); } operator_expected = 0; goto reuse_token; } else { struct ExprNode *tmp; if (UNLIKELY(opstack->rpntokentype != tok-1)) { /* ^ right )]} = left ([{ +1 */ error_tok(ctx, CSLUL_E_WRONGPARENTYPE); } /* Pop the left parenthesis off the stack */ tmp = opstack; opstack = tmp->rpnnext; /* Grouping parantheses are not pushed to the out stack */ if (tmp->rpncontext != RC_GROUPING || tmp->rpntokentype != CSLUL_T_LParen || tmp->a.rpnargs != 1) { tmp->rpnnext = out; out = tmp; } operator_expected = 1; } break; } /* TODO conditional operator (ternary operator) */ case CSLUL_T_KW_Deref: /* Special hack for this prefix operator that can appear at the beginning of a line (unlike all other prefix operators) */ if (operator_expected && ctx->tokline != last_tok_line) { assert(out); ctx->reused_token.slul = tok; goto finished; } goto fallthrough_deref; CASE_OPERATORS_EXCEPT_LINESTART fallthrough_deref: { struct ExprNode *node; struct OpInfo op; node = new_exprnode(ctx, root); if (!node) goto outofmem; node->rpntokentype = tok; node->rpncontext = operator_expected ? RC_OP : RC_UNARY; op = get_opinfo(node); /* TODO move opinfo to node? */ node->op = op.op; if (op.type == Prefix) { node->rpnnext = opstack; opstack = node; operator_expected = 0; break; } if (UNLIKELY(!operator_expected)) { error_tok(ctx, CSLUL_E_OPERATORNOTEXP); goto ret_skiptoend; } else if (UNLIKELY(!op.precedence)) { error_prevtok_end(ctx, CSLUL_E_OPERATOREXPECTED); goto ret_skiptoend; } /* Binary or postfix operator */ while (opstack) { struct OpInfo st = get_opinfo(opstack); if (!st.precedence) break; if (UNLIKELY( (st.no_mixing && op.precedence == st.precedence) && (opstack->rpntokentype != tok || op.precedence == 6 /* == != etc. 
*/))) { error_tok(ctx, CSLUL_E_AMBIGUOUSOPERMIX); } if (!(!st.right_assoc && op.precedence <= st.precedence) && !(st.right_assoc && op.precedence < st.precedence)) { break; } OPSTACK_TO_OUT; } if (op.type == Postfix) { node->rpncontext = RC_UNARY; node->rpnnext = out; out = node; operator_expected = 1; break; } node->rpncontext = RC_OP; node->rpnnext = opstack; opstack = node; operator_expected = 0; break; } case CSLUL_T_Semicolon: case CSLUL_T_KW_Since: CASE_TOPLEVELS CASE_SIMPLETYPES CASE_COMPOSITETYPES CASE_INSIDEDEF CASE_TYPEQUALS CASE_CONTROL_NEEDCURLY CASE_INTERNAL if (UNLIKELY(ctx->tokline == last_tok_line)) { /* E.g "i = 2 if". Assume it is a typo */ enum CSlulErrorCode errcode; if (tok == CSLUL_T_Semicolon) { errcode = CSLUL_E_BADSEMICOLON; } else if (operator_expected || !out || !opstack || opstack->rpntokentype == CSLUL_T_LParen || opstack->rpntokentype == CSLUL_T_LSquare) { errcode = CSLUL_E_BADEXPRTOKEN; } else { errcode = CSLUL_E_TOKENAFTEROPERATOR; } error_tok(ctx, errcode); goto ret_skiptoend; } /* Fall through */ case CSLUL_T_Colon: case CSLUL_T_LCurly: case CSLUL_T_RCurly: CASE_CONTROL_NOCURLY default: ctx->reused_token.slul = tok; goto finished; } } finished: /* Move remaining tokens on the operator stack to the output stack */ while (opstack) { if (UNLIKELY(opstack->rpntokentype == CSLUL_T_LParen)) { error_sameline(ctx, CSLUL_E_UNCLOSEDPAREN); goto ret_skiptoend; } OPSTACK_TO_OUT; } if (UNLIKELY(!out || !operator_expected)) { error_sameline(ctx, CSLUL_E_INCOMPLETEEXPR); goto ret_skiptoend; } root->rpn = reverse_rpn_list(out); root->root = rpn_to_ast(ctx, root->rpn); if (!root->root) goto outofmem; ctx->exprdepth--; return RES_OK; buffer_end_noeof: if (tok == CSLUL_T_EOF) goto bad_eof; buffer_end: exprstate->operator_expected = operator_expected; exprstate->last_tok_line = last_tok_line; exprstate->opstack = opstack; exprstate->out = out; return RES_BUFFEREND; bad_eof: error_tok(ctx, CSLUL_E_UNEXPECTEDEOF); root->rpn = NULL; root->root = NULL; 
return RES_UNEXPECTEDEOF; ret_skiptoend: if (tok == CSLUL_T_EOF) goto bad_eof; /* TODO skip to next: - token with indentation <= indentation of first line, or - {, return, break, continue, goto token */ root->rpn = NULL; root->root = NULL; ctx->exprdepth--; return RES_SKIPTOEND; outofmem: /* If this is false, we went here without running out of memory */ assert(ctx->has_fatal_errors); root->rpn = NULL; root->root = NULL; return RES_OUTOFMEM; } static struct LoopInfo *get_prev_loopinfo(struct Stmt *prevstmt) { unsigned st = prevstmt->type; assert(st == S_WHILE || st == S_DOWHILE || st == S_FOR); return prevstmt->u.loopinfo; } static void mark_have_gototarget(struct StmtBlock *block) { do { if (block->f.f.has_gototarget) break; block->f.f.has_gototarget = 1; block = block->base; } while (block); } static int parse_funcbody_start(struct CSlul *ctx) { struct FuncBody *func; struct BlockStackEntry *entry; struct StmtBlock *stmtblock; struct Type *functype; func = aallocp(ctx, sizeof(struct FuncBody)); if (!func) return 0; PROTECT_STRUCT(*func); assert(ctx->current.decl != NULL); func->module = ctx->parsed_module; func->ident = ctx->current.decl; func->sinceversions = ctx->current_sinceversions; func->gotoidents = NULL; func->goto_tree = NULL; func->filename = ctx->current_filename; functype = ctx->current_functype; assert(functype->type == T_FUNC || functype->type == T_METHOD); func->has_retval = HAS_RETURN(functype); func->num_variables = functype->u.func->params.count + (functype->type == T_METHOD ? 
1 : 0); func->num_temporaries = 0; func->num_ebb = 1; func->next = ctx->funcbody; ctx->current_loop = NULL; ctx->current_case = NULL; ctx->required_linebreak = 0; ctx->last_void_return_line = 0; ctx->current_goto_id = 0; if (UNLIKELY(ctx->blockdepth >= MAXBLOCKDEPTH-1)) { ctx->current_block = NULL; error_tok(ctx, CSLUL_E_BLOCKTOODEEP); return 0; } ctx->funcbody = func; entry = &ctx->blockstack[++ctx->blockdepth]; entry->has_braces = 1; entry->state = FPSDone; entry->stmtblock = stmtblock = ctx->current_block = &ctx->funcbody->stmtblock; stmtblock->base = NULL; stmtblock->stmt.type = S_NOP; stmtblock->stmt.next = NULL; stmtblock->idents = NULL; stmtblock->f.all.bits = 0; entry->stmt = NULL; entry->prevstmt = NULL; return 1; } /** * Parses a function body. */ static enum Result parse_funcbody(struct CSlul *ctx) { enum CSlulToken tok; struct BlockStackEntry *stack = &ctx->blockstack[ctx->blockdepth]; struct StmtBlock *stmtblock = stack->stmtblock; struct Stmt *stmt = stack->stmt; struct Stmt *prevstmt = stack->prevstmt; enum Result res; if (UNLIKELY(stack->state)) { switch (stack->state) { case FPSExpr: goto in_expr; case FPSIfExpr: goto in_if_expr; case FPSWhileExpr: goto in_while_expr; case FPSDoWhileExpr: goto in_dowhile_expr; case FPSForType: goto in_for_type; case FPSForIdent: goto in_for_ident; case FPSForIn: goto in_for_in; case FPSForExpr: goto in_for_expr; case FPSSwitchExpr: goto in_switch_expr; case FPSSwitchLCurly: goto in_switch_lcurly; case FPSSwitchFirstCase: goto in_switch_firstcase; case FPSCaseValue: goto in_case_value; case FPSCaseSeparator: goto in_case_separator; case FPSCaseDefaultLookahead: goto in_case_default_lookahead; case FPSCaseColon: goto in_case_colon; case FPSCaseAfterColon: goto after_case_colon; case FPSSubCaseValue: goto in_subcase_value; case FPSBlock: case FPSIfBlock: case FPSElseBlock: case FPSWhileBlock: case FPSDoWhileBlock: case FPSForBlock: case FPSLoopEmptyBlock: case FPSLoopEndBlock: goto in_blockstart; case 
FPSMismatchedRCurly: goto mismatched_rcurly; case FPSAfterRCurly: goto after_rcurly; case FPSMaybeLoopEmpty: goto before_maybe_loopempty; case FPSGoto: goto in_goto; case FPSVardefType: goto in_vardef_type; case FPSVardefIdentLookahead: goto in_vardef_ident_lookahead; case FPSVardefIdent: goto in_vardef_ident; case FPSVardefEquals: goto in_vardef_equals; case FPSVardefInitval: goto in_vardef_initval; case FPSSkipBlockStart: goto in_skip_block_start; case FPSSkipBlock: goto in_skip_block; case FPSSkipStatment: goto in_skip_statement; case FPSCaseBlock: case FPSDone: error_tok(ctx, INTERR_STACKSTATE); } } for (;;) { next_token: tok = cslul_ll_next_slul_token(ctx); if (UNLIKELY(tok == CSLUL_T_NEEDDATA)) { stack->state = FPSDone; goto buffer_end; } reuse_token: /* TODO check indentation */ /*if (tok > 0 && ctx->tokcolumn != ...) { error_tok(ctx, CSLUL_E_...); goto skip_statement; }*/ if (UNLIKELY( /* line break required after previous stmt? */ ctx->required_linebreak == ctx->tokline && /* ctx->required_linebreak is not wrong due to lookahead? */ ctx->tokcolumn != ctx->numspaces+1 && /* } terminate blocks and is exempt. else/loopempty/loopend are also exempt. */ tok > 0 && tok != CSLUL_T_RCurly && tok != CSLUL_T_KW_Else && tok != CSLUL_T_KW_LoopEmpty && tok != CSLUL_T_KW_LoopEnd && /* while in do-while is exempt (happens only in error recovery) */ !(tok == CSLUL_T_KW_While && !stack->has_braces && stack[-1].state == FPSDoWhileBlock))) { /* Error. Also reached for e.g. "return 123" in void functions, hence the special case. */ if (IS_TOPLEVEL_OR_SINCE(tok)) goto have_toplevel; error_tok(ctx, ctx->last_void_return_line==ctx->tokline ? 
CSLUL_E_NORETURNNEWLINE : CSLUL_E_NONEWLINE); } ctx->required_linebreak = 0; ctx->last_void_return_line = 0; if (!stack->has_braces) { ctx->reused_token.slul = tok; goto end_of_block; } reuse_token_noendofblock: if (tok == CSLUL_T_RCurly) { /* End of statement block */ unsigned last_tok_line; end_of_block: stmtblock->last_goto_id = ctx->current_goto_id; if (!ctx->blockdepth) { /* Detect mismatched } early */ if (UNLIKELY(ctx->tokcolumn != 1 && /* indented */ (ctx->tokcolumn == ctx->numspaces+1 ||/* no token before */ (int)ctx->current.decl->ident.line != ctx->tokline))) { /* not a one-line decl */ /* This can be either an accidentally indented } or too many closing } */ int is_toplevel; mismatched_rcurly: stack->state = FPSMismatchedRCurly; tok = cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_NEEDDATA) goto buffer_end; ctx->reused_token.slul = tok; is_toplevel = CSLUL_IS_TOPLEVEL(tok); error_prevtok_start(ctx, is_toplevel ? CSLUL_E_FINALRCURLYINDENTED : CSLUL_E_TOOMANYRCURLY); if (!is_toplevel) { goto next_token; } } ctx->blockdepth--; parse_funcbody_end(ctx); return RES_OK; } after_rcurly: /* Look ahead (and reuse the token if it does not match) */ last_tok_line = ctx->tokline; stack->state = FPSAfterRCurly; tok = cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_NEEDDATA) goto buffer_end; /* Restore state of outer block */ stack = &ctx->blockstack[--ctx->blockdepth]; ctx->current_block = stmtblock = stack->stmtblock; prevstmt = stack->prevstmt; stmt = stack->stmt; if (stack->state == FPSDoWhileBlock) { assert(ctx->current_loop != NULL); ctx->current_loop = ctx->current_loop->base; assert(stmt->type == S_DOWHILE); if (UNLIKELY(tok != CSLUL_T_KW_While)) { error_tok(ctx, CSLUL_E_DOWITHOUTWHILE); goto reuse_token; } res = parse_expr_start(ctx, &stmt->u.whilestm->cond); if (IS_ERR(res)) goto err_res; in_dowhile_expr: stack->state = FPSDoWhileExpr; res = parse_expr(ctx); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; goto before_maybe_loopempty; } else if (stack->state 
== FPSWhileBlock || stack->state == FPSForBlock) { assert(ctx->current_loop != NULL); ctx->current_loop = ctx->current_loop->base; goto maybe_loopempty; } else if (stack->state == FPSLoopEmptyBlock) { goto maybe_loopend; } else if (stack->state == FPSLoopEndBlock) { if (tok == CSLUL_T_KW_LoopEmpty) { error_tok(ctx, CSLUL_E_LOOPEMPTYENDORDER); goto maybe_loopempty; } else if (tok == CSLUL_T_KW_LoopEnd) { goto maybe_loopend; /* Will report error */ } } else if (stack->state == FPSCaseBlock) { assert(ctx->current_case != NULL); ctx->current_case = ctx->current_case->base; } ctx->required_linebreak = last_tok_line; goto reuse_token; /* Loops may be followed by a loopempty and then a loopend block. Both are optional, but they must come in that order. */ before_maybe_loopempty: stack->state = FPSMaybeLoopEmpty; tok = cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_NEEDDATA) goto buffer_end; maybe_loopempty: if (tok == CSLUL_T_KW_LoopEmpty) { struct LoopInfo *loop = get_prev_loopinfo(prevstmt); struct StmtBlock *newblock; newblock = aallocp(ctx, sizeof(struct StmtBlock)); if (!newblock) goto outofmem; PROTECT_STRUCT(*newblock); newblock->base = stmtblock; newblock->f.all.bits = 0; newblock->f.f.prev_goto_id = ctx->current_goto_id; loop->empty_block = stmtblock = newblock; stack->state = FPSLoopEmptyBlock; ctx->funcbody->num_ebb++; goto blockstart; } /* Fall through */ maybe_loopend: if (tok == CSLUL_T_KW_LoopEnd) { struct LoopInfo *loop = get_prev_loopinfo(prevstmt); struct StmtBlock *newblock; if (UNLIKELY(loop->end_block != NULL)) { error_tok(ctx, CSLUL_E_DUPLICATELOOPEND); } else if (UNLIKELY(!loop->has_break)) { error_tok(ctx, CSLUL_E_LOOPENDWITHOUTBREAK); } newblock = aallocp(ctx, sizeof(struct StmtBlock)); if (!newblock) goto outofmem; PROTECT_STRUCT(*newblock); newblock->base = stmtblock; newblock->f.all.bits = 0; newblock->f.f.prev_goto_id = ctx->current_goto_id; loop->end_block = stmtblock = newblock; stack->state = FPSLoopEndBlock; ctx->funcbody->num_ebb++; 
goto blockstart; } else if (UNLIKELY(tok == CSLUL_T_KW_LoopEmpty)) { error_tok(ctx, CSLUL_E_DUPLICATELOOPEMPTY); goto maybe_loopempty; } stack->state = FPSDone; goto reuse_token; } else if (UNLIKELY(tok == CSLUL_T_EOF)) { error_tok(ctx, CSLUL_E_NOFUNCENDCURLY); stack->state = FPSDone; goto buffer_end; } else if (tok != CSLUL_T_KW_Else && tok != CSLUL_T_KW_LoopEmpty && tok != CSLUL_T_KW_LoopEnd && tok != CSLUL_T_KW_Case && tok != CSLUL_T_KW_Default) { if (UNLIKELY(IS_TOPLEVEL_OR_SINCE(tok))) goto have_toplevel; if (!stmt) stmt = &stmtblock->stmt; else { struct Stmt *newstmt = aallocp(ctx, sizeof(struct Stmt)); if (!newstmt) goto outofmem; PROTECT_STRUCT(*newstmt); stmt->next = newstmt; stmt = newstmt; } stmt->next = NULL; stmt->line = ctx->tokline; stmt->column = ctx->tokcolumn; stack->stmt = stmt; stack->prevstmt = prevstmt = stmt; } switch (tok) { case CSLUL_T_KW_If: { struct CtlIf *ifstm; stmt->type = S_IF; stmt->u.ifstm = ifstm = aallocp(ctx, sizeof(struct CtlIf)); if (!ifstm) goto outofmem; PROTECT_STRUCT(*ifstm); ifstm->false_block = NULL; res = parse_expr_start(ctx, &ifstm->cond); if (IS_ERR(res)) goto err_res; in_if_expr: ifstm = stmt->u.ifstm; stack->state = FPSIfExpr; res = parse_expr(ctx); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; stack->state = FPSIfBlock; ifstm->true_block.base = stmtblock; ifstm->true_block.f.all.bits = 0; ifstm->true_block.f.f.prev_goto_id = ctx->current_goto_id; ctx->funcbody->num_ebb += 2; /* true_block EBB + end EBB */ stmtblock = &ifstm->true_block; } blockstart: { if (UNLIKELY(ctx->blockdepth >= MAXBLOCKDEPTH-1)) { error_tok(ctx, CSLUL_E_BLOCKTOODEEP); if (stack->stmt) { stack->stmt->type = S_NOP; stack->stmt->next = NULL; } return RES_SKIPTOEND; } stmtblock->stmt.type = S_NOP; stmtblock->stmt.next = NULL; if (stack->state != FPSForBlock) { /* keep "for" variable */ stmtblock->idents = NULL; } ctx->current_block = stmtblock; stack = &ctx->blockstack[++ctx->blockdepth]; stack->stmtblock = stmtblock; /* TODO simplify (move 
cslul_ll_next_slul_token above) */ stack->state = stack[-1].state; stack->stmt = NULL; stack->prevstmt = NULL; stmt = NULL; prevstmt = NULL; } if (stack[-1].state == FPSCaseBlock) { /* Special case, because it has no { */ stack->has_braces = 1; goto next_token; } in_blockstart: tok = cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_NEEDDATA) goto buffer_end; stack->state = FPSDone; if (tok == CSLUL_T_LCurly) { stack->has_braces = 1; goto next_token; } else if (UNLIKELY(stack[-1].state == FPSDoWhileBlock)) { /* TODO forbid while and for without braces also? */ if (tok == CSLUL_T_RCurly) { error_sameline(ctx, CSLUL_E_LCURLYEXPECTEDONLY); /* Close block or we get lots of errors */ stack[-1].state = FPSWhileBlock; stack->has_braces = 0; ctx->reused_token.slul = tok; goto end_of_block; } error_tok(ctx, CSLUL_E_LOOPWITHOUTBRACES); } stack->has_braces = 0; if (IS_BRACELESS_STMT(tok)) { /* return, break, continue, goto */ goto reuse_token_noendofblock; } else if (tok == CSLUL_T_KW_If && stack[-1].state == FPSElseBlock) { /* TODO check that the "if" is on the same line as the else. 
*/ goto reuse_token_noendofblock; } else { error_sameline(ctx, CSLUL_E_NOCURLYSTMT); ctx->required_linebreak = 0; /* don't report twice */ ctx->last_void_return_line = 0; if (ctx->tokcolumn == 1) ctx->reused_token.slul = tok; goto reuse_token_noendofblock; } case CSLUL_T_KW_While: { struct CtlWhile *whilestm; stmt->type = S_WHILE; stmt->u.whilestm = whilestm = aallocp(ctx, sizeof(struct CtlWhile)); if (!whilestm) goto outofmem; PROTECT_STRUCT(*whilestm); whilestm->l.base = ctx->current_loop; whilestm->l.empty_block = NULL; whilestm->l.end_block = NULL; whilestm->l.has_break = 0; res = parse_expr_start(ctx, &whilestm->cond); if (IS_ERR(res)) goto err_res; in_while_expr: whilestm = stmt->u.whilestm; stack->state = FPSWhileExpr; res = parse_expr(ctx); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; stack->state = FPSWhileBlock; whilestm->l.block.base = stmtblock; whilestm->l.block.stmt.type = S_NOP; whilestm->l.block.stmt.next = NULL; whilestm->l.block.f.all.bits = 0; whilestm->l.block.f.f.prev_goto_id = ctx->current_goto_id; stmtblock = &whilestm->l.block; ctx->current_loop = &whilestm->l; ctx->funcbody->num_ebb += 3; /* loop-body + condition + end */ goto blockstart; } case CSLUL_T_KW_Do: { struct CtlWhile *dwstm; stmt->type = S_DOWHILE; stmt->u.whilestm = dwstm = aallocp(ctx, sizeof(struct CtlWhile)); if (!dwstm) goto outofmem; PROTECT_STRUCT(*dwstm); dwstm->cond = NULL; dwstm->l.base = ctx->current_loop; dwstm->l.empty_block = NULL; dwstm->l.end_block = NULL; dwstm->l.has_break = 0; stack->state = FPSDoWhileBlock; dwstm->l.block.base = stmtblock; dwstm->l.block.stmt.type = S_NOP; dwstm->l.block.stmt.next = NULL; dwstm->l.block.f.all.bits = 0; dwstm->l.block.f.f.prev_goto_id = ctx->current_goto_id; stmtblock = &dwstm->l.block; ctx->current_loop = &dwstm->l; ctx->funcbody->num_ebb += 3; /* loop-body + condition + end */ goto blockstart; } case CSLUL_T_KW_For: { struct CtlFor *forstm; stmt->type = S_FOR; stmt->u.forstm = forstm = aallocp(ctx, sizeof(struct CtlFor)); if 
(!forstm) goto outofmem; PROTECT_STRUCT(*forstm); forstm->l.base = ctx->current_loop; forstm->l.empty_block = NULL; forstm->l.end_block = NULL; forstm->l.has_break = 0; forstm->l.block.base = stmtblock; forstm->l.block.stmt.type = S_NOP; forstm->l.block.stmt.next = NULL; forstm->l.block.idents = NULL; forstm->l.block.f.all.bits = 0; forstm->l.block.f.f.prev_goto_id = ctx->current_goto_id; forstm->loopvar.type.defflags = 0; forstm->loopvar.u.initval = NULL; forstm->loopexpr = NULL; /* Type */ res = parse_type_start(ctx, &forstm->loopvar.type); if (IS_ERR(res)) goto err_res; ctx->parser.slul.forbidden_quals = FORBIDDEN_QUALS_FOR; in_for_type: stack->state = FPSForType; res = parse_type(ctx); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; /* Identifier */ in_for_ident: forstm = stmt->u.forstm; stack->state = FPSForIdent; { struct IdentDecl *declptr = &forstm->loopvar; res = parse_ident(ctx, (struct TreeNode **)&declptr, &forstm->l.block.idents, PI_DEF|PI_DATA|PI_LOCAL); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; assert(declptr == &forstm->loopvar); declptr->type.defflags = D_DEFINED; set_sourceline(ctx, &declptr->ident); } /* Iterable expression (e.g. an array) */ in_for_in: forstm = stmt->u.forstm; stack->state = FPSForIn; tok = cslul_ll_next_slul_token(ctx); if (UNLIKELY(tok != CSLUL_T_KW_In)) { if (tok == CSLUL_T_NEEDDATA) goto buffer_end; error_prevtok_end(ctx, CSLUL_E_NOIN); stack->state = FPSDone; goto skip_block; } res = parse_expr_start(ctx, &forstm->loopexpr); if (IS_ERR(res)) goto err_res; in_for_expr: forstm = stmt->u.forstm; stack->state = FPSForExpr; res = parse_expr(ctx); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; stack->state = FPSForBlock; stmtblock = &forstm->l.block; ctx->current_loop = &forstm->l; ctx->funcbody->num_ebb += 3; /* loop-body + condition + end. 
init uses the preceeding EBB */ goto blockstart; } case CSLUL_T_KW_Switch: { struct CtlSwitch *switchstm; stmt->type = S_SWITCH; stmt->u.switchstm = switchstm = aallocp(ctx, sizeof(struct CtlSwitch)); if (!switchstm) goto outofmem; PROTECT_STRUCT(*switchstm); switchstm->cases = NULL; switchstm->has_default = 0; res = parse_expr_start(ctx, &switchstm->cond); if (IS_ERR(res)) goto err_res; ctx->funcbody->num_temporaries++; /* to load switch variable to */ ctx->funcbody->num_ebb++; /* to jump to end */ in_switch_expr: stack->state = FPSSwitchExpr; res = parse_expr(ctx); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; in_switch_lcurly: stack->state = FPSSwitchLCurly; tok = cslul_ll_next_slul_token(ctx); if (UNLIKELY(tok != CSLUL_T_LCurly)) { if (tok == CSLUL_T_NEEDDATA) goto buffer_end; error_prevtok_end(ctx, CSLUL_E_LCURLYEXPECTED); goto reuse_token; /* TODO */ } in_switch_firstcase: stack->state = FPSSwitchFirstCase; tok = cslul_ll_next_slul_token(ctx); if (UNLIKELY(tok != CSLUL_T_KW_Case && tok != CSLUL_T_KW_Default)) { if (tok == CSLUL_T_NEEDDATA) goto buffer_end; error_tok(ctx, CSLUL_E_CASEEXPECTED); /* Block isn't actually opened until the first case is seen */ if (tok == CSLUL_T_RCurly) goto next_token; /* TODO error recovery could depend on indentation level */ goto reuse_token; } switch_case: { struct SwitchCase *newcase = aallocp( ctx, sizeof(struct SwitchCase)); if (!newcase) goto outofmem; PROTECT_STRUCT(*newcase); newcase->next = stmt->u.switchstm->cases; stmt->u.switchstm->cases = newcase; newcase->values.next = NULL; newcase->has_subcases = 0; ctx->funcbody->num_ebb++; if (stmt->u.switchstm->has_default) { error_tok(ctx, tok == CSLUL_T_KW_Case ? CSLUL_E_CASEAFTERDEFAULT : CSLUL_E_DUPLICATEDEFAULT); } if (tok == CSLUL_T_KW_Default) goto switch_default; case_value: res = parse_expr_start(ctx, &stmt->u.switchstm->cases->values.expr); if (IS_ERR(res)) goto err_res; /* TODO decide whether to have commas or multiple-case-lines for multiple case values. 
IMHO multiple-case-lines is sometimes easier to read */ in_case_value: stack->state = FPSCaseValue; res = parse_expr(ctx); if (IS_ERR_OR_EOF(res)) { if (res == RES_SKIPTOEND) { /* Recover from missing value (with only a colon) */ if (ctx->reused_token.slul == CSLUL_T_Colon) { ctx->reused_token.slul = 0; goto after_case_colon; } else if (ctx->reused_token.slul == CSLUL_T_Comma) { goto in_case_separator; } else { /* FIXME improve error recovery here and/or in parse_expr */ stack->misc = 1; goto in_skip_block; } } goto err_res_noeof; } in_case_separator: stack->state = FPSCaseSeparator; tok = cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_Comma) { struct CaseValue *lastval, *copy = aallocp( ctx, sizeof(struct CaseValue)); if (!copy) goto outofmem; lastval = &stmt->u.switchstm->cases->values; *copy = *lastval; PROTECT_STRUCT(*copy); lastval->next = copy; in_case_default_lookahead: stack->state = FPSCaseDefaultLookahead; tok = cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_KW_Default) goto switch_default; if (tok == CSLUL_T_NEEDDATA) goto buffer_end; ctx->reused_token.slul = tok; goto case_value; } if (LIKELY(tok == CSLUL_T_Colon)) goto after_case_colon; if (tok == CSLUL_T_NEEDDATA) goto buffer_end; error_prevtok_end(ctx, CSLUL_E_NOCASECOMMACOLON); ctx->reused_token.slul = tok; goto after_case_colon; switch_default: stmt->u.switchstm->cases->values.expr = NULL; stmt->u.switchstm->has_default = 1; } /* Fall through */ in_case_colon: /* The colon serves as visual separation in one-line cases, but is always required, even when the block starts on a new line */ stack->state = FPSCaseColon; tok = cslul_ll_next_slul_token(ctx); if (UNLIKELY(tok != CSLUL_T_Colon)) { if (tok == CSLUL_T_NEEDDATA) goto buffer_end; /* TODO detect if this was a case value or default: */ error_prevtok_end(ctx, CSLUL_E_NOCOLONAFTERDEFAULT); /* CSLUL_E_NOCOLONAFTERCASE */ ctx->reused_token.slul = tok; } after_case_colon: /* Lookahead to check syntax */ stack->state = FPSCaseAfterColon; tok = 
cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_NEEDDATA) goto buffer_end; if (UNLIKELY(tok == CSLUL_T_KW_Case || tok == CSLUL_T_KW_Default)) { /* Repeated cases are not allowed (it could be confused with C style repeated cases, which has fall-through semantics) */ error_tok(ctx, CSLUL_E_REPEATEDCASE); } ctx->reused_token.slul = tok; stack->state = FPSCaseBlock; stmt->u.switchstm->cases->base = ctx->current_case; ctx->current_case = stmt->u.switchstm->cases; assert(stmtblock != &ctx->current_case->block); ctx->current_case->block.base = stmtblock; ctx->current_case->block.f.all.bits = 0; ctx->current_case->block.f.f.prev_goto_id = ctx->current_goto_id; stmtblock = &ctx->current_case->block; goto blockstart; } case CSLUL_T_KW_Case: case CSLUL_T_KW_Default: if (UNLIKELY(!ctx->blockdepth || stack[-1].state != FPSCaseBlock)) { error_tok(ctx, CSLUL_E_CASEOUTSIDESWITCH); goto skip_statement; } /* Close the existing block */ stack = &ctx->blockstack[--ctx->blockdepth]; ctx->current_block = stmtblock = stack->stmtblock; prevstmt = stack->prevstmt; stmt = stack->stmt; assert(stmt->type == S_SWITCH); goto switch_case; case CSLUL_T_KW_Break: stmt->type = S_BREAK; goto continue_break; case CSLUL_T_KW_Continue: stmt->type = S_CONT; continue_break: if (UNLIKELY(!ctx->current_loop)) { error_tok(ctx, tok == CSLUL_T_KW_Break ? CSLUL_E_BREAKOUTSIDELOOP : CSLUL_E_CONTINUEOUTSIDELOOP); stmt->type = S_NOP; break; } stmt->u.loopinfo = ctx->current_loop; if (tok == CSLUL_T_KW_Break) { if (!ctx->current_loop->has_break) { ctx->funcbody->num_ebb++; } ctx->current_loop->has_break = 1; } ctx->required_linebreak = ctx->tokline; if (!stack->has_braces) goto end_of_block; break; case CSLUL_T_KW_Goto: stmt->type = S_GOTO; stack->state = FPSGoto; in_goto: tok = cslul_ll_next_slul_token(ctx); if (UNLIKELY(tok != CSLUL_T_LowerIdent)) { if (tok == CSLUL_T_NEEDDATA) goto buffer_end; error_sameline(ctx, (tok == CSLUL_T_UpperIdent ? 
CSLUL_E_CAPITALIZEDNONTYPE : CSLUL_E_IDENTIFIEREXPECTED)); ctx->reused_token.slul = tok; stmt->u.gotoident = NULL; goto skip_statement; } goto add_gotoident; case CSLUL_T_GotoTarget: stmt->type = S_TARGET; mark_have_gototarget(stmtblock); ctx->funcbody->num_ebb++; add_gotoident: { struct FuncBody *funcbody = ctx->funcbody; struct GotoIdent *gi = (struct GotoIdent*)tree_insert(ctx, &funcbody->goto_tree, ctx->tokhash, ctx->toklen, ctx->tokval, NULL, sizeof(struct GotoIdent)); stmt->u.gotoident = gi; if (!gi) goto outofmem; PROTECT_STRUCT(*gi); if (tok == CSLUL_T_GotoTarget) { if (UNLIKELY(gi->defined)) { error_tok(ctx, CSLUL_E_DUPLICATEGOTO); } else { set_sourceline(ctx, &gi->node); gi->defined = 1; } gi->goto_id = ctx->current_goto_id++; } if (!gi->seen_line) { gi->next = funcbody->gotoidents; funcbody->gotoidents = gi; gi->seen_line = ctx->tokline; gi->seen_column = ctx->tokcolumn; gi->jump_vs = NULL; gi->target_vs = NULL; } } if (tok != CSLUL_T_GotoTarget && !stack->has_braces) { goto end_of_block; } ctx->required_linebreak = ctx->tokline; break; case CSLUL_T_KW_SubCase: { struct CtlSubCase *subcase; stmt->type = S_SUBCASE; stmt->u.subcase = subcase = aallocp(ctx, sizeof(struct CtlSubCase)); if (!subcase) goto outofmem; PROTECT_STRUCT(*subcase); subcase->thecase = ctx->current_case; if (UNLIKELY(!ctx->current_case)) { error_tok(ctx, CSLUL_E_SUBCASEOUTSIDECASE); } res = parse_expr_start(ctx, &subcase->value); if (IS_ERR(res)) goto err_res; ctx->funcbody->num_ebb++; in_subcase_value: subcase = stmt->u.subcase; stack->state = FPSSubCaseValue; res = parse_expr(ctx); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; if (ctx->current_case) { ctx->current_case->has_subcases = 1; } subcase->block.base = stmtblock; subcase->block.f.all.bits = 0; subcase->block.f.f.prev_goto_id = ctx->current_goto_id; stmtblock = &subcase->block; goto blockstart; } case CSLUL_T_LCurly: { struct StmtBlock *newblock; stmt->type = S_BLOCK; stmt->u.block = newblock = aallocp(ctx, sizeof(struct 
StmtBlock)); if (!newblock) return RES_OUTOFMEM; PROTECT_STRUCT(*newblock); newblock->base = stmtblock; newblock->f.all.bits = 0; newblock->f.f.prev_goto_id = ctx->current_goto_id; stmtblock = newblock; stack->state = FPSBlock; ctx->reused_token.slul = tok; goto blockstart; } case CSLUL_T_KW_Assert: stmt->type = S_ASSERT; res = parse_expr_start(ctx, &stmt->u.expr); if (IS_ERR(res)) goto err_res; goto in_expr; case CSLUL_T_KW_Return: stmt->type = S_RETURN; if (ctx->funcbody->has_retval) { res = parse_expr_start(ctx, &stmt->u.expr); if (IS_ERR(res)) goto err_res; goto in_expr; } else { stmt->u.expr = NULL; ctx->required_linebreak = ctx->tokline; ctx->last_void_return_line = ctx->tokline; } break; case CSLUL_T_KW_Undef: /* XXX or use separate "unreachable" keyword? */ stmt->type = S_UNREACH; /* TODO */ ctx->required_linebreak = ctx->tokline; break; case CSLUL_T_LSquare: CASE_SIMPLETYPES CASE_COMPOSITETYPES CASE_TYPEQUALS /* Variable definition (starting with a type) */ if (ctx->tokcolumn != ctx->numspaces+1) { error_tok(ctx, CSLUL_E_VARDEFINSIDELINE); } stmt->type = S_DECL; stmt->u.vardef = aallocp(ctx, sizeof(struct VarDef)); if (!stmt->u.vardef) goto outofmem; PROTECT_STRUCT(*stmt->u.vardef); stmt->u.vardef->var_id = ctx->funcbody->num_variables++; stmt->u.vardef->decl.type.type = T_INVALID; stmt->u.vardef->decl.type.defflags = D_LOCAL; ctx->reused_token.slul = tok; res = parse_type_start(ctx, &stmt->u.vardef->decl.type); if (IS_ERR(res)) { if (res != RES_BUFFEREND) stmt->u.vardef = NULL; goto err_res; } ctx->parser.slul.forbidden_quals = FORBIDDEN_QUALS_LOCALVAR; in_vardef_type: stack->state = FPSVardefType; res = parse_type(ctx); if (IS_ERR_OR_EOF(res)) { if (res != RES_BUFFEREND) stmt->u.vardef = NULL; goto err_res_noeof; } stmt->u.vardef->decl.u.initval = NULL; in_vardef_ident_lookahead: stack->state = FPSVardefIdentLookahead; tok = cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_NEEDDATA) goto buffer_end; if (UNLIKELY(tok == CSLUL_T_Assign)) { 
error_prevtok_end(ctx, CSLUL_E_MISSINGVARIDENT); stmt->u.vardef = NULL; goto skip_statement; } ctx->reused_token.slul = tok; in_vardef_ident: stack->state = FPSVardefIdent; { struct IdentDecl *declptr = &stmt->u.vardef->decl; res = parse_ident(ctx, (struct TreeNode **)&declptr, &stmtblock->idents, PI_DEF|PI_DATA|PI_LOCAL); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; assert(declptr == &stmt->u.vardef->decl); declptr->type.defflags |= D_DEFINED; set_sourceline(ctx, &declptr->ident); } in_vardef_equals: stack->state = FPSVardefEquals; tok = cslul_ll_next_slul_token(ctx); if (tok == CSLUL_T_NEEDDATA) goto buffer_end; if (tok != CSLUL_T_Assign) { if (LIKELY(ctx->tokline != ctx->prev_tok_line)) { goto reuse_token; } else if (tok >= CSLUL_T_FirstElemType && tok <= CSLUL_T_LastElemType) { /* The next token is probably meant to go on a separate line */ goto reuse_token; } else { /* The next token is probably meant to be part of the initval */ error_tok(ctx, CSLUL_E_DECLMISSINGEQUALS); ctx->reused_token.slul = tok; } } else if (UNLIKELY(ctx->tokline > ctx->prev_tok_line+1)) { error_tok(ctx, CSLUL_E_BADINITVALBLANKLINE); } res = parse_expr_start(ctx, &stmt->u.vardef->decl.u.initval); if (IS_ERR(res)) goto err_res; in_vardef_initval: stack->state = FPSVardefInitval; res = parse_expr(ctx); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; ctx->required_linebreak = ctx->tokline; break; case CSLUL_T_LParen: case CSLUL_T_RParen: CASE_OPERATORS CASE_LITERALS /* Expression */ stmt->type = S_EXPR; ctx->reused_token.slul = tok; res = parse_expr_start(ctx, &stmt->u.expr); if (IS_ERR(res)) goto err_res; in_expr: stack->state = FPSExpr; res = parse_expr(ctx); if (IS_ERR_OR_EOF(res)) goto err_res_noeof; ctx->required_linebreak = ctx->tokline; goto next_token; case CSLUL_T_KW_Else: { struct CtlIf *ifstm; if (UNLIKELY(!prevstmt || prevstmt->type != S_IF)) { if (prevstmt && prevstmt->column > ctx->tokcolumn && ctx->tokcolumn == ctx->numspaces+1 && ctx->blockdepth && 
ctx->blockstack[ctx->blockdepth-1 ].prevstmt->type == S_IF) { /* Probably a missing } */ error_tok(ctx, CSLUL_E_ELSEWITHOUTRCURLY); ctx->reused_token.slul = CSLUL_T_KW_Else; goto end_of_block; } /* Parse following statement as if the "else" was not there */ error_tok(ctx, CSLUL_E_NOIFEND); goto next_token; } /* TODO check that the "else" is: - if the previous stmt was a {} block, that it is one the same line as the "}". - otherwise, that there are no empty lines between the previous statement */ assert(prevstmt->u.ifstm != NULL); ifstm = prevstmt->u.ifstm; stack->state = FPSElseBlock; assert(ifstm->false_block == NULL); ifstm->false_block = aallocp(ctx, sizeof(struct StmtBlock)); if (!ifstm->false_block) goto outofmem; PROTECT_STRUCT(*ifstm->false_block); ifstm->false_block->base = stmtblock; ifstm->false_block->f.all.bits = 0; ifstm->false_block->f.f.prev_goto_id = ctx->current_goto_id; stmtblock = ifstm->false_block; stack->prevstmt = &stmtblock->stmt; ctx->funcbody->num_ebb++; goto blockstart; } case CSLUL_T_KW_LoopEnd: case CSLUL_T_KW_LoopEmpty: error_tok(ctx, CSLUL_E_NOLOOPEND); goto skip_block; case CSLUL_T_KW_Since: CASE_TOPLEVELS have_toplevel: if (ctx->tokcolumn == 1) { assert(ctx->tokline > ctx->prev_tok_line); error_linecol(ctx, CSLUL_E_NOFUNCENDCURLY, ctx->prev_tok_line+1, 1); ctx->blockdepth = -1; ctx->reused_token.slul = tok; parse_funcbody_end(ctx); return RES_OK; /* recovered */ } if (!stmt) stmt = &stmtblock->stmt; /* Fall through */ case CSLUL_T_KW_In: case CSLUL_T_KW_With: case CSLUL_T_Dot: case CSLUL_T_RSquare: CASE_SEPARATORS CASE_INSIDEDEF /* These cases cannot happen */ CASE_INTERNAL case CSLUL_T_EOF: case CSLUL_T_NEEDDATA: case CSLUL_T_RCurly: default: { /* different errors depending on token? CSLUL_T_LCurly... 
*/ if (ctx->tokcolumn != 1 && ctx->tokcolumn != ctx->numspaces+1) { error_prevtok_end(ctx, CSLUL_E_STMTENDSHERE); } else { error_tok(ctx, CSLUL_E_BADSTMTLINESTART); } stmt->type = S_NOP; goto skip_statement; } } } err_res_noeof: if (res == RES_EOF) { error_tok(ctx, CSLUL_E_UNEXPECTEDEOF); return RES_UNEXPECTEDEOF; } /* Fall through */ err_res: switch (res) { case RES_OUTOFMEM: case RES_UNEXPECTEDEOF: case RES_EOF: return res; case RES_SKIPTOEND: goto skip_block; case RES_BUFFEREND: goto buffer_end; case RES_OK:; /* Cannot happen */ assert(0); } goto buffer_end; buffer_end: return RES_BUFFEREND; skip_block: stack->misc = ctx->tokline; /* for detection of line breaks */ assert(!ctx->has_fatal_errors); in_skip_block_start: stack->state = FPSSkipBlockStart; for (;;) { tok = cslul_ll_next_slul_token(ctx); if (tok <= 0) goto buffer_end; else if (tok == CSLUL_T_KW_Return && ctx->funcbody->has_retval) { if (ctx->tokline <= stack->misc+1) goto skip_statement; else goto reuse_token; } else if (IS_BRACELESS_STMT(tok)) { if (ctx->tokline <= stack->misc+1) goto next_token; else goto reuse_token; } else if (tok == CSLUL_T_LCurly) break; else if (ctx->tokline != stack->misc || tok == CSLUL_T_RCurly) goto reuse_token; } stack->misc = 1; /* counter of brace level */ in_skip_block: stack->state = FPSSkipBlock; for (;;) { tok = cslul_ll_next_slul_token(ctx); if (tok <= 0) goto buffer_end; if (tok == CSLUL_T_LCurly) stack->misc++; else if (tok == CSLUL_T_RCurly) { if (--stack->misc == 0) break; } } goto next_token; skip_statement: stack->misc = ctx->tokline; /* for detection of line breaks */ in_skip_statement: stack->state = FPSSkipStatment; do { tok = cslul_ll_next_slul_token(ctx); if (tok <= 0) goto buffer_end; } while (!IS_BRACELESS_STMT(tok) && tok != CSLUL_T_LCurly && tok != CSLUL_T_RCurly && ctx->tokline == stack->misc); goto reuse_token; outofmem: return RES_OUTOFMEM; } static void parse_funcbody_end(struct CSlul *ctx) { struct GotoIdent *gi = ctx->funcbody->gotoidents; for (; 
gi; gi = gi->next) {
        /* Not marked as defined: no goto target with this name was parsed.
           The error is reported at the position stored in
           seen_line/seen_column. */
        if (UNLIKELY(!gi->defined)) {
            error_linecol(ctx, CSLUL_E_UNDEFINEDGOTO,
                          gi->seen_line, gi->seen_column);
        }
    }
    ctx->current_block = NULL;
    assert(ctx->blockdepth == -1);
}

/**
 * Prepares for parsing a list of type parameter definitions.
 *
 * Allocates a GenericDef with aallocp, turns *type into a T_GENERICDEF type
 * with zero parameters that points to it, and resets the parser state in ctx
 * (current_gdef, next_prmdef, gdef_state) so that parse_type_param_def
 * begins at GDSParamType.
 *
 * Returns RES_OK, or RES_OUTOFMEM if the allocation failed.
 */
static enum Result parse_type_param_def_start(struct CSlul *ctx,
                                              struct Type *type)
{
    struct GenericDef *gdef = aallocp(ctx, sizeof(struct GenericDef));
    if (!gdef) return RES_OUTOFMEM;
    PROTECT_STRUCT(*gdef);
    PROTECT_STRUCT(gdef->paramdef);
    PROTECT_STRUCT(*type);
    type->type = T_GENERICDEF;
    type->quals = 0;
    type->misc = 0; /* number of type parameters */
    type->u.gdef = gdef;
    gdef->basetype.type = T_INVALID; /* changed later */
    gdef->basetype.defflags = 0;
    gdef->params_root = NULL;
    ctx->current_gdef = gdef;
    /* NULL means that the first parameter goes into gdef->paramdef */
    ctx->next_prmdef = NULL;
    ctx->gdef_state = GDSParamType;
    return RES_OK;
}

/**
 * Parses the definition of type parameter names, e.g. "ref T, enum U>"
 *
 * Each parameter consists of an optional CSLUL_T_Question token, a kind
 * (CSLUL_T_KW_Ref, CSLUL_T_KW_Own, CSLUL_T_KW_Arena, CSLUL_T_KW_Enum, or no
 * kind at all when the parameter starts directly with an upper-case
 * identifier) and the parameter identifier. Parameters are separated by
 * CSLUL_T_Comma and the list is ended by CSLUL_T_Greater.
 *
 * type->misc is incremented for each parameter. The parameters are stored
 * in a linked list of PrmDefEntry that starts with the entry embedded in
 * ctx->current_gdef.
 *
 * The function is resumable: ctx->gdef_state records the position, and a
 * later call jumps back to the matching in_* label.
 *
 * Returns:
 *  - RES_OK when CSLUL_T_Greater was reached (also when an error was
 *    reported at that token),
 *  - RES_BUFFEREND when the tokenizer returned CSLUL_T_NEEDDATA,
 *  - RES_EOF at end of file (CSLUL_E_UNEXPECTEDEOF is reported first),
 *  - RES_OUTOFMEM if an allocation failed,
 *  - RES_SKIPTOEND on other syntax errors (the offending token is placed in
 *    ctx->reused_token),
 *  - or the error/EOF result of parse_ident.
 */
static enum Result parse_type_param_def(struct CSlul *ctx, struct Type *type)
{
    enum CSlulToken tok;
    enum Result res;

    if (UNLIKELY(ctx->gdef_state != GDSParamType)) {
        /* Resuming: continue at the point where the previous call stopped.
           Note that these gotos jump into the loop body, past the
           declarations of prmtype/prmdef/decl, so every label has to
           (re)assign whatever it uses. */
        switch (ctx->gdef_state) {
        case GDSParamOptionalType: goto in_param_optionaltype;
        case GDSParamIdent: goto in_param_ident;
        case GDSParamCommaOrEnd: goto in_param_comma_or_end;
        case GDSParamType:
        default:
            error_tok(ctx, INTERR_BADGDSENUM);
        }
    }
    for (;;) {
        unsigned prmtype;
        struct PrmDefEntry *prmdef;
        struct TypeDecl *decl;

        tok = cslul_ll_next_slul_token(ctx);
        if (UNLIKELY(tok <= 0)) goto buffer_end_noeof;
        if (tok != CSLUL_T_Question) {
            prmtype = 0;
        } else {
            /* Optional parameter: the kind is in the following token */
            ctx->gdef_state = GDSParamOptionalType;
          in_param_optionaltype:
            prmtype = PT_OPTIONAL;
            tok = cslul_ll_next_slul_token(ctx);
        }
        switch (tok) {
        case CSLUL_T_EOF:
        case CSLUL_T_NEEDDATA:
            goto buffer_end_noeof;
        case CSLUL_T_KW_Ref:
            prmtype |= PT_REFTYPE;
            break;
        case CSLUL_T_KW_Own:
            prmtype |= PT_OWNTYPE;
            break;
        case CSLUL_T_KW_Arena:
            prmtype |= PT_ARENATYPE;
            break;
        case CSLUL_T_UpperIdent:
            prmtype |= PT_ANY;
          no_param_type_optional:
            /* PT_ANY and PT_ENUMTYPE parameters may not be optional */
            if (UNLIKELY((prmtype & PT_OPTIONAL) != 0)) {
                error_tok(ctx, CSLUL_E_OPTIONALNONREF);
            }
            break;
        case CSLUL_T_KW_Enum:
            prmtype |= PT_ENUMTYPE;
            goto no_param_type_optional;
        CASE_EXCEPT_TYPES
        CASE_SIMPLETYPES /* these could be allowed as enum base types */
        case CSLUL_T_KW_FuncRef:
        case CSLUL_T_KW_Struct:
        case CSLUL_T_KW_Slot:
        case CSLUL_T_LSquare:
        case CSLUL_T_Question:
        default:
            /* A ">" here means that no parameter was given at this
               position; anything else is not a valid parameter kind */
            error_tok(ctx, tok == CSLUL_T_Greater ?
                    CSLUL_E_EMPTYPARAMDEF : CSLUL_E_BADPARAMTYPE);
            goto skip_to_end;
        }
        if (UNLIKELY(type->misc++ == MAX_TYPE_PARAMS)) {
            error_tok(ctx, CSLUL_E_TOOMANYTYPEPARAMS); /* report once */
        }
        /* The first parameter uses the entry embedded in the GenericDef.
           Later parameters are allocated and appended via next_prmdef. */
        if (ctx->next_prmdef) {
            prmdef = aallocp(ctx, sizeof(struct PrmDefEntry));
            if (!prmdef) return RES_OUTOFMEM;
            PROTECT_STRUCT(*prmdef);
            *ctx->next_prmdef = prmdef;
        } else {
            prmdef = &ctx->current_gdef->paramdef;
        }
        ctx->next_prmdef = &prmdef->next;
        prmdef->prmtype = prmtype;
        prmdef->next = NULL;
        ctx->gdef_state = GDSParamIdent;
        ctx->current_prmdef = prmdef;
        if (prmtype == PT_ANY) {
            /* There was no kind keyword, so the upper-case identifier that
               was just read is the parameter name itself. It is handed back
               through reused_token (presumably consumed by parse_ident
               below). */
            ctx->reused_token.slul = CSLUL_T_UpperIdent;
        }
      in_param_ident:
        /* Re-derived from ctx, because a resumed call jumps directly here */
        prmdef = ctx->current_prmdef;
        decl = &prmdef->paramdecl;
        res = parse_ident(ctx, (struct TreeNode **)&decl,
                          &ctx->current_gdef->params_root,
                          PI_DEF|PI_TYPE|PI_LOCAL);
        if (IS_ERR_OR_EOF(res)) return res;
        assert(decl == &prmdef->paramdecl);
        PROTECT_STRUCT(decl->type);
        decl->type.defflags = D_DEFINED;
        decl->type.type = T_GENERICVAR;
        decl->type.misc = prmdef->prmtype;
        set_sourceline(ctx, &decl->ident);
        ctx->gdef_state = GDSParamCommaOrEnd;
      in_param_comma_or_end:
        tok = cslul_ll_next_slul_token(ctx);
        if (tok == CSLUL_T_Greater) break;
        else if (UNLIKELY(tok != CSLUL_T_Comma)) {
            if (tok <= 0) goto buffer_end_noeof;
            error_sameline(ctx, CSLUL_E_PARAMDEFEND);
            goto skip_to_end;
        }
        ctx->gdef_state = GDSParamType;
    }
    return RES_OK;
  buffer_end_noeof:
    if (tok == CSLUL_T_EOF) {
        /* NOTE(review): the error is reported here, yet RES_EOF is returned
           rather than RES_UNEXPECTEDEOF ("reported already"). Confirm that
           callers do not report the EOF a second time. */
        error_tok(ctx, CSLUL_E_UNEXPECTEDEOF);
        return RES_EOF;
    }
    return RES_BUFFEREND;
  skip_to_end:
    if (tok == CSLUL_T_Greater) {
        return RES_OK; /* recovered */
    } else {
        /* Let the caller see the token that ended the parameter list */
        ctx->reused_token.slul = tok;
        return RES_SKIPTOEND;
    }
}

/** Finds the typeident scope for the current declaration.
On error, a message is reported an NULL is returned. */ static struct TreeNode **find_typeidents_root(struct CSlul *ctx, struct Type *identtype, enum CSlulErrorCode error_code) { if (LIKELY(identtype != NULL)) { return &identtype->u.ident->typeidents; } else if (error_code == CSLUL_E_NOTYPESCOPEFUNC) { /* The return type is parsed after the identifier is parsed */ message_set_ident(ctx, 0, CSLUL_LT_MAIN, &ctx->current.decl->ident); } else { message_set_token(ctx, 0, CSLUL_LT_MAIN); } message_final(ctx, error_code); return NULL; } /** * When parsing functions that are typeidents, we cannot know the scope until * the return type has been parsed. So we need to move the ident declaration * to the correct place (the target_root parameter) after the return type * has been parsed. * * Returns 1, except on fatal errors in which case it returns 0. */ static int migrate_identdecl(struct CSlul *ctx, struct TreeNode **target_root, struct TreeNode *source_node) { if (!*target_root) { /* First type identifier in type */ *target_root = source_node; } else { /* Other type identifiers exist in type */ struct TreeNode *insresult = tree_insert(ctx, target_root, 0, USE_EXISTING, NULL, source_node, sizeof(struct IdentDecl)); if (!insresult) return 0; PROTECT_STRUCT(*insresult); if (UNLIKELY(insresult != source_node)) goto error_exists; } return 1; error_exists: message_set_ident(ctx, 0, CSLUL_LT_MAIN, source_node); message_final(ctx, CSLUL_E_IDENTEXISTS); return 1; /* not a fatal error */ } /** * Used by parse_sinceversions to build lists of since-versions. * Returns 1, except on fatal errors in which case it returns 0. 
*/
/*
 * The current token is looked up among the API definitions of the module
 * being parsed. If it is found, a new ApiRefList entry for it is appended:
 * *list is the head of the list (set when the list was empty) and *last is
 * its tail (always updated to the new entry).
 *
 * An unknown version is not fatal; nothing is appended in that case.
 */
static int versionlist_add_from_token(struct CSlul *ctx,
                                      struct ApiRefList **list,
                                      struct ApiRefList **last)
{
    struct ApiRefList *node;
    struct ApiDef *apidef = (struct ApiDef *)tree_search_tok(ctx,
            ctx->parsed_module->apidefs_root);

    if (UNLIKELY(apidef == NULL)) {
        if (ctx->phase != CSLUL_P_IMPL &&
                ctx->parsed_module->first_apidef) {
            error_tok(ctx, CSLUL_E_NOSUCHVERSION);
        }
        assert(ctx->has_errors);
        return 1;
    }

    if (ctx->current_type_sinceversions) {
        const struct ApiRefList *typever;
        unsigned new_ind = apidef->index;

        /* Check ordering against every version of the enclosing type */
        for (typever = ctx->current_type_sinceversions;
                typever != NULL; typever = typever->next) {
            unsigned type_ind = typever->version->index;
            if (UNLIKELY(new_ind < type_ind)) {
                error_tok(ctx, CSLUL_E_VERSIONEARLIERTHANDECL);
            } else if (UNLIKELY(new_ind == type_ind)) {
                error_tok(ctx, CSLUL_E_SAMEVERSION);
            }
        }

        /* The first version is the "main" version, and the following
           versions should be backport versions */
        if (*last != NULL) {
            unsigned tail_ind = (*last)->version->index;
            if (UNLIKELY(new_ind <= tail_ind)) {
                unsigned first_type_ind =
                    ctx->current_type_sinceversions->version->index;
                /* Otherwise an error was already reported in the loop */
                if (new_ind > first_type_ind) {
                    error_tok(ctx, CSLUL_E_NOTABACKPORT);
                }
            }
        }
    }

    /* TODO check that the version is not an ancestor of any of the
       versions in the versionlist. */
    node = aallocp(ctx, sizeof(struct ApiRefList));
    if (node == NULL) return 0;
    PROTECT_STRUCT(*node);
    node->version = apidef;
    node->next = NULL;

    /* Append to the tail; an empty list also gets its head set */
    if (*last != NULL) {
        (*last)->next = node;
    }
    if (*list == NULL) {
        *list = node;
    }
    *last = node;
    return 1;
}

/**
 * Parses a "since" version (if present), and places a pointer to it
 * in *sinceversions. Reports an error if versioning is not allowed in the
 * module type.
*/
/*
 * parse_sinceversions - parse an optional "since" version specification.
 *
 * Accepted forms (as handled below):
 *     since VERSION
 *     since { FIRST_VER BACKPORT_V1 BACKPORT_V2 }
 * If the next token is not "since", the token is stored in
 * ctx->reused_token.slul and no version list is produced.
 *
 * ctx            Parser context.
 * sinceversions  Out parameter. Set to the parsed list (NULL if there was
 *                none) when RES_OK is returned, set to NULL on
 *                RES_OUTOFMEM, and left untouched on RES_BUFFEREND.
 * is_toplevel    Non-zero when parsing in front of a top-level declaration.
 *                When zero, the result is checked against
 *                ctx->previous_member_sinceversions instead.
 *
 * Returns RES_OK (also after reporting a recoverable syntax error),
 * RES_BUFFEREND when the tokenizer returned CSLUL_T_NEEDDATA, or
 * RES_OUTOFMEM when versionlist_add_from_token() failed.
 *
 * The function is resumable: before returning RES_BUFFEREND it saves the
 * partial list in ctx->current_sinceversions, and the position is kept in
 * ctx->parser.slul.sincever_state. The next call jumps back to the
 * matching label.
 */
static enum Result parse_sinceversions(struct CSlul *ctx,
                                       struct ApiRefList **sinceversions,
                                       int is_toplevel)
{
    struct ApiRefList *ver_list = NULL, *last_ver = NULL;
    enum CSlulToken tok;

    /* Resume after an earlier RES_BUFFEREND */
    if (UNLIKELY(ctx->parser.slul.sincever_state != SVDone)) {
        /* Pick up the partial list that buffer_end saved */
        ver_list = ctx->current_sinceversions;
        /* NOTE(review): last_ver restarts as NULL here although ver_list
           may already hold entries. Whether that is safe depends on
           versionlist_add_from_token(), which is not visible here -
           verify. */
        switch (ctx->parser.slul.sincever_state) {
        case SVVersionStart: goto in_version_start;
        case SVVersionList: goto in_version_list;
        default:
        case SVDone:
            /* Unknown state: report an internal error, then continue as
               if starting from scratch */
            error_tok(ctx, INTERR_BADSVSENUM);
        }
    }

    tok = cslul_ll_next_slul_token(ctx);
    /* Remember the position of the (potential) "since" keyword */
    ctx->parser.slul.sincekeyword_line = ctx->tokline;
    ctx->parser.slul.sincekeyword_column = ctx->tokcolumn;
    if (UNLIKELY(tok == CSLUL_T_NEEDDATA)) goto buffer_end;
    else if (UNLIKELY(tok == CSLUL_T_EOF)) {
        /* Leave the EOF token for the caller to handle */
        ctx->reused_token.slul = tok;
        goto done;
    } else if (tok != CSLUL_T_KW_Since) {
        /* No version */
        ctx->reused_token.slul = tok;
        if (ctx->phase != CSLUL_P_IMPL) {
            if (!is_toplevel) {
                if (UNLIKELY(ctx->previous_member_sinceversions)) {
                    error_tok(ctx, CSLUL_E_UNVERSIONEDAFTERVERSIONED);
                }
                /* NOTE(review): this store is overwritten by
                   "*sinceversions = ver_list" at done (ver_list is still
                   NULL here in the normal, non-resumed case), so the
                   type's versions are not what the caller receives -
                   confirm whether that is intended. */
                *sinceversions = ctx->current_type_sinceversions;
            } else if (UNLIKELY(!ctx->parsed_module->is_unstable)) {
                /* Top-level symbols in versioned interfaces must have
                   "since" versions */
                if (ctx->parsed_module->first_apidef != NULL) {
                    error_tok(ctx, CSLUL_E_MISSINGVERWITHAPIDEF);
                } else {
                    /* Assume that the programmer forgot to add
                       "unstable_api", and continue */
                    assert(ctx->has_errors);
                }
            }
        }
        goto done;
    }

    /* A version is present */
    if (UNLIKELY(ctx->phase == CSLUL_P_IMPL)) {
        error_tok(ctx, CSLUL_E_SINCEVERSIONINIMPL);
    } else if (UNLIKELY(!ctx->parsed_module->first_apidef)) {
        error_tok(ctx, CSLUL_E_VERSIONWITHOUTAPIDEF);
    }
    /* A top-level "since" keyword must start in column 1 */
    if (UNLIKELY(ctx->tokcolumn != 1 && is_toplevel)) {
        error_tok(ctx, CSLUL_E_BADTOPLEVELCOLUMN);
    }

    ctx->parser.slul.sincever_state = SVVersionStart;
  in_version_start:
    tok = cslul_ll_next_slul_token(ctx);
    if (UNLIKELY(tok == CSLUL_T_NEEDDATA)) goto buffer_end;
    if (tok == CSLUL_T_Version) {
        /* Single version */
        if (!versionlist_add_from_token(ctx, &ver_list, &last_ver))
            goto oom;
    } else if (tok == CSLUL_T_LCurly) {
        /* Multiple versions:
           "since { FIRST_VER BACKPORT_V1 BACKPORT_V2 }" */
        ctx->parser.slul.sincever_state = SVVersionList;
      in_version_list:
        /* Collect version tokens until something else shows up */
        for (;;) {
            ctx->parser.slul.version_line = ctx->tokline;
            tok = cslul_ll_next_slul_token(ctx);
            if (UNLIKELY(tok == CSLUL_T_NEEDDATA)) goto buffer_end;
            if (tok != CSLUL_T_Version) break;
            if (!versionlist_add_from_token(ctx, &ver_list, &last_ver))
                goto oom;
        }
        if (tok != CSLUL_T_RCurly) {
            /* Not the closing brace: report it, and hand the token back */
            error_sameline(ctx, CSLUL_E_EXPECTEDVERORRCURLY);
            ctx->reused_token.slul = tok;
        } else if (ver_list == NULL) {
            /* "since { }" */
            error_prevtok_end(ctx, CSLUL_E_EMPTYSINCE);
        }
    } else {
        /* "since" followed by neither a version nor "{" (this includes
           EOF, which is not checked separately here) */
        error_sameline(ctx, CSLUL_E_NOSINCEVERSION);
        ctx->reused_token.slul = tok;
    }

  done:
    if (is_toplevel) {
        ctx->current_sinceversions = ver_list;
    } else if (ctx->previous_member_sinceversions && ver_list) {
        /* The first version of this entry must not be older than the
           first version of the previous one */
        unsigned prev_ind = ctx->previous_member_sinceversions->version->index;
        unsigned this_ind = ver_list->version->index;
        if (UNLIKELY(this_ind < prev_ind)) {
            error_sincever(ctx, CSLUL_E_SINCEVERNOTCHRONOLOGICAL);
        }
    }
    *sinceversions = ver_list;
    /* Reset the resume state for the next invocation */
    ctx->parser.slul.sincever_state = SVDone;
    ctx->parser.slul.version_line = 0;
    return RES_OK;

  buffer_end:
    /* Save the partial list; it is read back on resume (also when
       is_toplevel is zero) */
    ctx->current_sinceversions = ver_list;
    return RES_BUFFEREND;

  oom:
    *sinceversions = NULL;
    return RES_OUTOFMEM;
}

/*
 * parse_slul - parse top-level declarations from the token stream.
 *
 * Each iteration of the main loop parses an optional "since" version
 * (via parse_sinceversions) followed by one "data", "func" or "type"
 * declaration.
 *
 * The function is resumable. The local variable `state` is updated at
 * every resume label. When parsing has to stop (a non-positive token such
 * as CSLUL_T_NEEDDATA / CSLUL_T_EOF, or a sub-parser result that is routed
 * to buffer_end), `state` is stored in ctx->parser.slul.state. The next
 * call dispatches on the stored value and jumps directly to the matching
 * label. The locals `tok` and `res` are not preserved across calls, so
 * each label re-reads what it needs from ctx.
 *
 * Tokens that belong to the next construct are stored in
 * ctx->reused_token.slul (presumably returned again by the next
 * cslul_ll_next_slul_token() call - the tokenizer is not visible here).
 *
 * Error handling:
 *  - Syntax errors are reported and then recovered from at
 *    skip_top_level, which discards tokens until a top-level keyword or
 *    "since" appears in column 1.
 *  - Sub-parser results are mapped at err_res / err_res_noeof.
 *  - The out-of-memory paths return without storing `state`.
 */
static void parse_slul(struct CSlul *ctx)
{
    enum ParserState state = PDone;
    enum CSlulToken tok;
    enum Result res;

    /* Resume where the previous call stopped */
    if (ctx->parser.slul.state != PDone) {
        switch (ctx->parser.slul.state) {
        case PToplevelStart: goto in_toplevel_start;
        case PDataType: goto in_datatype;
        case PDataIdentStart: goto in_datadent_start;
        case PDataIdent: goto in_dataident;
        case PDataEquals: goto in_dataequals;
        case PDataValue: goto in_datavalue;
        case PTypeIdent: goto in_typeident;
        case PTypeParamDefStart: goto in_typeparamdefstart;
        case PTypeParamDefs: goto in_typeparamdefs;
        case PTypeEquals: goto in_typeequals;
        case PTypeType: goto in_typetype;
        case PFuncIdentStart: goto in_funcident_start;
        case PFuncClass: goto in_func_class;
        case PFuncClassDot: goto in_func_classdot;
        case PFuncIdent: goto in_funcident;
        case PFuncTypeParamDefStart: goto in_func_typeparamdef_start;
        case PFuncTypeParamDefs: goto in_func_typeparamdefs;
        case PFuncType: goto in_functype;
        case PFuncLCurly: goto in_funclcurly;
        case PFuncBody: goto in_funcbody;
        case PSkipTopLevel: goto skip_top_level_entry;
        case PDone:;
        }
    }

    for (;;) {
      next_token:
        /* parse_sinceversions keeps its own resume state
           (sincever_state). If it returns RES_BUFFEREND, PDone is what
           gets saved, and the next call re-enters the loop here. */
        state = PDone;
        /* Parse the since-version first, if any */
        if (UNLIKELY(ctx->next_decl_sinceversions)) {
            error_tok(ctx, CSLUL_E_REPEATEDSINCE);
        }
        ctx->previous_member_sinceversions = NULL; /* not in a struct */
        ctx->current_type_sinceversions = NULL;
        res = parse_sinceversions(ctx, &ctx->next_decl_sinceversions, 1);
        if (IS_ERR(res)) goto err_res;

        /* Parse the start token of the top-level (func, data or type) */
      in_toplevel_start:
        state = PToplevelStart;
        tok = cslul_ll_next_slul_token(ctx);
        /* Real tokens (tok > 0) at the top level must start in column 1 */
        if (UNLIKELY(tok > 0) && UNLIKELY(ctx->tokcolumn != 1)) {
            enum CSlulErrorCode errcode;
            /* A data/func/type keyword that is the first token after the
               leading spaces gets the "bad column" error; anything else
               is reported as a bad continuation */
            if (ctx->tokcolumn == ctx->numspaces+1 &&
                    (tok == CSLUL_T_KW_Data || tok == CSLUL_T_KW_Func ||
                     tok == CSLUL_T_KW_Type)) {
                errcode = CSLUL_E_BADTOPLEVELCOLUMN;
            } else {
                errcode = CSLUL_E_BADTOPLEVELCONTINUATION;
            }
            error_tok(ctx, errcode);
            goto skip_top_level;
        }
        assert(!ctx->has_fatal_errors);
        assert(ctx->typedepth == -1);
        assert(ctx->generic_param_depth == 0);
        ctx->params_root = NULL; /* clear any previous type parameters */
        ctx->parser.slul.current_class = NULL;
        ctx->current_functype = NULL;
        switch (tok) {
        case CSLUL_T_EOF:
            /* A "since" version was parsed, but no declaration follows */
            if (UNLIKELY(ctx->next_decl_sinceversions)) {
                error_tok(ctx, CSLUL_E_UNEXPECTEDEOF);
            }
            /* Fall through */
        case CSLUL_T_NEEDDATA:
            goto buffer_end;
        case CSLUL_T_KW_Data: {
            /* Syntax: "data TYPE IDENT [= VALUE]" */
            {
                struct TopLevelIdent *tlident =
                        aallocp(ctx, sizeof(struct TopLevelIdent));
                /* Allocation failed: returns without saving `state` */
                if (!tlident) return;
                tlident->iface_decl = NULL;
                tlident->decl.type.type = T_INVALID;
                ctx->current.tlident = tlident;
                ctx->parser.slul.in_toplevel_type = 0;
                ctx->parser.slul.in_toplevel_data = 1;
                ctx->parser.slul.is_method = 0;
                ctx->parser.slul.forbidden_quals = 0; /* in_toplevel_data used instead */
                res = parse_type_start(ctx, &tlident->decl.type);
                if (IS_ERR(res)) goto err_res;
            }
          in_datatype:
            state = PDataType;
            res = parse_type(ctx);
            if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
            /* TODO Check that it is not a function type
                    Need to know the real type for this! */
          in_datadent_start:
            state = PDataIdentStart;
            /* Check if this is a typeidentifier */
            tok = cslul_ll_next_slul_token(ctx);
            if (tok == CSLUL_T_Dot) {
                /* "data TYPE .IDENT": the identifier is inserted into
                   the typeidents root that find_typeidents_root()
                   returns for the data type */
                struct TreeNode **ti_root;
                struct Type *identtype = get_typescope_type(
                        ctx, &ctx->current.decl->type);
                ctx->current.tlident->class_ =
                        identtype ? identtype->u.ident : NULL;
                ctx->current.tlident->is_typeident = 1;
                ti_root = find_typeidents_root(ctx, identtype,
                                               CSLUL_E_NOTYPESCOPEDATA);
                if (!ti_root) goto skip_top_level;
                ctx->current_identroot = ti_root;
                ctx->typeident_temp_root = NULL;
                ctx->parser.slul.is_typeident = 1;
            } else if (LIKELY(tok > 0)) {
                /* Ordinary top-level data identifier */
                ctx->current_identroot = &ctx->tl.idents_root;
                ctx->current.tlident->class_ = NULL;
                ctx->parser.slul.is_typeident = 0;
                ctx->reused_token.slul = tok;
            } else goto buffer_end_noeof;
          in_dataident:
            state = PDataIdent;
            {
                struct TreeNode *treenode = &ctx->current.decl->ident;
                res = parse_ident(ctx, &treenode, ctx->current_identroot,
                                  PI_DEF|PI_DATA);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
                if (treenode != &ctx->current.decl->ident) {
                    /* The identifier has been referenced before, and has
                       already been inserted into the tree.
                       Copy the new type to it */
                    struct IdentDecl *existing = (struct IdentDecl*)treenode;
                    memcpy(&existing->type, &ctx->current.decl->type,
                           sizeof(struct Type));
                    ctx->current.decl = existing;
                }
                ctx->current.tlident->id = ctx->num_datadefs++;
            }
            PROTECT_STRUCT(*ctx->current.tlident);
            PROTECT_STRUCT(*ctx->current.decl);
            assert(ctx->current.decl->type.type != T_INVALID);
            ctx->current.decl->type.defflags = D_DEFINED;
            ctx->current.decl->u.initval = NULL;
            set_sourceline(ctx, &ctx->current.decl->ident);
            /* Hand over the pending "since" versions to this declaration */
            ctx->current.tlident->sinceversions = ctx->next_decl_sinceversions;
            ctx->next_decl_sinceversions = NULL;
            ctx->current_type_sinceversions = NULL;
          in_dataequals:
            state = PDataEquals;
            tok = cslul_ll_next_slul_token(ctx);
            if (IS_TOPLEVEL_OR_SINCE(tok)) {
                /* Next declaration starts: there is no initial value.
                   That is an error in the implementation phase. */
                if (UNLIKELY(ctx->phase == CSLUL_P_IMPL)) {
                    error_prevtok_end(ctx, CSLUL_E_IMPLDATAWITHOUTINITVAL);
                }
                ctx->reused_token.slul = tok;
                goto next_token;
            } else if (UNLIKELY(tok != CSLUL_T_Assign)) {
                if (LIKELY(tok <= 0)) {
                    /* End of buffer or EOF */
                    if (UNLIKELY(ctx->phase == CSLUL_P_IMPL &&
                                 tok == CSLUL_T_EOF)) {
                        error_tok(ctx, CSLUL_E_IMPLDATAWITHOUTINITVAL);
                    }
                    goto buffer_end;
                }
                /* TODO check for newline, and report different error */
                error_tok(ctx, CSLUL_E_BADINITVALTOKEN);
                goto skip_top_level;
            }
            res = parse_expr_start(ctx, &ctx->current.decl->u.initval);
            if (IS_ERR(res)) goto err_res;
          in_datavalue:
            state = PDataValue;
            res = parse_expr(ctx);
            if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
            goto next_token; }
        case CSLUL_T_KW_Func:
            /* Syntax: "func IDENT(PTYPE1 PIDENT1, ...) -> RETTYPE"
                   or  "func .IDENT(PTYPE1 PIDENT1, ...) -> RETTYPE"
                   or  "func TYPE.IDENT(PTYPE1 PIDENT1, ...) -> RETTYPE" */
            ctx->current.decl = NULL;
          in_funcident_start:
            state = PFuncIdentStart;
            tok = cslul_ll_next_slul_token(ctx);
            if (tok == CSLUL_T_Dot) {
                /* Type identifier (e.g. ".new") */
                /* It is inserted into a temporary root first, and moved
                   to the scope of the return type after the function
                   type has been parsed (see in_functype) */
                ctx->current_identroot = &ctx->typeident_temp_root;
                ctx->typeident_temp_root = NULL;
                ctx->parser.slul.is_typeident = 1;
                ctx->parser.slul.is_method = 0;
                ctx->parser.slul.current_class = NULL;
            } else if (tok == CSLUL_T_LowerIdent) {
                /* Plain function */
                ctx->reused_token.slul = tok;
                ctx->current_identroot = &ctx->tl.idents_root;
                ctx->parser.slul.is_typeident = 0;
                ctx->parser.slul.is_method = 0;
                ctx->parser.slul.current_class = NULL;
            } else if (tok == CSLUL_T_UpperIdent) {
                /* Method. Syntax:
                    "func TYPE.IDENT("...
                    "func TYPE.IDENT!("...
                    "func TYPE.IDENT QUALS("...
                    "func TYPE.IDENT QUALS!("...
                   both TYPE and IDENT can have type parameters */
                ctx->reused_token.slul = tok;
              in_func_class:
                state = PFuncClass;
                /* Look up the type (a reference: PI_DEF is not set) */
                res = parse_ident(ctx,
                        (struct TreeNode **)&ctx->parser.slul.current_class,
                        &ctx->tl.types_root, PI_TYPE);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
                assert(ctx->tl.types_list != NULL);
              in_func_classdot:
                state = PFuncClassDot;
                tok = cslul_ll_next_slul_token(ctx);
                if (tok <= 0) goto buffer_end_noeof;
                if (UNLIKELY(tok != CSLUL_T_Dot)) {
                    enum CSlulErrorCode errcode = CSLUL_E_CLASSDOTTOKEN;
                    if (tok == CSLUL_T_LParen) {
                        /* "func Name(": an uppercase function name */
                        ctx->parser.slul.current_class->type.defflags |= D_DEFINED; /* suppress "Type x not found" */
                        errcode = CSLUL_E_UPPERFUNCNAME;
                    }
                    error_tok(ctx, errcode);
                    goto skip_top_level;
                }
                /* TODO also check if the type is parametric! */
                ctx->current_identroot =
                        &ctx->parser.slul.current_class->typeidents;
                ctx->typeident_temp_root = NULL;
                ctx->parser.slul.is_typeident = 0;
                ctx->parser.slul.is_method = 1;
            } else if (tok <= 0) goto buffer_end_noeof;
            else {
                /* TODO support methods on elementary types */
                error_tok(ctx, CSLUL_E_FUNCDEFIDENT);
                goto skip_top_level;
            }
          in_funcident:
            state = PFuncIdent;
            res = parse_ident(ctx, (struct TreeNode **)&ctx->current.decl,
                              ctx->current_identroot, PI_DEF|PI_FUNC);
            if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
            assert(ctx->tl.idents_list != NULL);
            PROTECT_STRUCT(ctx->current.decl->type);
            ctx->current.decl->type.type = T_INVALID; /* changed later */
            ctx->current.decl->type.defflags = D_DEFINED|D_FUNC;
            ctx->current.tlident->id = ctx->num_funcdefs++;
            ctx->current.tlident->class_ = ctx->parser.slul.current_class;
            ctx->current.tlident->is_typeident = ctx->parser.slul.is_typeident;
            ctx->current.tlident->sinceversions = ctx->next_decl_sinceversions;
            set_sourceline(ctx, &ctx->current.decl->ident);
            /* TODO forbid having the same identifier as both a method
                    and a typeident/constructor (that will cause them
                    to have the same low-level/linkage identifier) */
            ctx->next_decl_sinceversions = NULL;
            ctx->current.decl->u.funcbody = NULL;
            ctx->parser.slul.in_toplevel_type = 0;
            ctx->parser.slul.in_toplevel_data = 0;
          in_func_typeparamdef_start:
            /* FIXME how to handle implicit type params from "this"-parameter? */
            state = PFuncTypeParamDefStart;
            tok = cslul_ll_next_slul_token(ctx);
            if (tok == CSLUL_T_NEEDDATA) goto buffer_end;
            else if (tok == CSLUL_T_EOF) goto unexpected_eof;
            else if (tok != CSLUL_T_Less) {
                /* No type parameters */
                ctx->reused_token.slul = tok;
                goto func_typeparams_done;
            } else {
                /* Generic type */
                ctx->parser.slul.seen_typeparams = NULL;
                res = parse_type_param_def_start(ctx,
                                                 &ctx->current.decl->type);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
              in_func_typeparamdefs:
                state = PFuncTypeParamDefs;
                res = parse_type_param_def(ctx, &ctx->current.decl->type);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
                ctx->params_root = ctx->current_gdef->params_root;
            }
          func_typeparams_done:
            /* Parameters and return type are parsed just like a func type */
            /* The type is still T_INVALID if no type parameters were
               parsed; otherwise the function type goes into the base
               type of the generic definition */
            ctx->current_functype =
                ctx->current.decl->type.type == T_INVALID ?
                    &ctx->current.decl->type :
                    /* T_GENERICDEF */
                    &ctx->current.decl->type.u.gdef->basetype;
            res = parse_type_start(ctx, ctx->current_functype);
            if (IS_ERR(res)) goto err_res;
            /* Pre-fill the type stack entry so that parse_type continues
               at the state TPSFuncLParen */
            ctx->typestack[ctx->typedepth].type->type =
                ctx->parser.slul.is_method ? T_METHOD : T_FUNC;
            ctx->typestack[ctx->typedepth].type->quals = INITIAL_QUALS(ctx);
            ctx->typestack[ctx->typedepth].type->line = ctx->tokline;
            ctx->typestack[ctx->typedepth].type->column = ctx->tokcolumn;
            ctx->typestack[ctx->typedepth].type->misc = 0;
            ctx->typestack[ctx->typedepth].type->u.func = NULL;
            ctx->typestack[ctx->typedepth].state = TPSFuncLParen;
            ctx->parser.slul.forbidden_quals = FORBIDDEN_QUALS_FUNCPARAM;
            /* FIXME different for methods. XXX also: "funcPARAM"...?? */
            /* TODO - parse_type could parse method qualifiers and "!" .
                    - also, should there be "method pointer types"?
                        - how to parse defs?
                        - how to parse calls? */
          in_functype:
            state = PFuncType;
            res = parse_type(ctx);
            if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
            assert(ctx->current.decl->type.type != T_INVALID);
            ctx->current.decl->type.quals |= Q_CLOSED; /* not a struct/enum */
            if (ctx->parser.slul.is_typeident) {
                /* Move typeident to typeident scope of return type */
                struct TreeNode **ti_root;
                struct Type *returntype =
                        &ctx->current_functype->u.func->returntype;
                struct Type *identtype = get_typescope_type(ctx, returntype);
                ctx->current.tlident->class_ =
                        identtype ? identtype->u.ident : NULL;
                assert(ctx->typeident_temp_root != NULL);
                ti_root = find_typeidents_root(ctx, identtype,
                                               CSLUL_E_NOTYPESCOPEFUNC);
                if (ti_root) {
                    if (!migrate_identdecl(ctx, ti_root,
                                           ctx->typeident_temp_root))
                        goto outofmem;
                }
            }
            /* Extra checks for the application entry point */
            if (ident_is_appmain(ctx->current.tlident)) {
                if (UNLIKELY(!is_valid_appmain(ctx, ctx->current.tlident))) {
                    error_prevtok_start(ctx, CSLUL_E_MAINMALFORMED);
                }
                if (UNLIKELY(!is_app(ctx->parsed_module))) {
                    error_prevtok_start(ctx, CSLUL_E_MAINNOTAPP);
                }
                assert(!ctx->has_app_main || ctx->has_errors);
                ctx->has_app_main = 1;
            }
          in_funclcurly:
            state = PFuncLCurly;
            tok = cslul_ll_next_slul_token(ctx);
            if (tok != CSLUL_T_LCurly) {
                /* No function body. Accepted silently outside the
                   implementation phase; in CSLUL_P_IMPL an error is
                   reported, chosen from where the token is located. */
                if (tok == CSLUL_T_NEEDDATA) goto buffer_end;
                if (UNLIKELY(ctx->phase == CSLUL_P_IMPL)) {
                    if (ctx->tokcolumn == 1 || tok == CSLUL_T_EOF) {
                        error_prevtok_end(ctx, CSLUL_E_IMPLFUNCWITHOUTBODY);
                    } else {
                        enum CSlulErrorCode errcode;
                        if (ctx->tokcolumn == ctx->numspaces+1) {
                            errcode = CSLUL_E_FUNCNOLCURLY;
                        } else if (IS_VOIDFUNC(ctx->current_functype)) {
                            errcode = CSLUL_E_FUNCBADKEYWORD;
                        } else {
                            errcode = CSLUL_E_FUNCNOARROW;
                        }
                        error_tok(ctx, errcode);
                        goto skip_top_level;
                    }
                }
                ctx->reused_token.slul = tok;
                goto next_token;
            }
            /* Failure here returns without saving `state` */
            if (!parse_funcbody_start(ctx)) return;
            ctx->current.decl->u.funcbody = ctx->funcbody;
          in_funcbody:
            state = PFuncBody;
            /* TODO:
                - should we do per-function arena allocation?
                - idents are inserted per block. should we merge the
                  ident and type trees? (also at the top level)
                - how about nested functions? is it a good idea?
                    - transform them to toplevel functions? (i.e. they can
                      only access local types, functions, and compile time
                      constant data, but not runtime-initialized data)
                    - check that referenced "external" data is not
                      allocated on the stack
                    - it makes sense to do these two things:
                      1. transform local non-stack data to global data, and
                      2. transform nested functions to top-level functions */
            res = parse_funcbody(ctx);
            /* Unlike the other sub-parser calls, RES_EOF is not reported
               as an unexpected EOF here (err_res, not err_res_noeof) */
            if (IS_ERR_OR_EOF(res)) goto err_res;
            goto next_token;
        case CSLUL_T_KW_Type:
            /* Syntax: "type IDENT [= TYPE]"
                   or  "type IDENT<...> [= TYPE]"
               (the second form has type parameter definitions between
                "<" and ">"; see in_typeparamdefstart) */
            ctx->parser.slul.in_toplevel_data = 0;
            ctx->parser.slul.is_method = 0;
            ctx->current.typedecl = NULL;
          in_typeident:
            state = PTypeIdent;
            res = parse_ident(ctx, (struct TreeNode **)&ctx->current.typedecl,
                              &ctx->tl.types_root, PI_DEF|PI_TYPE);
            if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
            /* An already defined type is a duplicate, unless it has only
               been seen with type parameters so far (T_GENERICSEEN) */
            if (UNLIKELY(!ctx->current.typedecl->ident.is_new &&
                         IS_IDENT_DEFINED(ctx->current.typedecl) &&
                         ctx->current.typedecl->type.type != T_GENERICSEEN)) {
                message_set_token(ctx, 0, CSLUL_LT_DUPLICATE);
                message_set_ident(ctx, 1, CSLUL_LT_DUPLICATE,
                                  &ctx->current.typedecl->ident);
                message_final(ctx, CSLUL_E_IDENTEXISTS);
                goto skip_top_level_entry;
            }
            assert(ctx->tl.types_list != NULL);
            ctx->current.typedecl->type.defflags = D_DEFINED;
            ctx->current.tltype->id = ctx->num_typedefs++;
            ctx->current.tltype->sinceversions = ctx->next_decl_sinceversions;
            ctx->next_decl_sinceversions = NULL;
            set_sourceline(ctx, &ctx->current.typedecl->ident);
          in_typeparamdefstart:
            state = PTypeParamDefStart;
            tok = cslul_ll_next_slul_token(ctx);
            if (tok == CSLUL_T_NEEDDATA) goto buffer_end;
            else if (tok == CSLUL_T_Less) {
                /* Generic type */
                if (ctx->current.typedecl->ident.is_new ||
                    ctx->current.typedecl->type.type == T_INVALID) {
                    ctx->parser.slul.seen_typeparams = NULL;
                } else {
                    /* (Some) parameters already seen */
                    assert(ctx->current.typedecl->type.type == T_GENERICSEEN);
                    ctx->parser.slul.seen_typeparams =
                        ctx->current.typedecl->type.u.genericseen;
                }
                res = parse_type_param_def_start(ctx,
                                                 &ctx->current.typedecl->type);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
              in_typeparamdefs:
                state = PTypeParamDefs;
                res = parse_type_param_def(ctx, &ctx->current.typedecl->type);
                if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
                ctx->params_root = ctx->current_gdef->params_root;
                /* Compare the definition with the earlier usages */
                check_seen_typeparams(ctx, ctx->params_root,
                                      ctx->parser.slul.seen_typeparams);
            } else {
                if (UNLIKELY(!ctx->current.typedecl->ident.is_new &&
                             ctx->current.typedecl->type.type != T_INVALID)) {
                    /* Already seen with type parameters! */
                    assert(ctx->current.typedecl->type.type == T_GENERICSEEN);
                    typeparams_on_non_generic(ctx,
                            ctx->current.typedecl->type.u.genericseen);
                }
                ctx->reused_token.slul = tok;
            }
            ctx->current_type_sinceversions =
                    ctx->current.tltype->sinceversions;
          in_typeequals:
            state = PTypeEquals;
            tok = cslul_ll_next_slul_token(ctx);
            if (tok != CSLUL_T_Assign) {
                /* Private type */
                if (tok == CSLUL_T_NEEDDATA) goto buffer_end;
                if (UNLIKELY(ctx->phase == CSLUL_P_IMPL)) {
                    /* Private types are not accepted in the
                       implementation phase. A token on the same line as
                       the identifier is reported as a missing "=". */
                    if (tok != CSLUL_T_EOF &&
                        ctx->tokline == (int)ctx->current.typedecl->ident.line) {
                        error_prevtok_end(ctx, CSLUL_E_TYPEDEFWITHOUTEQUAL);
                        goto skip_top_level;
                    } else {
                        message_set_ident(ctx, 0, CSLUL_LT_MAIN,
                                          &ctx->current.typedecl->ident);
                        message_final(ctx, CSLUL_E_PRIVATEINIMPL);
                    }
                }
                PROTECT_STRUCT(ctx->current.typedecl->type);
                ctx->current.typedecl->type.type = T_PRIVATE;
                ctx->current.typedecl->type.misc = 0;
                ctx->current.typedecl->type.line =
                        ctx->current.typedecl->ident.line;
                ctx->current.typedecl->type.column =
                        ctx->current.typedecl->ident.column;
                ctx->reused_token.slul = tok;
                goto next_token;
            }
            /* Prepare for start of type */
            ctx->parser.slul.in_toplevel_type = 1;
            {
                struct Type *t;
                if (ctx->current.typedecl->type.type == T_GENERICDEF) {
                    /* Generic type: parse into the base type */
                    t = &ctx->current.typedecl->type.u.gdef->basetype;
                } else {
                    t = &ctx->current.typedecl->type;
                    assert(!t->type || ctx->has_errors);
                }
                res = parse_type_start(ctx, t);
            }
            if (IS_ERR(res)) goto err_res;
            ctx->parser.slul.forbidden_quals = FORBIDDEN_QUALS_TYPEDECL;
          in_typetype:
            state = PTypeType;
            res = parse_type(ctx);
            if (IS_ERR_OR_EOF(res)) goto err_res_noeof;
            assert(ctx->current.typedecl->type.type != T_INVALID);
            goto next_token;
        /* Everything else is invalid at the top level.
           CASE_EXCEPT_TOPLEVELS is presumably a list of case labels for
           the remaining tokens (defined elsewhere - confirm). */
        case CSLUL_T_KW_Since:
        CASE_EXCEPT_TOPLEVELS
        default: {
            enum CSlulErrorCode errcode;
            if (ctx->tokcolumn == 1) {
                errcode = CSLUL_E_BADTOPLEVELTOKEN;
            } else {
                errcode = CSLUL_E_BADTOPLEVELCONTINUATION;
            }
            error_tok(ctx, errcode);
            ctx->next_decl_sinceversions = NULL;
            goto skip_top_level; }
        }
    }

    /* Entry point that first fetches a new token. Also the resume point
       for PSkipTopLevel, since `tok` is not preserved across calls. */
  skip_top_level_entry:
    tok = cslul_ll_next_slul_token(ctx);
  skip_top_level:
    /* Recover from error by skipping to the start of the next toplevel,
       or end of buffer (tok <= 0) */
    assert(!ctx->has_fatal_errors);
    assert(ctx->typedepth == -1);
    assert(ctx->exprdepth == -1);
    ctx->next_decl_sinceversions = NULL;
    ctx->blockdepth = -1;
    while (tok > 0 &&
           (ctx->tokcolumn != 1 || !IS_TOPLEVEL_OR_SINCE(tok))) {
        tok = cslul_ll_next_slul_token(ctx);
    }
    if (LIKELY(tok != CSLUL_T_NEEDDATA)) {
        /* Found the next top-level (or EOF): continue from there */
        ctx->reused_token.slul = tok;
        goto next_token;
    }
    /* Out of data while skipping: continue skipping on the next call */
    state = PSkipTopLevel;
    goto buffer_end;

    /* Like err_res, but RES_EOF is reported as an unexpected EOF */
  err_res_noeof:
    if (res == RES_EOF) {
        error_tok(ctx, CSLUL_E_UNEXPECTEDEOF);
    }
    /* Fall through */
  err_res:
    switch (res) {
    case RES_OUTOFMEM:
        /* If this is false, we went here without running out of memory */
      outofmem:
        assert(ctx->has_fatal_errors);
        return;
    case RES_SKIPTOEND:
        goto skip_top_level_entry;
    case RES_BUFFEREND:
        goto buffer_end;
    case RES_UNEXPECTEDEOF:
    case RES_EOF:
        break;
    case RES_OK:;
        /* Cannot happen */
        assert(0);
    }
    /* Save the resume point for the next call */
  buffer_end:
    ctx->parser.slul.state = state;
    return;
  unexpected_eof:
    error_tok(ctx, CSLUL_E_UNEXPECTEDEOF);
    goto buffer_end;
    /* For tok <= 0: EOF is an error here, other values just end the
       buffer */
  buffer_end_noeof:
    if (tok == CSLUL_T_EOF) goto unexpected_eof;
    goto buffer_end;
}