/* typechk.c -- Type checker Copyright © 2022-2024 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "internal.h" #include "ast.h" #include #include #define INTERR_TYPECHK(errnum) MAKE_INTERR(errnum, INTERRBASE_TYPECHK) #define INTERR_BADTYPENUM INTERR_TYPECHK(0x01) #define INTERR_BADASTNULL INTERR_TYPECHK(0x02) #define INTERR_BADUNDEFIDENT INTERR_TYPECHK(0x03) #define REQUIRE_NONNULL(expr) do { \ if (UNLIKELY((expr) == NULL)) goto incomplete_ast; \ } while (0) static struct ExprRoot *enum_nextint(struct CSlul *ctx, struct ExprRoot *lastval, struct TreeNode *ident) { struct ExprRoot *ret; uint64_t val; unsigned intflags; if (lastval) { /* compute lastval+1 */ struct ExprNode *e = lastval->root; CHK(e); CHK(e->exprtype == E_INTEGER); val = e->a.intval; intflags = e->b.intflags; if ((intflags & EIF_NEGATIVE) == 0) { /* positive */ if (UNLIKELY(!++val)) { message_set_ident(ctx, 0, CSLUL_LT_MAIN, ident); message_final(ctx, CSLUL_E_NUMBERTOOLARGE); return NULL; } } else { /* negative */ if (!--val) { intflags = 0; /* became positive */ } } } else { val = 0; intflags = 0; } ret = make_number_expr(ctx, val, NULL); CHK(ret); ret->root->b.intflags = intflags; return ret; assert_error: assert(ctx->has_errors); return NULL; } void typechk_ctx_init(struct TypeChkCtx *tc) { tc->step = 0; tc->cycle_max = 16; tc->canary_type = NULL; } /** * Detects cycles in types, i.e. self-referential types. * * \param ctx Optional compilation context (used only for error reporting) * \param type Current type * \param tc Cycle detection context * \param error_decl Optional type declaration to blame in error messages * \return 1 if OK, 0 if a cycle was detected */ int detect_type_cycles(struct CSlul *ctx, const struct Type *type, struct TypeChkCtx *tc, const struct TypeDecl *error_decl) { if (!tc->canary_type) { tc->canary_type = type; } else if (UNLIKELY(tc->canary_type == type)) { /* Cycle detected */ if (error_decl && ctx->in_typedef_check) { message_set_ident(ctx, 0, CSLUL_LT_MAIN, &error_decl->ident); message_final(ctx, CSLUL_E_CYCLICTYPEDECL); } return 0; } else if (tc->step++ == tc->cycle_max) { /* We might be stuck in a loop inside a nested type. Set the current type as the "canary type" and try to find cycles in the nested type. */ tc->step = 0; tc->cycle_max *= 2; tc->canary_type = type; } return 1; } #define check_type NOPE /** * Checks a type. This function is always called from check_type(). * * \param ctx Compilation context. * \param fromref Decl that the current type was referenced from. * \param toref Decl that the current type is used in. * \param type The type to check. * \param loc The context of the type. * \return 1 if successful, or 0 on failure. */ static int chk_type(struct CSlul *ctx, const struct TypeDecl *fromref, const struct TypeDecl *toref, const struct Type *type, enum TypeLocation loc, struct TypeChkCtx *tc) { /* XXX will *type be defined (D_DEFINED) and have a valid .type field in case of referenced but undefined idents? */ REQUIRE_NONNULL(type); if (UNLIKELY(!detect_type_cycles(ctx, type, tc, toref))) return 0; switch (type->type) { case T_REF: return chk_type(ctx, fromref, toref, type->u.nested, LTRef, tc); case T_SLOT: { struct Type *nested; REQUIRE_NONNULL(type->u.ident); nested = &type->u.ident->type; REQUIRE_NONNULL(nested); if (nested->type == T_IMPORTED) { /* if seen before definition */ nested = &nested->u.ident->type; REQUIRE_NONNULL(nested); } /* The nested type must be a type parameter */ if (UNLIKELY(!nested->type)) goto incomplete_ast; if (UNLIKELY(nested->type != T_GENERICVAR)) { /* XXX redundant check? */ /* TODO finish this */ message_set_ident(ctx, 0, CSLUL_LT_MAIN, &fromref->ident); if (toref != fromref) { message_set_type(ctx, 1, CSLUL_LT_DEFINITION, toref, type); } message_final(ctx, CSLUL_E_SLOTNONTYPEPARAM); } /* TODO more checks? */ return 1; } case T_ARRAY: { int ok = 1; tc->realtype_equals_typedef = 0; /* TODO check expr. there are at least 3 ways to do this: 1. separate implcheck_type() function 2. separate TypeLocation flag(s) for chk_type() 3. add the expr to a list/queue, and process later. XXX regarding arrays in typedefs in interfaces containing identifiers: - types cannot change across versions - public constants cannot change across versions - enums (non-closed) can have new values added in new versions (NOT any change!) - hence, arrays types in interfaces cannot depend on these two(NO!) (and of course not on module-private data) Solution: (NO?) - add D_FROZEN to definitions that are safe to use. Solution 2: (YES) - Totally forbid changing constants and inlined functions in interfaces? */ if (type->u.arr->lengthexpr) { ok &= check_expr(ctx, &ctx->basedefs->builtin_tr[BT_USize], type->u.arr->lengthexpr, LETypeConstant); /* XXX is it necessary to require that expr is constant IF the array is used in a toplevel type? - yes: it can contain references to externally-defined constants! - no: maybe externally-defined constants can be forbidden in exprs in types? */ } ok &= chk_type(ctx, toref, toref, &type->u.arr->elemtype, LTInner, tc); return ok; } case T_STRUCT: { int ok = 1; struct FieldOrParamEntry *f; struct TypeChkCtx tc_saved; tc->realtype_equals_typedef = 0; if (UNLIKELY(loc != LTRef && loc != LTTypedef &&/*loc != LTGenericPrm*/ (type->misc & M_KNOWN_SIZE) == 0)) { message_set_ident(ctx, 0, CSLUL_LT_MAIN, &fromref->ident); if (toref != fromref) { message_set_type(ctx, 1, CSLUL_LT_DEFINITION, toref, type); } message_final(ctx, loc == LTToplevelData ? CSLUL_E_OPENTLDATA : CSLUL_E_OPENWITHOUTREF); } REQUIRE_NONNULL(type->u.fields); tc_saved = *tc; for (f = type->u.fields->first; f != NULL; f = f->next) { /* FIXME can these nasty casts be avoided? */ ok &= chk_type(ctx, (struct TypeDecl *)&f->f.vardef.decl, (struct TypeDecl *)&f->f.vardef.decl, &f->f.vardef.decl.type, LTInner, tc); *tc = tc_saved; } return ok; } case T_ENUM: { int ok=1, has_manual=0; struct TypeRef basetr; struct EnumValueEntry *val; struct ExprRoot *lastval = NULL; struct ExprRoot templt, *saved_exprroot = ctx->current_exprroot; REQUIRE_NONNULL(type->u.enu); if (type->u.enu->base.type != T_INVALID) { /* TODO require that the base type is an integer type? but allow bool? */ if (!chk_type(ctx, toref, toref, &type->u.enu->base, LTInner, tc)) return 0; basetr = root_tr(&type->u.enu->base); } else { basetr = INTERNAL_TR(IT_UnsizedInt); } /* Enum values */ templt.rpn = NULL; for (val = type->u.enu->values; val; val = val->next) { struct ExprRoot *expr; if (UNLIKELY(val->e.vd.decl.ident.state == TNS_PROCESSING)) { message_set_ident(ctx,0,CSLUL_LT_MAIN, &val->e.vd.decl.ident); message_final(ctx, CSLUL_E_CYCLICDECL); ctx->current_exprroot = saved_exprroot; return 0; } val->e.vd.decl.ident.state = TNS_PROCESSING; if (UNLIKELY(!IS_IDENT_DEFINED(&val->e.vd.decl))) { assert(ctx->has_errors); ok = 0; val->e.vd.decl.ident.state = TNS_DONE; continue; } expr = val->e.vd.decl.u.initval; if (!expr) { templt.filename = val->e.vd.decl.ident.filename; templt.line_start = val->e.vd.decl.ident.line; templt.column_start = val->e.vd.decl.ident.column; templt.is_computed = 0; ctx->current_exprroot = &templt; expr = enum_nextint(ctx, lastval, &val->e.vd.decl.ident); if (UNLIKELY(!expr)) { ok = 0; val->e.vd.decl.ident.state = TNS_DONE; continue; } val->e.vd.decl.u.initval = expr; } else { ctx->current_exprroot = expr; if (val != type->u.enu->values) { has_manual = 1; /* could have duplicates */ } } ok &= check_expr(ctx, &basetr, expr, LETypeConstant); if (has_manual) { /* TODO check for duplicates */ } lastval = expr; val->e.vd.decl.ident.state = TNS_DONE; } ctx->current_exprroot = saved_exprroot; /* Determine optimal type */ if (type->u.enu->base.type == T_INVALID) { struct Type *basetype; /* TODO determine optimal type */ basetype = &type->u.enu->base; basetype->defflags = D_DEFINED; basetype->type = T_ELMNTRY; basetype->quals = 0; basetype->misc = 0; basetype->line = type->line; basetype->column = type->column; basetype->u.builtin = BT_Int; for (val = type->u.enu->values; val; val = val->next) { memcpy(&val->e.vd.decl.type, basetype, sizeof(struct Type)); } } return ok; } case T_IDENT: if (loc == LTRef && !tc->realtype_equals_typedef) return 1; REQUIRE_NONNULL(type->u.ident); if (UNLIKELY(!IS_IDENT_DEFINED(type->u.ident))) goto undefined_ident; /* TODO forbid allocating data structures containing refs and funcs as global data items */ /* TODO optimize out the check_type call where possible: - it is *currently* needed because the "opaque-must-be-ref" check is done inside check_type for T_PRIVATE (and possibly indirectly via T_LIFETIMED etc.) - Maybe there should be a flag for uknown-size/open types on each typedef. That way, already defined defs could be skipped here */ return chk_type(ctx, fromref, type->u.ident, &type->u.ident->type, loc, tc); case T_ELMNTRY: assert(type->u.builtin >= BT_FIRST && type->u.builtin <= BT_LAST); return 1; case T_BITS: /* TODO */ return 1; case T_FUNC: case T_METHOD: { int ok = 1; struct FieldOrParamEntry *fprm; struct TypeChkCtx tc_saved; tc->realtype_equals_typedef = 0; REQUIRE_NONNULL(type->u.func); tc_saved = *tc; for (fprm = type->u.func->params.first; fprm != NULL; fprm = fprm->next) { /* FIXME can this nasty cast be avoided? */ ok &= chk_type(ctx, (struct TypeDecl *)&fprm->f.vardef.decl, (struct TypeDecl *)&fprm->f.vardef.decl, &fprm->f.vardef.decl.type, LTParam, tc); fprm->f.vardef.decl.ident.state = TNS_DONE; *tc = tc_saved; } if (HAS_RETURN(type)) { ok &= chk_type(ctx, toref, toref, &type->u.func->returntype, LTParam, tc); } return ok; } case T_GENERICDEF: REQUIRE_NONNULL(type->u.gdef); return chk_type(ctx, fromref, toref, &type->u.gdef->basetype, loc, tc); case T_GENERICSPEC: { int ok = 1; struct PrmEntry *prm; struct TypeDecl *base; struct TypeChkCtx tc_saved; REQUIRE_NONNULL(type->u.gprm); REQUIRE_NONNULL(type->u.gprm->generictype); assert(type->u.gprm->generictype->type == T_IDENT || type->u.gprm->generictype->type == T_IMPORTED); base = type->u.gprm->generictype->u.ident; if (UNLIKELY(base->type.type != T_GENERICDEF)) { message_set_type(ctx, 0, CSLUL_LT_MAIN, toref, type); message_set_ident(ctx, 1, CSLUL_LT_DEFINITION, &base->ident); message_final(ctx, CSLUL_E_NONPARAMETRICTYPE); } ok &= chk_type(ctx, fromref, toref, type->u.gprm->generictype, loc, tc); tc->realtype_equals_typedef = 0; tc_saved = *tc; for (prm = &type->u.gprm->param; prm != NULL; prm = prm->next) { ok &= chk_type(ctx, fromref, toref, &prm->type, LTGenericPrm, tc); *tc = tc_saved; } return ok; } case T_GENERICVAR: /* TODO should enum types be allowed as type parameters also? */ /* XXX is this handling needed for other types also? */ if (type->line) { message_set_type(ctx, 0, CSLUL_LT_MAIN, toref, type); } else { /* XXX error appears on the identifier being declared, and not the correct type */ message_set_ident(ctx, 0, CSLUL_LT_MAIN, &fromref->ident); } message_final(ctx, CSLUL_E_TYPEPARAMNONSLOT); return 1; case T_LIFETIMED: REQUIRE_NONNULL(type->u.lifetimed); /* TODO */ return chk_type(ctx, fromref, toref, &type->u.lifetimed->type, loc, tc); case T_PRIVATE: if (UNLIKELY(loc != LTRef && loc != LTTypedef)) { message_set_type(ctx, 0, CSLUL_LT_MAIN, fromref, &fromref->type); message_final(ctx, CSLUL_E_PRIVATENONREF); } return 1; case T_IMPORTED: assert(type->u.ident != NULL); if (UNLIKELY(!IS_IDENT_DEFINED(type->u.ident))) goto undefined_ident; return chk_type(ctx, fromref, type->u.ident, &type->u.ident->type, loc, tc); default: /* including T_INVALID */ if (!ctx->has_errors || type->type != T_INVALID) { internal_error(ctx, INTERR_BADTYPENUM); } return 0; } incomplete_ast: if (!ctx->has_errors) { /* nulls should only exist if there are syntax errors */ internal_error(ctx, INTERR_BADASTNULL); } return 0; undefined_ident: if (!ctx->has_errors) { /* undefined idents should only exist if there are syntax errors */ internal_error(ctx, INTERR_BADUNDEFIDENT); } return 0; /* TODO what needs to be checked here? - cyclic non-ref idents - that type params (and private types etc.) are not "allocated" (used without ref) - arrays: check base type length exprs (must be constant>=0 -or- immutable and "simple") --> "implcheck_type" - functions: check param types check return type (does lifetimes etc. need to be checked here? also: params arena/own quals?) - idents: check that typeparams are NOT req'd (XXX [1]) - parametric idents: check that base type is parametric (XXX [1]) check that number of params match check that types of params are acceptable (XXX [1]) check that types of params match - enum: check base type step below --> "implcheck_type": check and evaluate values compute implicit values (first is 0, others are +1) check for duplicate values - struct/union: check members - bits: check members check number of bits - private/any check that it's not used in the wrong place (XXX [1]) [1] are those checked already in the parser? XXX should there be two steps here? - one for interface/form checking - one for implementation checking: array length exprs enum values */ } #undef check_type /** * Checks a type. * * \param ctx Compilation context. * \param fromref Decl that the current type was referenced from. * \param toref Decl that the current type is used in. * \param type The type to check. * \param loc The context of the type. * \return 1 if successful, or 0 on failure. */ int check_type(struct CSlul *ctx, const struct TypeDecl *fromref, const struct TypeDecl *toref, const struct Type *type, enum TypeLocation loc) { /* Set up cycle detection */ struct TypeChkCtx tc; typechk_ctx_init(&tc); tc.realtype_equals_typedef = (loc == LTTypedef); return chk_type(ctx, fromref, toref, type, loc, &tc); }