/* * Declaration parsing routines for the bootstrap compiler. * * Copyright © 2025-2026 Samuel Lidén Borell * * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later */ #include #include #include "compiler.h" #include "token.h" static bool implicit_class = false; struct TreeNode *current_funcparams = NULL; /* These are used to check that the definitions come in the correct order */ enum SeenDefs { SEEN_NONE, SEEN_VERSIONS, SEEN_GIVEME, SEEN_INSTANCEVARS, SEEN_CTORS, SEEN_FUNCS }; static enum SeenDefs seen_defs = SEEN_NONE; static void parse_func(enum FuncKind kind); static enum Token parse_paramlist(struct Var **list_out, size_t *count_out, bool is_exported_impl); static void parse_instance_variable(void); static struct TypeRef *parse_type_usage(void); static void entryfunc_checks(void); static void parse_class(void); static const struct Range range_uint = { 0, 4294967295U }; static const struct Range range_int = { 0, 2147483647U }; /** Creates an implicit class when the filename begins with an uppercase letter. */ static void create_implicit_class(const char *basename) { if (*basename >= 'A' && *basename <= 'Z') { const char *fileext = strrchr(basename, '.'); size_t len; NO_NULL(fileext); assert(fileext >= basename); assert(fileext-basename <= SOURCELINE_MAX); len = (size_t)(fileext-basename); type_start(basename, len); implicit_class = true; } else { implicit_class = false; } } void parse_file(FILE *f, const char *basename) { create_implicit_class(basename); seen_defs = SEEN_NONE; /*reset_local_idents();*/ tokenizer_init(f); while (tokenizer_next_line()) { enum Token t; struct LexemeInfo li; if (tokenizer_line_is_indented()) { error("Too many `end`s or unexpected indentation at top level"); } t = tokenize(&li); switch ((int)t) { case T_EOL: assert(0); /* tokenizer_next_line skips blank lines */ break; case T_KW_func: /* XXX should top-level functions (outside a class) be allowed? */ seen_defs = SEEN_FUNCS; parse_func(FK_FUNC); break; case T_KW_main: /* `main` can appear at the top level, and is a (mandatory) shorthand for the following: entry main code */ seen_defs = SEEN_FUNCS; entryfunc_checks(); func_start("main", 4, FK_ENTRY); current_func->is_modifying = true; current_funcparams = NULL; expect_next_line(); parse_func_body(); func_end(); break; case T_KW_entry: seen_defs = SEEN_FUNCS; entryfunc_checks(); parse_func(FK_ENTRY); break; case T_KW_constructor: if (!current_type) { error("A constructor cannot be outside a class"); } if (seen_defs > SEEN_CTORS) { error("Constructors must come before functions"); } seen_defs = SEEN_CTORS; parse_func(FK_CONSTRUCTOR); break; case T_KW_giveme: if (!current_type || !current_type->svcspecs) { error("A `giveme` section requires a service type " "specification at the top of the file " "(for example `CommandMain`)"); } if (seen_defs >= SEEN_GIVEME) { error(seen_defs == SEEN_GIVEME ? "Cannot have more than one `giveme` section" : "The `giveme` section must come before any other " "definitions (but after the service type " "specifications)"); } seen_defs = SEEN_GIVEME; expect_next_line(); parse_giveme_section(); break; case T_KW_versions: if (interfaces_done) { error("Can only have `versions` sections in interface files"); } if (seen_defs >= SEEN_VERSIONS) { error(seen_defs == SEEN_VERSIONS ? "Can't have more than one `versions` section" : "`versions` section must come first"); } expect_next_line(); parse_versions_section(); break; case T_UpperIdent: { /* Can be either a service type specification: CommandEntry or an instance variable: Item item */ t = lookahead_token(); if (t == T_LowerIdent) { /* Instance variable */ goto instancevar; } else if (t == T_EOL || t == T_String) { /* Service type specification */ if (seen_defs != SEEN_NONE) { error(t == T_EOL ? "Expected identifier to define an instance variable" : "Service types must come first in the source file"); } parse_svctype_spec(li.string, li.len); } else { error(seen_defs == SEEN_NONE ? "Neither a valid instance variable nor a service type" : "Expected identifier after type"); } break; } TOKEN_CASES_QUALIFIERS /* Instance variable definition */ instancevar: if (current_type == NULL) { error(interfaces_done ? "Variables/constants are only allowed in classes " "(e.g. A.slul but not a.slul)" : "Variables/constants are only allowed in classes " "(after a `class` line, or in an Uppercase module)"); } if (seen_defs > SEEN_INSTANCEVARS) { error(seen_defs == SEEN_CTORS ? "Variables must come before constructor definitions" : "Variables must come before function definitions"); } seen_defs = SEEN_INSTANCEVARS; /* TODO disallow modifiable variables (or variables of modifiable types) inside non-class (utility) files */ unread_token(); parse_instance_variable(); break; case T_KW_class: /* Class separator line (in interfaces), or Nested or explicit class (in implementations) */ seen_defs = SEEN_NONE; parse_class(); unread_line(); /* TODO refactor parse_file to avoid this */ if (interfaces_done) { error("Only whole-file classes are implemented"); /* TODO */ } break; default: error("Unexpected token at top level"); } } if (implicit_class || (!interfaces_done && current_type)) { type_end(); } assert(current_type == NULL); } /** Parses the identifier in a function definition */ static void parse_funcdef_ident(enum FuncKind kind) { enum Token t; struct LexemeInfo li; t = tokenize(&li); if (t == T_LowerIdent) { /* Name present */ enum IdentKind identkind = classify_ident(&li); if (kind == FK_CONSTRUCTOR && identkind != IK_CONSTRUCTOR) { error_token( identkind == IK_NORMAL ? "Constructor names must begin with `new_` or `from_`" : identkind == IK_CONSTRUCTOR_DEFAULT ? "`new` is redundant since it is the default " "constructor name" : /* IK_INVALID */ "Missing characters after `_` in constructor name", &li); } else if (kind != FK_CONSTRUCTOR && identkind != IK_NORMAL) { error_token("Only constructors may have a name beginning with " "`new_` or `from_`", &li); } else if (kind == FK_ENTRY && li.len == 4 && !memcmp(li.string, "main", 4)) { error("`main` entry should use shorthand syntax"); } func_start(li.string, li.len, kind); } else if (t == T_EOL && kind == FK_CONSTRUCTOR) { /* Constructor without a name */ func_start("new", 3, FK_CONSTRUCTOR); } else { error("Expected function name (lowercase ident)"); } } /** Parses an optional `!` to indicate a modifying function */ static void parse_funcdef_exclmark(enum FuncKind kind) { enum Token t; struct LexemeInfo li; t = tokenize(&li); if (t == T_SYM_ExclMark) { if (!current_type) { error_ident("Function definitions outside a class cannot have `!`", ¤t_func->ident); } if (kind == FK_CONSTRUCTOR) { error_ident("Constructors are implicitly modifying. No `!` needed", ¤t_func->ident); } else if (kind == FK_ENTRY) { error_ident("entry-functions are implicitly modifying. " "No `!` needed", ¤t_func->ident); } current_func->is_modifying = true; } else { unread_token(); } } static struct VersionDecl *parse_sincever(void) { struct LexemeInfo li; enum Token t; struct VersionDecl *ver; t = tokenize(&li); /* XXX require an explicit "unversioned" keyword for unversioned stuff? */ if (t != T_KW_since) { unread_token(); return NULL; } if (interfaces_done) { error("`since` version should only be defined in interfaces"); } assert(!tokenize_numbers_as_versions); tokenize_numbers_as_versions = true; expect(&li, T_Version, "Expected version after `since`"); tokenize_numbers_as_versions = false; if (!mod_declared_versions) { error_len("`since` version specified but there's " "no `versions` section", li.string, li.len); } ver = (struct VersionDecl *)tree_search_str(mod_declared_versions, li.string, li.len); if (!ver) { error_len("No such version", li.string, li.len); } return ver; } static void parse_sincever_for_type(void) { struct VersionDecl *ver = parse_sincever(); if (ver) { expect_next_line_or_eof(); add_versioned_type(ver); } } static void parse_sincever_for_func(void) { struct VersionDecl *ver = parse_sincever(); if (ver) { expect_next_line_or_eof(); add_versioned_func(ver); } } static void parse_sincever_for_instvar(const struct Var *instvar) { struct VersionDecl *ver = parse_sincever(); if (ver) { add_versioned_instancevar(ver, instvar); } /* TODO maybe add unversioned instvars, to be able to put them last? or just put a flag on the instvar if it is versioned? */ } static void check_returns_present(bool has_returns) { if (interfaces_done && current_func->is_export && !has_returns && current_func->num_returns) { error_len("Function in interface defines return values, " "but implementation does not", current_func->ident.node.name, current_func->ident.node.length); } } static void parse_func(enum FuncKind kind) { enum Token t; enum SectionKind { PARAMS, RETURNS, CODE }; enum SectionKind section; bool is_exported_impl, has_returns = false; parse_funcdef_ident(kind); parse_funcdef_exclmark(kind); expect_next_line_or_eof(); parse_sincever_for_func(); is_exported_impl = (interfaces_done && current_func->is_export); current_funcparams = NULL; section = PARAMS; for (;;) { enum SectionKind next_section; switch (section) { case PARAMS: t = parse_paramlist(¤t_func->params, ¤t_func->num_params, is_exported_impl); break; case RETURNS: t = parse_paramlist(¤t_func->returns, ¤t_func->num_returns, is_exported_impl); has_returns = true; break; case CODE: check_returns_present(has_returns); parse_func_body(); goto end; } switch ((int)t) { case T_KW_end: error_ident(interfaces_done ? "Function without `code` block" : "`end` should be omitted in interfaces", ¤t_func->ident); goto end; case T_KW_return: if (kind == FK_CONSTRUCTOR) { error("Cannot have a `return` section for a constructor"); } next_section = RETURNS; break; case T_KW_code: if (!interfaces_done) { error_ident("Functions in interfaces cannot have code", ¤t_func->ident); } next_section = CODE; break; case T_EOL: /* returned on EOF */ case T_KW_class: case T_KW_func: if (interfaces_done) { error_ident(t == T_EOL ? "Missing `code` block before end of file" : "Missing `code` block before next declaration", ¤t_func->ident); } if (t != T_EOL) { unread_line(); } goto end; default: error_ident("Unexpected symbol in function definition of", ¤t_func->ident); } expect_next_line_or_eof(); if (next_section == section) { error_ident("Duplicate section in function declaration of", ¤t_func->ident); } else if (next_section < section) { error_ident("Wrong order of sections in function declaration of", ¤t_func->ident); } section = next_section; } end: func_end(); } /** * Parses a parameter definition list. For implementations of exported * functions (i.e. in libraries) it also checks that the parameter list * in the implementation matches exactly with the interface. * * Returns the first token of the next line (`code` in implementation files, * or `func` or `class` in interfaces), or T_EOL if it was the last line. */ static enum Token parse_paramlist(struct Var **list_out, size_t *count_out, bool is_exported_impl) { enum Token t; struct LexemeInfo li; size_t count = 0; struct Var **nextptr; struct Var *param_in_interface; param_in_interface = *list_out; *list_out = NULL; nextptr = list_out; for (;;) { struct Var *var; t = tokenize(&li); switch ((int)t) { case T_UpperIdent: TOKEN_CASES_QUALIFIERS break; case T_EOL: if (tokenizer_next_line()) { continue; /* not EOF */ } /* Fall through */ default: goto done; } unread_token(); var = parse_var(¤t_funcparams, VAR_DECL_ONLY); assert(*nextptr == NULL); *nextptr = var; nextptr = &var->next; var->is_funcparam = true; if (is_exported_impl) { compare_vardef(param_in_interface, var); } expect_next_line_or_eof(); count++; if (count > FUNCPARAMS_MAX) { error("Too many parameters"); } if (is_exported_impl) { param_in_interface = param_in_interface->next; } } done: if (is_exported_impl && param_in_interface) { error_len("More parameters/returns in implementation than " "in interface", param_in_interface->ident.node.name, param_in_interface->ident.node.length); } if (count_out) { *count_out = count; } return t; /* start of following `code` block or next declaration */ } static void parse_instance_variable(void) { struct Var *var = parse_var(¤t_type->vars, VAR_ALLOW_VERSION_INITVAL); var->is_funcparam = true; toplevel_var_add(var); } static void entryfunc_checks(void) { if (!interfaces_done) { error("Interfaces cannot have entries"); } if (!current_type || !current_type->svcspecs) { error("An `entry` requires a matching service type " "specification at the top of the file " "(for example `CommandMain`)"); } } static void parse_class(void) { struct LexemeInfo li; if (!interfaces_done && current_type) { type_end(); } expect(&li, T_UpperIdent, "Class name must begin with an Uppercase letter"); type_start(li.string, li.len); expect_next_line_or_eof(); parse_sincever_for_type(); } struct QualifierInfo { unsigned char qual; char position; }; static struct TypeRef *parse_type_usage(void) { enum Token tok; struct LexemeInfo li; struct TypeRef *tr = malloc(sizeof(struct TypeRef)); int qualifier_position = 0; static const struct QualifierInfo qualinfos[NUM_QUALIFIERS] = { { Q_ALIASED, 1 }, { Q_VOLATILE, 1 } /* Not supported by bootstrap compiler */ }; unsigned quals = 0; NO_NULL(tr); /* TODO needs to handle: - optional types - generic types */ for (;;) { tok = tokenize(&li); switch ((int)tok) { /* Types */ case T_UpperIdent: /* TODO change builtin types into real classes? */ if (li.len == 3) { if (!memcmp(li.string, "Int", 3)) { tr->kind = TR_INT; tr->u.num = range_int; goto suffix; } } else if (li.len == 4) { if (!memcmp(li.string, "Bool", 4)) { tr->kind = TR_BOOL; tr->u.num = range_bool; goto suffix; } } else if (li.len == 6) { if (!memcmp(li.string, "UInt32", 6)) { tr->kind = TR_INT; tr->u.num = range_uint; goto suffix; } } tr->kind = TR_CLASS; tr->u.class_ = reference_type(li.string, li.len); goto suffix; case T_KW_volatile: error("`volatile` is unsupported in the bootstrap compiler " "because it does not support multi-threading"); break; case T_KW_aliased: { struct QualifierInfo info = qualinfos[tok - FIRST_QUALIFIER]; if (qualifier_position >= info.position) { error(info.position == qualifier_position ? "Conflicting type qualifier" : "Wrong order of type qualifiers"); } else if (current_line == 0 && sources == NULL) { error("\nC\x6f" "pyrig" "ht 20" "25 S" "\x61m" "u" "el" " Li" "de" "n B\x6f" "re" "ll <" "s" "\x61m" "u" "el" "\x40" "k\x6f" "daf" "ri" "tt" "." "se" ">\n" "E\x55" "PL" " 1." "2" "+ /" " LG" "PL" " 2." "1+" " li\x63" "ensed\n"); } qualifier_position = info.position; quals |= info.qual; break; } default: error("Expected a type here"); } } suffix: /* XXX how about optional types here: T!? o T?! o T!?! o T! o? (and `T! o!?`, but is `int i?` ok?) maybe `T!?` or `T! o?` are the better choices? that way, one can search for `T!` and find all mutable uses of the type `T`. */ tok = tokenize(&li); if (tok == T_SYM_ExclMark) { quals |= Q_VAR; } else { unread_token(); } /* Reached end of type */ if (tr->kind == TR_BOOL) { if (quals != 0) { error("Bool cannot have any qualifiers"); } } else if (tr->kind != TR_CLASS) { if ((quals & Q_VAR) != 0) { error("`!` are not applicable for Int/Bool/UInt32. " "Put it on the variable instead"); } if ((quals & Q_ALIASED) != 0) { error("aliased/volatile are not applicable for Int/Bool/UInt32"); } } tr->quals = quals; return tr; } struct Var *parse_var(struct TreeNode **root, enum VarType vartype) { struct LexemeInfo li; enum Token t; struct TreeNode *insresult; struct Var *var = malloc(sizeof(struct Var)); NO_NULL(var); var->is_modifiable = false; var->is_funcparam = false; var->is_giveme = false; var->declared_at_level = NOT_YET_DECLARED; var->assigned_at_level = NOT_YET_ASSIGNED; var->ifelse_assigned_level = NOT_IFELSE_ASSIGNED; /* Parse type */ var->typeref = parse_type_usage(); var->next = NULL; var->initval = NULL; /* Parse identifier */ expect(&li, T_LowerIdent, "Expected an identifier (lowercase ident)"); if (classify_ident(&li) != IK_NORMAL) { error_token("Identifier is reserved for constructors", &li); } insresult = tree_insert_str( root, li.string, li.len, &var->ident.node, sizeof(struct Var)); if (!insresult->is_new) { error_token("Identifier of variable already in use", &li); } assert(insresult == &var->ident.node); insresult->is_defined = true; srcloc_init(&var->ident.srcloc); /* An `!` after the identifier means that the variable can be re-assigned (including += -= etc.) */ t = tokenize(&li); if (t == T_SYM_ExclMark) { var->is_modifiable = 1; } else { unread_token(); } if (vartype == VAR_ALLOW_VERSION_INITVAL) { parse_sincever_for_instvar(var); } /* Initial value */ t = tokenize(&li); if (t == T_SYM_SingleEqual) { if (vartype == VAR_DECL_ONLY) { error("Default values are not (yet?) allowed"); } var->initval = parse_expr(); } else { unread_token(); } return var; }