/*
 * Common declarations for the bootstrap compiler.
 *
 * Copyright © 2025 Samuel Lidén Borell
 *
 * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later
 *
 * Note that the "Common Definitions and Macros" section is shared (with
 * some minor modifications) with the rtlincl/rtl.h file.
 */
#ifndef SLUL_COMPILER_H
#define SLUL_COMPILER_H

/*  =======================================================================
    |||||||||||||||||||   Common Definitions and Macros   |||||||||||||||||||
    =======================================================================  */

/* bool/true/false: macros before C99, <stdbool.h> for C99..C17, and
   built-in keywords from C23 onwards. */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
#define bool int
#define true 1
#define false 0
#elif __STDC_VERSION__ < 202311L /* untested */
#include <stdbool.h>
#endif

#include <limits.h> /* UINT_MAX */
#include <stddef.h> /* size_t, NULL, and unreachable() in C23 */
#include <stdio.h>  /* FILE, fprintf, stderr */
#include <stdlib.h> /* abort */

/** Largest value that a SlulInt is required to hold (32 bits, all ones) */
#define SLUL_INT_MAX 0xFFFFFFFF
/* SlulInt is `unsigned` when that type can hold SLUL_INT_MAX, and
   `unsigned long` otherwise. SLUL_INT_FMT is the matching printf
   conversion, without the leading '%'. */
#if UINT_MAX >= SLUL_INT_MAX
typedef unsigned SlulInt;
#define SLUL_INT_FMT "u"
#else
typedef unsigned long SlulInt;
#define SLUL_INT_FMT "lu"
#endif

/**
 * Prints an internal failure message, including the C source file and
 * line, to stderr and then aborts the process.
 *
 * abort() does not return, so the body of the `while (1)` loop is never
 * repeated.
 */
#define FAIL(s) do { \
        fprintf(stderr, "failure in bootstrap compiler at %s:%d: %s\n", \
                __FILE__, __LINE__, s); \
        abort(); \
    } while (1)
/** Fails (see FAIL) with the expression text if expr is negative */
#define NO_NEG(expr) do { if ((expr) < 0) FAIL(#expr); } while (0)
/** Fails (see FAIL) with the expression text if expr is NULL */
#define NO_NULL(expr) do { if ((expr) == NULL) FAIL(#expr); } while (0)

/* Define unreachable() */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
    /* C23 already has unreachable in stddef.h */
#elif (defined(__GNUC__) && __GNUC__ > 4) || defined(__clang__)
#define unreachable() __builtin_unreachable()
#else
#define unreachable() ((void)0)
#endif

/* Define NORETURN */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define NORETURN [[noreturn]]
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define NORETURN _Noreturn
#elif (defined(__GNUC__) && __GNUC__ > 2) || defined(__clang__)
#define NORETURN __attribute__((__noreturn__))
#else
#define NORETURN
#endif

/*  =======================================================================
    |||||||||||||||||||||||   Basic Data Structures   |||||||||||||||||||||||
    =======================================================================  */

typedef unsigned HashCode;

/**
 * A head of a tree node. This structure is supposed to be followed by
 * node data.
 */
struct TreeNode {
    HashCode hashcode;
    unsigned length : 8;
    unsigned rankdelta : 2;
    unsigned is_new : 1;
    unsigned is_defined : 1;
    unsigned line : 16;
    struct TreeNode *lower, *higher;
    const char *name;
};
#define TREE_MAXDEPTH 24 /* 16 million entries */
/* Largest value that fits in the 8-bit `length` field of struct TreeNode */
#define TREENODE_LEN_MAX 255
#define TREENODE_LEN_TYPE unsigned char /* to suppress conversion warning */

int treenode_equals(const struct TreeNode *n, const char *s, size_t len);

/**
 * Looks up a node.
 *
 * \return A pointer to the node if it existed, or NULL if not found.
 */
struct TreeNode *tree_search(const struct TreeNode *root,
                             HashCode h, size_t len, const char *name);
struct TreeNode *tree_search_node(const struct TreeNode *root,
                                  const struct TreeNode *n);

/**
 * Looks up or inserts a node. If newnode is NULL, the node is allocated
 * if it does not exist.
 *
 * The is_new field is set to 0 if the node already existed, or
 * 1 if it was just inserted.
 *
 * \return A pointer to the node, or NULL is returned on memory
 *         allocation failure.
 */
struct TreeNode *tree_insert(struct TreeNode **root,
                             HashCode h, const char *name, size_t len,
                             struct TreeNode *newnode, size_t newsize);
struct TreeNode *tree_insert_str(struct TreeNode **root,
                                 const char *name, size_t len,
                                 struct TreeNode *newnode, size_t newsize);

/*  =======================================================================
    ||||||||||||||||||||||||   Abstract Syntax Tree   |||||||||||||||||||||||
    =======================================================================  */

struct Stmt;
struct Expr;

struct SourceLocation {
    const char *filename;
    unsigned line;
};

struct Ident {
    struct TreeNode node;
    struct SourceLocation srcloc;
};
/**
 * Calls treenode_equals() on the tree node embedded in an identifier.
 *
 * \param ident  Pointer to a struct Ident.
 * \param s      String to pass on to treenode_equals().
 * \param len    Length to pass on to treenode_equals().
 */
#define ident_equals(ident, s, len) \
    (treenode_equals(&(ident)->node, (s), (len)))

struct Type {
    struct Ident ident;
    struct Type *outer;
    struct TreeNode *funcs;
    struct Func *funcs_list;
    struct TreeNode *ctors;
    struct Func *ctors_list;
    struct Func *ctor_default;
    struct TreeNode *inner_types;
    struct Type *inner_types_list;
    struct TreeNode *vars;
    struct Var *vars_list;
    struct TreeNode *svcspecs;
    struct ServiceTypeSpec *svcspecs_list;
    /* TODO `else` section for giveme, when a non-optional giveme fails */
    /* TODO flags? i.e. what can be done with the type */
    struct Type *next;
};

/** Range of an integer value.
    The bootstrap compiler doesn't support negative integers. */
struct Range {
    SlulInt min;
    SlulInt max;
};

extern const struct Range range_bool;

enum TypeRefKind {
    TR_UNKNOWN,
    TR_VOID,
    TR_BOOL,
    TR_INT,
    TR_CLASS
};

struct TypeRef {
    enum TypeRefKind kind;
    /** Bitmask of Q_... flags defined in token.h */
    unsigned quals;
    union {
        struct Range num;
        struct Type *class_;
    } u;
};

struct ExprInteger {
    /* This is an integer literal. Note that the bootstrap compiler
       only supports unsigned 31 or 32 bit integer literals */
    SlulInt num;
};

extern struct ExprString *string_constants;
struct ExprString {
    struct ExprString *next;
    size_t len;
    unsigned id;
    char *s;
};

struct ExprIdent {
    size_t namelen;
    union {
        /* if namelen != 0 */
        char *name;
        /* if namelen == 0 */
        struct Var *var;
        struct Func *func;
    } u;
};

struct CallArg {
    struct CallArg *next;
    struct Expr *expr;
};

struct ExprCall {
    struct ExprIdent ident;
    struct CallArg *args;
    struct CallArg **nextptr;
};

struct ExprMethodCall {
    struct ExprCall call;
    struct Expr *object;
    unsigned is_field : 1;
};

struct ExprBinary {
    int left_id;
    /* the right_id is: expr->id - 1 */
};

enum ExprGroupKind {
    GK_GROUPING_PAREN,
    GK_ARRAY_LITERAL
};

enum ExprKind {
    E_GROUP_TEMP,
    E_SEQPOINT,
    /* Terminals - Scalar constants */
    E_NONE,
    E_FALSE,
    E_TRUE,
    E_THIS,
    E_INTEGER,
    E_STRING,
    /* Terminals - Identifiers */
    E_LOCALVAR,
    E_INSTVAR,
    /* Terminals - Multi-argument */
    E_ARRAY,
    E_CALL,
    /* Unary operators */
    E_METHODCALL,
    /* E_NEGATE is unsupported in the bootstrap compiler */
    E_BOOL_NOT,
    /* Binary operators */
    E_ADD,
    E_SUB,
    E_MUL,
    E_DIV,
    E_MOD,
    E_EQUAL,
    E_NOT_EQUAL,
    E_LESS,
    E_GREATER,
    E_LESS_EQUAL,
    E_GREATER_EQUAL,
    E_BOOL_AND,
    E_BOOL_OR
};
/* Number of enumerators in enum ExprKind (E_BOOL_OR is the last one) */
#define NUM_EXPRKINDS (E_BOOL_OR+1)

struct Expr {
    enum ExprKind kind;
    int id;
    union {
        struct ExprInteger intval;
        struct ExprString *strval;
        struct ExprBinary binary;
        struct ExprIdent ident;
        struct ExprCall *call;
        struct ExprMethodCall *mcall;
        enum ExprGroupKind grouptemp;
        struct Expr *seqpoint_end;
        /* TODO struct exprs? (for closed or local types only) */
        /* TODO array exprs */
    } u;
    struct TypeRef *typeref;
    struct Expr *rpnnext;
};

struct Var {
    struct Ident ident;
    struct TypeRef *typeref;
    unsigned is_funcparam : 1;
    unsigned is_giveme : 1;
    unsigned is_modifiable : 1;
    struct Expr *initval;
    struct Var *next;
};

struct LoopInfo {
    struct Expr *cond;
    struct Stmt *body;
    struct Stmt *loopempty;
    struct Stmt *loopend;
};

struct StmtFor {
    struct LoopInfo loop;
    struct Var *var;
};

/** "else if". Only appears inside StmtIf */
struct StmtElif {
    struct Expr *cond;
    struct Stmt *body;
    struct StmtElif *next;
};

struct StmtIf {
    struct Expr *cond;
    struct Stmt *true_;
    struct StmtElif *elifs;
    struct Stmt *false_;
};

struct StmtBreak {
    struct Stmt *loop;
};

struct Case {
    struct Case *next;
    struct Expr *value;
    struct Stmt *block;
};

struct StmtSwitch {
    struct Expr *cond;
    struct Case *cases;
    struct Stmt *default_;
};

struct AssignDestination {
    struct Expr *expr;
    struct AssignDestination *next;
};

struct StmtAssign {
    struct AssignDestination first_dest;
    struct Expr *sourceexpr;
};

enum StmtType {
    S_ASSERT,
    S_ASSIGN,
    S_BREAK,
    S_CONTINUE,
    S_EXPR,
    S_FOR,
    S_IF,
    S_RETURN_NOVALUE,
    S_RETURN_VALUE,
    S_SWITCH,
    S_VARDECL,
    S_WHILE
};

struct Stmt {
    struct Stmt *next;
    enum StmtType kind;
    unsigned line : 16;
    unsigned has_break : 1;
    int id;
    union {
        struct Expr *expr;
        struct Var *var;
        struct LoopInfo loop;
        struct StmtBreak break_;
        struct StmtFor for_;
        struct StmtIf if_;
        struct StmtSwitch switch_;
        struct StmtAssign assign;
    } u;
};

struct Section {
    struct Ident ident;
    struct Stmt *code;
    struct Section *next;
};

#define FUNCPARAMS_MAX 255
struct Func {
    struct Ident ident;
    struct Type *class_;
    struct Var *params;
    struct Var *returns;
    struct Var *vardecls;
    struct Stmt *code;
    struct Section *section_first;
    struct TreeNode *section_by_name;
    unsigned is_noreturn : 1;
    unsigned is_modifying : 1;
    unsigned is_constructor : 1;
    unsigned is_service_ctor : 1;
    unsigned is_entry : 1;
    size_t num_params, num_returns;
    struct Func *next;
};

struct ServiceTypeSpec {
    struct Ident class_ident; /* used for duplicate detection */
    struct Type *class_;
    const char *name;
    size_t namelen;
    /* TODO these can have additional params also:
        CommandMain "bmp2png"
        PEInformation with
            product_name = "BMP to PNG"
            product_version = "1.0"
        end */
    struct ServiceTypeSpec *next;
};

struct Module {
    struct TreeNode *types;
    struct TreeNode *funcs;
    /*struct TreeNode *consts;*/
    struct Type *types_list;
    struct Func *funcs_list;
    /* FIXME how to access constants?
        * could allow only local constants in utility files.
          - in that case, local functions should be allowed too
        * alternatively, simply disallow constants in utility files
          (either in the bootstrap compiler only, or in the language
          spec itself) */
    /*struct Var *consts_list;*/
    struct Module *next;
};

enum FuncKind {
    FK_FUNC,
    FK_ENTRY,
    FK_CONSTRUCTOR
};

void module_start(void);
struct Type *map_named_type(const char *name, size_t len);
void type_start(const char *name, size_t len);
void type_end(void);
struct Func *map_named_func(const char *name, size_t len,
                            enum FuncKind kind);
void func_start(const char *name, size_t len, enum FuncKind kind);
void func_end(void);
void toplevel_var_add(struct Var *var);
bool instancedefs_seen(void);
bool funcdefs_seen(void);
struct ExprString *new_string_literal(const char *string, size_t len);
void srcloc_init(struct SourceLocation *srcloc);

/*  =======================================================================
    |||||||||||||||||||||||||||   Module State   ||||||||||||||||||||||||||||
    =======================================================================  */

#define MAX_SOURCES 256
extern unsigned num_sources;
extern char *(sources[MAX_SOURCES]);

extern const char *current_filename;
extern unsigned current_line;
extern struct Module *module, *modules;
extern struct Type *current_type;
extern struct Func *current_func;
extern struct TreeNode *current_funcparams;

/*  =======================================================================
    |||||||||||||||||||||   Internal Parser Functions   |||||||||||||||||||||
    =======================================================================  */

enum VarType {
    VAR_DECL_ONLY,
    VAR_ALLOW_INITVAL
    /* TODO might need a VAR_ALLOW_CONST_INITVAL also */
};

void parse_func_body(void);
struct Expr *parse_expr(void);
struct Var *parse_var(struct TreeNode **root, enum VarType vartype);
struct Var *lookup_local_var(const char *name, size_t len);
struct Var *lookup_instance_var(const char *name, size_t len);

/** Reports an error in the source code and exits */
NORETURN void error(const char *s);
/** Reports a warning in the source code (doesn't exit) */
void warning(const char *s);

void typeref_free(struct TypeRef *tr);

/*  =======================================================================
    ||||||||||||||||||||||   Main Parsing Functions   |||||||||||||||||||||||
    =======================================================================  */

/** Parses a source index file */
void parse_source_index(FILE *f);
/** Parses the code in a source file.
    basename is the filename without the path */
void parse_file(FILE *f, const char *basename);

/*  =======================================================================
    ||||||||||||||||||||||||||   Builtin Classes   ||||||||||||||||||||||||||
    =======================================================================  */

extern struct Type *builtin_commandmain_class;
extern struct Type *builtin_string_class;

/** Initializes the builtin types.
    The module must have been initialized first */
void builtins_init(void);

/*  =======================================================================
    |||||||||||||||||||||||||   Semantic Checking   |||||||||||||||||||||||||
    =======================================================================  */

/**
 * Binds types of the subexpressions in an expression, and also performs
 * type checks. If the typeref parameter is NULL, then the expr can have
 * any result type (but it must have known and non-ambiguous type), and
 * if not NULL, then it must be assignment-compatible to the given
 * typeref.
 */
void typecheck_expr(const struct TypeRef *typeref, struct Expr *expr);

/**
 * Determines the type of the expression.
 *
 * The return value must be free'd by the caller, with typeref_free().
 */
struct TypeRef *determine_expr_typeref(struct Expr *expr);

/**
 * Type checks a function call expression. Called by typecheck_expr().
 *
 * The typescope parameter is used to look up type-scoped identifiers,
 * i.e. constructors in this case.
 */
struct TypeRef funccall_check(struct Expr *e, struct Type *typescope);

enum TypeCompatMode {
    TC_ASSIGN, /**< Assignment: a=b */
    TC_COMPARE /**< Comparison */
};

/**
 * Checks whether two types are compatible.
 */
void check_type_compat(
        const struct TypeRef *tr_a,
        const struct TypeRef *tr_b,
        enum TypeCompatMode mode);

/** Returns true if a range can only contain one specific value,
    i.e. that min==max. */
bool is_const(const struct Range *range);

/** Returns true if an expression is constant */
bool is_expr_const(const struct Expr *expr);

/** Determines the range of an arithmetic operation given the ranges
    of the inputs */
struct Range arithmetic_op_range(
        enum ExprKind operation,
        const struct Range *a,
        const struct Range *b);

/*  =======================================================================
    |||||||||||||||||||||||||   C Code Generation   |||||||||||||||||||||||||
    =======================================================================  */

void emit_c_code(const char *filename);

/*  =======================================================================
    |||||||||||||||||||||||||   Utility Functions   |||||||||||||||||||||||||
    =======================================================================  */

#define SOURCELINE_MAX 512
enum ReadSourceMode {
    KEEP_COMMENTS,
    STRIP_COMMENTS
};
/** Reads a line from a file, optionally skipping any comments */
bool read_source_line(FILE *f, char line[SOURCELINE_MAX], size_t *len_out,
                      enum ReadSourceMode mode);
/** Replace all occurrences of given character in a string */
void replacechar(char *s, char from, char to, size_t len);
/** Duplicates memory with a null-terminator at the end (len excludes it) */
char *dupmemz(const char *s, size_t len);
/** Checks that a filename is "safe" (on modern systems) */
void check_filename(const char *s);
/** Returns the last component (i.e. the name of the file) in a file path */
const char *path_basename(const char *path);
/** Very fast but stupid hash function.
    Used to avoid string comparison in AVL trees */
HashCode hash_str(const char *s, size_t len);

#endif