/*
 * Common declarations for the bootstrap compiler.
 *
 * Copyright © 2025 Samuel Lidén Borell
 *
 * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later OR MIT-0
 *
 * Note that the "Common Definitions and Macros" section is shared with
 * the rtlincl/rtl.h file. Therefore, this file is triple licensed under
 * EUPL-1.2 or later / LGPL-2.1 or later / MIT-0.
 */
#ifndef SLUL_COMPILER_H
#define SLUL_COMPILER_H

/* =======================================================================
   |||||||||||||||||||  Common Definitions and Macros  |||||||||||||||||||
   ======================================================================= */

/* Provide bool/true/false on every language level:
   - before C99 they are emulated with int,
   - C99..C17 get them from stdbool.h,
   - C23 has them as keywords, so nothing is needed. */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
#    define bool int
#    define true 1
#    define false 0
#elif __STDC_VERSION__ < 202311L
     /* untested */
#    include <stdbool.h>
#endif
#include <stdint.h> /* for uint64_t. Not actually part of C89 */
#include <stdio.h>  /* for FILE and fprintf */
#include <stdlib.h> /* for abort */

/**
 * Prints a failure message (with the C source file and line of the
 * invocation) to stderr and aborts the process.
 *
 * The trailing "while (1)" never iterates, because abort() does not
 * return. NOTE(review): presumably it is there so that compilers see no
 * fall-through path after FAIL() - confirm before changing it to while (0).
 */
#define FAIL(s) do { \
        fprintf(stderr, "failure in bootstrap compiler at %s:%d: %s\n", \
                __FILE__, __LINE__, s); \
        abort(); \
    } while (1)

/** Fails (see FAIL) with the expression text if expr is negative. */
#define NO_NEG(expr) do { if ((expr) < 0) FAIL(#expr); } while (0)
/** Fails (see FAIL) with the expression text if expr is NULL. */
#define NO_NULL(expr) do { if ((expr) == NULL) FAIL(#expr); } while (0)

/* Define unreachable() */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
    /* C23 already has unreachable in stddef.h. Include it explicitly,
       since no other header is guaranteed to pull it in. */
#    include <stddef.h>
#elif (defined(__GNUC__) && __GNUC__ > 4) || defined(__clang__)
#    define unreachable() __builtin_unreachable()
#else
     /* No compiler support: expands to a no-op */
#    define unreachable() ((void)0)
#endif

/* Define NORETURN */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#    define NORETURN [[noreturn]]
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#    define NORETURN _Noreturn
#elif (defined(__GNUC__) && __GNUC__ > 2) || defined(__clang__)
#    define NORETURN __attribute__((__noreturn__))
#else
#    define NORETURN
#endif

/* =======================================================================
   |||||||||||||||||||||||  Basic Data Structures  |||||||||||||||||||||||
   ======================================================================= */

typedef unsigned HashCode;

/**
 * A head of a tree node. This structure is supposed to be followed by
 * node data.
 */
struct TreeNode {
    HashCode hashcode;
    /* The bit-fields below add up to 32 bits (8+2+1+1+20) */
    unsigned length : 8;     /* at most TREENODE_LEN_MAX */
    unsigned rankdelta : 2;  /* NOTE(review): presumably tree balancing
                                information - confirm in the tree code */
    unsigned is_new : 1;     /* set by tree_insert, see its description */
    unsigned is_defined : 1;
    unsigned line : 20;
    struct TreeNode *lower, *higher;
    const char *name;
};
#define TREE_MAXDEPTH 24 /* 16 million entries */
#define TREENODE_LEN_MAX 255
#define TREENODE_LEN_TYPE unsigned char /* to suppress conversion warning */

int treenode_equals(const struct TreeNode *n, const char *s, size_t len);

/**
 * Looks up a node.
 *
 * Note that the length comes before the name here, which is the opposite
 * of the parameter order of tree_insert.
 *
 * \return A pointer to the node if it existed, or NULL if not found.
 */
struct TreeNode *tree_search(const struct TreeNode *root,
                             HashCode h, size_t len, const char *name);
struct TreeNode *tree_search_node(const struct TreeNode *root,
                                  const struct TreeNode *n);

/**
 * Looks up or inserts a node. If newnode is NULL, the node is allocated
 * if it does not exist.
 *
 * The is_new field is set to 0 if the node already existed, or
 * 1 if it was just inserted.
 *
 * \return A pointer to the node, or NULL is returned on memory
 *         allocation failure.
 */
struct TreeNode *tree_insert(struct TreeNode **root,
                             HashCode h, const char *name, size_t len,
                             struct TreeNode *newnode, size_t newsize);
struct TreeNode *tree_insert_str(struct TreeNode **root,
                                 const char *name, size_t len,
                                 struct TreeNode *newnode, size_t newsize);

/* =======================================================================
   ||||||||||||||||||||||||  Abstract Syntax Tree  |||||||||||||||||||||||
   ======================================================================= */

struct Stmt;
struct Expr;

struct SourceLocation {
    const char *filename;
    int line;
};

/** A named entity: a tree node head plus the location in the source */
struct Ident {
    struct TreeNode node;
    struct SourceLocation srcloc;
};
/**
 * Compares the name of an identifier with the string s of length len.
 * This forwards to treenode_equals() on the tree node that is embedded in
 * the identifier, and yields whatever that function returns.
 *
 * \param ident  Pointer to a struct Ident.
 * \param s      String to compare with.
 * \param len    Length of s.
 */
#define ident_equals(ident, s, len) \
    (treenode_equals(&(ident)->node, (s), (len)))

struct Type {
    struct Ident ident;
    struct Type *outer;
    /* Functions and inner types are reachable both through a tree
       and through a linked list */
    struct TreeNode *funcs;
    struct Func *funcs_list;
    struct TreeNode *inner_types;
    struct Type *inner_types_list;
    /* TODO flags, fields, ... */
    struct Type *next;
};

struct TypeRefNumeric {
    /** Range of type */
    uint64_t min, max;
    /* NOTE(review): min and max are unsigned, so min_neg/max_neg presumably
       carry their signs - confirm against the type checker */
    unsigned min_neg : 1;
    unsigned max_neg : 1;
    unsigned maybe_zero : 1;
};

extern const struct TypeRefNumeric range_bool;

enum TypeRefKind {
    TR_UNKNOWN,
    TR_VOID,
    TR_BOOL,
    TR_INT,
    TR_CLASS
};

struct TypeRef {
    enum TypeRefKind kind;
    unsigned quals;
    union {
        const struct TypeRefNumeric *num;
        struct Type *class_;
    } u;
};

struct ExprInteger {
    /* Negative numbers can be represented in two ways:
       1) Initially, they are formed by a E_INTEGER followed by a E_NEGATE.
       2) After type checking, the E_NEGATE is removed, and the value of
          the E_INTEGER is instead stored in the TypeRef */
    uint64_t num;
};

extern struct ExprString *string_constants;
extern unsigned next_string_const_id;
struct ExprString {
    struct ExprString *next;
    size_t len;
    unsigned id;
    char *s;
};

struct ExprIdent {
    /* Selects the active member of the union below */
    size_t namelen;
    union {
        /* if namelen != 0 */
        const char *name;
        /* if namelen == 0 */
        struct Var *var;
    } u;
};

struct CallArg {
    struct CallArg *next;
    struct Expr *expr;
};

struct ExprCall {
    struct ExprIdent ident;
    struct CallArg *args;
    struct CallArg **nextptr;
};

struct ExprBinary {
    int left_id;
    /* the right_id is: expr->id - 1 */
};

enum ExprGroupKind {
    GK_GROUPING_PAREN,
    GK_ARRAY_LITERAL
};

enum ExprKind {
    E_GROUP_TEMP,
    E_SEQPOINT,
    /* Terminals - Scalar constants */
    E_NONE,
    E_FALSE,
    E_TRUE,
    E_INTEGER,
    E_STRING,
    /* Terminals - Identifiers */
    E_IDENT,
    E_MEMBER,
    /* Terminals - Multi-argument */
    E_ARRAY,
    E_CALL,
    /* Unary operators */
    E_NEGATE,
    E_BOOL_NOT,
    /* Binary operators */
    E_ADD,
    E_SUB,
    E_MUL,
    E_DIV,
    E_MOD,
    E_EQUAL,
    E_NOT_EQUAL,
    E_LESS,
    E_GREATER,
    E_LESS_EQUAL,
    E_GREATER_EQUAL,
    E_BOOL_AND,
    E_BOOL_OR
};
/* E_BOOL_OR has to remain the last enumerator for this to be correct */
#define NUM_EXPRKINDS (E_BOOL_OR+1)

struct Expr {
    enum ExprKind kind;
    int id;
    union {
        struct ExprInteger intval;
        struct ExprString *strval;
        struct ExprBinary binary;
        struct ExprIdent ident;
        struct ExprCall *call;
        enum ExprGroupKind grouptemp;
        struct Expr *seqpoint_end;
        /* TODO member exprs */
        /* TODO array exprs */
    } u;
    struct TypeRef *typeref;
    struct Expr *rpnnext;
};

struct Var {
    struct Ident ident;
    struct TypeRef *typeref;
    unsigned is_funcparam : 1;
    unsigned is_modifiable : 1;
    struct Expr *initval;
    struct Var *next;
};

struct LoopInfo {
    struct Expr *cond;
    struct Stmt *body;
    struct Stmt *loopempty;
    struct Stmt *loopend;
};

struct StmtFor {
    struct LoopInfo loop;
    struct Var *var;
};

/** "else if". Only appears inside StmtIf */
struct StmtElif {
    struct Expr *cond;
    struct Stmt *body;
    struct StmtElif *next;
};

struct StmtIf {
    struct Expr *cond;
    struct Stmt *true_;
    struct StmtElif *elifs;
    struct Stmt *false_;
};

struct StmtBreak {
    struct Stmt *loop;
};

struct Case {
    struct Case *next;
    struct Expr *value;
    struct Stmt *block;
};

struct StmtSwitch {
    struct Expr *cond;
    struct Case *cases;
    struct Stmt *default_;
};

struct AssignDestination {
    struct Expr *expr;
    struct AssignDestination *next;
};

struct StmtAssign {
    /* The first destination is embedded; any further ones are linked
       through its next pointer */
    struct AssignDestination first_dest;
    struct Expr *sourceexpr;
};

enum StmtType {
    S_ASSERT,
    S_ASSIGN,
    S_BREAK,
    S_CONTINUE,
    S_EXPR,
    S_FOR,
    S_IF,
    S_RETURN_NOVALUE,
    S_RETURN_VALUE,
    S_SWITCH,
    S_VARDECL,
    S_WHILE
};

struct Stmt {
    struct Stmt *next;
    enum StmtType kind;
    int line;
    int id;
    union {
        struct Expr *expr;
        struct Var *var;
        struct LoopInfo loop;
        struct StmtBreak break_;
        struct StmtFor for_;
        struct StmtIf if_;
        struct StmtSwitch switch_;
        struct StmtAssign assign;
    } u;
};

struct Section {
    struct Ident ident;
    struct Stmt *code;
    struct Section *next;
};

#define FUNCPARAMS_MAX 255
struct Func {
    struct Ident ident;
    struct Type *class_;
    struct Var *params;
    struct Var *returns;
    struct Var *vardecls;
    struct Stmt *code;
    struct Section *section_first;
    struct TreeNode *section_by_name;
    unsigned is_noreturn : 1;
    size_t num_params, num_returns;
    struct Func *next;
};

struct Module {
    /* Types and functions are reachable both through a tree
       and through a linked list */
    struct TreeNode *types;
    struct TreeNode *funcs;
    struct Type *types_list;
    struct Func *funcs_list;
    struct Module *next;
};

void module_start(void);
struct Type *map_named_type(const char *name, size_t len);
void type_start(const char *name, size_t len);
void type_end(void);
struct Func *map_named_func(const char *name, size_t len);
void func_start(const char *name, size_t len);
void func_end(void);
void srcloc_init(struct SourceLocation *srcloc);

/* =======================================================================
   |||||||||||||||||||||||||||  Module State  ||||||||||||||||||||||||||||
   ======================================================================= */

#define MAX_SOURCES 256
extern int num_sources;
extern char *(sources[MAX_SOURCES]);

extern const char *current_filename;
extern int current_line;
extern struct Module *module, *modules;
extern struct Type *current_type;
extern struct Func *current_func;
extern struct TreeNode *current_funcparams;

/* =======================================================================
   |||||||||||||||||||||  Internal Parser Functions  |||||||||||||||||||||
   ======================================================================= */

enum VarType {
    VAR_DECL_ONLY,
    VAR_ALLOW_INITVAL
    /* TODO might need a VAR_ALLOW_CONST_INITVAL also */
};

void parse_func_body(void);
struct Expr *parse_expr(void);
struct Var *parse_var(struct TreeNode **root, enum VarType vartype,
                      struct Var **list_out);
struct Var *lookup_local_var(const char *name, size_t len);
/** Reports an error in the source code and exits */
NORETURN void error(const char *s);
/** Reports a warning in the source code (doesn't exit) */
void warning(const char *s);
/** Returns true if the given range is a statically allocated constant
    range in parsedecl.c */
bool is_builtin_range_ptr(const struct TypeRefNumeric *range);
void typeref_free(struct TypeRef *tr);

/* =======================================================================
   ||||||||||||||||||||||  Main Parsing Functions  |||||||||||||||||||||||
   ======================================================================= */

/** Parses a source index file */
void parse_source_index(FILE *f);
/** Parses the code in a source file. basename is the filename without
    the path */
void parse_file(FILE *f, const char *basename);

/* =======================================================================
   |||||||||||||||||||||||||  Semantic Checking  |||||||||||||||||||||||||
   ======================================================================= */

/**
 * Binds types of the subexpressions in an expression, and also performs
 * type checks. If the typeref parameter is NULL, then the expr can have
 * any result type (but it must have known and non-ambiguous type), and
 * if not NULL, then it must be assignment-compatible to the given
 * typeref.
 */
void typecheck_expr(const struct TypeRef *typeref, struct Expr *expr);

/**
 * Determines the type of the expression.
 *
 * The return value must be free'd by the caller, with typeref_free().
 */
struct TypeRef *determine_expr_typeref(struct Expr *expr);

/**
 * Type checks a function call expression. Called by typecheck_expr().
 *
 * Unlike determine_expr_typeref(), the TypeRef is returned by value.
 */
struct TypeRef funccall_check(struct Expr *expr);

enum TypeCompatMode {
    TC_ASSIGN, /**< Assignment a=b */
    TC_COMPARE /**< Comparison */
};

/**
 * Checks whether two types are compatible.
 */
void check_type_compat(
        const struct TypeRef *tr_a,
        const struct TypeRef *tr_b,
        enum TypeCompatMode mode);

/** Returns true if a range can only contain one specific value,
    i.e. that min==max. */
bool is_const(const struct TypeRefNumeric *range);
/** Returns true if an expression is constant */
bool is_expr_const(const struct Expr *expr);

/* =======================================================================
   |||||||||||||||||||||||||  C Code Generation  |||||||||||||||||||||||||
   ======================================================================= */

void emit_c_code(const char *filename);

/* =======================================================================
   |||||||||||||||||||||||||  Utility Functions  |||||||||||||||||||||||||
   ======================================================================= */

#define SOURCELINE_MAX 512
enum ReadSourceMode {
    KEEP_COMMENTS,
    STRIP_COMMENTS
};
/** Reads a line from a file, optionally skipping any comments */
bool read_source_line(FILE *f, char line[SOURCELINE_MAX], size_t *len_out,
                      enum ReadSourceMode mode);
/** Replace all occurrences of given character in a string */
void replacechar(char *s, char from, char to, size_t len);
/** Duplicates memory with a null-terminator at the end (len excludes it) */
char *dupmemz(const char *s, size_t len);
/** Checks that a filename is "safe" (on modern systems) */
void check_filename(const char *s);
/** Returns the last component (i.e. the name of the file) in a file path */
const char *path_basename(const char *path);
/** Very fast but stupid hash function. Used to avoid string comparison
    in AVL trees */
HashCode hash_str(const char *s, size_t len);

#endif