/*
 * Common declarations for the bootstrap compiler.
 *
 * Copyright © 2025 Samuel Lidén Borell
 *
 * SPDX-License-Identifier: EUPL-1.2+ AND (EUPL-1.2+ OR MIT-0)
 *
 * Note that the "Common Definitions and Macros" section is shared
 * with the rtlincl/rtl.h file and is dual EUPL-1.2+ / MIT-0 licensed.
 */
#ifndef SLUL_COMPILER_H
#define SLUL_COMPILER_H

/* =======================================================================
   |||||||||||||||||||  Common Definitions and Macros  |||||||||||||||||||
   ======================================================================= */

/* Provide bool/true/false on every language level:
   pre-C99 gets plain macros, C99..C17 uses <stdbool.h>, and
   C23 has them as keywords so nothing needs to be done there. */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
#define bool int
#define true 1
#define false 0
#elif __STDC_VERSION__ < 202311L /* untested */
#include <stdbool.h>
#endif

#include <stdint.h> /* for uint64_t. Not actually part of C89 */
#include <stdio.h>
#include <stdlib.h>

/**
 * Prints a failure message (with the file and line of the FAIL usage,
 * followed by the string s) to stderr and then calls abort().
 *
 * The body is wrapped in do { } while (1) so that the macro is used like
 * a single statement. abort() does not return, so the loop never repeats.
 */
#define FAIL(s) do { \
        fprintf(stderr, "failure in bootstrap compiler at %s:%d: %s\n", \
                __FILE__, __LINE__, s); \
        abort(); \
    } while (1)

/** Fails (see FAIL) with the expression text if expr is less than zero */
#define NO_NEG(expr) do { if ((expr) < 0) FAIL(#expr); } while (0)
/** Fails (see FAIL) with the expression text if expr is NULL */
#define NO_NULL(expr) do { if ((expr) == NULL) FAIL(#expr); } while (0)

/* Define unreachable() */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
/* C23 already has unreachable in stddef.h */
#include <stddef.h>
#elif (defined(__GNUC__) && __GNUC__ > 4) || defined(__clang__)
#define unreachable() __builtin_unreachable()
#else
#define unreachable() ((void)0)
#endif

/* Define NORETURN */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define NORETURN [[noreturn]]
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define NORETURN _Noreturn
#elif (defined(__GNUC__) && __GNUC__ > 2) || defined(__clang__)
#define NORETURN __attribute__((__noreturn__))
#else
#define NORETURN
#endif

/* =======================================================================
   |||||||||||||||||||||||  Basic Data Structures  |||||||||||||||||||||||
   ======================================================================= */

typedef unsigned HashCode;

/**
 * A head of a tree node. This structure is supposed to be followed by
 * node data.
 */
struct TreeNode {
    HashCode hashcode;
    /* The bitfields below add up to exactly 32 bits */
    unsigned length : 8;
    unsigned rankdelta : 2;
    unsigned is_new : 1;
    unsigned is_defined : 1;
    unsigned line : 20;
    struct TreeNode *lower, *higher;
    const char *name;
};

#define TREE_MAXDEPTH 24 /* 16 million entries */
/* Largest value that fits in the 8-bit TreeNode.length field */
#define TREENODE_LEN_MAX 255
#define TREENODE_LEN_TYPE unsigned char /* to suppress conversion warning */

int treenode_equals(const struct TreeNode *n, const char *s, size_t len);

/**
 * Looks up a node.
 *
 * \return A pointer to the node if it existed, or NULL if not found.
 */
struct TreeNode *tree_search(const struct TreeNode *root,
                             HashCode h, size_t len, const char *name);
struct TreeNode *tree_search_node(const struct TreeNode *root,
                                  const struct TreeNode *n);

/**
 * Looks up or inserts a node. If newnode is NULL, the node is allocated
 * if it does not exist.
 *
 * The is_new field is set to 0 if the node already existed, or
 * 1 if it was just inserted.
 *
 * \return A pointer to the node, or NULL is returned on memory
 *         allocation failure.
 */
struct TreeNode *tree_insert(struct TreeNode **root,
                             HashCode h, const char *name, size_t len,
                             struct TreeNode *newnode, size_t newsize);
struct TreeNode *tree_insert_str(struct TreeNode **root,
                                 const char *name, size_t len,
                                 struct TreeNode *newnode, size_t newsize);

/* =======================================================================
   ||||||||||||||||||||||||  Abstract Syntax Tree  |||||||||||||||||||||||
   ======================================================================= */

struct Stmt;
struct Expr;

struct SourceLocation {
    const char *filename;
    int line;
};

struct Ident {
    struct TreeNode node;
    struct SourceLocation srcloc;
    unsigned is_defined : 1;
};

/**
 * Forwards to treenode_equals() using the tree node embedded in an Ident.
 *
 * NOTE(review): this macro used to take only `ident` and pass the node by
 * value as the sole argument, which cannot match the three-parameter
 * prototype of treenode_equals(). The (ident, s, len) form is an
 * assumption about the intended usage - confirm.
 *
 * \param ident  Pointer to a struct Ident
 * \param s      String passed on as the `s` argument
 * \param len    Length passed on as the `len` argument
 */
#define ident_equals(ident, s, len) \
    (treenode_equals(&(ident)->node, (s), (len)))

struct Type {
    struct Ident ident;
    struct Type *outer;
    struct TreeNode *funcs;
    struct Func *funcs_list;
    struct TreeNode *inner_types;
    struct Type *inner_types_list;
    /* TODO flags, fields, ... */
    struct Type *next;
};

struct TypeRefNumeric {
    /** Range of target type */
    uint64_t tmin, tmax;
    /** Range of source type */
    uint64_t smin, smax;
    unsigned tmin_neg : 1;
    unsigned tmax_neg : 1;
    unsigned smin_neg : 1;
    unsigned smax_neg : 1;
};

enum TypeRefKind {
    TR_UNKNOWN,
    TR_BOOL,
    TR_INT,
    TR_STRING,
    TR_CLASS
};

struct TypeRef {
    enum TypeRefKind kind;
    unsigned quals;
    union {
        struct TypeRefNumeric *num;
        struct Type *class_;
    } u;
};

struct ExprInteger {
    /* Negative numbers are created with the unary minus operator */
    uint64_t num;
};

extern struct ExprString *string_constants;
extern unsigned next_string_const_id;
struct ExprString {
    struct ExprString *next;
    size_t len;
    unsigned id;
    char *s;
};

struct ExprIdent {
    size_t namelen;
    union {
        /* if namelen != 0 */
        const char *name;
        /* if namelen == 0 */
        /* TODO bound identifier */
    } u;
};

struct CallArg {
    struct CallArg *next;
    struct Expr *expr;
};

struct ExprCall {
    struct ExprIdent ident;
    struct CallArg *args;
    struct CallArg **nextptr;
};

struct ExprBinary {
    int left_id;
    /* the right_id is: expr->id - 1 */
};

enum ExprGroupKind {
    GK_GROUPING_PAREN,
    GK_ARRAY_LITERAL
};

enum ExprKind {
    E_GROUP_TEMP,
    E_SEQPOINT,
    /* Terminals - Scalar constants */
    E_NONE,
    E_FALSE,
    E_TRUE,
    E_INTEGER,
    E_STRING,
    /* Terminals - Identifiers */
    E_IDENT,
    E_MEMBER,
    /* Terminals - Multi-argument */
    E_ARRAY,
    E_CALL,
    /* Unary operators */
    E_NEGATE,
    E_BOOL_NOT,
    /* Binary operators */
    E_ADD,
    E_SUB,
    E_MUL,
    E_DIV,
    E_MOD,
    E_EQUAL,
    E_NOT_EQUAL,
    E_LESS,
    E_GREATER,
    E_LESS_EQUAL,
    E_GREATER_EQUAL,
    E_BOOL_AND,
    E_BOOL_OR,
    E_ASSIGN,
    E_ASSIGN_FINAL
};
/* Number of enumerators in ExprKind (E_ASSIGN_FINAL is the last one) */
#define NUM_EXPRKINDS (E_ASSIGN_FINAL+1)

struct Expr {
    enum ExprKind kind;
    int id;
    union {
        struct ExprInteger intval;
        struct ExprString *strval;
        struct ExprBinary binary;
        struct ExprIdent ident;
        struct ExprCall *call;
        enum ExprGroupKind grouptemp;
        struct Expr *seqpoint_end;
        /* TODO identifier exprs */
        /* TODO member exprs */
        /* TODO array exprs */
        /* TODO call exprs */
    } u;
    struct TypeRef *typeref;
    struct Expr *rpnnext;
};

struct Var {
    struct Ident ident;
    struct TypeRef *typeref;
    unsigned is_funcparam : 1;
    unsigned is_initially_final : 1;
    struct Expr *initval;
    struct Var *next;
};

struct LoopInfo {
    struct Expr *cond;
    struct Stmt *body;
    struct Stmt *loopempty;
    struct Stmt *loopend;
};

struct StmtFor {
    struct LoopInfo loop;
    struct Var *var;
};

/** "else if". Only appears inside StmtIf */
struct StmtElif {
    struct Expr *cond;
    struct Stmt *body;
    struct StmtElif *next;
};

struct StmtIf {
    struct Expr *cond;
    struct Stmt *true_;
    struct StmtElif *elifs;
    struct Stmt *false_;
};

struct StmtBreak {
    struct Stmt *loop;
};

struct Case {
    struct Case *next;
    struct Expr *value;
    struct Stmt *block;
};

struct StmtSwitch {
    struct Expr *cond;
    struct Case *cases;
    struct Stmt *default_;
};

enum StmtType {
    S_ASSERT,
    S_BREAK,
    S_CONTINUE,
    S_EXPR,
    S_FOR,
    S_IF,
    S_RETURN_NOVALUE,
    S_RETURN_VALUE,
    S_SWITCH,
    S_VARDECL,
    S_WHILE
};

struct Stmt {
    struct Stmt *next;
    enum StmtType kind;
    int line;
    int id;
    union {
        struct Expr *expr;
        struct Var *var;
        struct LoopInfo loop;
        struct StmtBreak break_;
        struct StmtFor for_;
        struct StmtIf if_;
        struct StmtSwitch switch_;
    } u;
};

struct Section {
    struct Ident ident;
    struct Stmt *code;
    struct Section *next;
};

#define FUNCPARAMS_MAX 255

struct Func {
    struct Ident ident;
    struct Type *class_;
    struct Var *params;
    struct Var *returns;
    struct Var *vardecls;
    struct TreeNode *vars;
    struct Stmt *code;
    struct Section *section_first;
    struct TreeNode *section_by_name;
    unsigned is_noreturn : 1;
    size_t num_params, num_returns;
    struct Func *next;
};

struct Module {
    struct TreeNode *types;
    struct TreeNode *funcs;
    struct Type *types_list;
    struct Func *funcs_list;
    struct Module *next;
};

void module_start(void);
struct Type *map_named_type(const char *name, size_t len);
void type_start(const char *name, size_t len);
void type_end(void);
struct Func *map_named_func(const char *name, size_t len);
void func_start(const char *name, size_t len);
void func_end(void);

/* =======================================================================
   |||||||||||||||||||||||||||  Module State  ||||||||||||||||||||||||||||
   ======================================================================= */

#define MAX_SOURCES 256
extern int num_sources;
extern char *(sources[MAX_SOURCES]);
extern char *current_filename;
extern int current_line;

extern struct Module *module, *modules;
extern struct Type *current_type;
extern struct Func *current_func;

/* =======================================================================
   |||||||||||||||||||||  Internal Parser Functions  |||||||||||||||||||||
   ======================================================================= */

enum VarType {
    VAR_DECL_ONLY,
    VAR_ALLOW_INITVAL
    /* TODO might need a VAR_ALLOW_CONST_INITVAL also */
};

void parse_func_body(void);
struct Expr *parse_expr(void);
struct Var *parse_var(struct TreeNode **root, enum VarType vartype,
                      struct Var **list_out);

/** Reports an error in the source code */
NORETURN void error(const char *s);

/* =======================================================================
   ||||||||||||||||||||||  Main Parsing Functions  |||||||||||||||||||||||
   ======================================================================= */

/** Parses a source index file */
void parse_source_index(FILE *f);
/** Parses the code in a source file. basename is the filename without
    the path */
void parse_file(FILE *f, const char *basename);

/* =======================================================================
   |||||||||||||||||||||||||  C Code Generation  |||||||||||||||||||||||||
   ======================================================================= */

void emit_c_code(const char *filename);

/* =======================================================================
   |||||||||||||||||||||||||  Utility Functions  |||||||||||||||||||||||||
   ======================================================================= */

#define SOURCELINE_MAX 512
enum ReadSourceMode {
    KEEP_COMMENTS,
    STRIP_COMMENTS
};
/** Reads a line from a file, optionally skipping any comments */
bool read_source_line(FILE *f, char line[SOURCELINE_MAX], size_t *len_out,
                      enum ReadSourceMode mode);
/** Replace all occurrences of given character in a string */
void memreplace(char *s, char from, char to, size_t len);
/** Duplicates memory with a null-terminator at the end (len excludes it) */
char *memzdup(const char *s, size_t len);
/** Checks that a filename is "safe" (on modern systems) */
void check_filename(const char *s);
/** Returns the last component (i.e. the name of the file) in a file path */
const char *path_basename(const char *path);
/** Very fast but stupid hash function.
    Used to avoid string comparison in AVL trees */
HashCode hash_str(const char *s, size_t len);

#endif