/* ast.h -- Syntax tree Copyright © 2020-2024 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef CSLUL_AST_H #define CSLUL_AST_H #include "internal.h" #define D_DEFINED 0x1 /**< Identifier has been defined */ #define D_FUNC 0x2 /**< Function (not a variable) */ #define D_LOCAL 0x4 /**< Local variable or function parameter */ #define IS_IDENT_DEFINED(identdecl) \ (((identdecl)->type.defflags & D_DEFINED) != 0) #define IS_IDENT_IMPLEMENTED(identdecl) \ (((identdecl)->type.defflags & D_DEFINED) != 0 && \ (identdecl)->type.type != T_IMPORTED) #define IS_FUNC_DECL(identdecl) \ (((identdecl)->type.defflags & D_FUNC) != 0) #define IS_IDENT_LOCAL(identdecl) \ (((identdecl)->type.defflags & D_LOCAL) != 0) #define T_INVALID 0 #define T_REF 1 #define T_ARRAY 2 #define T_STRUCT 3 #define T_ENUM 4 #define T_IDENT 5 #define T_ELMNTRY 6 #define T_BITS 7 #define T_FUNC 8 #define T_METHOD 9 /**< A function that operates on an object */ #define T_GENERICDEF 10 /**< Definition of generic type */ #define T_GENERICSPEC 11 /**< Specification of parameters for generic type */ #define T_GENERICVAR 12 /**< Type "variable" for generic type */ #define T_LIFETIMED 13 /**< Type has lifetime attribute */ #define T_PRIVATE 14 /**< Opaque type. Only meaningful in interfaces and with arena parameters */ #define T_IMPORTED 15 /**< Imported from another module (or a type parameter that was used before its definition) */ #define T_INTERNAL 16 /**< Internal types (such as "comparable") */ #define T_SLOT 17 /**< Slots are substituted with type parameters. Slots are always fixed-size, and can hold a ref or an int (or a smaller type). */ #define T_GENERICSEEN 18 /**< A generic type whose type parameters have been seen, but not yet the type itself (Used during parsing only). */ /* TODO #define T_IFACE 14 #define T_DISJOINT 15 #define T_JAGGED 16*/ #define Q_VAR 0x01 /**< Modifiable after initialization */ #define Q_WRONLY 0x02 /**< Only writeable */ #define Q_ALIASED 0x04 /**< Might be accessible through another pointer */ #define Q_THREADED 0x0C /**< Might be accessible by other threads */ #define Q_CLOSED 0x10 /**< Future extensions to the type are disallowed */ #define Q_TAKEADDR 0x20 /**< Address computation only (don't evaluate). Used internally. */ #define MAXIMUM_QUALS (Q_VAR|Q_ALIASED|Q_THREADED) #define IS_CONST_QUALS(q) (((q)&0xF) == 0) /** Qualifiers that are forbidden on top-level data items */ #define FORBIDDEN_QUALS_TLDATA 0x0F #define FORBIDDEN_QUALS_FOR (Q_WRONLY|Q_ALIASED|Q_THREADED) #define FORBIDDEN_QUALS_LOCALVAR Q_WRONLY /* FIXME how about referenced items in params? e.g. "ref var Thing field" inside a struct param or return */ #define FORBIDDEN_QUALS_FUNCPARAM 0xF #define FORBIDDEN_QUALS_TYPEDECL 0xF #define FORBIDDEN_QUALS_ENUM 0xF /* "misc" field values for struct types */ #define M_KNOWN_SIZE 0x01 /* "misc" field values for enum types */ #define M_EXPLICIT_BASETYPE 0x01 /* "misc" field values for array types */ #define M_LONG_LENGTH 0x7F #define M_ANY_LENGTH 0x7E /* only used during semantic checking */ #define IS_SHORT_ARRAYLEN(arrtype) ((arrtype)->misc < 0x7D) /* "misc" field values for function types */ #define M_VOIDRETURN 0x01 #define M_NORETURN 0x02 #define M_OPTIONAL 0x04 #define IS_FUNCREF_OPTIONAL(reftype) (((reftype).misc & 0x4) != 0) #define HAS_RETURN(functype) (((functype)->misc & 0x03) == 0) #define IS_VOIDFUNC(functype) (((functype)->misc & M_VOIDRETURN) == 0) /* "misc" field values for refs */ #define M_NORMAL 0x00 #define M_ARENA 0x01 #define M_OWN 0x02 #define M_ANYREF 0x03 /**< Used only internally in exprchk */ #define IS_REF_NORMAL(reftype) (((reftype).misc & 0x3) == 0) #define IS_REF_ARENA(reftype) (((reftype).misc & 0x3) == 1) #define IS_REF_OWN(reftype) (((reftype).misc & 0x3) == 2) #define IS_REF_ANYREF(reftype) (((reftype).misc & 0x3) == 3) #define REF_KIND(reftype) ((reftype).misc & 0x3) #define M_OPTIONAL 0x04 #define IS_REF_OPTIONAL(reftype) (((reftype).misc & 0x4) != 0) /* "misc" field values for builtin types */ #define M_BT_OPTIONAL 0x01 /**< strings can be optional */ #define IS_BT_OPTIONAL(btype) (((btype).misc & 0x1) != 0) /* "misc" field for type param definitions contain any one of the PT_ constants */ enum BuiltinType { BT_Bool, BT_USize, BT_SSize, BT_FileOffs, BT_String, BT_Int8, BT_Byte, BT_WUInt8, BT_Int16, BT_UInt16, BT_WUInt16, BT_Int, BT_UInt, BT_WUInt, BT_Int32, BT_UInt32, BT_WUInt32, BT_Int64, BT_UInt64, BT_WUInt64 }; #define BT_FIRST BT_Bool #define BT_LAST BT_WUInt64 #define BT_NUMTYPES (BT_LAST+1) #define BT_IS_NUMERIC(bt) ((bt) != BT_Bool && (bt) != BT_String) #define BT_IS_INTEGER(bt) ((bt) != BT_Bool && (bt) != BT_String && \ /* TODO (bt) != BT_Float*/1) enum InternalType { /* Starts from 33 to not share numbers with BuiltinType */ IT_UnsizedInt = 33, /**< integer type of unknown size. used in subexprs of relational ops and in enums */ IT_Void, /**< from void functions */ IT_Any /**< at root of expression statements */ }; #define IT_FIRST IT_UnsizedInt #define IT_LAST IT_Any #define IT_NUMTYPES (IT_LAST-IT_FIRST+1) #define INTERNAL_TYPE(itnum) (ctx->basedefs->internal_type[(itnum)-IT_FIRST]) #define INTERNAL_TR(itnum) (ctx->basedefs->internal_tr[(itnum)-IT_FIRST]) struct Type { unsigned defflags : 3; unsigned quals : 6; unsigned misc : 7; unsigned type : 5; unsigned column : 11; unsigned line; union { /* TODO wrapper for complex qualifiers? (can contain a type directly, to avoid one pointer indirection) */ const struct Type *nested; struct TypeDecl *ident; struct FieldOrParamList *fields; struct EnumType *enu; struct FuncType *func; struct ArrayType *arr; /* if length doesn't fit in "misc" */ enum BuiltinType builtin; struct GenericDef *gdef; /**< for T_GENERICDEF */ struct GenericPrm *gprm; /**< for T_GENERICSPEC */ struct TreeNode *genericseen; /**< for T_GENERICSEEN */ struct Lifetimed *lifetimed; enum InternalType internal; } u; STRUCT_TRAILER }; /** Describes that a decl has greater or equal lifetime compared to the decl referenced by greater_than */ struct Lifetimed { struct Type type; struct IdentDecl *greater_than; STRUCT_TRAILER }; /** Data or function declaration */ struct IdentDecl { struct TreeNode ident; struct Type type; union { struct ExprRoot *initval; struct FuncBody *funcbody; } u; STRUCT_TRAILER }; struct TypeDecl { struct TreeNode ident; struct Type type; /* will be T_GENERICDEF if generic */ /** Stores typeidents (e.g. ".new") and methods (e.g. ".free"). These are distinguished by their type (T_FUNC for typeidents and T_METHOD for methods) */ struct TreeNode *typeidents; STRUCT_TRAILER }; /* NOTE: TopLevelType and TopLevelIdent must have an identical format. */ struct TopLevelType { struct TypeDecl decl; /** The version(s) where this symbol was added. Can be more than one in case of backports. */ struct ApiRefList *sinceversions; struct TopLevelType *iface_decl; struct TopLevelType *next; unsigned id; unsigned padding1; void *padding2; /* to keep size same as of TopLevelIdent */ STRUCT_TRAILER }; /* NOTE: TopLevelType and TopLevelIdent must have an identical format. NOTE: TopLevelIdent and VersionedDecl must start with the same fields. */ struct TopLevelIdent { struct IdentDecl decl; /** The version(s) where this symbol was added. Can be more than one in case of backports. */ struct ApiRefList *sinceversions; struct TopLevelIdent *iface_decl; struct TopLevelIdent *next; unsigned id; /* datadecls and funcdecls have overlapping IDs */ unsigned is_typeident : 1; struct TypeDecl *class_; /**< For method definitions */ STRUCT_TRAILER }; /* NOTE: TopLevelIdent and VersionedDecl must start with the same fields. */ struct VersionedDecl { struct IdentDecl decl; struct ApiRefList *sinceversions; STRUCT_TRAILER }; struct VarDef { struct IdentDecl decl; unsigned var_id; STRUCT_TRAILER }; /** Member of a struct or a function parameter. */ struct FieldOrParam { struct VarDef vardef; struct ApiRefList *sinceversions; STRUCT_TRAILER }; struct FieldOrParamEntry { struct FieldOrParamEntry *next; struct FieldOrParam f; STRUCT_TRAILER }; struct FieldOrParamList { size_t count; struct FieldOrParamEntry *first; struct TreeNode *fields_root; /* tree of FieldOrParam */ STRUCT_TRAILER }; struct EnumValue { struct TreeNode treenode; struct VersionedDecl vd; STRUCT_TRAILER }; struct EnumValueEntry { struct EnumValueEntry *next; struct EnumValue e; STRUCT_TRAILER }; struct EnumType { struct Type base; struct EnumValueEntry *values; struct TreeNode *values_root; /* tree of EnumValue */ STRUCT_TRAILER }; struct FuncType { struct FieldOrParamList params; struct Type returntype; STRUCT_TRAILER }; struct ArrayType { struct Type elemtype; struct ExprRoot *lengthexpr; STRUCT_TRAILER }; #define PT_TYPEMASK 0xF #define PT_PARAMTYPE(paramtype) ((paramtype).misc & M_SLOT_PTMASK) #define PT_ANY 0x0 /**< Any type that fits in a data pointer on a 32 bit platform. */ #define PT_REFTYPE 0x1 /* XXX type covariance: can this accept an "own" or "arena" ref also? Only if the type can be casted to ref? */ #define PT_OWNTYPE 0x2 #define PT_ARENATYPE 0x3 #define PT_ENUMTYPE 0x4 #define PT_OPTIONAL 0x10 struct PrmDefEntry { unsigned prmtype : 8; struct TypeDecl paramdecl; struct PrmDefEntry *next; STRUCT_TRAILER }; struct GenericDef { struct Type basetype; struct PrmDefEntry paramdef; struct TreeNode *params_root; STRUCT_TRAILER }; struct PrmEntry { struct Type type; struct PrmEntry *next; STRUCT_TRAILER }; struct GenericPrm { struct Type *generictype; /* The number of parameters is stored in type.misc */ struct PrmEntry param; STRUCT_TRAILER }; /*#define E_ERROR 0*/ #define E_UNARYOP 1 #define E_BINARYOP 2 #define E_CONDITIONAL 3 #define E_CONDCHOICES 4 #define E_IDENT 5 #define E_INTEGER 6 #define E_FLOAT 7 #define E_STRING 8 #define E_NONE 9 #define E_UNDEF 10 #define E_ARRAY 11 #define E_STRUCT 12 #define E_INDEX 13 #define E_CALL 14 #define E_FIELD 15 #define E_TYPEIDENT 16 #define E_THIS 17 #define E_BOOL 18 #define E_FIELDINIT 19 enum OpNum { /* Unary operators */ OP_POS = 0, OP_NEG, OP_REFTO, OP_DEREF, OP_OPTIONAL, /* Binary operators (and OP_NOT) */ OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, OP_NOT, OP_AND, OP_OR, OP_REF_IS, OP_REF_IS_NOT, OP_EQ, OP_NOTEQ, OP_LESS, OP_LEQ, OP_GREATER, OP_GEQ, OP_ASSIGN, OP_ADDASSIGN, OP_SUBASSIGN, OP_MULASSIGN, OP_DIVASSIGN, OP_MEMBER, /* Ternary operators */ OP_CONDITIONAL }; #define IS_ASSIGN_OP(op) ((op) >= OP_ASSIGN && (op) <= OP_DIVASSIGN) struct StringLiteral { char *data; union { size_t length; struct StringChunk *chunks; /* if data==NULL */ } u; STRUCT_TRAILER }; /** For multi-line strings */ struct StringChunk { char *data; struct StringChunk *next; size_t length; STRUCT_TRAILER }; struct TempVarInfo { unsigned var_id : UINTSIZE-3; unsigned varlane : 3; STRUCT_TRAILER }; #define EIF_NEGATIVE 0x01 #define INTLITERAL_IS_NEGATIVE(e) (((e).b.intflags & EIF_NEGATIVE) != 0) /** Constant compound (array/struct) literal. */ #define COMPOUNDEXPR_CONST 0x01 /** Nested compound literal. These are ignored */ #define COMPOUNDEXPR_NESTEDCONST 0x02 #define IS_COMPOUNDEXPR_CONST(e) \ (((e)->misc & COMPOUNDEXPR_CONST) != 0) #define IS_COMPOUNDEXPR_NESTEDCONST(e) \ (((e)->misc & COMPOUNDEXPR_NESTEDCONST) != 0) struct ExprNode { unsigned exprtype : 5; unsigned op : 6; unsigned is_assigned : 1; /**< whether the expr is assigned to */ unsigned is_element_base : 1; /**< whether it's a base in a index/field expr */ unsigned is_called : 1; /**< whether it is a function that gets called */ /* Used during parsing */ unsigned rpntokentype : 8; unsigned rpncontext : 3; /** Usage depends on exprtype: - For typeidents: Length of typeident. - For arrays/structs: Whether it is a constant literal. */ unsigned misc : 8; /* Line offset relative to the expr start / ExprRoot */ unsigned line_offset : 16; unsigned column : 16; /** First operand */ union { struct ExprNode *expr; struct StringLiteral *strval; uint64 intval; /**< Integer or boolean value */ /* TODO float */ struct ExprList *exprlist; struct CondChoices *condchoices; /* TODO or general "select" expr? i.e. for any enum type */ const struct TreeNode *ident; struct TypeDecl *thisclass; /**< Class for "this" expression */ HashCode unbound_hash; size_t rpnargs; /**< Temporary length attribute during parsing */ } a; /** Second operand */ union { struct ExprNode *expr; const char *unbound_ident; /**< Temporary attribute for struct fields and type scope identifiers. Length goes in exprnode.misc */ const struct FieldOrParam *bound_field; const struct IdentDecl *bound_method; unsigned intflags; /* Bitmask of EIF_... */ /* TODO: Float sign, exponent and exp-sign */ struct TempVarInfo tempvar; /**< The backend overwrites b with this */ } b; union { struct TypeRef tr; /**< Type of this expression */ struct ExprNode *exprnext; /**< Next subexpression in AST tree. Used only during parsing */ } u; struct ExprNode *rpnnext; /**< Next subexpression in RPN order */ STRUCT_TRAILER }; enum RPNContext { RC_TERMINAL, /**< no arguments follow because it's a terminal symbol */ RC_OP, /**< number of arguments depends on the operator type */ RC_UNARY, /**< a single argument follows (used for unary +/-) */ RC_ARGLIST, /**< function call or array index access */ RC_GROUPING, /**< grouping, e.g. (x+x)*2 */ RC_ELEMLIST /**< structs and arrays */ }; struct ExprRoot { struct ExprNode *root; struct ExprNode *rpn; /* XXX can probably get out of sync after constexpr evaluation */ /* For error messages */ const char *filename; unsigned line_start; unsigned column_start : UINTSIZE-1; /** Whether the expression is computed at runtime/load-time */ unsigned is_computed : 1; STRUCT_TRAILER }; struct ExprList { size_t length; struct ExprNode **elems; STRUCT_TRAILER }; struct CondChoices { struct ExprNode *trueexpr; struct ExprNode *falseexpr; STRUCT_TRAILER }; /** Tracks the state of a variable during checking of a function */ struct VarStateEntry { unsigned assigned : 1; unsigned value_known : 1; /* XXX what does this mean? */ unsigned not_none : 1; unsigned owned : 1; unsigned de_owned : 1; /**< used with assigned=0 for better error messages */ unsigned owned_by : UINTSIZE-6; union { struct { uint64 min; uint64 max; } ui; struct { int64 min; int64 max; } si; } bounds; STRUCT_TRAILER }; /** A set of states for all variables in a function */ struct VarStates { int refcount; struct VarStateEntry *vars; STRUCT_TRAILER }; #define S_BLOCK 0 #define S_DECL 1 #define S_EXPR 2 /*#define S_ASSIGN 3*/ #define S_RETURN 4 #define S_BREAK 5 #define S_CONT 6 #define S_GOTO 7 #define S_IF 8 #define S_WHILE 9 #define S_DOWHILE 10 #define S_FOR 11 #define S_SWITCH 12 #define S_ASSERT 13 #define S_TARGET 14 #define S_SUBCASE 15 #define S_UNREACH 16 #define S_NOP 17 /* No operation. Hack for blank blocks */ struct Stmt { unsigned type : 5; unsigned column : UINTSIZE-5; unsigned line; struct Stmt *next; union { struct ExprRoot *expr; struct VarDef *vardef; struct GotoIdent *gotoident; struct StmtBlock *block; struct CtlIf *ifstm; struct CtlWhile *whilestm; struct CtlFor *forstm; struct CtlSwitch *switchstm; struct CtlSubCase *subcase; struct LoopInfo *loopinfo; } u; STRUCT_TRAILER }; struct StmtBlock { struct StmtBlock *base; struct Stmt stmt; struct TreeNode *idents; union { struct { unsigned always_ret : 1; unsigned has_gototarget : 1; unsigned has_outbound_goto : 1; unsigned prev_goto_id : UINTSIZE-3; } f; struct { unsigned bits; } all; } f; unsigned last_goto_id; STRUCT_TRAILER }; struct LoopInfo { struct LoopInfo *base; struct StmtBlock block; struct StmtBlock *empty_block; struct StmtBlock *end_block; unsigned has_break : 1; STRUCT_TRAILER }; struct CtlIf { struct ExprRoot *cond; struct StmtBlock true_block; struct StmtBlock *false_block; STRUCT_TRAILER }; struct CtlWhile { struct LoopInfo l; struct ExprRoot *cond; STRUCT_TRAILER }; /** Looping over an array */ #define LT_ARRAY 1 /** Looping using a pre-existing iterator */ #define LT_ITERATOR 2 /** Implicitly creating an iterator and looping using it */ #define LT_ITERABLE 3 struct CtlFor { struct LoopInfo l; struct IdentDecl loopvar; struct ExprRoot *loopexpr; unsigned looptype; STRUCT_TRAILER }; struct CaseValue { struct CaseValue *next; struct ExprRoot *expr; /* null for default case */ STRUCT_TRAILER }; struct SwitchCase { struct SwitchCase *next; struct SwitchCase *base; /**< Used for subcases */ struct CaseValue values; struct StmtBlock block; unsigned has_subcases : 1; STRUCT_TRAILER }; struct CtlSwitch { struct ExprRoot *cond; struct SwitchCase *cases; unsigned has_default : 1; STRUCT_TRAILER }; struct GotoIdent { struct TreeNode node; struct GotoIdent *next; struct Stmt *target_stmt; /** The varstates at the location of the target */ struct VarStates *target_vs; /** The common subset of defined variables/variable states at the locations of the gotos */ struct VarStates *jump_vs; unsigned defined : 1; unsigned seen_column : UINTSIZE-1; unsigned seen_line; /**< Line where is was first seen */ unsigned goto_id; STRUCT_TRAILER }; struct CtlSubCase { struct SwitchCase *thecase; struct ExprRoot *value; struct StmtBlock block; STRUCT_TRAILER }; struct FuncBody { /** Module where the function is defined */ struct Module *module; /** Declaration of the function */ struct IdentDecl *ident; /** The version(s) that the function was added in (only applicable to stable-versioned modules) */ struct ApiRefList *sinceversions; struct StmtBlock stmtblock; struct GotoIdent *gotoidents; struct TreeNode *goto_tree; const char *filename; unsigned has_retval : 1; /** Total number of variables (including parameters, return value, but excluding temporaries) */ unsigned num_variables; unsigned num_temporaries; /** Number of basic blocks */ unsigned num_ebb; struct FuncBody *next; STRUCT_TRAILER }; #define BI_INT 0x1 #define BI_FLOAT 0x3 #define BI_OTHER 0x2 #define BI_IS_NUMERIC(t) (((t) & 0x1) != 0) struct BuiltinInfo { unsigned type : 2; unsigned is_wrapping : 1; unsigned is_sysdependent : 1; uint64 negmin, max; }; extern const struct BuiltinInfo builtin_infos[BT_NUMTYPES]; struct BaseDefs { struct Type builtin_type[BT_NUMTYPES]; struct Type internal_type[IT_NUMTYPES]; struct TypeRef builtin_tr[BT_NUMTYPES]; struct TypeRef internal_tr[IT_NUMTYPES]; STRUCT_TRAILER }; #endif