/* internal.h -- Internal SLUL data structures and functions Copyright © 2021-2024 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef CSLUL_INTERNAL_H #define CSLUL_INTERNAL_H #include "cslul.h" #include #include typedef int32_t int32; typedef int64_t int64; typedef uint32_t uint32; typedef uint64_t uint64; typedef uintptr_t uintptr; #define HASHCODE_MAX UINT32_MAX #define PTRSIZE sizeof(void *) #define UINTSIZE (sizeof(unsigned)*8) /* noreturn qualifier for functions */ #if defined(__GNUC__) || defined(__clang__) || defined(__cppcheck__) # define NORETURN __attribute__((__noreturn__)) #else # define NORETURN #endif #if (defined(__GNUC__) && __GNUC__ > 2) || defined(__clang__) # define LIKELY(cond) __builtin_expect(!!(cond), 1) # define UNLIKELY(cond) __builtin_expect(!!(cond), 0) #else # define LIKELY(cond) (cond) # define UNLIKELY(cond) (cond) #endif #if defined(__linux__) || defined(__unix__) || defined(__HAIKU__) #define PLATFORM_NIX #elif defined(_WIN32) || defined(_WIN64) #define PLATFORM_WIN #endif #ifdef _CSLULWINLIBC #define PLATFORM_FAKEPOSIX #endif #if defined(CSLUL_WINDOWS) #define DIRSEP '\\' #define PATHSEP ';' #else #define DIRSEP '/' #define PATHSEP ':' #endif #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define MAX(a,b) ((a)>(b) ? (a) : (b)) #ifdef SLUL_DEBUG # define ENABLE_STRUCTTYPE_PROTECTION #endif /* * Struct-type protection detects writes using the wrong type (i.e. type * confusion). This works by tracking the type in both the structs * themselves and also in a separate array. The values are then compared * at de-allocation time, and any deviations trigger an assertion failure. * * It is enabled for some of the testing targets in the Makefile. */ #ifdef ENABLE_STRUCTTYPE_PROTECTION /** Struct trailers are needed when struct-type protection is enabled. */ # define STRUCT_TRAILER uint32 _trailer; /** Registers a struct so that incorrect writes to it (as the wrong type) can get detected (in many cases, but not all). 
*/
/* Note: the expansion refers to a variable named `ctx`, which must be
   in scope where the macro is used. The registered value is derived from
   the size of the struct. */
#    define PROTECT_STRUCT(s) \
        (register_trailer(ctx, &(s)._trailer, 12345 + 7*sizeof(s)))
/** Sets the protection value of a struct, but does not register its
    address. This must be used for structs that are allocated on the stack */
#    define PROTECT_STACK_STRUCT(s) ((s)._trailer = 12345 + 7*sizeof(s))
/** Asserts that the trailer of a struct still has the value that was set
    by PROTECT_STRUCT / PROTECT_STACK_STRUCT. Uses assert(), so assert.h
    has to be included where this macro is used. */
#    define CHECK_STRUCT(s) assert((s)._trailer == 12345 + 7*sizeof(s))
#    ifndef MAX_TRACKED_TRAILERS
#        define MAX_TRACKED_TRAILERS (32*1024)
#    endif
void register_trailer(struct CSlul *ctx,
                      unsigned *trailer_ptr, unsigned type);
#else
/* Struct-type protection is disabled: everything expands to nothing */
#    define STRUCT_TRAILER
#    define PROTECT_STRUCT(s)
#    define PROTECT_STACK_STRUCT(s)
#    define CHECK_STRUCT(s)
#endif

/* Unique up to 4 ASCII characters */
typedef uint32 HashCode;

/* =======================================================================
   |||||||||||||||||||||||||||   Configuration   |||||||||||||||||||||||||||
   ======================================================================= */

/** CPU architecture of a target. NUM_CPU_ARCHES must be kept in sync with
    the last enumerator. */
enum CpuArch {
    IR_DUMP,
    I386,
    X86_64,
    ARM, /* FIXME this has a lot of different versions...
            both LE/BE and versions 2-7 */
    AARCH64,
    RV32,
    RV64GC,
    PPC64EL,
    MIPSEL
};
#define NUM_CPU_ARCHES (MIPSEL+1)

/** System call interface */
enum SysCalls {
    SYSCALLS_IR_GENERIC,
    LINUX,
    OPENBSD68,
    FREEBSD,
    NETBSD,
    HURD,
    WINDOWS, /* XXX w64 seems to be used for 64 bit windows.
                what is used for 32 bit windows? */
    SLUSYS
};
#define NUM_SYS_TYPES (SLUSYS+1)

/** System library and ABI / calling conventions */
enum UserABI {
    USERABI_IR_GENERIC,
    GNU,
    MUSL,
    /* XXX BSD libc's don't seem ABI stable. Definitely not OpenBSD,
       and probably not FreeBSD either. It might be a better idea
       to use a special "slulrt" library on BSD (or perhaps on all
       platforms, possibly except Windows) */
    OPENBSD68_LIBC,
    FREEBSD_LIBC,
    NETBSD_LIBC,
    WINNT_DLLS, /**< kernel32/user32/ntdll calls rather than MSVCRT calls */
    SLULIB
};
#define NUM_USERABI_TYPES (SLULIB+1)

/** Output file format. NUM_OUTFORMATS must be kept in sync with the last
    enumerator. */
enum OutFormat {
    OF_RAW,
    OF_ELFEXE,
    OF_ELFOBJFILE, /* FIXME this is actually two types, non-PIC and PIC.
                      (this applies to a lesser degree to executables also) */
    OF_ELFDYNLIB,
    OF_ELFSTATLIB,
    OF_PECLI,
    OF_PEGUI,
    OF_COFFOBJFILE,
    OF_PEDYNLIB,
    OF_COFFSTATLIB,
    OF_CHEADER
    /* future extension?
    OF_LLVMIR,  -- LLVM IR
    OF_HASH,    -- SHA256 of internal representation of functional
                   elements (i.e. functions, type structure,
                   imported/exported identifiers) */
};
#define NUM_OUTFORMATS (OF_CHEADER+1)

/** Code generator selection */
enum CodeGen {
    CG_INTERNAL,
    CG_LLVM_IR
};

/** Path to a directory with available interface files for modules */
struct InterfaceDir {
    const char *path;
    size_t pathlen;
    struct InterfaceDir *next;
    STRUCT_TRAILER
};

/** A compilation target. Targets form a singly linked list
    (see CSlulConfig.targets). */
struct Target {
    struct Target *next;
    enum CpuArch cpu;
    enum SysCalls syscalls;
    enum UserABI userabi;
    const char *archtriple;
    STRUCT_TRAILER
};

/** Compiler configuration */
struct CSlulConfig {
    /* ---- Options ---- */
    int use_arch_dirs; /**< Use lib/ARCH/ or bin/ARCH/ (i.e. per-architecture)
                            dirs for outputs */
    struct Target *targets;
    enum CodeGen codegen;
    const char *outdir;
    enum CSlulOutputType requested_outputtype;
    /* ---- Message handling ---- */
    enum CSlulMessageLevel msglevel;
    CSlulMessageHandler *msghandler;
    /* ---- Initialization and system params ---- */
    struct CSlulInitParams params;
    struct InterfaceDir *iface_dirs; /**< Search paths for interface files */
    /* ---- Used by unit tests only ---- */
    unsigned skip_implicit_slulrt_dep : 1;
    /* ---- Internal state ---- */
    struct CfgAlloc *allocs; /**< Tracking of allocations from cfgalloc() */
    char *alloced_appdir;
    int has_errors;
    STRUCT_TRAILER
};

/**
 * Tracked allocation for configuration specific stuff.
 * Automatically free'd when the configuration is free'd.
 */
void *cfgalloc(struct CSlulConfig *cfg, size_t size);
/* Low-level allocation functions that take the configuration as a
   parameter. NOTE(review): presumably these dispatch to the allocator
   given in the init params -- confirm in the implementation. */
void *lowlvl_alloc(const struct CSlulConfig *cfg, size_t size);
void *lowlvl_realloc(const struct CSlulConfig *cfg, void *p, size_t size);
void lowlvl_free(const struct CSlulConfig *cfg, void *p);
/* Error reporting for configuration errors */
void cfgerror(struct CSlulConfig *cfg, enum CSlulErrorCode errorcode);
void cfgerror_textlen(struct CSlulConfig *cfg, enum CSlulErrorCode errorcode,
                      const char *text, int len);
int add_arch(struct CSlulConfig *cfg, const char *arch, size_t archlen);

/* =======================================================================
   ||||||||||||||||||||||||||||   Parser state   |||||||||||||||||||||||||||
   ======================================================================= */

/** State of the UTF-8 decoder (see CSlul.utf8state) */
enum UTF8State {
    UTF8ST_NONE = 0,
    UTF8ST_BYTE2,
    UTF8ST_MB3_BYTE3,
    UTF8ST_MB4_BYTE3,
    UTF8ST_MB4_BYTE4
};

/** Tokenizer state for SLUL source code (see CSlul.tokstate.slul) */
enum TokenState {
    TDone = 0,
    TInNewline,
    TInWhitespace,
    TInComment,
    TInMaybeMLCommentStart1,
    TInMaybeMLCommentStart2,
    TInMaybeMLCommentEnd0,
    TInMaybeMLCommentEnd1,
    TInMaybeMLCommentEnd2,
    TInIdent,
    TInVersion,
    TInOperator,
    TInString,
    TEscapeStart,
    TEscapeHex,
    TEscapeUnicode,
    TEscapeScripts,
    TZeroPrefixed,
    TNumberHex,
    TNumberBin,
    TNumberDec,
    TNumberExpSign,
    TNumberExp,
    TNumberSkip
};

/** Top-level parser state (see CSlul.parser.slul.state) */
enum ParserState {
    PDone = 0,
    PToplevelStart,
    PDataType,
    PDataIdentStart,
    PDataIdent,
    PDataEquals,
    PDataValue,
    PTypeIdent,
    PTypeEquals,
    PTypeParamDefStart,
    PTypeParamDefs,
    PTypeType,
    PFuncIdentStart,
    PFuncClass,
    PFuncClassDot,
    PFuncIdent,
    PFuncTypeParamDefStart,
    PFuncTypeParamDefs,
    PFuncType,
    PFuncLCurly,
    PFuncBody,
    PSkipTopLevel
};

/** Type parser state (see TypeStackEntry.state) */
enum TypeParseState {
    TPSBoundary = -1,
    TPSDone = 0,
    TPSQuals,
    TPSTypeParamLBracket,
    TPSTypeParamComma,
    TPSArrayExpr,
    TPSArrayRSquare,
    TPSStructLCurly,
    TPSStructMemberStart,
    TPSStructMemberIdent,
    TPSStructMemberSince,
    TPSFuncLParen,
    TPSFuncParamStart,
    TPSFuncParamIdent,
    TPSFuncParamComma,
    TPSFuncReturnArrow,
    TPSFuncReturnType,
    TPSFuncLifetimeKW,
    TPSFuncLifetimeIdentA,
    TPSFuncLifetimeGreater,
    TPSFuncLifetimeIdentB,
    TPSEnumStart,
    TPSEnumLCurly,
    TPSEnumValues,
    TPSEnumIdent,
    TPSEnumSince,
    TPSEnumEquals,
    TPSEnumExpr,
    TPSOptionalType,
    TPSSlotIdent
};

/** Function body / statement parser state (see BlockStackEntry.state) */
enum FunctionParseState {
    FPSDone = 0,
    FPSExpr,
    FPSIfExpr,
    FPSWhileExpr,
    FPSDoWhileExpr,
    FPSForType,
    FPSForIdent,
    FPSForIn,
    FPSForExpr,
    FPSSwitchExpr,
    FPSSwitchLCurly,
    FPSSwitchFirstCase,
    FPSCaseValue,
    FPSCaseSeparator,
    FPSCaseDefaultLookahead,
    FPSCaseColon,
    FPSCaseAfterColon,
    FPSSubCaseValue,
    FPSBlock,
    FPSIfBlock,
    FPSElseBlock,
    FPSWhileBlock,
    FPSDoWhileBlock,
    FPSForBlock,
    FPSLoopEmptyBlock,
    FPSLoopEndBlock,
    FPSCaseBlock,
    FPSMismatchedRCurly,
    FPSAfterRCurly,
    FPSMaybeLoopEmpty,
    FPSGoto,
    FPSVardefType,
    FPSVardefIdentLookahead,
    FPSVardefIdent,
    FPSVardefEquals,
    FPSVardefInitval,
    FPSSkipBlockStart,
    FPSSkipBlock,
    FPSSkipStatment
};

/** Expression parser state (see ExprStackEntry.state) */
enum ExprParseState {
    /* Note: These start with XPS instead of EPS to avoid a possible
       conflict with future versions of errno.h */
    XPSDone = 0,
    XPSArglistLookahead,
    XPSCommaCheck,
    XPSDot,
    XPSMaybeFieldInit,
    XPSStringLookahead
};

/** Parser state for type parameter definitions (see CSlul.gdef_state) */
enum GenericDefParseState {
    GDSParamType = 0,
    GDSParamOptionalType,
    GDSParamIdent,
    GDSParamCommaOrEnd
};

/** Parser state for since-versions (see CSlul.parser.slul.sincever_state) */
enum SinceVersionParseState {
    SVDone = 0,
    SVVersionStart,
    SVVersionList
};

/** Tokenizer state for module headers (see CSlul.tokstate.mh) */
enum MHTokenState {
    MHTDone = 0,
    MHTInNewline,
    MHTInComment,
    MHTInMaybeMLCommentStart1,
    MHTInMaybeMLCommentStart2,
    MHTInMaybeMLCommentEnd0,
    MHTInMaybeMLCommentEnd1,
    MHTInMaybeMLCommentEnd2,
    MHTInIndentation,
    MHTAttrStart,
    MHTAttrName,
    MHTAttrSpaceStart,
    MHTAttrSpace,
    MHTAttrValue,
    MHTBadCharCheck
};

/** Parser state for module headers (see CSlul.parser.mh.state) */
enum MHParserState {
    MHPDone = 0,
    MHPAttribute,
    MHPSlulImpl,
    MHPSlulImplVer,
    MHPSlulUpTo,
    MHPSlulMaxVer,
    MHPVersionFlags,
    MHPDependsVersion,
    MHPDependsFlags,
    MHPIfaceDepVersion,
    MHPIfaceDepFlags,
    MHPIfaceDepSinceVer,
    MHPApiDefHash,
    MHPApiDefKeywords,
    MHPSkipAttribute
};

/* Attributes in the module header */
enum ModuleAttr {
    MANone = 0,
    MASlul,
    MAName,
    MAType,
    MAVersion,
    MARepo,
    MARepoMirror,
    MAWebsite,
    MALicense,
    MALicenseText,
    MALicenseList,
    MADepends,
    MAInterfaceDepends,
    MAApiDef,
    MASource
    /*MASourceList*/ /* TODO */
};

/** Selects what kind of identifier-like token is being parsed */
enum IdentParseMode {
    ParseIdent,
    ParseVersion,
    ParseAttrName,
    ParseAttrValue
};

/** Where an expression appears */
enum ExprLocation {
    LEDataInitval,  /**< Directly in initial value of data item */
    LETypeConstant, /**< Array length or enum value (but NOT array index) */
    LEFuncBodyExpr  /**< Expression inside a function body */
};
/* TODO */
/* Note: these two macros currently expand to the same test */
#define IS_CONST_EXPRLOC(loc) ((loc)==LEDataInitval || (loc)==LETypeConstant)
#define IS_TOPLEVEL_EXPRLOC(loc) ((loc)==LEDataInitval || (loc)==LETypeConstant)

/** Where a type appears */
enum TypeLocation {
    LTTypedef,      /**< Directly in typedef */
    LTToplevelFunc, /**< Directly in toplevel definition of function */
    LTToplevelData, /**< Directly in toplevel definition of data */
    LTLocalVar,     /**< Directly in a local variable definition */
    LTRef,          /**< Directly in a reference */
    LTParam,        /**< Directly in a parameter */
    LTGenericPrm,   /**< Directly in a type parameter */
    LTInner         /**< Nested inside a type (e.g. field or array element) */
};

/* First working released version will be 0.1.0 */
enum LanguageVersion {
    LANGVER_BAD = -1,
    LANGVER_UNSET = 0,
    LANGVER_0_0_0 = 1 /**< current, incomplete version */
};
#define LANGVER_LATEST LANGVER_0_0_0

/** Maximum depth of a tree. Also the size of the stack in TreeIter. */
#define MAXDEPTH 25 /* max ~2^24 nodes = ~16.8 million */

/* For TreeNode.state */
#define TNS_INITIAL     0
#define TNS_PROCESSING  1
#define TNS_DONE        2
/**
 * A head of a tree node. This structure is supposed to be followed by
 * node data.
 */
struct TreeNode {
    HashCode hashcode;
    /* The following three bit-fields together occupy the bits of
       one unsigned int */
    unsigned rankdelta : 2;
    unsigned state : 2; /**< TNS_*. For detecting circular refs */
    unsigned length : UINTSIZE-4;
    struct TreeNode *lower, *higher;
    unsigned line;
    unsigned column : UINTSIZE-1;
    unsigned is_new : 1; /**< 0 if the node was de-duplicated */
    const char *filename; /* TODO use an integer id (or offset) and
                             use bits from length (length need only 8 bits,
                             so this could use 22 bits, i.e. max 4 million
                             files per module) */
    /* NOTE(review): presumably names that fit in PTRSIZE bytes are stored
       inline in `copy` and longer names are referenced through `ptr`;
       confirm in node_nameptr() */
    union {
        char copy[PTRSIZE];
        const char *ptr;
    } name;
    STRUCT_TRAILER
};

/** An iterator for iterating through all nodes in a tree.
    Initialized with tree_iter_init() and advanced with tree_iter_next(). */
struct TreeIter {
    short depth;
    struct TreeNode *stack[MAXDEPTH];
    STRUCT_TRAILER
};

/** An output file to generate. Outputs form a singly linked list
    (see CSlul.outputs). */
struct Output {
    struct Output *next;
    enum OutFormat format;
    const char *filename;
    CSlulFile file;
    int arch_id;
    const struct Target *target;
    STRUCT_TRAILER
};

/** Entry in the fixed-size stack of types being parsed
    (see CSlul.typestack) */
struct TypeStackEntry {
    struct Type *type;
    /* Which member is used depends on the kind of type being parsed */
    union {
        struct FieldOrParamEntry **field;
        struct EnumValueEntry **enumval;
        struct PrmEntry **prmentry;
    } nextptr;
    enum TypeParseState state;
    STRUCT_TRAILER
};

/** Entry in the fixed-size stack of statement blocks being parsed
    (see CSlul.blockstack) */
struct BlockStackEntry {
    unsigned has_braces : 1;
    /** Used to detect line breaks in some cases. TODO Rename */
    unsigned misc : UINTSIZE-1;
    enum FunctionParseState state;
    struct StmtBlock *stmtblock;
    struct Stmt *stmt, *prevstmt;
    STRUCT_TRAILER
};

/** Entry in the fixed-size stack of expressions being parsed
    (see CSlul.exprstack) */
struct ExprStackEntry {
    enum ExprParseState state;
    int operator_expected;
    int last_tok_line;
    struct ExprNode *opstack;
    struct ExprNode *out;
    struct ExprRoot *exprroot;
    STRUCT_TRAILER
};

/* Forward declarations */
struct VarDef;
struct VarStates;
struct VarStateEntry;

/** Contains toplevel symbols for an implementation or interface */
struct TopLevels {
    /** Functions and global data. Each node is an IdentDecl */
    struct TreeNode *idents_root;
    /** Type declarations. Each node is a TypeDecl */
    struct TreeNode *types_root;
    /** Linked list of functions and global data */
    struct TopLevelIdent *idents_list;
    /** Linked list of types */
    struct TopLevelType *types_list;
    STRUCT_TRAILER
};

/** The API version definition contains a hash of the API definition
    to lock to */
#define APIDEF_HAS_HASH     0x1
/**
 * The API version has been retracted. Following versions will not include
 * any symbols from this version.
 *
 * Only the most recent version can be retracted (or a contiguous sequence
 * up to the most recent version), but after retraction, it is possible to
 * add new versions.
 */
#define APIDEF_RETRACTED    0x2
/* TODO
#define APIDEF_UNRELEASED   0x4 */

/** An API version that is exposed by a module */
struct ApiDef {
    struct TreeNode verstr;
    struct ApiDef *next;
    unsigned flags : 3; /* APIDEF_* flags */
    unsigned index : UINTSIZE-3;
    unsigned char apihash[CSLUL_APIHASHBYTES];
    STRUCT_TRAILER
};

/** A linked list of since-versions */
struct ApiRefList {
    struct ApiRefList *next;
    struct ApiDef *version;
    STRUCT_TRAILER
};

/** Used in error reporting only */
struct ClosestSincever {
    const struct Module *too_old_module;
    const struct CSlulDependency *too_old_dep;
    const struct ApiDef *best_sincever, *first_sincever;
};

/** Information about a module: either the module being compiled
    (see CSlul.module) or a module that is available as a dependency
    (see CSlulDependency.module). */
struct Module {
    const char *name;
    /** The available version of the module
        (strictly speaking, of the interface file) */
    const char *version;
    const char *filename;
    uint32 namelen, namehash;
    unsigned versionlen;
    enum CSlulModuleType type;
    unsigned is_unstable : 1;
    /** The minimum required language version, for parsing the interface */
    enum LanguageVersion min_langver;
    /** The minimum required language version, for compiling the module */
    enum LanguageVersion impl_minlangver;
    /** The maximum allowed language version */
    enum LanguageVersion max_langver;
    /** The minimum language version, from which new features can be used */
    enum LanguageVersion effective_minlangver;
    /** The maximum language version. Restrictions and deprecations from
        this version will be in effect. */
    enum LanguageVersion effective_maxlangver;
    /* Module dependencies */
    struct TreeNode *deps_root;
    struct CSlulDependency *first_dep, *last_dep;
    /** Interface definitions */
    struct TopLevels iface;
    /* API definitions */
    unsigned num_apidefs;
    struct TreeNode *apidefs_root;
    struct ApiDef *first_apidef, *last_apidef;
    struct ApiDef **apidefs;
    STRUCT_TRAILER
};

/** A source file of the module. Source files form a singly linked list
    (see CSlul.first_srcfile). */
struct CSlulSourceFile {
    struct TreeNode node; /* contains filename */
    struct CSlulSourceFile *next;
    STRUCT_TRAILER
};

struct CsbeLibrary;

/** Whether this is a dependency of a dependency. Note that only
    dependencies of the main module are parsed.
*/
/* NOTE(review): `module` is declared as a pointer (struct Module *) in
   struct CSlulDependency below, so `->module.name` would not compile if
   this macro is expanded with a struct CSlulDependency pointer. Confirm
   whether `->module->name` was intended, and how a NULL module should be
   handled. */
#define IS_DEP_OF_DEP(dep) (!(dep)->module.name)

/** A dependency on a module. Dependencies form a singly linked list
    through `next`. */
struct CSlulDependency {
    /* Requested module */
    struct TreeNode node;
    const char *min_version; /* also \0 terminated */
    uint32 minverlen;
    unsigned flags, iface_flags;
    /** Available module. Always de-duplicated in nested deps,
        or NULL if non-existent. */
    struct Module *module;
    const struct ApiDef *effective_apiver;
    /* Available interface versions */
    struct TreeNode *ifacever_root; /**< Indexed by "since version" */
    struct CSlulInterfaceDep *first_ifacever, *last_ifacever;
    struct CSlulDependency *next;
    /* IR generator state */
    struct CsbeLibrary *importlib;
    STRUCT_TRAILER
};

/** A dependency that is required to parse/use the interface.
    There can be more than one interface dependency on the same module,
    if the version dependency has changed. */
struct CSlulInterfaceDep {
    /* Requested module */
    struct TreeNode node; /**< With "since version" as name.
                               For detecting dups */
    const char *min_version;
    uint32 minverlen;
    /* Dependency for the module (de-duplicated) */
    struct CSlulDependency *dep;
    struct CSlulInterfaceDep *sincever_next; /**< Next dep. on the
                                                  same module */
    STRUCT_TRAILER
};

/** A type parameter in a type reference. Type parameters form a singly
    linked list (see TypeRef.prm). */
struct TypeParam {
    const struct TypeParam *next;
    const struct TypeDecl *t_decl;
    struct Type *bound_to;
    STRUCT_TRAILER
};

/** Reference to a type. Can have type parameters and qualifiers */
struct TypeRef {
    const struct Type *type;
    const struct TypeParam *prm;
    unsigned quals;
    STRUCT_TRAILER
};

/** State for type checking, including detection of cycles */
struct TypeChkCtx {
    /** 1 if the type has the same "real deref type" as the typedef.
        0 if not in a typedef or if inside a compound type. */
    int realtype_equals_typedef;
    /** Presence of this type means that there's a cycle */
    const struct Type *canary_type;
    /** Number of steps since the last time cycle_max was increased. */
    int step;
    /** Cycles of at most this length are being scanned for.
        This is increased exponentially */
    int cycle_max;
    /* No STRUCT_TRAILER since ctx is unavailable in some code paths */
};

/** Keep temporary information about the total length of each dimension
    in an array. Used when generating IR for array-index operations. */
struct DimensionInfo {
    /** Total length of the row including nested arrays */
    size_t rowlen;
    /** If the elements in this dimension are arrays, then this is the
        total length of each nested array. Otherwise, it is 1.
        This is used to calculate offsets (in elements) into the array,
        since the backend (CSBE) does not support multi-dimensional arrays */
    size_t elemlen;
};

/** Stored in ctx->parser.slul.numdigits when the number has syntax errors */
#define INVALID_NUMBER (-1)

/** For uninitialized boolean variables. Used in funcchk.c */
#define INVALID_BOOL 2

/** The number of source files is limited to 10K. This is done to protect
    editors and tools that work on main.slul files, which might not be able
    to handle large files well (each source file needs one line in the
    main.slul file). */
#define MAX_SOURCEFILES 10000
/** Maximum number of API version definitions in a module.
This is limited for the same reason as MAX_SOURCEFILES */ #define MAX_APIDEFS 10000 #ifdef FUZZ_TIGHT_MODE #define MAX_TOKEN_LEN 32U #define MAX_IDENT_LEN 15U #define MAXTYPEDEPTH 5 #define MAXEXPRDEPTH 5 #define MAXBLOCKDEPTH 5 #define MAX_VFY_RECURSION_DEPTH 10 #define MAX_TYPE_PARAMS 5 #else #define MAX_TOKEN_LEN 512U #define MAX_IDENT_LEN 100U #define MAXTYPEDEPTH 100 #define MAXEXPRDEPTH 100 #define MAXBLOCKDEPTH 100 #define MAX_VFY_RECURSION_DEPTH 100 #define MAX_TYPE_PARAMS 100 #endif #define MSG_CHARBUFF_SIZE 4 /* For allowed_scripts */ #define SCRIPT_LATIN 0x1 #define SCRIPT_CYRILLIC 0x2 #define SCRIPT_GREEK 0x4 #define SCRIPT_SPECIALS 0x8 #define SCRIPT_OTHER 0x10 #define SCRIPT_ALL 0xFF #define SCRIPT_RTL 0x100 struct CSlul { /** Stack of memory allocation arenas */ struct ArenaHolder *arenas; /* ---- Input buffer ---- */ const char *buffer; /**< Input buffer. Owned by caller. Pointer is gets advanced when parsing. */ const char *bufferend; /**< Points to the byte after the last byte */ const char *bufferstart; /**< Buffer start */ int last_buffer; /**< 1 if this is the last buffer before end of file */ /* ---- Tokenization ---- */ const char *tokval; /**< Identifier or unescaped string */ size_t toklen; /**< Length of identifier or unescaped string */ char toktmp[MAX_TOKEN_LEN]; /**< Temporary buffer for token data */ size_t tmplen; HashCode tokhash; int case_insens; /**< If 1, hashing and tree search is case-insensitive */ union { enum TokenState slul; enum MHTokenState mh; } tokstate; union { enum CSlulToken slul; enum CSlulModuleHeaderToken mh; } reused_token; /**< Token to return again */ int line; int startcolumn; /**< Column at start of buffer */ int mbtrailerbytes; /**< Number of UTF-8 trailer bytes. 
May be negative if a bad UTF-8 sequence spans across two buffers */ int tokline, tokcolumn; int prev_tok_line, prev_tok_col, prev_tok_endcol; const char *linestart; int numspaces; int in_multiline_comment; int multilinecomment_startline; enum UTF8State utf8state; char utf8byte; uint32 utf8code; /* ---- Parsing ---- */ int in_moduleheader; enum CSlulPhase phase; /**< Whether we're parsing a module header, module interface or module implementation */ union { struct { /* Tokenizer state */ enum CSlulToken two_char_first; /**< For two char operators */ uint32 escape; short escapelen; short numdigits; /**< Or INVALID_NUMBER */ short has_digits; short floatnum, decpointpos, exponent; uint64 number; /* Parser state */ enum ParserState state; /** Parameters when parsing a lifetime specification, or NULL for the return */ struct IdentDecl *lifetime_a, *lifetime_b; /** The class when parsing a method */ struct TypeDecl *current_class; struct EnumValueEntry *current_enumval, *previous_enumval; /** When parsing a typedef, this holds any previously seen typeparams */ struct TreeNode *seen_typeparams; /** Set to the current line number if a since keyword was just seen. This makes numbers get parsed as versions instead of floats. 
*/ int version_line; int sincekeyword_line, sincekeyword_column; enum SinceVersionParseState sincever_state; unsigned forbidden_quals : 8; unsigned in_toplevel_type : 1; unsigned in_toplevel_data : 1; unsigned is_typeident : 1; unsigned is_method : 1; } slul; struct { enum MHParserState state; enum ModuleAttr attr; unsigned attr_line, attr_col; unsigned depflags; struct CSlulDependency *current_dep; const char *version; size_t verlen; /** Whether there's an explicit \depends slulrt (with a specific version / api_hash) */ unsigned seen_explicit_slulrt_dep : 1; } mh; } parser; /** Currently parsed toplevel item */ union { struct TopLevelIdent *tlident; struct TopLevelType *tltype; /* Shortcuts */ struct IdentDecl *decl; struct TypeDecl *typedecl; } current; /** After certain statements, the next statement must start on a new line. In that case, this field contains the last (=forbidden) line. */ int required_linebreak; int last_void_return_line; /**< For improved error messages */ int typedepth; int blockdepth; int exprdepth; int generic_param_depth; unsigned allowed_scripts; /**< Allowed scripts etc. in a string */ unsigned current_goto_id; unsigned num_typedefs; unsigned num_datadefs, num_literals, next_ir_literal; unsigned num_funcdefs; unsigned has_app_main : 1; /**< Whether the module has an entry point */ /** Currently parsed string literal. Used to support multi-line strings */ struct ExprNode *current_expr; struct StringChunk *last_stringchunk; struct Type *current_functype; /** Current function body. Also works as a linked list of all function bodies (in all source files, and inlined functions in all used modules). */ struct FuncBody *funcbody; /** Currently parsed loop (deepest nested one). Used for mapping break/continue to the right loop */ struct LoopInfo *current_loop; /** Currently parsed case. Used for mapping subcase statements to the outer case. 
*/ struct SwitchCase *current_case; /** Root of identifier tree for current statement block */ struct StmtBlock *current_block; /* Currently parsed generic type, when parsing type parameter defs */ struct GenericDef *current_gdef; struct PrmDefEntry *current_prmdef; struct PrmDefEntry **next_prmdef; /** Type parameter definitions for current decl */ struct TreeNode *params_root; enum GenericDefParseState gdef_state; /** Since versions of symbol being parsed */ struct ApiRefList *current_sinceversions; struct ApiRefList *previous_member_sinceversions; struct ApiRefList *outer_member_sincevers; struct ApiRefList *next_decl_sinceversions; /** Since versions of type declaration being parsed */ struct ApiRefList *current_type_sinceversions; /** Stores a pointer to either the top-level root, or to the root of a type's typeident root (which, in functions, is a temporary tree node until the return type has been parsed) */ struct TreeNode **current_identroot; /** Special handling for function declarations, where the type for the type scope (i.e. 
the return type) comes after the identifier: Stores the currently parsed type identifier, if any */ struct TreeNode *typeident_temp_root; /* ---- Used during verification ---- */ struct ExprRoot *current_exprroot; struct VarStates *varstates; struct TypeRef this_tr; /**< Type of "this" / Class of current function */ int recursion_level; unsigned verifying_impl : 1; unsigned in_typedef_check : 1; /** Used to detect when identifiers are being assigned to */ struct ExprNode *current_assign_lvalue; /** Used to detect when identifiers are being checked for 'none' */ struct ExprNode *current_nonecheck_expr; const struct TreeNode *current_targettype_ident; const struct TreeNode *current_sourcetype_ident; /* ---- Used during the IR generation phase ---- */ struct DimensionInfo *ir_dims_buffer; /**< Dimension row-lengths */ size_t ir_dims_capacity; /* ---- Type, data and function declarations ---- */ struct TopLevels tl; /**< Temporary during parsing */ struct TopLevels impl; /* interface toplevels are stored in module */ /* ---- Module headers ---- */ int num_sourcefiles; struct TreeNode *sources_root; struct CSlulSourceFile *first_srcfile, *last_srcfile; struct Module module; /**< Module to be compiled ("main module") */ struct Module *parsed_module; /**< Module currently being parsed */ /* ---- Output ---- */ enum CSlulOutputType outputtype; struct Output *outputs; /* ---- Configuration ---- */ const struct CSlulConfig *cfg; struct CSlulConfig *owned_config; /**< Will be free'd if not NULL */ /* ---- Internal state ---- */ int has_errors, has_fatal_errors, must_have_errors; struct CSlulState msgstate; /**< For error reporting */ char msg_charbuff[MSG_CHARBUFF_SIZE]; const char *current_filename; const struct BaseDefs *basedefs; /* ---- Fixed size stacks ---- */ /** Stack of types. NULL is used a sentinel value when a type can contain expressions, which in turn can contain types */ /* TODO use linked lists instead? 
*/ struct TypeStackEntry typestack[MAXTYPEDEPTH]; struct BlockStackEntry blockstack[MAXBLOCKDEPTH]; struct ExprStackEntry exprstack[MAXEXPRDEPTH]; STRUCT_TRAILER }; /* For generating error codes for internal errors. Each .c file with code (except main.c and tests) should have an INTERR_xxx macro for generating internal error codes. */ enum IntErrBase { INTERRBASE_ARENA = 0x1000, INTERRBASE_BUILD = 0x1100, INTERRBASE_BUILTINS = 0x1200, INTERRBASE_CONTEXT = 0x1300, INTERRBASE_PARSE = 0x1400, INTERRBASE_MHPARSE = 0x1500, INTERRBASE_TREE = 0x1600, INTERRBASE_MISC = 0x1700, INTERRBASE_BWRAPPER = 0x1800, INTERRBASE_PLATFORM = 0x1900, INTERRBASE_ARCH = 0x1A00, INTERRBASE_TLVERIFY = 0x1B00, INTERRBASE_EXPRCHK = 0x1C00, INTERRBASE_FUNCCHK = 0x1D00, INTERRBASE_TYPECHK = 0x1E00, INTERRBASE_TOKEN = 0x1F00, INTERRBASE_MHTOKEN = 0x2000, INTERRBASE_CONFIG = 0x2100, INTERRBASE_IR = 0x2200, INTERRBASE_CHKUTIL = 0x2300, INTERRBASE_TYPECOMPAT = 0x2400 }; #define MAKE_INTERR(minor, base) ((enum CSlulErrorCode)((minor) | (base))) #define INTERR_BASE(errnum) ((enum IntErrBase)((errnum) & 0x7F00)) #define CHK(e) if (UNLIKELY(!(e))) goto assert_error #define SOFTCHK(e) if (UNLIKELY(!(e))) goto soft_assert_error #define ZCHK(res) if (UNLIKELY(!(res))) return 0 #define STRICTCHK(res) if (UNLIKELY(!(res))) assert(0); extern const char modtype2str[][12]; const char *lookup_error_message(enum CSlulErrorCode errorcode); enum CSlulMessageLevel lookup_error_level(enum CSlulErrorCode errorcode); #ifdef CSLUL_IN_TESTS extern int num_ctxs; /**< For detecting leaks */ #endif /* Functions that can be overridden in the compilation context */ CSlulFile ctx_fopen(struct CSlul *ctx, const char *filename, const char *mode); CSlulFile ctx_createexec(struct CSlul *ctx, const char *filename); CSlulFile silent_open(struct CSlul *ctx, const char *filename, const char *mode); int ctx_fclose(struct CSlul *ctx, CSlulFile file); int ctx_ferror(struct CSlul *ctx, CSlulFile file); int ctx_remove(struct CSlul *ctx, 
const char *filename); size_t ctx_fread(struct CSlul *ctx, void *buffer, size_t size, size_t nmemb, CSlulFile file); size_t ctx_fwrite(struct CSlul *ctx, const void *buffer, size_t size, size_t nmemb, CSlulFile file); int ctx_mkdir(struct CSlul *ctx, const char *filename); int ctx_dropprivs(struct CSlul *ctx); /* Allocation functions */ void ctx_outofmem(struct CSlul *ctx); int arena_new(struct CSlul *ctx); void arena_free(struct CSlul *ctx); #define aallocp(ctx, size) aalloc((ctx), (size), sizeof(void *)) #define aallocp64(ctx, size) aalloc((ctx), (size), \ sizeof(void *) > sizeof(uint64) ? sizeof(void *) : sizeof(uint64)) void *aalloc(struct CSlul *ctx, size_t size, size_t align); char *aalloc_memzdup(struct CSlul *ctx, const char *s, size_t size); /* Tree functions */ struct TreeNode *tree_search(const struct CSlul *ctx, const struct TreeNode *root, HashCode h, uint32 len, const char *name); struct TreeNode *tree_search_node(const struct CSlul *ctx, const struct TreeNode *root, const struct TreeNode *n); #define USE_EXISTING UINT32_MAX struct TreeNode *tree_insert(struct CSlul *ctx, struct TreeNode **root, HashCode h, uint32 len, const char *name, struct TreeNode *newnode, size_t newsize); void tree_iter_init(struct TreeIter *iter, struct TreeNode *root); int tree_iter_next(struct TreeIter *iter, struct TreeNode **nodeptr); const char *node_nameptr(const struct TreeNode *n); int ident_equals(const struct TreeNode *n, const char *s, size_t len); #define ONLY_ONE_NODE(root) ((root)->lower == NULL && (root)->higher == NULL) /* Simple error functions */ void internal_error(struct CSlul *ctx, enum CSlulErrorCode errorcode); void error_tok(struct CSlul *ctx, enum CSlulErrorCode errorcode); void error_prevtok_start(struct CSlul *ctx, enum CSlulErrorCode errorcode); void error_prevtok_end(struct CSlul *ctx, enum CSlulErrorCode errorcode); void error_prevtok_line(struct CSlul *ctx, enum CSlulErrorCode errorcode); void error_sameline(struct CSlul *ctx, enum CSlulErrorCode 
errorcode); void error_sincever(struct CSlul *ctx, enum CSlulErrorCode errorcode); void error_text(struct CSlul *ctx, enum CSlulErrorCode errorcode, int line, int column, const char *text); void error_textlen(struct CSlul *ctx, enum CSlulErrorCode errorcode, int line, int column, const char *text, int len); void error_opt(struct CSlul *ctx, enum CSlulErrorCode errorcode, const char *text); void error_linecol(struct CSlul *ctx, enum CSlulErrorCode errorcode, int line, int column); void error_expr(struct CSlul *ctx, enum CSlulErrorCode errorcode, const struct ExprRoot *exprroot, const struct ExprNode *subexpr); void error_stmt(struct CSlul *ctx, enum CSlulErrorCode errorcode, const struct FuncBody *func, const struct Stmt *stmt); void message(struct CSlul *ctx, enum CSlulErrorCode errorcode, const char *filename, int line, int column, int length); /* Error functions for reporting errors with multiple locations */ void message_set_token(struct CSlul *ctx, int index, enum CSlulLocationType loctype); void message_set_ident(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const struct TreeNode *ident); void message_set_typedecl(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const struct TypeDecl *decl); void message_set_type(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const struct TypeDecl *typedecl, const struct Type *type); void message_set_type_ident(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const struct TreeNode *ident, const struct Type *type); void message_set_expr(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const struct ExprRoot *exprroot, const struct ExprNode *subexpr); void message_set_expr_text(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const struct ExprRoot *exprroot, const struct ExprNode *subexpr, const char *text, int textlen); void message_set_stmt(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const struct FuncBody *func, const struct 
Stmt *stmt); void message_set_filemsg(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const char *filename, const char *text, int textlen); void message_set_module(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const struct Module *mod); void message_set_text(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const char *text); void message_set_textlen(struct CSlul *ctx, int index, enum CSlulLocationType loctype, const char *text, size_t textlen); void message_final(struct CSlul *ctx, enum CSlulErrorCode errorcode); void reset_msgstate(struct CSlulState *st); /* Builtin definitions like "bool" */ int builtins_init(struct CSlul *ctx); /* File path functions */ const char *make_path_n(struct CSlul *ctx, const char *directory, const char *file, size_t filelen); const char *make_path(struct CSlul *ctx, const char *directory, const char *file); const char *make_nzpath(struct CSlul *ctx, const char *directory, const char *file, size_t filelen); /* Source and dependency references */ enum CSlulErrorCode build_add_source(struct CSlul *ctx, HashCode h, uint32 len, const char *filename); enum CSlulErrorCode build_add_dep(struct CSlul *ctx, HashCode h, uint32 len, const char *modname); struct CSlulDependency *findadd_dep(struct CSlul *ctx, HashCode h, uint32 len, const char *modname); enum CSlulErrorCode build_add_ifacedep(struct CSlul *ctx, struct CSlulDependency *dep, uint32 minverlen, const char *minver, HashCode sinceverhash, uint32 sinceverlen, const char *sincever); /* Target/output file control */ int init_outputs(struct CSlul *ctx, const char *outdir); int backend_output(struct CSlul *ctx); /* Parsing functions. 
These are defined in parse.c, but also used from mhparse.c */ void parse_moduleheader(struct CSlul *ctx); void error_char(struct CSlul *ctx, const char *bp, enum CSlulErrorCode errorcode); void error_char_offs(struct CSlul *ctx, const char *bp, int offset, enum CSlulErrorCode errorcode); const char *skip_utf8(struct CSlul *ctx, const char *bp, const char *bend); const char *unexpected_utf8(struct CSlul *ctx, const char *bp, const char *bend); const char *tokenize_ident(struct CSlul *ctx, const char *bp, const char *bend, int *status, enum IdentParseMode mode); void ident_start(struct CSlul *ctx); void token_start(struct CSlul *ctx, const char *bp); void token_eof(struct CSlul *ctx, const char *bp); struct Type *get_typescope_type(struct CSlul *ctx, const struct Type *type); int token_could_be_mh_attr(struct CSlul *ctx); /* File/Module context functions */ void start_of_code(struct CSlul *ctx); /* end of module header also */ void determine_effective_langver(struct CSlul *ctx, int in_impl); void module_init(struct Module *mod); int is_app(const struct Module *mod); /* Top-level verification functions */ int tlverify_bind_iface_refs(struct CSlul *ctx); int tlverify_iface_decls(struct CSlul *ctx); int tlverify_impl_decls(struct CSlul *ctx); int tlverify_check_modspecific(struct CSlul *ctx); int tlverify_funcs(struct CSlul *ctx); /* Verification of a specify top-level identifier */ int verify_tlident(struct CSlul *ctx, struct IdentDecl *decl); /* Common functions for the semantic checking and IR generation */ int is_pointer_type(struct CSlul *ctx, const struct TypeRef *tr); int is_lvalue(struct CSlul *ctx, const struct ExprNode *exprnode); #define THIS_VAR_ID 0 int is_lvalue_ex(struct CSlul *ctx, const struct ExprNode *exprnode, unsigned *varnum_out); struct TypeRef root_tr(struct Type *type); struct TypeRef root_tr_const(const struct Type *type); struct TypeRef nested_tr(struct Type *type, struct TypeRef *source); struct TypeRef nested_tr_const(const struct Type 
*type, const struct TypeRef *source); const struct Type *funcdecl_real_type(const struct Type *type); struct Type *real_tr(struct CSlul *ctx, const struct TypeRef *tr, struct TypeRef *out_tr); struct Type *real_deref_tr(struct CSlul *ctx, const struct TypeRef *tr, const struct ExprNode *sourceexpr, struct TypeRef *out_tr); struct Type *real_deref_opt_tr(struct CSlul *ctx, const struct TypeRef *tr, struct TypeRef *out_tr); const struct Type *real_type_tr(struct CSlul *ctx, const struct TypeRef *tr); void typechk_ctx_init(struct TypeChkCtx *tcctx); int detect_type_cycles(struct CSlul *ctx, const struct Type *type, struct TypeChkCtx *tcctx, const struct TypeDecl *error_decl); int bind_type_params(struct CSlul *ctx, const struct Type *type, const struct TypeParam **prm); int substitute_type_params(const struct Type **type, const struct TypeParam **prm, unsigned *quals); int substitute_type_params_tr(const struct TypeRef **tr, const struct Type **type, struct TypeRef *alloced_tr); struct TypeDecl *get_typescope(struct CSlul *ctx, const struct ExprNode *e, const struct TypeRef *target_tr); struct IdentDecl *find_typeident(struct CSlul *ctx, const struct TypeDecl *tsdecl, HashCode h, uint32 len, const char *name, const struct ExprRoot *error_exprroot, const struct ExprNode *error_subexpr, enum CSlulErrorCode errcode); struct TypeDecl *get_identtype_decl(const struct Type *type); struct IdentDecl *get_identexpr_decl(const struct ExprNode *exprnode); struct ExprRoot *make_number_expr(struct CSlul *ctx, uint64 num, const struct ExprNode *source); void check_seen_typeparams(struct CSlul *ctx, struct TreeNode *actual_params, struct TreeNode *seen_params); void typeparams_on_non_generic(struct CSlul *ctx, struct TreeNode *seen_params); int check_decl_sinceversion( const struct CSlulDependency *dep, const struct Module *module, const struct ApiRefList *sinceversions, struct ClosestSincever *closest); enum TypeOrigin { TYORG_NOT_FOUND = -1, TYORG_MAIN_MODULE = 0, 
TYORG_OTHER_MODULE = 1 }; enum TypeOrigin find_dep_of_type(struct CSlul *ctx, const struct TypeRef *tr, const struct TypeDecl **sdecl_out, const struct CSlulDependency **dep_out); int ident_is_appmain(const struct TopLevelIdent *tlident); int is_valid_appmain(struct CSlul *ctx, const struct TopLevelIdent *tlident); /* Semantic checking of types, exprs, etc. */ struct ArrTypeInfo { const struct Type *elemtype; const struct ExprRoot *len_expr; uint64 constant_len; unsigned is_variable; }; enum ArrInfoState { NORMAL_ARRAYS = 0, IGNORE_LENGTH, UNKNOWN_LENGTH }; void get_array_info(const struct Type *arrtype, struct ArrTypeInfo *out, enum ArrInfoState *state); enum TypeCompatCheck { TC_PTREXACT, /**< Types must match exactly. Used inside pointer types */ TC_EXACT, /**< Types must match exactly. Used inside other types */ TC_PTRTARGET, /**< Types must match, but qualifiers can be compatible */ TC_ASSIGNABLE, /**< Source type needs to be assignable to target type */ TC_ASSIGNABLE_TYPEIDENT, /**< Special handling for enum typeidents */ TC_COMPARABLE /**< Types must be mutually comparable */ }; int require_type_compat(struct CSlul *ctx, const struct TypeRef *target_tr, const struct TypeRef *subexpr_tr, const struct ExprNode *sourceexpr, const struct VarStateEntry *varstate, enum TypeCompatCheck mode); int check_type(struct CSlul *ctx, const struct TypeDecl *fromref, const struct TypeDecl *toref, const struct Type *type, enum TypeLocation loc); int check_expr(struct CSlul *ctx, const struct TypeRef *target_tr, struct ExprRoot *expr, enum ExprLocation loc); int check_funcbody(struct CSlul *ctx, struct FuncBody *func); int varstates_copyonwrite(struct CSlul *ctx); int var_assigned(struct CSlul *ctx, struct VarDef *vardef, struct ExprNode *value); void require_assigned(struct CSlul *ctx, struct VarDef *vardef, const struct ExprNode *identexpr, const struct VarStateEntry **varstate_out); /* Misc functions */ int unbase64(const char *in, size_t inlen, unsigned char *out, size_t 
outlen); unsigned get_unicode_script(uint32 codepoint); int versioncmp(const char *a, size_t alen, const char *b, size_t blen); int node_vercmp(const struct TreeNode *a, const char *b, size_t blen); int nodes_vercmp(const struct TreeNode *a, const struct TreeNode *b); HashCode hash_str(const char *s, size_t len); /* Various low level functions */ int silly_casestrcmp(const char *a, const char *b, size_t len); CSlulFile slul_createexec(const char *filename); int slul_mkdir(const char *filename); #endif