/* parser.h -- Builds an AST from a token stream. Copyright © 2011-2016 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef LRL_PARSER_H #define LRL_PARSER_H #include "context.h" #include "identifier.h" #include "tokenizer.h" #include "builtins.h" typedef enum { /* Namespaces */ LRL_AST_Namespace, LRL_AST_Uses, LRL_AST_Interop, /* Definitions statements */ LRL_AST_Def_Type, LRL_AST_Def_Data, LRL_AST_Def_Function, /* Values */ LRL_AST_Value_Undefined, LRL_AST_Value_None, LRL_AST_Value_NaN, LRL_AST_Value_Inf, LRL_AST_Value_Ident, LRL_AST_Value_TypeIdent, LRL_AST_Value_Scalar, LRL_AST_Value_Struct, LRL_AST_Value_Array, /* Expressions */ LRL_AST_Expr_ArrayIndex, LRL_AST_Expr_Member, LRL_AST_Expr_FuncMember, LRL_AST_Expr_Call, LRL_AST_Expr_As, LRL_AST_Expr_TypeAssert, LRL_AST_Expr_UnaryOp, LRL_AST_Expr_BinaryOp, LRL_AST_Expr_Conditional, LRL_AST_Expr_Evaluated, /*< used internally for constexpr */ /* Types */ LRL_AST_Type_Ident, LRL_AST_Type_Enum, LRL_AST_Type_Bitfield, LRL_AST_Type_Struct, LRL_AST_Type_Union, LRL_AST_Type_Function, LRL_AST_Type_Array, LRL_AST_Type_Pointer, LRL_AST_Type_Optional, LRL_AST_Type_Parametric, LRL_AST_Type_Builtin, LRL_AST_Type_Private, LRL_AST_Type_Any, /* Statements */ LRL_AST_Stmt_Compound, LRL_AST_Stmt_Decl, LRL_AST_Stmt_Expr, LRL_AST_Stmt_DefType, LRL_AST_Stmt_If, LRL_AST_Stmt_Switch, LRL_AST_Stmt_While, LRL_AST_Stmt_DoWhile, LRL_AST_Stmt_For, LRL_AST_Stmt_Goto, LRL_AST_Stmt_SkipTo, LRL_AST_Stmt_RepeatFrom, LRL_AST_Stmt_Label, LRL_AST_Stmt_Break, LRL_AST_Stmt_Continue, LRL_AST_Stmt_Return, LRL_AST_Stmt_Unreachable, LRL_AST_Stmt_TypeAssert, LRL_AST_Stmt_Assert, LRL_AST_LastType } LRLASTNodeType; typedef struct LRLASTUses LRLASTUses; typedef struct LRLASTDefType LRLASTDefType; typedef struct LRLASTDefData LRLASTDefData; typedef struct LRLASTDefFunction LRLASTDefFunction; typedef struct LRLASTDef LRLASTDef; typedef struct LRLASTValueIdent LRLASTValueIdent; typedef struct LRLASTValueScalar LRLASTValueScalar; typedef struct LRLASTValueStruct LRLASTValueStruct; typedef struct LRLASTValueArray LRLASTValueArray; typedef struct LRLASTExprArrayIndex LRLASTExprArrayIndex; typedef struct LRLASTExprMember LRLASTExprMember; typedef struct LRLASTExprCall LRLASTExprCall; typedef struct LRLASTExprAs LRLASTExprAs; typedef struct LRLASTExprTypeAssert LRLASTExprTypeAssert; typedef struct LRLASTExprUnaryOp LRLASTExprUnaryOp; typedef struct LRLASTExprBinaryOp LRLASTExprBinaryOp; typedef struct LRLASTExprConditional LRLASTExprConditional; typedef struct LRLASTExprEvaluated LRLASTExprEvaluated; typedef struct LRLASTExprList LRLASTExprList; typedef struct LRLASTTypeEnum LRLASTTypeEnum; typedef struct LRLASTTypeBitfield LRLASTTypeBitfield; typedef struct LRLASTTypeStruct LRLASTTypeStruct; typedef struct LRLASTTypeUnion LRLASTTypeUnion; typedef struct LRLASTTypeFunction LRLASTTypeFunction; typedef struct LRLASTTypeArray LRLASTTypeArray; typedef struct LRLASTTypePointer LRLASTTypePointer; typedef struct LRLASTTypeOptional LRLASTTypeOptional; typedef struct LRLASTTypeParametric LRLASTTypeParametric; typedef struct LRLASTTypeList LRLASTTypeList; typedef struct LRLASTStmt LRLASTStmt; typedef struct LRLASTCtlIf LRLASTCtlIf; typedef struct LRLASTCtlSwitch LRLASTCtlSwitch; typedef struct LRLASTCase LRLASTCase; typedef struct LRLASTCaseMatch LRLASTCaseMatch; typedef struct LRLASTCtlWhile LRLASTCtlWhile; typedef struct LRLASTCtlFor LRLASTCtlFor; typedef struct LRLASTCtlGoto LRLASTCtlGoto; typedef struct LRLASTCtlLabel LRLASTCtlLabel; typedef struct LRLASTCtlReturn LRLASTCtlReturn; typedef struct LRLASTCtlBreak LRLASTCtlBreak; typedef struct LRLTypeAssert LRLTypeAssert; typedef struct LRLASTCtlTypeAssert LRLASTCtlTypeAssert; typedef struct LRLASTCtlAssert LRLASTCtlAssert; typedef struct LRLASTStmtList LRLASTStmtList; typedef union LRLASTDefOrStmt LRLASTDefOrStmt; /* Case label defines to catch a group of node types */ #define LRL_case_ast_namespaces \ case LRL_AST_Namespace: \ case LRL_AST_Uses: \ case LRL_AST_Interop: #define LRL_case_ast_defs \ case LRL_AST_Def_Type: \ case LRL_AST_Def_Data: \ case LRL_AST_Def_Function: #define LRL_case_ast_values \ case LRL_AST_Value_Undefined: \ case LRL_AST_Value_None: \ case LRL_AST_Value_NaN: \ case LRL_AST_Value_Inf: \ case LRL_AST_Value_Ident: \ case LRL_AST_Value_TypeIdent: \ case LRL_AST_Value_Scalar: \ case LRL_AST_Value_Struct: \ case LRL_AST_Value_Array: #define LRL_case_ast_exprs \ case LRL_AST_Expr_ArrayIndex: \ case LRL_AST_Expr_Member: \ case LRL_AST_Expr_FuncMember: \ case LRL_AST_Expr_Call: \ case LRL_AST_Expr_As: \ case LRL_AST_Expr_TypeAssert: \ case LRL_AST_Expr_UnaryOp: \ case LRL_AST_Expr_BinaryOp: \ case LRL_AST_Expr_Conditional: \ case LRL_AST_Expr_Evaluated: #define LRL_case_ast_exprs_values \ LRL_case_ast_values \ LRL_case_ast_exprs #define LRL_case_ast_types \ case LRL_AST_Type_Ident: \ case LRL_AST_Type_Enum: \ case LRL_AST_Type_Bitfield: \ case LRL_AST_Type_Struct: \ case LRL_AST_Type_Union: \ case LRL_AST_Type_Function: \ case LRL_AST_Type_Array: \ case LRL_AST_Type_Pointer: \ case LRL_AST_Type_Optional: \ case LRL_AST_Type_Parametric: \ case LRL_AST_Type_Builtin: \ case LRL_AST_Type_Private: \ case LRL_AST_Type_Any: #define LRL_case_ast_stmts_except_def \ case LRL_AST_Stmt_Compound: \ case LRL_AST_Stmt_Expr: \ case LRL_AST_Stmt_DefType: \ case LRL_AST_Stmt_If: \ case LRL_AST_Stmt_Switch: \ case LRL_AST_Stmt_While: \ case LRL_AST_Stmt_DoWhile: \ case LRL_AST_Stmt_For: \ case LRL_AST_Stmt_Goto: \ case LRL_AST_Stmt_SkipTo: \ case LRL_AST_Stmt_RepeatFrom: \ case LRL_AST_Stmt_Label: \ case LRL_AST_Stmt_Break: \ case LRL_AST_Stmt_Continue: \ case LRL_AST_Stmt_Return: \ case LRL_AST_Stmt_Unreachable: \ case LRL_AST_Stmt_TypeAssert: \ case LRL_AST_Stmt_Assert: #define LRL_case_ast_stmts \ LRL_case_ast_stmts_except_def \ case LRL_AST_Stmt_Decl: #define LRL_cast_ast_specials \ case LRL_AST_LastType: #define LRL_case_except_ast_namespaces_defs \ LRL_case_ast_values \ LRL_case_ast_exprs \ LRL_case_ast_types \ LRL_case_ast_stmts \ LRL_cast_ast_specials #define LRL_case_except_ast_namespaces_defs_defstmt \ LRL_case_ast_values \ LRL_case_ast_exprs \ LRL_case_ast_types \ LRL_case_ast_stmts_except_def \ LRL_cast_ast_specials #define LRL_case_except_ast_defs \ LRL_case_ast_namespaces \ LRL_case_ast_values \ LRL_case_ast_exprs \ LRL_case_ast_types \ LRL_case_ast_stmts \ LRL_cast_ast_specials #define LRL_case_except_ast_exprs_values \ LRL_case_ast_namespaces \ LRL_case_ast_defs \ LRL_case_ast_types \ LRL_case_ast_stmts \ LRL_cast_ast_specials #define LRL_case_except_ast_types \ LRL_case_ast_namespaces \ LRL_case_ast_defs \ LRL_case_ast_values \ LRL_case_ast_exprs \ LRL_case_ast_stmts \ LRL_cast_ast_specials #define LRL_case_except_ast_stmts \ LRL_case_ast_namespaces \ LRL_case_ast_defs \ LRL_case_ast_values \ LRL_case_ast_exprs \ LRL_case_ast_types \ LRL_cast_ast_specials /* For initialization of unions. Only the first element can be initialized in ANSI C, so these types are placed first in all union types. */ typedef struct { void *a; } LRLInit1; typedef struct { void *a; void *b; } LRLInit2; typedef struct { void *a; void *b; void *c; } LRLInit3; typedef struct { void *a; void *b; void *c; void *d; } LRLInit4; typedef struct { void *a; void *b; void *c; void *d; void *e; } LRLInit5; typedef struct { unsigned int segment_id; unsigned int offset, size; } LRLBackendOffsSize; /* * Types */ typedef enum { LRL_Qual_Const = 0x1, /* Explicit const when it's not the default */ LRL_Qual_Gc = 0x2, LRL_Qual_Mine = 0x4, LRL_Qual_Own = 0x8, LRL_Qual_Shared = 0x10, LRL_Qual_Var = 0x20, LRL_InternQual_Private = 0x100, LRL_InternQual_TypeParam = 0x200, LRL_InternQual_ParametricStorage = 0x400, /* privateness depends on type param */ LRL_InternQual_NotApplicable = 0x1000, LRL_InternQual_Processing = 0x2000, LRL_InternQual_ConstMemory = 0x4000 } LRLTypeQualifiers; #define LRL_Transitive_PtrQuals (LRL_Qual_Gc) #define LRL_VarConst_Quals (LRL_Qual_Const|LRL_Qual_Var) #define LRL_SharedMine_Quals (LRL_Qual_Mine|LRL_Qual_Shared) #define LRL_NonInternal_Quals (LRL_Qual_Const|LRL_Qual_Var|LRL_Qual_Mine|LRL_Qual_Shared) #define LRL_Private_Quals (LRL_InternQual_Private|LRL_InternQual_ParametricStorage) typedef struct LRLBoundParams { struct LRLBoundParams *next; const LRLASTDefType *name; const LRLASTType *bound_type; } LRLBoundParams; struct LRLTypeRef { LRLTypeQualifiers quals; /* only "var"/"const" and "shared"/"mine" are used */ LRLBoundParams *prm; const LRLASTType *type; }; struct LRLASTTypeEnum { LRLASTDefList *values; const LRLIdent *scope; /* of values */ LRLASTType *base_type; }; struct LRLASTTypeBitfield { /* Must be compatible with LRLASTTypeStruct */ LRLASTDefList *members; const LRLIdent *scope; /* of values */ LRLASTType *base_type; /* only integer types are allowed */ }; typedef enum { LRL_FF_NoReturn = 0x1 } LRLFunctionFlags; struct LRLASTTypeFunction { struct LRLASTType *ret; struct LRLASTType *args; LRLFunctionFlags flags; }; typedef enum { LRL_SF_CVarArg = 0x1 /* variadic function parameters */ } LRLStructFlags; struct LRLASTTypeStruct { struct LRLASTDefList *members; const LRLIdent *scope; /* of members */ LRLStructFlags flags; }; struct LRLASTTypeUnion { /* Must be compatible with LRLASTTypeStruct */ struct LRLASTDefList *members; const LRLIdent *scope; /* of members */ }; struct LRLASTTypeArray { LRLASTType *type; LRLASTExpr *length; /* should this be a constraint instead? */ }; typedef enum { /** Flexible pointers allow pointer arithmetic */ LRL_PF_Flexible = 0x1, /** Raw pointers may point to any location including null */ LRL_PF_Raw = 0x2, /** Dereferenced pointers do not need to match the exact type */ LRL_InternPF_Dereferenced = 0x4 } LRLPointerFlags; struct LRLASTTypePointer { LRLASTType *type; LRLPointerFlags flags; }; struct LRLASTTypeOptional { LRLASTType *type; }; struct LRLASTTypeParametric { struct LRLASTType *type; struct LRLASTTypeList *params; }; struct LRLASTType { LRLASTNodeType ast_type; LRLTypeQualifiers quals; LRLHashCode unique_id; const LRLToken *from, *to; union { LRLInit4 initme; LRLIdentRef identref; LRLASTTypeFunction function; LRLASTTypeEnum enu; LRLASTTypeBitfield bitfield; LRLASTTypeStruct struc; LRLASTTypeUnion unio; LRLASTTypeArray array; LRLASTTypePointer pointer; LRLASTTypeOptional optional; LRLASTTypeParametric parametric; LRLBuiltinType builtin; } kind; /* TODO constraints */ }; #define LRL_UNIQUEID_UNSET (((LRLHashCode)0)-1) struct LRLASTTypeList { LRLASTTypeList *next; LRLASTType *type; }; /* * Values */ struct LRLASTExprList { size_t num_args; struct LRLASTExpr **values; }; struct LRLASTValueIdent { LRLIdentRef identref; /* for parametric types */ struct LRLASTTypeList *type_params; }; struct LRLASTValueScalar { const LRLToken *token; int is_negative; /* preceeded by the unary minus operator */ }; struct LRLASTValueStruct { LRLASTExprList values; }; struct LRLASTValueArray { LRLASTExprList values; }; struct LRLASTExprArrayIndex { struct LRLASTExpr *array; struct LRLASTExpr *index; }; struct LRLASTExprMember { struct LRLASTExpr *struc; const LRLToken *token; const LRLIdent *ident; }; struct LRLASTExprCall { LRLASTExprList args; struct LRLASTExpr *function; }; struct LRLASTExprAs { struct LRLASTExpr *expr; LRLASTType *type; }; struct LRLASTExprTypeAssert { struct LRLASTExpr *expr; LRLASTType *type; }; struct LRLASTExprUnaryOp { LRLTokenType token_type; struct LRLASTExpr *operand; }; struct LRLASTExprBinaryOp { LRLTokenType token_type; struct LRLASTExpr *operand1, *operand2; }; struct LRLASTExprConditional { struct LRLASTExpr *condexpr, *trueexpr, *falseexpr; }; /** An expr that has been evaluted with constexpr */ struct LRLASTExprEvaluated { struct LRLASTExpr *evaluated, *original; }; struct LRLASTExpr { LRLASTNodeType ast_type; LRLTypeRef typeref; const LRLToken *from, *to; union { LRLInit5 initme; LRLASTValueIdent ident; LRLASTValueScalar scalar; LRLASTValueStruct struc; LRLASTValueArray array; LRLASTExprArrayIndex index; LRLASTExprMember member; LRLASTValueIdent typeident; LRLASTExprCall call; LRLASTExprAs asexpr; LRLASTExprTypeAssert typeassert; LRLASTExprUnaryOp unary_op; LRLASTExprBinaryOp binary_op; LRLASTExprConditional conditional; LRLASTExprEvaluated evaluated; LRLASTExprList exprlist; } kind; }; /* * Definition statements */ typedef enum { LRL_DeFl_Incomplete = 0x1, LRL_DeFl_Alias = 0x2, LRL_DeFl_Local = 0x4, LRL_DeFl_DeclOnly = 0x8, LRL_DeFl_Export = 0x10, LRL_DeFl_Import = 0x20, LRL_DeFl_Deprecated = 0x40, LRL_DeFl_Internal_MaybeIncomplete = 0x1000, /*< Defined by interop, which does not know if it is incomplete or not */ LRL_DeFl_Internal_TypeAssert = 0x2000, LRL_DeFl_Internal_PrivateOffset = 0x4000, LRL_DeFl_Internal_DefinedByBackend = 0x8000 } LRLDefFlags; #define LRL_LinkageFlags (LRL_DeFl_Local|LRL_DeFl_DeclOnly|LRL_DeFl_Export|LRL_DeFl_Import) struct LRLASTDefType { LRLIdent *ident; LRLDefFlags flags; LRLASTType *type; LRLASTDefList *typenames; }; struct LRLASTDefData { LRLIdent *ident; LRLDefFlags flags; LRLASTType *type; LRLASTExpr *value; LRLBackendOffsSize backend_offs; }; struct LRLASTDefFunction { LRLIdent *ident; LRLDefFlags flags; LRLASTType type; /* always contains a LRLASTTypeFunction */ LRLASTDefList *typenames; LRLASTStmt *code; LRLBackendOffsSize backend_offs; }; struct LRLASTUses { LRLIdentRef identref; }; struct LRLASTInterop { LRLIdent *ident; const LRLToken *name; LRLASTExpr *options_expr; const LRLInteropImpl *impl; const LRLASTType *options_type; LRLASTDefList *translated; struct LRLASTInterop *next; /* next in queue */ }; struct LRLASTDef { LRLASTNodeType ast_type; union { LRLInit4 initme; LRLIdent *ident; /* Definition statements */ LRLASTDefType type; LRLASTDefData data; LRLASTDefFunction function; LRLASTUses uses; LRLASTNamespace *namespac; LRLASTInterop interop; char *configvalue; /* not used in parser */ } kind; }; struct LRLASTDefList { LRLASTDefList *next; LRLASTDef def; }; /* * Statements */ struct LRLASTCtlIf { LRLASTExpr *boolexpr; LRLASTStmt *body_true; LRLASTStmt *body_false; }; struct LRLASTCaseMatch { LRLASTExpr *expr; LRLASTStmt *withstm; }; struct LRLASTCase { size_t num_matchvalues; LRLASTCaseMatch *matchvalues; LRLASTStmt *stmt; }; struct LRLASTCtlSwitch { LRLASTExpr *switchexpr; size_t num_cases; LRLASTCase *cases; LRLASTStmt *defaultstm; }; struct LRLASTCtlWhile { LRLASTExpr *boolexpr; LRLASTStmt *body; int loopid; /*< may be used by the backend for break/continue */ }; typedef struct LRLASTCtlForTemp { enum { LRL_AST_LT_DirectIter = 1, LRL_AST_LT_IndirectIter, LRL_AST_LT_ArrayIter } loop_type; LRLIdent *next_element_ident; LRLIdent *get_iterator_ident; LRLASTExpr *length_expr; } LRLASTCtlForTemp; struct LRLASTCtlFor { LRLASTDef valuedef; LRLASTExpr *iterexpr; LRLASTStmt *body; LRLASTStmt *body_end; /* executed if no "break" was executed */ LRLASTStmt *body_empty; /* executed if body was never reached */ LRLASTCtlForTemp *temp; int loopid; /*< may be used by the backend for break/continue */ }; struct LRLASTCtlGoto { LRLIdentRef identref; LRLASTStmt *target; }; struct LRLASTCtlLabel { LRLIdent *ident; }; struct LRLASTCtlReturn { LRLASTExpr *retexpr; }; struct LRLASTCtlBreak { LRLASTStmt *outer_stmt; /*< jumps to the end of this statement */ }; struct LRLTypeAssert { LRLTypeAssert *next; LRLASTDef def; /* Definition of variable with target type */ LRLASTExpr refexpr; /* Reference to variable of source type */ /* TODO range */ }; struct LRLASTCtlTypeAssert { LRLTypeAssert *asserts; LRLASTStmt *body_do; LRLASTStmt *body_else; }; struct LRLASTCtlAssert { LRLASTExpr *boolexpr; const char* exprstart; size_t exprlength; }; struct LRLASTStmt { LRLASTNodeType ast_type; union { LRLInit4 initme; /* Compound statements */ LRLASTStmtList *compound; /* Definition statements */ LRLASTDefData data; LRLASTDefType deftype; /* TODO should we allow nested functions? */ /* Pure expression statements */ LRLASTExpr *expr; /* Control statements */ LRLASTCtlIf ifstm; LRLASTCtlSwitch switchstm; LRLASTCtlWhile whilestm; LRLASTCtlFor forstm; LRLASTCtlGoto gotostm; LRLASTCtlLabel labelstm; LRLASTCtlReturn retstm; LRLASTCtlBreak breakstm; LRLASTCtlTypeAssert typeassertstm; LRLASTCtlAssert assertstm; } kind; const LRLToken *token; /* for error reporting */ LRLIdent scope; }; struct LRLASTStmtList { LRLASTStmtList *next; LRLASTStmt *statement; }; union LRLASTDefOrStmt { /* XXX create a deforstmt pointer type also? might make it possible to remove a lot of casts */ LRLASTNodeType ast_type; LRLASTDef def; LRLASTStmt stmt; }; struct LRLASTNamespace { LRLIdent *ident; /* TODO generics */ LRLASTDefList *list; }; #define lrl_expr_is_literal(expr) ( \ (expr)->ast_type == LRL_AST_Value_Undefined || \ (expr)->ast_type == LRL_AST_Value_None || \ (expr)->ast_type == LRL_AST_Value_NaN || \ (expr)->ast_type == LRL_AST_Value_Inf || \ (expr)->ast_type == LRL_AST_Value_Scalar || \ (expr)->ast_type == LRL_AST_Value_Struct || \ (expr)->ast_type == LRL_AST_Value_Array || \ (((expr)->ast_type == LRL_AST_Value_Ident || \ (expr)->ast_type == LRL_AST_Value_TypeIdent) && \ (expr)->kind.ident.identref.ident && \ ((expr)->kind.ident.identref.ident->flags & LRL_IdFl_EnumValue)) \ ) LRLASTNamespace *lrl_parse(LRLCtx *ctx, LRLIdent *scope, const LRLToken *tokens); #endif