/* csbe_internal.h -- Internal datastructures and definitions for CSBE Copyright © 2022-2024 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef CSBE_INTERNAL_H #define CSBE_INTERNAL_H #include "include/csbe.h" #include #define UINTSIZE (sizeof(unsigned)*8) #if (defined(__GNUC__) && __GNUC__ > 2) || defined(__clang__) # define LIKELY(cond) __builtin_expect(!!(cond), 1) # define UNLIKELY(cond) __builtin_expect(!!(cond), 0) #else # define LIKELY(cond) (cond) # define UNLIKELY(cond) (cond) #endif #define MAX(a,b) ((a)>(b) ? (a) : (b)) #define MIN(a,b) ((a)<(b) ? (a) : (b)) #define UINT_ALIGN(size, align) (((size)+((align)-1)) & ~((align)-1)) #define MAX_FIELDS 4096 #define MAX_PARAMS 4096 #define MAX_EBBS (UINT_MAX/sizeof(struct Ebb)/2 - 1) #define MAX_VARDEFS (UINT_MAX/sizeof(struct Var)/2 - 1) #define CSBE_ARRAY_MAX (UINT_MAX/2 - 1) #define uint8 CsbeUint8 #define uint16 CsbeUint16 #define uint32 CsbeUint32 #define uint64 CsbeUint64 #define int8 CsbeInt8 #define int16 CsbeInt16 #define int32 CsbeInt32 #define int64 CsbeInt64 #define uint_fast32 uint_fast32_t enum OpArgType { OPERAND_VAR_IN = 1, OPERAND_VAR_OUT, OPERAND_VAR_IN_OR_IMMED, OPERAND_INDEX, OPERAND_EBB, OPERAND_FUNCDEF, OPERAND_DATADEF, OPERAND_TYPEDEF, OPERAND_FLAGS, OPERAND_ENUM, OPERAND_TYPE, OPERAND_PTR }; #define OPERAND_IS_DEST 0x40 #define OPERAND_TYPE(arg) ((arg) & ~OPERAND_IS_DEST) struct OpArgInfo { unsigned char num_args; unsigned char args[5]; }; #define MAX_OP_NAMELEN (sizeof("CSBEO_CALL_DISCARD_RETURN")) extern const struct OpArgInfo op_argdefs[]; extern const char op_names[][MAX_OP_NAMELEN]; #define CSBE_TYPE_IS_VOID(t) \ (!(t)->is_array && !(t)->is_struct && (t)->num_fields == 0) struct CsbeType { unsigned is_struct : 1; unsigned is_array : 1; unsigned is_defined : 1; unsigned is_unbound : 1; unsigned num_fields : 12; unsigned saved_slots_left : 12; /* in structs */ unsigned array_length; enum CsbeTypeKind simple_kind; /* in non-struct types */ enum CsbePackMode packmode; /* for structs, if num_fields!=0 */ /** Element type in case of array of structs, or array of field types in case of struct */ struct CsbeType *elemtypes; /** "Back pointer" when working with nested structs */ struct CsbeType *containing_type; }; struct CsbeTypedef { struct CsbeTypedef *next; struct CsbeType type; struct CsbeType *refs; }; enum CsbeSlotType { CSBEST_INVALID, CSBEST_UINT64, CSBEST_BYTES, CSBEST_UINT64_ARRAY }; struct DatadefValueSlot { enum CsbeSlotType slot_type; unsigned array_length; union { CsbeUint64 uint64_value; const unsigned char *bytes; const CsbeUint64 *uint64_array; } u; }; #define CSBEDD_INTERN_USED 0x100 #define CSBEDD_INTERN_EXPORTED 0x200 struct CsbeDatadef { struct CsbeDatadef *next; struct CsbeType type; unsigned data_id; unsigned flags; unsigned namelen; unsigned num_valueslots; struct DatadefValueSlot *valueslots; const char *name; }; #define CSBEFD_INTERN_USED 0x100 #define CSBEFD_INTERN_EXPORTED 0x200 struct CsbeFuncdef { struct CsbeFuncdef *next; unsigned func_id; unsigned num_params; enum CsbeAbi abi; unsigned flags; unsigned namelen; struct CsbeType *returntype, *paramtypes; struct FuncIR *funcbody; const char *name; }; struct FuncIR { struct FuncIR *next; struct CsbeFuncdef *funcdef; unsigned has_trap_ebb : 1; /** 1 if any call is as likely as the function itself */ unsigned contains_likely_calls : 1; /** 1 if no return is as likely as the function itself */ unsigned likely_no_return : 1; unsigned num_ebb, num_chunks, num_vars, trap_ebb; unsigned num_opchunks; unsigned first_non_temporary_varlane; unsigned first_callee_saved_varlane; unsigned first_addressable_varlane; /** Maximum number of parameters used by in any outbound function call */ unsigned max_out_params; struct Ebb *ebbs; struct Var *vars; struct OpChunk *first_opchunk, *last_opchunk; struct OpChunk **opchunk_by_id; }; struct CgCtx; struct CgRelocEntry; struct BinWriter; struct Codegen { /* * * * Initialization functions * * * */ int (*has_endianness)(enum CsbeEndianness endianness); int (*has_feature_state)(enum CsbeCpu cpu, int feature, int state); enum CsbeErr (*initialize)(struct CsbeConfig *cfg); /* * * * Code generation functions * * * */ int (*codegen)(struct CgCtx *cg, const struct CsbeFuncdef *f); int (*gen_elf_plt)(struct CgCtx *cg, uint32 got, uint32 gotoffs, uint32 pc); int (*get_elf_plt_size)(const struct CgCtx *cg); int (*process_reloc)(struct CgCtx *cg, struct BinWriter *bw, struct CgRelocEntry *re, uint32 fileoffs, uint32 addr, uint32 newaddr); int (*gen_start_code)(struct CgCtx *cg); int (*gen_helper_code)(struct CgCtx *cg); }; #define MAX_ARCH_FEATURES 1024 #define HAS_ARCH_FEATURE(ai, id) \ ((ai).features[(id)/UINTSIZE] & (1 << ((id)%UINTSIZE))) struct ArchInfo { struct Codegen cgen; unsigned features[(MAX_ARCH_FEATURES+UINTSIZE-1)/UINTSIZE]; unsigned wordbits:8, dptrbits:8, fptrbits:8; unsigned big_endian : 1; unsigned hardfloat : 1; unsigned elf_relocs_have_addends : 1; unsigned is_textual : 1; /** Variables up to this size (in bytes) can be reg-allocated */ unsigned reg_size : 8; /** Number of register for temporaries (caller-saved & not func-params) */ unsigned tempreg_count : 8; /** Number of callee-saved registers */ unsigned savedreg_count : 8; /** Number of registers that are dedicated to function parameters */ unsigned paramreg_count : 8; /* unsigned struct_reg_maxbits : 8;*/ /* TODO */ /** Maximum space usage for a single symbol (including everything, i.e. symtab, dymsym, GOT/PLT, hash tables) */ unsigned symbytes; unsigned symnamebytes; /**< Maximum space usage per sym-name byte */ /** Maximum space usage for a single library reference */ unsigned libbytes; unsigned libnamebytes; /**< Maximum space usage per lib-name byte */ const uint32 *elf_reloc_types; enum CsbeCpu cpu; enum CsbeOsType os; enum CsbeOutputFormat outformat; const char *elf_interp_path; unsigned short types_sizes[NUM_TYPEKINDS]; unsigned short types_align[NUM_TYPEKINDS]; /** * Architectures can have separate registers for: * - byte/short/int/long, * - values vs pointers, * - integer vs float, * - signed vs unsigned, * - TODO loop variables? * This table maps enum CsbeTypeKind to a number of a register set. * (If the arch has only one type of registers, then this will contain * zero in all elements). */ unsigned char regset[NUM_TYPEKINDS]; unsigned char num_regsets; }; struct ArchInfoEntry { struct ArchInfoEntry *next; struct ArchInfo archinfo; }; struct CsbeConfig { int api_version; void *userctx; struct CsbeFuncPtrs funcptrs; unsigned num_arch; struct ArchInfoEntry *last_arch; }; #define CSBEVD_INTERN_DEFINED 0x10 #define CSBEVD_INTERN_USED 0x20 #define CSBEVD_INTERN_FUNCPARAM 0x40 #define CSBEVD_INTERN_SINGLE_EBB 0x80 #define CSBEVD_INTERN_CURRENTLY_LIVE 0x100 #define CSBEVD_INTERN_LIVE_ACROSS_CALL 0x200 #define CSBEVD_INTERN_LIVE_ACROSS_JUMP 0x400 #define CSBEVD_INTERN_HAS_ADDRESS 0x800 #define CSBE_INTERN_VARLANE_FIRSTPARAM 0x8000 struct Var { struct CsbeType type; unsigned flags; /**< Bitmask of CSBExxx_* or CSBExxx_INTERN_* */ unsigned lane; /**< var-lane id, or 0 if not assigned yet. */ unsigned ebb_id; /**< If used only in a single EBB */ }; #define CSBEEF_INTERN_DEFINED 0x80 #define CSBEEF_INTERN_UNCOND_JUMP 0x40 /** Extended Basic Block */ struct Ebb { /*struct EbbArchInfo *arch;*/ /* one per target arch */ unsigned first_op; /**< index of first op */ unsigned end_op; /**< last op is end_op-1 */ unsigned next_varlane; /**< For single-EBB optimization. See analyze.c */ unsigned flags : 8; }; union LargeOpnd { uint64 u64; void *ptr; }; #define OP_MAX_UNSIGNED_ARG 8 /* 5, and 3 extra for var flags */ #define OP_MAX_LARGE_ARG 2 /** Used to distinguish variables from immediates for VAR_IN_OR_IMMED args */ #define CSBE_OPV_VARIABLE 0x80 #define IS_VAR(opnd) (((opnd) & CSBE_OPV_VARIABLE) != 0) #define IS_IMMED(opnd) (((opnd) & CSBE_OPV_VARIABLE) == 0) #define IS_DISCARD(opnd) (((opnd) & CSBE_OPV_DISCARD) != 0) #define IS_NON_DISCARD(opnd) (((opnd) & CSBE_OPV_DISCARD) == 0) /** An operation, e.g. ADD, MOVE, CONDJUMP, etc. */ struct Op { unsigned op : 8; unsigned opnd[OP_MAX_UNSIGNED_ARG]; union LargeOpnd opnd_large[OP_MAX_LARGE_ARG]; }; #define IS_INTEGRAL_TYPE(type) \ ((type).simple_kind >= CSBET_BOOL && (type).simple_kind <= CSBET_USIZE) #define IS_DPTR_TYPE(type) \ ((type).simple_kind >= CSBET_DPTR && \ (type).simple_kind <= CSBET_DPTR_THREADED) #define IS_NONPOINTER_TYPE(type) \ ((type).simple_kind < CSBET_DPTR) #define OPS_PER_CHUNK 64 struct OpChunk { struct OpChunk *next; unsigned num_ops; struct Op ops[OPS_PER_CHUNK]; }; enum CallState { CSBE_CS_NO, CSBE_CS_IN_START, CSBE_CS_ARGS_OR_FUNC, CSBE_CS_IN_ARG, CSBE_CS_RETURN }; struct LibrarySymbol { struct LibrarySymbol *next; enum CsbeSymbolType symtype; const char *name; size_t namelen; union { struct CsbeFuncdef *func; struct CsbeDatadef *data; } decl; }; struct CsbeSymVer { struct CsbeSymVer *next; const char *prefix; /**< Typically the module name in uppercase */ const char *ver; /**< Version string */ size_t verlen; size_t prefixlen; struct LibrarySymbol *syms; }; struct CsbeLibrary { struct CsbeLibrary *next; const char *name; size_t namelen; unsigned num_versions; struct CsbeSymVer *versions; struct LibrarySymbol *unversioned_syms; /* Used in elf.c */ size_t dynstr_offs; }; struct Csbe { struct CsbeConfig cfg; enum CsbeErr error; struct ArchInfo *archs; /* one per target arch */ const char *inputname; /**< Name of input source */ /* Definitions */ struct CsbeTypedef *typedefs; struct CsbeFuncdef *funcdefs; struct CsbeDatadef *datadefs; struct CsbeTypedef *td; struct CsbeDatadef *dd; struct CsbeFuncdef *fd; struct CsbeLibrary *imported_libs; struct CsbeSymVer *exported_symvers; struct LibrarySymbol *exported_unversioned_syms; struct CsbeFuncdef *main_func; /** Type being added currently */ struct CsbeType *type; struct CsbeType *containing_type; unsigned array_length; unsigned is_array : 1; /** How many "slots" are remaining (number of fields, or <= 1 if not a field) */ unsigned type_slots_left : UINTSIZE-2; unsigned defs_done : 1; unsigned value_slots_left : UINTSIZE-2; unsigned has_unfinalized_op : 1; unsigned num_funcs; unsigned num_data; unsigned max_funcdef_count, max_datadef_count, max_typedef_count; /** Next "valueslot" in initival value of data definition */ struct DatadefValueSlot *valueslot; /** Base type of type being added (e.g. struct base type) */ struct CsbeType *base_type; /** Linked list of function bodies. Last added comes first */ struct FuncIR *funcbody; /* Handling of function bodies */ struct CsbeType *current_paramtype; struct Var *current_var; struct Ebb *current_ebb; unsigned current_ebb_id; unsigned params_left, vars_left; unsigned current_opnum; unsigned current_operand_pos; struct Op *current_op, *opchunk_end; struct Op *call_op_lastarg, *call_op_start; struct Op *pending_prev_op; /**< Previous operation. May get optimized */ unsigned call_op_argsleft; enum CallState call_state; unsigned lowest_unused_var_id; /**< Used to eliminate unallocated variables */ unsigned *next_opnd; union LargeOpnd *next_opnd_large; unsigned *last_var_out; /*< Last output operand */ unsigned *prev_var_out; /*< Previous output operand */ CsbeAllocFunc *saved_allocator; void *saved_userctx; }; #ifdef CSBE_TRAP_ON_ERROR # ifdef NDEBUG # error "Can't mix CSBE_TRAP_ON_ERROR and NDEBUG" # endif # define SET_ERROR(e) (assert(0), csbe->error = (e)) #else # define SET_ERROR(e) (csbe->error = (e)) #endif #define INVERT_BRANCHTYPE(bt) ((bt)^1) void *allocp(struct Csbe *csbe, size_t size); void *allocpa(struct Csbe *csbe, size_t size, size_t count); void *alloc_dptr_list(struct Csbe *csbe, size_t count); /** Performs crude register "pre-allocation". */ enum CsbeErr analyze_ir(struct Csbe *csbe, struct FuncIR *fb); int init_cgctx(struct CgCtx *cg, struct Csbe *csbe, struct ArchInfo *arch); /** Called just before generating code for a function */ int gather_func_info(struct CgCtx *cg, const struct FuncIR *funcbody); #endif