/*

  irdump.c -- Dumps IR in text form

  Copyright © 2023-2024 Samuel Lidén Borell

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.

*/
#include "codegen_undef.h"
#include "codegen_common.h"

#include <assert.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#define MAX_ARGLEN 100

/**
 * Emits a sequence of bytes to the output, one byte at a time via IEMIT1.
 *
 * \param cg   Codegen context object (used by the IEMIT1 macro).
 * \param b    Bytes to emit. Does not have to be NUL-terminated.
 * \param len  Number of bytes to emit. Values <= 0 emit nothing.
 */
static void outb(struct CgCtx *cg, const char *b, int len)
{
    const char *cp;

    /* "> 0" rather than a plain truth test, so that a negative length
       can never turn into a runaway loop */
    for (cp = b; len-- > 0; cp++) {
        IEMIT1(*cp);
    }
    return;
    /* NOTE(review): IEMIT1 is defined elsewhere; presumably it jumps here
       when the output buffer cannot be grown -- confirm */
  oom:
    assert(0);
}

/**
 * Formats a string printf-style and emits it to the output.
 * The formatted text is limited to the size of an internal buffer
 * (2047 bytes); longer output is truncated rather than overflowing.
 *
 * \param cg   Codegen context object.
 * \param fmt  printf-style format string, followed by its arguments.
 */
static void outf(struct CgCtx *cg, const char *fmt, ...)
{
    char buffer[2048];
    int len;
    va_list ap;

    va_start(ap, fmt);
    len = vsnprintf(buffer, sizeof(buffer), fmt, ap);
    va_end(ap);
    if (len < 0) {
        /* Formatting error. Nothing sensible to emit */
        assert(0);
        return;
    }
    if ((size_t)len >= sizeof(buffer)) {
        /* vsnprintf returns the untruncated length. Only the part
           that fits (excluding the NUL terminator) is in the buffer */
        len = (int)sizeof(buffer) - 1;
    }
    outb(cg, buffer, len);
}

/**
 * Reports whether the given endianness is supported by this backend.
 *
 * \return 1 if the endianness has the enum value 0, otherwise 0.
 */
static int irdump_has_endianness(enum CsbeEndianness endianness)
{
    return !endianness;
}

/**
 * Reports whether a feature can be set to a given state. Only
 * CSBEF_COMMON_HARDFLOAT is accepted; the cpu and state are ignored.
 *
 * \return 1 if the feature/state is allowed, otherwise 0.
 */
static int irdump_has_feature_state(enum CsbeCpu cpu, int feature, int state)
{
    (void)cpu;
    (void)state;
    switch (feature) {
    case CSBEF_COMMON_HARDFLOAT:
        return 1; /* allow FP to be disabled for e.g. kernel-mode software */
    default:
        break;
    }
    return 0;
}

#define NUM_REGSETS 1

/**
 * Initializes the most recently added architecture of the configuration
 * for IR dumping: enables hardfloat, marks the output as textual and sets
 * the number of register sets.
 *
 * \return Always CSBEE_OK.
 */
static enum CsbeErr irdump_initialize(struct CsbeConfig *cfg)
{
    struct ArchInfo *arch;

    csbe_config_arch_feature(cfg, CSBEF_COMMON_HARDFLOAT, 1);
    arch = &cfg->last_arch->archinfo;
    arch->is_textual = 1;
    arch->num_regsets = NUM_REGSETS;
    return CSBEE_OK;
}

/**
 * Prints the name of a function. Functions without a name are printed
 * as "_Func" followed by the function ID.
 */
static void outfuncname(struct CgCtx *cg, const struct CsbeFuncdef *f)
{
    if (f->name) {
        outb(cg, f->name, f->namelen);
    } else {
        outf(cg, "_Func%u", f->func_id);
    }
}

/**
 * Prints the name of a data definition, which is currently always "D"
 * followed by the data definition ID.
 */
static void outdatadefname(struct CgCtx *cg, const struct CsbeDatadef *d,
                           unsigned datadef_id)
{
    /* TODO data defs need to have names for exporting/importing them */
    (void)d;
    outf(cg, "D%u", datadef_id);
}

/** Type names as printed in the dump. Indexed by type kind (CSBET_*) */
static const char typekind_names[NUM_TYPEKINDS][13+1] = {
    /* CSBET_BOOL */          "bool",
    /* CSBET_INT */           "int",
    /* CSBET_UINT */          "uint",
    /* CSBET_LONG */          "long",
    /* CSBET_ULONG */         "ulong",
    /* CSBET_I8 */            "i8",
    /* CSBET_U8 */            "u8",
    /* CSBET_I16 */           "i16",
    /* CSBET_U16 */           "u16",
    /* CSBET_I32 */           "i32",
    /* CSBET_U32 */           "u32",
    /* CSBET_I64 */           "i64",
    /* CSBET_U64 */           "u64",
    /* CSBET_F32 */           "f32",
    /* CSBET_F64 */           "f64",
    /* CSBET_SSIZE */         "ssize",
    /* CSBET_USIZE */         "usize",
    /* CSBET_DPTR */          "dptr",
    /* NOTE(review): "alised" looks like a typo for "aliased". Left as-is
       because it is part of the emitted text format -- confirm */
    /* CSBET_DPTR_ALIASED */  "dptr_alised",
    /* CSBET_DPTR_THREADED */ "dptr_threaded",
    /* CSBET_FPTR */          "fptr"
};

/** Variable name prefixes. Indexed by type kind (CSBET_*) */
static const char typekind_prefixes[NUM_TYPEKINDS][4+1] = {
    /* CSBET_BOOL */          "b",
    /* CSBET_INT */           "i",
    /* CSBET_UINT */          "u",
    /* CSBET_LONG */          "lo",
    /* CSBET_ULONG */         "ul",
    /* CSBET_I8 */            "i",
    /* CSBET_U8 */            "u",
    /* CSBET_I16 */           "i",
    /* CSBET_U16 */           "u",
    /* CSBET_I32 */           "i",
    /* CSBET_U32 */           "u",
    /* CSBET_I64 */           "i",
    /* CSBET_U64 */           "u",
    /* CSBET_F32 */           "flt",
    /* CSBET_F64 */           "flt",
    /* CSBET_SSIZE */         "slen",
    /* CSBET_USIZE */         "len",
    /* CSBET_DPTR */          "p",
    /* CSBET_DPTR_ALIASED */  "dpa",
    /* CSBET_DPTR_THREADED */ "dpt",
    /* CSBET_FPTR */          "fptr"
};

/**
 * Prints a type. Arrays get a "[length]" prefix, structs are printed as a
 * comma-separated list of field types in braces (recursively), a non-struct
 * type with zero fields is printed as "void", and anything else is printed
 * by its type kind name.
 *
 * \param cg  Codegen context object.
 * \param t   Type to print. Must not be unbound.
 */
static void outtype(struct CgCtx *cg, const struct CsbeType *t)
{
    assert(!t->is_unbound);
    if (t->is_array) {
        outf(cg, "[%u]", t->array_length);
    }
    if (t->is_struct) {
        unsigned fieldsleft;
        struct CsbeType *field;

        outf(cg, "{");
        field = t->elemtypes;
        for (fieldsleft = t->num_fields; fieldsleft--; field++) {
            outtype(cg, field);
            if (fieldsleft) {
                /* Not the last field */
                outf(cg, ",");
            }
        }
        outf(cg, "}");
    } else if (t->num_fields == 0) {
        outf(cg, "void");
    } else {
        outf(cg, "%s", typekind_names[t->simple_kind]);
    }
}

/** Returns 1 if the type kind is a signed integer type, otherwise 0 */
static int is_signed(enum CsbeTypeKind typekind)
{
    switch ((int)typekind) {
    case CSBET_INT:
    case CSBET_LONG:
    case CSBET_I8:
    case CSBET_I16:
    case CSBET_I32:
    case CSBET_I64:
    case CSBET_SSIZE:
        return 1;
    default:
        return 0;
    }
}

/**
 * Determines the name prefix to use when printing a variable.
 *
 * \param fb      Function body that contains the variable.
 * \param varnum  Index of the variable in fb->vars.
 * \return Pointer to a static string with the prefix.
 */
static const char *vartype(const struct FuncIR *fb, unsigned varnum)
{
    const struct Var *var = &fb->vars[varnum];
    unsigned flags = var->flags;

    if ((flags & CSBEVD_INTERN_FUNCPARAM) != 0) {
        return "arg"; /* Function parameter */
    } else if ((flags & (CSBEVD_MULTI_READ|CSBEVD_MULTI_WRITE)) == 0) {
        /* Temporary */
        if (var->type.is_array) return "tarr";
        if (var->type.is_struct) return "tobj";
        if (var->type.simple_kind == CSBET_DPTR) return "tp";
        return "t";
    } else if (var->type.is_array) {
        return "arr";
    } else if (var->type.is_struct) {
        return "obj";
    } else {
        return typekind_prefixes[var->type.simple_kind];
    }
}

/** Looks up a name in a table of strings. Gives NULL if out of range */
#define ENUM_NAME(table, value) \
    ((value) < sizeof(table)/sizeof((table)[0]) ? (table)[(value)] : NULL)

/**
 * Prints the symbolic name of an enum operand. Which enum it is depends on
 * the operation and on the position of the operand.
 *
 * If the operation/position combination is unexpected, or if the value is
 * outside of the name table, then an assertion is triggered. In builds
 * without assertions, "?" is printed instead.
 *
 * \param cg      Codegen context object.
 * \param op      IR operation.
 * \param argind  Index in op->opnd of the enum operand.
 */
static void arg_enum(struct CgCtx *cg, const struct Op *op, int argind)
{
    static const char branchtypes[][3+1] = {
        /* CSBEBT_Z */   "z",
        /* CSBEBT_NZ */  "nz",
        /* CSBEBT_LZ */  "lz",
        /* CSBEBT_GEZ */ "gez",
        /* CSBEBT_GZ */  "gz",
        /* CSBEBT_LEZ */ "lez"
    };
    static const char branchbalances[][8+1] = {
        /* CSBEBB_UNKNOWN */           "_",
        /* CSBEBB_EQUAL */             "balanced",
        /* CSBEBB_NEVER */             "never",
        /* CSBEBB_ALWAYS */            "always",
        /* CSBEBB_SOMEWHAT_UNLIKELY */ "unlikely",
        /* CSBEBB_SOMEWHAT_LIKELY */   "likely"
    };
    static const char selectbalances[][9+1] = {
        /* CSBESB_UNKNOWN */  "_",
        /* CSBESB_EQUAL */    "balanced",
        /* CSBESB_1_ONLY */   "leftonly",
        /* CSBESB_2_ONLY */   "rightonly",
        /* CSBESB_1_MAINLY */ "moreleft",
        /* CSBESB_2_MAINLY */ "moreright",
    };
    static const char callmodes[][8+1] = {
        /* CSBECM_NORMAL */   "normal",
        /* CSBECM_TAILCALL */ "tailcall",
    };
    static const char sizeofkinds[][14+1] = {
        /* CSBES_SIZEOF_ALIGNED */ "sizeof_aligned",
        /* CSBES_SIZEOF_INNER */   "sizeof_inner",
        /* CSBES_ALIGNOF */        "alignof",
    };
    unsigned opc = op->op;
    unsigned i = op->opnd[argind];
    const char *s = NULL;

    switch (opc) {
    case CSBEO_CONDJUMP:
        if (argind == 3) {
            s = ENUM_NAME(branchtypes, i);
        } else if (argind == 4) {
            s = ENUM_NAME(branchbalances, i);
        }
        break;
    case CSBEO_COMPAREJUMP:
        if (argind == 5) {
            s = ENUM_NAME(branchtypes, i);
        } else if (argind == 6) {
            s = ENUM_NAME(branchbalances, i);
        }
        break;
    case CSBEO_CONDTRAP:
        if (argind == 2) {
            s = ENUM_NAME(branchtypes, i);
        }
        break;
    case CSBEO_CALL_FUNCDEF:
    case CSBEO_CALL_FUNCVAR:
        if (argind == 0) {
            s = ENUM_NAME(callmodes, i);
        }
        break;
    case CSBEO_CHOICE:
        if (argind == 2) {
            s = ENUM_NAME(selectbalances, i);
        }
        break;
    case CSBEO_SIZEOF:
        if (argind == 0) {
            s = ENUM_NAME(sizeofkinds, i);
        }
        break;
    default:
        break;
    }
    if (s == NULL) {
        /* Unexpected operation, operand position or enum value.
           Never pass an uninitialized/out-of-bounds string to strlen */
        assert(0);
        s = "?";
    }
    outb(cg, s, (int)strlen(s));
}

/**
 * Prints the separator that goes before an operand.
 *
 * \param cg          Codegen context object.
 * \param need_comma  0 = space only (first operand),
 *                    1 = comma and one space,
 *                    2 (or more) = comma and two spaces (used after the
 *                    destination operand).
 */
static void print_separator(struct CgCtx *cg, int need_comma)
{
    const char *sep;
    int seplen;

    if (need_comma == 0) {
        sep = " ";
        seplen = 1;
    } else {
        /* The string must be at least 3 characters long. Otherwise,
           the NUL terminator would be emitted when seplen is 3 */
        sep = ",  ";
        seplen = need_comma<2 ? 2 : 3;
    }
    outb(cg, sep, seplen);
}

struct OpArgPositions {
    int argind;     /**< Index of current 32-bit argument */
    int arg64ind;   /**< Index of current 64-bit/ptr argument */
    int need_comma; /**< Separator to print: 0:" ", 1:", ", 2:",  " */
};

/**
 * Prints an operand of an IR operation, and increments the argument position.
 * Note that many operand types use multiple arguments in the op->opnd array!
 * Also note that some operands are internal/invisible.
 *
 * \param cg       Codegen context object.
 * \param f        Function currently being printed.
 * \param op       IR operation.
 * \param argtype  Operand type (OPERAND_*)
 * \param argpos   Structure with argument positions. Incremented if consumed
 * \return 1 if an operand was printed, 0 if not.
 */
static int print_argument(struct CgCtx *cg, const struct FuncIR *f,
                          const struct Op *op, unsigned char argtype,
                          struct OpArgPositions *argpos)
{
    int ai = argpos->argind;
    int a64i = argpos->arg64ind;
    int printed = 1;

    argtype = OPERAND_TYPE(argtype);
    /* Invisible operands must not get any separator */
    if (argtype != OPERAND_FLAGS && argtype != OPERAND_PTR) {
        print_separator(cg, argpos->need_comma);
    }
    switch (argtype) {
    case OPERAND_VAR_IN:
    case OPERAND_VAR_OUT:
    case OPERAND_VAR_IN_OR_IMMED: {
        /* Uses two 32-bit arguments: The flags, and then either the
           variable number or (for immediates) a value that is passed to
           is_signed. Immediates also use one 64-bit argument. */
        unsigned flags = op->opnd[ai];
        if ((flags & CSBE_OPV_VARIABLE) != 0) {
            unsigned varnum = op->opnd[ai+1];
            outf(cg, "%s%u", vartype(f, varnum), varnum);
            if ((flags & CSBE_OPV_DISCARD) != 0) {
                outf(cg, "~");
            }
        } else if (is_signed(op->opnd[ai+1])) {
            outf(cg, "%" PRId64, (int64)op->opnd_large[a64i++].u64);
        } else {
            outf(cg, "%" PRIu64, op->opnd_large[a64i++].u64);
        }
        ai += 2;
        break; }
    case OPERAND_INDEX:
        outf(cg, "%d", op->opnd[ai]);
        ai++;
        break;
    case OPERAND_EBB:
        outf(cg, "ebb%d", op->opnd[ai]);
        ai++;
        break;
    case OPERAND_FUNCDEF: {
        unsigned funcdef_id = op->opnd[ai];
        outfuncname(cg, &cg->csbe->funcdefs[funcdef_id]);
        ai++;
        break; }
    case OPERAND_DATADEF: {
        unsigned datadef_id = op->opnd[ai];
        outdatadefname(cg, &cg->csbe->datadefs[datadef_id], datadef_id);
        ai++;
        break; }
    case OPERAND_ENUM:
        arg_enum(cg, op, ai);
        ai++;
        break;
    case OPERAND_TYPE: {
        const struct CsbeType *type =
            (const struct CsbeType *)op->opnd_large[a64i].ptr;
        assert(type);
        assert(type->is_defined);
        outtype(cg, type);
        a64i++;
        break; }
    case OPERAND_FLAGS:
        /* Used internally. Not printed */
        ai++;
        printed = 0;
        break;
    case OPERAND_PTR:
        /* Used internally. Not printed */
        a64i++;
        printed = 0;
        break;
    default:
        /* Other operand types consume nothing and print only the
           separator */
        break;
    }
    argpos->argind = ai;
    argpos->arg64ind = a64i;
    if (printed) {
        argpos->need_comma = 1;
    }
    return printed;
}

/**
 * Finds the first argument index of the destination operand.
 * The destination is the last operand, so this sums up the number of
 * 32-bit arguments used by all operands before it.
 */
static int get_dest_argind(const struct OpArgInfo *opinfo)
{
    int i, ai = 0;

    /* Find argument index for destination operand */
    for (i = 0; i < opinfo->num_args-1; i++) {
        unsigned char argtype = opinfo->args[i];
        assert((argtype & OPERAND_IS_DEST) == 0);
        if (argtype == OPERAND_VAR_IN ||
                argtype == OPERAND_VAR_OUT ||
                argtype == OPERAND_VAR_IN_OR_IMMED) {
            ai += 2;
        } else if (argtype != OPERAND_TYPE && argtype != OPERAND_PTR) {
            /* (type and ptr operands use 64-bit arguments only) */
            ai++;
        }
    }
    return ai;
}

/**
 * Prints one IR operation on a line of its own: First the name, then
 * the destination operand (if any) and finally the source operands.
 *
 * \param cg          Codegen context object.
 * \param f           Function currently being printed.
 * \param ir_op_iter  Iterator that is positioned at the operation.
 */
static void irdump_op(struct CgCtx *cg, const struct FuncIR *f,
                      struct IrOpIter *ir_op_iter)
{
    const struct Op *op = ir_op_iter->op;
    const struct OpArgInfo *opinfo = &op_argdefs[op->op];
    int num_args = opinfo->num_args, i;
    struct OpArgPositions argpos = { 0, 0, 0 };

    /* Print name of IR instruction */
    /* (sizeof includes the NUL terminator, so this skips "CSBEO" plus
       one more character of the operation name) */
    outf(cg, num_args ? " %-10s" : " %s",
         op_names[op->op]+sizeof("CSBEO"));
    /* Print destination argument, if any */
    if (num_args && (opinfo->args[num_args-1] & OPERAND_IS_DEST) != 0) {
        argpos.argind = get_dest_argind(opinfo);
        print_argument(cg, f, op, opinfo->args[num_args-1], &argpos);
        argpos.need_comma = 2;
    }
    /* Print normal/source arguments */
    argpos.argind = 0;
    assert(argpos.arg64ind == 0);
    for (i = 0; i < num_args; i++) {
        if ((opinfo->args[i] & OPERAND_IS_DEST) != 0) {
            assert(i == num_args-1);
            break;
        }
        print_argument(cg, f, op, opinfo->args[i], &argpos);
    }
    outf(cg, "\n");
}

/** ABI names as printed in the dump. Indexed by f->abi */
static const char abinames[][4] = {
    "c",
    "any"
};

/**
 * Prints a "VAR" line for each variable of a function body that is
 * either used or a function parameter. Each line contains the variable
 * name, the type and the applicable attributes (PARAM, ADDR, TEMP,
 * MUTABLE and/or UNUSED).
 */
static void dump_variables(struct CgCtx *cg, const struct FuncIR *fb)
{
    unsigned i;

    for (i = 0; i < fb->num_vars; i++) {
        unsigned flags = fb->vars[i].flags;
        if ((flags & (CSBEVD_INTERN_USED|CSBEVD_INTERN_FUNCPARAM)) == 0) {
            continue;
        }
        assert((flags & CSBEVD_INTERN_DEFINED) != 0);
        outf(cg, " VAR %3s%-3u ", vartype(fb,i), i);
        outtype(cg, &fb->vars[i].type);
        if ((flags & CSBEVD_INTERN_FUNCPARAM) != 0) {
            outf(cg, " PARAM");
        }
        if ((flags & CSBEVD_INTERN_HAS_ADDRESS) != 0) {
            outf(cg, " ADDR");
        }
        if ((flags & (CSBEVD_MULTI_READ|CSBEVD_MULTI_WRITE)) == 0) {
            outf(cg, " TEMP");
        } else if ((flags & CSBEVD_MULTI_WRITE) != 0) {
            outf(cg, " MUTABLE");
        }
        if ((flags & CSBEVD_INTERN_USED) == 0) {
            /* Can only happen for function parameters */
            outf(cg, " UNUSED");
        }
        outf(cg, "\n");
    }
}

/**
 * Dumps a function in text form: A header with the ABI, attributes and
 * signature, then the variables, and then the operations of each EBB.
 *
 * \param cg  Codegen context object.
 * \param f   Function definition to dump.
 * \return Always 1.
 */
static int irdump_codegen(struct CgCtx *cg, const struct CsbeFuncdef *f)
{
    struct EbbIter ebb_iter;
    const struct FuncIR *fb = f->funcbody;
    unsigned ebb_num;
    unsigned i;

    outf(cg, "\n# ==================================================\n");
    outf(cg, "abi %s\n", abinames[f->abi]);
    if ((f->flags & CSBEFD_NORETURN) != 0) {
        outf(cg, "noreturn\n");
    }
    if ((f->flags & CSBEFD_OBJGLOBAL) != 0) {
        outf(cg, "objglobal\n");
    }
    outf(cg, "function ");
    outfuncname(cg, f);
    outf(cg, "(");
    for (i = 0; i < f->num_params; i++) {
        outf(cg, " ");
        outtype(cg, &f->paramtypes[i]);
    }
    if (i != 0) {
        /* Space between last parameter and closing parenthesis */
        outf(cg, " ");
    }
    outf(cg, ") -> ");
    outtype(cg, f->returntype);
    outf(cg, "\n");
    dump_variables(cg, fb);
    ebb_num = 0;
    EBB_ITER(ebb_iter, fb) {
        struct IrOpIter ir_op_iter;
        outf(cg, "ebb%u:\n", ebb_num);
        IR_OP_ITER(ir_op_iter, ebb_iter) {
            irdump_op(cg, fb, &ir_op_iter);
        }
        ebb_num++;
    }
    return 1;
}

/* Dummy functions */

/** Does nothing. Returns 1 */
static int irdump_gen_elf_plt(struct CgCtx *cg, uint32 got,
                              uint32 gotoffs, uint32 pc)
{
    (void)cg;
    (void)got;
    (void)gotoffs;
    (void)pc;
    return 1;
}

/** Does nothing. Returns 0 (as the size) */
static int irdump_get_elf_plt_size(const struct CgCtx *cg)
{
    (void)cg;
    return 0;
}

/** Does nothing. Returns 1 */
static int irdump_process_reloc(struct CgCtx *cg, struct BinWriter *bw,
                                struct CgRelocEntry *re,
                                uint32 fileoffs, uint32 addr, uint32 newaddr)
{
    (void)cg;
    (void)bw;
    (void)re;
    (void)fileoffs;
    (void)addr;
    (void)newaddr;
    return 1;
}

/** Does nothing. Returns 1 */
static int irdump_gen_start_code(struct CgCtx *cg)
{
    (void)cg;
    return 1;
}

/** Does nothing. Returns 1 */
static int irdump_gen_helper_code(struct CgCtx *cg)
{
    (void)cg;
    return 1;
}

/**
 * Fills in a Codegen structure with the functions of the IR dumper.
 *
 * \param cgen  Structure to fill in.
 */
void irdump_get_codegen(struct Codegen *cgen)
{
    cgen->has_endianness = irdump_has_endianness;
    cgen->has_feature_state = irdump_has_feature_state;
    cgen->initialize = irdump_initialize;
    cgen->codegen = irdump_codegen;
    /* Dummy functions */
    cgen->gen_elf_plt = irdump_gen_elf_plt;
    cgen->get_elf_plt_size = irdump_get_elf_plt_size;
    cgen->process_reloc = irdump_process_reloc;
    cgen->gen_start_code = irdump_gen_start_code;
    cgen->gen_helper_code = irdump_gen_helper_code;
}