/*

  x86.c -- Code generator for i386 and x86_64

  Copyright © 2022-2024 Samuel Lidén Borell

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.

*/
#include "codegen_undef.h"
#include "codegen_common.h"
#include "../outformat/outformat_common.h"
#include <assert.h>
#include <limits.h>
#include <string.h>

/** Alignment (in bytes) that the stack frame size is rounded to */
#define STACKALIGN 16
/** Reserves room for one instruction before a reloc/addrslot is recorded */
#define PREALLOC_INSN IPREALLOC(16)

/**
 * Reports whether the given endianness is supported.
 * Only little endian is accepted.
 */
static int x86_has_endianness(enum CsbeEndianness endianness)
{
    return endianness == CSBEEN_LITTLE_ENDIAN;
}

/*#define OPTIONAL32_ALWAYS64(state) (cpu == CSBEC_I386 ? 1 : (state)==1)*/

/**
 * Reports whether a feature may be set to the given state.
 *
 * Currently `cpu` and `state` are ignored: CSBEF_COMMON_HARDFLOAT is
 * accepted in any state, and every other feature is rejected.
 *
 * \return 1 if the feature/state combination is accepted, 0 otherwise.
 */
static int x86_has_feature_state(enum CsbeCpu cpu, int feature, int state)
{
    (void)cpu;
    (void)state;
    switch (feature) {
    case CSBEF_COMMON_HARDFLOAT:
        /*return OPTIONAL32_ALWAYS64(state);*/
        return 1; /* allow FP to be disabled for e.g. kernel-mode software */
    }
    return 0;
}

/* XXX AH/BH/CH/DH is not allowed with a REX prefix... what does that mean
       in practice? and many instructions don't seem to support usage
       of high-byte registers. */
/* XXX there are no overlapping (high) 16 or 32 bit regs.
       so maybe WORD/DWORD/QWORD can be merged */
#define RS_BYTE     0
#define RS_WORD     1
#define RS_DWORD    2
#define RS_QWORD    3
#define RS_FLOAT    4
#define NUM_REGSETS 5

/** Default (32-bit) mapping from type kind to register set.
    x86_initialize() widens the word-sized kinds for x86_64. */
static const unsigned char x86_regset[NUM_TYPEKINDS] = {
    /* CSBET_BOOL = */ RS_BYTE,
    /* CSBET_INT = */ RS_DWORD,
    /* CSBET_UINT = */ RS_DWORD,
    /* CSBET_LONG = */ RS_DWORD,
    /* CSBET_ULONG = */ RS_DWORD,
    /* CSBET_I8 = */ RS_BYTE,
    /* CSBET_U8 = */ RS_BYTE,
    /* CSBET_I16 = */ RS_WORD,
    /* CSBET_U16 = */ RS_WORD,
    /* CSBET_I32 = */ RS_DWORD,
    /* CSBET_U32 = */ RS_DWORD,
    /* CSBET_I64 = */ RS_QWORD,
    /* CSBET_U64 = */ RS_QWORD,
    /* CSBET_F32 = */ RS_FLOAT,
    /* CSBET_F64 = */ RS_FLOAT,
    /* CSBET_SSIZE = */ RS_DWORD,
    /* CSBET_USIZE = */ RS_DWORD,
    /* CSBET_DPTR = */ RS_DWORD,
    /* CSBET_DPTR_ALIASED = */ RS_DWORD,
    /* CSBET_DPTR_THREADED = */ RS_DWORD,
    /* CSBET_FPTR = */ RS_DWORD
};

/* TODO */
static const uint32 x86_elf_reloc_types_32[CG_NUM_RELOC_KINDS] = {
    /* CG_RK_FUNCCALL = */0,
    /* CG_RK_FUNCADDR = */0,
    /* CG_RK_DATAADDR = */0,
    /* CG_RK_DATALOAD = */0,
    /* CG_RK_GOTPC = */0
};

/* TODO */
static const uint32 x86_elf_reloc_types_64[CG_NUM_RELOC_KINDS] = {
    /* CG_RK_FUNCCALL = */0,
    /* CG_RK_FUNCADDR = */0,
    /* CG_RK_DATAADDR = */0,
    /* CG_RK_DATALOAD = */0,
    /* CG_RK_GOTPC = */0
};

/** Size in bytes of each type kind on i386 */
static const unsigned short i386_typesizes[NUM_TYPEKINDS] = {
    /* CSBET_BOOL */ 1,
    /* CSBET_INT */ 4,
    /* CSBET_UINT */ 4,
    /* CSBET_LONG */ 4,
    /* CSBET_ULONG */ 4,
    /* CSBET_I8 */ 1,
    /* CSBET_U8 */ 1,
    /* CSBET_I16 */ 2,
    /* CSBET_U16 */ 2,
    /* CSBET_I32 */ 4,
    /* CSBET_U32 */ 4,
    /* CSBET_I64 */ 8,
    /* CSBET_U64 */ 8,
    /* CSBET_F32 */ 4,
    /* CSBET_F64 */ 8,
    /* CSBET_SSIZE */ 4,
    /* CSBET_USIZE */ 4,
    /* CSBET_DPTR */ 4,
    /* CSBET_DPTR_ALIASED */ 4,
    /* CSBET_DPTR_THREADED */ 4,
    /* CSBET_FPTR */ 4
};

/** Alignment in bytes of each type kind on i386.
    Note that the 64-bit integers are only 4-byte aligned here. */
static const unsigned short i386_typealign[NUM_TYPEKINDS] = {
    /* CSBET_BOOL */ 1,
    /* CSBET_INT */ 4,
    /* CSBET_UINT */ 4,
    /* CSBET_LONG */ 4,
    /* CSBET_ULONG */ 4,
    /* CSBET_I8 */ 1,
    /* CSBET_U8 */ 1,
    /* CSBET_I16 */ 2,
    /* CSBET_U16 */ 2,
    /* CSBET_I32 */ 4,
    /* CSBET_U32 */ 4,
    /* CSBET_I64 */ 4,
    /* CSBET_U64 */ 4,
    /* CSBET_F32 */ 4,
    /* CSBET_F64 */ 8, /* XXX or is this 4 ? */
    /* CSBET_SSIZE */ 4,
    /* CSBET_USIZE */ 4,
    /* CSBET_DPTR */ 4,
    /* CSBET_DPTR_ALIASED */ 4,
    /* CSBET_DPTR_THREADED */ 4,
    /* CSBET_FPTR */ 4
};

/** Size in bytes of each type kind on x86_64.
    Also used as the alignment table (see x86_initialize).
    NOTE(review): LONG/ULONG are 8 here regardless of OS, while
    x86_initialize() keeps them in the DWORD register set on Windows;
    confirm which of the two is intended for Windows targets. */
static const unsigned short x86_64_typesizes[NUM_TYPEKINDS] = {
    /* CSBET_BOOL */ 1,
    /* CSBET_INT */ 4,
    /* CSBET_UINT */ 4,
    /* CSBET_LONG */ 8,
    /* CSBET_ULONG */ 8,
    /* CSBET_I8 */ 1,
    /* CSBET_U8 */ 1,
    /* CSBET_I16 */ 2,
    /* CSBET_U16 */ 2,
    /* CSBET_I32 */ 4,
    /* CSBET_U32 */ 4,
    /* CSBET_I64 */ 8,
    /* CSBET_U64 */ 8,
    /* CSBET_F32 */ 4,
    /* CSBET_F64 */ 8,
    /* CSBET_SSIZE */ 8,
    /* CSBET_USIZE */ 8,
    /* CSBET_DPTR */ 8,
    /* CSBET_DPTR_ALIASED */ 8,
    /* CSBET_DPTR_THREADED */ 8,
    /* CSBET_FPTR */ 8
};

/**
 * Fills in the ArchInfo of the most recently added arch (cfg->last_arch)
 * with x86 specific parameters: word/pointer widths, register counts,
 * register sets, ELF relocation tables and type size/alignment tables.
 * Hard-float is enabled first via csbe_config_arch_feature().
 *
 * \param cfg  Configuration whose last_arch is to be initialized.
 * \return CSBEE_OK. An unknown CPU trips an assertion; with assertions
 *         disabled the CPU specific fields are left untouched.
 */
static enum CsbeErr x86_initialize(struct CsbeConfig *cfg)
{
    struct ArchInfo *arch;

    csbe_config_arch_feature(cfg, CSBEF_COMMON_HARDFLOAT, 1);
    arch = &cfg->last_arch->archinfo;
    memcpy(arch->regset, x86_regset, sizeof(arch->regset));
    arch->num_regsets = NUM_REGSETS;
    arch->is_textual = 0;
    switch ((int)arch->cpu) {
    case CSBEC_I386:
        arch->wordbits = 32;
        arch->dptrbits = 32;
        arch->fptrbits = 32;
        arch->reg_size = 4;
        arch->tempreg_count = 3; /* eax,ecx,edx */
        arch->savedreg_count = 2; /* esi,edi (excl. ebx,esp,ebp) */
        arch->paramreg_count = 0; /* push/pop */
        arch->elf_relocs_have_addends = 0;
        arch->elf_reloc_types = x86_elf_reloc_types_32;
        memcpy(arch->types_sizes, i386_typesizes, sizeof(arch->types_sizes));
        memcpy(arch->types_align, i386_typealign, sizeof(arch->types_align));
        break;
    case CSBEC_X86_64:
        arch->wordbits = 64;
        arch->dptrbits = 64;
        arch->fptrbits = 64;
        arch->reg_size = 8;
        arch->tempreg_count = 3; /* rax,r10,r11 */
        arch->savedreg_count = 4; /* r12-r15 */
        arch->paramreg_count = 6; /* rdi,rsi,rdx,rcx,r8,r9 */
        if (arch->os != CSBEOS_WINDOWS) {
            /* Both the signed and the unsigned "long" kind are 8 bytes
               in x86_64_typesizes, so both need the 64-bit register set */
            arch->regset[CSBET_LONG] = RS_QWORD;
            arch->regset[CSBET_ULONG] = RS_QWORD;
        }
        arch->regset[CSBET_SSIZE] = RS_QWORD;
        arch->regset[CSBET_USIZE] = RS_QWORD;
        arch->regset[CSBET_DPTR] = RS_QWORD;
        arch->regset[CSBET_DPTR_ALIASED] = RS_QWORD;
        arch->regset[CSBET_DPTR_THREADED] = RS_QWORD;
        arch->regset[CSBET_FPTR] = RS_QWORD;
        arch->elf_relocs_have_addends = 1;
        arch->elf_reloc_types = x86_elf_reloc_types_64;
        memcpy(arch->types_sizes, x86_64_typesizes, sizeof(arch->types_sizes));
        /* alignment==size for all types */
        memcpy(arch->types_align, x86_64_typesizes, sizeof(arch->types_align));
        break;
    default:
        assert(0);
    }
    return CSBEE_OK;
}

/* Byte extraction helpers, used for emitting little-endian immediates */
#define LO(x) (((uint32)(x)) & 0xFF)
#define HI(x) (((uint32)(x))>>8 & 0xFF)
#define H3(x) (((uint32)(x))>>16 & 0xFF)
#define H4(x) (((uint32)(x))>>24 & 0xFF)

/* Register numbers, as encoded in ModRM/SIB and in short opcodes */
#define AX 0
#define CX 1
#define DX 2
#define BX 3
#define SP 4
#define BP 5
#define SI 6
#define DI 7
/* These use REX prefixes for the high-order bit */
#define R8  0
#define R9  1
#define R10 2
#define R11 3
#define R12 4
#define R13 5
#define R14 6
#define R15 7

/* Prefixes */
#define REXB 0x41 /**< adds high-order bit in ModRM-rm, SIB-base */
#define REXX 0x42 /**< adds high-order bit in SIB-index */
#define REXR 0x44 /**< adds high-order bit in ModRM-reg */
#define REXW 0x48 /**< selects 64-bit operand size */
#define OP_OVR   0x66
#define ADDR_OVR 0x67

/* ModRM byte */
#define MODRM(mod,reg,rm) ((mod)<<6 | (reg)<<3 | (rm))
enum Mod {
    PTR = 0,
    PTR_DISP_8 = 1,
    PTR_DISP_32 = 2, /**< 32 bit displacement, or 16 bit with 0x67 prefix */
    DIRECT = 3
};

/* SIB byte */
#define SIB(scale,index,base) ((scale)<<6 | (index)<<3 | (base))
#define SCALE_1X 0
#define SCALE_2X 1
#define SCALE_4X 2
#define SCALE_8X 3
#define NO_INDEX SP

/* Instruction emitters. These expand to IEMITn() calls, so they can only be
   used inside functions that are set up for those macros. */
#define MOV_R_R(rd,rs) IEMIT2(0x89, MODRM(DIRECT,rs,rd))
#define XOR_R_R(rd,rs) IEMIT2(0x31, MODRM(DIRECT,rs,rd))
/* rm==SP selects a SIB byte in the ModRM encoding, hence the assertion */
#define MOV_FROM_PTR_DISP8(rd,rs,sdisp) do { assert((rs)!=SP); \
        IEMIT3(0x8B, MODRM(PTR_DISP_8,rd,rs), sdisp); } while (0)
#define MOV_FROM_PTR_SIB(rd,scale,index,base) \
        IEMIT3(0x8B, MODRM(PTR,rd,4), SIB(scale,index,base))
#define LEA_RIP_DISP32(rd,i) \
        IEMIT6(0x8D, MODRM(0,rd,5), LO(i), HI(i), H3(i), H4(i))

/* Operation selectors for the OP_R*_I* macros (goes in the ModRM reg field) */
#define ADD 0
#define OR  1
#define ADC 2
#define SBB 3
#define AND 4
#define SUB 5
#define XOR 6
#define CMP 7
#define OP_R8_I8(op,r,i) IEMIT3(0x80, MODRM(DIRECT,op,r), i)
#define OP_R_I16(op,r,i) IEMIT4(0x81, MODRM(DIRECT,op,r), LO(i), HI(i))
#define OP_R_I32(op,r,i) IEMIT6(0x81, MODRM(DIRECT,op,r), LO(i), HI(i), H3(i), H4(i))
#define OP_R_I8(op,r,i) IEMIT3(0x83, MODRM(DIRECT,op,r), i)

#define NOP  IEMIT1(0x90)
#define NOP2 IEMIT2(OP_OVR, 0x90)
#define PUSH_R(r)  IEMIT1(0x50+(r))
#define PUSH_I8(i) IEMIT2(0x6A, i)
#define POP(r)     IEMIT1(0x58+(r))
#define RETN(i) IEMIT3(0xC2, LO(i), HI(i))
#define RET  IEMIT1(0xC3)
#define INT3 IEMIT1(0xCC)
#define JMP_R_I32(r,i) IEMIT6(0xFF, MODRM(PTR_DISP_32,4,r), LO(i), HI(i), H3(i), H4(i))
#define JMP_RIPREL(i) IEMIT6(0xFF, MODRM(PTR,4,5), LO(i), HI(i), H3(i), H4(i))
#define CALL_I32(i) IEMIT5(0xE8, LO(i), HI(i), H3(i), H4(i))

/* Address slot kinds, stored in CgAddrSlot.addrtype */
#define ADDRSLOT_DISP8  1
#define ADDRSLOT_DISP32 2

/**
 * Fills in address offsets in forward jumps
 *
 * Every slot in the list is patched to point at the current output
 * position. The displacement is counted from the end of the instruction,
 * which is taken to be 2 bytes long for ADDRSLOT_DISP8 and 5 bytes long
 * for ADDRSLOT_DISP32; the displacement itself starts at byte 1.
 *
 * \param cg     Code generator context.
 * \param first  Head of the address slot list (may be NULL).
 */
static void x86_process_addrslots(struct CgCtx *cg, struct CgAddrSlot *first)
{
    struct CgAddrSlot *as;
    uint32 pos = cg_get_position(cg);

    for (as = first; as; as = as->next) {
        uint32 delta = pos - as->location;
        switch (as->addrtype) {
        case ADDRSLOT_DISP8:
            delta -= 2;
            /* Must fit in a signed byte (delta is an unsigned wrap-around
               representation of a possibly negative distance) */
            assert(delta <= 0x7f || delta >= UINT_MAX-0x7f);
            cg_edit_bytes_le(cg, as, 1, delta&0xFF, 1);
            break;
        case ADDRSLOT_DISP32:
            delta -= 5;
            cg_edit_bytes_le(cg, as, 1, delta, 4);
            break;
        default:
            assert(0);
        }
    }
}

/*static void add_reg_immed(struct CgCtx *cg, unsigned reg, unsigned immed)
{
}*/

/*
64-bit, with stack size < 128

(gdb) disas /r main
Dump of assembler code for function main:
   0x0000000000001150 <+0>:     48 83 ec 20     sub    $0x20,%rsp
   0x0000000000001154 <+4>:     89 7c 24 0c     mov    %edi,0xc(%rsp)
   0x0000000000001158 <+8>:     48 89 34 24     mov    %rsi,(%rsp)
   0x000000000000115c <+12>:    48 8d 54 24 1c  lea    0x1c(%rsp),%rdx
   0x0000000000001161 <+17>:    8b 44 24 0c     mov    0xc(%rsp),%eax
   0x0000000000001165 <+21>:    89 c6           mov    %eax,%esi
   0x0000000000001167 <+23>:    bf 61 00 00 00  mov    $0x61,%edi
   0x000000000000116c <+28>:    e8 b4 ff ff ff  call   0x1125
   0x0000000000001171 <+33>:    8b 44 24 1c     mov    0x1c(%rsp),%eax
   0x0000000000001175 <+37>:    83 c0 05        add    $0x5,%eax
   0x0000000000001178 <+40>:    48 83 c4 20     add    $0x20,%rsp
   0x000000000000117c <+44>:    c3              ret

64-bit, with stack size < 4096

   0x0000000000001159 <+0>:     48 81 ec 10 02 00 00    sub    $0x210,%rsp
   0x0000000000001160 <+7>:     89 7c 24 0c     mov    %edi,0xc(%rsp)
   0x0000000000001164 <+11>:    48 89 34 24     mov    %rsi,(%rsp)
   0x0000000000001168 <+15>:    48 8d 54 24 10  lea    0x10(%rsp),%rdx
   0x000000000000116d <+20>:    8b 44 24 0c     mov    0xc(%rsp),%eax
   0x0000000000001171 <+24>:    89 c6           mov    %eax,%esi
   0x0000000000001173 <+26>:    bf 61 00 00 00  mov    $0x61,%edi
   0x0000000000001178 <+31>:    e8 a8 ff ff ff  call   0x1125
   0x000000000000117d <+36>:    0f b6 44 24 51  movzbl 0x51(%rsp),%eax
   0x0000000000001182 <+41>:    0f be c0        movsbl %al,%eax
   0x0000000000001185 <+44>:    83 c0 05        add    $0x5,%eax
   0x0000000000001188 <+47>:    48 81 c4 10 02 00 00    add    $0x210,%rsp
   0x000000000000118f <+54>:    c3              ret
*/

/**
 * Emits the function prologue: a single "sub sp, framesize", using the
 * short imm8 form when the size fits. Nothing is emitted for functions
 * without a stack frame.
 *
 * \return 1 on success, 0 if the "oom" label is reached (presumably
 *         jumped to by the IEMIT macros when the output cannot grow).
 */
static int x86_prologue(struct CgCtx *cg)
{
    /* TODO code alignment */
    if (cg->func_stacksize != 0) {
        /* NOTE(review): this check runs before cg_align_stacksize(). If the
           alignment can round the size up to 4096, the check should be
           repeated (or moved) after it -- confirm. */
        assert(cg->func_stacksize < 4096); /* TODO above this size, each page has to be touched */
        cg_align_stacksize(cg, 0, STACKALIGN);
        if (cg->arch->wordbits == 64) IEMIT1(REXW);
        /* TODO check prologue on 32-bit x86 */
        if (cg->func_stacksize <= 127) {
            OP_R_I8(SUB, SP, cg->func_stacksize);
        } else {
            OP_R_I32(SUB, SP, cg->func_stacksize);
        }
    }
    return 1;
  oom:
    return 0;
}

/**
 * Emits the function epilogue: "add sp, framesize" (skipped when there is
 * no stack frame) followed by a plain "ret".
 *
 * \return 1 on success, 0 if the "oom" label is reached.
 */
static int x86_epilogue(struct CgCtx *cg)
{
    /* TODO different calling conventions... */
    if (cg->func_stacksize != 0) {
        assert(cg->func_stacksize < 4096); /* TODO above this size, each page has to be touched */
        if (cg->arch->wordbits == 64) IEMIT1(REXW);
        /* TODO check epilogue on 32-bit x86 */
        if (cg->func_stacksize <= 127) {
            OP_R_I8(ADD, SP, cg->func_stacksize);
        } else {
            OP_R_I32(ADD, SP, cg->func_stacksize);
        }
    }
    /* caller does the arg cleanup */
    RET;
    return 1;
  oom:
    return 0;
}

/**
 * Generates code for one function definition.
 *
 * Only the prologue and epilogue are emitted at the moment; the loop over
 * the EBBs and IR operations is a placeholder.
 *
 * \return 1 on success, 0 on failure.
 */
static int x86_codegen(struct CgCtx *cg, const struct CsbeFuncdef *f)
{
    struct EbbIter ebb_iter;
    struct FuncIR *fb = f->funcbody;

    if (!x86_prologue(cg)) goto oom;
    EBB_ITER(ebb_iter, fb) {
        struct IrOpIter ir_op_iter;
        /* TODO mark as relocation target */
        IR_OP_ITER(ir_op_iter, ebb_iter) {
            /* TODO */
            /*if (!x86_ir_op(cg, fb, &ir_op_iter)) return 0;*/
        }
    }
    if (!x86_epilogue(cg)) goto oom;
    return 1;
  oom:
    return 0;
}

/**
 * Emits one PLT entry: an indirect jump through a GOT slot, padded with a
 * 2-byte NOP to 8 bytes (see x86_get_elf_plt_size).
 *
 * On 32-bit, the slot is addressed as gotoffs relative to the BX register.
 * On 64-bit, it is addressed relative to the end of the 6-byte jump.
 *
 * \param got      Address of the GOT (used on 64-bit only).
 * \param gotoffs  Offset of the slot within the GOT.
 * \param pc       Address of this PLT entry (used on 64-bit only).
 * \return 1 on success, 0 if the "oom" label is reached.
 */
static int x86_gen_elf_plt(struct CgCtx *cg,
                           uint32 got, uint32 gotoffs, uint32 pc)
{
    if (cg->arch->wordbits == 32) {
        JMP_R_I32(BX, gotoffs);
    } else {
        uint32 offs = (got+gotoffs) - (pc+6);
        JMP_RIPREL(offs);
    }
    NOP2;
    return 1;
  oom:
    return 0;
}

/** Returns the size in bytes of a PLT entry made by x86_gen_elf_plt */
static int x86_get_elf_plt_size(const struct CgCtx *cg)
{
    (void)cg;
    return 8;
}

/**
 * Patches the 32-bit field of a relocated instruction in the output file.
 *
 * \param cg        Code generator context.
 * \param bw        Writer for the output file.
 * \param re        Relocation entry; only its kind is inspected here.
 * \param fileoffs  File offset of the first byte of the instruction.
 * \param addr      Address of the instruction.
 * \param newaddr   Address of the relocation target.
 * \return 1 if the field was written, 0 if the relocation kind is not
 *         supported (this also trips an assertion in debug builds).
 */
static int x86_process_reloc(struct CgCtx *cg, struct BinWriter *bw,
                             struct CgRelocEntry *re, uint32 fileoffs,
                             uint32 addr, uint32 newaddr)
{
    uint32 delta = newaddr - addr;
    uint32 value;

    switch (re->kind) {
    case CG_RK_FUNCCALL:
        /* 5-byte "call rel32": skip the opcode, relative to insn end */
        fileoffs += 1;
        value = delta-5;
        break;
    case CG_RK_FUNCADDR:
        if (cg->arch->wordbits == 32) {
            /* "add reg, imm32": opcode + ModRM, then GOT-relative value */
            fileoffs += 2;
            value = newaddr - cg->got_vaddr;
        } else {
            /* "lea reg, [rip+disp32]" as emitted by x86_gen_start_code:
               REX.W prefix (1 byte) + LEA_RIP_DISP32 (6 bytes). The
               displacement is relative to the end of the instruction,
               i.e. 7 bytes after its start, and it begins after the
               prefix, opcode and ModRM bytes. */
            fileoffs += 3;
            value = delta - 7;
        }
        break;
    case CG_RK_DATAADDR:
        assert(0);/* TODO */
        /* Don't write an indeterminate value when assertions are off */
        return 0;
    case CG_RK_GOTPC:
        /* "add reg, imm32": opcode + ModRM */
        fileoffs += 2;
        value = delta;
        break;
    default:
        assert(0);
        return 0;
    }
    binwriter_seekto(bw, fileoffs);
    out32(bw, value);
    return 1;
}

/**
 * Generates the program entry point code.
 *
 * For ELF executables, this sets up the arguments to __libc_start_main
 * (on the stack for 32-bit, in registers for 64-bit) and calls it via a
 * CG_RK_FUNCCALL relocation to FUNCID_LIBC_INIT. For other output formats
 * only a placeholder is emitted.
 *
 * \return 1 on success, 0 if the "oom" label is reached (presumably
 *         jumped to by the IEMIT/ZCHK macros on failure).
 */
static int x86_gen_start_code(struct CgCtx *cg)
{
    const struct ArchInfo *arch = cg->arch;
    unsigned main_id = cg->csbe->main_func->func_id;

    if (arch->outformat == CSBEOF_ELF_EXE) {
        if (arch->wordbits == 32) {
            XOR_R_R(BP, BP);
            POP(SI); /* Get argc */
            MOV_R_R(CX, SP); /* Save argv */
            OP_R_I8(AND, SP, 0xF0); /* Align stack */
            /* The call target is the helper generated by
               x86_gen_helper_code, which patches this address slot */
            PREALLOC_INSN;
            ZCHK(cg_add_addrslot(cg, &cg->start_helper_calls,
                                 ADDRSLOT_DISP32));
            CALL_I32(0); /* Get pc */
            PREALLOC_INSN;
            ZCHK(cg_add_reloc(cg, CG_RK_GOTPC, 0, 0));
            OP_R_I32(ADD, BX, 0); /* Compute GOT address */
            /* Call __libc_start_main */
            PUSH_R(SP); /* stack_end */
            PUSH_I8(0); /* rtld_fini (not used) */
            PUSH_I8(0); /* fini (not used) */
            PUSH_I8(0); /* init (not used) */
            PUSH_R(CX); /* argv */
            PUSH_R(SI); /* argc */
            MOV_R_R(CX, BX);
            PREALLOC_INSN;
            ZCHK(cg_add_reloc(cg, CG_RK_FUNCADDR, main_id, 0));
            OP_R_I32(ADD, CX, 0);
            PUSH_R(CX); /* main addr */
            PREALLOC_INSN;
            ZCHK(cg_add_reloc(cg, CG_RK_FUNCCALL, FUNCID_LIBC_INIT, 0));
            CALL_I32(0);
            NOP;
            INT3;
        } else {
            XOR_R_R(BP, BP);
            IEMIT1(REXB|REXW);MOV_R_R(R9, DX);
            POP(SI); /* Get argc */
            IEMIT1(REXW);MOV_R_R(DX, SP); /* Save argv */
            IEMIT1(REXW);OP_R_I8(AND, SP, 0xF0); /* Align stack */
            /* Call __libc_start_main.
               Parameters go in rdi,rsi,rdx,rcx,r8,r9
               according to SysV ABI */
            PUSH_I8(0); /* no idea what this is */
            PUSH_R(SP); /* stack_end */
            IEMIT1(REXB|REXR|REXW);XOR_R_R(R9, R9); /* rtld_fini (not used) */
            IEMIT1(REXB|REXR|REXW);XOR_R_R(R8, R8); /* fini (not used) */
            XOR_R_R(CX, CX); /* init (not used) */
            /*rdx = argv */
            /*rsi = argc */
            /* The reloc is recorded before the REX prefix is emitted;
               x86_process_reloc accounts for the prefix byte */
            PREALLOC_INSN;
            ZCHK(cg_add_reloc(cg, CG_RK_FUNCADDR, main_id, 0));
            IEMIT1(REXW);LEA_RIP_DISP32(DI, 0); /* main addr */
            PREALLOC_INSN;
            ZCHK(cg_add_reloc(cg, CG_RK_FUNCCALL, FUNCID_LIBC_INIT, 0));
            CALL_I32(0);
            NOP;
            INT3;
        }
    } else /* TODO */ {
        /* TODO for PE:
            - align stack
            - call main addr (which will have argv == NULL) */
        NOP;
        INT3; /* test */
    }
    return 1;
  oom:
    return 0;
}

/**
 * Generates helper functions that the generated code calls.
 *
 * On 32-bit, this emits "__x86.get_pc_thunk.bx", which loads the dword at
 * the top of the stack into BX and returns. The pending calls recorded in
 * cg->start_helper_calls are patched to point at it. Nothing is generated
 * on 64-bit.
 *
 * \return 1 on success, 0 on failure.
 */
static int x86_gen_helper_code(struct CgCtx *cg)
{
    if (cg->arch->wordbits == 32) {
        struct SymInfo *sym;
        sym = cg_helper_func_start(cg, "__x86.get_pc_thunk.bx");
        cg->helper_offset = cg_get_position(cg);
        x86_process_addrslots(cg, cg->start_helper_calls);
        /* NOTE(review): the failure check comes after the address slots
           have been patched; order kept as-is */
        if (!sym) return 0;
        MOV_FROM_PTR_SIB(BX, SCALE_1X,NO_INDEX,SP);
        RET;
        cg_helper_func_end(cg, sym);
    }
    return 1;
  oom:
    return 0;
}

/**
 * Fills in the code generator function table with the x86 implementations.
 *
 * \param cgen  Table to fill in.
 */
void x86_get_codegen(struct Codegen *cgen)
{
    cgen->has_endianness = x86_has_endianness;
    cgen->has_feature_state = x86_has_feature_state;
    cgen->initialize = x86_initialize;
    cgen->codegen = x86_codegen;
    cgen->gen_elf_plt = x86_gen_elf_plt;
    cgen->get_elf_plt_size = x86_get_elf_plt_size;
    cgen->process_reloc = x86_process_reloc;
    cgen->gen_start_code = x86_gen_start_code;
    cgen->gen_helper_code = x86_gen_helper_code;
}