/* output.c -- Output generation Copyright © 2022-2024 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include "include/csbe.h" #include "csbe_internal.h" #include "outformat/outformat_common.h" #include "codegen/codegen_common.h" int init_cgctx(struct CgCtx *cg, struct Csbe *csbe, struct ArchInfo *arch) { unsigned num_funcs; cg->csbe = csbe; cg->arch = arch; cgcode_reset(&cg->code); cg->start_helper_calls = NULL; cg->import_sym_count = 0; cg->import_func_count = 0; cg->export_sym_count = 0; cg->rodata_present = 0; cg->syminfos = NULL; cg->syminfo_nextptr = &cg->syminfos; cg->relocs = NULL; cg->reloc_nextptr = &cg->relocs; num_funcs = cg->csbe->num_funcs; if (num_funcs != 0) { cg->sym_by_func_id = allocp(cg->csbe, sizeof(struct SymInfo *)*cg->csbe->max_funcdef_count); ZCHK(cg->sym_by_func_id); cg->sym_by_data_id = allocp(cg->csbe, sizeof(struct SymInfo *)*cg->csbe->max_datadef_count); ZCHK(cg->sym_by_data_id); cg->internal_calls = alloc_dptr_list(csbe, num_funcs); ZCHK(cg->internal_calls); cg->func_offsets = allocp(cg->csbe, sizeof(uint32)*num_funcs); ZCHK(cg->func_offsets); memset(cg->func_offsets, INVALID_OFFSET_BYTE, sizeof(uint32)*num_funcs); } return 1; } static size_t minimum_output_size(struct ArchInfo *arch) { /* TODO arch->outformat, arch->wordbits */ (void)arch; return 4096*3 + 10000; /* for alignment padding + file headers */ } /** Returns the maximum space usage (in bytes) for library references. */ static size_t get_lib_refs_size(struct Csbe *csbe, struct ArchInfo *arch) { struct CsbeLibrary *lib; size_t max_size = 0; for (lib = csbe->imported_libs; lib; lib = lib->next) { max_size += arch->libbytes + lib->namelen+arch->libnamebytes; } return max_size; } int gather_func_info(struct CgCtx *cg, const struct FuncIR *funcbody) { unsigned num_ebb; cg->func_stacksize = 0; /* Computed in cg_stack_alloc_vars */ cg->jumps_to_epilogue = NULL; num_ebb = funcbody->num_ebb; cg->relative_jumps = alloc_dptr_list(cg->csbe, num_ebb); ZCHK(cg->relative_jumps); cg->ebb_offsets = allocp(cg->csbe, sizeof(uint32)*num_ebb); ZCHK(cg->ebb_offsets); memset(cg->ebb_offsets, INVALID_OFFSET_BYTE, sizeof(uint32)*num_ebb); return 1; } static enum CsbeErr generate_code(struct CgCtx *cg, struct Csbe *csbe, struct ArchInfo *arch, size_t *max_size_out) { struct CsbeFuncdef *funcdef; unsigned symbytes = 0; if (!init_cgctx(cg, csbe, arch)) { assert(csbe->error != CSBEE_OK); return csbe->error; } if (arch->outformat == CSBEOF_ELF_EXE) { assert(csbe->main_func != NULL); if (UNLIKELY(!arch->cgen.gen_start_code(cg))) { assert(csbe->error != CSBEE_OK); return csbe->error; } } cg->startcode_len = cg_get_position(cg); if (UNLIKELY(!arch->cgen.gen_helper_code(cg))) { assert(csbe->error != CSBEE_OK); return csbe->error; } for (funcdef = csbe->fd; funcdef; funcdef = funcdef->next) { const struct FuncIR *funcbody = funcdef->funcbody; struct SymInfo *sym; if (!funcbody && (funcdef->flags & CSBEFD_INTERN_USED) == 0) continue; /* Add symbol */ sym = new_sym(cg); if (UNLIKELY(!sym)) goto oom; sym->funcdef = funcdef; sym->datadef = NULL; sym->mode = SYMMODE_IMPORT; sym->name = funcdef->name; sym->namelen = funcdef->namelen; symbytes += arch->symbytes + sym->namelen*arch->symnamebytes; cg->sym_by_func_id[funcdef->func_id] = sym; if (!funcbody) { cg->import_sym_count++; cg->import_func_count++; continue; } if ((funcdef->flags & CSBEFD_INTERN_EXPORTED) != 0) { cg->export_sym_count++; sym->mode = SYMMODE_EXPORT; } else if ((funcdef->flags & CSBEFD_OBJGLOBAL) != 0) { sym->mode = SYMMODE_OBJGLOBAL; } else { sym->mode = SYMMODE_LOCAL; } /* Generate code */ if (UNLIKELY(!gather_func_info(cg, funcbody))) goto oom; sym->offset = cg->func_offsets[funcdef->func_id] = cg_get_position(cg); if (!arch->cgen.codegen(cg, funcbody->funcdef)) { return SET_ERROR(CSBEE_INTERNAL_ERROR); } sym->size = cg_get_position(cg) - sym->offset; } cg_lastchunk(cg); /* TODO data size + size of symbol tables */ *max_size_out = cg->code.size + symbytes; return CSBEE_OK; oom: csbe->error = CSBEE_OUT_OF_MEM; return CSBEE_OUT_OF_MEM; } static enum CsbeErr alloc_datadefs(struct CgCtx *cg, struct Csbe *csbe, struct ArchInfo *arch, size_t *max_size_out) { struct CsbeDatadef *datadef; size_t offset = 0; unsigned symbytes = 0; if (cg->arch->is_textual) goto end; for (datadef = csbe->dd; datadef; datadef = datadef->next) { struct SymInfo *sym; unsigned size, align; /* Add symbol */ sym = new_sym(cg); if (UNLIKELY(!sym)) goto oom; sym->funcdef = NULL; sym->datadef = datadef; sym->mode = SYMMODE_IMPORT; sym->name = datadef->name; sym->namelen = datadef->namelen; symbytes += arch->symbytes + sym->namelen*arch->symnamebytes; cg->sym_by_data_id[datadef->data_id] = sym; if ((datadef->flags & CSBEDD_EXTERNAL) != 0) { cg->import_sym_count++; cg->import_data_count++; continue; } if ((datadef->flags & CSBEDD_INTERN_EXPORTED) != 0) { cg->export_sym_count++; sym->mode = SYMMODE_EXPORT; } else if ((datadef->flags & CSBEDD_OBJGLOBAL) != 0) { sym->mode = SYMMODE_OBJGLOBAL; } else { sym->mode = SYMMODE_LOCAL; } if (UNLIKELY(!cg_get_type_size(cg, &datadef->type, &size, &align))) { assert(cg->csbe->error != CSBEE_OK); return cg->csbe->error; } cg->rodata_present = 1; /* FIXME this only works when there is a single data section (e.g. only .rodata) */ offset = UINT_ALIGN(offset, align); sym->offset = offset; sym->size = size; sym->align = align; offset += MAX(size, 1); } end: *max_size_out = offset + symbytes; return CSBEE_OK; oom: csbe->error = CSBEE_OUT_OF_MEM; return CSBEE_OUT_OF_MEM; } enum CsbeErr csbe_output(struct Csbe *csbe, int arch_id, unsigned char **buffer_out, size_t *size_out, CsbeBufferAllocFunc *allocfn, void *userparam) { struct ArchInfo *arch; struct CgCtx cg; struct BinWriter bw; unsigned char *buff; size_t max_size, funcdefs_size, datadefs_size; enum CsbeErr err; assert(arch_id >= 0 && (unsigned)arch_id < csbe->cfg.num_arch); assert(buffer_out != NULL); assert(size_out != NULL); arch = &csbe->archs[arch_id]; /* Process function definitions & generate code */ err = generate_code(&cg, csbe, arch, &funcdefs_size); if (err != CSBEE_OK) return err; /* Process data definitions. Computes sizes only */ err = alloc_datadefs(&cg, csbe, arch, &datadefs_size); if (err != CSBEE_OK) return err; max_size = minimum_output_size(arch) + get_lib_refs_size(csbe, arch) + funcdefs_size + datadefs_size; buff = allocfn ? allocfn(userparam, max_size) : malloc(max_size); if (!buff) return CSBEE_OUT_OF_MEM; *buffer_out = buff; memset(buff, 0, max_size); /* Make sure we don't leak sensitive info! */ binwriter_init(&bw, arch, buff, max_size); switch (arch->outformat) { case CSBEOF_RAW: err = raw_output(csbe, &cg, arch, &bw); break; CASE_ELF err = elf_output(csbe, &cg, arch, &bw); break; /* TODO more output output formats */ CASE_PE default: assert(0); return CSBEE_INTERNAL_ERROR; } *size_out = binwriter_get_position(&bw); return err; }