/*

  minicg.c -- Minimalistic code generator

  Copyright © 2017 Samuel Lidén Borell

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.

*/

#include "../backend.h"
#include "../misc.h"
#include "../platform.h"

#include <stdio.h>
#include <string.h>

/* Passes that process_namespace() can be asked to perform (bit flags) */
typedef enum {
    Prepare = 0x01,
    GenCode = 0x02,
    GenConstData = 0x04,
    GenRWData = 0x08
} ProcessingFlags;

/* Supported output container formats */
typedef enum {
    ELF = 0
} ExecFormat;

/* Target architectures. The values are used as indices into the
   per-architecture tables below (elfmach, endian, bits, ...), so the
   order here and in those tables must be kept in sync. */
typedef enum {
    AARCH64 = 0,
    LM32 = 1,
    OR1K = 2,
    PPC64EL = 3,
    RV32I = 4,
    RV64GC = 5,
    X86 = 6,
    X86_64 = 7
} Arch;

/* State of one code generation run */
typedef struct {
    FILE *file;                         /* output file */
    LRLCtx *ctx;
    const LRLBackendOptions *options;
    ExecFormat execformat;
    Arch arch;
    int debug;

    /* Sections.
       Sections with little data can be merged with other sections,
       for example:
         constzero -> code/constinit
         constinit -> code
         varzero -> varinit
         varuninit -> varinit/varzero */
    /* TODO */
    /*size_t constzero_size;
    size_t constinit_size;
    size_t varzero_size;
    size_t varinit_size;
    size_t varuninit_size;*/

    /* ELF stuff */
    /* File offsets and sizes of the parts written so far */
    size_t interp_start, interp_size;
    size_t strtab_start, strtab_size;
    size_t text_start, text_size;
    size_t rwdata_start, rwdata_size;
    /*size_t constzero_start;
    size_t constinit_start;
    size_t varzero_start;
    size_t varinit_start;
    size_t varuninit_start;*/
    size_t dynamic_start, dynamic_size;
    size_t hash_start, hash_size;
    size_t dynsym_start, dynsym_size, dynsym_count;
    size_t symtab_start, symtab_size, symtab_count;
    /* Offsets of the section names inside the string table */
    size_t name_strtab, name_text, name_dynsym,
           name_symtab, name_hash, name_dynamic;
} MiniCGCtx;

#define CUC const unsigned char
#define LE 1
#define BE 2
#define B32 1
#define B64 2
#define TODO 0

/* Per-architecture tables, indexed by Arch */
/*                         AA64  LM32  OR1K  PPC64 RV32  RV64  x86   x86-64 */
static CUC elfmach[]    = { 0xB7, 0x8A, 0x5C, 0x33, 0xF3, 0xF3, 0x03, 0x3E };
/* TODO does any of these need a non-zero elfabi ? */
static CUC endian[]     = { LE,   BE,   BE,   LE,   LE,   LE,   LE,   LE   };
static CUC bits[]       = { B64,  B32,  B32,  B64,  B32,  B64,  B32,  B64  };
static CUC elfflags[]   = { 0,    0,    0,    0,    0x0,  0x5,  0,    0    };
/* TODO check these for AArch64, LM32, OR1K, PPC64 */
static CUC instralign[] = { 4,    4,    4,    4,    2,    4,    16,   16   };
/* TODO check the values for other platforms than x86 and x86-64 */
static const unsigned pagealign[] = {
    0x200000, 0x1000, 0x1000, 0x1000, 0x200000, 0x200000, 0x1000, 0x200000
    /* TODO check the values for other platforms than x86 and x86-64 */
};

/* Constant parts of the ELF identification bytes */
static const unsigned char elf_mag[4] = { 0x7F, 'E', 'L', 'F' };
static const unsigned char elf_ver = 1;
static const unsigned char elf_os = 0;
static const unsigned char elf_abi = 0;
static const unsigned char elf_pad7[7];

/* generic instruction, used to align the .text section */
static const unsigned char padding[16];

/* Program header (segment) permission flags */
#define ELF_EXEC   0x1
#define ELF_WRITE  0x2
#define ELF_READ   0x4

/* Section header flags */
#define SH_WRITE   0x1
#define SH_ALLOC   0x2
#define SH_EXEC    0x4

/* Symbol table "info" field: binding (high nibble) and type (low nibble) */
#define STI_LOCAL    0x00
#define STI_GLOBAL   0x10
#define STI_WEAK     0x20
#define STI_NOTYPE   0x00
#define STI_OBJECT   0x01
#define STI_FUNC     0x02
#define STI_SECTION  0x03
#define STI_FILE     0x04

/* Tags of entries in the dynamic section */
#define DT_NULL      0
#define DT_NEEDED    1
#define DT_PLTRELSZ  2
#define DT_PLTGOT    3
#define DT_HASH      4
#define DT_STRTAB    5
#define DT_SYMTAB    6
#define DT_STRSZ    10
#define DT_SYMENT   11
#define DT_SONAME   14
#define DT_SYMBOLIC 16

/* Backend-internal identifiers of where a definition is stored */
#define SECTION_CODE           1
#define SECTION_CONSTZERODATA  2
#define SECTION_CONSTINITDATA  3
#define SECTION_RWINITDATA     4
#define SECTION_RWUNDEFDATA    5
#define SECTION_STRUCTMEMBER   6
#define SECTION_REGPARAM       7
#define SECTION_STACKPARAM     8
#define SECTION_STACKRETURN    9
#define SECTION_STACKVAR      10

/* Marks a definition as placed by the backend and records its section id
   and offset. Note that the expansion ends with a semicolon. */
#define set_data_addr(def, secid, offs) \
    ((def).flags |= LRL_DeFl_Internal_DefinedByBackend, \
     (def).section_id = (secid), \
     (def).offset = (offs));

/**
 * Writes "size" bytes from "buff" to the output file.
 *
 * Returns 0 on success. On failure an error is printed with perror and
 * 2 is returned. The data is written as one item, so a size of 0 is
 * reported as a failure (fwrite returns 0 in that case).
 */
static int write(MiniCGCtx *mcg, const unsigned char *buff, size_t size)
{
    size_t numwritten = fwrite(buff, size, 1, mcg->file);
    if (numwritten != 1) {
        perror("writing to output file");
        return 2;
    }
    return 0;
}

/*
 * The WRITE* / SEEK macros below wrap the corresponding helper functions.
 * They expect a variable "mcg" to be in scope, and make the *calling*
 * function return the error code if the operation fails.
 */

/* Writes an object (not an array/pointer) as raw bytes */
#define WRITE(buff) do { \
        int wrerr = write((mcg), (&(buff)), sizeof(buff)); \
        if (wrerr) return wrerr; \
    } while (0)

/* Writes a whole array of bytes */
#define WRITE_ARR(buff) do { \
        int wrerr = write((mcg), (buff), sizeof(buff)); \
        if (wrerr) return wrerr; \
    } while (0)

/* Writes the first "size" bytes from a buffer */
#define WRITE_LIMIT(buff, size) do { \
        int wrerr = write((mcg), (buff), (size)); \
        if (wrerr) return wrerr; \
    } while (0)

/* Writes a single byte */
#define WRITE8(val) do { \
        unsigned char wrbuff = (val); \
        int wrerr = write((mcg), (&wrbuff), 1); \
        if (wrerr) return wrerr; \
    } while (0)

/**
 * Writes a 16-bit value in the byte order of the target architecture.
 * Returns 0 on success, or the error code from write().
 */
static int write16(MiniCGCtx *mcg, unsigned short val)
{
    unsigned char wrbuff[2];
    if (endian[mcg->arch] == LE) {
        wrbuff[0] = val;
        wrbuff[1] = val >> 8;
    } else {
        wrbuff[0] = val >> 8;
        wrbuff[1] = val;
    }
    return write(mcg, wrbuff, sizeof(wrbuff));
}

#define WRITE16(val) do { \
        int wrerr = write16((mcg), (val)); \
        if (wrerr) return wrerr; \
    } while (0)

/**
 * Writes a 32-bit value in the byte order of the target architecture.
 * Returns 0 on success, or the error code from write().
 */
static int write32(MiniCGCtx *mcg, unsigned int val)
{
    unsigned char wrbuff[4];
    if (endian[mcg->arch] == LE) {
        wrbuff[0] = val;
        wrbuff[1] = val >> 8;
        wrbuff[2] = val >> 16;
        wrbuff[3] = val >> 24;
    } else {
        wrbuff[0] = val >> 24;
        wrbuff[1] = val >> 16;
        wrbuff[2] = val >> 8;
        wrbuff[3] = val;
    }
    return write(mcg, wrbuff, sizeof(wrbuff));
}

#define WRITE32(val) do { \
        int wrerr = write32((mcg), (val)); \
        if (wrerr) return wrerr; \
    } while (0)

/**
 * Writes a 64-bit value, given as two 32-bit halves, in the byte order
 * of the target architecture.
 * Returns 0 on success, or the error code from write().
 */
static int write64(MiniCGCtx *mcg, unsigned int high, unsigned int low)
{
    unsigned char wrbuff[8];
    if (endian[mcg->arch] == LE) {
        wrbuff[0] = low;
        wrbuff[1] = low >> 8;
        wrbuff[2] = low >> 16;
        wrbuff[3] = low >> 24;
        wrbuff[4] = high;
        wrbuff[5] = high >> 8;
        wrbuff[6] = high >> 16;
        wrbuff[7] = high >> 24;
    } else {
        wrbuff[0] = high >> 24;
        wrbuff[1] = high >> 16;
        wrbuff[2] = high >> 8;
        wrbuff[3] = high;
        wrbuff[4] = low >> 24;
        wrbuff[5] = low >> 16;
        wrbuff[6] = low >> 8;
        wrbuff[7] = low;
    }
    return write(mcg, wrbuff, sizeof(wrbuff));
}

#define WRITE64(high, low) do { \
        int wrerr = write64((mcg), (high), (low)); \
        if (wrerr) return wrerr; \
    } while (0)

/**
 * Pads the output file with zero bytes until the file position is a
 * multiple of "align". "align" is expected to be a power of two, since
 * the misalignment is computed with a bit mask. An alignment of 0 or 1
 * means that no padding is needed.
 *
 * Returns 0 on success and 2 on failure.
 */
static int write_align(MiniCGCtx *mcg, size_t align)
{
    long pos;
    size_t misalign, extra;

    if (align <= 1) return 0;

    pos = ftell(mcg->file);
    if (pos < 0) {
        perror("getting position in output file");
        return 2;
    }

    misalign = (size_t)pos & (align-1);
    if (misalign == 0) return 0;

    /* Write in chunks, so alignments larger than the padding buffer
       do not read outside of it */
    extra = align - misalign;
    while (extra > 0) {
        size_t chunk = extra < sizeof(padding) ? extra : sizeof(padding);
        WRITE_LIMIT(padding, chunk);
        extra -= chunk;
    }
    return 0;
}

#define WRITE_ALIGN(align) do { \
        int wrerr = write_align((mcg), (align)); \
        if (wrerr) return wrerr; \
    } while (0)

/**
 * Writes one ELF program header. The field order differs between the
 * 32-bit and 64-bit formats (the flags field is located differently).
 * Only addresses can have a non-zero high half in the 64-bit format;
 * the offset, sizes and alignment are always written with a zero
 * high half.
 *
 * Returns 0 on success, or the error code from the failing write.
 */
static int write_elf_ph(MiniCGCtx *mcg, unsigned phtype, unsigned flags,
                        unsigned offs,
                        unsigned vaddrh, unsigned vaddrl,
                        unsigned paddrh, unsigned paddrl,
                        unsigned filesize, unsigned memsize, unsigned align)
{
    WRITE32(phtype);
    if (bits[mcg->arch] != B64) {
        WRITE32(offs);
        WRITE32(vaddrl);
        WRITE32(paddrl);
        WRITE32(filesize);
        WRITE32(memsize);
        WRITE32(flags);
        WRITE32(align);
    } else {
        WRITE32(flags);
        WRITE64(0, offs);
        WRITE64(vaddrh, vaddrl);
        WRITE64(paddrh, paddrl);
        WRITE64(0, filesize);
        WRITE64(0, memsize);
        WRITE64(0, align);
    }
    return 0;
}

#define WRITE_PH(ty, fl, of, vh, vl, ph, pl, fs, ms, al) do { \
        int wrerr = write_elf_ph((mcg), (ty), (fl), (of), (vh), (vl), \
                                 (ph), (pl), (fs), (ms), (al)); \
        if (wrerr) return wrerr; \
    } while (0)

/**
 * Appends a string (including its null terminator) to the string table
 * that is currently being written at the file position.
 *
 * If "offsvar" is not NULL, the offset of the string inside the string
 * table is stored there. mcg->strtab_size is increased by the number of
 * bytes written.
 *
 * Returns 0 on success and 2 on failure.
 */
static int write_strtab(MiniCGCtx *mcg, const char *str, size_t *offsvar)
{
    size_t wrlen = strlen(str)+1;
    int wrerr = write(mcg, (const unsigned char *)str, wrlen);
    if (wrerr) return wrerr;

    if (offsvar) {
        *offsvar = mcg->strtab_size;
    }
    mcg->strtab_size += wrlen;
    return 0;
}

#define WRITE_STRTAB(str, offsvar) do { \
        int wrerr = write_strtab((mcg), (str), (offsvar)); \
        if (wrerr) return wrerr; \
    } while (0)

/* Seeks to an absolute position in the output file */
#define SEEK(pos) do { \
        if (fseek(mcg->file, (pos), SEEK_SET) == -1) { \
            perror("seeking in output file"); \
            return 2; \
        } \
    } while (0)

/**
 * Returns the path of the dynamic linker ("interpreter") for the target
 * architecture. For architectures without an implemented path, fail()
 * is called and NULL is returned.
 */
static const char *get_elf_interp(MiniCGCtx *mcg)
{
    switch (mcg->arch) {
    case AARCH64:
        return "/lib/ld-linux-aarch64.so.1";
    case LM32:
    case OR1K:
    case PPC64EL:
    case RV32I:
    case RV64GC:
        fail("minicg_archnotimpl");
        return NULL;
    case X86:
        return "/lib/ld-linux.so.2";
    case X86_64:
        return "/lib64/ld-linux-x86-64.so.2";
    default:
        fail("minicg_badarch");
        return NULL;
    }
}

/**
 * Writes the ELF file header and, unless a relocatable file is being
 * generated, the program headers and the interpreter string, followed
 * by padding up to the instruction alignment.
 *
 * Several fields are written with placeholder values and are patched
 * by write_elf_footer() once the sizes are known. The order of the
 * program headers (PHDR, INTERP, LOAD text, LOAD rw-data, DYNAMIC,
 * GNU_STACK) must match the offsets that write_elf_footer() seeks to.
 *
 * Sets mcg->interp_start and mcg->interp_size.
 * Returns 0 on success, or a non-zero error code.
 */
static int write_elf_header(MiniCGCtx *mcg)
{
    unsigned short type;
    int ehsize, phsize, num_ph, is_64;
    const char *interp;

    is_64 = (bits[mcg->arch] == B64);

    /* Write ELF header */
    WRITE_ARR(elf_mag);
    WRITE_LIMIT(&bits[mcg->arch], 1);
    WRITE_LIMIT(&endian[mcg->arch], 1);
    WRITE(elf_ver);
    WRITE(elf_os);
    WRITE(elf_abi);
    WRITE_ARR(elf_pad7);
    switch (mcg->options->op) {
    case LRL_BO_MakeObject:
        type = 1; /* relocatable file */
        num_ph = 0;
        /*num_sh = 3;*/
        break;
    case LRL_BO_MakeExec:
    case LRL_BO_DumpIR:
        type = 3; /* executable file (2 = non-PIC, 3=PIC/PIE) */
        num_ph = 6;
        /*num_sh = 4;*/
        break;
    default:
        fail("minicg_badswitch");
        return 2; /* never continue with "type" and "num_ph" unset */
    }
    WRITE16(type);
    WRITE16(elfmach[mcg->arch]);
    WRITE32(1); /* elf version */
    if (!is_64) {
        WRITE32(0/*num_ph?0x1000:0*/); /* entry point */
        WRITE32(num_ph?0x34:0); /* program header offset */
        WRITE32(0x0);  /* section header offset (filled in later) */
        WRITE32(elfflags[mcg->arch]);
        WRITE16(0x34); /* ehsize */
        WRITE16(num_ph?32:0); /* program header entry size */
        WRITE16(num_ph); /* number of program headers */
        WRITE16(40); /* section header entry size */
        ehsize = 0x34;
    } else {
        WRITE64(0, 0/*num_ph?0x1000:0*/); /* entry point */
        WRITE64(0, num_ph?0x40:0); /* program header offset */
        WRITE64(0, 0x0);  /* section header offset (filled in later) */
        WRITE32(elfflags[mcg->arch]);
        WRITE16(0x40); /* ehsize */
        WRITE16(num_ph?56:0); /* program header entry size */
        WRITE16(num_ph); /* number of program headers */
        WRITE16(64); /* section header entry size */
        ehsize = 0x40;
    }
#if 0
    WRITE16(num_sh); /* number of section headers */
    WRITE16(type == 2 || type == 3 ? 2 : 1); /* section with section names */
#endif
    WRITE16(0); /* number of section headers (filled in later) */
    WRITE16(0); /* section with section names (filled in later) */

    /* Write Program Headers if generating an executable or library */
    if (type == 2 || type == 3) {
        phsize = is_64 ? 56*num_ph : 32*num_ph;
        WRITE_PH(0x6/* PHDR */, ELF_READ|ELF_EXEC, ehsize,
                 0, ehsize, 0, ehsize, phsize, phsize, !is_64?4:8);

        mcg->interp_start = ehsize+phsize;
        interp = get_elf_interp(mcg);
        if (!interp) {
            /* No dynamic linker is known for this architecture */
            return 2;
        }
        mcg->interp_size = strlen(interp)+1;
        WRITE_PH(0x3/* INTERP */, ELF_READ, mcg->interp_start,
                 0, mcg->interp_start, 0, mcg->interp_start,
                 mcg->interp_size, mcg->interp_size, 1);

        /* LOAD (text) segment */
        WRITE_PH(0x1/* LOAD */, ELF_READ|ELF_EXEC, 0,
                 0, 0, 0, 0, /* relative addresses (PIC binary) */
                 1234, 1234, pagealign[mcg->arch]); /* size is filled in later */
        /* LOAD (rw-data) segment */
        WRITE_PH(0x1/* LOAD */, ELF_READ|ELF_WRITE, 0,
                 0, 0, 0, 0,
                 1234, 1234, pagealign[mcg->arch]); /* size is filled in later */
        /* DYNAMIC segment */
        WRITE_PH(2/* DYNAMIC */, ELF_READ|ELF_WRITE, 0,
                 0, 0, 0, 0, /* addr and size are filled in later */
                 1234, 1234, !is_64?4:8);
        /* GNU_STACK segment (make stack non-executable) */
        WRITE_PH(0x6474e551/* GNU_STACK */, ELF_READ|ELF_WRITE, 0,
                 0, 0, 0, 0, 0, 0, 0);
        /* TODO RELRO with bindnow */

        /* Write Interpreter segment */
        WRITE_LIMIT((const unsigned char*)interp, mcg->interp_size);
        /* TODO generate DYNAMIC header etc. */

        /* Padding before the code segment */
        WRITE_ALIGN(instralign[mcg->arch]);
    }
    return 0;
}

/**
 * Writes one ELF section header in the 32-bit or 64-bit layout.
 * Only the address can have a non-zero high half in the 64-bit format.
 *
 * Returns 0 on success, or the error code from the failing write.
 */
static int write_elf_sh(MiniCGCtx *mcg, unsigned nameoffs, unsigned type,
                        unsigned flags, unsigned vaddrh, unsigned vaddrl,
                        unsigned offs, unsigned filesize,
                        unsigned link, unsigned info,
                        unsigned align, unsigned entsize)
{
    WRITE32(nameoffs);
    WRITE32(type);
    if (bits[mcg->arch] != B64) {
        WRITE32(flags);
        WRITE32(vaddrl);
        WRITE32(offs);
        WRITE32(filesize);
        WRITE32(link);
        WRITE32(info);
        WRITE32(align);
        WRITE32(entsize);
    } else {
        WRITE64(0,flags);
        WRITE64(vaddrh, vaddrl);
        WRITE64(0, offs);
        WRITE64(0, filesize);
        WRITE32(link);
        WRITE32(info);
        WRITE64(0, align);
        WRITE64(0, entsize);
    }
    return 0;
}

/* Writes a section header and increments the local variable "num_sh",
   which must be in scope in the calling function */
#define WRITE_SH(nm, ty, fl, vh, vl, of, fs, li, in, al, es) do { \
        int wrerr = write_elf_sh((mcg), (nm), (ty), (fl), (vh), (vl), \
                                 (of), (fs), (li), (in), (al), (es)); \
        if (wrerr) return wrerr; \
        num_sh++; \
    } while (0)

/**
 * Writes one symbol table entry (16 bytes in the 32-bit format and
 * 24 bytes in the 64-bit format).
 *
 * Returns 0 on success, or the error code from the failing write.
 */
static int write_elf_sym(MiniCGCtx *mcg, unsigned name, unsigned val,
                         unsigned size, unsigned char info,
                         unsigned char other, unsigned short sh_index)
{
    if (bits[mcg->arch] != B64) {
        WRITE32(name);
        WRITE32(val);
        WRITE32(size);
        WRITE8(info);
        WRITE8(other);
        WRITE16(sh_index);
    } else {
        WRITE32(name);
        WRITE8(info);
        WRITE8(other);
        WRITE16(sh_index);
        WRITE64(0, val);
        WRITE64(0, size);
    }
    return 0;
}

#define WRITE_SYM(name, val, size, info, other, shidx) do { \
        int wrerr = write_elf_sym((mcg), (name), (val), (size), \
                                  (info), (other), (shidx)); \
        if (wrerr) return wrerr; \
    } while (0)

/**
 * Writes one entry (tag and value) of the dynamic section.
 *
 * Returns 0 on success, or the error code from the failing write.
 */
static int write_elf_dyn(MiniCGCtx *mcg, unsigned type, unsigned val)
{
    if (bits[mcg->arch] != B64) {
        WRITE32(type);
        WRITE32(val);
    } else {
        WRITE64(0, type);
        WRITE64(0, val);
    }
    return 0;
}

#define WRITE_DYN(type, val) do { \
        int wrerr = write_elf_dyn((mcg), (type), (val)); \
        if (wrerr) return wrerr; \
    } while (0)

/**
 * Writes everything that comes after the code: the string table, the
 * (currently empty) dynsym/symtab/hash sections, the dynamic section
 * and the section headers. Finally, it seeks back and fills in the
 * fields that write_elf_header() left as placeholders.
 *
 * mcg->text_start must have been set by the caller. The placeholder
 * offsets used here depend on the order in which write_elf_header()
 * writes the program headers.
 *
 * Returns 0 on success, or a non-zero error code.
 */
static int write_elf_footer(MiniCGCtx *mcg)
{
    int is_64 = (bits[mcg->arch] == B64);
    int num_sh;
    size_t shstart, strtab_idx, dynsym_idx, symtab_idx;
    size_t str_start, str_exit, str_libc; /* XXX test stuff */
    size_t load_text_size;
    size_t load_data_size;
    size_t load_data_start;

    /* Write string table (section names, symbols, etc.) */
    mcg->strtab_start = ftell(mcg->file);
    mcg->strtab_size = 1;
    WRITE8(0);
    if (mcg->options->op == LRL_BO_MakeExec) {
        /* Ends up at offset 1, which is what the .interp section
           header below uses as its name offset */
        WRITE_STRTAB(".interp", NULL);
    }
    WRITE_STRTAB(".strtab", &mcg->name_strtab);
    WRITE_STRTAB(".text", &mcg->name_text);
    WRITE_STRTAB(".dynsym", &mcg->name_dynsym);
    WRITE_STRTAB(".symtab", &mcg->name_symtab);
    WRITE_STRTAB(".hash", &mcg->name_hash);
    WRITE_STRTAB(".dynamic", &mcg->name_dynamic);
    /* TODO write dynamic linker stuff here also! */
    WRITE_STRTAB("_start", &str_start);
    WRITE_STRTAB("libc.so.6", &str_libc);
    WRITE_STRTAB("exit", &str_exit);

    /* Write dynsym section */
    WRITE_ALIGN(!is_64 ? 4 : 8);
    mcg->dynsym_start = ftell(mcg->file);
    mcg->dynsym_count = 1;
    mcg->dynsym_size = (!is_64?16:24)*mcg->dynsym_count;
    WRITE_SYM(0, 0, 0, 0, 0, 0);
    /* TODO */

    /* Write symtab section */
    WRITE_ALIGN(!is_64 ? 4 : 8);
    mcg->symtab_start = ftell(mcg->file);
    mcg->symtab_count = 1;
    mcg->symtab_size = (!is_64?16:24)*mcg->symtab_count;
    /*WRITE_SYM(str_start, mcg->text_start, 0, STI_GLOBAL|STI_FUNC, 0, 2);*/ /* FIXME might be in sh 1 also */
    WRITE_SYM(0, 0, 0, 0, 0, 0);
    /* TODO */

    /* Write hash section */
    WRITE_ALIGN(!is_64 ? 4 : 8);
    mcg->hash_start = ftell(mcg->file);
    /* NOTE(review): two 32-bit words (8 bytes) are written below, but
       the size is recorded as 4 -- confirm which one is intended */
    mcg->hash_size = 4;
    WRITE32(0); /* number of buckets */
    WRITE32(0); /* number of chains */
    /* TODO */

    /* End of text segment */
    WRITE32(0); /* it seems that readelf doesn't like when an empty .hash is at the end of a segment */
    WRITE_ALIGN(!is_64 ? 4 : 8);
    mcg->text_size = ftell(mcg->file) - mcg->text_start;
    load_text_size = mcg->text_start + mcg->text_size;
    load_data_start = pagealign[mcg->arch]; /* TODO will need more space, if the data section is larger than 1 page */

    /* TODO need to split function, because the data segments should go here */
    mcg->rwdata_start = ftell(mcg->file);
    mcg->rwdata_size = 0;
    WRITE_ALIGN(!is_64 ? 4 : 8);
    /* TODO uninitialized RW data could be placed in the beginning of the
            LOAD segment, which contains junk from the last page of the
            text section */

    /* Write dynamic segment/section (this must be inside the RW LOAD segment) */
    /* TODO this is apparently usually the first section in the data segment */
    mcg->dynamic_start = ftell(mcg->file);
    WRITE_DYN(DT_NEEDED, str_libc); /* TODO library dependencies should not be hardcoded */
    /*WRITE_DYN(DT_PLTGOT, xxx);*/
    WRITE_DYN(DT_HASH, mcg->hash_start);
    WRITE_DYN(DT_STRTAB, mcg->strtab_start);
    /* NOTE(review): this refers to .symtab; DT_SYMTAB conventionally
       refers to the dynamic symbol table (.dynsym) -- confirm */
    WRITE_DYN(DT_SYMTAB, mcg->symtab_start);
    WRITE_DYN(DT_STRSZ, mcg->strtab_size);
    WRITE_DYN(DT_SYMENT, !is_64?16:24);
    WRITE_DYN(DT_NULL, 0);
    mcg->dynamic_size = ftell(mcg->file) - mcg->dynamic_start;
    load_data_size = (mcg->dynamic_start + mcg->dynamic_size) - mcg->rwdata_start;

    /* Section headers - LOAD (executable) segment */
    shstart = ftell(mcg->file);
    num_sh = 0;
    WRITE_SH(0, 0x0/* NULL */, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    if (mcg->options->op == LRL_BO_MakeExec) {
        WRITE_SH(1, 0x1/* PROGBITS */, SH_ALLOC,
                 0, mcg->interp_start, mcg->interp_start, mcg->interp_size,
                 0, 0, 1, 0);
    }
    /* .text does not contain a table of fixed-size entries, so the entry
       size is 0. The instruction alignment goes in the alignment field. */
    WRITE_SH(mcg->name_text, 0x1/* PROGBITS */, SH_ALLOC|SH_EXEC,
             0, mcg->text_start, mcg->text_start, mcg->text_size,
             0, 0, instralign[mcg->arch], 0);
    strtab_idx = num_sh;
    WRITE_SH(mcg->name_strtab, 0x3/* STRTAB */,
             SH_ALLOC, /* SH_ALLOC is needed for dynsyms, section header names don't need it */
             0, 0, mcg->strtab_start, mcg->strtab_size,
             0, 0, 1, 0);
    dynsym_idx = num_sh;
    WRITE_SH(mcg->name_dynsym, 11/* DYNSYM */, SH_ALLOC,
             0, mcg->dynsym_start, mcg->dynsym_start, mcg->dynsym_size,
             strtab_idx, mcg->dynsym_count, !is_64?4:8, !is_64?16:24);
    symtab_idx = num_sh;
    WRITE_SH(mcg->name_symtab, 2/* SYMTAB */, SH_ALLOC,
             0, mcg->symtab_start, mcg->symtab_start, mcg->symtab_size,
             strtab_idx, mcg->symtab_count, !is_64?4:8, !is_64?16:24);
    WRITE_SH(mcg->name_hash, 5/* HASH */, SH_ALLOC,
             0, mcg->hash_start, mcg->hash_start, mcg->hash_size,
             dynsym_idx, 0, !is_64?4:8, 0);
    (void)symtab_idx; /* not referenced by any section header yet */

    /* Section headers - LOAD (read-write or read-only) segments */
    /* TODO data sections (must come after the sections above, which go
            into the text segment)
            SHT_PROGBITS = initialized data?
            SHT_NOBITS = uninitialized data? */

    /* Section headers - DYNAMIC segment */
    WRITE_SH(mcg->name_dynamic, 6/* DYNAMIC */,
             SH_ALLOC|SH_WRITE, /* TODO some archs do not need this to be writable */
             0, mcg->dynamic_start+load_data_start,
             mcg->dynamic_start, mcg->dynamic_size,
             strtab_idx, 0, !is_64?4:8, 0);

    /* Fill in number of sections in the ehdr */
    SEEK(!is_64 ? 0x30 : 0x3C);
    WRITE16(num_sh); /* number of section headers */
    WRITE16(strtab_idx); /* string table index */

    if (mcg->options->op == LRL_BO_MakeExec) {
        if (!is_64) {
            /* Fill in the entry point */
            SEEK(0x18);
            WRITE32(mcg->text_start);
            /* Fill in the section header offset */
            SEEK(0x20);
            WRITE32(shstart);
            /* Fill in size in the LOAD (code) phdr */
            SEEK(0x34 + 32*2 + 0x10);
            WRITE32(load_text_size); /* size in file */
            WRITE32(load_text_size); /* size in memory */
            /* Fill in addr and size in the LOAD (rw-data) phdr */
            SEEK(0x34 + 32*3 + 0x4);
            WRITE32(mcg->rwdata_start);
            WRITE32(mcg->rwdata_start + load_data_start);
            WRITE32(mcg->rwdata_start + load_data_start);
            WRITE32(load_data_size);
            WRITE32(load_data_size);
            /* Fill in addr and size in DYNAMIC phdr */
            SEEK(0x34 + 32*4 + 0x4);
            WRITE32(mcg->dynamic_start);
            WRITE32(mcg->dynamic_start + load_data_start);
            WRITE32(mcg->dynamic_start + load_data_start);
            WRITE32(mcg->dynamic_size);
            WRITE32(mcg->dynamic_size);
        } else {
            /* Fill in the entry point */
            SEEK(0x18);
            WRITE64(0, mcg->text_start);
            /* Fill in the section header offset */
            SEEK(0x28);
            WRITE64(0, shstart);
            /* Fill in size in the LOAD (code) phdr */
            SEEK(0x40 + 56*2 + 0x20);
            WRITE64(0, load_text_size); /* size in file */
            WRITE64(0, load_text_size); /* size in memory */
            /* Fill in addr and size in the LOAD (rw-data) phdr */
            SEEK(0x40 + 56*3 + 0x8);
            WRITE64(0, mcg->rwdata_start);
            WRITE64(0, mcg->rwdata_start + load_data_start);
            WRITE64(0, mcg->rwdata_start + load_data_start);
            WRITE64(0, load_data_size);
            WRITE64(0, load_data_size);
            /* Fill in addr and size in DYNAMIC phdr */
            SEEK(0x40 + 56*4 + 0x8);
            WRITE64(0, mcg->dynamic_start);
            WRITE64(0, mcg->dynamic_start + load_data_start);
            WRITE64(0, mcg->dynamic_start + load_data_start);
            WRITE64(0, mcg->dynamic_size);
            WRITE64(0, mcg->dynamic_size);
        }
    }
    return 0;
}

/**
 * Reads the "execformat" and "arch" settings from the "backend minicg"
 * configuration section (defaulting to "elf" and "x86_64") and stores
 * the result in mcg->execformat and mcg->arch.
 *
 * Returns 0 on success. If a value is not recognized, a message is
 * printed to stderr and 2 is returned.
 */
static int parse_options(MiniCGCtx *mcg)
{
    const char *execformat_str;
    const char *arch_str;

    /* TODO these things should be command line options! */
    execformat_str = lrl_config_get(mcg->options->config,
                                    "backend minicg", "execformat", "elf");
    if (!strcmp(execformat_str, "elf")) {
        mcg->execformat = ELF;
    } else {
        fprintf(stderr, "invalid execformat specified: %s\n", execformat_str);
        return 2;
    }

    arch_str = lrl_config_get(mcg->options->config,
                              "backend minicg", "arch", "x86_64");
    if (!strcmp(arch_str, "aarch64")) {
        mcg->arch = AARCH64;
    } else if (!strcmp(arch_str, "lm32")) {
        mcg->arch = LM32;
    } else if (!strcmp(arch_str, "or1k")) {
        mcg->arch = OR1K;
    } else if (!strcmp(arch_str, "ppc64el")) {
        mcg->arch = PPC64EL;
    } else if (!strcmp(arch_str, "rv32i")) {
        mcg->arch = RV32I;
    } else if (!strcmp(arch_str, "rv64gc")) {
        mcg->arch = RV64GC;
    } else if (!strcmp(arch_str, "x86")) {
        mcg->arch = X86;
    } else if (!strcmp(arch_str, "x86_64")) {
        mcg->arch = X86_64;
    } else {
        fprintf(stderr, "invalid arch specified: %s\n", arch_str);
        return 2;
    }
    return 0;
}

/**
 * Walks all definitions in a namespace, recursing into nested
 * namespaces and into the translated definitions of interop blocks.
 *
 * The actual processing of data and function definitions is not
 * implemented yet, so "flags" is currently only passed along.
 */
static void process_namespace(MiniCGCtx *mcg, LRLASTNamespace *ns,
                              ProcessingFlags flags)
{
    LRLASTDefList *entry;

    for (entry = ns->list; entry; entry = entry->next) {
        switch (entry->def.ast_type) {
        case LRL_AST_Def_Type:
            /* Nothing needs to be done here */
            break;
        case LRL_AST_Def_Data:
            /*process_datadef(mcg, &entry->def, flags);*/
            break;
        case LRL_AST_Def_Function:
            /*process_funcdef(mcg, &entry->def, flags);*/
            break;
        case LRL_AST_Namespace:
            process_namespace(mcg, entry->def.kind.namespac, flags);
            break;
        case LRL_AST_Uses:
            /* Ignored */
            break;
        case LRL_AST_Interop: {
            /* Wrap the translated definitions in a temporary namespace,
               so they can be processed like any other namespace */
            LRLASTNamespace wrapper;
            wrapper.ident = NULL;
            wrapper.list = entry->def.kind.interop.translated;
            process_namespace(mcg, &wrapper, flags);
            break; }
        LRL_case_except_ast_namespaces_defs
        default:
            fail("minicg_badswitch");
        }
    }
}

/* Hard-coded startup code per architecture. */
/* TODO */
static const unsigned char startup_aarch64[] = {
    /* Does an exit(42) */
    /* mov w8, 93 [__NR_exit] */
    0xa8, 0x0b, 0x80, 0x52,
    /* mov x0, 42 */
    0x40, 0x05, 0x80, 0xd2,
    /* svc #0 */
    0x01, 0x00, 0x00, 0xd4
};
/* Single zero byte placeholders; no startup code exists for these yet */
static const unsigned char startup_lm32[] = { 0x00 };
static const unsigned char startup_or1k[] = { 0x00 };
static const unsigned char startup_ppc64el[] = { 0x00 };
static const unsigned char startup_rv32i[] = { 0x00 };
static const unsigned char startup_rv64gc[] = { 0x00 };
static const unsigned char startup_x86[] = {
    /*
        ; currently just makes a exit(0) syscall
        use32
        xor eax,eax
        mov al, 1
        xor ebx, ebx
        int 0x80
    */
    0x31, 0xc0, 0xb0, 0x01, 0x31, 0xdb, 0xcd, 0x80,
    0x90, 0xCC,
};
static const unsigned char startup_x86_64[] = {
    /*
        ; currently just makes a exit(0) syscall
        use64
        xor rax,rax
        mov al, 60
        xor edi, edi
        syscall
    */
    0x48, 0x31, 0xc0, 0xb0, 0x3c, 0x31, 0xff, 0x0f, 0x05,
    0x90, 0xCC,
};

/**
 * Writes the code (.text) contents: the startup code of the selected
 * architecture, followed by a GenCode pass over the namespace (which
 * does not generate anything yet).
 *
 * Returns 0 on success, or a non-zero error code.
 */
static int gen_code(MiniCGCtx *mcg, LRLASTNamespace *ns)
{
    const unsigned char *startup_code = NULL;
    size_t startup_size = 0;

    switch (mcg->arch) {
    case AARCH64:
        startup_code = startup_aarch64;
        startup_size = sizeof(startup_aarch64);
        break;
    case LM32:
        startup_code = startup_lm32;
        startup_size = sizeof(startup_lm32);
        break;
    case OR1K:
        startup_code = startup_or1k;
        startup_size = sizeof(startup_or1k);
        break;
    case PPC64EL:
        startup_code = startup_ppc64el;
        startup_size = sizeof(startup_ppc64el);
        break;
    case RV32I:
        startup_code = startup_rv32i;
        startup_size = sizeof(startup_rv32i);
        break;
    case RV64GC:
        startup_code = startup_rv64gc;
        startup_size = sizeof(startup_rv64gc);
        break;
    case X86:
        startup_code = startup_x86;
        startup_size = sizeof(startup_x86);
        break;
    case X86_64:
        startup_code = startup_x86_64;
        startup_size = sizeof(startup_x86_64);
        break;
    default:
        fail("minicg_badarch");
        return 2; /* never write from an unset pointer */
    }
    WRITE_LIMIT(startup_code, startup_size);

    process_namespace(mcg, ns, GenCode);

    /* This is followed by the .strtab, so no need to align */
    return 0;
}

/**
 * Entry point of the "minicg" backend.
 *
 * Parses the backend options, runs the Prepare pass, and then writes
 * the output file (header, code, footer). When dumping IR, the output
 * is written to /dev/null instead of options->output. For executables,
 * the output file is marked as executable when it has been written.
 *
 * Returns 0 on success, 1 if the output file could not be opened and
 * another non-zero value for other errors. The output file is closed
 * on all paths.
 */
static int minicg_process(LRLCtx *ctx, LRLASTNamespace *root,
                          const LRLBackendOptions *options)
{
    MiniCGCtx mcg;
    int errcode;
    const char *filename;

    memset(&mcg, 0, sizeof(mcg));
    mcg.ctx = ctx;
    mcg.options = options;

    fprintf(stderr, "The \"minicg\" backend is a work in progress, "
                    "and does not yet work!\nContinuing anyway...\n");

    errcode = parse_options(&mcg);
    if (errcode) {
        return 2;
    }

    if (options->op == LRL_BO_MakeObject) {
        fprintf(stderr, "Generation of relocatable files (.o) is not supported by the \"minicg\" backend.\n");
        return 2;
    }

    /* Determine size of data sections etc. */
    /* TODO or always generate relocations, and do this at the end?
            but the depending on the offset, the instruction might
            have a different length! */
    process_namespace(&mcg, root, Prepare);

    /* Open output file */
    if (options->op == LRL_BO_DumpIR) {
        /* TODO what should this do? disassemble the output? */
        /* Debug output only */
        filename = "/dev/null"; /* FIXME will not work on windows */
        mcg.debug = 1;
    } else {
        filename = options->output;
        mcg.debug = 0;
    }

    mcg.file = fopen(filename, "w+b");
    if (!mcg.file) {
        perror(filename);
        return 1;
    }

    switch (mcg.execformat) {
    case ELF:
        /* Write header */
        errcode = write_elf_header(&mcg);
        if (errcode) goto out;

        /* Write code (.text) segment */
        mcg.text_start = ftell(mcg.file);
        mcg.text_size = 0;
        errcode = gen_code(&mcg, root);
        if (errcode) goto out;

        /* Write footer */
        errcode = write_elf_footer(&mcg);
        if (errcode) goto out;
        break;
    default:
        fail("minicg_badswitch");
    }

    if (options->op == LRL_BO_MakeExec) {
        /* Make sure everything has been written before the file is
           marked as executable */
        if (fflush(mcg.file) != 0) {
            perror(filename);
            errcode = 2;
            goto out;
        }
        mark_executable(mcg.file, options->output);
    }

  out:
    /* Buffered data can fail to be written when closing the file */
    if (fclose(mcg.file) != 0 && !errcode) {
        perror(filename);
        errcode = 2;
    }
    return errcode;
}

const LRLBackend minicg_backend = {
    "minicg",
    &minicg_process,
};