/* arch.c -- Handling of targets/multiarch Copyright © 2021-2024 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "internal.h" #include #include #include #if defined(__OpenBSD__) #include #endif #define INTERR_ARCH(errnum) MAKE_INTERR(errnum, INTERRBASE_ARCH) #define INTERR_BADMODTYPE1 INTERR_ARCH(0x01) #define INTERR_BADMODTYPE2 INTERR_ARCH(0x02) #define INTERR_BADOUTTYPE1 INTERR_ARCH(0x03) #define INTERR_BADSYSCTYPE1 INTERR_ARCH(0x04) static const char cpu_names[][11+1] = { "ir", "i386", "x86_64", "arm", "aarch64", "riscv32i", "riscv64gc", "powerpc64el", "mipsel" }; static const char sys_names[][10+1] = { "", /* IR generic */ "linux", "openbsd6.8", "freebsd", "netbsd", "gnu", "windows", "slusys" }; static const char userabi_names[][6+1] = { "", /* IR generic */ "gnu", "musl", "", /* openbsd */ "", /* freebsd */ "", /* netbsd */ "", /* windows */ "slusys" }; /** Makes an multiarch string of the cpu/syscalls/userabi enums */ static const char *make_archtriple(struct CSlulConfig *cfg, const struct Target *target) { const char *cpu, *sys, *uabi; char *ret, *end; size_t cpulen, syslen, uabilen, total; assert(target->cpu < NUM_CPU_ARCHES); assert(target->syscalls < NUM_SYS_TYPES); assert(target->userabi < NUM_USERABI_TYPES); cpu = cpu_names[target->cpu]; cpulen = strlen(cpu); sys = sys_names[target->syscalls]; syslen = strlen(sys); uabi = userabi_names[target->userabi]; uabilen = target->syscalls != HURD ? strlen(uabi) : 0; total = cpulen+syslen+2; if (uabilen) total += uabilen+1; ret = cfgalloc(cfg, total+1); if (!ret) return NULL; end = ret; memcpy(end, cpu, cpulen); end += cpulen; *(end++) = '-'; memcpy(end, sys, syslen); end += syslen; if (uabilen) { *(end++) = '-'; memcpy(end, uabi, uabilen); end += uabilen; } *end = '\0'; return ret; } #define REQUIREOS(sc) if (syscalls != (sc)) goto bad_sys_user_combo /** Adds the given cpu/syscalls/userabi combination as a target */ static int add_target(struct CSlulConfig *cfg, enum CpuArch cpu, enum SysCalls syscalls, enum UserABI userabi) { struct Target *target; /* Check that the combo is valid */ switch (userabi) { case USERABI_IR_GENERIC: if (cpu != IR_DUMP || syscalls != SYSCALLS_IR_GENERIC) { goto bad_sys_user_combo; } break; case MUSL: if (syscalls != LINUX) goto bad_sys_user_combo; break; case GNU: /* XXX glibc is used with "k*bsd" naming for glibc + BSD kernels */ if (syscalls == OPENBSD68 || syscalls == FREEBSD || syscalls == NETBSD || syscalls == SLUSYS || syscalls == SYSCALLS_IR_GENERIC) goto bad_sys_user_combo; break; case OPENBSD68_LIBC: REQUIREOS(OPENBSD68); break; case FREEBSD_LIBC: REQUIREOS(FREEBSD); break; case NETBSD_LIBC: REQUIREOS(NETBSD); break; case WINNT_DLLS: /* XXX in theory, Linux/BSD+WINNT could work in with libwine, but we probably need to know the C library (glibc/musl/BSD libc) also */ REQUIREOS(WINDOWS); if (cpu != I386 && cpu != X86_64 && cpu != AARCH64) { goto bad_cpu_user_combo; } break; case SLULIB: REQUIREOS(SLUSYS); break; } /* Add the target */ target = cfgalloc(cfg, sizeof(struct Target)); if (!target) return 0; target->cpu = cpu; target->syscalls = syscalls; target->userabi = userabi; target->archtriple = make_archtriple(cfg, target); if (!target->archtriple) goto outofmem; target->next = cfg->targets; cfg->targets = target; return 1; bad_sys_user_combo: /*error_text2(ctx, CSLUL_E_BADSYSABICOMBO, syscname, abiname);*/ cfgerror(cfg, CSLUL_E_BADSYSABICOMBO); return 0; bad_cpu_user_combo: /*error_text2(ctx, CSLUL_E_BADCPUABICOMBO, syscname, cpuname);*/ cfgerror(cfg, CSLUL_E_BADCPUABICOMBO); return 0; outofmem: return 0; } #define CMPARCH(cident, name, cenum) \ if (!memcmp(start, (name), size)) { \ (cident) = (cenum); \ goto cident ## _set; \ } /** Adds a single multiarch triple as a target. */ int add_arch(struct CSlulConfig *cfg, const char *arch, size_t archlen) { enum CpuArch cpu; enum SysCalls syscalls; enum UserABI userabi; const char *start, *dash; size_t size, remaining; enum { VENDOR, SYSCALLS, USERABI } index; enum CSlulErrorCode errcode = CSLUL_E_BADARCHFORMAT; if (archlen == 4 && !memcmp(arch, "host", 4)) { return cslul_config_add_host_arch(cfg); } /* FIXME there appears to be many ways to write target "triples": (the names and definitions of the parts don't seem to be standardized either) - -- - -- --- -- - looks like all components except cpu are optional? - also, it looks like the vendor field is abused on windows platforms as a way of specifying "uwp". - the part may contain details at the end, e.g. arm7a or riscv64imac - *BSD does not seem to ever have any specified. - same with GNU/Hurd, which is confusingly named "gnu" just like the (or maybe Hurd is simply the default for "gnu" ?) - for windows, there is a "msvc" , but i think msvc versions are not binary compatible, so that would actually be multiple abis... - some items have several names. 64-bit windows is "windows" in rustc but "w64" in mingw's multiarch filename on Debian. - "cloudabi" might sound like a but is actually the - syscalls=none,userabi=elf appears to be in use for no-OS building. - syscalls?=uefi appears to be in use also So it is really messy to parse... Maybe the simplest way is to use this algorithm: 1. Match the part against a large list of full strings. - then properties internally, such which backend to use and which cpu "subtype" to use. 2. Set the "minimum index" to 2 For each segment after : 2. Find the matching item (of any type!) for the segment - if lower than the minimum index, then report error. 3. Set any defaults based on this item (e.g. syscalls and userabi) 4. Set "minimum index" to the index of the matching item + 1 */ /* CPU */ dash = memchr(arch, '-', archlen); size = dash ? (size_t)(dash-arch) : archlen; cpu = (enum CpuArch)-1; switch (size) { case 2: if (!memcmp(arch, "ir", size)) { if (dash) goto error; cpu = IR_DUMP; syscalls = SYSCALLS_IR_GENERIC; userabi = USERABI_IR_GENERIC; goto done; } break; case 3: if (!memcmp(arch, "arm", size)) cpu = ARM; break; case 4: if (!memcmp(arch, "i386", size)) cpu = I386; break; case 6: if (!memcmp(arch, "x86_64", size)) cpu = X86_64; if (!memcmp(arch, "mipsel", size)) cpu = MIPSEL; break; case 7: if (!memcmp(arch, "aarch64", size)) cpu = AARCH64; break; case 8: if (!memcmp(arch, "riscv32i", size)) cpu = RV32; break; case 9: if (!memcmp(arch, "riscv64gc", size)) cpu = RV64GC; break; case 11: if (!memcmp(arch, "powerpc64el", size)) cpu = PPC64EL; break; } if ((int)cpu == -1) { errcode = CSLUL_E_BADARCHCPU; goto error; } if (!dash) goto error; /* Now the , and componets follow. All are seem to be optional, but they have to come in that order. */ index = VENDOR; syscalls = (enum SysCalls)-1; userabi = (enum UserABI)-1; remaining = archlen; for (;;) { remaining -= size+1; start = dash+1; dash = memchr(start, '-', remaining); size = dash ? (size_t)(dash-start) : remaining; switch (size) { case 3: CMPARCH(userabi, "gnu", GNU) break; case 4: CMPARCH(userabi, "musl", MUSL) break; case 5: CMPARCH(syscalls, "linux", LINUX) break; case 6: CMPARCH(syscalls, "netbsd", NETBSD) CMPARCH(syscalls, "slusys", SLUSYS) break; case 7: CMPARCH(syscalls, "freebsd", FREEBSD) CMPARCH(syscalls, "windows", WINDOWS) /* FIXME or is this a User ABI? */ if (!memcmp(start, "unknown", size)) { if (index++ == USERABI || !dash) goto userabi_set; continue; } break; case 10: CMPARCH(syscalls, "openbsd6.8", OPENBSD68) break; } cfgerror_textlen(cfg, CSLUL_E_BADARCHITEM, start, size); goto error; syscalls_set: if (index > SYSCALLS) { errcode = CSLUL_E_BADARCHFORMAT; goto error; } index = USERABI; if (!dash) goto done; continue; userabi_set: if (remaining > size) { /* more -parts after UserABI */ errcode = CSLUL_E_BADARCHFORMAT; goto error; } break; } done: if ((int)syscalls == -1 && (int)userabi == -1) { errcode = CSLUL_E_BADARCHFORMAT; goto error; } else if ((int)userabi == -1) { static const enum UserABI default_abi[] = { USERABI_IR_GENERIC, GNU, /* for Linux */ OPENBSD68_LIBC, /* for OpenBSD */ FREEBSD_LIBC, /* for FreeBSD */ NETBSD_LIBC, /* for NetBSD */ GNU, /* for Hurd */ WINNT_DLLS, /* for Windows */ SLULIB /* for SLUSYS */ }; userabi = default_abi[syscalls]; } else if ((int)syscalls == -1) { static const enum SysCalls default_syscalls[] = { SYSCALLS_IR_GENERIC, HURD, LINUX, OPENBSD68, FREEBSD, NETBSD, WINDOWS, SLUSYS }; syscalls = default_syscalls[userabi]; } return add_target(cfg, cpu, syscalls, userabi); error: cfgerror_textlen(cfg, errcode, arch, archlen); return 0; } int cslul_config_add_host_arch(struct CSlulConfig *cfg) { #ifndef NO_HOST_TARGET enum CpuArch cpu; enum SysCalls syscalls; enum UserABI userabi; #endif #ifdef NO_HOST_TARGET error_config_linecol(cfg, CSLUL_E_NOHOSTTARGET); return 0; #else /* Most of these come from https://sourceforge.net/p/predef/wiki/ */ /* FIXME most of these are untested. */ /* TODO move out this messy code to a separate file */ #if defined(DEFAULT_CPU) cpu = DEFAULT_CPU; #elif (defined(__aarch64__) || defined(_M_ARM64)) && !defined(__AARCH64EB__) cpu = AARCH64; #elif (defined(__arm__) || defined(_ARM) || defined(__arm) || defined(_M_ARM)) && !defined(__ARMEB__) cpu = ARM; /* FIXME this has a lot of different versions... both LE/BE and versions 2-7 */ #elif defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) cpu = X86_64; #elif defined(__i386__) || defined(__i386) || defined(_X86_) || defined(__X86__) || defined(_M_IX86) cpu = I386; #elif defined(_MIPSEL) || defined(__MIPSEL) || defined(__MIPSEL__) cpu = MIPSEL; #elif defined(__powerpc64__) || defined(_ARCH_PPC64) cpu = PPC64EL; #elif defined(_M_RISCV32) || (defined(__riscv) && __riscv_xlen==32) cpu = RV32; #elif defined(_M_RISCV64) || (defined(__riscv) && __riscv_xlen==64) cpu = RV64GC; /* FIXME how to detect non-GC variants? */ #else #error Unknown CPU architecture, please set with -DDEFAULT_CPU=... #endif /* XXX With an OS independent format + OS specific runtimes+loaders it would be possible to remove the SYSCALLABI and USERABI parts */ #if defined(DEFAULT_SYSCALLABI) syscalls = DEFAULT_SYSCALLABI; #elif defined(__linux__) syscalls = LINUX; #elif defined(__OpenBSD__) && defined(OpenBSD6_8) syscalls = OPENBSD68; #elif defined(__FreeBSD__) syscalls = FREEBSD; #elif defined(__NetBSD__) syscalls = NETBSD; #elif defined(__GNU__) || defined(__gnu_hurd__) syscalls = HURD; #elif defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) || defined(__WIN32__) || defined(__WINDOWS__) syscalls = WINDOWS; #else #error Unkown syscall ABI, please set with -DDEFAULT_SYSCALLABI=... #endif /* These (except WINNT and BSD) could possibly be detected at runtime also. */ #if defined(DEFAULT_USERABI) userabi = DEFAULT_USERABI; #elif defined(__GLIBC__) userabi = GNU; #elif defined(__OpenBSD__) && defined(OpenBSD6_8) userabi = OPENBSD68_LIBC; #elif defined(__FreeBSD__) userabi = FREEBSD_LIBC; #elif defined(__NetBSD__) userabi = NETBSD_LIBC; /* should be binary compatible between versions, BUT it looks like there are/were plans to change the ABI for cleanup, with a new separate libc (and loader?) */ #elif defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) || defined(__WIN32__) || defined(__WINDOWS__) userabi = WINNT_DLLS; #else /* Use -DDEFAULT_USERABI=MUSL for Musl */ #error Unkown user ABI, please set with -DDEFAULT_USERABI=... #endif return add_target(cfg, cpu, syscalls, userabi); #endif } /** * Sets the output type based on the default for type of the module being * compiled. If the output type has been overridden, then it checks that the * output type is valid for the module type. */ static int detect_output_type(struct CSlul *ctx) { enum CSlulErrorCode errcode; if (ctx->cfg->requested_outputtype == CSLUL_OT_AUTO) { enum CSlulOutputType outtype; switch (ctx->module.type) { case CSLUL_MT_INTERNAL: outtype = CSLUL_OT_OBJFILE; break; case CSLUL_MT_LIBRARY: outtype = CSLUL_OT_LIBRARY; break; case CSLUL_MT_PLUGIN: outtype = CSLUL_OT_PLUGIN; break; case CSLUL_MT_LIBRARYSPEC: case CSLUL_MT_PLUGINSPEC: outtype = CSLUL_OT_CHECK; break; /* XXX or CSLUL_OT_HEADER */ case CSLUL_MT_APP: outtype = CSLUL_OT_EXE; break; case CSLUL_MT_INVALID: case CSLUL_MT_UNSET: /* should not happen */ default: internal_error(ctx, INTERR_BADMODTYPE1); return 0; } ctx->outputtype = outtype; return 1; } /* Not auto-detected. Check the requested output type */ ctx->outputtype = ctx->cfg->requested_outputtype; if (ctx->module.type == CSLUL_MT_LIBRARYSPEC || ctx->module.type == CSLUL_MT_PLUGINSPEC) { if (ctx->outputtype == CSLUL_OT_CHECK || ctx->outputtype == CSLUL_OT_HEADER) return 1; else { error_linecol(ctx, CSLUL_E_OUTPUTSPEC, 0, 0); return 0; } } switch (ctx->outputtype) { case CSLUL_OT_AUTO: /* should not happen */ case CSLUL_OT_CHECK: return 1; case CSLUL_OT_OBJFILE: return 1; case CSLUL_OT_LIBRARY: case CSLUL_OT_DYNLIB: case CSLUL_OT_STATLIB: case CSLUL_OT_HEADER: if (ctx->module.type == CSLUL_MT_LIBRARY || ctx->module.type == CSLUL_MT_INTERNAL) return 1; errcode = CSLUL_E_OUTPUTLIB; goto error; case CSLUL_OT_PLUGIN: if (ctx->module.type == CSLUL_MT_PLUGIN) return 1; errcode = CSLUL_E_OUTPUTPLUGIN; goto error; case CSLUL_OT_EXE: if (ctx->module.type == CSLUL_MT_APP) return 1; errcode = CSLUL_E_OUTPUTEXE; goto error; default: internal_error(ctx, INTERR_BADMODTYPE2); return 0; } error: error_text(ctx, errcode, 0, 0, modtype2str[ctx->module.type]); return 0; } /** * Adds a single output file to the given target. */ static int add_output(struct CSlul *ctx, const struct Target *target, enum OutFormat format, const char *outdir, const char *prefix, const char *suffix) { char *name, *np; const char *fullpath; size_t prefixlen = prefix ? strlen(prefix) : 0; size_t namelen = strlen(ctx->module.name); size_t suffixlen = suffix ? strlen(suffix) : 0; CSlulFile f; struct Output *output; name = aalloc(ctx, prefixlen+namelen+suffixlen+1, 1); if (!name) return 0; np = name; if (prefix) { memcpy(np, prefix, prefixlen); np += prefixlen; } memcpy(np, ctx->module.name, namelen); np += namelen; if (suffix) { memcpy(np, suffix, suffixlen); np += suffixlen; } *np = '\0'; fullpath = make_path(ctx, outdir, name); if (!fullpath) return 0; /* Open file. It will be closed in backend_output() */ if (format == OF_ELFEXE || format == OF_PECLI || format == OF_PEGUI) { f = ctx_createexec(ctx, fullpath); } else { f = ctx_fopen(ctx, fullpath, "wb"); } if (!f) return 0; /* Add */ output = aallocp(ctx, sizeof(struct Output)); if (!output) return 0; PROTECT_STRUCT(*output); output->format = format; output->filename = fullpath; output->file = f; output->target = target; output->next = ctx->outputs; ctx->outputs = output; return 1; } /** Adds the output types and filenames for posix targets */ static int create_outputs_posix(struct CSlul *ctx, const struct Target *target, const char *outdir) { switch (ctx->outputtype) { case CSLUL_OT_CHECK: return 1; case CSLUL_OT_OBJFILE: return add_output(ctx, target, OF_ELFOBJFILE, outdir, NULL, ".o"); case CSLUL_OT_LIBRARY: return add_output(ctx, target, OF_ELFDYNLIB, outdir, "lib", ".so") /*&& add_output(ctx, target, OF_ELFSTATLIB, outdir, "lib", ".a")*/; case CSLUL_OT_DYNLIB: return add_output(ctx, target, OF_ELFDYNLIB, outdir, "lib", ".so"); case CSLUL_OT_STATLIB: return add_output(ctx, target, OF_ELFSTATLIB, outdir, "lib", ".a"); case CSLUL_OT_HEADER: return add_output(ctx, target, OF_CHEADER, outdir, NULL, ".h"); case CSLUL_OT_PLUGIN: return add_output(ctx, target, OF_ELFDYNLIB, outdir, NULL, ".so"); case CSLUL_OT_EXE: return add_output(ctx, target, OF_ELFEXE, outdir, NULL, NULL); case CSLUL_OT_AUTO: ; /* should not happen */ } internal_error(ctx, INTERR_BADOUTTYPE1); return 0; } /** Adds the output types and filenames for win32/win64 targets */ static int create_outputs_win(struct CSlul *ctx, const struct Target *target, const char *outdir) { switch (ctx->outputtype) { case CSLUL_OT_CHECK: return 1; case CSLUL_OT_OBJFILE: return add_output(ctx, target, OF_COFFOBJFILE, outdir, NULL, ".o"); /* FIXME check which file ext is used on windows */ case CSLUL_OT_LIBRARY: return add_output(ctx, target, OF_PEDYNLIB, outdir, NULL, ".dll") /*&& add_output(ctx, target, OF_COFFSTATLIB, outdir, NULL, ".a")*/; /* FIXME check which file ext is used on windows */ case CSLUL_OT_DYNLIB: return add_output(ctx, target, OF_PEDYNLIB, outdir, NULL, ".dll"); case CSLUL_OT_STATLIB: return add_output(ctx, target, OF_COFFSTATLIB, outdir, NULL, ".a"); /* FIXME check which file ext is used on windows */ case CSLUL_OT_HEADER: return add_output(ctx, target, OF_CHEADER, outdir, NULL, ".h"); case CSLUL_OT_PLUGIN: return add_output(ctx, target, OF_PEDYNLIB, outdir, NULL, ".dll"); case CSLUL_OT_EXE: return add_output(ctx, target, OF_PEGUI, outdir, NULL, ".exe"); case CSLUL_OT_AUTO: ; /* should not happen */ } internal_error(ctx, INTERR_BADOUTTYPE1); return 0; } /** * Returns the output directory for a given target, and creates any necessary * bin/ or lib/ subdirectory if needed. */ static const char *create_target_dirs(struct CSlul *ctx, const char *outdir, const struct Target *target) { char *ret, *end; const char *typedir; size_t outdir_len, archtriple_len; assert(outdir != NULL); /* FIXME this check seems wrong */ if (!ctx->cfg->use_arch_dirs && ctx->outputtype != CSLUL_OT_HEADER) { /* FIXME should we put C include files in /include ? */ return outdir; } /* Build the string: /{bin,lib}/\0 and create each directory component (except for outdir) */ if (outdir) { outdir_len = strlen(outdir); while (outdir_len > 0 && outdir[outdir_len-1] == ctx->cfg->params.dirsep) { outdir_len--; } outdir_len++; } else { outdir_len = 0; } archtriple_len = strlen(target->archtriple); ret = aalloc(ctx, outdir_len + 4 + archtriple_len + 1, 1); if (!ret) return NULL; end = ret; if (outdir) { outdir_len--; /* directory separator is added separately */ memcpy(end, outdir, outdir_len); end += outdir_len; *(end++) = ctx->cfg->params.dirsep; } typedir = (ctx->outputtype == CSLUL_OT_EXE ? "bin" : "lib"); memcpy(end, typedir, 3); end += 3; *end = '\0'; if (!ctx_mkdir(ctx, ret)) return NULL; /* make "bin" or "lib" dir */ *(end++) = ctx->cfg->params.dirsep; memcpy(end, target->archtriple, archtriple_len); end += archtriple_len; *end = '\0'; if (!ctx_mkdir(ctx, ret)) return NULL; /* make dir */ return ret; } /** Opens the output files for writing */ static int create_outputs(struct CSlul *ctx, const char *outdir) { const struct Target *target = ctx->cfg->targets; if (ctx->outputtype == CSLUL_OT_CHECK) return 1; /* no output */ for (; target; target = target->next) { const char *fulldir = create_target_dirs(ctx, outdir, target); if (!fulldir) return 0; switch (target->userabi) { case USERABI_IR_GENERIC: if (!add_output(ctx, target, OF_RAW, fulldir, NULL, ".ir")) { return 0; } continue; case GNU: case MUSL: case OPENBSD68_LIBC: case FREEBSD_LIBC: case NETBSD_LIBC: if (!create_outputs_posix(ctx, target, fulldir)) return 0; continue; case WINNT_DLLS: if (!create_outputs_win(ctx, target, fulldir)) return 0; continue; case SLULIB: ;/* TODO */ } internal_error(ctx, INTERR_BADSYSCTYPE1); return 0; } return 1; } int init_outputs(struct CSlul *ctx, const char *outdir) { return detect_output_type(ctx) && create_outputs(ctx, outdir); }