/* * Various utility functions for the bootstrap compiler. * * Copyright © 2021-2026 Samuel Lidén Borell * * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later */ #include #include #include #include #include "compiler.h" bool read_source_line(FILE *f, char line[SOURCELINE_MAX], size_t *len_out, enum ReadSourceMode mode) { bool in_comment = false; char *end = line; for (;;) { int ch = fgetc(f); if (ch == '\n') current_line++; if (ch == EOF) { break; } else if (in_comment) { if (ch == '\n' || ch == '\r') { in_comment = false; if (end != line) break; } } else if (ch == '\n' || ch == '\r') { if (end != line) break; } else if (ch == '#' && mode == STRIP_COMMENTS) { in_comment = true; } else if (end-line > SOURCELINE_MAX) { current_line++; error("Line too long for bootstrap compiler."); } else if (ch < ' ') { error("Invalid control character"); } else { /* TODO should validate UTF-8 characters! */ *(end++) = (char)ch; } } if (end != line) { while (end > line && (end[-1] == ' ' || end[-1] == '\t')) { end--; } *end = '\0'; if (len_out) { assert(end >= line); assert(end-line <= SOURCELINE_MAX); *len_out = (size_t)(end-line); } return true; } else { return false; } } void replacechar(char *s, char from, char to, size_t len) { if (from == to) return; for (; len--; s++) { if (*s == from) { *s = to; } } } char *dupmemz(const char *s, size_t len) { char *dup; NO_NULL(s); assert(len > 0); dup = malloc(len+1); NO_NULL(dup); memcpy(dup, s, len); dup[len] = '\0'; return dup; } static void reject_filename(const char *comp, const char *reject, size_t len) { const char *s1 = comp, *s2 = reject; while (len--) { /* Case insensitive comparison. Works for ASCII letters */ unsigned a = (unsigned)*(s1++) | 0x20; unsigned b = (unsigned)*(s2++) | 0x20; if (a != b) { return; } } error_str("Filename might not work on Windows/DOS", comp); } /** Checks for special filename components that are reserved on Windows/DOS */ static void check_device_filenames(const char *s, const char *end) { switch (end - s) { case 3: reject_filename(s, "aux", 3); reject_filename(s, "con", 3); reject_filename(s, "lst", 3); /* should work on Windows, but better safe than sorry. */ reject_filename(s, "nul", 3); reject_filename(s, "prn", 3); break; case 4: /* COM1 - 9 and LPT1 - 9 are device filenames on Windows/DOS. COM0 and LPT0 are additionally disallowed by Windows Explorer. */ if (s[3] >= '0' && s[3] <= '9') { reject_filename(s, "com", 3); reject_filename(s, "lpt", 3); } break; } } void check_filename(const char *s, const char *fileext) { const char *fn = s; const char *component = s; if (*s == '/') error_str("Absolute paths are not allowed", fn); if (*s == '.') error_str("Hidden files are not allowed", fn); if (strlen(s) > 50) { error_str("Filename is very long (over arbitrary limit of 50 " "characters). Long paths might not work on all systems", fn); } for (; *s; s++) { char c = *s; if (c == '/') { check_device_filenames(component, s); if (s[1] == '/') error_str("Double slash is not allowed", fn); if (s[1] == '.') error_str("Hidden files are not allowed", fn); } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') { } else if (c == '.') { check_device_filenames(component, s); if (!strcmp(s+1, fileext)) { return; } else { error_str("File extension not allowed", fn); } } else if (*s == '\\') { error_str("Use / not \\ as a directory separator", fn); } else { error_str("Disallowed character in filename", fn); } } error_str("Missing file extension", fn); } const char *path_basename(const char *path) { const char *base = strrchr(path, '/'); return base && base[1] ? base+1 : path; } /* Fast but very stupid hash function. It guarantees unique hashes for ASCII strings up to 4 chars (assuming unsigned int is 32 bits), but note that it is NOT cryptographically secure. */ #define HASH(hash, ch) (((hash) << 7) + ((hash) >> 22) + (unsigned char)(ch)) HashCode hash_str(const char *s, size_t len) { HashCode h = 0; while (len--) { h = HASH(h, *(s++)); } return h; }