/* * Various utility functions for the bootstrap compiler. * * Copyright © 2021-2025 Samuel Lidén Borell * * SPDX-License-Identifier: EUPL-1.2+ */ #include #include #include #include #include "compiler.h" bool read_source_line(FILE *f, char line[SOURCELINE_MAX], size_t *len_out, enum ReadSourceMode mode) { bool in_comment = false; char *end = line; for (;;) { int ch = fgetc(f); if (ch == '\n') current_line++; if (ch == EOF) { break; } else if (in_comment) { if (ch == '\n' || ch == '\r') { in_comment = false; if (end != line) break; } } else if (ch == '\n' || ch == '\r') { if (end != line) break; } else if (ch == '#' && mode == STRIP_COMMENTS) { in_comment = true; } else if (end-line > SOURCELINE_MAX) { current_line++; error("Line too long for bootstrap compiler."); } else if (ch < ' ') { error("Invalid control character"); } else { /* TODO should validate UTF-8 characters! */ *(end++) = (char)ch; } } if (end != line) { while (end > line && (end[-1] == ' ' || end[-1] == '\t')) { end--; } *end = '\0'; if (len_out) { assert(end >= line); assert(end-line <= SOURCELINE_MAX); *len_out = (size_t)(end-line); } return true; } else { return false; } } void memreplace(char *s, char from, char to, size_t len) { if (from == to) return; for (; len--; s++) { if (*s == from) { *s = to; } } } char *memzdup(const char *s, size_t len) { char *dup; NO_NULL(s); assert(len > 0); dup = malloc(len+1); NO_NULL(dup); memcpy(dup, s, len); dup[len] = '\0'; return dup; } void check_filename(const char *s) { if (*s == '/') error("Absolute paths are not allowed"); if (*s == '.') error("Hidden files are not allowed"); for (; *s; s++) { char c = *s; if (c == '/') { if (s[1] == '/') error("Double slash is not allowed"); if (s[1] == '.') error("Hidden files are not allowed"); } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '.') { /* Probably safe (ignoring check for Windows/DOS con/aux/prn... in the bootstrap compiler) */ } else if (*s == '\\') { error("Use / not \\ as a directory separator"); } else { error("Disallowed character in filename"); } } } const char *path_basename(const char *path) { const char *base = strrchr(path, '/'); return base && base[1] ? base+1 : path; } /* Fast but very stupid hash function. It guarantees unique hashes for ASCII strings up to 4 chars (assuming unsigned int is 32 bits), but note that it is NOT cryptographically secure. */ #define HASH(hash, ch) (((hash) << 7) + ((hash) >> 22) + (unsigned char)(ch)) HashCode hash_str(const char *s, size_t len) { HashCode h = 0; while (len--) { h = HASH(h, *(s++)); } return h; }