1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
/*
* Various utility functions for the bootstrap compiler.
*
* Copyright © 2021-2025 Samuel Lidén Borell <samuel@kodafritt.se>
*
* SPDX-License-Identifier: EUPL-1.2+
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "compiler.h"
/**
 * Reads the next non-empty source line from f into line.
 *
 * A line ends at '\n' or '\r'; lines without any characters are skipped.
 * When mode is STRIP_COMMENTS, everything from a '#' up to the end of the
 * line is dropped, and lines that contain nothing but a comment are skipped
 * too. Trailing whitespace is trimmed and the result is NUL-terminated (a
 * line consisting only of spaces is therefore returned as an empty string).
 * The global current_line counter is incremented for every '\n' consumed.
 *
 * Any other byte below ' ' (this includes tab) is reported through error(),
 * and so is a line that does not fit in the buffer together with its
 * terminator, i.e. a line longer than SOURCELINE_MAX-1 bytes.
 *
 * \param f        Stream to read from.
 * \param line     Buffer of SOURCELINE_MAX bytes that receives the line.
 * \param len_out  If non-NULL, receives the line length, excluding the
 *                 terminator.
 * \param mode     STRIP_COMMENTS to strip '#' comments; with any other value
 *                 '#' is stored like an ordinary character.
 * \return true if a line was stored, false if end of file was hit before
 *         any character could be stored.
 */
bool read_source_line(FILE *f, char line[SOURCELINE_MAX], size_t *len_out,
                      enum ReadSourceMode mode)
{
    bool in_comment = false;
    char *end = line;
    for (;;) {
        int ch = fgetc(f);
        if (ch == '\n') current_line++;
        if (ch == EOF) {
            break;
        } else if (in_comment) {
            if (ch == '\n' || ch == '\r') {
                in_comment = false;
                /* Code followed by a comment: the line is complete.
                   A comment-only line is skipped instead. */
                if (end != line) break;
            }
        } else if (ch == '\n' || ch == '\r') {
            /* Skip empty lines (and the second half of a CR LF pair) */
            if (end != line) break;
        } else if (ch == '#' && mode == STRIP_COMMENTS) {
            in_comment = true;
        } else if (end-line >= SOURCELINE_MAX-1) {
            /* The last byte of the buffer is reserved for the terminator
               that gets written after the loop, so nothing may be stored
               at index SOURCELINE_MAX-1 or beyond. */
            /* NOTE(review): the newline of this line has not been consumed
               yet; the bump presumably makes the diagnostic refer to the
               line being read - confirm against error(). */
            current_line++;
            error("Line too long for bootstrap compiler.");
        } else if (ch < ' ') {
            error("Invalid control character");
        } else {
            /* TODO should validate UTF-8 characters! */
            *(end++) = (char)ch;
        }
    }
    if (end != line) {
        /* Trim trailing whitespace */
        while (end > line && (end[-1] == ' ' || end[-1] == '\t')) {
            end--;
        }
        *end = '\0';
        if (len_out) {
            assert(end >= line);
            assert(end-line < SOURCELINE_MAX);
            *len_out = (size_t)(end-line);
        }
        return true;
    } else {
        return false;
    }
}
/**
 * Replaces every byte equal to `from` with `to` in the first `len` bytes
 * of `s`. The buffer is modified in place; bytes beyond `len` are left
 * untouched. Nothing is done when `from` and `to` are the same byte.
 */
void memreplace(char *s, char from, char to, size_t len)
{
    size_t i;

    if (from == to) {
        /* Replacing a byte with itself cannot change anything */
        return;
    }
    for (i = 0; i < len; i++) {
        if (s[i] == from) {
            s[i] = to;
        }
    }
}
/**
 * Returns a freshly malloc'ed, NUL-terminated copy of the first `len` bytes
 * of `s`. The result occupies `len`+1 bytes.
 *
 * `s` and the allocation result are both checked with NO_NULL, and `len`
 * is asserted to be non-zero.
 */
char *memzdup(const char *s, size_t len)
{
    char *dup;

    NO_NULL(s);
    assert(len > 0);

    dup = malloc(len + 1);
    NO_NULL(dup);

    /* Put the terminator in place, then fill in the bytes in front of it */
    dup[len] = '\0';
    memcpy(dup, s, len);
    return dup;
}
/**
 * Validates a relative filename and reports any problem through error().
 *
 * Accepted characters are ASCII letters, digits, '_', '.' and '/' (as the
 * directory separator). The following are reported:
 *  - a leading '/' (absolute path),
 *  - a '.' at the very start or directly after a '/' (hidden file; this
 *    also covers "." and ".." path components),
 *  - two consecutive slashes,
 *  - a backslash,
 *  - any other character.
 */
void check_filename(const char *s)
{
    const char *p;

    /* Checks that only concern the very first character */
    switch (*s) {
    case '/':
        error("Absolute paths are not allowed");
        break;
    case '.':
        error("Hidden files are not allowed");
        break;
    default:
        break;
    }

    for (p = s; *p != '\0'; p++) {
        switch (*p) {
        case '/':
            /* Look at what follows the directory separator */
            if (p[1] == '/') error("Double slash is not allowed");
            if (p[1] == '.') error("Hidden files are not allowed");
            break;
        case '\\':
            error("Use / not \\ as a directory separator");
            break;
        case '_':
        case '.':
            /* Probably safe (the bootstrap compiler does not check for
               Windows/DOS device names such as con/aux/prn) */
            break;
        default:
            if (!((*p >= 'a' && *p <= 'z') ||
                  (*p >= 'A' && *p <= 'Z') ||
                  (*p >= '0' && *p <= '9'))) {
                error("Disallowed character in filename");
            }
            break;
        }
    }
}
/**
 * Returns a pointer to the part of `path` that follows the last '/'.
 *
 * If `path` contains no '/' at all, or if the last '/' is the final
 * character of the string, `path` itself is returned unchanged. The result
 * always points into `path`; nothing is allocated.
 */
const char *path_basename(const char *path)
{
    const char *last_slash = strrchr(path, '/');

    if (last_slash == NULL || last_slash[1] == '\0') {
        return path;
    }
    return last_slash + 1;
}
/* Cheap and simplistic hash step: shifts the running value left by 7 bits,
   adds the running value shifted right by 22 bits, and adds the next byte
   (taken as an unsigned char). ASCII strings of up to 4 chars get unique
   hashes (assuming unsigned int is 32 bits). Note that this is NOT
   cryptographically secure. */
#define HASH(hash, ch) (((hash) << 7) + ((hash) >> 22) + (unsigned char)(ch))

/* Hashes the first `len` bytes of `s` by applying HASH to each byte in
   order, starting from 0. A length of 0 gives the hash 0. */
HashCode hash_str(const char *s, size_t len)
{
    HashCode h = 0;
    size_t i;

    for (i = 0; i < len; i++) {
        h = HASH(h, s[i]);
    }
    return h;
}
|