1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
/*
* Various utility functions for the bootstrap compiler.
*
* Copyright © 2021-2026 Samuel Lidén Borell <samuel@kodafritt.se>
*
* SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "compiler.h"
/**
 * Reads the next non-blank source line from `f` into `line`.
 *
 * Both '\n' and '\r' terminate a line, and terminators seen before any
 * character has been stored are skipped (so blank lines, and the '\n' of a
 * "\r\n" pair, are ignored). When `mode` is STRIP_COMMENTS, everything from
 * a '#' up to the end of that line is discarded. Trailing spaces are removed
 * and the result is NUL-terminated.
 *
 * Note that a line that holds only spaces (optionally followed by a stripped
 * comment) is NOT skipped: it is returned as an empty string.
 *
 * The global `current_line` is incremented for every '\n' that is consumed.
 *
 * `line` must have room for SOURCELINE_MAX chars, including the NUL
 * terminator, so at most SOURCELINE_MAX-1 characters are stored.
 * If `len_out` is non-NULL, it receives the length of the returned line
 * (excluding the terminator).
 *
 * Returns true if a line was stored in `line`, or false if end-of-file was
 * reached before any character was stored.
 */
bool read_source_line(FILE *f, char line[SOURCELINE_MAX], size_t *len_out,
                      enum ReadSourceMode mode)
{
    bool in_comment = false;
    char *end = line;
    for (;;) {
        int ch = fgetc(f);
        if (ch == '\n') current_line++;
        if (ch == EOF) {
            break;
        } else if (in_comment) {
            /* Skip everything up to the end of the line */
            if (ch == '\n' || ch == '\r') {
                in_comment = false;
                if (end != line) break;
            }
        } else if (ch == '\n' || ch == '\r') {
            /* Only stop if something has been stored, i.e. skip blank lines */
            if (end != line) break;
        } else if (ch == '#' && mode == STRIP_COMMENTS) {
            in_comment = true;
        } else if (end-line >= SOURCELINE_MAX-1) {
            /* The buffer is full: the last element must stay free for the
               NUL terminator that is written after the loop.
               NOTE(review): the increment below presumably makes the error
               refer to the line being read, since its '\n' has not been
               counted yet - confirm against error(). */
            current_line++;
            error("Line too long for bootstrap compiler.");
        } else if (ch < ' ') {
            /* This also rejects tab characters */
            error("Invalid control character");
        } else {
            /* TODO should validate UTF-8 characters! */
            *(end++) = (char)ch;
        }
    }
    if (end != line) {
        /* Strip trailing whitespace */
        while (end > line && (end[-1] == ' ' || end[-1] == '\t')) {
            end--;
        }
        *end = '\0';
        if (len_out) {
            assert(end >= line);
            assert(end-line < SOURCELINE_MAX);
            *len_out = (size_t)(end-line);
        }
        return true;
    } else {
        return false;
    }
}
/**
 * Replaces every occurrence of the character `from` with `to` within the
 * first `len` chars of `s`. The buffer is scanned by length only, so NUL
 * characters do not stop the scan.
 */
void replacechar(char *s, char from, char to, size_t len)
{
    size_t i;

    if (from != to) {
        for (i = 0; i < len; i++) {
            if (s[i] == from) {
                s[i] = to;
            }
        }
    }
}
/**
 * Creates a NUL-terminated heap copy of the `len` chars starting at `s`.
 *
 * `len` must be non-zero (checked with assert). Both `s` and the result of
 * malloc() are passed through NO_NULL().
 * NOTE(review): NO_NULL is presumably a fatal NULL check - confirm.
 *
 * Returns a malloc'ed buffer of len+1 chars.
 */
char *dupmemz(const char *s, size_t len)
{
    char *copy;

    NO_NULL(s);
    assert(len > 0);
    copy = malloc(len + 1);
    NO_NULL(copy);
    copy[len] = '\0';
    memcpy(copy, s, len);
    return copy;
}
/**
 * Calls error_str() if the first `len` chars of `comp` are equal to the
 * first `len` chars of `reject`, ignoring the case of ASCII letters.
 * Does nothing if they differ.
 */
static void reject_filename(const char *comp, const char *reject, size_t len)
{
    size_t i;

    for (i = 0; i < len; i++) {
        /* Setting bit 0x20 gives a case insensitive comparison.
           Works for ASCII letters */
        unsigned have = (unsigned)comp[i] | 0x20;
        unsigned want = (unsigned)reject[i] | 0x20;
        if (have != want) {
            return;
        }
    }
    error_str("Filename might not work on Windows/DOS", comp);
}
/**
 * Checks for special filename components that are reserved on Windows/DOS.
 *
 * The component to check is the range from `s` up to (not including) `end`.
 * Only components of exactly 3 or 4 chars are inspected. Matches are
 * reported through reject_filename().
 */
static void check_device_filenames(const char *s, const char *end)
{
    /* "lst" should work on Windows, but better safe than sorry. */
    static const char *const reserved3[] = {
        "aux", "con", "lst", "nul", "prn"
    };
    size_t i;

    if (end - s == 3) {
        for (i = 0; i < sizeof(reserved3) / sizeof(reserved3[0]); i++) {
            reject_filename(s, reserved3[i], 3);
        }
    } else if (end - s == 4) {
        /* COM1 - 9 and LPT1 - 9 are device filenames on Windows/DOS.
           COM0 and LPT0 are additionally disallowed by Windows Explorer. */
        if (s[3] < '0' || s[3] > '9') {
            return;
        }
        reject_filename(s, "com", 3);
        reject_filename(s, "lpt", 3);
    }
}
/**
 * Validates that the relative path `s` is portable and ends with the file
 * extension `fileext` (given without the leading dot).
 *
 * Problems are reported through error_str(), with the full path as the
 * second argument. The following is checked:
 * - The path must not start with '/' (absolute path).
 * - No path component may start with '.' (hidden file).
 * - The path must not be longer than 50 chars.
 * - Path components may only contain ASCII letters, digits and '_'.
 * - Components are separated by a single '/'; '\' is rejected.
 * - No path component (the part before the '/' or the '.') may be a
 *   reserved Windows/DOS device name.
 * - The first '.' must be followed by exactly `fileext`.
 *
 * Returns normally as soon as a valid file extension has been found.
 */
void check_filename(const char *s, const char *fileext)
{
    const char *fn = s;
    const char *component = s;

    if (*s == '/') error_str("Absolute paths are not allowed", fn);
    if (*s == '.') error_str("Hidden files are not allowed", fn);
    if (strlen(s) > 50) {
        error_str("Filename is very long (over arbitrary limit of 50 "
                  "characters). Long paths might not work on all systems", fn);
    }
    for (; *s; s++) {
        char c = *s;
        if (c == '/') {
            check_device_filenames(component, s);
            /* The next component starts right after the slash. Without
               this, device names would only be detected in the first
               component of the path. */
            component = s + 1;
            if (s[1] == '/') error_str("Double slash is not allowed", fn);
            if (s[1] == '.') error_str("Hidden files are not allowed", fn);
        } else if ((c >= 'a' && c <= 'z') ||
                   (c >= 'A' && c <= 'Z') ||
                   (c >= '0' && c <= '9') ||
                   c == '_') {
            /* Allowed character */
        } else if (c == '.') {
            /* Start of the file extension. The part before the dot is the
               last component, which must not be a device name either. */
            check_device_filenames(component, s);
            if (!strcmp(s+1, fileext)) {
                return;
            } else {
                error_str("File extension not allowed", fn);
            }
        } else if (*s == '\\') {
            error_str("Use / not \\ as a directory separator", fn);
        } else {
            error_str("Disallowed character in filename", fn);
        }
    }
    error_str("Missing file extension", fn);
}
/**
 * Returns a pointer to the part of `path` that follows the last '/'.
 *
 * If `path` contains no '/', or if nothing follows the last '/' (i.e. the
 * path ends with a slash), then `path` itself is returned unchanged.
 * The returned pointer always points into `path`.
 */
const char *path_basename(const char *path)
{
    const char *slash = strrchr(path, '/');

    if (slash == NULL || slash[1] == '\0') {
        return path;
    }
    return slash + 1;
}
/* Fast but very stupid hash function. It guarantees unique hashes for ASCII
   strings up to 4 chars (assuming unsigned int is 32 bits), but note that it
   is NOT cryptographically secure.

   HASH mixes one more byte `ch` into `hash`: the old value is shifted left
   by 7 bits, the old value shifted right by 22 bits is added back in, and
   finally the byte (as an unsigned char) is added.
   Note that the `hash` argument is evaluated twice. */
#define HASH(hash, ch) (((hash) << 7) + ((hash) >> 22) + (unsigned char)(ch))

/**
 * Computes the hash of the `len` chars starting at `s`, by feeding them
 * one at a time, in order, through HASH. The hash of an empty string
 * (len == 0) is 0.
 */
HashCode hash_str(const char *s, size_t len)
{
    HashCode h = 0;
    size_t i;

    for (i = 0; i < len; i++) {
        h = HASH(h, s[i]);
    }
    return h;
}
|