1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
|
/*
* Outputs C expressions.
*
* Copyright © 2020-2025 Samuel Lidén Borell <samuel@kodafritt.se>
*
* SPDX-License-Identifier: EUPL-1.2+
*/
#include <assert.h>
#include <inttypes.h> /* This one is not in C89 */
#include "compiler.h"
#include "out.h"
/**
 * Writes `len` bytes starting at `s` as the contents of a C string literal.
 * The enclosing double quotes are not written by this function.
 *
 * - Backslash and double quote are written with a preceding backslash.
 * - Bytes from ' ' up to and including 0x7e are written unchanged.
 * - All other bytes are written as `\x` hex escapes.
 *
 * \param s    First byte to write. Only s[0]..s[len-1] are accessed.
 * \param len  Number of bytes to write.
 */
static void emit_string_data(const char *s, size_t len)
{
    while (len) {
        char c = *s;
        if (c == '\\' || c == '"') {
            outc('\\');
            outc(c);
        } else if (c >= ' ' && c < 0x7f) {
            outc(c);
        } else {
            outf("\\x%x", (unsigned char)c);
            /* A hex escape in C continues over all hex digits that
               follow it. If the next byte of this chunk is a hex digit,
               the literal is closed and re-opened to end the escape.
               The look-ahead is only done when there is a next byte
               (len > 1), so nothing past s[len-1] is read. */
            if (len > 1) {
                char c1 = s[1];
                if ((c1 >= '0' && c1 <= '9') ||
                    (c1 >= 'a' && c1 <= 'f') ||
                    (c1 >= 'A' && c1 <= 'F')) {
                    outf("\" \"");
                }
            }
        }
        s++;
        len--;
    }
}
/** Number of string bytes that are emitted per chunk (per string literal).
    Strings are split into chunks both for readability (to avoid extremely
    long lines), and also to avoid the 509 byte limit in C89.
    The longest possible escape sequence is 7 characters ('\xFF" "'), and
    509/7 is approximately 72.7, so 70 is a good chunk size. */
#define STRCHUNK_SIZE 70
/**
 * Emits the beginning of the declaration of a string constant, up to and
 * including the variable name `strconst_<id>`. Neither an initializer nor
 * a terminating semicolon is emitted.
 *
 * The emitted struct type has the following members:
 * - `info`, an unsigned char.
 * - `len`, which is only present if str->len exceeds 0x7f. Its type is
 *   selected from the length: unsigned char up to 0x17f, uint_least16_t
 *   up to 0x1017f and uint_least32_t up to 0x3fffffff.
 * - `chunk0`, `chunk1`, ... One array per chunk of at most STRCHUNK_SIZE
 *   bytes. No chunk members are emitted for an empty string.
 *
 * An error is reported with ast_error if the string is longer than
 * 0x3fffffff bytes.
 *
 * \param str  String constant to emit the header for.
 */
static void emit_string_header(struct ExprString *str)
{
    size_t len = str->len;
    unsigned chunk_num = 0;
    outf(
        "static const struct {\n"
        " unsigned char info;\n");
    if (len <= 0x7f) {
        /* length is stored in `info` */
    } else if (len <= 0x17f) {
        outf(" unsigned char len;\n");
    } else if (len <= 0x1017f) {
        /* uint_leastN_t are already unsigned. Prefixing a typedef name
           with `unsigned` is not valid C. */
        outf(" uint_least16_t len;\n");
    } else if (len <= 0x3fffffff) {
        outf(" uint_least32_t len;\n");
    } else {
        /* Fairly arbitrary limit. Could probably be a lot lower */
        ast_error("String constants may not exceed 1 GiB");
    }
    /* One array member per chunk. Only the last chunk can be shorter
       than STRCHUNK_SIZE. */
    while (len > 0) {
        unsigned chunksize = (len < STRCHUNK_SIZE ?
                              (unsigned)len : STRCHUNK_SIZE);
        outf(" unsigned char chunk%u[%u];\n", chunk_num++, chunksize);
        len -= chunksize;
    }
    outf("} strconst_%u", str->id);
}
/**
 * Emits the definitions of all string constants in the `string_constants`
 * list.
 *
 * For each string, the declaration from emit_string_header is emitted,
 * followed by an initializer that consists of:
 * - The `info` value. For lengths up to 0x7f this is the length itself.
 *   For longer strings it is a SLUL_STRINFO_UPTO_* name, which is followed
 *   by the value of the `len` member. That value is the length minus the
 *   smallest length that falls in that size class.
 * - One string literal per chunk of at most STRCHUNK_SIZE bytes.
 */
void emit_string_constants(void)
{
    struct ExprString *str;
    for (str = string_constants; str != NULL; str = str->next) {
        size_t len;
        const char *strdata;
        emit_string_header(str);
        len = str->len;
        outf(" = {\n");
        /* The values are range-limited by the conditions, so the casts
           cannot truncate. They are needed because `len` is a size_t,
           which does not match the conversion specifiers. */
        if (len <= 0x7f) {
            outf(" %u", (unsigned)len);
        } else if (len <= 0x17f) {
            outf(" SLUL_STRINFO_UPTO_0x17f, %u", (unsigned)(len-0x80));
        } else if (len <= 0x1017f) {
            outf(" SLUL_STRINFO_UPTO_0x1017f, %" PRIuLEAST16,
                 (uint_least16_t)(len-0x180));
        } else {
            outf(" SLUL_STRINFO_UPTO_0x10001017f, %" PRIuLEAST32,
                 (uint_least32_t)(len-0x10180));
        }
        /* Emit the string data, one literal per chunk. The chunk sizes
           are computed in the same way as in emit_string_header. */
        strdata = str->s;
        while (len > 0) {
            unsigned chunksize = (len < STRCHUNK_SIZE ?
                                  (unsigned)len : STRCHUNK_SIZE);
            outf(",\n \"");
            emit_string_data(strdata, chunksize);
            outc('"');
            strdata += chunksize;
            len -= chunksize;
        }
        outf("\n};\n");
    }
}
/** Writes the name of the variable that receives the value of `subexpr`.

    A sub-expression that has a successor in the RPN list is stored in its
    own temporary, which is named `t` followed by the ID of the
    sub-expression.

    A sub-expression without a successor is stored in the destination
    variable of the whole expression. Its name is target_ident, followed
    by target_num unless target_num is negative. In this case,
    target_ident must not be NULL. */
static void emit_subexpr_varname(struct Expr *subexpr,
                                 const char *target_ident, int target_num)
{
    if (subexpr->rpnnext != NULL) {
        /* Intermediate result, goes to a temporary */
        outf("t%d", subexpr->id);
        return;
    }

    /* Last operation, goes to the destination variable */
    assert(target_ident != NULL);
    outf("%s", target_ident);
    if (target_num < 0) {
        /* No numeric suffix requested */
        return;
    }
    outf("%d", target_num);
}
/** Declares all variables that are needed for an expression.

    One declaration is emitted per node in the RPN list, with these
    exceptions:
    - E_SEQPOINT nodes are skipped. According to the comment below, they
      have the same ID/variable as the boolean operation that follows
      them. The variable is declared when that operation is reached.
    - The last node is skipped if there is no destination variable
      (i.e. if target_ident is NULL).

    All variables are currently declared as `int`. */
static void declare_expr_temps(struct Expr *expr,
                               const char *target_ident, int target_num)
{
    struct Expr *node;

    /* TODO: Skip terminals that don't need any temporaries */
    for (node = expr; node != NULL; node = node->rpnnext) {
        /* Boolean operations have a preceding E_SEQPOINT. It has the same
           ID/variable as the following boolean operation. */
        int is_seqpoint = (node->kind == E_SEQPOINT);
        /* The last node only has a variable if a destination was given */
        int has_variable = (node->rpnnext != NULL || target_ident != NULL);

        if (is_seqpoint || !has_variable) {
            continue;
        }

        indent();
        /* Output type of sub-expression */
        outf("int");/* TODO use expr->tr */
        /*emit_typeref_prefix(expr->typeref);*/
        outc(' ');
        emit_subexpr_varname(node, target_ident, target_num);
        /*emit_typeref_suffix(expr->typeref);*/
        outf(";\n");
    }
}
/*
* Emits an expression. The expression is stored as a linked list in
* RPN (Reverse Polish Notation) order, e.g. 1,2,3,*,+ means (1 (2 3 *) +)
*
* First, temporary variables are outputed for the subexpressions
* (in declare_expr_temps).
*
* Second, each operation is performed, and `goto`s are inserted for
* short-circuiting boolean operations (`and` and `or`).
*
* The final result may optionally be stored in the variable given by
* concatenating target_ident and target_num.
*/
/* FIXME is the typeref parameter needed? */
void emit_expr(const struct TypeRef *typeref,
const char *target_ident, int target_num,
struct Expr *expr)
{
int last_id = 0;
const char *op;
assert(expr != NULL);
declare_expr_temps(expr, target_ident, target_num);
for (; expr; expr = expr->rpnnext) {
/* Skip terminals that don't need any temporaries */
/* TODO */
indent();
if (expr->kind == E_SEQPOINT) {
/* Sequence point created by `and` or `or` */
struct Expr *boolop = expr->u.seqpoint_end;
assert(boolop->id == expr->id);
assert(boolop != NULL);
assert(boolop->kind == E_BOOL_AND || boolop->kind == E_BOOL_OR);
outf("if (%st%d) { ", boolop->kind == E_BOOL_AND ? "!" : "", last_id);
emit_subexpr_varname(boolop, target_ident, target_num);
outf(" = %s; goto bool_op_end_%d_%d; }\n",
boolop->kind == E_BOOL_AND ? "false" : "true", /* result */
stmt_id, boolop->id /* goto ID */
);
goto no_semicolon;
}
if (expr->rpnnext || target_ident) {
emit_subexpr_varname(expr, target_ident, target_num);
outf(" = ");
}
switch (expr->kind) {
case E_GROUP_TEMP:
ast_error("E_GROUP_TEMP remained after parsing");
case E_SEQPOINT:
unreachable();
/* Terminals - Scalar constants */
case E_NONE:
outf("NULL");
break;
case E_FALSE:
outf("false");
break;
case E_TRUE:
outf("true");
break;
case E_INTEGER:
if (expr->rpnnext && expr->rpnnext->kind == E_NEGATE) {
/* Special handling for minimum signed values,
because those cannot be represented directly in C,
since the minus sign is handled as a separate negation
operation on the absolute value after it. */
if (expr->u.intval.num == 0x8000000000000000) {
outf("(-9223372036854775807-1)");
break;
} else if (expr->u.intval.num == 0x80000000) {
outf("(-2147483647-1)");
break;
}
}
outf("%" PRIu64, expr->u.intval.num);
break;
case E_STRING:
outf("&strconst_%u", expr->u.strval->id);
break;
/* Terminals - Identifiers */
case E_IDENT:
/* TODO output class name as a prefix also */
assert(expr->u.ident.namelen != 0);
outf("%s", expr->u.ident.u.name);
break;
case E_MEMBER:
/* TODO */
break;
/* Terminals - Multi-argument */
case E_ARRAY:
/* TODO */
break;
case E_CALL: {
struct CallArg *arg;
assert(expr->u.call != NULL);
assert(expr->u.call->ident.namelen != 0);
outf("%s(", expr->u.call->ident.u.name);
for (arg = expr->u.call->args; arg; arg = arg->next) {
assert(arg->expr != expr);
assert(arg->expr->rpnnext != NULL);
emit_subexpr_varname(arg->expr, NULL, -1);
if (arg->next) outf(", ");
}
outf(")");
break; }
/* Unary operators */
case E_NEGATE:
/* TODO range checks? */
outf("-t%d", last_id);
break;
case E_BOOL_NOT:
outf("!t%d", last_id);
break;
/* Binary operators: Boolean (non-trivial due to short-circuiting) */
case E_BOOL_AND:
case E_BOOL_OR:
outf("t%d;\n", last_id);
indentf("bool_op_end_%d_%d: ", stmt_id, expr->id);
break;
/* Binary operators: Comparison (non-trivial due to signedness) */
case E_LESS:
op = "<";
goto compare_op;
case E_GREATER:
op = ">";
goto compare_op;
case E_LESS_EQUAL:
op = "<=";
goto compare_op;
case E_GREATER_EQUAL:
op = ">=";
compare_op:
/* TODO handle mixed signedness/bitness/overflow? */
goto binop;
/* Binary operators: Arithmetic (non-trivial due to range checks) */
case E_ADD:
/* TODO range checks? */
op = "+";
goto binop;
case E_SUB:
op = "-";
goto binop;
case E_MUL:
op = "*";
goto binop;
case E_DIV:
op = "/";
goto binop;
case E_MOD:
/* FIXME handle negative modulus the expected way (-1 % 4 == 3),
not the C way */
op = "%";
goto binop;
/* Binary operators: == != */
case E_EQUAL:
/* TODO string/struct/array comparison */
op = "==";
goto binop;
case E_NOT_EQUAL:
/* TODO string/struct/array comparison */
op = "!=";
goto binop;
binop:
assert(last_id >= 0);
outf("t%d %s t%d", expr->u.binary.left_id, op, last_id);
break;
case E_ASSIGN:
case E_ASSIGN_FINAL:
/* TODO needs special handling of lvalue.
can be nested, like `a=b=c=x` */
break;
default:
ast_error("unknown expr kind in AST");
}
outf(";\n");
no_semicolon:
last_id = expr->id;
}
}
|