/* * Outputs C expressions. * * Copyright © 2020-2025 Samuel Lidén Borell * * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later */ #include #include "compiler.h" #include "out.h" static void emit_string_data(const char *s, size_t len) { while (len) { char c = *s; if (c == '\\' || c == '"') { outc('\\'); outc(c); } else if (c >= ' ' && c < 0x7f && c != '?') { outc(c); } else { /* Binary, or `?` which could form a trigraph */ outf("\\x%x", (unsigned char)c); if (len) { /* If the following character is a hex digit, then we need to split the string here */ char c1 = s[1]; if ((c1 >= '0' && c1 <= '9') || (c1 >= 'a' && c1 <= 'z') || (c1 >= 'A' && c1 <= 'Z')) { outf("\" \""); } } } s++; len--; } } /** Strings are splitted both for readability (to avoid extremely long lines), and also to avoid the 509 byte limit in C89. The longest possible escape sequence is 7 characters ('\xFF" "'), and 509/7 conveniently happens to be 72.7, so 70 is a good chunk size. */ #define STRCHUNK_SIZE 70 static void emit_string_header(struct ExprString *str) { size_t len = str->len; unsigned chunk_num = 0; outf( "static const struct {\n" " unsigned char info;\n"); if (len <= 0x7f) { /* length is stored in `info` */ } else if (len <= 0x17f) { outf(" unsigned char len;\n"); } else if (len <= 0x1017f) { outf(" unsigned short len;\n"); } else { ast_error("The bootstrap compiler does not support string " "constants larger than 65919 bytes"); } while (len > 0) { unsigned chunksize = (len < STRCHUNK_SIZE ? (unsigned)len : STRCHUNK_SIZE); outf(" unsigned char chunk%u[%u];\n", chunk_num++, chunksize); len -= chunksize; } outf("} strconst__%u", str->id); } void emit_string_constants(void) { struct ExprString *str; for (str = string_constants; str != NULL; str = str->next) { size_t len; const char *strdata; emit_string_header(str); len = str->len; if (len > SLUL_INT_MAX) { ast_error("String too long"); } outf(" = {\n"); if (len <= 0x7f) { outf(" %u", len); } else if (len <= 0x17f) { outf(" SLUL_SL_UPTO_17F, %" SLUL_INT_FMT "U", (SlulInt)(len-0x80)); } else if (len <= 0x1017f) { outf(" SLUL_SL_UPTO_1017F, %" SLUL_INT_FMT "U", (SlulInt)(len-0x180)); } else { assert(0); } strdata = str->s; while (len > 0) { unsigned chunksize = (len < STRCHUNK_SIZE ? (unsigned)len : STRCHUNK_SIZE); outf(",\n \""); emit_string_data(strdata, chunksize); outc('"'); strdata += chunksize; len -= chunksize; } outf("\n};\n"); } } /** Emit the name of the destination variable for a subexpression. target_ident and target_num together make up an optional destination variable for the entire expression. */ static void emit_subexpr_varname(struct Expr *subexpr, const char *target_ident, int target_num) { if (subexpr->rpnnext) { /* Output to temporary */ outf("t__%d", subexpr->id); } else { assert(target_ident != NULL); outf("%s", target_ident); if (target_num >= 0) { outf("%d", target_num); } } } /** Declared all temporary variables needed for an expression */ static void declare_expr_temps(struct Expr *expr, const char *target_ident, int target_num, bool declare_target) { assert(!declare_target || target_ident != NULL); for (; expr; expr = expr->rpnnext) { /* Skip terminals that don't need any temporaries */ /* TODO */ /* Booleans operations have a preceding E_SEQPOINT. It has the same ID/variable as the following boolean variable. */ if (expr->kind == E_SEQPOINT) continue; /* Output type of sub-expression */ if (expr->rpnnext || declare_target) { indent(); /* Declare the variable, except for boolean `or`/`and` which share the ID with the preceding E_SEQPOINT and and hence already declared at this point. */ emit_typeref_prefix(expr->typeref); outc(' '); emit_subexpr_varname(expr, target_ident, target_num); emit_typeref_suffix(expr->typeref); outf(";\n"); } } } /* * Emits an expression. The expression is stored as a linked list in * RPN (Reverse Polish Notation) order, e.g. 1,2,3,*,+ means (1 (2 3 *) +) * * First, temporary variables are outputed for the subexpressions * (in declare_expr_temps). * * Second, each operation is performed, and `goto`s are inserted for * short-circuiting boolean operations (`and` and `or`). * * The final expression may optionally be assigned to/from in the * variable given by concatenating target_ident and target_num. */ void emit_expr(const struct TypeRef *typeref, struct Expr *expr, const char *target_ident, int target_num, enum EmitExprMode mode) { int last_id = 0; const char *op; assert(expr != NULL); typecheck_expr(typeref, expr); declare_expr_temps(expr, target_ident, target_num, (mode == DECLARE_VAR)); for (; expr; expr = expr->rpnnext) { /* Skip terminals that don't need any temporaries */ /* TODO */ indent(); if (expr->kind == E_SEQPOINT) { /* Sequence point created by `and` or `or` */ struct Expr *boolop = expr->u.seqpoint_end; assert(boolop->id == expr->id); assert(boolop != NULL); assert(boolop->kind == E_BOOL_AND || boolop->kind == E_BOOL_OR); outf("if (%st__%d) { ", boolop->kind == E_BOOL_AND ? "!" : "", last_id); emit_subexpr_varname(boolop, target_ident, target_num); outf(" = %s; goto bool_op_end__%d_%d; }\n", boolop->kind == E_BOOL_AND ? "false" : "true", /* result */ stmt_id, boolop->id /* goto ID */ ); goto no_semicolon; } if (expr->rpnnext || mode == ASSIGN_TO_VAR || mode == DECLARE_VAR) { emit_subexpr_varname(expr, target_ident, target_num); outf(" = "); } switch (expr->kind) { case E_GROUP_TEMP: ast_error("E_GROUP_TEMP remained after parsing"); case E_SEQPOINT: unreachable(); /* Terminals - Scalar constants */ case E_NONE: outf("NULL"); break; case E_FALSE: assert(expr->typeref != NULL); assert(is_const(&expr->typeref->u.num)); assert(expr->typeref->u.num.min == 0); outf("false"); break; case E_TRUE: assert(expr->typeref != NULL); assert(is_const(&expr->typeref->u.num)); assert(expr->typeref->u.num.min == 1); outf("true"); break; case E_INTEGER: { const struct TypeRef *tr; tr = expr->typeref; assert(tr != NULL); assert(tr->kind == TR_INT); assert(is_const(&tr->u.num)); /* min == max */ outf("%" SLUL_INT_FMT "U", tr->u.num.min); break; } case E_STRING: outf("((const struct String *)&strconst__%u)", expr->u.strval->id); break; /* Terminals - Identifiers */ case E_IDENT: { const struct TreeNode *ident; assert(expr->u.ident.namelen == 0); ident = &expr->u.ident.u.var->ident.node; outf("%.*s", (int)ident->length, ident->name); break; } case E_MEMBER: /* TODO */ break; /* Terminals - Multi-argument */ case E_ARRAY: /* TODO */ break; case E_CALL: { struct CallArg *arg; assert(expr->u.call != NULL); assert(expr->u.call->ident.namelen != 0); outf("%s(", expr->u.call->ident.u.name); for (arg = expr->u.call->args; arg; arg = arg->next) { assert(arg->expr != expr); assert(arg->expr->rpnnext != NULL); emit_subexpr_varname(arg->expr, NULL, -1); if (arg->next) outf(", "); } outf(")"); break; } /* Unary operators */ case E_BOOL_NOT: outf("!t__%d", last_id); break; /* Binary operators: Boolean (non-trivial due to short-circuiting) */ case E_BOOL_AND: case E_BOOL_OR: outf("t__%d;\n", last_id); indentf("bool_op_end__%d_%d: ", stmt_id, expr->id); break; /* Binary operators: Comparison (non-trivial due to signedness) */ case E_LESS: op = "<"; goto compare_op; case E_GREATER: op = ">"; goto compare_op; case E_LESS_EQUAL: op = "<="; goto compare_op; case E_GREATER_EQUAL: op = ">="; compare_op: /* TODO handle mixed signedness/bitness/overflow? */ goto binop; /* Binary operators: Arithmetic (non-trivial due to range checks) */ case E_ADD: /* TODO range checks? */ op = "+"; goto binop; case E_SUB: op = "-"; goto binop; case E_MUL: op = "*"; goto binop; case E_DIV: /* XXX how does C handle rounding? what does other proglangs do? C89: - integer division is implementation defined when either operand is negative. (both sign and rounding) - float conversion always strips fractional part (= rounds towards zero) Linux/glibc/x86_64: (logical, but has illogical results for % then) 3/2 = 1 -3/2 = -1 3/-2 = -1 -3/-2 = 1 Pythons: 3//2 = 1, -3//2 = -2 (!) 3//-2 = -2 (!) -3//-2 = 1 SLUL should at least have consistent behavior, and preferably logical behavior. */ op = "/"; goto binop; case E_MOD: /* FIXME handle negative modulus the expected way (-1 % 4 == 3)? but see above (although it has to do with rounding). In C89, it's implementation-defined, but on Linux/glibc/x86_64: -1 % 4 = -1 */ op = "%"; goto binop; /* Binary operators: == != */ case E_EQUAL: /* TODO string/struct/array comparison */ op = "=="; goto binop; case E_NOT_EQUAL: /* TODO string/struct/array comparison */ op = "!="; goto binop; binop: assert(last_id >= 0); outf("t__%d %s t__%d", expr->u.binary.left_id, op, last_id); break; default: ast_error("unknown expr kind in AST"); } if (mode == ASSIGN_FROM_VAR && !expr->rpnnext) { outf(" = %s%d", target_ident, target_num); } outf(";\n"); no_semicolon: last_id = expr->id; } }