/* * Outputs C expressions. * * Copyright © 2020-2025 Samuel Lidén Borell * * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later */ #include #include "compiler.h" #include "out.h" static void emit_string_data(const char *s, size_t len) { while (len) { char c = *s; if (c == '\\' || c == '"') { outc('\\'); outc(c); } else if (c >= ' ' && c < 0x7f && c != '?') { outc(c); } else { /* Binary, or `?` which could form a trigraph */ outf("\\x%x", (unsigned char)c); if (len) { /* If the following character is a hex digit, then we need to split the string here */ char c1 = s[1]; if ((c1 >= '0' && c1 <= '9') || (c1 >= 'a' && c1 <= 'z') || (c1 >= 'A' && c1 <= 'Z')) { outf("\" \""); } } } s++; len--; } } /** Strings are splitted both for readability (to avoid extremely long lines), and also to avoid the 509 byte limit in C89. The longest possible escape sequence is 7 characters ('\xFF" "'), and 509/7 conveniently happens to be 72.7, so 70 is a good chunk size. */ #define STRCHUNK_SIZE 70 static void emit_string_header(struct ExprString *str) { size_t len = str->len; unsigned chunk_num = 0; outf( "static const struct {\n" " unsigned char info;\n"); if (len <= 0x7f) { /* length is stored in `info` */ } else if (len <= 0x17f) { outf(" unsigned char len;\n"); } else { assert(len <= 0x1017f); outf(" unsigned short len;\n"); } while (len > 0) { unsigned chunksize = (len < STRCHUNK_SIZE ? (unsigned)len : STRCHUNK_SIZE); len -= chunksize; if (!len) { chunksize++; /* null terminator */ } outf(" unsigned char chunk%u[%u];\n", chunk_num++, chunksize); } outf("} strconst__%u", str->id); } void emit_string_constants(void) { struct ExprString *str; for (str = string_constants; str != NULL; str = str->next) { size_t len; const char *strdata; emit_string_header(str); len = str->len; if (len > 0x7FFE) { /* Better disallow huge strings constants. Also leave 1 byte to allow for and past-end-of-string index and/or a null terminator. */ ast_error("String too long. Maximum in bootstrap compiler " " is 32766 bytes."); } outf(" = {\n"); if (len <= 0x7f) { outf(" %u", len); } else if (len <= 0x17f) { outf(" SLUL_SL_UPTO_17F, %" SLUL_INT_FMT "U", (SlulInt)(len-0x80)); } else { assert(len <= 0x1017f); outf(" SLUL_SL_UPTO_1017F, %" SLUL_INT_FMT "U", (SlulInt)(len-0x180)); } strdata = str->s; while (len > 0) { unsigned chunksize = (len < STRCHUNK_SIZE ? (unsigned)len : STRCHUNK_SIZE); outf(",\n \""); emit_string_data(strdata, chunksize); outc('"'); strdata += chunksize; len -= chunksize; } outf("\n};\n"); } } /** Emit the name of the destination variable for a subexpression. target_ident and target_num together make up an optional destination variable for the entire expression. */ static void emit_subexpr_varname(struct Expr *subexpr, const char *target_ident, int target_num) { if (subexpr->rpnnext) { /* Output to temporary */ outf("t__%d", subexpr->id); } else { assert(target_ident != NULL); outf("%s", target_ident); if (target_num >= 0) { outf("%d", target_num); } } } /** Declared all temporary variables needed for an expression */ static void declare_expr_temps(struct Expr *expr, const char *target_ident, int target_num, bool declare_target) { assert(!declare_target || target_ident != NULL); for (; expr; expr = expr->rpnnext) { /* Skip terminals that don't need any temporaries */ /* TODO */ /* Booleans operations have a preceding E_SEQPOINT. It has the same ID/variable as the following boolean variable. */ if (expr->kind == E_SEQPOINT) continue; /* Output type of sub-expression */ if (expr->rpnnext || declare_target) { indent(); /* Declare the variable, except for boolean `or`/`and` which share the ID with the preceding E_SEQPOINT and and hence already declared at this point. */ emit_typeref_prefix(expr->typeref); outc(' '); emit_subexpr_varname(expr, target_ident, target_num); emit_typeref_suffix(expr->typeref); outf(";\n"); } } } void emit_subexpr(struct Expr *expr, int last_id) { const char *op; switch (expr->kind) { case E_GROUP_TEMP: ast_error("E_GROUP_TEMP remained after parsing"); case E_SEQPOINT: unreachable(); /* Terminals - Scalar constants */ case E_NONE: outf("NULL"); break; case E_FALSE: assert(expr->typeref != NULL); assert(is_const(&expr->typeref->u.num)); assert(expr->typeref->u.num.min == 0); outf("false"); break; case E_TRUE: assert(expr->typeref != NULL); assert(is_const(&expr->typeref->u.num)); assert(expr->typeref->u.num.min == 1); outf("true"); break; case E_THIS: outf("this"); break; case E_INTEGER: { const struct TypeRef *tr; tr = expr->typeref; assert(tr != NULL); assert(tr->kind == TR_INT); assert(is_const(&tr->u.num)); /* min == max */ outf("%" SLUL_INT_FMT "U", tr->u.num.min); break; } case E_STRING: outf("((const struct String *)&strconst__%u)", expr->u.strval->id); break; /* Terminals - Identifiers */ case E_INSTVAR: outf("this->"); /* Fall through */ case E_LOCALVAR: { const struct TreeNode *ident; assert(expr->u.ident.namelen == 0); ident = &expr->u.ident.u.var->ident.node; outf("%.*s", (int)ident->length, ident->name); break; } /* Terminals - Multi-argument */ case E_ARRAY: /* TODO */ break; case E_METHODCALL: /* This can be either a field access or a method call */ assert(expr->u.mcall != NULL); if (expr->u.mcall->is_field) { outf("t__%d->%s", last_id, expr->u.mcall->call.ident.u.name); } else { /* Method call */ struct Type *class_; assert(expr->u.mcall->call.ident.namelen == 0);/* = resolved */ assert(expr->u.mcall->object != NULL); assert(expr->u.mcall->object->typeref != NULL); assert(expr->u.mcall->object->typeref->kind == TR_CLASS); class_ = expr->u.mcall->object->typeref->u.class_; assert(class_ != NULL); emit_func_ident(expr->u.mcall->call.ident.u.func); outc('('); outf("t__%d", expr->u.mcall->object->id); if (expr->u.mcall->call.args) { outf(", "); } goto emit_args; } break; case E_CALL: { struct CallArg *arg; assert(expr->u.call != NULL); assert(expr->u.call->ident.namelen == 0); /* = resolved */ emit_func_ident(expr->u.call->ident.u.func); outc('('); emit_args: for (arg = expr->u.call->args; arg; arg = arg->next) { assert(arg->expr != expr); assert(arg->expr->rpnnext != NULL); emit_subexpr_varname(arg->expr, NULL, -1); if (arg->next) outf(", "); } outf(")"); break; } /* Unary operators */ case E_BOOL_NOT: outf("!t__%d", last_id); break; /* Binary operators: Boolean (non-trivial due to short-circuiting) */ case E_BOOL_AND: case E_BOOL_OR: outf("t__%d;\n", last_id); indentf("bool_op_end__%d_%d: ", stmt_id, expr->id); break; /* Binary operators: Comparison (non-trivial due to signedness) */ case E_LESS: op = "<"; goto compare_op; case E_GREATER: op = ">"; goto compare_op; case E_LESS_EQUAL: op = "<="; goto compare_op; case E_GREATER_EQUAL: op = ">="; compare_op: /* Normally we would have to handle mixed signedness/size here. But the bootstrap compiler only supports sign-less 32 bit types, so that is not necessary. */ goto binop; /* Binary operators: Arithmetic (non-trivial due to range checks) */ case E_ADD: /* TODO range checks? */ op = "+"; goto binop; case E_SUB: op = "-"; goto binop; case E_MUL: op = "*"; goto binop; case E_DIV: /* XXX how does C handle rounding? what does other proglangs do? C89: - integer division is implementation defined when either operand is negative. (both sign and rounding) - float conversion always strips fractional part (= rounds towards zero) Linux/glibc/x86_64: (logical, but has illogical results for % then) 3/2 = 1 -3/2 = -1 3/-2 = -1 -3/-2 = 1 Pythons: 3//2 = 1, -3//2 = -2 (!) 3//-2 = -2 (!) -3//-2 = 1 SLUL should at least have consistent behavior, and preferably logical behavior. */ op = "/"; goto binop; case E_MOD: /* Normally we would have to special-case for negative modulus. But signed types aren't supported in the bootstrap compiler */ op = "%"; goto binop; /* Binary operators: == != */ case E_EQUAL: /* TODO string/struct/array comparison */ op = "=="; goto binop; case E_NOT_EQUAL: /* TODO string/struct/array comparison */ op = "!="; goto binop; binop: assert(last_id >= 0); outf("t__%d %s t__%d", expr->u.binary.left_id, op, last_id); break; default: ast_error("unknown expr kind in AST"); } } /* * Emits an expression. The expression is stored as a linked list in * RPN (Reverse Polish Notation) order, e.g. 1,2,3,*,+ means (1 (2 3 *) +) * * First, temporary variables are outputed for the subexpressions * (in declare_expr_temps). * * Second, each operation is performed, and `goto`s are inserted for * short-circuiting boolean operations (`and` and `or`). * * The final expression may optionally be assigned to/from in the * variable given by concatenating target_ident and target_num. */ void emit_expr(const struct TypeRef *typeref, struct Expr *expr, const char *target_ident, int target_num, enum EmitExprMode mode) { int last_id = 0; assert(expr != NULL); typecheck_expr(typeref, expr); declare_expr_temps(expr, target_ident, target_num, (mode == DECLARE_VAR)); for (; expr; expr = expr->rpnnext) { /* Skip terminals that don't need any temporaries */ /* TODO */ indent(); if (expr->kind == E_SEQPOINT) { /* Sequence point created by `and` or `or` */ struct Expr *boolop = expr->u.seqpoint_end; assert(boolop->id == expr->id); assert(boolop != NULL); assert(boolop->kind == E_BOOL_AND || boolop->kind == E_BOOL_OR); outf("if (%st__%d) { ", boolop->kind == E_BOOL_AND ? "!" : "", last_id); emit_subexpr_varname(boolop, target_ident, target_num); outf(" = %s; goto bool_op_end__%d_%d; }\n", boolop->kind == E_BOOL_AND ? "false" : "true", /* result */ stmt_id, boolop->id /* goto ID */ ); goto no_semicolon; } if (expr->rpnnext || mode == ASSIGN_TO_VAR || mode == DECLARE_VAR) { emit_subexpr_varname(expr, target_ident, target_num); outf(" = "); } emit_subexpr(expr, last_id); if (mode == ASSIGN_FROM_VAR && !expr->rpnnext) { outf(" = %s%d", target_ident, target_num); } outf(";\n"); no_semicolon: last_id = expr->id; } }