/* * Outputs C expressions. * * Copyright © 2020-2025 Samuel Lidén Borell * * SPDX-License-Identifier: EUPL-1.2+ */ #include #include /* This one is not in C89 */ #include "compiler.h" #include "out.h" static void emit_string_data(const char *s, size_t len) { while (len) { char c = *s; if (c == '\\' || c == '"') { outc('\\'); outc(c); } else if (c >= ' ' && c < 0x7f) { outc(c); } else { outf("\\x%x", (unsigned char)c); if (len) { /* If the following character is a hex digit, then we need to split the string here */ char c1 = s[1]; if ((c1 >= '0' && c1 <= '9') || (c1 >= 'a' && c1 <= 'z') || (c1 >= 'A' && c1 <= 'Z')) { outf("\" \""); } } } s++; len--; } } /** Strings are splitted both for readability (to avoid extremely long lines), and also to avoid the 509 byte limit in C89. The longest possible escape sequence is 7 characters ('\xFF" "'), and 509/7 conveniently happens to be 72.7, so 70 is a good chunk size. */ #define STRCHUNK_SIZE 70 static void emit_string_header(struct ExprString *str) { size_t len = str->len; unsigned chunk_num = 0; outf( "static const struct {\n" " unsigned char info;\n"); if (len <= 0x7f) { /* length is stored in `info` */ } else if (len <= 0x17f) { outf(" unsigned char len;\n"); } else if (len <= 0x1017f) { outf(" unsigned uint_least16_t len;\n"); } else if (len <= 0x3fffffff) { outf(" unsigned uint_least32_t len;\n"); } else { /* Fairly arbitrary limit. Could probably be a lot lower */ ast_error("String constants may not exceed 1 GiB"); } while (len > 0) { unsigned chunksize = (len < STRCHUNK_SIZE ? (unsigned)len : STRCHUNK_SIZE); outf(" unsigned char chunk%u[%u];\n", chunk_num++, chunksize); len -= chunksize; } outf("} strconst_%u", str->id); } void emit_string_constants(void) { struct ExprString *str; for (str = string_constants; str != NULL; str = str->next) { size_t len; const char *strdata; emit_string_header(str); len = str->len; outf(" = {\n"); if (len <= 0x7f) { outf(" %u", len); } else if (len <= 0x17f) { outf(" SLUL_STRINFO_UPTO_0x17f, %u", len-0x80); } else if (len <= 0x1017f) { outf(" SLUL_STRINFO_UPTO_0x1017f, %" PRIuLEAST16, len-0x180); } else { outf(" SLUL_STRINFO_UPTO_0x10001017f, %" PRIuLEAST32, len-0x10180); } strdata = str->s; while (len > 0) { unsigned chunksize = (len < STRCHUNK_SIZE ? (unsigned)len : STRCHUNK_SIZE); outf(",\n \""); emit_string_data(strdata, chunksize); outc('"'); strdata += chunksize; len -= chunksize; } outf("\n};\n"); } } /** Emit the name of the destination variable for a subexpression. target_ident and target_num together make up an optional destination variable for the entire expression. */ static void emit_subexpr_varname(struct Expr *subexpr, const char *target_ident, int target_num) { if (subexpr->rpnnext) { /* Output to temporary */ outf("t%d", subexpr->id); } else { assert(target_ident != NULL); outf("%s", target_ident); if (target_num >= 0) { outf("%d", target_num); } } } /** Declared all temporary variables needed for an expression */ static void declare_expr_temps(struct Expr *expr, const char *target_ident, int target_num) { for (; expr; expr = expr->rpnnext) { /* Skip terminals that don't need any temporaries */ /* TODO */ /* Booleans operations have a preceding E_SEQPOINT. It has the same ID/variable as the following boolean variable. */ if (expr->kind == E_SEQPOINT) continue; /* Output type of sub-expression */ if (expr->rpnnext || target_ident) { indent(); /* Declare the variable, except for boolean `or`/`and` which share the ID with the preceding E_SEQPOINT and and hence already declared at this point. */ outf("int");/* TODO use expr->tr */ /*emit_typeref_prefix(expr->typeref);*/ outc(' '); emit_subexpr_varname(expr, target_ident, target_num); /*emit_typeref_suffix(expr->typeref);*/ outf(";\n"); } } } /* * Emits an expression. The expression is stored as a linked list in * RPN (Reverse Polish Notation) order, e.g. 1,2,3,*,+ means (1 (2 3 *) +) * * First, temporary variables are outputed for the subexpressions * (in declare_expr_temps). * * Second, each operation is performed, and `goto`s are inserted for * short-circuiting boolean operations (`and` and `or`). * * The final result may optionally be stored in the variable given by * concatenating target_ident and target_num. */ /* FIXME is the typeref parameter needed? */ void emit_expr(const struct TypeRef *typeref, const char *target_ident, int target_num, struct Expr *expr) { int last_id = 0; const char *op; assert(expr != NULL); declare_expr_temps(expr, target_ident, target_num); for (; expr; expr = expr->rpnnext) { /* Skip terminals that don't need any temporaries */ /* TODO */ indent(); if (expr->kind == E_SEQPOINT) { /* Sequence point created by `and` or `or` */ struct Expr *boolop = expr->u.seqpoint_end; assert(boolop->id == expr->id); assert(boolop != NULL); assert(boolop->kind == E_BOOL_AND || boolop->kind == E_BOOL_OR); outf("if (%st%d) { ", boolop->kind == E_BOOL_AND ? "!" : "", last_id); emit_subexpr_varname(boolop, target_ident, target_num); outf(" = %s; goto bool_op_end_%d_%d; }\n", boolop->kind == E_BOOL_AND ? "false" : "true", /* result */ stmt_id, boolop->id /* goto ID */ ); goto no_semicolon; } if (expr->rpnnext || target_ident) { emit_subexpr_varname(expr, target_ident, target_num); outf(" = "); } switch (expr->kind) { case E_GROUP_TEMP: ast_error("E_GROUP_TEMP remained after parsing"); case E_SEQPOINT: unreachable(); /* Terminals - Scalar constants */ case E_NONE: outf("NULL"); break; case E_FALSE: outf("false"); break; case E_TRUE: outf("true"); break; case E_INTEGER: if (expr->rpnnext && expr->rpnnext->kind == E_NEGATE) { /* Special handling for minimum signed values, because those cannot be represented directly in C, since the minus sign is handled as a separate negation operation on the absolute value after it. */ if (expr->u.intval.num == 0x8000000000000000) { outf("(-9223372036854775807-1)"); break; } else if (expr->u.intval.num == 0x80000000) { outf("(-2147483647-1)"); break; } } outf("%" PRIu64, expr->u.intval.num); break; case E_STRING: outf("&strconst_%u", expr->u.strval->id); break; /* Terminals - Identifiers */ case E_IDENT: /* TODO output class name as a prefix also */ assert(expr->u.ident.namelen != 0); outf("%s", expr->u.ident.u.name); break; case E_MEMBER: /* TODO */ break; /* Terminals - Multi-argument */ case E_ARRAY: /* TODO */ break; case E_CALL: { struct CallArg *arg; assert(expr->u.call != NULL); assert(expr->u.call->ident.namelen != 0); outf("%s(", expr->u.call->ident.u.name); for (arg = expr->u.call->args; arg; arg = arg->next) { assert(arg->expr != expr); assert(arg->expr->rpnnext != NULL); emit_subexpr_varname(arg->expr, NULL, -1); if (arg->next) outf(", "); } outf(")"); break; } /* Unary operators */ case E_NEGATE: /* TODO range checks? */ outf("-t%d", last_id); break; case E_BOOL_NOT: outf("!t%d", last_id); break; /* Binary operators: Boolean (non-trivial due to short-circuiting) */ case E_BOOL_AND: case E_BOOL_OR: outf("t%d;\n", last_id); indentf("bool_op_end_%d_%d: ", stmt_id, expr->id); break; /* Binary operators: Comparison (non-trivial due to signedness) */ case E_LESS: op = "<"; goto compare_op; case E_GREATER: op = ">"; goto compare_op; case E_LESS_EQUAL: op = "<="; goto compare_op; case E_GREATER_EQUAL: op = ">="; compare_op: /* TODO handle mixed signedness/bitness/overflow? */ goto binop; /* Binary operators: Arithmetic (non-trivial due to range checks) */ case E_ADD: /* TODO range checks? */ op = "+"; goto binop; case E_SUB: op = "-"; goto binop; case E_MUL: op = "*"; goto binop; case E_DIV: op = "/"; goto binop; case E_MOD: /* FIXME handle negative modulus the expected way (-1 % 4 == 3), not the C way */ op = "%"; goto binop; /* Binary operators: == != */ case E_EQUAL: /* TODO string/struct/array comparison */ op = "=="; goto binop; case E_NOT_EQUAL: /* TODO string/struct/array comparison */ op = "!="; goto binop; binop: assert(last_id >= 0); outf("t%d %s t%d", expr->u.binary.left_id, op, last_id); break; case E_ASSIGN: case E_ASSIGN_FINAL: /* TODO needs special handling of lvalue. can be nested, like `a=b=c=x` */ break; default: ast_error("unknown expr kind in AST"); } outf(";\n"); no_semicolon: last_id = expr->id; } }