/* ctrans.c -- C code generator for LRL bootstrap transpiler Copyright © 2020-2021 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "bootstrap.h" #include #include enum PrintExpr { /** Never put parentheses around expr */ NO_PAREN = 0x1, /** Only put parentheses around if there's a comma expression */ COMMA_PAREN = 0x2 }; static FILE *out; static int indentlevel; static struct Type *returntype; static void print_innermost_type(const struct Type *type); static void print_type_prefix(const struct Type *type); static void print_type_suffix(const struct Type *type); static void print_type(const struct Type *type, const char *name); static void trans_typedef(const struct Ident *ident); static void print_expr(const struct Expr *expr, enum PrintExpr flags); static void print_expr_temps(struct Expr *expr, struct TypeRef *tr); static void print_block(struct Block *block); static void print_loopblock(struct Block *block, int has_loopempty); static void start_endempty(struct Block *end, struct Block *empty); static void end_endempty(struct Block *end, struct Block *empty, struct LoopInfo *loop); void output_set_file(FILE *outfile) { out = outfile; } static void indent() { int spaces = 4*indentlevel; while (spaces--) putc(' ', out); } static void print_brace(int ch) { indent(); putc(ch, out); putc('\n', out); } static void print_fieldsorparams(const struct FieldOrParamList *fps, const char *separator, int printlast) { struct FieldOrParam *fp; for (fp = fps->first; fp; fp = fp->list_next) { print_type(&fp->type, fp->name); if (printlast || fp->list_next) fputs(separator, out); } } static void print_struct(const struct Type *type, const char *name) { if (name) fprintf(out, "struct %s {\n", name); else fprintf(out, "struct {\n"); print_fieldsorparams(type->kind.fields, ";\n", 1); putc('}', out); } static void print_enum(const struct Type *type, const char *name) { struct EnumValue *ev; /* FIXME how to handle base types? */ fprintf(out, "enum %s {\n", name); for (ev = type->kind.enu->values; ev; ev = ev->next) { fputs(ev->name, out); if (ev->value) { fputs(" = ", out); print_expr(ev->value, COMMA_PAREN); } if (ev->next) fputs(",\n", out); } fprintf(out, "};\n"); } static void print_elementary_type(unsigned int builtin) { const char *name; int unsignd = 0; switch (builtin) { case KEYWORD_TO_BUILTIN(KW_Void): name = "void"; break; case KEYWORD_TO_BUILTIN(KW_Bool): name = "int"; break; case KEYWORD_TO_BUILTIN(KW_USize): name = "size_t"; break; case KEYWORD_TO_BUILTIN(KW_SSize): name = "ssize_t"; break; case KEYWORD_TO_BUILTIN(KW_FileOffs): name = "off_t"; break; case KEYWORD_TO_BUILTIN(KW_Byte): unsignd = 1; /* fall through */ case KEYWORD_TO_BUILTIN(KW_Char): name = "char"; break; case KEYWORD_TO_BUILTIN(KW_UShort): case KEYWORD_TO_BUILTIN(KW_Short): unsignd = 1; /* fall through */ case KEYWORD_TO_BUILTIN(KW_WUShort): name = "short"; break; case KEYWORD_TO_BUILTIN(KW_UInt): case KEYWORD_TO_BUILTIN(KW_WUInt): unsignd = 1; /* fall through */ case KEYWORD_TO_BUILTIN(KW_Int): name = "int"; break; case KEYWORD_TO_BUILTIN(KW_ULong): case KEYWORD_TO_BUILTIN(KW_WULong): unsignd = 1; /* fall through */ case KEYWORD_TO_BUILTIN(KW_Long): name = "long"; break; case KEYWORD_TO_BUILTIN(KW_Int32): name = "int32_t"; break; case KEYWORD_TO_BUILTIN(KW_UInt32): case KEYWORD_TO_BUILTIN(KW_WUInt32): name = "uint32_t"; break; default: fprintf(stderr, "Internal compiler error\n"); abort(); } if (unsignd) fputs("unsigned ", out); fprintf(out, "%s", name); } static const struct Type *determine_ref_type(const struct Type *type, const char **nameptr) { const struct Type *reftype = &type->kind.ident->decl.type; *nameptr = type->kind.ident->name; while (reftype->type == T_IDENT) { *nameptr = reftype->kind.ident->name; reftype = &reftype->kind.ident->decl.type; } return reftype; } static void print_innermost_type(const struct Type *type) { deeper: switch (type->type) { case T_STRUCT: print_struct(type, NULL); break; case T_ENUM: print_enum(type, NULL); break; case T_REF: /* TODO special handling for private types (should be void*) */ type = type->kind.nested; goto deeper; case T_ARRAY: type = ARR_ELEM_TYPE(type); goto deeper; case T_OPTIONAL: /* Pointers are always optional in C, and enums support an -1 value, so nothing needs to be done here. IF we would support C99 bool, then we would need some kind of special handling. */ type = type->kind.nested; goto deeper; case T_IDENT: { /* For struct and enum types, we can use the tag. For other types we simply print the referenced type. */ const char *name; const struct Type *reftype = determine_ref_type(type, &name); unsigned typeoftype = reftype->type; if (typeoftype == T_STRUCT) { fprintf(out, "struct %s", name); } else if (typeoftype == T_ENUM) { fprintf(out, "enum %s", name); } else { type = reftype; goto deeper; } break; } case T_ELMNTRY: print_elementary_type(type->misc); break; case T_FUNC: /* TODO handle array return? */ type = &type->kind.func->returntype; goto deeper; case T_BITS: /* TODO */ break; } } static void print_type_prefix(const struct Type *type) { switch (type->type) { case T_STRUCT: case T_ENUM: case T_ELMNTRY: break; case T_REF: print_type_prefix(type->kind.nested); /* TODO don't print for private types */ putc('*', out); putc('(', out); break; case T_ARRAY: print_type_prefix(ARR_ELEM_TYPE(type)); putc('(', out); break; case T_OPTIONAL: /* Pointers are always optional in C, and enums support an -1 value, so nothing needs to be done here. IF we would support C99 bool, then we would need some kind of special handling. */ print_type_prefix(type->kind.nested); break; case T_IDENT: { const char *name; const struct Type *reftype = determine_ref_type(type, &name); unsigned typeoftype = reftype->type; if (typeoftype != T_STRUCT && typeoftype != T_ENUM) { print_type_prefix(reftype); } break; } case T_FUNC: /* TODO handle array return? */ putc('(', out); print_type_prefix(&type->kind.func->returntype); putc('(', out); break; case T_BITS: /* TODO */ break; } } static void print_type_suffix(const struct Type *type) { switch (type->type) { case T_STRUCT: case T_ENUM: case T_ELMNTRY: break; case T_REF: putc(')', out); print_type_prefix(type->kind.nested); /* TODO don't print for private types */ break; case T_ARRAY: putc(')', out); /* TODO undefined- or runtime-length arrays should be translated to a pointer */ putc('[', out); /* TODO length */ putc(']', out); print_type_suffix(ARR_ELEM_TYPE(type)); break; case T_OPTIONAL: /* Pointers are always optional in C, and enums support an -1 value, so nothing needs to be done here. IF we would support C99 bool, then we would need some kind of special handling. */ print_type_suffix(type->kind.nested); break; case T_IDENT: { const char *name; const struct Type *reftype = determine_ref_type(type, &name); unsigned typeoftype = reftype->type; if (typeoftype != T_STRUCT && typeoftype != T_ENUM) { print_type_suffix(reftype); } break; } case T_FUNC: putc(')', out); putc('(', out); if (type->kind.func->params.count) { print_fieldsorparams(&type->kind.func->params, ", ", 0); /* TODO varargs (if we should support varargs) */ } else { fputs("void", out); } fputs("))", out); /* TODO handle array return? */ print_type_suffix(&type->kind.func->returntype); break; case T_BITS: /* TODO */ break; } } static void print_type(const struct Type *type, const char *name) { print_innermost_type(type); putc(' ', out); print_type_prefix(type); fputs(name, out); print_type_suffix(type); } /* we translate type definitions to type tags */ static void trans_typedef(const struct Ident *ident) { fprintf(out, "/* type: %s */\n", ident->name); /* XXX debugging output */ switch (ident->decl.type.type) { case T_STRUCT: print_struct(&ident->decl.type, ident->name); fprintf(out, ";\n"); break; case T_ENUM: print_enum(&ident->decl.type, ident->name); fprintf(out, ";\n"); break; case T_REF: case T_ARRAY: case T_OPTIONAL: case T_IDENT: case T_ELMNTRY: case T_FUNC: case T_BITS: /* Included as is. Does not need a type tag */ break; } } static void print_unop_temps(struct Expr *expr, struct TypeRef *tr) { /* print_expr_temps(expr->a.expr, NULL);*/ /* TODO */ } static void print_binop_temps(struct Expr *expr, struct TypeRef *tr) { /*print_expr_temps(expr->a.expr, NULL); print_expr_temps(expr->b.expr, NULL);*/ /* TODO */ } static void print_expr_temps(struct Expr *expr, struct TypeRef *tr) { if (!expr) return; /* TODO determine type and set expr->tmp.tr */ /* TODO or should we check types in the parser in the 2nd pass? - initial values of declarations should be ready in this pass. - globals referenced from function bodies should be ready - locals referenced from function bodies should also be ready */ expr->tmp.tr = *tr; switch (expr->type) { case E_UNARYOP: print_unop_temps(expr, tr); break; case E_BINARYOP: print_binop_temps(expr, tr); break; case E_CONDITIONAL: /* TODO */ break; case E_CONDCHOICES: /* TODO */ break; case E_LOCALIDENT: expr->tmp.tr = root_tr(&expr->a.localident->type); goto compare_tr; case E_GLOBALIDENT: expr->tmp.tr = root_tr(&expr->a.ident->decl.type); compare_tr: check_compat(expr->a.localident->name, &expr->tmp.tr, tr); break; case E_NUMBER: case E_STRING: case E_NONE: case E_UNDEF: require_tr(expr, "Literals need an explicit type"); break; case E_ARRAY: { struct ExprElement *elem; struct TypeRef arrtr, elemtr; require_tr(expr, "Literals need an explicit type"); arrtr = require_expr_type(expr, T_ARRAY); elemtr = get_elem_tr(&arrtr); /* TODO these should have target types */ for (elem = &expr->a.exprlist->first; elem; elem = elem->next) { print_expr_temps(elem->expr, &elemtr); } break; } case E_STRUCT: { struct ExprElement *elem; struct TypeRef structtr; require_tr(expr, "Literals need an explicit type"); structtr = require_expr_type(expr, T_STRUCT); /* TODO have a special struct expr list? */ /* TODO these should have target types */ for (elem = &expr->a.exprlist->first; elem; elem = elem->next) { /*print_expr_temps(elem->expr, NULL);*/ } break; } case E_INDEX: { struct ExprElement *elem; struct TypeRef arrtr; if (tr) { /* TODO get type of element, and make array tr */ print_expr_temps(expr->b.expr, &arrtr); } else { print_expr_temps(expr->b.expr, NULL); } require_expr_type(expr->b.expr, T_ARRAY); for (elem = &expr->a.exprlist->first; elem; elem = elem->next) { /* TODO custom index types */ print_expr_temps(elem->expr, &builtin_tr[BT_USIZE]); } break; } case E_CALL: { struct ExprElement *elem; struct TypeRef functr, returntr; struct FunctionType *func; struct FieldOrParam *param; print_expr_temps(expr->b.expr, NULL); functr = require_expr_type(expr->b.expr, T_FUNC); /* TODO and allow ptr-to-function? */ if (!functr.type) break; func = functr.type->kind.func; returntr = nested_tr(&func->returntype, &functr); check_compat(NULL/*TODO include function name?*/, &returntr, tr); for (elem = &expr->a.exprlist->first, param = func->params.first; elem && param; elem = elem->next, param = param->list_next) { struct TypeRef paramtr = nested_tr(¶m->type, &functr); print_expr_temps(elem->expr, ¶mtr); } if (param && func->params.count) { error_linecol(-1, -1, "Too few function parameters"); } else if (elem) { error_linecol(-1, -1, "Too many function parameters"); } expr->tmp.tr = functr; break; } } } static void print_exprlist(const struct ExprList *exprlist) { const struct ExprElement *elem = &exprlist->first; for (; elem; elem = elem->next) { print_expr(elem->expr, COMMA_PAREN); if (elem->next) fprintf(out, ", "); } } static const char c_ops[][2+1] = { "++", "--", "+", "-", "*", "/", "%", /* FIXME how to translate modulus operation for negative numbers? */ "!", "&&", "||", "==", "!=", "<", "<=", ">", ">=", "=", "+=", "-=", "*=", "/=", ".", "?" }; static void print_unary_op(unsigned op, const struct Expr *a, enum PrintExpr flags) { switch (op) { /* TODO should we have inc/dec operators */ case OP_INC: case OP_DEC: fputs(c_ops[op], out); print_expr(a, 0); break; /* TODO */ } } static void print_binary_op(unsigned op, const struct Expr *a, const struct Expr *b, enum PrintExpr flags) { switch (op) { case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD: /* FIXME modulus for negative numbers? */ case OP_EQ: case OP_NOTEQ: case OP_LESS: case OP_LEQ: case OP_GREATER: case OP_GEQ: if (!(flags & (NO_PAREN|COMMA_PAREN))) putc('(', out); print_expr(a, 0); fputs(c_ops[op], out); print_expr(b, 0); if (!(flags & (NO_PAREN|COMMA_PAREN))) putc(')', out); break; case OP_ASSIGN: case OP_ADDASSIGN: case OP_SUBASSIGN: case OP_MULASSIGN: case OP_DIVASSIGN: /* These are only allowed as root expressions, so no parantheses are needed. */ print_expr(a, 0); fputs(c_ops[op], out); print_expr(b, 0); break; case OP_MEMBER: print_expr(b, 0); fputs(c_ops[op], out); /* TODO */ break; } } static void print_expr(const struct Expr *expr, enum PrintExpr flags) { /* TODO need to print type casts, because LRL uses target types and C uses type promotion. */ if (!expr) return; switch (expr->type) { case E_UNARYOP: print_unary_op(expr->op, expr->a.expr, flags); break; case E_BINARYOP: /* TODO need to know: - types of operands first (for comparisons, both integer and non-scalar) - left operand OR target type for assignment */ print_binary_op(expr->op, expr->a.expr, expr->b.expr, flags); break; case E_CONDITIONAL: /* TODO */ break; case E_CONDCHOICES: /* TODO */ break; case E_LOCALIDENT: fputs(expr->a.localident->name, out); break; case E_GLOBALIDENT: fputs(expr->a.ident->name, out); break; case E_NUMBER: { /* TODO - We only need to handle 8, 16 and 32 bit types in the transpiler - If we don't need 16-bit target support we can ignore -INTMAX issues. - Do we need to translate hex to dec? - Do we need to translate float somehow? - Binary numbers need translation. */ const char *s = expr->a.value->data; size_t len = expr->a.value->length; while (len--) { char ch = *(s++); if (ch != '_') fputc(ch, out); } break; } case E_STRING: /* TODO */ break; case E_NONE: /* TODO enums/ints with a none value */ fprintf(out, "NULL"); break; case E_UNDEF: /* TODO */ break; case E_ARRAY: /* TODO */ break; case E_STRUCT: /* TODO */ break; case E_CALL: print_expr(expr->b.expr, flags); putc('(', out); print_exprlist(expr->a.exprlist); putc(')', out); break; } } static void trans_funcdef(const struct Ident *ident) { /*struct Type type;*/ fprintf(out, "/* func: %s */\n", ident->name); /* XXX debugging output */ print_type(&ident->decl.type, ident->name); fprintf(out, ";\n"); } static void trans_datadef(const struct Ident *ident) { fprintf(out, "/* data: %s */\n", ident->name); /* XXX debugging output */ print_type(&ident->decl.type, ident->name); fprintf(out, ";\n"); } static int is_typedef_ready(const struct Type *type) { deeper: switch (type->type) { case T_REF: case T_ENUM: case T_OPTIONAL: case T_ELMNTRY: case T_FUNC: case T_BITS: return 1; case T_ARRAY: if (type->misc == M_LONG_ARR_LENGTH) { type = &type->kind.arr->elemtype; goto deeper; } else { type = type->kind.nested; goto deeper; } case T_STRUCT: { struct FieldOrParam *field = type->kind.fields->first; for (; field; field = field->list_next) { if (!is_typedef_ready(&field->type)) return 0; } return 1; } case T_IDENT: return type->kind.ident->decl.type.deftype != D_MARKED; default: internal_error(9); } } int output_dependencies(void) { /* First, output all struct and enum type definitions (other kinds of types are simply replaced with the definition). A waiting list is used to queue structs that depend on later defined struct types. */ struct Ident *first_type = ident_list(D_TYPEDEF); struct Ident *waiting = NULL; struct Ident *ident = first_type; for (; ident; ident = ident->chain_next) { ident->decl.type.deftype = D_MARKED; /* mark as not defined */ } ident = first_type; while (ident) { if (is_typedef_ready(&ident->decl.type)) { /* Mark as done and perform translation */ ident->decl.type.deftype = D_TYPEDEF; trans_typedef(ident); ident = ident->chain_next; } else { /* This type depends on a marked (not processed) type. Put it on the waiting list. */ struct Ident *next = ident->chain_next; ident->chain_next = waiting; waiting = ident; ident = next; } if (!ident) { ident = waiting; waiting = NULL; if (!ident) break; } } /* Second, output all function definitions (but not function bodies yet, since they may call other functions) */ ident = ident_list(D_FUNCDEF); for (; ident; ident = ident->chain_next) { trans_funcdef(ident); } /* Third, output all data definitions */ ident = ident_list(D_DATADEF); for (; ident; ident = ident->chain_next) { trans_datadef(ident); } /* TODO */ return !ferror(out); } static void print_vardef(struct Type *type, char *name) { indent(); print_type(type, name); fprintf(out, ";\n"); } static void print_stmt_decls(struct Stmt *stmt) { switch (stmt->type) { case S_BLOCK: /* Variables will be declared inside the block */ case S_FOR: /* dito */ case S_BREAK: case S_CONT: case S_GOTO: case S_LABEL: case S_UNREACH: case S_NOP: break; case S_DECL: { struct LocalIdent *localvar = stmt->kind.localvar; struct TypeRef tr = root_tr(&localvar->type); print_vardef(&localvar->type, localvar->name); print_expr_temps(localvar->initval, &tr); break; } case S_EXPR: /*case S_ASSIGN:*/ /* FIXME should we have this? */ /* TODO determine type of expr */ /*print_expr_temps(stmt->kind.expr, NULL);*/ break; case S_RETURN: { struct TypeRef tr = root_tr(returntype); print_expr_temps(stmt->kind.expr, &tr); break; } case S_IF: print_expr_temps(stmt->kind.ifstm->cond, &builtin_tr[BT_BOOL]); break; case S_WHILE: case S_DOWHILE: print_expr_temps(stmt->kind.whilestm->cond, &builtin_tr[BT_BOOL]); break; case S_ASSERT: print_expr_temps(stmt->kind.expr, &builtin_tr[BT_BOOL]); break; case S_SWITCH: /* TODO determine type of expr */ /*print_expr_temps(stmt->kind.switch->expr, NULL);*/ break; } } static void print_stmt(struct Stmt *stmt) { switch (stmt->type) { case S_BLOCK: print_block(stmt->kind.block); break; case S_DECL: /* TODO print assign expr */ break; case S_EXPR: indent(); print_expr(stmt->kind.expr, NO_PAREN); fprintf(out, ";\n"); break; case S_RETURN: indent(); fprintf(out, "return "); print_expr(stmt->kind.expr, NO_PAREN); fprintf(out, ";\n"); break; case S_BREAK: indent(); fprintf(out, "goto _LRL_loopout%lu;\n", stmt->kind.loopinfo->gotoid); break; case S_CONT: indent(); fprintf(out, "continue;\n"); break; case S_GOTO: indent(); fprintf(out, "goto %s;\n", stmt->kind.gotoident->name); break; case S_LABEL: { /* Half-deindentation for readability */ int spaces = 4*indentlevel - 2; while (spaces--) putc(' ', out); /* The trailing semicolon is required when the goto appears at the end of a {} block */ fprintf(out, "%s:;\n", stmt->kind.gotoident->name); break; } case S_IF: { struct CtlIf *ctl = stmt->kind.ifstm; indent(); fprintf(out, "if ("); print_expr(ctl->cond, NO_PAREN); fprintf(out, ")\n"); print_block(&ctl->true_block); if (ctl->false_block) { indent(); fprintf(out, "else\n"); print_block(ctl->false_block); } break; } case S_WHILE: { struct CtlWhile *ctl = stmt->kind.whilestm; start_endempty(ctl->end, ctl->empty); indent(); fprintf(out, "while ("); print_expr(ctl->cond, NO_PAREN); fprintf(out, ")\n"); print_loopblock(&ctl->block, ctl->empty!=NULL); end_endempty(ctl->end, ctl->empty, &ctl->loopinfo); break; } case S_DOWHILE: { struct CtlWhile *ctl = stmt->kind.whilestm; start_endempty(ctl->end, NULL); indent(); fprintf(out, "do\n"); print_loopblock(&ctl->block, ctl->empty!=NULL); indent(); fprintf(out, "while ("); print_expr(ctl->cond, NO_PAREN); fprintf(out, ");\n"); end_endempty(ctl->end, NULL, &ctl->loopinfo); break; } case S_FOR: case S_SWITCH: case S_ASSERT: case S_UNREACH: /* TODO */ break; case S_NOP: /* No operation */ break; } } static void print_block(struct Block *block) { struct Stmt *stmt; print_brace('{'); indentlevel++; if (block) { for (stmt = &block->stmt; stmt; stmt = stmt->next) { print_stmt_decls(stmt); } for (stmt = &block->stmt; stmt; stmt = stmt->next) { print_stmt(stmt); } } indentlevel--; print_brace('}'); } static void print_loopblock(struct Block *block, int has_loopempty) { struct Stmt *stmt; print_brace('{'); indentlevel++; if (block) { for (stmt = &block->stmt; stmt; stmt = stmt->next) { print_stmt_decls(stmt); } } if (has_loopempty) { indent(); fprintf(out, "_LRL_loopempty = 0;\n"); } if (block) { for (stmt = &block->stmt; stmt; stmt = stmt->next) { print_stmt(stmt); } } indentlevel--; print_brace('}'); } static void start_endempty(struct Block *end, struct Block *empty) { if (!end && !empty) return; if (empty) { print_brace('{'); indentlevel++; indent(); fprintf(out, "int _LRL_loopempty = 1;\n"); } } static void end_endempty(struct Block *end, struct Block *empty, struct LoopInfo *loop) { if (end) { print_block(end); } if (empty) { indent(); fprintf(out, "if (_LRL_loopempty)\n"); print_block(empty); indentlevel--; print_brace('}'); } if (loop->has_break) { indent(); fprintf(out, "_LRL_loopout%lu:;\n", loop->gotoid); } } int output_funcbody(struct Ident *ident, struct Block *funcbody) { print_type(&ident->decl.type, ident->name); indentlevel = 0; putc('\n', out); returntype = &ident->decl.type.kind.func->returntype; print_block(funcbody); return !ferror(out); } int output_initval(struct Ident *ident, struct Expr *initval) { print_type(&ident->decl.type, ident->name); fprintf(out, " = "); print_expr(initval, NO_PAREN); fprintf(out, ";\n"); return !ferror(out); }