/* c_builtin.c -- Interop for C, using a built-in parser Copyright © 2013-2014 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #define _POSIX_C_SOURCE 2 #include "../interop.h" #include "../configfile.h" #include "../context_private.h" #include "../misc.h" #include "../platform.h" #include "../verify.h" #include #if ENABLE_INTEROP_C_BUILTIN != 0 typedef enum { Unknown, TokenEOF, TooLong, Identifier, Number, LParen, RParen, LCurly, RCurly, LSquare, RSquare, Comma, Colon, Semicolon, Asterisk, Conditional, Assign, Dot, TripleDots, Plus, Minus, Slash, Modulo, Ampersand, Pipe, Circumflex, Compl, LShift, RShift, Not, And, Or, Equal, NotEqual, Less, LessEqual, Greater, GreaterEqual, KWAlignof, KWAsm, KWAttribute, KWConst, KWEnum, KWExtern, KWRestrict, KWSizeof, KWStatic, KWStruct, KWUnion, KWTypedef, KWVoid, KWVolatile } CTokenType; #define MAXIDENT 256 #define MAXFILENAME 256 typedef struct { LRLCtx *ctx; LRLASTInterop *interop_ast; const LRLToken *srctok; /* points to the interop name token */ /* C source input */ FILE *pipe; /* C parser state */ CTokenType token; char tokstr[MAXIDENT]; int toklen; /* Error reporting state */ char filename[MAXFILENAME]; int line; /* Interop options */ size_t num_strip; const char **strip; LRLASTDefList *def_first, *def_last; } CInteropCtx; static LRLASTType *parse_c_type(CInteropCtx *cictx, LRLIdent **target_ident, LRLIdent **ext_ident); static LRLToken *make_linkname_token(const char *tokstr, size_t toklen); static void report_error(CInteropCtx *cictx, LRLErrorType errcode) { lrl_err_set_interop(cictx->ctx, cictx->filename, cictx->line, 0, cictx->interop_ast); lrl_err_finish(cictx->ctx, errcode); } static int is_supported(const char *name, size_t namelen) { static const char supported[] = "c\0c89\0c90\0c99\0c_builtin\0"; const char *elem = supported; while (*elem) { size_t elemlen = strlen(elem); if (namelen == elemlen && !strncmp(name, elem, namelen)) { return 1; } elem += elemlen+1; } return 0; } static const char interop_scope_name[] = "+c_builtin_interop"; static const char typedef_str[] = "typedef HeaderSearch = enum (system, current_dir);\n" 
"namespace Option {\n" " typedef here = private^;\n" /* fake value */ " Option strip(char+> prefix);\n" "}\n" "typedef InteropType = (\n" " HeaderSearch search_type,\n" " char+> filename,\n" " Option#[undefined]^ options,\n" ");\n"; typedef enum { System, CurrentDir } HeaderSearch; static const LRLASTType *get_options_type(LRLCtx *ctx) { LRLIdent *interop_scope, *ident; ctx->no_filesystem++; interop_scope = lrl_ident_get_string(ctx->internals_scope, interop_scope_name); if (!interop_scope) { LRLToken *tokens; LRLASTNamespace *ast; interop_scope = lrl_ident_insert_string(ctx, ctx->internals_scope, (char*)interop_scope_name); lrl_tokenize(ctx, &tokens, typedef_str); ast = lrl_parse(ctx, interop_scope, tokens); lrl_vfy_namespace(ctx, ast); } ident = lrl_ident_get_string(interop_scope, "InteropType"); ctx->no_filesystem--; return ident->def_node->def.kind.type.type; } /** * Extracts options from the interop AST subtree in cictx. The header * search type and filename options are placed in the corresponding * arguments, while other options are stored directly in cictx. * * Returns 0 if successfull, 1 if the AST is incorrect or 2 if the AST hasn't * been fully verified (by verify_expr() etc.) yet. 
*/
static int extract_options(CInteropCtx *cictx, HeaderSearch *headertype,
                           char **header_filename)
{
    const LRLASTInterop *iast = cictx->interop_ast;
    const LRLASTExpr *searchdir_expr, *filename_expr, *extraopts_expr;
    const LRLCodeLocation *headertype_loc;
    const LRLToken *filename_token;
    size_t oi;
    const LRLASTExpr *optarr;
    size_t capa_strip;
    char *res_hdrfilename = NULL;

    /* Check the AST */
    if (!iast->options_expr->typeref.type) goto not_ready;
    if (iast->options_expr->typeref.type != iast->options_type ||
        iast->options_expr->ast_type != LRL_AST_Value_Struct ||
        iast->options_expr->kind.struc.values.num_args != 3) goto error;

    /* The three struct members: search type, filename and extra options */
    searchdir_expr = iast->options_expr->kind.struc.values.values[0];
    filename_expr = iast->options_expr->kind.struc.values.values[1];
    extraopts_expr = iast->options_expr->kind.struc.values.values[2];
    if (!searchdir_expr || !searchdir_expr->typeref.type ||
        !filename_expr || !filename_expr->typeref.type ||
        !extraopts_expr || !extraopts_expr->typeref.type) goto not_ready;

    /* TODO check types (not real types, but we could evaluate the values with constexpr) */
    if (searchdir_expr->ast_type != LRL_AST_Value_TypeIdent ||
        searchdir_expr->kind.typeident.identref.first_token->type != LRL_TT_Ident ||
        searchdir_expr->kind.typeident.identref.first_token[1].type == LRL_Sym_NamespaceSep ||
        filename_expr->ast_type != LRL_AST_Value_Scalar ||
        filename_expr->kind.scalar.token->type != LRL_TT_String ||
        extraopts_expr->ast_type != LRL_AST_Expr_UnaryOp ||
        extraopts_expr->kind.unary_op.token_type != LRL_Op_AddrOf) goto error;

    optarr = extraopts_expr->kind.unary_op.operand;
    if (!optarr) goto not_ready;
    if (optarr->ast_type != LRL_AST_Value_Array) goto error;

    /* The AST is OK. Extract the values */
    headertype_loc = &searchdir_expr->kind.typeident.identref.first_token->loc;
    if (headertype_loc->length == 11 &&
        !strncmp(headertype_loc->start, "current_dir", 11)) {
        *headertype = CurrentDir;
    } else if (headertype_loc->length == 6 &&
               !strncmp(headertype_loc->start, "system", 6)) {
        *headertype = System;
    } else {
        goto error;
    }

    filename_token = filename_expr->kind.scalar.token;
    /* The two quote characters are removed below, so the token has to be
       at least that long. Otherwise the unsigned "length-2" wraps around.
       The same check is made for the "strip" option. */
    if (filename_token->loc.length < 2) goto error;
    res_hdrfilename = malloc(filename_token->loc.length-2+1); /* -" +null */
    memcpy(res_hdrfilename, filename_token->loc.start+1,
           filename_token->loc.length-2);
    res_hdrfilename[filename_token->loc.length-2] = '\0';

    /* Extract extra options. These are in the form :function(args...) */
    init_list(&cictx->strip, &cictx->num_strip, &capa_strip, 2);
    for (oi = 0; oi < optarr->kind.array.values.num_args; oi++) {
        const LRLASTExpr *opt = optarr->kind.array.values.values[oi];
        const LRLIdentRef *identref;
        const char *name;
        size_t namelen;
        LRLASTExpr **args;
        size_t num_args;

        /* Get function name */
        if (!opt) goto not_ready;
        if (opt->ast_type != LRL_AST_Expr_Call) goto error;
        if (!opt->kind.call.function) goto not_ready;
        if (opt->kind.call.function->ast_type != LRL_AST_Value_TypeIdent) goto error;
        identref = &opt->kind.call.function->kind.ident.identref;
        if (!identref->ident) goto not_ready;

        name = identref->first_token->loc.start;
        namelen = identref->first_token->loc.length;
        num_args = opt->kind.call.args.num_args;
        args = opt->kind.call.args.values;

        if (namelen == 5 && !strncmp(name, "strip", 5)) {
            char *prefix;
            size_t prefixlen;

            if (num_args != 1) goto error;
            /* A missing argument node is treated like the other missing
               child nodes above */
            if (!args[0]) goto not_ready;
            if (args[0]->ast_type != LRL_AST_Value_Scalar ||
                args[0]->kind.scalar.token->type != LRL_TT_String ||
                args[0]->kind.scalar.token->loc.length < 2) goto error;

            /* Copy the string without the surrounding quotes */
            prefixlen = args[0]->kind.scalar.token->loc.length-2;
            prefix = malloc(prefixlen+1);
            memcpy(prefix, args[0]->kind.scalar.token->loc.start+1, prefixlen);
            prefix[prefixlen] = '\0';
            list_push(&cictx->strip, &cictx->num_strip, &capa_strip, prefix);
        } else goto error;
    }

    *header_filename =
res_hdrfilename; return 0; error: free(res_hdrfilename); return 1; not_ready: free(res_hdrfilename); return 2; } static FILE *open_preprocessor_pipe(LRLCtx *ctx, HeaderSearch headertype, const char *dir, const char *header_filename) { FILE *file; const char *cpp = lrl_config_get(ctx->config, "interop c_builtin", "cpp", "cpp"); const char *cppopt = lrl_config_get(ctx->config, "interop c_builtin", "cppopt", "-E -x c -std=c89 -dD"); char *cmd; if (headertype == CurrentDir) { static const char cmdfmt[] = "%s %s " QUOTESTR "%s%s" QUOTESTR; cmd = malloc(strlen(cmdfmt)-(4*2) + strlen(cpp)+3+strlen(cppopt)+ strlen(dir)+strlen(header_filename)+1); sprintf(cmd, cmdfmt, cpp, cppopt, dir, header_filename); } else { /* System header */ /* FIXME this will probably not work on Windows */ static const char cmdfmt[] = "echo " QUOTESTR "#include <%s>" QUOTESTR " | %s %s -"; cmd = malloc(strlen(cmdfmt)-(3*2) + strlen(header_filename) + strlen(cpp)+3+strlen(cppopt)+1); sprintf(cmd, cmdfmt, header_filename, cpp, cppopt); } fprintf(stderr, "running >%s<\n", cmd); file = popen(cmd, "r"); free(cmd); return file; } /** * Creates an LRL alias from a C #define * * If is_number is set, then "value" is interpreted as a number. Otherwise * it is interpreted as a reference to another #define. */ static void create_alias(CInteropCtx *cictx, const char *ident, const char *value, int is_number) { /* TODO */ /*fprintf(stderr, "alias [%s] %d[%s]\n", ident, is_number, value);*/ } #define SKIP_WHITESPACE \ while (ch == ' ' || ch == '\t') { \ ch = fgetc(pipe); \ if (ch == EOF) return cictx->token=TokenEOF; \ } #define READ_NUMBER do { \ do { \ ch = fgetc(pipe); \ cictx->tokstr[cictx->toklen++] = ch; \ } while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || \ (ch >= 'A' && ch <= 'Z') || ch == '.' 
|| \ cictx->toklen >= MAXIDENT-1); \ ungetc(ch, pipe); \ cictx->toklen--; \ cictx->tokstr[cictx->toklen] = '\0'; \ } while (0) #define READ_IDENTIFIER do { \ do { \ ch = fgetc(pipe); \ cictx->tokstr[cictx->toklen++] = ch; \ if (cictx->toklen >= MAXIDENT-1) { \ cictx->tokstr[cictx->toklen] = '\0'; \ return cictx->token=TooLong; \ } \ } while ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || \ (ch >= 'A' && ch <= 'Z') || ch == '_'); \ ungetc(ch, pipe); \ cictx->toklen--; \ cictx->tokstr[cictx->toklen] = '\0'; \ } while (0) static CTokenType nexttoken(CInteropCtx *cictx) { int ch; FILE *pipe = cictx->pipe; ignore_token: ch = fgetc(pipe); cictx->toklen = 1; again: cictx->tokstr[0] = ch; switch (ch) { case EOF: cictx->toklen = 0; return cictx->token=TokenEOF; case '#': /* Try to read non-standard line info from the preprocessor */ ch = fgetc(pipe); if (ch == EOF) return cictx->token=TokenEOF; while (ch == ' ' || ch == '\t') { ch = fgetc(pipe); } if (ch >= '0' && ch <= '9') { /* Line information from the preprocessor */ int line; int res; ungetc(ch, pipe); res = fscanf(pipe, "%d", &line); if (res == EOF) return cictx->token=TokenEOF; else if (res == 1 && line > 0) { cictx->line = line-1; /* TODO use proper string parsing */ res = fscanf(pipe, " \"%255s", cictx->filename); if (res == EOF) { return cictx->token=TokenEOF; } else if (!res) { memcpy(cictx->filename, "", 10); } else { char *endq = strrchr(cictx->filename, '"'); if (endq) *endq = '\0'; } } } else if (ch == 'd') { char cmd[10]; char name[MAXIDENT]; int res = fscanf(pipe, "%9s", cmd); if (res == EOF) { return cictx->token=TokenEOF; } else if (res && !strcmp(cmd, "efine")) { short ni = 0; int number; /* Read name of define */ ch = fgetc(pipe); SKIP_WHITESPACE; for (;;) { if (ch == EOF) return cictx->token=TokenEOF; if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_')) break; if (ni >= MAXIDENT-2) goto skip_pp; name[ni++] = ch; ch = fgetc(pipe); } name[ni] = 
'\0'; /* Read rest of line, and try to parse it as an integer constant or identifier. */ SKIP_WHITESPACE; if (ch == '\n' || ch == '\r') { /* #define D */ goto again; } else if (ch >= '0' && ch <= '9') { /* #define D 123 */ ungetc(ch, pipe); cictx->toklen = 0; READ_NUMBER; number = 1; } else { /* #define D OTHER */ ungetc(ch, pipe); cictx->toklen = 0; READ_IDENTIFIER; number = 0; } cictx->toklen = 1; if (!cictx->toklen) goto skip_pp; /* Unable to parse */ while (ch == ' ' || ch == '\t') { ch = fgetc(pipe); if (ch == EOF) return cictx->token=TokenEOF; } if (ch != '\n' && ch != '\r') { goto skip_pp; /* Unable to parse */ } create_alias(cictx, name, cictx->tokstr, number); goto again; } } /* Ignore all other preprocessor stuff */ skip_pp: while (ch != '\n' && ch != '\r' && ch != EOF) { ch = fgetc(pipe); } if (ch == '\r') { ch = fgetc(pipe); if (ch == EOF) return cictx->token=TokenEOF; if (ch != '\n') ungetc(ch, pipe); } cictx->line++; goto again; case '\r': /* Windows or old Mac line ending */ ch = fgetc(pipe); if (ch != '\n') cictx->line++; goto again; case '\n': cictx->line++; /* Fall through */ case ' ': case '\t': case '\v': case '\f': /* Ignore whitespace */ ch = fgetc(pipe); goto again; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': READ_NUMBER; return cictx->token=Number; case '(': return cictx->token=LParen; case ')': return cictx->token=RParen; case '{': return cictx->token=LCurly; case '}': return cictx->token=RCurly; case '[': return cictx->token=LSquare; case ']': return cictx->token=RSquare; case ',': return cictx->token=Comma; case ':': return cictx->token=Colon; case ';': return cictx->token=Semicolon; case '+': return cictx->token=Plus; case '-': return cictx->token=Minus; case '*': return cictx->token=Asterisk; case '/': return cictx->token=Slash; case '%': return cictx->token=Modulo; case '?': return cictx->token=Conditional; case '&': ch = fgetc(pipe); if (ch == '&') { return cictx->token=And; } 
else { ungetc(ch, pipe); return cictx->token=Ampersand; } case '|': ch = fgetc(pipe); if (ch == '&') { return cictx->token=Or; } else { ungetc(ch, pipe); return cictx->token=Pipe; } case '^': return cictx->token=Circumflex; case '~': return cictx->token=Compl; case '<': ch = fgetc(pipe); if (ch == '<') { return cictx->token=LShift; } else if (ch == '=') { return cictx->token=LessEqual; } else { ungetc(ch, pipe); return cictx->token=Less; } case '>': ch = fgetc(pipe); if (ch == '>') { return cictx->token=RShift; } else if (ch == '=') { return cictx->token=GreaterEqual; } else { ungetc(ch, pipe); return cictx->token=Greater; } case '=': ch = fgetc(pipe); if (ch == '&') { return cictx->token=Equal; } else { ungetc(ch, pipe); return cictx->token=Assign; } case '!': ch = fgetc(pipe); if (ch == '=') { return cictx->token=NotEqual; } else { ungetc(ch, pipe); return cictx->token=Not; } case '.': ch = fgetc(pipe); if (ch != '.') { ungetc(ch, pipe); return cictx->token=Dot; } ch = fgetc(pipe); if (ch != '.') { ungetc(ch, pipe); report_error(cictx, LRL_Err_CI_UnknownToken); return cictx->token=Unknown; } return cictx->token=TripleDots; default: if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_') { const char *name; /* Identifier */ READ_IDENTIFIER; /* TODO add all keywords */ /* TODO ifs could be optimized */ name = cictx->tokstr; if (!strcmp(name, "__asm__")) return cictx->token=KWAsm; else if (!strcmp(name, "__attribute__")) return cictx->token=KWAttribute; else if (!strcmp(name, "__extension__")) goto ignore_token; else if (!strcmp(name, "_Alignof")) return cictx->token=KWAlignof; else if (!strcmp(name, "const")) return cictx->token=KWConst; else if (!strcmp(name, "enum")) return cictx->token=KWEnum; else if (!strcmp(name, "extern")) return cictx->token=KWExtern; else if (!strcmp(name, "inline") || !strcmp(name, "__inline") || !strcmp(name, "__inline__")) goto ignore_token; /* TODO implement */ else if (!strcmp(name, "restrict") || !strcmp(name, 
"__restrict")) return cictx->token=KWRestrict; else if (!strcmp(name, "sizeof")) return cictx->token=KWSizeof; else if (!strcmp(name, "static")) return cictx->token=KWStatic; else if (!strcmp(name, "struct")) return cictx->token=KWStruct; else if (!strcmp(name, "typedef")) return cictx->token=KWTypedef; else if (!strcmp(name, "union")) return cictx->token=KWUnion; else if (!strcmp(name, "void")) return cictx->token=KWVoid; else if (!strcmp(name, "volatile")) return cictx->token=KWVolatile; else return cictx->token=Identifier; } else { return cictx->token=Unknown; } } } static LRLTypeQualifiers read_quals(CInteropCtx *cictx) { LRLTypeQualifiers quals = 0; while (1) { if (cictx->token == KWConst) { quals |= LRL_Qual_Const; } else if (cictx->token == KWRestrict) { /* TODO implement restrict. should at least remove the "shared" qualifier */ } else if (cictx->token == KWVolatile) { quals |= LRL_Qual_Shared; /* not exactly the same */ } else break; nexttoken(cictx); } if ((quals & LRL_Qual_Const) == 0) { quals |= LRL_Qual_Var; } return quals; } static void skip_attributes(CInteropCtx *cictx) { while (1) { int depth; if (cictx->token == KWAttribute && nexttoken(cictx) == LParen && nexttoken(cictx) == LParen) { depth = 2; } else if (cictx->token == KWAsm && nexttoken(cictx) == LParen) { depth = 1; } else break; while (depth) { nexttoken(cictx); if (cictx->token == TokenEOF) break; else if (cictx->token == LParen) depth++; else if (cictx->token == RParen) depth--; } if (cictx->token == RParen) { nexttoken(cictx); } } } /* TODO replace with something faster */ static LRLIdent *get_builtin_ident(LRLCtx *ctx, LRLBuiltinType bt) { return lrl_ident_get_string(ctx->builtins_scope, lrl_builtin_get_name(bt)); } static LRLIdent *get_external_ident(LRLCtx *ctx, const char *name, size_t namelen) { LRLToken nametok[2]; nametok[0].type = LRL_TT_Ident; nametok[0].loc.start = name; nametok[0].loc.length = namelen; nametok[1].type = LRL_TT_EOF; nametok[1].loc.start = NULL; 
nametok[1].loc.length = 0; return lrl_ident_get(ctx, ctx->external_scope, nametok, LRL_Ident_Find, NULL); } static LRLIdent *create_ident(CInteropCtx *cictx, LRLIdent *scope, char *name) { LRLIdent *newident; LRLToken *linknametok = NULL; size_t namelen = strlen(name); size_t i; if (scope != cictx->ctx->external_scope) { for (i = 0; i < cictx->num_strip; i++) { size_t prefixlen = strlen(cictx->strip[i]); if (prefixlen > namelen) continue; if (!strncmp(cictx->strip[i], name, prefixlen)) { linknametok = make_linkname_token(name, namelen); name += prefixlen; break; } } } newident = lrl_ident_insert_string(cictx->ctx, scope, name); newident->flags |= LRL_IdFl_FromInterop; newident->linkname = linknametok; return newident; } typedef struct { CTokenType toktype; int is_binary; const char *value; /* for identifiers, numbers etc. */ LRLASTType *typeop; /* for sizeof and _Alignof */ } CRPNEntry; typedef struct { unsigned char precedence; unsigned char right_assoc; enum { Binary, Ternary, Postfix, Prefix } type; } COpInfo; static COpInfo get_opinfo(CTokenType type, int operator_expected) { COpInfo info; switch ((int)type) { case Dot: info.precedence = 15; info.right_assoc = 0; info.type = Binary; break; case Not: case Compl: case KWSizeof: info.precedence = 14; info.right_assoc = 1; info.type = Prefix; break; case Asterisk: case Slash: case Modulo: info.precedence = 13; info.right_assoc = 0; info.type = Binary; break; case Plus: case Minus: if (operator_expected) { info.precedence = 12; info.right_assoc = 0; info.type = Binary; } else { info.precedence = 14; info.right_assoc = 1; info.type = Prefix; } break; case LShift: case RShift: info.precedence = 11; info.right_assoc = 0; info.type = Binary; break; case Less: case LessEqual: case Greater: case GreaterEqual: info.precedence = 10; info.right_assoc = 0; info.type = Binary; break; case Equal: case NotEqual: info.precedence = 9; info.right_assoc = 0; info.type = Binary; break; case Ampersand: if (operator_expected) { 
info.precedence = 8; info.right_assoc = 0; info.type = Binary; } else { info.precedence = 14; info.right_assoc = 1; info.type = Prefix; } break; case Circumflex: info.precedence = 7; info.right_assoc = 0; info.type = Binary; break; case Pipe: info.precedence = 6; info.right_assoc = 0; info.type = Binary; break; case And: info.precedence = 5; info.right_assoc = 0; info.type = Binary; break; case Or: info.precedence = 4; info.right_assoc = 0; info.type = Binary; break; case Conditional: case Colon: info.precedence = 3; info.right_assoc = 1; info.type = Ternary; break; case Assign: info.precedence = 2; info.right_assoc = 1; info.type = Binary; break; /*case Comma: info.precedence = 1; info.right_assoc = 0; info.type = Binary; break;*/ default: info.precedence = 0; info.right_assoc = 0; info.type = Binary; } return info; } static LRLTokenType ctok_to_lrltok(CTokenType toktype, int is_binary) { switch ((int)toktype) { case Identifier: return LRL_TT_Ident; case Number: return LRL_TT_Integer; case Asterisk: return is_binary ? LRL_Op_Times : LRL_Op_Deref; case Assign: return LRL_Op_Assign; case Plus: return LRL_Op_Plus; case Minus: return LRL_Op_Minus; case Slash: return LRL_Op_Divide; case Modulo: return LRL_Op_Modulo; case Ampersand: return is_binary ? 
LRL_Op_BitAnd : LRL_Op_AddrOf; case Pipe: return LRL_Op_BitOr; case Circumflex: return LRL_Op_BitXor; case Compl: return LRL_Op_Compl; case LShift: return LRL_Op_ShiftL; case RShift: return LRL_Op_ShiftR; case Not: return LRL_Op_LNot; case And: return LRL_Op_LAnd; case Or: return LRL_Op_LOr; case Equal: return LRL_Op_Equal; case NotEqual: return LRL_Op_NotEqual; case Less: return LRL_Op_Less; case LessEqual: return LRL_Op_LessEqual; case Greater: return LRL_Op_Greater; case GreaterEqual: return LRL_Op_GreaterEqual; /* TODO sizeof */ default: return LRL_TT_Error; } } /** * Converts a stack of tokens in Reverse Polish Notation to an AST substree * TODO type casts */ static LRLASTExpr *c_rpn_to_ast(CInteropCtx *cictx, CTokenType operator, const CRPNEntry *out_stack, size_t *out_size) { LRLASTExpr *expr; const CRPNEntry *entry; CTokenType type; COpInfo op; if (*out_size == 0) { if (operator) { report_error(cictx, LRL_Err_CI_MissingOperandInExpr); } return NULL; } /* Pop last token */ entry = &out_stack[--*out_size]; type = entry->toktype; op = get_opinfo(type, entry->is_binary); expr = malloc(sizeof(LRLASTExpr)); expr->from = cictx->srctok; expr->to = cictx->srctok; memset(&expr->typeref, 0, sizeof(LRLTypeRef)); if (type == KWSizeof || type == KWAlignof) { /* Special operators that accept types as operands. 
*/ /* We translate sizeof(T) into sizeof(undefined as T) */ LRLASTExpr *undefexpr = malloc(sizeof(LRLASTExpr)); LRLASTExpr *asexpr = malloc(sizeof(LRLASTExpr)); undefexpr->ast_type = LRL_AST_Value_Undefined; undefexpr->from = undefexpr->to = cictx->srctok; undefexpr->typeref.quals = 0; undefexpr->typeref.prm = NULL; undefexpr->typeref.type = entry->typeop; asexpr->ast_type = LRL_AST_Expr_As; asexpr->from = asexpr->to = cictx->srctok; asexpr->typeref.quals = 0; asexpr->typeref.prm = NULL; asexpr->typeref.type = entry->typeop; asexpr->kind.asexpr.type = entry->typeop; asexpr->kind.asexpr.expr = undefexpr; expr->ast_type = LRL_AST_Expr_UnaryOp; expr->kind.unary_op.token_type = (type == KWSizeof ? LRL_Op_SizeOf : LRL_Op_AlignOf); expr->kind.unary_op.operand = asexpr; } else if (!op.precedence) { /* Values */ switch ((int)type) { case Identifier: { size_t namelen = strlen(entry->value); LRLIdent *extident = get_external_ident(cictx->ctx, entry->value, namelen); if (extident && (extident->flags & LRL_IdFl_EnumValue) != 0 && extident->def_node && extident->def_node->ast_type == LRL_AST_Def_Data && extident->def_node->def.kind.data.value) { /* Enum identifier. These are NOT compatible with their base types in LRL, so we replace the value with the base value. 
*/ free(expr); return extident->def_node->def.kind.data.value; } else { /* Normal identifier (or enum without explicit value) */ LRLIdentRef *identref = &expr->kind.ident.identref; LRLToken *nametok = malloc(2*sizeof(LRLToken)); nametok[0].type = LRL_TT_Ident; nametok[0].loc.start = entry->value; nametok[0].loc.length = namelen; nametok[1].type = LRL_TT_EOF; nametok[1].loc.start = NULL; nametok[1].loc.length = 0; expr->ast_type = LRL_AST_Value_Ident; identref->ident = NULL; /* Deferred */ identref->first_token = nametok; identref->scope = cictx->ctx->external_scope; identref->next = LRL_IDENTREF_NEW; lrl_ident_defer(cictx->ctx, identref); expr->kind.ident.type_params = NULL; if (extident && (extident->flags & LRL_IdFl_EnumValue) != 0) { /* Enum without explicit value. Use "enumbase" operation. */ LRLASTExpr *ebexpr = malloc(sizeof(LRLASTExpr)); ebexpr->ast_type = LRL_AST_Expr_UnaryOp; ebexpr->from = ebexpr->to = cictx->srctok; ebexpr->typeref.quals = 0; ebexpr->typeref.prm = NULL; ebexpr->typeref.type = entry->typeop; expr->typeref.type = NULL; ebexpr->kind.unary_op.token_type = LRL_Op_EnumBase; ebexpr->kind.unary_op.operand = expr; expr = ebexpr; } } break; } case Number: { LRLToken *token = malloc(sizeof(LRLToken)); token->type = LRL_TT_Integer; /* TODO could be floating point also */ token->loc.start = entry->value; token->loc.length = strlen(entry->value); expr->ast_type = LRL_AST_Value_Scalar; expr->kind.scalar.token = token; expr->kind.scalar.is_negative = 0; /* it's set from unary op */ break; } case LRL_Sym_LParen: /* TODO not implemented */ fprintf(stderr, "function call and array index exprs aren't implemented in the C interop\n"); break; default: report_error(cictx, LRL_Err_CI_UnexpectedTokenInExpr); goto error; } } else if (op.type == Binary) { /* Binary operators */ expr->ast_type = LRL_AST_Expr_BinaryOp; expr->kind.binary_op.token_type = ctok_to_lrltok(type, entry->is_binary); expr->kind.binary_op.operand2 = c_rpn_to_ast(cictx, type, out_stack, 
out_size); expr->kind.binary_op.operand1 = c_rpn_to_ast(cictx, type, out_stack, out_size); } else if (op.type == Ternary) { /* Ternary operator. The ?: operator is the only one in C */ if (type != Colon) fail("c_interop_badternaryop"); expr->ast_type = LRL_AST_Expr_Conditional; expr->kind.conditional.falseexpr = c_rpn_to_ast(cictx, type, out_stack, out_size); expr->kind.conditional.trueexpr = c_rpn_to_ast(cictx, type, out_stack, out_size); expr->kind.conditional.condexpr = c_rpn_to_ast(cictx, type, out_stack, out_size); } else { /* Unary operators */ expr->ast_type = LRL_AST_Expr_UnaryOp; /* prefix or postfix */ expr->kind.unary_op.token_type = ctok_to_lrltok(type, entry->is_binary); expr->kind.unary_op.operand = c_rpn_to_ast(cictx, type, out_stack, out_size); /* Special handling of negative literals, e.g. -128 */ if (type == Minus && expr->kind.unary_op.operand->ast_type == LRL_AST_Value_Scalar) { expr->kind.unary_op.operand->kind.scalar.is_negative = 1; } } return expr; error: free(expr); return NULL; } /** * Another modified version of the "Shunting-yard algorithm" by Edsger Dijkstra. 
* https://en.wikipedia.org/wiki/Shunting-yard_algorithm * TODO type casts */ static LRLASTExpr *parse_c_expr(CInteropCtx *cictx) { LRLASTExpr *expr; CRPNEntry entry; int operator_expected = 0; /* Operator stack */ size_t op_size, op_capacity; CRPNEntry *op_stack; /* Output stack */ size_t out_size, out_capacity; CRPNEntry *out_stack; init_list(&op_stack, &op_size, &op_capacity, 16); init_list(&out_stack, &out_size, &out_capacity, 16); while (1) { CTokenType toktype = cictx->token; entry.toktype = toktype; entry.is_binary = operator_expected; switch ((int)toktype) { case Identifier: case Number: { char *value; if (operator_expected) { report_error(cictx, LRL_Err_CI_OperatorExpected); break; } value = malloc(cictx->toklen+1); memcpy(value, cictx->tokstr, cictx->toklen); value[cictx->toklen] = '\0'; entry.value = value; list_push(&out_stack, &out_size, &out_capacity, entry); operator_expected = 1; break; } case LParen: /* Grouping parentesis or type cast */ nexttoken(cictx); /* TODO implement real parsing, and with pointers etc. 
*/ if (cictx->token == Identifier) { /* Simple hack so we can parse sys/select.h */ if (!strcmp(cictx->tokstr, "int")) { do { nexttoken(cictx); if (cictx->token == Semicolon) { report_error(cictx, LRL_Err_CI_UnexpectedSemicolon); break; } } while (cictx->token != Unknown && cictx->token != TokenEOF && cictx->token != RParen); break; } } list_push(&op_stack, &op_size, &op_capacity, entry); operator_expected = 0; continue; case RParen: { int found = 0; while (op_size > 0) { if (op_stack[op_size-1].toktype == LParen) { found = 1; break; } list_push(&out_stack, &out_size, &out_capacity, op_stack[op_size-1]); op_size--; } /* Check for end of expression */ if (!found) { report_error(cictx, LRL_Err_CI_TooManyClosingParens); goto finished; } operator_expected = 1; /* Pop the left parenthesis of the stack */ op_size--; break; } case Semicolon: case Comma: case LCurly: case RCurly: case RSquare: goto finished; case KWSizeof: case KWAlignof: { LRLIdent *dummyident; /* TODO Only sizeof(type) is handled, not sizeof expr */ if (operator_expected) { report_error(cictx, LRL_Err_CI_OperatorExpected); break; } nexttoken(cictx); if (cictx->token != LParen) { fprintf(stderr, "only sizeof(Type) and _Alignof(Type) are supported\n"); break; } nexttoken(cictx); entry.typeop = parse_c_type(cictx, &dummyident, NULL); if (cictx->token != RParen) { report_error(cictx, LRL_Err_CI_SizeOfEndError); } list_push(&out_stack, &out_size, &out_capacity, entry); operator_expected = 1; break; } case Colon: { int found; /* Last part of conditional ?: operator */ if (!operator_expected) { report_error(cictx, LRL_Err_CI_OperatorExpected); break; } operator_expected = 0; /* Pop everything off the stack up to and including "then" */ found = 0; while (op_size > 0) { const CRPNEntry *st_entry = &op_stack[op_size-1]; CTokenType st_type = st_entry->toktype; op_size--; if (st_type == Conditional) { /* deleted from stack */ found = 1; break; } list_push(&out_stack, &out_size, &out_capacity, *st_entry); } /* Check 
for end of expression */ if (!found) goto finished; /* Push ternary ":" operator to the operator stack */ list_push(&op_stack, &op_size, &op_capacity, entry); break; } default: { COpInfo op = get_opinfo(toktype, operator_expected); if (!op.precedence) { report_error(cictx, LRL_Err_CI_UnexpectedTokenInExpr); break; } /* Prefix operator? */ if (op.type == Prefix) { list_push(&op_stack, &op_size, &op_capacity, entry); operator_expected = 0; break; } /* Binary or postfix operator */ while (op_size > 0) { const CRPNEntry *st_entry = &op_stack[op_size-1]; const CTokenType st_type = st_entry->toktype; const COpInfo st = get_opinfo(st_type, operator_expected); if (!st.precedence) break; if ((!st.right_assoc && op.precedence <= st.precedence) || (st.right_assoc && op.precedence < st.precedence)) { /* Move to the output stack */ list_push(&out_stack, &out_size, &out_capacity, *st_entry); op_size--; continue; } break; } if (op.type == Postfix) { list_push(&out_stack, &out_size, &out_capacity, entry); operator_expected = 1; break; } /* Push to the operator stack */ list_push(&op_stack, &op_size, &op_capacity, entry); operator_expected = 0; } } nexttoken(cictx); } finished: /* Move remaining tokens on the operator stack to the output stack */ while (op_size > 0) { const CRPNEntry *st_entry = &op_stack[op_size-1]; CTokenType st_type = st_entry->toktype; if (st_type == LParen) { report_error(cictx, LRL_Err_CI_MismatchedParenthesisInExpr); break; } list_push(&out_stack, &out_size, &out_capacity, *st_entry); op_size--; } if (out_size == 0 || !operator_expected) { report_error(cictx, LRL_Err_CI_IncompleteExpr); expr = NULL; goto cleanup; } /* Read from stack in reverse order, and build the ASTExpr */ expr = c_rpn_to_ast(cictx, 0, out_stack, &out_size); if (out_size != 0) { report_error(cictx, LRL_Err_CI_IncompleteExpr); } /*printf("EXPR="); lrl_display_expr(expr); printf("\n");*/ cleanup: free(out_stack); free(op_stack); return expr; } static LRLToken *make_linkname_token(const char 
*tokstr, size_t toklen)
{
    /* Builds a newly allocated string token that contains tokstr in double
       quotes. tokstr has to be a null-terminated string of exactly toklen
       characters, because the buffer size is computed from toklen while
       sprintf copies up to the terminator. */
    char *str;
    LRLToken *linkname = malloc(sizeof(LRLToken));
    linkname->type = LRL_TT_String;
    str = malloc(toklen+3); /* two quotes + null terminator */
    sprintf(str, "\"%s\"", tokstr);
    linkname->loc.start = str;
    linkname->loc.length = toklen+2; /* the terminator is not counted */
    return linkname;
}

/* Placeholder identifier, used as the second member in the initializers
   of the two types below */
static LRLIdent empty_namespace;

/* Statically allocated struct type (LRL_AST_Type_Struct), without
   qualifiers. NOTE(review): judging from the name, the empty struct is
   what C "void" is translated to. Confirm at the places where it is used. */
static LRLASTType void_type = {
    LRL_AST_Type_Struct,
    0, /* quals */
    LRL_UNIQUEID_UNSET,
    NULL, NULL, /* from, to */
    { { NULL, &empty_namespace, NULL, NULL } }
};

/* Statically allocated "any" type (LRL_AST_Type_Any), without qualifiers */
static LRLASTType any_type = {
    LRL_AST_Type_Any,
    0, /* quals */
    LRL_UNIQUEID_UNSET,
    NULL, NULL, /* from, to */
    { { NULL, &empty_namespace, NULL, NULL } }
};

/**
 * The "innermost" type is for instance "char" in "(char *)(*x)[2];"
 */
static LRLASTType *translate_innermost_type(CInteropCtx *cictx)
{
    /* Read qualifiers */
    LRLTypeQualifiers quals = read_quals(cictx);

    if (cictx->token == Identifier) {
        LRLBuiltinType builtin;
        LRLToken *typetok;
        LRLASTType *type;
        int is_signed = 0, is_unsigned = 0;
        int is_short = 0, is_long = 0;
        int reuse_token = 0;
        const char *tokstr = cictx->tokstr;

        /* TODO ifs could be optimized */
        /* Read sign and size keywords */
        while (1) {
            if (!strcmp(tokstr, "signed")) is_signed = 1;
            else if (!strcmp(tokstr, "unsigned")) is_unsigned = 1;
            else if (!strcmp(tokstr, "short")) is_short = 1;
            else if (!strcmp(tokstr, "long")) is_long++; /* counted, for "long long" */
            else break;
            nexttoken(cictx);
            tokstr = cictx->tokstr;
        }

        /* Types that don't have short/long variants */
        if (!strcmp(tokstr, "char")) builtin = is_unsigned ? LRL_BT_byte : LRL_BT_char;
        else if (!strcmp(tokstr, "float")) builtin = LRL_BT_cfloat;
        else if (!strcmp(tokstr, "double")) {
            builtin = is_long ?
LRL_BT_clongdouble : LRL_BT_cdouble; } else if (!strcmp(tokstr, "_Float128")) builtin = LRL_BT_float128; else if (!strcmp(tokstr, "size_t")) builtin = LRL_BT_count; else if (!strcmp(tokstr, "uint8_t")) builtin = LRL_BT_uint8; else if (!strcmp(tokstr, "uint16_t")) builtin = LRL_BT_uint16; else if (!strcmp(tokstr, "uint32_t")) builtin = LRL_BT_uint32; else if (!strcmp(tokstr, "uint64_t")) builtin = LRL_BT_uint64; else if (!strcmp(tokstr, "uint128_t") || !strcmp(tokstr, "__uint128_t")) builtin = LRL_BT_uint128; else if (!strcmp(tokstr, "int8_t")) builtin = LRL_BT_int8; else if (!strcmp(tokstr, "int16_t")) builtin = LRL_BT_int16; else if (!strcmp(tokstr, "int32_t")) builtin = LRL_BT_int32; else if (!strcmp(tokstr, "int64_t")) builtin = LRL_BT_int64; else if (!strcmp(tokstr, "int128_t") || !strcmp(tokstr, "__int128_t")) builtin = LRL_BT_int128; else if (!strcmp(tokstr, "int") || is_signed || is_unsigned || is_short || is_long) { if (is_unsigned) { if (is_short) builtin = LRL_BT_ushort; else if (is_long == 1) builtin = LRL_BT_ulong; else if (is_long == 2) builtin = LRL_BT_ulonglong; else builtin = LRL_BT_uint; } else { if (is_short) builtin = LRL_BT_short; else if (is_long == 1) builtin = LRL_BT_long; else if (is_long == 2) builtin = LRL_BT_longlong; else builtin = LRL_BT_int; } reuse_token = (strcmp(tokstr, "int") != 0); } else { /* Identifier */ builtin = -1; } typetok = malloc(2*sizeof(LRLToken)); typetok[0].type = LRL_TT_Ident; if ((int)builtin == -1) { typetok[0].loc.start = lrl_strdup(tokstr); typetok[0].loc.length = cictx->toklen; } else { typetok[0].loc.start = lrl_builtin_get_name(builtin); typetok[0].loc.length = strlen(typetok->loc.start); } typetok[1].type = LRL_TT_EOF; typetok[1].loc.start = NULL; typetok[1].loc.length = 0; type = malloc(sizeof(LRLASTType)); type->ast_type = LRL_AST_Type_Ident; type->from = type->to = cictx->srctok; type->quals = quals; type->unique_id = LRL_UNIQUEID_UNSET; type->kind.identref.first_token = typetok; if ((int)builtin == -1) { 
/* Identifier type */ type->kind.identref.ident = lrl_ident_get_string(cictx->ctx->external_scope, tokstr); if (!type->kind.identref.ident) { report_error(cictx, LRL_Err_CI_NoSuchType); } type->kind.identref.scope = cictx->ctx->external_scope; } else { /* Builtin type */ type->kind.identref.ident = get_builtin_ident(cictx->ctx, builtin); if (!type->kind.identref.ident) { fail("c_interop_builtinnotfound"); } type->kind.identref.scope = cictx->ctx->builtins_scope; } type->kind.identref.next = NULL; if (!reuse_token) { nexttoken(cictx); } return type; } else if (cictx->token == KWVoid) { nexttoken(cictx); return &any_type; } else if (cictx->token == KWStruct || cictx->token == KWUnion || cictx->token == KWEnum) { int is_enum = cictx->token = (cictx->token == KWEnum); int is_union = cictx->token = (cictx->token == KWUnion); LRLASTType *type = NULL, *identtype = NULL; LRLIdent *members_scope = calloc(1, sizeof(LRLIdent)); LRLASTDefList **member_inspoint; LRLToken *nametok; /* Identifier */ nexttoken(cictx); if (cictx->token == Identifier) { char *fullname; /* Create "_Tagged:xx" token */ fullname = malloc(8+cictx->toklen+1); memcpy(fullname, "_Tagged:", 8); memcpy(fullname+8, cictx->tokstr, cictx->toklen); fullname[8+cictx->toklen] = '\0'; nametok = malloc(4*sizeof(LRLToken)); nametok[0].type = LRL_TT_Ident; nametok[0].loc.start = fullname; nametok[0].loc.length = 7; nametok[1].type = LRL_Sym_NamespaceSep; nametok[1].loc.start = fullname+7; nametok[1].loc.length = 1; nametok[2].type = LRL_TT_Ident; nametok[2].loc.start = fullname+8; nametok[2].loc.length = cictx->toklen; nametok[3].type = LRL_TT_EOF; nametok[3].loc.start = NULL; nametok[3].loc.length = 0; identtype = malloc(sizeof(LRLASTType)); identtype->ast_type = LRL_AST_Type_Ident; identtype->from = identtype->to = cictx->srctok; identtype->quals = quals; identtype->unique_id = LRL_UNIQUEID_UNSET; identtype->kind.identref.first_token = nametok; identtype->kind.identref.ident = NULL; identtype->kind.identref.scope = 
cictx->ctx->external_scope; identtype->kind.identref.next = LRL_IDENTREF_NEW; nexttoken(cictx); } if (cictx->token == LCurly) { nexttoken(cictx); /* Create type */ type = malloc(sizeof(LRLASTType)); type->from = type->to = cictx->srctok; type->quals = quals; type->unique_id = LRL_UNIQUEID_UNSET; if (!is_enum) { type->ast_type = is_union ? LRL_AST_Type_Union : LRL_AST_Type_Struct; type->kind.struc.scope = members_scope; type->kind.struc.members = NULL; type->kind.struc.flags = 0; member_inspoint = &type->kind.struc.members; } else { type->ast_type = LRL_AST_Type_Enum; type->kind.enu.scope = members_scope; type->kind.enu.values = NULL; type->kind.enu.base_type = lrl_builtin_get_type(LRL_BT_int); member_inspoint = &type->kind.enu.values; } /* Struct/union/enum members */ while (cictx->token != RCurly && cictx->token != TokenEOF) { /* TODO handle , in defs e.g. "int *p, *q" */ LRLIdent *memberident = members_scope; LRLASTDefList *member = malloc(sizeof(LRLASTDefList)); member->next = NULL; member->def.ast_type = LRL_AST_Def_Data; if (!is_enum) { /* read struct/union member */ LRLASTType *membertype = parse_c_type(cictx, &memberident, NULL); if (cictx->token == Colon) { if (nexttoken(cictx) == Number) { /* bitfield */ /* TODO implement */ fprintf(stderr, "bitfields are not yet implemented\n"); nexttoken(cictx); } else { report_error(cictx, LRL_Err_CI_ExpectedBitFieldSize); } } if (memberident) { memberident->flags |= LRL_IdFl_StructMember; } member->def.kind.data.ident = memberident; member->def.kind.data.flags = 0; member->def.kind.data.type = membertype; } else if (cictx->token == Identifier) { LRLIdent *globalident; /* read enum value */ memberident = create_ident(cictx, members_scope, lrl_strdup(cictx->tokstr)); memberident->flags |= LRL_IdFl_EnumValue; memberident->linkname = make_linkname_token(cictx->tokstr, cictx->toklen); member->def.kind.data.ident = memberident; member->def.kind.data.flags = 0; member->def.kind.data.type = type; /* enum values are declared in 
the main scope */ globalident = create_ident(cictx, cictx->ctx->external_scope, lrl_strdup(cictx->tokstr)); globalident->flags |= LRL_IdFl_EnumValue; globalident->linkname = memberident->linkname; globalident->def_node = (LRLASTDefOrStmt*)&member->def; nexttoken(cictx); } else { report_error(cictx, LRL_Err_CI_ExpectedIdentInEnum); free(member); break; } if (memberident) { memberident->def_node = (LRLASTDefOrStmt*)&member->def; } *member_inspoint = member; member_inspoint = &member->next; /* Read enum value */ member->def.kind.data.value = (is_enum && cictx->token == Assign && nexttoken(cictx) != TokenEOF ? parse_c_expr(cictx) : NULL); if (cictx->token == (is_enum ? Comma : Semicolon)) { nexttoken(cictx); } else if (cictx->token == RCurly) { break; } else { /* TODO include info in error and remove fprintf */ fprintf(stderr, "unexpected token in %s type: %.*s\n", is_enum ? "enum" : is_union ? "union" : "struct", cictx->toklen, cictx->tokstr); report_error(cictx, LRL_Err_CI_UnexpectedToken); break; } } if (cictx->token == RCurly) { nexttoken(cictx); } /* End of members */ } /* A "struct X" or "struct X { ... }" is not only a type but also a definition of a tagged struct type. Define it here. 
*/ if (identtype) { LRLASTType *tagtype; LRLASTDefList *def = NULL; LRLIdent *tagident; tagident = lrl_ident_get(cictx->ctx, cictx->ctx->external_scope, nametok, LRL_Ident_Extend, NULL); if (!tagident) { fail("c_interop_nulltagident1"); } tagident->flags &= ~LRL_IdFl_NotLoaded; tagident->flags |= LRL_IdFl_FromInterop; tagident->linkname = make_linkname_token(tagident->def_token->loc.start, tagident->def_token->loc.length); ((LRLIdent*)tagident->scope)->flags &= ~LRL_IdFl_NotLoaded; identtype->kind.identref.ident = tagident; if (!tagident->def_node || tagident->def_node->ast_type != LRL_AST_Def_Type || !tagident->def_node->def.kind.type.type || tagident->def_node->def.kind.type.type->ast_type == LRL_AST_Type_Private) { if (type) { /* Create "typedef _Tagged:xx = ; */ tagtype = type; } else { /* Create "typedef _Tagged:xx = private" */ tagtype = malloc(sizeof(LRLASTType)); tagtype->ast_type = LRL_AST_Type_Private; tagtype->from = tagtype->to = cictx->srctok; tagtype->quals = quals; tagtype->unique_id = LRL_UNIQUEID_UNSET; } linked_append(&def, &cictx->def_first, &cictx->def_last); def->def.ast_type = LRL_AST_Def_Type; def->def.kind.type.ident = tagident; def->def.kind.type.flags = LRL_DeFl_Internal_MaybeIncomplete; def->def.kind.type.type = tagtype; def->def.kind.type.typenames = NULL; tagident->def_node = (LRLASTDefOrStmt*)&def->def; /* Insert enum values into the tag scope */ memcpy(&tagident->contents, &members_scope->contents, sizeof(members_scope->contents)); /* Insert identifier into target scope also */ tagident = lrl_ident_get(cictx->ctx, cictx->interop_ast->ident, nametok, LRL_Ident_Extend, NULL); if (!tagident) { fail("c_interop_nulltagident2"); } tagident->def_node = (LRLASTDefOrStmt*)&def->def; tagident->flags &= ~LRL_IdFl_NotLoaded; tagident->flags |= LRL_IdFl_FromInterop; tagident->linkname = make_linkname_token(tagident->def_token->loc.start, tagident->def_token->loc.length); ((LRLIdent*)tagident->scope)->flags &= ~LRL_IdFl_NotLoaded; /* Insert enum 
values into the target scope */ memcpy(&tagident->contents, &members_scope->contents, sizeof(members_scope->contents)); } } return identtype ? identtype : type; } else { /* TODO handle error */ return NULL; } } /** * Changes C void types into the proper types */ static void fix_type(LRLASTType *type) { LRLASTType *current = type; while (current) { switch (current->ast_type) { case LRL_AST_Type_Pointer: current = current->kind.pointer.type; break; case LRL_AST_Type_Function: fix_type(current->kind.function.args); if (current->kind.function.ret == &any_type) { current->kind.function.ret = &void_type; return; } else { current = current->kind.function.ret; } break; case LRL_AST_Type_Struct: case LRL_AST_Type_Union: { LRLASTDefList *member = current->kind.struc.members; for (; member; member = member->next) { fix_type(member->def.kind.data.type); } return; } case LRL_AST_Type_Ident: case LRL_AST_Type_Enum: case LRL_AST_Type_Bitfield: case LRL_AST_Type_Array: case LRL_AST_Type_Optional: case LRL_AST_Type_Parametric: case LRL_AST_Type_Builtin: case LRL_AST_Type_Private: case LRL_AST_Type_Any: return; LRL_case_except_ast_types default: fail("c_interop_fixtype_switch"); } } } /* we abuse a field variable in the LRLASTType struct */ #define INSERTLEVEL unique_id #define INSLEVELTYPE LRLHashCode static LRLASTType *parse_c_type(CInteropCtx *cictx, LRLIdent **target_ident, LRLIdent **ext_ident) { LRLASTType *type, *insert_before; LRLASTType **insert_at; INSLEVELTYPE paren_depth; /* Read the "innermost" type */ type = translate_innermost_type(cictx); if (!type) { *target_ident = NULL; if (ext_ident) *ext_ident = NULL; goto end; } type->quals |= read_quals(cictx); /* Read "prefix" part of type */ paren_depth = 1; while (1) { if (cictx->token == Asterisk) { LRLTypeQualifiers ptrquals; LRLASTType *ptrtype; nexttoken(cictx); ptrquals = read_quals(cictx); ptrtype = malloc(sizeof(LRLASTType)); ptrtype->ast_type = LRL_AST_Type_Pointer; ptrtype->from = ptrtype->to = cictx->srctok; 
ptrtype->quals = ptrquals; ptrtype->INSERTLEVEL = paren_depth; ptrtype->kind.pointer.type = type; ptrtype->kind.pointer.flags = LRL_PF_Raw | LRL_PF_Flexible; type = ptrtype; } else if (cictx->token == LParen) { nexttoken(cictx); paren_depth++; } else break; } /* Read identifier part of type */ if (cictx->token == Identifier) { char *identstr = lrl_strdup(cictx->tokstr); if (ext_ident) { /* Visible identifier, e.g. function name */ *ext_ident = lrl_ident_get_string(cictx->ctx->external_scope, cictx->tokstr); if (!*ext_ident) { *ext_ident = create_ident(cictx, cictx->ctx->external_scope, identstr); } else { /* Existing identifier. Check that the def is equivalent */ /* TODO check equivalence */ } *target_ident = create_ident(cictx, cictx->interop_ast->ident, identstr); /* TODO check for duplicates? */ } else { /* Non-visible identifier, e.g. function parameter */ LRLIdent *scope = *target_ident; *target_ident = create_ident(cictx, scope, identstr); } /* Add linkname */ (*target_ident)->linkname = make_linkname_token(cictx->tokstr, cictx->toklen); if (ext_ident && *ext_ident && !(*ext_ident)->linkname) { (*ext_ident)->linkname = (*target_ident)->linkname; } nexttoken(cictx); } else { *target_ident = NULL; } /* Read postfix part of type */ insert_before = type; /* used to build the type correctly */ insert_at = &type; /* if there are parenthesises. 
*/ while (1) { if (cictx->token == LParen) { /* Function type */ LRLASTType *functype = malloc(sizeof(LRLASTType)); LRLASTType *args; LRLASTDefList **arg_inspoint; LRLIdent *paramscope; functype->ast_type = LRL_AST_Type_Function; functype->from = functype->to = cictx->srctok; functype->quals = 0; functype->INSERTLEVEL = paren_depth; functype->kind.function.ret = insert_before; functype->kind.function.flags = 0; /* TODO noreturn */ insert_before = functype; if (insert_at) { *insert_at = functype; } paramscope = lrl_ident_create_priv_scope(*target_ident); args = malloc(sizeof(LRLASTType)); args->ast_type = LRL_AST_Type_Struct; args->from = args->to = cictx->srctok; args->quals = 0; args->unique_id = LRL_UNIQUEID_UNSET; args->kind.struc.members = NULL; args->kind.struc.scope = paramscope; args->kind.struc.flags = 0; functype->kind.function.args = args; /* Read parameters */ arg_inspoint = &args->kind.struc.members; nexttoken(cictx); while (1) { LRLASTType *paramtype; LRLIdent *paramident; LRLASTDefList *param; if (cictx->token == RParen || cictx->token == TokenEOF) break; paramident = paramscope; paramtype = parse_c_type(cictx, ¶mident, NULL); if (paramtype == &any_type) break; param = malloc(sizeof(LRLASTDefList)); param->next = NULL; param->def.ast_type = LRL_AST_Def_Data; param->def.kind.data.ident = paramident; param->def.kind.data.flags = 0; param->def.kind.data.type = paramtype; param->def.kind.data.value = NULL; /* the lrl_display_* functions might read this value */ if (paramident) { paramident->def_node = (LRLASTDefOrStmt*)¶m->def; } *arg_inspoint = param; arg_inspoint = ¶m->next; if (cictx->token == RParen || cictx->token == TokenEOF) break; else if (cictx->token != Comma) { /* TODO proper error handling */ fprintf(stderr, "unexpected token in parameter list: %.*s\n", cictx->toklen, cictx->tokstr); report_error(cictx, LRL_Err_CI_UnexpectedToken); break; } nexttoken(cictx); if (cictx->token == TripleDots) { args->kind.struc.flags |= LRL_SF_CVarArg; if 
(nexttoken(cictx) == RParen) break; fprintf(stderr, "expected \")\" after \"...\": %.*s\n", cictx->toklen, cictx->tokstr); report_error(cictx, LRL_Err_CI_ExpectedClosingParen); } } if (cictx->token == RParen) { nexttoken(cictx); } } else if (cictx->token == LSquare) { /* Array type */ LRLASTType *arrtype = malloc(sizeof(LRLASTType)); LRLASTExpr *lengthexpr; arrtype->ast_type = LRL_AST_Type_Array; arrtype->from = arrtype->to = cictx->srctok; arrtype->INSERTLEVEL = paren_depth; arrtype->kind.array.type = insert_before; insert_before = arrtype; if (insert_at) { *insert_at = arrtype; } nexttoken(cictx); arrtype->quals = read_quals(cictx); if (cictx->token == RSquare) { /* Length is unspecified */ /* TODO wrap in a pointer unless this is a toplevel def or inside a struct */ unspecified_length: lengthexpr = malloc(sizeof(LRLASTExpr)); lengthexpr->from = lengthexpr->to = cictx->srctok; lengthexpr->typeref.quals = 0; lengthexpr->typeref.prm = NULL; lengthexpr->typeref.type = lrl_builtin_get_type(LRL_BT_count); lengthexpr->ast_type = LRL_AST_Value_Undefined; } else { lengthexpr = parse_c_expr(cictx); if (!lengthexpr) goto unspecified_length; } arrtype->kind.array.length = lengthexpr; if (cictx->token != RSquare) { /* TODO improve error handling */ fprintf(stderr, "unsupported token in array length: %.*s\n", cictx->toklen, cictx->tokstr); report_error(cictx, LRL_Err_CI_UnexpectedTokenInArrayLength); } else { nexttoken(cictx); } } else if (cictx->token == RParen) { if (paren_depth == 1) break; paren_depth--; while ((int)insert_before->INSERTLEVEL > (int)paren_depth) { switch (insert_before->ast_type) { case LRL_AST_Type_Pointer: insert_before->unique_id = LRL_UNIQUEID_UNSET; insert_at = &insert_before->kind.pointer.type; insert_before = insert_before->kind.pointer.type; break; case LRL_AST_Type_Function: insert_before->unique_id = LRL_UNIQUEID_UNSET; insert_at = &insert_before->kind.function.ret; insert_before = insert_before->kind.function.ret; break; case 
LRL_AST_Type_Ident: case LRL_AST_Type_Enum: case LRL_AST_Type_Bitfield: case LRL_AST_Type_Struct: case LRL_AST_Type_Union: case LRL_AST_Type_Array: case LRL_AST_Type_Optional: case LRL_AST_Type_Parametric: case LRL_AST_Type_Builtin: case LRL_AST_Type_Private: case LRL_AST_Type_Any: /* Wrong parenthesis depth */ report_error(cictx, LRL_Err_CI_ParenMismatch); goto end; LRL_case_except_ast_types default: fail("c_interop_parsetype_paren_switch"); } } nexttoken(cictx); } else break; } /* Ignore __attribute__ */ skip_attributes(cictx); /* Change C void types into the proper types */ fix_type(type); end: return type; } static LRLASTDefList *parse_c_code(CInteropCtx *cictx) { cictx->def_first = NULL; cictx->def_last = NULL; while (1) { LRLASTDefList *def = NULL; nexttoken(cictx); /* Ignore __attribute__ */ skip_attributes(cictx); switch ((int)cictx->token) { case Identifier: case KWConst: case KWRestrict: case KWVolatile: case KWEnum: case KWExtern: case KWStatic: case KWStruct: case KWUnion: case KWVoid: case KWTypedef: { LRLIdent *target_ident = NULL, *ext_ident = NULL; LRLASTType *type; int is_typedef, is_tagdef; LRLDefFlags defflags; if (cictx->token == KWExtern) { /* No, it's not DeclOnly. Symbol visibility is the default in C, and then it corresponds to LRL "import" linkage */ defflags = LRL_DeFl_Import; nexttoken(cictx); skip_attributes(cictx); } else if (cictx->token == KWStatic) { defflags = LRL_DeFl_Local; nexttoken(cictx); skip_attributes(cictx); } else { /* Probably also an import, since it's in a header */ defflags = LRL_DeFl_Import; } is_typedef = (cictx->token == KWTypedef); is_tagdef = (cictx->token == KWStruct || cictx->token == KWUnion || cictx->token == KWEnum); if (is_typedef) { nexttoken(cictx); } type = parse_c_type(cictx, &target_ident, &ext_ident); if (is_tagdef && !target_ident) { /* Tagged struct or union definition. 
The type is defined inside translate_innermost_type */ break; } /* Add to the AST */ if (is_typedef) { /* Type definition */ linked_append(&def, &cictx->def_first, &cictx->def_last); def->def.ast_type = LRL_AST_Def_Type; def->def.kind.type.ident = target_ident; def->def.kind.type.flags = defflags | LRL_DeFl_Internal_MaybeIncomplete; def->def.kind.type.type = type; def->def.kind.type.typenames = NULL; if (ext_ident) { ext_ident->def_node = (LRLASTDefOrStmt*)&def->def; } if (target_ident) { target_ident->def_node = (LRLASTDefOrStmt*)&def->def; if (type->ast_type == LRL_AST_Type_Enum) { memcpy(&target_ident->contents, &type->kind.enu.scope->contents, sizeof(type->kind.enu.scope->contents)); } else if (type->ast_type == LRL_AST_Type_Struct || type->ast_type == LRL_AST_Type_Union) { memcpy(&target_ident->contents, &type->kind.struc.scope->contents, sizeof(type->kind.struc.scope->contents)); } } } else if (type && type->ast_type == LRL_AST_Type_Function) { /* Function declaration */ linked_append(&def, &cictx->def_first, &cictx->def_last); def->def.ast_type = LRL_AST_Def_Function; def->def.kind.function.ident = target_ident; def->def.kind.function.flags = defflags; def->def.kind.function.type = *type; def->def.kind.function.typenames = NULL; def->def.kind.function.code = NULL; if (ext_ident) { ext_ident->def_node = (LRLASTDefOrStmt*)&def->def; } if (target_ident) { target_ident->def_node = (LRLASTDefOrStmt*)&def->def; } /* Function body */ /* TODO implement translation of code */ if (cictx->token == LCurly) { int level = 1; while (1) { if (cictx->token == LCurly) { level++; } else if (cictx->token == RCurly) { if (--level) break; } else if (cictx->token == TokenEOF) { report_error(cictx, LRL_Err_CI_EOFInFunction); } nexttoken(cictx); } } } else if (type) { /* Data declaration */ linked_append(&def, &cictx->def_first, &cictx->def_last); def->def.ast_type = LRL_AST_Def_Data; def->def.kind.data.ident = target_ident; def->def.kind.data.flags = defflags; 
def->def.kind.data.type = type; if (ext_ident) { ext_ident->def_node = (LRLASTDefOrStmt*)&def->def; } if (target_ident) { target_ident->def_node = (LRLASTDefOrStmt*)&def->def; } /* Read initial value */ if (cictx->token == Assign) { /* TODO */ goto not_implemented; } else { not_implemented: def->def.kind.data.value = malloc(sizeof(LRLASTExpr)); def->def.kind.data.value->ast_type = LRL_AST_Value_Undefined; def->def.kind.data.value->from = cictx->srctok; def->def.kind.data.value->to = cictx->srctok; def->def.kind.data.value->typeref.quals = 0; def->def.kind.data.value->typeref.prm = NULL; def->def.kind.data.value->typeref.type = type; } } break; } case TokenEOF: goto out; default: /* TODO proper error reporting */ fprintf(stderr, "unexpected token: %d(%.*s)\n", cictx->token, cictx->toklen, cictx->tokstr); report_error(cictx, LRL_Err_CI_UnexpectedToken); } } out: return cictx->def_first; } static const char empty[1] = ""; /** * Imports a C header into LRL. The interop_ast paramter is the "interop" * statement, which is filled with an AST generated from the C header * (and any dependencies). * * Returns 0 on success, 1 on error or 2 if the options expr isn't ready yet. * When it returns 1 it always reports an error. 
*/ static int translate(LRLCtx *ctx, LRLASTInterop *interop_ast) { char *header_filename, *dir; HeaderSearch headertype; int ret; int status, col, line; CInteropCtx cictx; cictx.ctx = ctx; cictx.interop_ast = interop_ast; cictx.srctok = interop_ast->name; /* Check options */ status = extract_options(&cictx, &headertype, &header_filename); if (status != 0) { if (status == 1) { lrl_err_expr(ctx, LRL_Err_BadInteropOptions, interop_ast->options_expr); } return status; } /* Locate the containing directory of the source file */ if (headertype == System) { dir = (char*)empty; } else { const char *srcptr = interop_ast->options_expr->from->loc.start; const char *srcfile, *dirsep; size_t dirlen; lrl_ctx_find_source(ctx, srcptr, &srcfile, &col, &line); dirsep = srcfile; while (1) { const char *next = strstr(dirsep, PATHSEP); if (next) dirsep = next+strlen(PATHSEP); else break; } dirlen = dirsep-srcfile; dir = malloc(dirlen+1); memcpy(dir, srcfile, dirlen); dir[dirlen] = '\0'; } cictx.filename[0] = '\0'; strncat(cictx.filename, header_filename, MAXFILENAME-1); cictx.filename[MAXFILENAME-1] = '\0'; cictx.line = 1; /* Pipe from preprocessor */ if (strchr(header_filename, QUOTECHAR)) { /* TODO escape or use execv+pipe instead of popen */ lrl_err_expr(ctx, LRL_Err_InteropFailed, interop_ast->options_expr); fprintf(stderr, "error: c_builtin interop does not yet allow ' and \" in filenames\n"); ret = 1; goto end; } cictx.pipe = open_preprocessor_pipe(ctx, headertype, dir, header_filename); if (!cictx.pipe) { lrl_err_expr(ctx, LRL_Err_InteropFailed, interop_ast->options_expr); fprintf(stderr, "error: failed to execute C preprocessor command.\n"); ret = 1; goto end; } /* Parse C code */ interop_ast->translated = parse_c_code(&cictx); /* Clean up */ status = pclose(cictx.pipe); ret = pclose_exit_ok(status) ? 
0 : 1; if (ret != 0) { lrl_err_expr(ctx, LRL_Err_InteropFailed, interop_ast->options_expr); fprintf(stderr, "error: C preprocessor returned error code.\n"); } end: free(header_filename); if (dir != empty) { free(dir); } return ret; } const LRLInteropImpl c_builtin_interop = { "c_builtin", &is_supported, &get_options_type, &translate, }; #endif