/* ident.c -- Identifier handling for LRL bootstrap transpiler Copyright © 2020 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "bootstrap.h" #include #include /* getting ident declarations in the right order: - we can use "struct T" refs in C to avoid problems with circular references. - should LRL allow non struct/enum typedefs? -- Java, for example, does not -- sometimes, typedefs to elementary types can be useful, e.g. "type HashCode = uint32" -- pointer typedefs harm more than they help. [U] = no ordering required [O] = dependency ordering required 1. typedefs with elementary types [U] 2. typedefs with enums [U - Assuming no sizeof in enum values] 3. all other typedefs [O - Usage of non-pointer struct and all named types count as dependencies] 4. all data declarations [U - We don't allow pointers in initialized data, so no refrences can occur] 5. all function declarations [U] If compiling an implementation file: 6. all data definitions [U] 7. all function bodies [U] */ #define NUM_CHAINS 3 #define IC_TYPES 0 #define IC_FUNCS 1 #define IC_DATAS 2 #define DEFTYPE2CHAIN(deftype) ((deftype)-1) /* Hard-coded for now. Will lead to lots of collisions in the hashtables */ #define NUM_BUCKETS 256 #define BUCKET_MASK 0xff struct IdentArena { struct IdentArena *base; struct Ident *chains[NUM_CHAINS]; struct Ident *buckets[NUM_BUCKETS]; }; static struct IdentArena def_idarena; static struct IdentArena *idarena = &def_idarena; struct Ident *ident_insert(unsigned deftype, const struct Token *token) { unsigned int hash; int bucket, chain; struct Ident *ident, *scan; char *name; /* Check for existing identifier */ hash = token->hash; bucket = hash & BUCKET_MASK; scan = idarena->buckets[bucket]; for (; scan; scan = scan->bucket_next) { if (scan->hash == hash && !strcmp(token->data, scan->name)) { if (deftype) { if (scan->decl.type.deftype) { error_tok(token, "Duplicate identifier"); return NULL; } /* We have a declaration of an already seen identifier */ ident = scan; goto add_decl; } return scan; } } /* Also check in base namespaces (e.g. "header namespaces") */ /* TODO... or should we require explicit imports? it might be better to use versioning (and require added symbols to have a version) */ /* XXX we still need to create the ident in the current namespace, in case a definition follows later. when we are done with the namespace we can check if undeclared identifiers exist in parent namespaces. */ /* XXX on the other hand, we may want to forbid identifier hiding to make code review easier. But can we always be sure that the base namespaces are parsed first? Headers are parsed before implementations, and we can enforce the same for enums. But with constraints, things might get more complicated... */ /*if (!deftype) { struct IdentArena *curarena = idarena->base; for (; curarena; curarena = curarena->base) { scan = curarena->buckets[bucket]; for (; scan; scan = scan->bucket_next) { if (scan->hash == hash && !strcmp(token->data, scan->name)) { } } }*/ /* Identifier not seen before */ name = aalloc(token->size+1, sizeof(char)); memcpy(name, token->data, token->size+1); /* FIXME params etc. should go into a separate area */ ident = aalloc(sizeof(*ident), sizeof(void*)); ident->name = name; ident->hash = hash; ident->bucket_next = idarena->buckets[bucket]; idarena->buckets[bucket] = ident; if (!deftype) { /* TODO in this case we should also track the line number, in case the identifier does not exist */ ident->decl.type.deftype = 0; return ident; } add_decl: ident->decl.type.deftype = deftype; chain = DEFTYPE2CHAIN(deftype); ident->chain_next = idarena->chains[chain]; idarena->chains[chain] = ident; return ident; } /* FIXME could we use arena allocation instead? (we always need a "free" though, to switch the pointer) */ void identarena_new(void) { struct IdentArena *newarena = calloc(1, sizeof(struct IdentArena)); if (!newarena) out_of_memory(); newarena->base = idarena; idarena = newarena; } void identarena_free(void) { struct IdentArena *oldarena = idarena; idarena = idarena->base; free(oldarena); } struct Ident *ident_list(unsigned deftype) { return idarena->chains[DEFTYPE2CHAIN(deftype)]; } static struct LocalIdent *query_localident(struct Block *block, struct LocalIdent *create, const struct Token *token) { struct LocalIdent **insptr = &block->idents; struct LocalIdent *node = block->idents; while (node) { if (token->hash > node->hash) { is_higher: insptr = &node->higher; node = node->higher; } else if (token->hash < node->hash) { is_lower: insptr = &node->higher; node = node->higher; } else { int cmp = strcmp(token->data, node->name); if (!cmp) return node; else if (cmp < 0) goto is_higher; else goto is_lower; } } if (create) { create->lower = NULL; create->higher = NULL; create->hash = token->hash; create->name = aalloc(token->size+1, 1); memcpy(create->name, token->data, token->size+1); *insptr = create; } return NULL; } int localident_insert(struct Block *block, struct LocalIdent *ident, const struct Token *token) { if (block->base && localident_find(block->base, token)) { error_tok(token, "Identifier hiding (same name in different scope) is not allowed"); return 0; } if (query_localident(block, ident, token)) { error_tok(token, "Duplicate local identifier name"); return 0; } return 1; } struct LocalIdent *localident_find(struct Block *block, const struct Token *token) { struct LocalIdent *ident = query_localident(block, NULL, token); if (ident) return ident; if (block->base) return localident_find(block->base, token); return NULL; } struct GotoIdent *gotoident(struct FunctionState *funcstate, const struct Token *token) { struct GotoIdent **insptr = &funcstate->gotoidents; struct GotoIdent *node = funcstate->gotoidents; struct GotoIdent *newident; while (node) { if (token->hash > node->hash) { is_higher: insptr = &node->higher; node = node->higher; } else if (token->hash < node->hash) { is_lower: insptr = &node->higher; node = node->higher; } else { int cmp = strcmp(token->data, node->name); if (!cmp) return node; else if (cmp < 0) goto is_higher; else goto is_lower; } } newident = aalloc(sizeof(struct GotoIdent), sizeof(void*)); newident->lower = NULL; newident->higher = NULL; newident->declared = 0; newident->hash = token->hash; newident->name = aalloc(token->size+1, 1); memcpy(newident->name, token->data, token->size+1); *insptr = newident; return newident; }