/* identifier.c -- The identifier/namespace/scope data structure. Copyright © 2011-2016 Samuel Lidén Borell Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include "identifier.h" #include "context_private.h" #include "misc.h" #include "builtins.h" #include "filesys.h" #define is_create(op) ((op) == LRL_Ident_Create || \ (op) == LRL_Ident_CreateMember || \ (op) == LRL_Ident_Extend) static int compare_name(const LRLIdent *ident, const char *name, size_t length) { size_t ident_len; const char *ident_name; if (ident->def_token) { ident_len = ident->def_token->loc.length; ident_name = ident->def_token->loc.start; } else { ident_len = strlen(ident->name); ident_name = ident->name; } return (length == ident_len && !strncmp(name, ident_name, length)); } /** * Compares the names (not the scope/namespace) of two identifiers. * Returns 1 if equal, 0 if not. */ int lrl_ident_names_equal(const LRLIdent *a, const LRLIdent *b) { size_t alen, blen; const char *astr, *bstr; if (a->def_token) { alen = a->def_token->loc.length; astr = a->def_token->loc.start; } else { alen = strlen(a->name); astr = a->name; } if (b->def_token) { blen = b->def_token->loc.length; bstr = b->def_token->loc.start; } else { blen = strlen(b->name); bstr = b->name; } return (alen == blen && !strncmp(astr, bstr, alen)); } /* bucket = hash % 2^key_bits */ #define which_bucket(map, hash) ((hash) & ~(UINT_MAX << (map).key_bits)) static void init_hashmap(LRLIdent *ns) { /* Create an empty hashmap */ ns->contents.buckets = calloc(4, sizeof(LRLIdent*)); if (!ns->contents.buckets) return; ns->contents.num_buckets = 4; ns->contents.key_bits = 2; /* 2^2 = 4 */ ns->contents.size = 0; } static void grow_hashmap(LRLIdent *ns) { size_t old_end = ns->contents.num_buckets; size_t split_bitmask, i; /* Double the number of buckets */ ns->contents.buckets = try_realloc(ns->contents.buckets, ns->contents.num_buckets*2*sizeof(LRLIdent*)); ns->contents.num_buckets *= 2; ns->contents.key_bits++; /* We have one more key bit now. Split each buckets in two, and place the hashes with the new key bit set in the new buckets. That is, split xyy into 0yy (in the old bucket) and 1yy (the new bucket). */ i = old_end; split_bitmask = 1 << (ns->contents.key_bits-1); while (i-- > 0) { LRLIdent *elem = ns->contents.buckets[i]; LRLIdent **first_bucket = &ns->contents.buckets[i]; LRLIdent **second_bucket = &ns->contents.buckets[old_end+i]; while (elem) { if (elem->hash & split_bitmask) { /* bit=1, put in new bucket */ *second_bucket = elem; second_bucket = &elem->next; } else { /* bit=0, put in old bucket */ *first_bucket = elem; first_bucket = &elem->next; } elem = elem->next; } /* End of list */ *first_bucket = *second_bucket = NULL; } } /** * Dereferences links, i.e. uses statements, in an identifier. */ LRLIdent *lrl_ident_deref_links(LRLIdent *ident) { while (ident->flags & LRL_IdFl_Link) { if (ident->def_node->ast_type != LRL_AST_Uses) { fail("idfstr_linktype"); } ident = (LRLIdent*)ident->def_node->def.kind.uses.identref.ident; if (!lrl_ident_valid(ident)) return NULL; } return ident; } /** * Low-level function for looking up or creating identifiers in a given scope. * This function looks up a single (sub)identifier, and does NOT look in * parent namespaces. * * newident may optionally point to a pre-allocated identifier to use when * inserting a new identifier. If NULL, then it's allocated by this function. * * TODO check that the namespace has been made visible with "uses" if * op==LRL_Ident_Find */ static LRLIdent *ident_from_string(LRLCtx *ctx, LRLIdent *scope, const char *name, size_t namelen, const LRLIdentOperation op, LRLIdent *newident) { size_t index; unsigned long hash; LRLIdent **inspoint; /* insertion point */ if ((scope->flags & LRL_IdFl_Link) && is_create(op)) { lrl_err_set_token(ctx, scope->def_token, 0); lrl_err_finish(ctx, LRL_Err_ModifyNonLocalNamespace); return NULL; } scope = lrl_ident_deref_links(scope); if (!scope) return NULL; /* Check if empty */ if (scope->contents.num_buckets == 0) { if (is_create(op)) { init_hashmap(scope); } else { /* Might not have been loaded from file system */ if ((scope->flags & LRL_IdFl_NotLoaded) == 0 || !lrl_filesys_ensure_loaded(ctx, scope)) { return NULL; } /* Still empty? */ if (scope->contents.num_buckets == 0) return NULL; } } if (!scope->contents.buckets) { fail("idfstr_bucketsarenull"); } /* Skip uninitilized interop statements */ if (scope->flags & LRL_IdFl_Uninitialized) { return NULL; } /* Calculate hash */ hash = hash_name(name, namelen); /* Find the right location in the hashmap. Repeated if we need to increase the number of buckets, or if we had to load from the file system. */ for (;;) { /* Find the right bucket */ index = which_bucket(scope->contents, hash); /* Find the right place */ inspoint = &scope->contents.buckets[index]; for (; (*inspoint) != NULL; inspoint = &(*inspoint)->next) { /* Hashes come in ascending order */ LRLHashCode inspoint_hash = (*inspoint)->hash; if (inspoint_hash < hash) continue; if (inspoint_hash > hash) break; /* Check if there's an exact match */ if (compare_name(*inspoint, name, namelen)) { /* Existing identifier! */ if ((*inspoint)->flags & LRL_IdFl_NotLoaded) { lrl_filesys_ensure_loaded(ctx, *inspoint); } return lrl_ident_deref_links(*inspoint); } } /* The identifier didn't exist */ if (!is_create(op)) { if (scope->flags & LRL_IdFl_NotLoaded) { /* Load namespace from file system and retry */ /* FIXME we should not call this function inside an implementation namespace */ if (lrl_filesys_ensure_loaded(ctx, scope)) continue; } /* If this is an implementation namespace it won't have a name (it's a private scope). Instead, check the name of the interface namespace's name. */ if (scope->flags & LRL_IdFl_Implementation && scope->scope && compare_name(scope->scope, name, namelen) && !is_create(op)) { return scope; } return NULL; } /* Should we grow the bucket list first? */ if (scope->contents.size > 16*scope->contents.num_buckets) { grow_hashmap(scope); continue; } /* Create the identifier */ if (!newident) newident = calloc(1, sizeof(LRLIdent)); newident->next = *inspoint; newident->hash = hash; newident->scope = scope; newident->def_token = NULL; newident->name = NULL; /* "Extend" means that the scope can exist in different files or source lines. */ if (op == LRL_Ident_Extend && ctx && !ctx->no_filesystem) { newident->flags = LRL_IdFl_NotLoaded; } *inspoint = newident; scope->contents.size++; return newident; } } static LRLIdent *ident_from_token(LRLCtx *ctx, LRLIdent *scope, const LRLToken *token, const LRLIdentOperation op, LRLIdent *newident) { /* only "create" the last subidentifier, "extend" the others (create reports an error if it exists already) */ LRLIdentOperation real_op = (op == LRL_Ident_Create && token[1].type == LRL_Sym_NamespaceSep ? LRL_Ident_Extend : op); LRLIdent *ident; if (token->type == LRL_KW_Here) { /* "here" identifiers refer to the containing namespace and are useful to e.g. give a namespace a type */ if (!scope || (scope->flags & LRL_IdFl_HasHere) == 0) { lrl_err_set_token(ctx, token, 0); if (scope && scope->def_token) { lrl_err_set_ident(ctx, scope->def_token, 1); } lrl_err_finish(ctx, LRL_Err_HereNotAllowedHere); return NULL; } if ((scope->flags & LRL_IdFl_Typedef) != 0 && real_op == LRL_Ident_Create) { /* A type has already been declared with this identifier */ lrl_err_set_token(ctx, token, 0); if (scope->def_token) { lrl_err_set_ident(ctx, scope->def_token, 1); } lrl_err_finish(ctx, LRL_Err_DuplicateTypedef); return NULL; } ident = scope; } else { /* Normal identifier */ ident = ident_from_string(ctx, scope, token->loc.start, token->loc.length, real_op, newident); /* Link new identifiers to the first token at the definition */ if (ident && is_create(op) && !ident->def_token) ident->def_token = token; /* Check for duplicate identifiers */ if (ident && ident->def_node && ident->def_token != token && ctx && (real_op == LRL_Ident_Create || real_op == LRL_Ident_CreateMember)) { lrl_err_set_token(ctx, token, 0); if (ident->def_token) { lrl_err_set_ident(ctx, ident->def_token, 1); } lrl_err_finish(ctx, LRL_Err_DuplicateIdentifier); return NULL; } } return ident; } LRLIdent *lrl_ident_get(LRLCtx *ctx, LRLIdent *scope, const LRLToken *tokens, const LRLIdentOperation op, LRLIdent *newident) { LRLIdent *top; /* Find/insert first identifier */ if (tokens->type != LRL_TT_Ident && tokens->type != LRL_KW_Here) { fail("idfget_wrongtok1"); } top = ident_from_token(ctx, scope, tokens, op, newident); if (!top) { if (is_create(op) || op == LRL_Ident_FindMember || !scope->scope || (op == LRL_Ident_FindLocal && !(scope->flags & LRL_IdFl_Statement))) { return NULL; } /* Search in parent namespace */ return lrl_ident_get(ctx, (LRLIdent*)scope->scope, tokens, op, newident); } tokens++; if ((op == LRL_Ident_FindMember || op == LRL_Ident_CreateMember) && tokens->type == LRL_Sym_NamespaceSep) { return top; } /* Check if there are subidentifiers */ while (tokens->type == LRL_Sym_NamespaceSep) { tokens++; /* Find/insert subidentifier */ if (tokens->type != LRL_TT_Ident) fail("idfget_wrongtok2"); top = ident_from_token(ctx, top, tokens, op, NULL); if (!top) return NULL; if ((top->flags & LRL_IdFl_TypedefParam) != 0) { lrl_err_set_token(ctx, tokens, 0); lrl_err_finish(ctx, LRL_Err_TypeParamWithNamespace); return NULL; } tokens++; } return top; } /** * Checks if the root identifier or any of the subidentifiers are * LRL_IdFl_Uninitialized (e.g. interop scopes that need multiple passes) * * Returns 1 if uninitialized and -1 if broken and errors should be suppressed. */ static int is_uninitialized(LRLCtx *ctx, LRLIdent *scope, const LRLToken *tokens) { LRLIdent *top; if (!scope) return 0; /* Find first identifier */ top = ident_from_token(ctx, scope, tokens, LRL_Ident_Find, NULL); if (!top) { /* Search in parent namespace */ return is_uninitialized(ctx, (LRLIdent*)scope->scope, tokens); } if (top->flags & LRL_IdFl_Broken) return -1; if (top->flags & LRL_IdFl_Uninitialized) return 1; tokens++; /* Check if there are subidentifiers */ while (tokens->type == LRL_Sym_NamespaceSep) { tokens++; /* Find subidentifier */ top = ident_from_token(ctx, top, tokens, LRL_Ident_Find, NULL); if (!top) return 0; else if (top->flags & LRL_IdFl_Broken) return -1; else if (top->flags & LRL_IdFl_Uninitialized) return 1; tokens++; } return 0; } /** * Searches for duplicate identifiers in the local block scope. */ void lrl_ident_check_duplicates(LRLCtx *ctx, const LRLIdent *scope, const LRLToken *token) { const LRLIdent *dup; if (token->type != LRL_TT_Ident) return; dup = lrl_ident_get(ctx, (LRLIdent*)scope, token, LRL_Ident_FindLocal, NULL); if (dup) { lrl_err_set_token(ctx, token, 0); if (dup->def_token) { lrl_err_set_token(ctx, dup->def_token, 1); } lrl_err_finish(ctx, LRL_Err_DuplicateIdentifier); } } LRLIdent *lrl_ident_insert_string(LRLCtx *ctx, LRLIdent *scope, char *name) { LRLIdent *ident = ident_from_string(ctx, scope, name, strlen(name), LRL_Ident_Extend, NULL); ident->flags &= ~LRL_IdFl_NotLoaded; /* not on the filesystem */ ident->name = name; return ident; } LRLIdent *lrl_ident_get_string(LRLIdent *scope, const char *name) { return ident_from_string(NULL, scope, name, strlen(name), LRL_Ident_Find, NULL); } /** * Creates the root scope for all modules and adds all builtins. */ LRLIdent *lrl_ident_create_root(void) { LRLIdent *root = calloc(1, sizeof(LRLIdent)); lrl_builtins_add(root); return root; } /** * Inserts a path into a namespace. */ LRLIdent *lrl_ident_insert_path(LRLCtx *ctx, LRLIdent *root, const char *path) { while (*path != '\0') { LRLIdent *ident; const char *dirend, *nameend; char *name; size_t length; /* Get next path element */ skip_path_sep(&path); dirend = end_of_path_elem(path); /* Find file extension. It's removed */ nameend = strchr(path, '.'); if (!nameend || nameend > dirend) nameend = dirend; /* Find or create this identifier */ length = nameend - path; name = lrl_strndup(path, length); ident = ident_from_string(ctx, root, name, length, LRL_Ident_Extend, NULL); ident->name = name; root = ident; path = dirend; } return root; } /** * Creates a private scope (implemented as an anonymous scope). Private * scopes are used to hide identifiers in an implementation files. */ LRLIdent *lrl_ident_create_priv_scope(const LRLIdent *scope) { LRLIdent *priv = calloc(1, sizeof(LRLIdent)); priv->scope = scope; return priv; } /** * Checks if the identifier hasn't been looked up yet, and queues a look up * if necessary. Returns 0 when the identifier is ready. */ int lrl_identref_queue(LRLCtx *ctx, LRLIdentRef *identref) { if (identref->ident != LRL_IDENT_MISSING) { if (identref->ident && (identref->ident->flags & LRL_IdFl_NotLoaded) == 0) return 0; lrl_ident_defer(ctx, identref); } lrl_ctx_set_has_deferred(ctx); return 1; } /** * Queues the identifier to be looked up and defined. */ void lrl_ident_defer(LRLCtx *ctx, LRLIdentRef *identref) { lrl_ctx_set_has_deferred(ctx); /* Don't add broken identrefs */ if (!identref->scope) return; /* Check if already deferred */ if (identref->next != LRL_IDENTREF_NEW) return; /* Don't add already looked up idents */ if (identref->ident && (identref->ident->flags & LRL_IdFl_NotLoaded) == 0) return; /* Put in the deferred list */ identref->next = ctx->deferred_list; ctx->deferred_list = identref; } void lrl_ident_defer_uses(LRLCtx *ctx, LRLIdentRef *identref) { lrl_ctx_set_has_deferred(ctx); /* Check if already deferred */ if (identref->next != LRL_IDENTREF_NEW) return; /* Put in the "uses" list */ identref->next = ctx->uses_list; ctx->uses_list = identref; } static int typeparam_visible(const LRLIdent *typeparam, const LRLIdentRef *fromref) { const LRLIdent *tpscope = typeparam->scope; const LRLIdent *searchscope = fromref->scope; while (searchscope && (searchscope->flags & (LRL_IdFl_Statement|LRL_IdFl_Typedef)) != LRL_IdFl_Typedef) { if (searchscope == tpscope) return 1; searchscope = searchscope->scope; } return searchscope == tpscope; } static int identifier_process_refs(LRLCtx *ctx, LRLIdentRef **list, LRLIdentOperation op) { int found_one = 0; LRLIdentRef *identref = *list; *list = NULL; /* TODO list is reversed, we should print the errors in reverse order */ for (; identref; identref = identref->next) { LRLIdent *ident; skip_deref_next: if (identref->ident) continue; ident = lrl_ident_get(ctx, (LRLIdent*)identref->scope, identref->first_token, op, NULL); /* Check if the identifier wasn't found */ if (!ident) { int status = is_uninitialized(ctx, (LRLIdent*)identref->scope, identref->first_token); if (status == 1) { /* Not yet initialized. Put it back */ LRLIdentRef *tmp = identref; identref = identref->next; tmp->next = LRL_IDENTREF_NEW; if (!identref) break; goto skip_deref_next; } else { /* -1 = suppress error */ if (status != -1) { lrl_err_set_ident(ctx, identref->first_token, 0); lrl_err_finish(ctx, LRL_Err_UndeclaredIdentifier); } /* Mark as invalid */ identref->ident = LRL_IDENT_MISSING; } } else { /* Found */ identref->ident = ident; /* Type parameter defs cannot be used outside of the scope of the type they belong to */ if ((ident->flags & LRL_IdFl_TypedefParam) != 0 && op == LRL_Ident_Find && !typeparam_visible(ident, identref)) { lrl_err_set_ident(ctx, identref->first_token, 0); lrl_err_finish(ctx, LRL_Err_TypeParamFromOtherType); } found_one = 1; } } return found_one; } int lrl_ident_lookup_deferred(LRLCtx *ctx) { return identifier_process_refs(ctx, &ctx->deferred_list, LRL_Ident_Find); } int lrl_ident_lookup_uses_entries(LRLCtx *ctx) { return identifier_process_refs(ctx, &ctx->uses_list, LRL_Ident_FindUses); } int lrl_ident_has_deferred(LRLCtx *ctx) { return ctx->deferred_list || ctx->uses_list || ctx->interop_list; }