diff options
Diffstat (limited to 'bootstrap')
| -rw-r--r-- | bootstrap/Makefile | 2 | ||||
| -rw-r--r-- | bootstrap/b64url.c | 95 | ||||
| -rw-r--r-- | bootstrap/compiler.h | 37 | ||||
| -rw-r--r-- | bootstrap/main.c | 18 | ||||
| -rw-r--r-- | bootstrap/parsemod.c | 183 |
5 files changed, 334 insertions, 1 deletions
diff --git a/bootstrap/Makefile b/bootstrap/Makefile
index e7cd4ed..f042abf 100644
--- a/bootstrap/Makefile
+++ b/bootstrap/Makefile
@@ -46,6 +46,7 @@ CLANG_ANALYZE_OPTS = --analyzer-output text
 # Stage 1 compiler (written in C)
 C_SOURCES = \
     $(srcdir)/ast.c \
+    $(srcdir)/b64url.c \
     $(srcdir)/builtins.c \
     $(srcdir)/funccall.c \
     $(srcdir)/intrange.c \
@@ -72,6 +73,7 @@ C_HEADERS = \
     $(srcdir)/token.h
 OBJECTS = \
     $(builddir)/ast.o \
+    $(builddir)/b64url.o \
     $(builddir)/builtins.o \
     $(builddir)/funccall.o \
     $(builddir)/intrange.o \
diff --git a/bootstrap/b64url.c b/bootstrap/b64url.c
new file mode 100644
index 0000000..4d08947
--- /dev/null
+++ b/bootstrap/b64url.c
@@ -0,0 +1,95 @@
+/*
+ * Base64url (RFC 4648 section 5) decoding function. Since SLUL explicitly
+ * does NOT use padding, the variant without padding is used (see section 3.2).
+ *
+ * Copyright © 2021-2026 Samuel Lidén Borell <samuel@kodafritt.se>
+ *
+ * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later
+ */
+
+#include "compiler.h"
+#include <assert.h>
+#include <string.h>
+
+/*
+ * Decoding table for the characters 0x20-0x7F, indexed by (character - 32).
+ * Each entry is the 6-bit value of the character, or X (0xFF) for
+ * characters that are not part of the base64url alphabet.
+ */
+#define X 0xFF
+static const unsigned char unb64url[128-32] = {
+    /* 20-2F: special characters */
+    X, X, X, X, X, X, X, X, X, X, X, X, X, 62, X, X,
+    /* 30-3F: 01234... */
+    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, X, X, X,
+    /* 40-4F: @ABCD... */
+    X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+    /* 50-5F: PQRST... */
+    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, 63,
+    /* 60-6F: `abcd... */
+    X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+    /* 70-7F: pqrst... */
+    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X
+};
+#undef X
+
+/*
+ * Reads the next input character into c and replaces it with its 6-bit
+ * value. Makes the enclosing function return false if the character is
+ * outside the base64url alphabet. Advances in, but does NOT update inlen.
+ */
+#define GETCHAR \
+    c = (unsigned char)*(in++); \
+    if (c < 32 || c > 127) return false; \
+    c = unb64url[c-32]; \
+    if (c > 63) return false;
+
+/* Decodes unpadded base64url. The interface is documented in compiler.h */
+bool unbase64url(const char *in, size_t inlen,
+                 unsigned char *out, size_t outlen)
+{
+    SlulInt v;
+    unsigned char c;
+    /* Full groups: 4 characters (24 bits) become 3 bytes */
+    for (;;) {
+        if (inlen < 4) goto end_chunk;
+        if (outlen < 3) return false;
+
+        GETCHAR
+        v = (SlulInt)c << 18;
+        GETCHAR
+        v |= (SlulInt)c << 12;
+        GETCHAR
+        v |= (SlulInt)c << 6;
+        GETCHAR
+        v |= (SlulInt)c;
+
+        *(out++) = (v >> 16) & 0xFF;
+        *(out++) = (v >> 8) & 0xFF;
+        *(out++) = v & 0xFF;
+        inlen -= 4;
+        outlen -= 3;
+    }
+  end_chunk:
+    if (!inlen) return true;
+    /* We can have 2 or 3 trailing characters (1 or 2 bytes, respectively) */
+    if (inlen == 1) return false;
+    GETCHAR
+    v = (SlulInt)c << 18;
+    GETCHAR
+    v |= (SlulInt)c << 12;
+    if (inlen >= 3) {
+        GETCHAR
+        v |= (SlulInt)c << 6;
+        if (outlen < 2) return false;
+    } else if (outlen < 1) return false;
+
+    *(out++) = (v >> 16) & 0xFF;
+    /* Check that there are no extraneous bits */
+    v &= ~0xFF0000U;
+    if (inlen >= 3) {
+        *(out++) = (v >> 8) & 0xFF;
+        v &= ~0xFF00U;
+    }
+    return !v;
+}
diff --git a/bootstrap/compiler.h b/bootstrap/compiler.h
index 8090865..6e3fa19 100644
--- a/bootstrap/compiler.h
+++ b/bootstrap/compiler.h
@@ -548,6 +548,28 @@ void srcloc_init(struct SourceLocation *srcloc);
 extern unsigned num_sources;
 extern char *(sources[MAX_SOURCES]);
 
+struct DepApiVersion {
+    struct TreeNode node; /* of API hash */
+    char *apiversion; /* version number */
+    size_t apiversion_len;
+    struct DepApiVersion *next;
+};
+
+struct DependencyInfo {
+    struct TreeNode node; /* of module name */
+    struct TreeNode *apiversions;
+    struct DepApiVersion *apiversions_list;
+    /*unsigned char apihash[32];
+    unsigned apihash_present : 1;
+    char *modname;
+    char *apiversion;
+    size_t modname_len;
+    size_t apiversion_len;*/
+    struct DependencyInfo *next;
+};
+
+extern struct DependencyInfo *dependencies_list;
+
 extern const char *current_filename;
 extern unsigned current_line;
 extern bool interfaces_done;
@@ -597,6 +619,8 @@ enum IdentKind classify_ident_str(const char *name, size_t len);
 
 /** Parses a source index file */
 void parse_source_index(FILE *f);
+/** Parses a dependency list file */
+void parse_dependencies_list(FILE *f);
 /** Parses the code in a source file.
     basename is the filename without the path */
 void parse_file(FILE *f, const char *basename);
@@ -641,5 +665,18 @@ const char *path_basename(const char *path);
 /** Very fast but stupid hash function. Used to avoid string comparison
     in AVL trees */
 HashCode hash_str(const char *s, size_t len);
+/**
+ * Decodes a string from base64url to binary. No padding is allowed at
+ * the end.
+ *
+ * \param in Base64url encoded input string
+ * \param inlen Length of input in bytes
+ * \param out Binary output
+ * \param outlen Length of output
+ * \return true if successful, or false if the input is not correctly encoded
+ *         or if the output buffer is too small.
+ */
+bool unbase64url(const char *in, size_t inlen,
+                 unsigned char *out, size_t outlen);
 
 #endif
diff --git a/bootstrap/main.c b/bootstrap/main.c
index 8ce8894..61b4e1e 100644
--- a/bootstrap/main.c
+++ b/bootstrap/main.c
@@ -143,12 +143,28 @@ static void parse(void)
     module_start();
 
     /* TODO Decide on a file name here. In particular,
-       check that "sources.index" is not used for anything else. */
+       check that "sources.index" is not used for anything else.
+       - Go uses extensions: go.mod, go.sum, etc.
+         How about:
+           slul.sources
+           slul.list
+           slul.index
+         And then, for the other types of files:
+           slul.deps
+           slul.exports
+           slul.known  (known hashes / pubkeys)
+       (Also, how about renaming slul?)
+    */
     open_file(rootdir, rootdir_len, "sources.index");
     parse_source_index(f);
     close_file();
     free(last_alloced_filename);
 
+    open_file(rootdir, rootdir_len, "deps.index");
+    parse_dependencies_list(f);
+    close_file();
+    free(last_alloced_filename);
+
     parse_core_interface();
     if (is_library) {
         parse_exported_interface();
diff --git a/bootstrap/parsemod.c b/bootstrap/parsemod.c
index 1c06487..45c1f11 100644
--- a/bootstrap/parsemod.c
+++ b/bootstrap/parsemod.c
@@ -7,11 +7,16 @@
  */
 #include "compiler.h"
 #include "token.h"
+#include <assert.h>
 #include <string.h>
 
 unsigned num_sources = 0;
 char *(sources[MAX_SOURCES]) = { 0 };
 
+static struct TreeNode *dependencies = NULL;
+struct DependencyInfo *dependencies_list = NULL;
+
+
 void parse_source_index(FILE *f)
 {
     char line[SOURCELINE_MAX];
@@ -34,3 +39,181 @@ void parse_source_index(FILE *f)
         }
     }
 }
+
+/**
+ * Measures the word at last_start and finds the start of the next word.
+ * Words are separated by spaces and/or tabs.
+ *
+ * \param last_start    Start of the current word. Must not point at a
+ *                      space, a tab or the end of the string.
+ * \param prev_len_out  Receives the length of the current word.
+ * \return Start of the next word, or NULL if there are no more words
+ *         (a warning is reported if only whitespace follows).
+ */
+static const char *next_word(const char *last_start, size_t *prev_len_out)
+{
+    const char *s = last_start;
+    bool spaces = false;
+    assert(*s && *s != ' ' && *s != '\t');
+    while (*s && *s != ' ' && *s != '\t') s++;
+    *prev_len_out = (size_t)(s - last_start);
+    /* Skip whitespace after */
+    while (*s == ' ' || *s == '\t') { s++; spaces = true; }
+    if (!*s) {
+        if (spaces) {
+            warning("Trailing whitespace");
+        }
+        return NULL;
+    }
+    return s;
+}
+
+/** Reports an error if the module name is longer than 50 characters */
+static void check_modname(const char *name, size_t len)
+{
+    if (len > 50) {
+        error_len("Module name of dependency too long", name, len);
+    }
+    /* TODO */
+    /* XXX require that the first letter is lowercase? or all letters? */
+}
+
+/** Reports an error if the version is longer than 50 characters */
+static void check_version(const char *version, size_t len)
+{
+    if (len > 50) {
+        error_len("Version of dependency too long", version, len);
+    }
+    /* TODO */
+}
+
+/**
+ * Registers a dependency on a module, optionally on a given API version.
+ *
+ * \param apihash         Base64url encoded API hash (43 characters are
+ *                        decoded), or NULL if no API hash is given.
+ * \param modname         Module name (must not be NULL).
+ * \param modname_len     Length of modname.
+ * \param apiversion      API version, or NULL. Requires an apihash.
+ * \param apiversion_len  Length of apiversion.
+ */
+static void add_dependency(const char *apihash,
+        const char *modname, size_t modname_len,
+        const char *apiversion, size_t apiversion_len)
+{
+    struct DependencyInfo *dep;
+
+    assert(modname != NULL);
+    check_modname(modname, modname_len);
+    if (apiversion) {
+        assert(apihash != NULL);
+        check_version(apiversion, apiversion_len);
+    }
+
+    dep = (struct DependencyInfo *)tree_insert_str(&dependencies,
+            modname, modname_len, NULL, sizeof(struct DependencyInfo));
+    NO_NULL(dep);
+    if (dep->node.is_new) {
+        /* Only link new modules into the list. A module that is listed
+           several times (with different API hashes) is already linked,
+           and linking it again would make the list circular. */
+        dep->next = dependencies_list;
+        dependencies_list = dep;
+        dep->apiversions = NULL;
+        dep->apiversions_list = NULL;
+    } else if ((dep->apiversions && !apihash) ||
+               (!dep->apiversions && apihash)) {
+        error("Cannot mix dependencies on the same module "
+              "with and without API hashes.");
+    }
+
+    if (apihash) {
+        unsigned char binhash[32]; /* copied by tree_insert/create_node */
+        struct DepApiVersion *ver;
+
+        if (!unbase64url(apihash, 43, binhash, 32)) {
+            /* NOTE(review): binhash is not completely filled in here. This
+               assumes that error() does not return - TODO confirm */
+            error("Invalid encoding of API hash (must be Base64url)");
+        }
+
+        ver = (struct DepApiVersion *)tree_insert_str(&dep->apiversions,
+                (const char*)binhash, 32,
+                NULL, sizeof(struct DepApiVersion));
+        NO_NULL(ver);
+        if (!ver->node.is_new) {
+            error("Duplicate API version");
+        }
+        ver->apiversion = apiversion ?
+                dupmemz(apiversion, apiversion_len) : NULL;
+        ver->apiversion_len = apiversion_len;
+        ver->next = dep->apiversions_list;
+        dep->apiversions_list = ver;
+    }
+}
+
+/**
+ * Parses one line of the dependency list. A line is either empty,
+ * "<module-name>" or "<api-hash> <module-name> [<api-version>]".
+ */
+static void parse_dependency_line(const char *line)
+{
+    const char *word1, *word2;
+    const char *apihash, *modname, *apiversion;
+    size_t len1;
+    size_t modname_len, apiversion_len;
+
+    if (!line[0]) {
+        return; /* Empty line */
+    } else if (line[0] == ' ' || line[0] == '\t') {
+        /* NOTE(review): next_word() asserts that the line does not start
+           with whitespace, so error() presumably does not return */
+        error("Leading whitespace in dependencies file");
+    }
+
+    word1 = line;
+    word2 = next_word(word1, &len1);
+    assert(len1 > 0);
+    if (word2) {
+        const char *junk;
+        /* Full line with <api-hash> <module-name> [<api-version>] */
+        apihash = word1;
+        if (len1 != 43) {
+            error_len("Invalid length of API hash of dependency, "
+                      "should be 43 characters (256 bits)",
+                      apihash, len1);
+        }
+        modname = word2;
+        apiversion = next_word(word2, &modname_len);
+        if (apiversion) {
+            junk = next_word(apiversion, &apiversion_len);
+            if (junk != NULL) {
+                error_str("Unexpected stuff at end of dependency line", junk);
+            }
+        } else {
+            apiversion_len = 0;
+        }
+    } else {
+        /* Short line with just a module name.
+           This is for prototyping and internal submodules only. */
+        modname = word1;
+        modname_len = len1;
+        apihash = NULL;
+        apiversion = NULL;
+        apiversion_len = 0;
+    }
+
+    add_dependency(apihash,
+                   modname, modname_len,
+                   apiversion, apiversion_len);
+}
+
+/* Passes each line from read_source_line() to parse_dependency_line() */
+void parse_dependencies_list(FILE *f)
+{
+    char line[SOURCELINE_MAX];
+    size_t len;
+    while (read_source_line(f, line, &len, STRIP_COMMENTS)) {
+        parse_dependency_line(line);
+    }
+}
