aboutsummaryrefslogtreecommitdiff
path: root/bootstrap
diff options
context:
space:
mode:
Diffstat (limited to 'bootstrap')
-rw-r--r--bootstrap/Makefile2
-rw-r--r--bootstrap/b64url.c83
-rw-r--r--bootstrap/compiler.h37
-rw-r--r--bootstrap/main.c18
-rw-r--r--bootstrap/parsemod.c149
5 files changed, 288 insertions, 1 deletions
diff --git a/bootstrap/Makefile b/bootstrap/Makefile
index e7cd4ed..f042abf 100644
--- a/bootstrap/Makefile
+++ b/bootstrap/Makefile
@@ -46,6 +46,7 @@ CLANG_ANALYZE_OPTS = --analyzer-output text
# Stage 1 compiler (written in C)
C_SOURCES = \
$(srcdir)/ast.c \
+ $(srcdir)/b64url.c \
$(srcdir)/builtins.c \
$(srcdir)/funccall.c \
$(srcdir)/intrange.c \
@@ -72,6 +73,7 @@ C_HEADERS = \
$(srcdir)/token.h
OBJECTS = \
$(builddir)/ast.o \
+ $(builddir)/b64url.o \
$(builddir)/builtins.o \
$(builddir)/funccall.o \
$(builddir)/intrange.o \
diff --git a/bootstrap/b64url.c b/bootstrap/b64url.c
new file mode 100644
index 0000000..4d08947
--- /dev/null
+++ b/bootstrap/b64url.c
@@ -0,0 +1,83 @@
+/*
+ * Base64url (RFC 4648 section 5) decoding function. Since SLUL explicitly
+ * does NOT use padding, the variant without padding is used (see section 3.2).
+ *
+ * Copyright © 2021-2026 Samuel Lidén Borell <samuel@kodafritt.se>
+ *
+ * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later
+ */
+
+#include "compiler.h"
+#include <assert.h>
+#include <string.h>
+
+#define X 0xFF /* marks bytes that are not Base64url digits (any value > 63 is rejected) */
+static const unsigned char unb64url[128-32] = { /* indexed by (byte - 0x20); gives the 6-bit digit value */
+ /* 20-2F: special characters; only '-' (0x2D) is a digit, with value 62 */
+ X, X, X, X, X, X, X, X, X, X, X, X, X, 62, X, X,
+ /* 30-3F: 01234...; '0'-'9' have values 52-61 */
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, X, X, X,
+ /* 40-4F: @ABCD...; 'A'-'O' have values 0-14 */
+ X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ /* 50-5F: PQRST...; 'P'-'Z' have values 15-25 and '_' (0x5F) has value 63 */
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, 63,
+ /* 60-6F: `abcd...; 'a'-'o' have values 26-40 */
+ X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ /* 70-7F: pqrst...; 'p'-'z' have values 41-51 */
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X
+};
+#undef X
+
+/*
+ * Decodes `count` (2 to 4) Base64url digits starting at *pos into a 24-bit
+ * group, with the first digit in the most significant 6 bits. *pos is
+ * advanced past every byte that is read. Returns false at the first byte
+ * that is not a Base64url digit.
+ */
+static bool decode_group(const char **pos, unsigned count, SlulInt *group)
+{
+    SlulInt bits = 0;
+    unsigned i;
+
+    for (i = 0; i < count; i++) {
+        unsigned char c = (unsigned char)*((*pos)++);
+        if (c < 32 || c > 127) return false;
+        c = unb64url[c-32];
+        if (c > 63) return false;
+        bits |= (SlulInt)c << (18 - 6*i);
+    }
+    *group = bits;
+    return true;
+}
+
+/*
+ * Decodes unpadded Base64url text into bytes.
+ *
+ * Complete groups of 4 digits give 3 bytes each. A final group of 2 or 3
+ * digits gives 1 or 2 bytes, and the bits of that group that do not belong
+ * to an output byte must be zero. A single leftover digit is invalid.
+ *
+ * Returns true on success. Returns false if the input contains a byte that
+ * is not a Base64url digit, if the trailing bits are non-zero, or if the
+ * output buffer is too small. Bytes from groups decoded before the failure
+ * was detected may already have been written to `out`.
+ */
+bool unbase64url(const char *in, size_t inlen,
+                 unsigned char *out, size_t outlen)
+{
+    SlulInt v;
+    size_t tail_bytes;
+
+    /* Complete groups: 4 digits -> 3 bytes */
+    while (inlen >= 4) {
+        if (outlen < 3) return false;
+        if (!decode_group(&in, 4, &v)) return false;
+
+        out[0] = (v >> 16) & 0xFF;
+        out[1] = (v >> 8) & 0xFF;
+        out[2] = v & 0xFF;
+        out += 3;
+        inlen -= 4;
+        outlen -= 3;
+    }
+
+    if (inlen == 0) return true;
+    /* One digit carries only 6 bits, which is less than a byte */
+    if (inlen == 1) return false;
+
+    /* Trailing group: 2 digits -> 1 byte, 3 digits -> 2 bytes */
+    tail_bytes = inlen - 1;
+    if (!decode_group(&in, (unsigned)inlen, &v)) return false;
+    if (outlen < tail_bytes) return false;
+
+    out[0] = (v >> 16) & 0xFF;
+    if (tail_bytes == 2) {
+        out[1] = (v >> 8) & 0xFF;
+        /* Only the low 8 bits are left over; they must be zero */
+        return (v & 0xFF) == 0;
+    }
+    /* The low 16 bits are left over; they must be zero */
+    return (v & 0xFFFF) == 0;
+}
diff --git a/bootstrap/compiler.h b/bootstrap/compiler.h
index 8090865..6e3fa19 100644
--- a/bootstrap/compiler.h
+++ b/bootstrap/compiler.h
@@ -548,6 +548,28 @@ void srcloc_init(struct SourceLocation *srcloc);
extern unsigned num_sources;
extern char *(sources[MAX_SOURCES]);
+struct DepApiVersion { /* one API version of a dependency, identified by its API hash */
+ struct TreeNode node; /* of API hash (the decoded 32-byte binary hash is the key) */
+ char *apiversion; /* version number; NULL if the dependency line had none */
+ size_t apiversion_len; /* length of apiversion in bytes; 0 if there is none */
+ struct DepApiVersion *next; /* next entry in the owning DependencyInfo's apiversions_list */
+};
+
+struct DependencyInfo { /* one module that is listed in the dependency list file */
+ struct TreeNode node; /* of module name */
+ struct TreeNode *apiversions; /* tree of DepApiVersion keyed by API hash; NULL if no hash was given */
+ struct DepApiVersion *apiversions_list; /* the same DepApiVersion entries as a linked list */
+ /*unsigned char apihash[32];
+ unsigned apihash_present : 1;
+ char *modname;
+ char *apiversion;
+ size_t modname_len;
+ size_t apiversion_len;*/
+ struct DependencyInfo *next; /* next entry in dependencies_list */
+};
+
+extern struct DependencyInfo *dependencies_list; /* head of the linked list of all dependencies */
+
extern const char *current_filename;
extern unsigned current_line;
extern bool interfaces_done;
@@ -597,6 +619,8 @@ enum IdentKind classify_ident_str(const char *name, size_t len);
/** Parses a source index file */
void parse_source_index(FILE *f);
+/** Parses a dependency list file */
+void parse_dependencies_list(FILE *f);
/** Parses the code in a source file.
basename is the filename without the path */
void parse_file(FILE *f, const char *basename);
@@ -641,5 +665,18 @@ const char *path_basename(const char *path);
/** Very fast but stupid hash function. Used to avoid string comparison
in AVL trees */
HashCode hash_str(const char *s, size_t len);
+/**
+ * Decodes a string from base64url to binary. No padding is allowed at
+ * the end.
+ *
+ * \param in Base64url encoded input string
+ * \param inlen Length of input in bytes
+ * \param out Binary output
+ * \param outlen Length of output
+ * \return true if successful, or false if the input is not correctly encoded
+ * or if the output buffer is too small.
+ */
+bool unbase64url(const char *in, size_t inlen,
+ unsigned char *out, size_t outlen);
#endif
diff --git a/bootstrap/main.c b/bootstrap/main.c
index 8ce8894..61b4e1e 100644
--- a/bootstrap/main.c
+++ b/bootstrap/main.c
@@ -143,12 +143,28 @@ static void parse(void)
module_start();
/* TODO Decide on a file name here. In particular,
- check that "sources.index" is not used for anything else. */
+ check that "sources.index" is not used for anything else.
+ - Go uses extensions: go.mod, go.sum, etc.
+ How about:
+ slul.sources
+ slul.list
+ slul.index
+ And then, for the other types of files:
+ slul.deps
+ slul.exports
+ slul.known (known hashes / pubkeys)
+ (Also, how about renaming slul?)
+ */
open_file(rootdir, rootdir_len, "sources.index");
parse_source_index(f);
close_file();
free(last_alloced_filename);
+ open_file(rootdir, rootdir_len, "deps.index");
+ parse_dependencies_list(f);
+ close_file();
+ free(last_alloced_filename);
+
parse_core_interface();
if (is_library) {
parse_exported_interface();
diff --git a/bootstrap/parsemod.c b/bootstrap/parsemod.c
index 1c06487..45c1f11 100644
--- a/bootstrap/parsemod.c
+++ b/bootstrap/parsemod.c
@@ -7,11 +7,16 @@
*/
#include "compiler.h"
#include "token.h"
+#include <assert.h>
#include <string.h>
unsigned num_sources = 0;
char *(sources[MAX_SOURCES]) = { 0 };
+static struct TreeNode *dependencies = NULL; /* tree of DependencyInfo, keyed by module name */
+struct DependencyInfo *dependencies_list = NULL; /* DependencyInfo entries chained through ->next */
+
+
void parse_source_index(FILE *f)
{
char line[SOURCELINE_MAX];
@@ -34,3 +39,147 @@ void parse_source_index(FILE *f)
}
}
}
+
+/**
+ * Measures the word that starts at last_start and locates the word after it.
+ * Words are separated by spaces and/or tabs.
+ *
+ * \param last_start    Start of the current word. Must point at a character
+ *                      that is not NUL, space or tab (checked with assert).
+ * \param prev_len_out  Receives the length in bytes of the current word.
+ * \return Pointer to the first character of the next word, or NULL if the
+ *         current word is the last one. If the current word is followed
+ *         only by spaces/tabs, a "Trailing whitespace" warning is reported
+ *         and NULL is returned.
+ */
+static const char *next_word(const char *last_start, size_t *prev_len_out)
+{
+    const char *s = last_start;
+    bool spaces = false;
+
+    assert(*s && *s != ' ' && *s != '\t');
+    while (*s && *s != ' ' && *s != '\t') s++;
+    /* s never moves backwards, so the difference is non-negative. Convert
+       directly to the type of the out parameter; going through `unsigned`
+       could truncate on platforms where size_t is wider. */
+    *prev_len_out = (size_t)(s - last_start);
+
+    /* Skip whitespace after */
+    while (*s == ' ' || *s == '\t') {
+        s++;
+        spaces = true;
+    }
+    if (!*s) {
+        if (spaces) {
+            warning("Trailing whitespace");
+        }
+        return NULL;
+    }
+    return s;
+}
+
+/**
+ * Validates the module name of a dependency. At present only the length
+ * is checked: a name longer than 50 bytes is reported with error_len().
+ */
+static void check_modname(const char *name, size_t len)
+{
+    enum { MODNAME_MAXLEN = 50 };
+
+    /* TODO */
+    /* XXX require that the first letter is lowercase? or all letters? */
+    if (len <= MODNAME_MAXLEN) {
+        return;
+    }
+    error_len("Module name of dependency too long", name, len);
+}
+
+/**
+ * Validates the API version string of a dependency. At present only the
+ * length is checked: a version longer than 50 bytes is reported with
+ * error_len().
+ */
+static void check_version(const char *version, size_t len)
+{
+    enum { VERSION_MAXLEN = 50 };
+
+    /* TODO */
+    if (len <= VERSION_MAXLEN) {
+        return;
+    }
+    error_len("Version of dependency too long", version, len);
+}
+
+/**
+ * Records one entry from the dependency list.
+ *
+ * The module is looked up in (or added to) the `dependencies` tree by name.
+ * If an API hash is given, it is decoded from Base64url and a DepApiVersion
+ * keyed by the 32-byte binary hash is added under that module.
+ *
+ * A module may appear several times (once per API hash), so the module is
+ * linked into dependencies_list only the first time it is seen. Linking an
+ * already-linked node again would make the list cyclic.
+ *
+ * NOTE(review): the statements following each error() call appear to assume
+ * that error() does not return - confirm.
+ *
+ * \param apihash         Base64url text of the API hash (43 characters are
+ *                        read), or NULL if the line has no hash.
+ * \param modname         Module name, modname_len bytes long. Not NULL.
+ * \param modname_len     Length of modname in bytes.
+ * \param apiversion      API version text, or NULL. Requires apihash.
+ * \param apiversion_len  Length of apiversion in bytes (0 if absent).
+ */
+static void add_dependency(const char *apihash,
+                           const char *modname, size_t modname_len,
+                           const char *apiversion, size_t apiversion_len)
+{
+    struct DependencyInfo *dep;
+
+    assert(modname != NULL);
+    check_modname(modname, modname_len);
+    if (apiversion) {
+        assert(apihash != NULL);
+        check_version(apiversion, apiversion_len);
+    }
+
+    dep = (struct DependencyInfo *)tree_insert_str(&dependencies,
+            modname, modname_len, NULL, sizeof(struct DependencyInfo));
+    NO_NULL(dep);
+    if (dep->node.is_new) {
+        /* First occurrence of this module: initialize it and link it
+           into the global list exactly once. */
+        dep->apiversions = NULL;
+        dep->apiversions_list = NULL;
+        dep->next = dependencies_list;
+        dependencies_list = dep;
+    } else if ((dep->apiversions && !apihash) ||
+               (!dep->apiversions && apihash)) {
+        error("Cannot mix dependencies on the same module "
+              "with and without API hashes.");
+    }
+
+    if (apihash) {
+        unsigned char binhash[32]; /* copied by tree_insert/create_node */
+        struct DepApiVersion *ver;
+
+        /* 43 Base64url digits encode 256 bits = 32 bytes */
+        if (!unbase64url(apihash, 43, binhash, sizeof(binhash))) {
+            error("Invalid encoding of API hash (must be Base64url)");
+        }
+
+        ver = (struct DepApiVersion *)tree_insert_str(&dep->apiversions,
+                (const char*)binhash, sizeof(binhash),
+                NULL, sizeof(struct DepApiVersion));
+        NO_NULL(ver);
+        if (!ver->node.is_new) {
+            error("Duplicate API version");
+        }
+        ver->apiversion = apiversion ?
+                dupmemz(apiversion, apiversion_len) : NULL;
+        ver->apiversion_len = apiversion_len;
+        ver->next = dep->apiversions_list;
+        dep->apiversions_list = ver;
+    }
+}
+
+/**
+ * Parses one line of the dependency list and records it with
+ * add_dependency(). Two forms are accepted:
+ *
+ *   <api-hash> <module-name> [<api-version>]
+ *   <module-name>
+ *
+ * The second form is for prototyping and internal submodules only.
+ * Empty lines are ignored. Leading whitespace, an API hash that is not
+ * 43 characters long, and extra words after the version are errors.
+ *
+ * \param line  NUL-terminated line to parse.
+ */
+static void parse_dependency_line(const char *line)
+{
+    const char *first = line;
+    const char *second;
+    const char *hash = NULL;
+    const char *name;
+    const char *version = NULL;
+    size_t first_len, name_len;
+    size_t version_len = 0;
+
+    if (line[0] == '\0') {
+        return; /* Empty line */
+    }
+    if (line[0] == ' ' || line[0] == '\t') {
+        error("Leading whitespace in dependencies file");
+    }
+
+    second = next_word(first, &first_len);
+    assert(first_len > 0);
+
+    if (second == NULL) {
+        /* Short line with just a module name.
+           This is for prototyping and internal submodules only. */
+        name = first;
+        name_len = first_len;
+    } else {
+        /* Full line with <api-hash> <module-name> [<api-version>] */
+        hash = first;
+        if (first_len != 43) {
+            error_len("Invalid length of API hash of dependency, "
+                      "should be 43 characters (256 bits)",
+                      hash, first_len);
+        }
+        name = second;
+        version = next_word(second, &name_len);
+        if (version != NULL) {
+            const char *trailing = next_word(version, &version_len);
+            if (trailing != NULL) {
+                error_str("Unexpected stuff at end of dependency line",
+                          trailing);
+            }
+        }
+    }
+
+    add_dependency(hash, name, name_len, version, version_len);
+}
+
+/**
+ * Reads the dependency list from f line by line, with comments stripped
+ * by read_source_line(), and parses each line as a dependency entry.
+ */
+void parse_dependencies_list(FILE *f)
+{
+    char buf[SOURCELINE_MAX];
+    size_t buflen;
+
+    for (;;) {
+        if (!read_source_line(f, buf, &buflen, STRIP_COMMENTS)) {
+            break;
+        }
+        parse_dependency_line(buf);
+    }
+}