diff options
Diffstat (limited to 'bootstrap')
| -rw-r--r-- | bootstrap/compiler.h | 1 | ||||
| -rw-r--r-- | bootstrap/parsedecl.c | 13 | ||||
| -rw-r--r-- | bootstrap/parsespec.c | 35 | ||||
| -rw-r--r-- | bootstrap/token.c | 53 | ||||
| -rw-r--r-- | bootstrap/token.h | 5 |
5 files changed, 105 insertions, 2 deletions
diff --git a/bootstrap/compiler.h b/bootstrap/compiler.h index a971282..bebdd4c 100644 --- a/bootstrap/compiler.h +++ b/bootstrap/compiler.h @@ -590,6 +590,7 @@ struct Var *lookup_local_var(const char *name, size_t len); struct Var *lookup_instance_var(const char *name, size_t len); void parse_svctype_spec(const char *classname, size_t cnlen); void parse_giveme_section(void); +void parse_versions_section(void); /** Reports an error in the source code and exits */ NORETURN void error(const char *s); NORETURN void error_str(const char *s, const char *arg); diff --git a/bootstrap/parsedecl.c b/bootstrap/parsedecl.c index 92778f5..c642225 100644 --- a/bootstrap/parsedecl.c +++ b/bootstrap/parsedecl.c @@ -16,6 +16,7 @@ struct TreeNode *current_funcparams = NULL; /* These are used to check that the definitions come in the correct order */ enum SeenDefs { SEEN_NONE, + SEEN_VERSIONS, SEEN_GIVEME, SEEN_INSTANCEVARS, SEEN_CTORS, @@ -130,6 +131,18 @@ void parse_file(FILE *f, const char *basename) expect_next_line(); parse_giveme_section(); break; + case T_KW_versions: + if (interfaces_done) { + error("Can only have `versions` sections in interface files"); + } + if (seen_defs >= SEEN_VERSIONS) { + error(seen_defs == SEEN_VERSIONS ? 
+ "Can't have more than one `versions` section" : + "`versions` section must come first"); + } + expect_next_line(); + parse_versions_section(); + break; case T_UpperIdent: { /* Can be either a service type specification: CommandEntry diff --git a/bootstrap/parsespec.c b/bootstrap/parsespec.c index 7f0ce55..dfd3fe5 100644 --- a/bootstrap/parsespec.c +++ b/bootstrap/parsespec.c @@ -98,3 +98,38 @@ void parse_giveme_section(void) } } } + +static void add_version(const char *s, size_t len) +{ + /* TODO */ + (void)s; + (void)len; + fprintf(stderr, "add version >%.*s<\n", (int)len, s); +} + +void parse_versions_section(void) +{ + assert(!tokenize_numbers_as_versions); + tokenize_numbers_as_versions = true; + for (;;) { + struct LexemeInfo li; + enum Token t = tokenize(&li); + if (t == T_EOL) { + if (!tokenizer_next_line()) { + break; /* EOF */ + } + continue; /* Blank line */ + } else if (t != T_Version) { + if (tokenizer_line_is_indented()) { + error("Unexpected token in `versions` section " + "(or unexpected identation before symbol)"); + } + unread_line(); /* End of version section */ + break; + } + + add_version(li.string, li.len); + expect_next_line_or_eof(); + } + tokenize_numbers_as_versions = false; +} diff --git a/bootstrap/token.c b/bootstrap/token.c index fa6fa3f..1b63405 100644 --- a/bootstrap/token.c +++ b/bootstrap/token.c @@ -16,10 +16,22 @@ static char line[SOURCELINE_MAX]; static const char *s; static const char *last_token_start; static bool has_unread_line; +/* This is a hack (mode parameter) to be able to parse versions, without having + to have complex multi-part tokens. 
However, it would be possible to + remove this hack by adding two new tokens, given as regular expressions here + (where `$` would match either the module version): + + T_WholeLineVersion: + ^ *([0-9][-0-9a-zA-Z._+~]*) *$ + T_ModuleWithVersion: + ^ *module *([a-zA-Z][a-zA-Z0-9_]*) *([0-9][-0-9a-zA-Z._+~]*) *$ +*/ +bool tokenize_numbers_as_versions = false; static enum Token tok_symbol(struct LexemeInfo *li_out); static enum Token tok_alphanum(struct LexemeInfo *li_out); static enum Token tok_number(struct LexemeInfo *li_out); +static enum Token tok_version(struct LexemeInfo *li_out); static enum Token tok_string(struct LexemeInfo *li_out); void tokenizer_init(FILE *file) @@ -100,7 +112,11 @@ enum Token tokenize(struct LexemeInfo *li_out) } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') { return tok_alphanum(li_out); } else if (c >= '0' && c <= '9') { - return tok_number(li_out); + if (tokenize_numbers_as_versions) { + return tok_version(li_out); + } else { + return tok_number(li_out); + } } else if (c == '"') { return tok_string(li_out); } else { @@ -394,6 +410,41 @@ static enum Token tok_number(struct LexemeInfo *li_out) return T_Integer; } +#define DOTCHARS ".~-+" + +static enum Token tok_version(struct LexemeInfo *li_out) +{ + size_t len = 0; + const char *start = s; + char c; + + do { + c = *(++s); + if (c == '_' && s[-1] == '_') { + error("Double underscore is not allowed in versions"); + } else if (c && strchr(DOTCHARS,c) && strchr(DOTCHARS,s[-1])) { + error("Repeated .~-+ are not allowed in versions"); + } + len++; + } while ((c >= 'a' && c <= 'z') || /* uppercase shouldn't be needed */ + (c >= '0' && c <= '9') || + c == '_' || c == '.' 
|| c == '~' || c == '-' || c == '+'); + + if (len > 1 && *start == '0' && !strchr(DOTCHARS,start[1])) { + /* But zeros in middle components are allowed, to allow for + "calendar versions" like 2021.01.02 */ + error("Versions may not have leading zeros"); + } else if (strchr(DOTCHARS,s[-1])) { + error("Versions may not end with .~-+"); + } else if (len > 50) { + error("Versions may not be longer than 50 characters"); + } + + li_out->len = len; + li_out->string = start; + return T_Version; +} + static enum Token tok_string(struct LexemeInfo *li_out) { const char *start = ++s; diff --git a/bootstrap/token.h b/bootstrap/token.h index 0c94562..dfa1ca3 100644 --- a/bootstrap/token.h +++ b/bootstrap/token.h @@ -17,6 +17,7 @@ enum Token { T_UpperIdent, T_LowerIdent, T_Integer, + T_Version, T_String, T_SYM_Dot, T_SYM_ExclMark, @@ -81,7 +82,7 @@ enum Token { T_KW_section, T_KW_sets, T_KW_since, - /* Usetype lines */ + /* `usetype` lines */ T_KW_apihash, /* Statements */ T_KW_assert, @@ -130,6 +131,8 @@ struct LexemeInfo { SlulInt num; }; +extern bool tokenize_numbers_as_versions; + void tokenizer_init(FILE *f); bool tokenizer_next_line(void); bool tokenizer_line_is_indented(void); |
