/* * Declarations for the tokenizer in the bootstrap compiler. * * Copyright © 2025-2026 Samuel Lidén Borell * * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later */ #ifndef SLUL_TOKEN_H #define SLUL_TOKEN_H #include #include "compiler.h" enum Token { T_EOL, /**< End of line */ /* Expressions */ T_UpperIdent, T_LowerIdent, T_Integer, T_Version, T_String, T_SYM_Dot, T_SYM_ExclMark, T_SYM_SingleEqual, T_SYM_PlusEqual, T_SYM_MinusEqual, T_SYM_AsteriskEqual, T_SYM_SlashEqual, T_SYM_DoubleEqual, T_SYM_NotEqual, T_SYM_Less, T_SYM_Greater, T_SYM_LessEqual, T_SYM_GreaterEqual, T_SYM_Plus, T_SYM_Minus, T_SYM_Asterisk, T_SYM_Slash, T_SYM_LParen, T_SYM_RParen, T_SYM_LBracket, T_SYM_RBracket, /* Common stuff */ T_KW_end, /* Toplevels */ T_KW_class, T_KW_entry, T_KW_enum, T_KW_func, T_KW_constructor, T_KW_giveme, T_KW_main, T_KW_only, T_KW_ignore, T_KW_record, T_KW_templates, T_KW_trait, T_KW_usetype, T_KW_versions, /* Qualifiers */ T_KW_aliased, T_KW_volatile, /* Generics */ T_KW_of, T_KW_to, T_KW_from, /* Function sections. These can additionally contain T_KW_return */ T_KW_calledfrom, T_KW_code, T_KW_export, T_KW_io, T_KW_local, T_KW_modifies, T_KW_reads, T_KW_section, T_KW_sets, T_KW_since, /* `usetype` lines */ T_KW_apihash, /* Statements */ T_KW_assert, T_KW_break, T_KW_case, T_KW_continue, T_KW_default, T_KW_elif, T_KW_else, T_KW_for, T_KW_if, T_KW_in, T_KW_loopend, T_KW_loopempty, T_KW_return, T_KW_switch, T_KW_while, /* Operators */ T_KW_and, T_KW_mod, T_KW_not, T_KW_or, /* Special values */ T_KW_False, T_KW_True, T_KW_None, T_KW_this }; #define FIRST_QUALIFIER T_KW_aliased #define LAST_QUALIFIER T_KW_volatile #define NUM_QUALIFIERS (LAST_QUALIFIER+1 - FIRST_QUALIFIER) /* Please keep in sync with Q_* in compiler.h */ #define TOKEN_CASES_QUALIFIERS \ case T_KW_aliased: \ case T_KW_volatile: struct LexemeInfo { size_t len; const char *string; SlulInt num; }; extern bool tokenize_numbers_as_versions; void tokenizer_init(FILE *file); /** Reads the next line. Skips lines without tokens. Returns false on EOF */ bool tokenizer_next_line(void); bool tokenizer_line_is_indented(void); enum Token tokenize(struct LexemeInfo *li_out); enum Token lookahead_token(void); /** Unreads a token (but never unreads the line itself) */ void unread_token(void); /** Unreads a full line */ void unread_line(void); void expect(struct LexemeInfo *li_out, enum Token expected, const char *errmsg); void expect_next_line(void); bool expect_next_line_or_eof(void); const char *last_token_endptr(void); enum IdentKind classify_ident(const struct LexemeInfo *li); void unescape_string(const struct LexemeInfo *li, const char **str_out, size_t *len_out); NORETURN void error_token(const char *s, const struct LexemeInfo *li); #endif