Errors are reported for invalid * UTF-8, such as overlong encodings, and for disallowed codepoints (e.g. * control and surrogate characters). * * This function handles UTF-8 characters that span across multiple buffers. * * The number of "trailer bytes" are tracked, so the column number can be * calculated by subtracting the number of trailer bytes. A "trailer byte" * is a byte, in a valid UTF-8 character, that is not the first byte. */ const char *skip_utf8(struct CSlul *ctx, const char *bp, const char *bend) { unsigned char c; uint32 code; /* If we are at the start of a buffer, there might be some UTF-8 byte from the previous buffer that we need to take into account */ if (UNLIKELY(ctx->utf8state)) { assert(bp != bend); c = ctx->utf8byte; code = ctx->utf8code; switch (ctx->utf8state) { case UTF8ST_BYTE2: goto continue_byte2; case UTF8ST_MB3_BYTE3: goto continue_mb3_byte3; case UTF8ST_MB4_BYTE3: goto continue_mb4_byte3; case UTF8ST_MB4_BYTE4: goto continue_mb4_byte4; case UTF8ST_NONE: ; /* Should never happen */ } } while (bp != bend) { c = (unsigned char)*bp; if (c < 128) break; /* Plain ASCII character */ if (UNLIKELY((c & 0xc0) == 0x80)) { /* Not a valid start byte */ error_char(ctx, bp, CSLUL_E_BADUTF8); bp++; continue; } else if (UNLIKELY(bp+1 == bend)) { ctx->utf8state = UTF8ST_BYTE2; ctx->utf8byte = c; if (ctx->last_buffer) { error_char(ctx, bp, CSLUL_E_BADUTF8); } bp++; goto splitted; } else bp++; continue_byte2: code = 0; if ((c & 0xe0) == 0xc0) { /* 2 byte character */ code |= (c & 0x1fU) << 6U; c = *bp; if (UNLIKELY((c & 0xc0) != 0x80)) { error_char(ctx, bp, CSLUL_E_BADUTF8); continue; } code |= (c & 0x3fU); ctx->mbtrailerbytes++; if (UNLIKELY(code <= 0x7F)) { /* Overlong encoding */ error_char(ctx, bp, CSLUL_E_BADUTF8); ctx->mbtrailerbytes--; continue; } bp++; } else if ((c & 0xf0) == 0xe0) { /* 3 byte character */ code |= (uint32)(c & 0x0fU) << 12U; c = *bp; if (UNLIKELY((c & 0xc0) != 0x80)) { error_char(ctx, bp, CSLUL_E_BADUTF8); continue; } code |= 
(c & 0x3fU) << 6U; ctx->mbtrailerbytes++; if (UNLIKELY(code <= 0x7FF)) { /* Overlong encoding */ error_char(ctx, bp, CSLUL_E_BADUTF8); ctx->mbtrailerbytes--; continue; } if (UNLIKELY(bp+1 == bend)) { ctx->utf8state = UTF8ST_MB3_BYTE3; ctx->utf8code = code; if (ctx->last_buffer) { error_char(ctx, bp, CSLUL_E_BADUTF8); } bp++; goto splitted; } bp++; continue_mb3_byte3: c = *bp; ctx->mbtrailerbytes++; if (UNLIKELY((c & 0xc0) != 0x80)) { error_char(ctx, bp, CSLUL_E_BADUTF8); ctx->mbtrailerbytes -= 2; continue; } code |= c & 0x3f; bp++; } else /*if ((c & 0xf0) == 0xf0)*/ { /* 4 byte character */ code |= (uint32)(c & 0x07U) << 18U; c = *bp; if (UNLIKELY((c & 0xc0) != 0x80)) { error_char(ctx, bp, CSLUL_E_BADUTF8); continue; } code |= (uint32)(c & 0x3fU) << 12U; ctx->mbtrailerbytes++; if (UNLIKELY(code <= 0xFFFF)) { /* Overlong encoding */ error_char(ctx, bp, CSLUL_E_BADUTF8); ctx->mbtrailerbytes--; continue; } if (UNLIKELY(bp+1 == bend)) { ctx->utf8state = UTF8ST_MB4_BYTE3; ctx->utf8code = code; if (ctx->last_buffer) { error_char(ctx, bp, CSLUL_E_BADUTF8); } bp++; goto splitted; } bp++; continue_mb4_byte3: c = *bp; ctx->mbtrailerbytes++; if (UNLIKELY((c & 0xc0) != 0x80)) { error_char(ctx, bp, CSLUL_E_BADUTF8); ctx->mbtrailerbytes -= 2; continue; } code |= (c & 0x3fU) << 6U; if (UNLIKELY(bp+1 == bend)) { ctx->utf8state = UTF8ST_MB4_BYTE4; ctx->utf8code = code; if (ctx->last_buffer) { error_char(ctx, bp, CSLUL_E_BADUTF8); } bp++; goto splitted; } bp++; continue_mb4_byte4: c = *bp; ctx->mbtrailerbytes++; if (UNLIKELY((c & 0xc0) != 0x80)) { error_char(ctx, bp, CSLUL_E_BADUTF8); ctx->mbtrailerbytes -= 3; continue; } code |= c & 0x3f; bp++; } /* Check what these characters actually do. Maybe some of them can be allowed. 
*/ if (UNLIKELY( (code >= 0x80 && code <= 0x9F) || /* control characters */ (code >= 0xD800 && code <= 0xDFFF) || /* surrogate characters */ (code >= 0x2028 && code <= 0x202E) || /* line sep., RLO, etc */ (code >= 0x2060 && code <= 0x206F) || /* digit override etc */ (code >= 0xFDD0 && code <= 0xFDEF) || /* non-characters */ code == 0xFEFF || /* Byte Order Mark / ZWNBSP */ ((code & 0xFFFE) == 0xFFFE) || /* ??FFFE-FFFF are non-characters */ code > 0x10FFFF)) { /* Last Unicode character */ /* Disallowed to prevent deceptive source text */ error_char_offs(ctx, bp, -1, CSLUL_E_DISALLOWEDUNICODE); } else if (ctx->allowed_scripts != (SCRIPT_ALL|SCRIPT_RTL)) { unsigned script = get_unicode_script(code); if (UNLIKELY((script & ~ctx->allowed_scripts) != 0)) { enum CSlulErrorCode err; switch (script) { case SCRIPT_LATIN: err = CSLUL_E_SCRIPTLATIN; break; case SCRIPT_CYRILLIC: err = CSLUL_E_SCRIPTCYRILLIC; break; case SCRIPT_GREEK: err = CSLUL_E_SCRIPTGREEK; break; case SCRIPT_SPECIALS: err = CSLUL_E_SCRIPTSPECIALS; break; case SCRIPT_OTHER: err = CSLUL_E_SCRIPTOTHER; break; case SCRIPT_RTL: err = CSLUL_E_SCRIPTRTL; break; default: internal_error(ctx, INTERR_BADSCRIPT); goto interr; } error_char_offs(ctx, bp, -1, err); ctx->allowed_scripts |= script; /* Silence further errors */ interr: ; } } } ctx->utf8state = UTF8ST_NONE; splitted: return bp; } /** * Like skip_utf8, but allows bounding the string range * \param ctx Compilation context * \param bp Current character pointer * \param stop Bounding end pointer * \param at_end If this the end pointer is at EOF * \return New character pointer */ static const char *skip_utf8_bounded(struct CSlul *ctx, const char *bp, const char *stop, int at_end) { const char *ret; int savedlast = ctx->last_buffer; ctx->last_buffer = at_end; ret = skip_utf8(ctx, bp, stop); ctx->last_buffer = savedlast; return ret; } /** * Reports an error about an unexpected UTF-8 character. * Returns a pointer to the position past the UTF-8 character. 
*/ const char *unexpected_utf8(struct CSlul *ctx, const char *bp, const char *bend) { error_char(ctx, bp, CSLUL_E_INVALIDCHAR); ctx->allowed_scripts = SCRIPT_ALL|SCRIPT_RTL; return skip_utf8(ctx, bp, bend); } #define X 0 #define N ((unsigned char)CSLUL_T_Integer) #define U ((unsigned char)CSLUL_T_UpperIdent) #define L ((unsigned char)CSLUL_T_LowerIdent) static const unsigned char char2tok[128] = { /* -------------------- 0x00 - 0x0F -------------------- */ X, X, X, X, X, X, X, X, X, CSLUL_INT_Whitespace, CSLUL_T_Newline, X, X, CSLUL_T_Newline, X, X, /* -------------------- 0x10 - 0x1F -------------------- */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* -------------------- 0x20 - 0x2F -------------------- */ CSLUL_INT_Whitespace, /* ! */ CSLUL_T_Exclamation, /* " */ CSLUL_T_String, /* # */ CSLUL_INT_Comment, /* $ */ X, /* % */ X, /* & */ X, /* ' */ X, /* ( */ CSLUL_T_LParen, /* ) */ CSLUL_T_RParen, /* * */ CSLUL_T_Asterisk, /* + */ CSLUL_T_Plus, /* , */ CSLUL_T_Comma, /* - */ CSLUL_T_Minus, /* . */ CSLUL_T_Dot, /* / */ CSLUL_T_Slash, /* -------------------- 0x30 - 0x3F -------------------- */ /* 0-9 */ N, N, N, N, N, N, N, N, N, N, /* : */ CSLUL_T_Colon, /* ; */ CSLUL_T_Semicolon, /* < */ CSLUL_T_Less, /* = */ CSLUL_T_Assign, /* > */ CSLUL_T_Greater, /* ? */ CSLUL_T_Question, /* -------------------- 0x40 - 0x4F -------------------- */ /* @ */ X, /* A-O */ U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, /* -------------------- 0x50 - 0x5F -------------------- */ /* P-Z */ U, U, U, U, U, U, U, U, U, U, U, /* [ */ CSLUL_T_LSquare, /* \ */ X, /* ] */ CSLUL_T_RSquare, /* ^ */ X, /* _ */ L, /* -------------------- 0x60 - 0x6F -------------------- */ /* ` */ X, /* a-o */ L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, /* -------------------- 0x70 - 0x7F -------------------- */ /* p-z */ L, L, L, L, L, L, L, L, L, L, L, /* { */ CSLUL_T_LCurly, /* | */ X, /* } */ CSLUL_T_RCurly, /* ~ */ X, /* 0x7F */ X }; #undef L #undef U #undef N /** Appends data to the temporary buffer. 
The caller is reponsible for ensuring that the data is not longer than MAX_TOKEN_LEN */ static void copytotmp(struct CSlul *ctx, const char *start, const char *end) { size_t len = end - start; assert(start <= end); assert(ctx->tmplen+len <= MAX_TOKEN_LEN); memcpy(&ctx->toktmp[ctx->tmplen], start, len); ctx->tmplen += len; } static void copytotmp_safe(struct CSlul *ctx, const char *start, const char *end) { size_t len = end - start; assert(start != NULL); assert(start <= end); assert(end <= ctx->bufferend); if (UNLIKELY(len >= MAX_TOKEN_LEN || len + ctx->tmplen >= MAX_TOKEN_LEN)) { end = start + (MAX_TOKEN_LEN - ctx->tmplen); assert(end >= start); error_char(ctx, end, CSLUL_E_STRINGTOOLONG); } copytotmp(ctx, start, end); } static int valid_ident_char(unsigned char ch, enum IdentParseMode mode) { enum CSlulToken tok; if (UNLIKELY(ch > 127)) return 0; tok = (enum CSlulToken)char2tok[(unsigned)ch]; return LIKELY(tok == CSLUL_T_UpperIdent || tok == CSLUL_T_LowerIdent || tok == CSLUL_T_Integer) || (mode != ParseIdent && ch == '.') || (mode == ParseVersion && ch == '~'); } /** * Tokenizes an identifier or attribute part. Sets *status to 1 if successful. * Identifiers than span across buffers are handled by storing identifier * bytes in ctx->toktmp. * * The "mode" parameter controls how parsing should be done: * - ParseIdent: Only alphanumeric characters and underscore allowed. * - ParseVersion: Alphanumeric characters and ._~ allowed * (but _ is forbidden in versions!) * - ParseAttrName: Like ParseIdent, but dots are also allowed. * - ParseAttrValue: Parse until whitespace (space/tab/newline). * Invalid UTF-8 or control characters generate an error. */ const char *tokenize_ident(struct CSlul *ctx, const char *bp, const char *bend, int *status, enum IdentParseMode mode) { /* FIXME this function reports a "too long" error for idents that are exactly 100 bytes! 
*/ const char *start = bp; size_t len = ctx->tmplen; HashCode hash = ctx->tokhash; unsigned hash_or = (ctx->case_insens ? 0x20 : 0); const char *stop; assert(bp <= bend); stop = ((size_t)(bend - bp) > (size_t)MAX_IDENT_LEN-len ? bp+(MAX_IDENT_LEN-len) : bend); if (ctx->toklen == MAX_IDENT_LEN) goto too_long; assert(bp <= stop); assert(stop <= bend); assert(ctx->toklen <= MAX_IDENT_LEN); if (mode != ParseAttrValue) { for (;;) { unsigned char ch; if (UNLIKELY(bp == stop)) goto stopped; ch = *bp; if (!valid_ident_char(ch, mode)) break; bp++; hash = HASH(hash, ch | hash_or); } } else { /* Parsing value in module header */ for (;;) { unsigned char ch; if (UNLIKELY(bp == stop)) goto stopped; ch = *bp; if (ch <= 0x20 || ch == 127) { if (LIKELY(ch == '\n' || ch == '\r' || ch == ' ' || ch == '\t')) break; error_char(ctx, bp, CSLUL_E_INVALIDCHAR); bp++; } else if (LIKELY(ch < 127)) { hash = HASH(hash, ch | hash_or); bp++; } else { /* UTF-8 character */ hash = HASH(hash, 0); bp = skip_utf8_bounded(ctx, bp, stop, (ctx->last_buffer && bend==stop)); } } } done: *status = 1; len = ctx->tmplen; if (UNLIKELY(len)) { copytotmp(ctx, start, bp); ctx->tokval = (const char*)&ctx->toktmp; } else { ctx->tokval = start; } ctx->toklen = (bp-start) + len; ctx->tokhash = hash; return bp; stopped: /* We either reached the end, or the ident is too long */ ctx->toklen += bp-start; assert(bp >= start); if ((size_t)(bp - start) >= MAX_IDENT_LEN - ctx->tmplen) goto too_long; if (bp != bend) goto too_long; /* End of buffer */ if (ctx->last_buffer) goto done; *status = 0; copytotmp(ctx, start, bp); ctx->tokhash = hash; assert(ctx->toklen > 0); return bp; too_long: assert(ctx->toklen == MAX_IDENT_LEN); if (ctx->tmplen) { assert(ctx->tmplen + (bp-start) == MAX_IDENT_LEN); copytotmp(ctx, start, bp); ctx->tokval = (const char*)&ctx->toktmp; } else { ctx->tokval = start; } if (UNLIKELY(bp-start)) { /* will be 0 if this is a second chunk (= report once) */ error_char(ctx, bp, CSLUL_E_IDENTTOOLONG); } 
ctx->tokhash = hash; /* Skip the oversized identifier/value */ if (mode != ParseAttrValue) { for (;;) { if (bp == bend) goto buffer_end_toolong; if (!valid_ident_char(*bp, mode)) break; bp++; } } else { unsigned char ch; for (;;) { if (bp == bend) goto buffer_end_toolong; ch = *bp; if (ch <= 0x20 || ch == 127) { if (LIKELY(ch == '\n' || ch == '\r' || ch == ' ' || ch == '\t')) break; error_char(ctx, bp, CSLUL_E_INVALIDCHAR); bp++; } else if (ch < 127) { bp++; } else { /* UTF-8 character */ bp = skip_utf8(ctx, bp, bend); if (bp == bend) goto buffer_end_toolong; } } } identend_toolong: assert(ctx->toklen == MAX_IDENT_LEN); *status = 1; /* pretend it was successful for error recovery */ return bp; buffer_end_toolong: if (ctx->last_buffer) goto identend_toolong; assert(ctx->toklen == MAX_IDENT_LEN); *status = 0; return bp; } /** Resets identifier state to the starting state. */ void ident_start(struct CSlul *ctx) { ctx->toklen = 0; ctx->tmplen = 0; ctx->tokhash = 0; } void token_start(struct CSlul *ctx, const char *bp) { ctx->prev_tok_line = ctx->tokline; ctx->prev_tok_col = ctx->tokcolumn; ctx->prev_tok_endcol = ctx->tokcolumn+ctx->toklen; ctx->tokline = ctx->line; ctx->tokcolumn = ctx->startcolumn + (bp - ctx->linestart - ctx->mbtrailerbytes); } void token_eof(struct CSlul *ctx, const char *bp) { if (UNLIKELY(bp != ctx->bufferstart && bp[-1] != '\n' && bp[-1] != '\r')) { error_char(ctx, bp, CSLUL_E_NOEOFNEWLINE); } if (UNLIKELY(ctx->in_multiline_comment)) { error_linecol(ctx, CSLUL_E_MLCOMMENTNOTCLOSED, ctx->multilinecomment_startline, 1); } ctx->tokcolumn += ctx->toklen; ctx->toklen = 0; ctx->prev_tok_line = ctx->tokline; ctx->prev_tok_endcol = ctx->tokcolumn; } static enum CSlulToken match_keyword(struct CSlul *ctx) { size_t toklen = ctx->toklen; const char *tokval = ctx->tokval; assert(ctx->toklen >= 1); switch (ctx->tokhash) { /* TODO change "case" -> "matches" (to have 8-space indentation) TODO add "unreachable" in addition to "undef" */ case H_NOT: 
TOK_EQ_RETURN("not", CSLUL_T_KW_Not)
    /* Operators */
    case H_AND: TOK_EQ_RETURN("and", CSLUL_T_KW_And)
    case H_OR: TOK_EQ_RETURN("or", CSLUL_T_KW_Or)
    case H_MOD: TOK_EQ_RETURN("mod", CSLUL_T_KW_Mod)
    case H_DEREF: TOK_EQ_RETURN("deref", CSLUL_T_KW_Deref)
    case H_REFTO: TOK_EQ_RETURN("refto", CSLUL_T_KW_RefTo)
    case H_REF_IS: TOK_EQ_RETURN("ref_is", CSLUL_T_KW_RefIs)
    case H_REF_IS_NOT: TOK_EQ_RETURN("ref_is_not", CSLUL_T_KW_RefIsNot)
    /* Top-level declarations */
    case H_DATA: TOK_EQ_RETURN("data", CSLUL_T_KW_Data)
    case H_FUNC: TOK_EQ_RETURN("func", CSLUL_T_KW_Func)
    case H_TYPE: TOK_EQ_RETURN("type", CSLUL_T_KW_Type)
    /* Elementary types */
    case H_BOOL: TOK_EQ_RETURN("bool", CSLUL_T_KW_Bool)
    case H_USIZE: TOK_EQ_RETURN("usize", CSLUL_T_KW_USize)
    case H_SSIZE: TOK_EQ_RETURN("ssize", CSLUL_T_KW_SSize)
    case H_FILEOFFS: TOK_EQ_RETURN("fileoffs", CSLUL_T_KW_FileOffs)
    case H_STRING: TOK_EQ_RETURN("string", CSLUL_T_KW_String)
    case H_INT8: TOK_EQ_RETURN("int8", CSLUL_T_KW_Int8)
    case H_BYTE: TOK_EQ_RETURN("byte", CSLUL_T_KW_Byte)
    case H_WUINT8: TOK_EQ_RETURN("wuint8", CSLUL_T_KW_WUInt8)
    case H_INT16: TOK_EQ_RETURN("int16", CSLUL_T_KW_Int16)
    case H_UINT16: TOK_EQ_RETURN("uint16", CSLUL_T_KW_UInt16)
    case H_WUINT16: TOK_EQ_RETURN("wuint16", CSLUL_T_KW_WUInt16)
    case H_INT: TOK_EQ_RETURN("int", CSLUL_T_KW_Int)
    case H_UINT: TOK_EQ_RETURN("uint", CSLUL_T_KW_UInt)
    case H_WUINT: TOK_EQ_RETURN("wuint", CSLUL_T_KW_WUInt)
    case H_INT32: TOK_EQ_RETURN("int32", CSLUL_T_KW_Int32)
    case H_UINT32: TOK_EQ_RETURN("uint32", CSLUL_T_KW_UInt32)
    case H_WUINT32: TOK_EQ_RETURN("wuint32", CSLUL_T_KW_WUInt32)
    case H_INT64: TOK_EQ_RETURN("int64", CSLUL_T_KW_Int64)
    case H_UINT64: TOK_EQ_RETURN("uint64", CSLUL_T_KW_UInt64)
    case H_WUINT64: TOK_EQ_RETURN("wuint64", CSLUL_T_KW_WUInt64)
    /* Other type-related keywords */
    case H_REF: TOK_EQ_RETURN("ref", CSLUL_T_KW_Ref)
    case H_OWN: TOK_EQ_RETURN("own", CSLUL_T_KW_Own)
    case H_ARENA: TOK_EQ_RETURN("arena", CSLUL_T_KW_Arena)
    case H_SLOT: TOK_EQ_RETURN("slot", CSLUL_T_KW_Slot)
    case H_FUNCREF: TOK_EQ_RETURN("funcref", CSLUL_T_KW_FuncRef)
    case H_NORETURN: TOK_EQ_RETURN("noreturn", CSLUL_T_KW_NoReturn)
    case H_STRUCT: TOK_EQ_RETURN("struct", CSLUL_T_KW_Struct)
    case H_ENUM: TOK_EQ_RETURN("enum", CSLUL_T_KW_Enum)
    case H_LIFETIME: TOK_EQ_RETURN("lifetime", CSLUL_T_KW_Lifetime)
    case H_SINCE: TOK_EQ_RETURN("since", CSLUL_T_KW_Since)
    case H_VAR: TOK_EQ_RETURN("var", CSLUL_T_KW_Var)
    case H_WRITEONLY: TOK_EQ_RETURN("writeonly", CSLUL_T_KW_WriteOnly)
    case H_ALIASED: TOK_EQ_RETURN("aliased", CSLUL_T_KW_Aliased)
    case H_THREADED: TOK_EQ_RETURN("threaded", CSLUL_T_KW_Threaded)
    case H_CLOSED: TOK_EQ_RETURN("closed", CSLUL_T_KW_Closed)
    /* Special values */
    case H_NONE: TOK_EQ_RETURN("none", CSLUL_T_KW_None)
    case H_THIS: TOK_EQ_RETURN("this", CSLUL_T_KW_This)
    case H_UNDEF: TOK_EQ_RETURN("undef", CSLUL_T_KW_Undef)
    case H_FALSE: TOK_EQ_RETURN("false", CSLUL_T_KW_False)
    case H_TRUE: TOK_EQ_RETURN("true", CSLUL_T_KW_True)
    /* Statements */
    case H_IF: TOK_EQ_RETURN("if", CSLUL_T_KW_If)
    case H_ELSE: TOK_EQ_RETURN("else", CSLUL_T_KW_Else)
    case H_WHILE: TOK_EQ_RETURN("while", CSLUL_T_KW_While)
    case H_DO: TOK_EQ_RETURN("do", CSLUL_T_KW_Do)
    case H_FOR: TOK_EQ_RETURN("for", CSLUL_T_KW_For)
    case H_IN: TOK_EQ_RETURN("in", CSLUL_T_KW_In)
    case H_LOOPEND: TOK_EQ_RETURN("loopend", CSLUL_T_KW_LoopEnd)
    case H_LOOPEMPTY: TOK_EQ_RETURN("loopempty", CSLUL_T_KW_LoopEmpty)
    case H_SWITCH: TOK_EQ_RETURN("switch", CSLUL_T_KW_Switch)
    case H_CASE: TOK_EQ_RETURN("case", CSLUL_T_KW_Case)
    case H_WITH: TOK_EQ_RETURN("with", CSLUL_T_KW_With)
    case H_DEFAULT: TOK_EQ_RETURN("default", CSLUL_T_KW_Default)
    case H_SUBCASE: TOK_EQ_RETURN("subcase", CSLUL_T_KW_SubCase)
    case H_ASSERT: TOK_EQ_RETURN("assert", CSLUL_T_KW_Assert)
    case H_BREAK: TOK_EQ_RETURN("break", CSLUL_T_KW_Break)
    case H_CONTINUE: TOK_EQ_RETURN("continue", CSLUL_T_KW_Continue)
    case H_GOTO: TOK_EQ_RETURN("goto", CSLUL_T_KW_Goto)
    case H_RETURN: TOK_EQ_RETURN("return", CSLUL_T_KW_Return)
    }
    /* Not a keyword. Identifiers that start with a lowercase letter or an
       underscore are "lower" identifiers, all others are "upper" ones. */
    return tokval[0] >= 'a' || tokval[0] == '_' ?
        CSLUL_T_LowerIdent : CSLUL_T_UpperIdent;
}

/**
 * Returns 1 if the token is not a known keyword, and could syntactically
 * be a module header keyword (i.e. is lowercase).
 *
 * A 4 byte token whose hash equals H_VOID is also rejected, even though
 * it is not in the keyword list of match_keyword.
 */
int token_could_be_mh_attr(struct CSlul *ctx)
{
    return match_keyword(ctx) == CSLUL_T_LowerIdent &&
        !(ctx->toklen == 4 && ctx->tokhash == H_VOID); /* C programmer?! */
}

enum CSlulToken cslul_ll_next_slul_token(struct CSlul *ctx)
{
    const char *bp = ctx->buffer;
    const char *bend = ctx->bufferend;
    unsigned char ch;
    enum CSlulToken tok;
    enum TokenState tokstate = TDone;
    ctx->linestart = bp;
    ctx->mbtrailerbytes = 0;
    /* A token stored in ctx->reused_token.slul is returned (and cleared)
       before anything is read from the buffer */
    if (ctx->reused_token.slul) {
        tok = ctx->reused_token.slul;
        ctx->reused_token.slul = 0;
        return tok;
    }
    /* If a tokenizer state was saved in ctx->tokstate.slul, jump to the
       label that corresponds to it */
    if (UNLIKELY(ctx->tokstate.slul != TDone)) {
        switch (ctx->tokstate.slul) {
        case TInNewline: goto in_newline;
        case TInWhitespace: goto in_whitespace;
        case TInComment: goto in_comment;
        case TInMaybeMLCommentStart1: goto in_maybe_ml_comment_start1;
        case TInMaybeMLCommentStart2: goto in_maybe_ml_comment_start2;
        case TInMaybeMLCommentEnd0: goto in_maybe_ml_comment_end0;
        case TInMaybeMLCommentEnd1: goto in_maybe_ml_comment_end1;
        case TInMaybeMLCommentEnd2: goto in_maybe_ml_comment_end2;
        case TInIdent: goto in_ident;
        case TInVersion: goto in_version;
        case TInOperator: goto in_operator;
        case TInString: goto in_string;
        case TEscapeStart: goto escape_start;
        case TEscapeHex: goto escapehex;
        case TEscapeUnicode: goto escapeunicode;
        case TEscapeScripts: goto escapescripts;
        case TZeroPrefixed: goto zeroprefixed;
        case TNumberHex: goto number_hex;
        case TNumberBin: goto number_bin;
        case TNumberDec: goto number_dec;
        case TNumberExpSign: goto number_expsign;
        case TNumberExp: goto number_exp;
        case TNumberSkip: goto number_skip;
        case TDone:;
        }
    }
    /* Finish a UTF-8 character that was split across buffers */
    if (UNLIKELY(ctx->utf8state)) {
        if (bp == bend) goto buffer_end;
        bp = skip_utf8(ctx, bp, bend);
    }
  nextchar:
    if (UNLIKELY(bp == bend)) goto buffer_end;
    ch = *bp;
  havechar:
    /* Non-ASCII bytes are not valid at the start of a token */
    if (UNLIKELY(ch > 127)) {
        bp = unexpected_utf8(ctx, bp, bend);
        goto nextchar;
    }
    tok = (enum
CSlulToken)char2tok[(unsigned)ch]; switch (tok) { case X: error_char(ctx, bp, CSLUL_E_INVALIDCHAR); bp++; goto nextchar; case CSLUL_T_Newline: newline_start: bp++; if (LIKELY(ch == '\n')) { newline_end: ctx->line++; ctx->startcolumn = 1; ctx->mbtrailerbytes = 0; ctx->linestart = bp; ctx->numspaces = 0; if (ctx->in_multiline_comment) goto in_maybe_ml_comment_end0; goto nextchar; } in_newline: if (UNLIKELY(bp == bend)) { /* \r */ if (ctx->last_buffer) { error_char_offs(ctx, bp, -1, CSLUL_E_CRNEWLINE); } tokstate = TInNewline; goto buffer_end; } else { if (LIKELY(*bp == '\n')) bp++; /* \r\n */ else error_char_offs(ctx, bp, -1, CSLUL_E_CRNEWLINE); goto newline_end; } break; /* unreachable */ in_maybe_ml_comment_end0: if (UNLIKELY(bp == bend)) { tokstate = TInMaybeMLCommentEnd0; goto buffer_end; } if (*bp != '#') goto in_comment; bp++; in_maybe_ml_comment_end1: if (UNLIKELY(bp == bend)) { tokstate = TInMaybeMLCommentEnd1; goto buffer_end; } if (*bp != '}') goto in_maybe_ml_comment_start1; bp++; in_maybe_ml_comment_end2: if (UNLIKELY(bp == bend)) { tokstate = TInMaybeMLCommentEnd2; goto buffer_end; } if (*bp == '}') ctx->in_multiline_comment--; bp++; goto in_comment; /* skip to end of line */ case CSLUL_INT_Whitespace: { const char *start; ctx->numspaces = 0; in_whitespace: start = bp; for (;;) { if (UNLIKELY(bp == bend)) goto buffer_end_ws; ch = *bp; if (ch != ' ') break; bp++; } ctx->numspaces += bp-start; if (UNLIKELY(ch == '\t')) { error_char(ctx, bp, CSLUL_E_TAB); /* TODO skip multiple tabs */ bp++; } else if (UNLIKELY(ch == '\n' || ch == '\r')) { int col = ctx->startcolumn + (bp-ctx->linestart); error_char(ctx, bp, (ctx->numspaces == col-1 ? 
CSLUL_E_INDENTEDBLANKLINE : CSLUL_E_TRAILINGSPACE)); } goto havechar; buffer_end_ws: ctx->numspaces += bp-start; tokstate = TInWhitespace; goto buffer_end; } case CSLUL_INT_Comment: bp++; ctx->allowed_scripts = SCRIPT_ALL|SCRIPT_RTL; ctx->in_multiline_comment = 0; in_maybe_ml_comment_start1: if (UNLIKELY(bp == bend)) { tokstate = TInMaybeMLCommentStart1; goto buffer_end; } if (*bp != '{') goto in_comment; bp++; in_maybe_ml_comment_start2: if (UNLIKELY(bp == bend)) { tokstate = TInMaybeMLCommentStart2; goto buffer_end; } if (*bp == '{') { if (ctx->startcolumn + (bp-ctx->linestart) != 3) { error_linecol(ctx, CSLUL_E_MLCOMMENTNOTLINESTART, ctx->line, ctx->startcolumn + (bp-ctx->linestart) - 2); goto in_comment; } if (!ctx->in_multiline_comment) { ctx->multilinecomment_startline = ctx->line; } ctx->in_multiline_comment++; } bp++; in_comment: for (;;) { while (bp != bend && *bp >= 0x20 && *bp < 127) bp++; if (UNLIKELY(bp == bend)) { tokstate = TInComment; goto buffer_end; } ch = *bp; if (ch == '\n' || ch == '\r') break; if (ch >= 128) { bp = skip_utf8(ctx, bp, bend); } else { error_char(ctx, bp, CSLUL_E_INVALIDCHAR); bp++; } } goto newline_start; case CSLUL_T_UpperIdent: case CSLUL_T_LowerIdent: { int status; token_start(ctx, bp); ident_start(ctx); in_ident: /* FIXME disallow _ in UpperIdent */ bp = tokenize_ident(ctx, bp, bend, &status, ParseIdent); if (LIKELY(status)) { tok = match_keyword(ctx); ctx->tmplen = 0; if (UNLIKELY(ctx->toklen >= 2 && !memcmp(ctx->tokval, "__", 2))) { error_tok(ctx, CSLUL_E_DOUBLEUNDERSCORE); } else if (UNLIKELY(tok == CSLUL_T_UpperIdent && memchr(ctx->tokval, '_', ctx->toklen))) { error_tok(ctx, CSLUL_E_TYPEUNDERSCORE); } else if (tok == CSLUL_T_LowerIdent && LIKELY(bp != bend)) { if (*bp == ':') { tok = CSLUL_T_GotoTarget; bp++; } /* XXX how about [] and <> */ } else if (tok == CSLUL_T_KW_Since) { ctx->parser.slul.version_line = ctx->tokline; } goto have_token; } tokstate = TInIdent; goto buffer_end; } case CSLUL_T_Integer: 
token_start(ctx, bp); ctx->toklen = 0; if (ctx->parser.slul.version_line == ctx->tokline) { goto version_start; } ctx->parser.slul.number = 0; ctx->parser.slul.numdigits = 0; ctx->parser.slul.floatnum = 0; if (ch == '0') { ctx->parser.slul.has_digits = 0; bp++; zeroprefixed: if (UNLIKELY(bp == bend)) { tok = CSLUL_T_Integer; if (ctx->last_buffer) goto have_number_nocheck; /* zero */ tokstate = TZeroPrefixed; goto buffer_end; } ch = *bp; if (ch == 'x') { bp++; number_hex: tokstate = TNumberHex; tok = CSLUL_T_Integer; for (;; bp++) { if (UNLIKELY(bp == bend)) goto number_eob; ch = *bp; if (ch >= '0' && ch <= '9') ch -= '0'; else if (ch >= 'a' && ch <= 'f') ch = ch-'a' + 0xA; else if (ch >= 'A' && ch <= 'F') ch = ch-'A' + 0xA; else if (ch == '_') continue; else goto have_number; if (ctx->parser.slul.numdigits >= 16) goto numbertoolarge; if (ctx->parser.slul.number || ch) { ctx->parser.slul.numdigits++; } ctx->parser.slul.has_digits = 1; ctx->parser.slul.number = (ctx->parser.slul.number << 4) | ch; } } else if (ch == 'b') { bp++; number_bin: tokstate = TNumberBin; tok = CSLUL_T_Integer; for (;; bp++) { if (bp == bend) goto number_eob; ch = *bp; if (ch == '0' || ch == '1') ch -= '0'; else if (ch == '_') continue; else goto have_number; if (ctx->parser.slul.numdigits >= 64) goto numbertoolarge; if (ctx->parser.slul.number || ch) { ctx->parser.slul.numdigits++; } ctx->parser.slul.has_digits = 1; ctx->parser.slul.number = (ctx->parser.slul.number << 1) | ch; } } else if (ch == '.') { ctx->parser.slul.numdigits = 1; goto zero_dot_x; } else if ((ch >= '0' && ch <= '9') || ch == '_') { /* This could be confused with an octal number */ error_char(ctx, bp, CSLUL_E_LEADINGZERO); goto number_skip; } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { error_char(ctx, bp, CSLUL_E_BADNUMBERTYPE); goto number_skip; } else { /* Zero */ assert(tokstate == TDone); ctx->parser.slul.numdigits = 1; ctx->parser.slul.has_digits = 1; tok = CSLUL_T_Integer; goto have_token; } } 
else { zero_dot_x: ctx->parser.slul.decpointpos = 0; number_dec: tokstate = TNumberDec; tok = CSLUL_T_Integer; } /* Decimal number */ for (;; bp++) { if (UNLIKELY(bp == bend)) goto number_eob; ch = *bp; if (LIKELY(ch >= '0' && ch <= '9')) { uint64 number = ctx->parser.slul.number; ch -= '0'; /* The 2^64 limit also applies to the integer part of floating point numbers. Use exponents to write larger numbers. Rationale: Large numbers without exponent syntax are hard to read and better avoided. This limitation also simplifies the code somewhat. */ if (UNLIKELY(number >= UINT64_MAX/10)) { /* 2^64-1 = 18446744073709551615 */ if (number > UINT64_MAX/10 || ch > 5) goto numbertoolarge; } if (number || ch) { ctx->parser.slul.numdigits++; } ctx->parser.slul.has_digits = 1; ctx->parser.slul.number = number*10 + ch; if (ctx->parser.slul.floatnum) { ctx->parser.slul.decpointpos++; } } else if (ch == '_') continue; else if (ch == '.') { if (ctx->parser.slul.floatnum) goto double_decpoint; ctx->parser.slul.floatnum = 1; ctx->parser.slul.exponent = 0; ctx->parser.slul.has_digits = 0; continue; } else if (ch == 'e' || ch == 'E') { if (!ctx->parser.slul.has_digits) goto no_frac_digits; ctx->parser.slul.exponent = 0; bp++; goto number_expsign; } else { tok = ctx->parser.slul.floatnum ? 
CSLUL_T_Float : CSLUL_T_Integer; goto have_number; } } number_expsign: tok = CSLUL_T_Float; tokstate = TNumberExpSign; if (UNLIKELY(bp == bend)) goto number_eob; ch = *bp; if (ch == '-') { ctx->parser.slul.floatnum = -1; bp++; } else { ctx->parser.slul.floatnum = +1; if (ch == '+') bp++; } ctx->parser.slul.numdigits = 0; number_exp: tok = CSLUL_T_Float; tokstate = TNumberExp; for (;; bp++) { if (UNLIKELY(bp == bend)) goto number_eob; ch = *bp; if (ch < '0' || ch > '9') break; if (UNLIKELY(++ctx->parser.slul.numdigits > 4)) { error_char(ctx, bp, CSLUL_E_EXPONENTTOOLARGE); goto number_skip; } ch -= '0'; ctx->parser.slul.exponent = ctx->parser.slul.exponent*10 + ch; } if (ctx->parser.slul.floatnum < 0) { ctx->parser.slul.exponent = -ctx->parser.slul.exponent; } if (UNLIKELY(!ctx->parser.slul.numdigits)) { error_char(ctx, bp, CSLUL_E_NOEXPDIGITS); goto number_skip; } have_number: /* End of number OR invalid character in number */ if (UNLIKELY((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch == '.') || (ch >= '0' && ch <= '9'))) { error_char(ctx, bp, CSLUL_E_BADNUMBERCHAR); goto number_skip; } else if (UNLIKELY(!ctx->parser.slul.has_digits)) { no_frac_digits: error_char(ctx, bp, ctx->parser.slul.floatnum ? 
CSLUL_E_NOFRACDIGITS : CSLUL_E_NODIGITS); goto number_skip; } else if (ctx->parser.slul.floatnum) { if (!ctx->parser.slul.number) ctx->parser.slul.exponent = 0; else { ctx->parser.slul.exponent -= ctx->parser.slul.decpointpos; } } /* Fall through */ have_number_nocheck: ctx->toklen = (ctx->startcolumn + (bp - ctx->linestart - ctx->mbtrailerbytes)) - ctx->tokcolumn; tokstate = TDone; goto have_token; /* Error handling */ double_decpoint: error_char(ctx, bp, CSLUL_E_DOUBLEDECPOINT); goto number_skip; numbertoolarge: error_char(ctx, bp, CSLUL_E_NUMBERTOOLARGE); number_skip: tokstate = TNumberSkip; ctx->parser.slul.numdigits = INVALID_NUMBER; for (;; bp++) { if (UNLIKELY(bp == bend)) goto buffer_end; ch = *bp; if (!((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') || ch == '.' || ch == '_')) break; } tok = CSLUL_T_Integer; ctx->parser.slul.numdigits = INVALID_NUMBER; goto have_number_nocheck; number_eob: if (ctx->last_buffer) { if (ctx->parser.slul.numdigits == 0) { error_char(ctx, bp, CSLUL_E_NODIGITS); } tokstate = TDone; goto have_token; } goto buffer_end; case CSLUL_T_String: { /* Try to parse without temp buffer first */ const char *start; token_start(ctx, bp); ctx->tmplen = 0; /* TODO make this value configurable in the module header */ ctx->allowed_scripts = SCRIPT_LATIN; start = ++bp; /* TODO hash strings for de-duplication? or skip and tail merge? 
*/ for (;;) { if (UNLIKELY(bp == bend)) goto incomplete_string; ch = *bp; if (ch == '\\') { copytotmp_safe(ctx, start, bp); break; } else if (ch == '"') { simple_string_end: if ((size_t)(bp - start) > MAX_TOKEN_LEN) { error_char(ctx, bp, CSLUL_E_STRINGTOOLONG); ctx->toklen = MAX_TOKEN_LEN; } else { ctx->toklen = bp - start; } ctx->tokval = start; if (ch == '"') bp++; goto have_token; } else if (LIKELY(ch >= 0x20 && ch < 127)) bp++; else if (ch > 127) { bp = skip_utf8(ctx, bp, bend); } else if (ch == '\n' || ch == '\r') { error_char(ctx, bp, CSLUL_E_UNTERMINATEDSTRING); goto simple_string_end; } else { error_char(ctx, bp, CSLUL_E_INVALIDCHAR); bp++; } } in_string: start = bp; for (;;) { next_string_char: if (UNLIKELY(bp == bend)) goto incomplete_string; ch = *bp; if (ch == '\\') { short maxlen; copytotmp_safe(ctx, start, bp); bp++; escape_start: if (UNLIKELY(bp == bend)) { tokstate = TEscapeStart; goto buffer_end_noeof; } ch = *(bp++); ctx->parser.slul.escape = 0; ctx->parser.slul.escapelen = 0; switch (ch) { case '0': ch = '\0'; break; case 'b': ch = '\b'; break; case 't': ch = '\t'; break; case 'n': ch = '\n'; break; case 'r': ch = '\r'; break; case '\"': ch = '\"'; break; case '\\': ch = '\\'; break; case 'A': case 'C': case 'G': case 'L': case 'O': case 'S': case ';': ctx->allowed_scripts = 0; for (;;) { unsigned add; switch (ch) { case 'A': add = SCRIPT_ALL; break; case 'C': add = SCRIPT_CYRILLIC; break; case 'G': add = SCRIPT_GREEK; break; case 'L': add = SCRIPT_LATIN; break; case 'O': add = SCRIPT_OTHER; break; case 'S': add = SCRIPT_SPECIALS; break; /* FIXME Add a way of allowing RTL text. RTL text needs to go on a separate line, BUT it also needs some additional (non-LTR) characters to indicate that there is an RTL string. 
- Easiest way might be to have begin_bidi/end_bidi keywords on lines before and after (and only allows string literals in between) */ case ';': start = bp; goto next_string_char; default: bp--; error_char(ctx, bp, (ch >= 'A' && ch <= 'Z' ? CSLUL_E_SCRIPTESCAPEUNKNOWN : CSLUL_E_SCRIPTESCAPEBAD)); start = bp; goto next_string_char; } if ((ctx->allowed_scripts & add) != 0) { error_char_offs(ctx, bp, -1, CSLUL_E_SCRIPTESCAPEDUPL); } else if (add < ctx->allowed_scripts) { error_char_offs(ctx, bp, -1, CSLUL_E_SCRIPTESCAPEORDER); } ctx->allowed_scripts |= add; escapescripts: tokstate = TEscapeScripts; if (bp == bend) goto buffer_end_noeof; ch = *(bp++); } break; case 'x': /* Single byte hex escape */ escapehex: tokstate = TEscapeHex; maxlen = 2; goto long_escape; case 'u': /* Unicode hex escape */ escapeunicode: tokstate = TEscapeUnicode; maxlen = 6; /* 0x10FFFF */ goto long_escape; default: error_char(ctx, bp, CSLUL_E_BADESCAPE); } /* Single char escape like \n */ start = bp; have_escaped_byte: if (LIKELY(ctx->tmplen < MAX_TOKEN_LEN)) { ctx->toktmp[ctx->tmplen] = ch; ctx->tmplen++; } else { error_char_offs(ctx, bp, -1, CSLUL_E_STRINGTOOLONG); } continue; long_escape: for (;;) { if (UNLIKELY(bp == bend)) goto buffer_end_noeof; ch = *bp; if (ch >= '0' && ch <= '9') ch -= '0'; else if (ch >= 'a' && ch <= 'f') ch = ch-'a' + 0xA; else if (ch >= 'A' && ch <= 'F') ch = ch-'A' + 0xA; else break; if (UNLIKELY(++ctx->parser.slul.escapelen > maxlen)) { error_char(ctx, bp, CSLUL_E_ESCAPETOOLONG); break; } ctx->parser.slul.escape = (ctx->parser.slul.escape << 4) | ch; bp++; } start = bp; if (UNLIKELY(ctx->parser.slul.escapelen == 0)) { error_char(ctx, bp, CSLUL_E_MISSINGESCAPE); ch = 0; goto have_escaped_byte; } if (tokstate == TEscapeHex) { /* Single byte */ ch = ctx->parser.slul.escape; goto have_escaped_byte; } else { uint32 code = ctx->parser.slul.escape; unsigned char *res; /* Add UTF-8 bytes */ if (UNLIKELY(code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF))) { 
error_char_offs(ctx, bp, -1, CSLUL_E_BADUNICODEESCAPE); continue; } res = (unsigned char*)&ctx->toktmp[ctx->tmplen]; if (code <= 0x7F) { if (ctx->tmplen >= MAX_TOKEN_LEN-1) goto utf8toolarge; *(res++) = code; ctx->tmplen += 1; continue; } else if (code <= 0x7FF) { if (ctx->tmplen >= MAX_TOKEN_LEN-2) goto utf8toolarge; *(res++) = 0xC0U | (code >> 6U); *(res++) = 0x80U | (code & 0x3FU); ctx->tmplen += 2; continue; } else if (code <= 0xFFFF) { if (ctx->tmplen >= MAX_TOKEN_LEN-3) goto utf8toolarge; *(res++) = 0xE0U | (code >> 12U); *(res++) = 0x80U | ((code >> 6U) & 0x3FU); *(res++) = 0x80U | (code & 0x3FU); ctx->tmplen += 3; continue; } else { if (ctx->tmplen >= MAX_TOKEN_LEN-4) goto utf8toolarge; *(res++) = 0xF0U | (code >> 18U); *(res++) = 0x80U | ((code >> 12U) & 0x3FU); *(res++) = 0x80U | ((code >> 6U) & 0x3FU); *(res++) = 0x80U | (code & 0x3FU); ctx->tmplen += 4; continue; } utf8toolarge: error_char(ctx, bp, CSLUL_E_STRINGTOOLONG); } } else if (ch == '"') { string_end: copytotmp_safe(ctx, start, bp); if (ch == '"') bp++; ctx->tokval = &ctx->toktmp[0]; ctx->toklen = ctx->tmplen; tok = CSLUL_T_String; tokstate = TDone; goto have_token; } else if (LIKELY(ch >= 0x20 && ch < 127)) bp++; else if (ch > 127) { bp = skip_utf8(ctx, bp, bend); } else if (ch == '\n' || ch == '\r') { error_char(ctx, bp, CSLUL_E_UNTERMINATEDSTRING); goto string_end; } else { error_char(ctx, bp, CSLUL_E_INVALIDCHAR); bp++; } } incomplete_string: copytotmp_safe(ctx, start, bp); tokstate = TInString; goto buffer_end_noeof; } case CSLUL_T_LParen: case CSLUL_T_RParen: case CSLUL_T_LSquare: case CSLUL_T_RSquare: case CSLUL_T_LCurly: case CSLUL_T_RCurly: case CSLUL_T_Comma: case CSLUL_T_Dot: case CSLUL_T_Question: case CSLUL_T_Colon: case CSLUL_T_Semicolon: token_start(ctx, bp); bp++; ctx->toklen = 1; goto have_token; case CSLUL_T_Plus: case CSLUL_T_Minus: case CSLUL_T_Asterisk: case CSLUL_T_Slash: case CSLUL_T_Less: case CSLUL_T_Assign: case CSLUL_T_Greater: case CSLUL_T_Exclamation: { enum 
CSlulToken two_char_first; token_start(ctx, bp); /* We need to check if it's a two character operator. These can span over two buffers! */ two_char_first = tok; bp++; continue_in_operator: if (UNLIKELY(bp == bend)) { if (ctx->last_buffer) { ctx->toklen = 1; tokstate = TDone; tok = two_char_first; error_char(ctx, bp, CSLUL_E_NOEOFNEWLINE); goto have_token; } ctx->parser.slul.two_char_first = two_char_first; tokstate = TInOperator; goto buffer_end; } ch = *bp; if (ch == '=') { /* += -= etc */ bp++; ctx->toklen = 2; tok = two_char_first + (CSLUL_T_PlusAssign - CSLUL_T_Plus); goto have_token; } else if (ch == '>' && two_char_first == CSLUL_T_Minus) { bp++; ctx->toklen = 2; tok = CSLUL_T_RArrow; goto have_token; } else if (UNLIKELY(ch > 127)) { ctx->toklen = 1; bp = unexpected_utf8(ctx, bp, bend); goto nextchar; } else { /* Single character token. Re-use the second character */ ctx->toklen = 1; tok = two_char_first; goto have_token; } break; in_operator: two_char_first = ctx->parser.slul.two_char_first; goto continue_in_operator; } CASE_ALL_KEYWORDS CASE_MULTICHAR_OPS case CSLUL_T_Float: case CSLUL_T_Version: case CSLUL_T_EOF: default: /* This should never happen */ error_char(ctx, bp, INTERR_BADCHARTYPE); bp++; goto nextchar; } /* Version tokens are special. A "since" keyword is always followed by a version. Without this special handling, it would have been parsed as an integer/a float. 
*/
  version_start:
    {
        int status;
        ident_start(ctx);
        ctx->parser.slul.version_line = 0;
      in_version:
        /* tokenize_ident() returns the new read position and reports
           through `status` whether the version token is finished.
           NOTE(review): in_version is presumably the resume point used
           when the saved state is TInVersion -- confirm against the
           state dispatch at the top of this function. */
        bp = tokenize_ident(ctx, bp, bend, &status, ParseVersion);
        if (LIKELY(status)) {
            /* Non-zero status: emit the version token and reset tmplen */
            tok = CSLUL_T_Version;
            ctx->tmplen = 0;
            goto have_token;
        }
        /* Zero status: remember that we are inside a version token and
           leave through the normal end-of-buffer path */
        tokstate = TInVersion;
        goto buffer_end;
    }

    /* These are the possible function exit states */
  buffer_end_noeof:
    /* Used when the input ran out at a point where the file is not
       allowed to end: report CSLUL_E_UNEXPECTEDEOF if this was the
       last buffer, then fall through to buffer_end. */
    if (ctx->last_buffer) error_char(ctx, bp, CSLUL_E_UNEXPECTEDEOF);
  buffer_end:
    /* Out of input: the result is CSLUL_T_EOF after the last buffer,
       otherwise CSLUL_T_NEEDDATA (presumably a request for the caller
       to supply the next buffer -- confirm with the API documentation) */
    if (ctx->last_buffer) {
        token_eof(ctx, bp);
        tok = CSLUL_T_EOF;
    } else {
        tok = CSLUL_T_NEEDDATA;
    }
  have_token:
    /* Common exit: save the read position and the tokenizer state in
       the context. The column advances by the number of bytes consumed
       on this line minus the UTF-8 trailer bytes, so that a multi-byte
       character counts as a single column. */
    ctx->buffer = bp;
    ctx->startcolumn += (bp - ctx->linestart - ctx->mbtrailerbytes);
    ctx->tokstate.slul = tokstate;
    return tok;
}

/*
 * Retrieves the value of the current token from the context.
 *
 * ctx:    the context to read the token value from.
 * name:   receives ctx->tokval, a pointer to the first byte of the value.
 *         For string tokens, the tokenizer above points tokval either at
 *         the start of the string in the source text or at ctx->toktmp.
 * length: receives ctx->toklen, the length of the value.
 *
 * Both output pointers are written unconditionally, so neither may be
 * NULL. No copy is made; the caller gets the pointer stored in the
 * context.
 *
 * NOTE(review): nothing in this function NUL-terminates the value, so
 * callers should presumably rely on *length rather than on a terminator
 * -- confirm. How long the returned pointer stays valid is not visible
 * here either; verify before holding on to it across tokenizer calls.
 */
void cslul_ll_current_value(struct CSlul *ctx,
                            const char **name, size_t *length)
{
    *name = ctx->tokval;
    *length = ctx->toklen;
}

#undef X /* undefine for unity builds */