/* * String escape handling. * * Copyright © 2026 Samuel Lidén Borell * * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later */ #include #include "token.h" static unsigned hexchar(char c) { if (c >= '0' && c <= '9') { return (unsigned)(c - '0'); } else if (c >= 'a' && c <= 'f') { return (unsigned)(c - 'a') + 10; } else if (c >= 'A' && c <= 'F') { return (unsigned)(c - 'A') + 10; } else { error("Invalid hex character"); return (unsigned)-1; /* silence warning from tcc */ } } void unescape_string(const struct LexemeInfo *li, const char **str_out, size_t *len_out) { const char *si; char *so; size_t inlen = li->len; size_t outlen; if (!inlen) { *str_out = NULL; *len_out = 0; return; } si = li->string; so = malloc(inlen); NO_NULL(so); *str_out = so; outlen = 0; while (inlen--) { char c = *(si++); if (c != '\\') { one_char_out: *(so++) = c; outlen++; } else if (inlen-- != 0) { c = *(si++); switch (c) { case '"': case '\\': goto one_char_out; case 'n': c = '\n'; goto one_char_out; /* XXX how many escapes should there be? some are quite uncommon. */ case 'r': c = '\r'; goto one_char_out; case 't': c = '\t'; goto one_char_out; case '0': c = '\0'; goto one_char_out; case 'x': { unsigned high, low; if (inlen < 2) { error("Unexpected end of string in escape sequence"); } high = hexchar(si[0]); low = hexchar(si[1]); si += 2; inlen -= 2; c = (char)((high << 4U) | low); goto one_char_out; } /* TODO unicode escapes. They should emit UTF+8 bytes. Which syntax to use? - variable length \u with up to 6 hexdigits? - fixed-length \u with 4 and \U with 6 hexdigits? - semicolon-terminated \u123; Or skip unicode escapes, and rely on UTF-8 byte escapes? */ default: error("Invalid escape sequence"); } } else { error("Unexpected end of string in escape sequence"); } } *len_out = outlen; }