blob: 6f38f5b21d3960f559cf8895c25a5871263b4c71 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
/*
* String escape handling.
*
* Copyright © 2026 Samuel Lidén Borell <samuel@kodafritt.se>
*
* SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later
*/
#include <stdlib.h>
#include "token.h"
/*
 * Converts a single hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9' (0..9) as well as 'a'-'f' and 'A'-'F' (10..15).
 * Any other character is reported through error().
 */
static unsigned hexchar(char c)
{
    if (c >= '0' && c <= '9') {
        return (unsigned)(c - '0');
    }
    if (c >= 'a' && c <= 'f') {
        return 10 + (unsigned)(c - 'a');
    }
    if (c >= 'A' && c <= 'F') {
        return 10 + (unsigned)(c - 'A');
    }

    error("Invalid hex character");
    /* Only here to keep tcc from warning about a missing return value;
       presumably never reached because error() does not return. */
    return (unsigned)-1;
}
/*
 * Decodes the backslash escapes in the contents of a string lexeme.
 *
 * li       Lexeme to decode. li->len bytes are read from li->string.
 * str_out  Receives the decoded bytes. This is NULL for an empty lexeme,
 *          otherwise a buffer of li->len bytes obtained from malloc().
 *          No NUL terminator is appended, and the buffer is not freed
 *          by this function.
 * len_out  Receives the number of decoded bytes stored in *str_out.
 *
 * Supported escapes: \" \\ \n \r \t \0 and \xHH (exactly two hex digits).
 * An unknown escape, or one that is cut off by the end of the input,
 * is reported through error().
 */
void unescape_string(const struct LexemeInfo *li,
                     const char **str_out, size_t *len_out)
{
    const char *si;
    char *so;
    size_t inlen = li->len;
    size_t written = 0;

    if (inlen == 0) {
        *str_out = NULL;
        *len_out = 0;
        return;
    }

    si = li->string;
    /* Each iteration below consumes at least one input byte and stores
       at most one output byte, so the input length is a sufficient
       size for the output buffer. */
    so = malloc(inlen);
    NO_NULL(so);
    *str_out = so;

    while (inlen-- != 0) {
        char c = *(si++);

        if (c == '\\') {
            /* The backslash has to be followed by an escape character. */
            if (inlen-- == 0) {
                error("Unexpected end of string in escape sequence");
                continue;
            }
            c = *(si++);

            switch (c) {
            case '"':
            case '\\':
                /* These two stand for themselves. */
                break;
            case 'n':
                c = '\n';
                break;
            /* XXX Open question: how many escapes should be supported?
               Some of them are quite uncommon. */
            case 'r':
                c = '\r';
                break;
            case 't':
                c = '\t';
                break;
            case '0':
                c = '\0';
                break;
            case 'x': {
                /* Byte escape: \x followed by two hex digits. */
                unsigned hi, lo;

                if (inlen < 2) {
                    error("Unexpected end of string in escape sequence");
                }
                hi = hexchar(si[0]);
                lo = hexchar(si[1]);
                si += 2;
                inlen -= 2;
                c = (char)((hi << 4U) | lo);
                break; }
            /* TODO Unicode escapes.
               They should emit UTF-8 bytes.
               Which syntax to use?
               - variable length \u with up to 6 hex digits?
               - fixed-length \u with 4 and \U with 6 hex digits?
               - semicolon-terminated \u123;
               Or skip Unicode escapes, and rely on UTF-8 byte escapes?
            */
            default:
                error("Invalid escape sequence");
                /* Nothing is stored for an unrecognized escape. */
                continue;
            }
        }

        /* Store the literal or decoded byte. */
        *(so++) = c;
        written++;
    }

    *len_out = written;
}
|