aboutsummaryrefslogtreecommitdiff
path: root/bootstrap/util.c
blob: 3148b26032e4ab081720e684a4cd1cb97265f74a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

/*
 * Various utility functions for the bootstrap compiler.
 *
 * Copyright © 2021-2025 Samuel Lidén Borell <samuel@kodafritt.se>
 *
 * SPDX-License-Identifier: EUPL-1.2+
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "compiler.h"

/*
 * Reads the next non-empty line from `f` into `line`.
 *
 * Characters are read one at a time with fgetc() until a line terminator
 * ('\n' or '\r') is seen after at least one character has been stored, or
 * until fgetc() returns EOF. Line terminators seen while the buffer is
 * still empty are skipped, so blank lines (and, in STRIP_COMMENTS mode,
 * lines containing nothing but a comment) are passed over.
 *
 * When `mode` is STRIP_COMMENTS, a '#' starts a comment that extends to the
 * end of the line; the comment text is discarded.
 *
 * Bytes below ' ' (other than the line terminators) are reported through
 * error(). Trailing spaces/tabs are trimmed and the result is always
 * NUL-terminated, so at most SOURCELINE_MAX-1 characters are stored.
 *
 * Side effect: `current_line` is incremented for every '\n' read.
 *
 * f        Stream to read from.
 * line     Destination buffer of SOURCELINE_MAX bytes.
 * len_out  If non-NULL, receives the length of the line after trimming
 *          (this can be 0 for a line consisting only of spaces).
 * mode     Whether '#' comments are stripped.
 *
 * Returns true if any characters were stored, or false if EOF was reached
 * first (in which case `line` and `*len_out` are left untouched).
 */
bool read_source_line(FILE *f, char line[SOURCELINE_MAX], size_t *len_out,
                      enum ReadSourceMode mode)
{
    bool in_comment = false;
    char *end = line;
    for (;;) {
        int ch = fgetc(f);
        if (ch == '\n') current_line++;
        if (ch == EOF) {
            break;
        } else if (in_comment) {
            if (ch == '\n' || ch == '\r') {
                in_comment = false;
                /* A comment after real content terminates that line */
                if (end != line) break;
            }
        } else if (ch == '\n' || ch == '\r') {
            /* Skip empty lines (also swallows the '\n' of a "\r\n" pair) */
            if (end != line) break;
        } else if (ch == '#' && mode == STRIP_COMMENTS) {
            in_comment = true;
        } else if (end-line >= SOURCELINE_MAX-1) {
            /* The buffer has SOURCELINE_MAX bytes and the last one must
               stay free for the NUL terminator written below. Storing
               another character here would overflow `line`. */
            current_line++;
            error("Line too long for bootstrap compiler.");
        } else if (ch < ' ') {
            error("Invalid control character");
        } else {
            /* TODO should validate UTF-8 characters! */
            *(end++) = (char)ch;
        }
    }
    if (end != line) {
        /* Trim trailing whitespace */
        while (end > line && (end[-1] == ' ' || end[-1] == '\t')) {
            end--;
        }
        *end = '\0';
        if (len_out) {
            assert(end >= line);
            assert(end-line < SOURCELINE_MAX);
            *len_out = (size_t)(end-line);
        }
        return true;
    } else {
        return false;
    }
}

/*
 * Replaces every occurrence of the byte `from` with `to` in the first
 * `len` bytes of `s`. The buffer is modified in place and is not required
 * to be NUL-terminated. Does nothing when `from` and `to` are equal.
 */
void memreplace(char *s, char from, char to, size_t len)
{
    size_t i;

    if (from == to) {
        /* Replacing a byte with itself cannot change anything */
        return;
    }
    for (i = 0; i < len; i++) {
        if (s[i] == from) {
            s[i] = to;
        }
    }
}

/*
 * Returns a newly malloc'ed, NUL-terminated copy of the first `len` bytes
 * of `s`. The returned buffer is len+1 bytes long.
 *
 * `s` and the allocation result are both passed through NO_NULL, and
 * `len` is asserted to be non-zero.
 */
char *memzdup(const char *s, size_t len)
{
    char *copy;

    NO_NULL(s);
    assert(len > 0);

    /* One extra byte for the terminator */
    copy = malloc(len+1);
    NO_NULL(copy);

    memcpy(copy, s, len);
    copy[len] = '\0';
    return copy;
}

/*
 * Validates a relative filename and reports any problem through error().
 *
 * The following are rejected:
 *  - a leading '/' (absolute path)
 *  - a path component starting with '.' (hidden file; this also covers
 *    "." and ".." components)
 *  - two consecutive slashes
 *  - backslashes
 *  - any character other than ASCII letters, digits, '_', '.' and '/'
 */
void check_filename(const char *s)
{
    const char *p;

    if (s[0] == '/') error("Absolute paths are not allowed");
    if (s[0] == '.') error("Hidden files are not allowed");

    for (p = s; *p != '\0'; p++) {
        switch (*p) {
        case '/':
            /* Look at what starts the next path component */
            if (p[1] == '/') error("Double slash is not allowed");
            if (p[1] == '.') error("Hidden files are not allowed");
            break;
        case '\\':
            error("Use / not \\ as a directory separator");
            break;
        case '_':
        case '.':
            /* Probably safe (ignoring check for Windows/DOS
               con/aux/prn... in the bootstrap compiler) */
            break;
        default:
            if (!((*p >= 'a' && *p <= 'z') ||
                  (*p >= 'A' && *p <= 'Z') ||
                  (*p >= '0' && *p <= '9'))) {
                error("Disallowed character in filename");
            }
            break;
        }
    }
}

/*
 * Returns a pointer to the part of `path` after its last '/'.
 *
 * If `path` contains no slash, or if the last slash is the final
 * character (e.g. "dir/"), then `path` itself is returned unchanged.
 * The result points into `path`; nothing is allocated.
 */
const char *path_basename(const char *path)
{
    const char *slash = strrchr(path, '/');

    if (!slash || slash[1] == '\0') {
        /* No directory part, or nothing follows the last slash */
        return path;
    }
    return slash + 1;
}

/* One step of a fast but very simplistic hash function: mixes the byte `c`
   into the running hash value `h`. Each step shifts the previous value up
   by 7 bits, so ASCII strings of up to 4 chars get unique hashes (assuming
   unsigned int is 32 bits). It is NOT cryptographically secure.
   Note that `h` is evaluated twice. */
#define HASH(h, c) \
    (((h) << 7) + ((h) >> 22) + (unsigned char)(c))

/*
 * Computes the hash of the first `len` bytes of `s` by folding each byte,
 * in order, into the running value with the HASH macro. The hash of an
 * empty string (len == 0) is 0. `s` does not need to be NUL-terminated.
 */
HashCode hash_str(const char *s, size_t len)
{
    HashCode result = 0;
    size_t i;

    for (i = 0; i < len; i++) {
        result = HASH(result, s[i]);
    }
    return result;
}