aboutsummaryrefslogtreecommitdiffhomepage
path: root/notes/map_data_structure3.txt
blob: a63c1695f64406de3feddafb4f14a0707cd244de (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

Map/dictionary data structure
=============================

Requirements
------------

* Fast in the common case
* Safe against Denial-of-Service
* Must support arena allocation

Key interface
-------------

In order to support the cases above, it is necessary for objects to have
a hash (for fast lookup) and a comparison function (for Denial-of-Service
protection in case of hash collisions).

    type Key<T> = struct {
        slot T key
    }

    type ComparisonResult = closed enum {
        lower  = -1
        equal  =  0
        higher =  1
    }

    func Key<T>.hash() -> uint     # or should it be uint64 ?
    func Key<T>.compare_to(Key<T> other) -> ComparisonResult

Other languages like Zig use a context parameter to support different
types of keys without RTTI (run-time type information).

But it would be really nice to avoid this for common key types:

* String
* Integer
* Reference


Data structure
--------------

    meta-data -> buckets -> one AVL tree per bucket

When the buckets array needs to grow, it will have to be copied to a new
space. But the old space can be re-used for tree nodes!

Public structure:
TODO: add support for inaccessible/private struct fields to SLUL.

    type Map<K,V> = struct {
        private var usize
        private var ref
    }

    # How to to iteration?
    #   - have an array in the iterator for the stack?
    #   - have a next-pointer in each node?
    #
    #type MapIterator<K,V> = struct {
    #    private ref
    #    private ref
    #    private usize
    #}


Private structures (probably can't be implemented in SLUL):
    
    type Map<K,V> = struct {
        var usize num_elems
        "union {"
            # if num_elems == 1
            var ref MapNode<K,V> single_node
            # if num_elems >= 2
            var ref var MapNode<K,V>[round_up_to_pow2(num_elems)] buckets
        "}"
    }

    type MapNode<K,V> = struct {
        var AvlRankDelta avl_rankdelta
        var uint8 tag
        uint hash
        slot K key
        var slot V value
    }

    type AvlRankDelta = enum {
        minus = -1
        zero  =  0
        plus  =  1
    }

Private iterator structure

    type MapIterator<K,V> = struct {
        # TODO
    }


Map client interface
--------------------

TODO: are lifetime specifications needed?
TODO: nullable type parameters?
TODO: decide on / implement var(this) or var_this syntax. only allow var(this), i.e. not other variants? and only in method returns and in parameters?
TODO: aliasing?

    func Map<K,V>.contains(slot K key) -> boolean
    func Map<K,V>.find(slot K key) -> ?ref var(this) MapNode<K,V>
    func Map<K,V>.must_find(arena, slot K key) -> slot V   # throws if absent
    func Map<K,V>.iterator(arena) -> MapIterator<K,V>
        lifetime return <= this

    func var Map<K,V>.put(slot K key, slot V value) -> ?ref var(this) MapNode<K,V>
    func var Map<K,V>.put_if_absent(slot K key, slot V value) -> boolean

    # FIXME how can MapNode work with serialized maps?
    func MapNode<K,V>.get_key() -> var(this) slot K
    func MapNode<K,V>.get_value() -> var(this) slot V
    func var MapNode<K,V>.set_value(slot V) -> slot V
    func MapNode<K,V>.get_tag() -> uint8
    func var MapNode<K,V>.set_tag(uint8 tag)