/*
 * Functions for checking API soundness / compatibility of modules
 *
 * Copyright © 2026 Samuel Lidén Borell <samuel@kodafritt.se>
 *
 * SPDX-License-Identifier: EUPL-1.2+ OR LGPL-2.1-or-later
 */
#include <assert.h>
#include <string.h>
#include "compiler.h"
#include "semchk.h"

/*
 * Checks that a parameter in an implementation matches the corresponding
 * parameter in the interface.
 *
 * expv  - the parameter as declared in the interface, or NULL if the
 *         interface has no corresponding parameter.
 * implv - the parameter as declared in the implementation. Must not be NULL.
 *
 * An error is reported through error_ident if the interface parameter is
 * missing or if the two names differ. The types are then checked with
 * check_type_compat using TC_EXACT.
 */
void compare_vardef(const struct Var *expv, const struct Var *implv)
{
    size_t namelen;
    assert(implv != NULL);
    if (!expv) {
        error_ident("Parameter in implementation doesn't exist in interface",
                    &implv->ident);
        /* There is nothing to compare against, and everything below
           dereferences expv. */
        return;
    }

    namelen = implv->ident.node.length;
    if (expv->ident.node.length != namelen ||
            memcmp(expv->ident.node.name, implv->ident.node.name, namelen)) {
        error_ident("Parameter does not match name in interface",
                    &expv->ident);
    }

    check_type_compat(expv->typeref, implv->typeref, TC_EXACT);
}

/*
 * Orders two identifiers.
 *
 * The common prefix is compared bytewise with memcmp. If the prefixes are
 * equal, the shorter identifier sorts first.
 *
 * Returns a negative value if a sorts before b and a positive value if a
 * sorts after b. The identifiers are expected to differ (this is asserted);
 * 0 is only returned for equal identifiers when assertions are compiled out.
 */
static int identcmp(const struct Ident *a, const struct Ident *b)
{
    size_t alen = a->node.length, blen = b->node.length;
    size_t commonlen = alen < blen ? alen : blen;
    if (commonlen) {
        int diff = memcmp(a->node.name, b->node.name, commonlen);
        if (diff) return diff;
    }
    assert(alen != blen); /* duplicate identifiers are not allowed */
    if (alen == blen) return 0;
    /* One identifier is a proper prefix of the other: the shorter one must
       sort first, regardless of which argument it is. */
    return alen < blen ? -1 : 1;
}

/*
   Versioned symbols must come in alphabetical order to have a normalised
   (and deterministic across implementations) order of symbols for computation
   of the API hash.

   add_versioned_type, add_versioned_func and add_versioned_instancevar are
   responsible for keeping a sorted list of types/functions/instance
   variables for each API version.

   XXX The functions use insertion sort, and will be slow if there are a
   lot of symbols in a given API version.
*/

/*
 * Inserts current_type into the list of types of the given API version
 * (ver->types, linked through sincever_next), keeping the list ordered
 * according to identcmp on the type identifiers.
 *
 * ver - the API version to add the type to.
 *
 * current_type must not be NULL.
 */
void add_versioned_type(struct VersionDecl *ver)
{
    struct Type **inspoint = &ver->types;
    const struct Ident *ident;

    assert(current_type != NULL);
    ident = &current_type->ident;

    for (;;) {
        struct Type *t = *inspoint;

        /* Insert before the first entry that sorts after the new type,
           or at the end of the list. */
        if (!t || identcmp(&t->ident, ident) > 0) {
            current_type->sincever_next = t;
            *inspoint = current_type;
            return;
        }
        inspoint = &t->sincever_next;
    }
}

/*
 * Orders two functions: functions without a class come first, then
 * functions are ordered by class identifier, and functions of the same
 * class are ordered by their own identifier.
 *
 * Returns a negative value if a sorts before b, positive if it sorts after.
 */
static int funccmp(const struct Func *a, const struct Func *b)
{
    if (a->class_ == b->class_) {
        /* Same class (or both without a class): the function name decides */
        return identcmp(&a->ident, &b->ident);
    }
    if (!a->class_) {
        return -1;
    }
    if (!b->class_) {
        return 1;
    }
    return identcmp(&a->class_->ident, &b->class_->ident);
}

/*
 * Inserts current_func into the list of functions of the given API version
 * (ver->funcs, linked through sincever_next), keeping the list ordered
 * according to funccmp.
 *
 * current_func must not be NULL.
 */
void add_versioned_func(struct VersionDecl *ver)
{
    struct Func **link = &ver->funcs;

    assert(current_func != NULL);

    /* Walk past every entry that does not sort after the new function */
    while (*link && funccmp(*link, current_func) <= 0) {
        link = &(*link)->sincever_next;
    }

    /* Splice the new function in front of the entry that was found
       (or at the end of the list) */
    current_func->sincever_next = *link;
    *link = current_func;
}

/*
 * Orders an existing list entry (a) relative to a variable (b) that belongs
 * to the class b_class: entries without a class come first, then entries are
 * ordered by class identifier, and variables of the same class are ordered
 * by their own identifier.
 *
 * Returns a negative value if a sorts before b, positive if it sorts after.
 */
static int varcmp(const struct VarInVersion *a,
                  const struct Var *b,
                  const struct Type *b_class)
{
    if (a->class_ == b_class) {
        /* Same class: the variable name decides */
        return identcmp(&a->var->ident, &b->ident);
    }
    if (!a->class_) {
        return -1;
    }
    if (!b_class) {
        return 1;
    }
    return identcmp(&a->class_->ident, &b_class->ident);
}

/*
 * Adds an instance variable of current_type to the list of instance
 * variables of the given API version (ver->instvars, linked through
 * sincever_next), keeping the list ordered according to varcmp.
 *
 * ver     - the API version to add the variable to.
 * instvar - the instance variable. Must not be NULL.
 *
 * current_type must not be NULL. A new VarInVersion list entry is allocated
 * with malloc; the result is checked with NO_NULL.
 */
void add_versioned_instancevar(struct VersionDecl *ver,
                               const struct Var *instvar)
{
    struct VarInVersion **inspoint = &ver->instvars;

    assert(instvar != NULL);
    assert(current_type != NULL);
    for (;;) {
        struct VarInVersion *vv = *inspoint;

        if (!vv || varcmp(vv, instvar, current_type) > 0) {
            struct VarInVersion *newvar = malloc(sizeof(*newvar));
            NO_NULL(newvar);
            /* The class must be recorded: it is read back by varcmp on
               later insertions and when the API hash is computed. */
            newvar->class_ = current_type;
            newvar->var = instvar;
            newvar->sincever_next = vv;
            *inspoint = newvar;
            return;
        }
        inspoint = &vv->sincever_next;
    }
}


/* TODO decide on a hash function to use */
/* TODO use the "key" parameter or not?
   is it secure to use with a publicly-known key?
   also, it is not defined by RFC-7693, only reserved :( */
/* Placeholder hash state. It carries no real data until a hash function
   has been chosen. */
struct SomeHashState {
    char dummy;
};

/* Placeholder for initialising a hash computation.
   Currently ignores all of its arguments. */
static void somehash_init(struct SomeHashState *state,
                          size_t outlen,
                          const unsigned char *key,
                          size_t keylen)
{
    (void)keylen;
    (void)key;
    (void)outlen;
    (void)state;
}

/* Placeholder for feeding datalen bytes of data into the hash.
   Currently ignores all of its arguments. */
static void somehash_update(struct SomeHashState *state,
                            const unsigned char *data, size_t datalen)
{
    (void)datalen;
    (void)data;
    (void)state;
}

/* Placeholder for finishing the hash computation.
   Currently ignores its arguments; nothing is written to out. */
static void somehash_final(struct SomeHashState *state, unsigned char *out)
{
    (void)out;
    (void)state;
}

/* Tag bytes that are fed into the hash. The numeric values are spelled out
   since they become part of the hashed data. */
enum RecordKind {
    RECORDKIND_END                = 0,
    RECORDKIND_TYPE               = 1,
    RECORDKIND_FUNC               = 2,
    RECORDKIND_VAR                = 3,
    RECORDKIND_INSTANCEVAR        = 4,
    RECORDKIND_PRECEEDING_VERSION = 5
};

/* Feeds a single byte into the hash. */
static void feed_byte(struct SomeHashState *state, unsigned char b)
{
    unsigned char buf[1];

    buf[0] = b;
    somehash_update(state, buf, sizeof(buf));
}

/* Feeds len bytes of the string s into the hash, followed by a zero byte
   as a terminator. For an empty string only the terminator is fed. */
static void feed_string(struct SomeHashState *state, const char *s, size_t len)
{
    const unsigned char *bytes = (const unsigned char *)s;

    if (len != 0) {
        somehash_update(state, bytes, len);
    }
    /* Terminator */
    feed_byte(state, 0);
}

/* Feeds the name of an identifier into the hash as a terminated string
   (see feed_string). */
static void feed_ident(struct SomeHashState *state, const struct Ident *ident)
{
    feed_string(state,
                ident->node.name,
                ident->node.length);
}

/* Feeds a reference to a class into the hash: the class name if class_ is
   non-NULL, or a single zero byte if there is no class. */
static void feed_classref(struct SomeHashState *state,
                          const struct Type *class_)
{
    /* TODO optimise: emit some special value (e.g. 1) when the class name
            is repeated.
            - perhaps track the last outputted (defined or referenced)
              class name? */
    if (!class_) {
        feed_byte(state, 0);
        return;
    }

    assert(class_->ident.node.length != 0);
    feed_ident(state, &class_->ident);
}

/*
 * Feeds a type reference into the hash: first one byte with the qualifier
 * bits, then one byte with the kind of type. For class types the class name
 * follows.
 *
 * Qualifier bits: 0x01 var, 0x04 aliased, 0x08 volatile, 0x10 signed,
 * 0x20 unsigned, 0x40 wrapping. 0x02 is currently unassigned.
 *
 * Type kinds: 1 = class, 3 = bool, 4 = int.
 * Any other kind of type reference triggers an assertion failure.
 */
static void feed_typeref(struct SomeHashState *state, const struct TypeRef *tr)
{
    unsigned char qualbits = 0x00;

    if (tr->quals & Q_VAR) {
        qualbits |= 0x01;
    }
    /* TODO "io" qualifier? */
    if (tr->quals & Q_ALIASED) {
        qualbits |= 0x04;
    }
    if (tr->quals & Q_VOLATILE) {
        qualbits |= 0x08;
    }
    /* TODO should these be qualifiers or separate types? */
    if (tr->quals & Q_SIGNED) {
        qualbits |= 0x10;
    }
    if (tr->quals & Q_UNSIGNED) {
        qualbits |= 0x20;
    }
    if (tr->quals & Q_WRAPPING) {
        qualbits |= 0x40;
    }
    feed_byte(state, qualbits);

    switch (tr->kind) {
    case TR_BOOL:
        feed_byte(state, 3);
        break;
    case TR_INT:
        /* TODO integer range and/or different integer types */
        feed_byte(state, 4);
        break;
    case TR_CLASS: {
        const struct Type *cls = tr->u.class_;
        assert(cls != NULL);
        /* TODO external identifiers:
           - kind=2 (instead of 1)
           - API hash etc. */
        feed_byte(state, 1);
        feed_ident(state, &cls->ident);
        break; }
    case TR_UNKNOWN:
    case TR_VOID:
    default:
        /* These are not expected here */
        assert(0);
        break;
    }
}

/* Feeds a variable into the hash: its name (must be non-empty), a byte of
   flags (0x01 = modifiable), and finally its type. */
static void feed_var(struct SomeHashState *state, const struct Var *v)
{
    unsigned char flags;

    assert(v->ident.node.length != 0);
    feed_ident(state, &v->ident);

    flags = v->is_modifiable ? 0x01 : 0x00;
    /* TODO optional etc. */
    feed_byte(state, flags);

    feed_typeref(state, v->typeref);
}

/* Feeds the first count variables of a linked list (linked through next)
   into the hash. Each variable is preceded by RECORDKIND_VAR, and the list
   is terminated with RECORDKIND_END. */
static void feed_varlist(struct SomeHashState *state, const struct Var *vars,
                         size_t count)
{
    const struct Var *cur = vars;
    size_t i;

    for (i = 0; i < count; i++) {
        assert(cur != NULL);
        feed_byte(state, RECORDKIND_VAR);
        feed_var(state, cur);
        cur = cur->next;
    }
    feed_byte(state, RECORDKIND_END);
}

#define APIHASH_SIZE 32

static void compute_api_hash(struct VersionDecl *ver)
{
    struct SomeHashState state;
    struct Type *t;
    struct Func *f;
    struct VarInVersion *v;

    /*somehash_init(&state, 32,
                  ver->preceeding ? ver->preceeding->apihash : NULL, 32);*/
    somehash_init(&state, 32, NULL, 0);
    feed_string(&state, "SLUL API definition to be hashed", 32);
    feed_byte(&state, 0); /* version */

    feed_string(&state, ver->node.name, ver->node.length);

    for (t = ver->types; t; t = t->sincever_next) {
        feed_byte(&state, RECORDKIND_TYPE);
        feed_ident(&state, &t->ident);
        /* TODO type parameters */

        feed_byte(&state, 0); /* TODO flags such as "closed" */
    }
    feed_byte(&state, RECORDKIND_END);

    for (f = ver->funcs; f; f = f->sincever_next) {
        unsigned char funckind;

        feed_byte(&state, RECORDKIND_FUNC);
        feed_classref(&state, f->class_);
        feed_ident(&state, &f->ident);

        assert(!f->is_entry);
        assert(!f->is_service_ctor);
        funckind = 0x00;
        if (f->is_modifying)    funckind |= 0x01;
        if (f->is_constructor)  funckind |= 0x02;
        if (f->is_noreturn)     funckind |= 0x04;
        feed_byte(&state, funckind);

        feed_varlist(&state, f->params, f->num_params);
        feed_varlist(&state, f->returns, f->num_returns);
        /* XXX Perhaps add extensible functions? I.e. where more params can be
           added in future versions. Can it be implemented efficiently across
           all platforms? Is it a good idea? */
    }
    feed_byte(&state, RECORDKIND_END);

    for (v = ver->instvars; v; v = v->sincever_next) {
        feed_byte(&state, RECORDKIND_INSTANCEVAR);
        feed_classref(&state, v->class_);
        feed_var(&state, v->var);
    }
    feed_byte(&state, RECORDKIND_END);

    /* TODO enum values */

    /* The API hash depends on the previous hashes, so it is added last to
       allow for parallel implementations. */
    feed_byte(&state, RECORDKIND_PRECEEDING_VERSION);
    feed_byte(&state, ver->preceeding != NULL ? 1 : 0);
    if (ver->preceeding) {
        somehash_update(&state, ver->preceeding->apihash, APIHASH_SIZE);
    }

    somehash_final(&state, ver->apihash);
}

/*
 * Computes the API hash of every version in mod_declared_versions_list,
 * in list order.
 *
 * NOTE(review): the hash of a version includes the hash of its preceding
 * version, so this presumably relies on preceding versions appearing
 * earlier in the list - confirm.
 */
void compute_api_hashes(void)
{
    struct VersionDecl *ver = mod_declared_versions_list;

    while (ver) {
        compute_api_hash(ver);
        ver = ver->next;
    }
}

/*

 TODO api hash computation.

  pre-requisites:
  1. process exported symbols in this order:
    a. version order.
    b. then kind of symbol (type vs top-level function vs constant/variable)
    c. then alphabetical order.
  2. blake3, in a portable way (ANSI C).
    - portable SlulInt instead of uint32_t, with masks to strip high bits
      if > 32 bits (test this with 64 bits!)
    - portable unsigned char instead of uint8_t, with masks to strip high bits
      if > 8 bits (test this with 16 bits or more).
  2 (alternative option:) use blake2s, since the input is probably small (make
     a guesstimate) and the complexity / I-cache usage / etc. might not be
     worth the possible speedup of blake3.
        - and it has a (finalised) RFC:  RFC-7693

  regarding blake2s:

    - could use the following as the key:
        previous API-hash (or all zeros)
            - NOTE: the key is reserved but NOT defined in RFC-7693!
    - there's an IETF RFC, but it's not a NIST standard.
      also, the RFC is "informational".
      could this be a problem?
    - sometimes blake3 and/or blake2b is supported but not blake2s
      (for example, in the CycloneDX spec). are people moving away from
      blake2s?
        - but on the other hand, CycloneDX has only "blake3" which i think is
          the 512-bit version :(
          but it has several output lengths for blake2b

  regarding SHA-256:

  - it is perhaps considered "more standard" than blake2s, even though the
    latter has an RFC.
  - it is apparently often significantly faster than blake3/2b/2s due to being
    supported in hardware in many processors.
    (but on modern x86_64 there seems to be little difference,
    on 64 .. 4096 byte datasets at least)
  - however, is it as secure? compare security levels of SHA-256 vs blake2s-256
  - a more future-proof hash is preferred since it will be very difficult to
    change.

  regarding SHA-512/256 (truncated SHA-512)

  - this avoids length-extension attacks
  - 64-bit word size
  - standardized and should be future-proof security wise.
  - it is NOT simply truncated. It uses a different initialisation!
  - slow. how many % of the compilation time is used on hashing interfaces?
  - how complex is the code?

  regarding SHA-3 / SHAKE-256:

   - SHAKE-256 is based on Keccak (which SHA-3 is based on as well)
        - SHAKE-256 is the variable-sized output version (an XOF),
          while the SHA3-nnn functions are the fixed-size output versions.
   - SHAKE-256 appears to be slower than blake2 (not sure if b or s or both)
   - there's a KangarooTwelve function that is faster, for smaller inputs
        - I assume that this means that SHA-3 is slow on small inputs?
          (and API-hash-preimages will often be small)

  regarding the hash pre-image / "to-be-hashed-data":

   - all items in lists must start with non-zero byte!
     (since zero is used as a terminator byte)
   - (non-closed) classes CAN be repeated to extend them with more fields.
     same for enums.

 */