- Add a .gitignore file to ignore compiler-generated binaries
- Refactor lexer.c, improving keyword handling and string handling
- Update the front-end, parser, and AST-related files to work with the new lexer
- Streamline the token-related definitions and functions, introducing new token types
#include "hashtable.h"

#define INIT_HASH_TABLE_SIZE (32)

void hashtable_init(hash_table_t* ht) {
    vector_init(ht->entries);
    ht->count = 0;
    ht->tombstone_count = 0;
    Assert(ht->key_cmp != NULL && ht->hash_func != NULL);
}

static int next_power_of_two(int n) {
    n--;
    n |= n >> 1;
    n |= n >> 2;
    n |= n >> 4;
    n |= n >> 8;
    n |= n >> 16;
    return n + 1;
}

static hash_entry_t* find_entry(hash_table_t* ht, const void* key, u32_t hash) {
    if (ht->entries.cap == 0) return NULL;

    u32_t index = hash & (ht->entries.cap - 1); // capacity is a power of two
    u32_t probe = 0;

    hash_entry_t* tombstone = NULL;

    while (1) {
        hash_entry_t* entry = &vector_at(ht->entries, index);
        if (entry->state == ENTRY_EMPTY) {
            return tombstone ? tombstone : entry;
        }

        if (entry->state == ENTRY_TOMBSTONE) {
            if (!tombstone) tombstone = entry;
        } else if (entry->hash == hash && ht->key_cmp(entry->key, key) == 0) {
            return entry;
        }

        // Linear probing
        index = (index + 1) & (ht->entries.cap - 1);
        probe++;
        if (probe >= ht->entries.cap) break;
    }
    LOG_ERROR("find_entry: hash table is full");
    return NULL;
}

static void adjust_capacity(hash_table_t* ht, int new_cap) {
    new_cap = next_power_of_two(new_cap);
    Assert(new_cap >= ht->entries.cap);

    vector_header(old_entries, hash_entry_t);
    old_entries.data = ht->entries.data;
    old_entries.cap = ht->entries.cap;

    // size is not used here; it is kept in sync for the gdb Python extension when debugging
    ht->entries.size = new_cap;
    ht->entries.cap = new_cap;
    ht->entries.data = salloc_realloc(NULL, new_cap * sizeof(hash_entry_t));
    rt_memset(ht->entries.data, 0, new_cap * sizeof(hash_entry_t));

    // rehash all of the old entries into the new array
    for (rt_size_t i = 0; i < old_entries.cap; i++) {
        hash_entry_t* entry = &vector_at(old_entries, i);
        if (entry->state == ENTRY_ACTIVE) {
            hash_entry_t* dest = find_entry(ht, entry->key, entry->hash);
            *dest = *entry;
        }
    }

    vector_free(old_entries);
    ht->tombstone_count = 0;
}

void* hashtable_set(hash_table_t* ht, const void* key, void* value) {
    // Grow (and clear tombstones) once live plus dead entries reach 75% of capacity
    if (ht->count + ht->tombstone_count >= ht->entries.cap * 0.75) {
        int new_cap = ht->entries.cap < INIT_HASH_TABLE_SIZE ? INIT_HASH_TABLE_SIZE : ht->entries.cap * 2;
        adjust_capacity(ht, new_cap);
    }

    u32_t hash = ht->hash_func(key);
    hash_entry_t* entry = find_entry(ht, key, hash);

    void* old_value = NULL;
    if (entry->state == ENTRY_ACTIVE) {
        old_value = entry->value;
    } else {
        if (entry->state == ENTRY_TOMBSTONE) ht->tombstone_count--;
        ht->count++;
    }

    entry->key = key;
    entry->value = value;
    entry->hash = hash;
    entry->state = ENTRY_ACTIVE;
    return old_value;
}

void* hashtable_get(hash_table_t* ht, const void* key) {
    if (ht->entries.cap == 0) return NULL;

    u32_t hash = ht->hash_func(key);
    hash_entry_t* entry = find_entry(ht, key, hash);
    return (entry && entry->state == ENTRY_ACTIVE) ? entry->value : NULL;
}

void* hashtable_del(hash_table_t* ht, const void* key) {
    if (ht->entries.cap == 0) return NULL;

    u32_t hash = ht->hash_func(key);
    hash_entry_t* entry = find_entry(ht, key, hash);

    if (entry == NULL || entry->state != ENTRY_ACTIVE) return NULL;

    void* value = entry->value;
    // Mark as a tombstone instead of clearing, so existing probe chains stay intact
    entry->state = ENTRY_TOMBSTONE;
    ht->count--;
    ht->tombstone_count++;
    return value;
}

void hashtable_destory(hash_table_t* ht) {
    vector_free(ht->entries);
    ht->count = 0;
    ht->tombstone_count = 0;
}
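
/*
 * Usage sketch (illustrative only, excluded from the build): shows how the
 * callbacks and the set/get/del calls above fit together. It assumes only what
 * this file already relies on -- that hash_table_t exposes key_cmp and
 * hash_func function-pointer fields with the signatures used by find_entry()
 * and hashtable_set(). The string helpers and the example function below are
 * hypothetical, not part of the library.
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int example_str_cmp(const void* a, const void* b) {
    return strcmp((const char*)a, (const char*)b);
}

static u32_t example_str_hash(const void* key) {
    // 32-bit FNV-1a over the NUL-terminated key
    u32_t h = 2166136261u;
    for (const char* p = (const char*)key; *p; p++) {
        h ^= (u32_t)(unsigned char)*p;
        h *= 16777619u;
    }
    return h;
}

static void hashtable_usage_example(void) {
    hash_table_t ht = {0};
    ht.key_cmp = example_str_cmp;    // must be set before hashtable_init()
    ht.hash_func = example_str_hash;
    hashtable_init(&ht);

    hashtable_set(&ht, "answer", (void*)(intptr_t)42);       // insert
    int v = (int)(intptr_t)hashtable_get(&ht, "answer");     // lookup -> 42
    printf("answer = %d\n", v);

    hashtable_del(&ht, "answer");    // returns the old value, leaves a tombstone
    hashtable_destory(&ht);
}
#endif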