Files
scc/lib/utils/ds/hashtable.c
ZZY 05c637e594 refactor: 重构前端代码并添加日志功能
- 重命名和重构了多个文件,包括 lexer、parser 和 AST 相关代码
- 添加了日志功能,使用 LOG_* 宏替代原有的 error 和 warn 函数
- 优化了错误处理和内存分配方式
- 调整了代码结构,提高了模块化和可读性
2025-03-19 12:22:55 +08:00

142 lines
4.0 KiB
C

#include "hashtable.h"
#define LOAD_FACTOR 0.75f
// 素数表用于桶扩容(最后一个元素为最大允许容量)
static const int PRIME_CAPACITIES[] = {
11, 23, 47, 97, 193, 389, 769, 1543, 3079,
6151, 12289, 24593, 49157, 98317, 196613, 393241,
786433, 1572869, 3145739, 6291469, 12582917, 25165843
};
// 私有函数声明
static u32_t calc_hash(const char* str, int len);
static void rehash(hash_table_t* ht);
// Allocate a hash table on the heap and initialize it.
// Caller owns the returned table; release it via hash_table_destroy()
// (note: destroy frees the internals only, not the struct itself).
hash_table_t* new_hash_table(int init_size, int max_cap) {
    hash_table_t* table = salloc_alloc(sizeof(hash_table_t));
    hash_table_init(table, init_size, max_cap);
    return table;
}
// Map a requested size to the smallest prime capacity that can hold it.
// A negative request yields the maximum supported capacity.
// BUG FIX: the original omitted the return type ("static inline
// get_real_size") — implicit int is invalid since C99.
static inline int get_real_size(int size) {
    if (size < 0) {
        // Negative request: clamp to the largest capacity in the table.
        return PRIME_CAPACITIES[SMCC_ARRLEN(PRIME_CAPACITIES)-1];
    }
    // Walk the prime table until a capacity >= size is found, or the table
    // is exhausted. The index bound is checked BEFORE the array read so the
    // scan can never touch memory past the table.
    int cap_idx = 0;
    while (cap_idx < (int)SMCC_ARRLEN(PRIME_CAPACITIES)-1 &&
           PRIME_CAPACITIES[cap_idx] < size) {
        cap_idx++;
    }
    return PRIME_CAPACITIES[cap_idx];
}
// Initialize an already-allocated hash table.
// init_size is rounded up to the nearest prime capacity; max_cap bounds
// how far rehash() may grow the table (also rounded to a prime).
void hash_table_init(hash_table_t* ht, int init_size, int max_cap) {
    // Clamp the growth ceiling to a prime from the capacity table.
    ht->max_cap = get_real_size(max_cap);
    // Round the initial capacity up to a prime as well.
    ht->cap = get_real_size(init_size);
    ht->size = 0;
    ht->buckets = salloc_alloc(sizeof(hash_node_t*) * ht->cap);
    // BUG FIX: the bucket array must start out all-NULL. The original left
    // the freshly (re)allocated memory uninitialized, so insert/find would
    // dereference garbage chain pointers — undefined behavior. rehash()
    // already zeroes its new array; do the same here.
    memset(ht->buckets, 0, sizeof(hash_node_t*) * ht->cap);
}
// Intern the string [str, str+len) into the table.
// The table stores the pointer itself — it does NOT copy the bytes, so the
// caller must keep `str` alive for the table's lifetime.
// An entry with identical hash, length and bytes is silently ignored.
void hash_table_insert(hash_table_t* ht, const char* str, int len) {
    // Grow when the load factor is exceeded and growth is still allowed.
    if (ht->size >= ht->cap * LOAD_FACTOR && ht->cap < ht->max_cap) {
        rehash(ht);
    }
    // Past the hard ceiling we keep chaining, but leave a trace.
    if (ht->size >= ht->cap) {
        LOG_TRACE("Hash table size exceeds maximum capacity. Consider increasing max_capacity.");
    }
    u32_t hash = calc_hash(str, len);
    int idx = hash % ht->cap;
    // Duplicate scan: bail out if an identical entry is already chained here.
    for (hash_node_t* cur = ht->buckets[idx]; cur != NULL; cur = cur->next) {
        if (cur->hash == hash && cur->len == len &&
            memcmp(cur->str, str, len) == 0) {
            return;
        }
    }
    // Prepend a fresh node to the bucket's chain.
    hash_node_t* fresh = salloc_alloc(sizeof(hash_node_t));
    fresh->str = str;
    fresh->len = len;
    fresh->hash = hash;
    fresh->next = ht->buckets[idx];
    ht->buckets[idx] = fresh;
    ht->size++;
}
// Look up the string [str, str+len) in the table.
// Returns the matching node, or NULL if it was never inserted.
hash_node_t* hash_table_find(hash_table_t* ht, const char* str, int len) {
    u32_t hash = calc_hash(str, len);
    int idx = hash % ht->cap;
    // Compare the cached hash first; fall back to a byte comparison only
    // on a hash+length match.
    for (hash_node_t* cur = ht->buckets[idx]; cur != NULL; cur = cur->next) {
        if (cur->hash == hash && cur->len == len &&
            memcmp(cur->str, str, len) == 0) {
            return cur;
        }
    }
    return NULL;
}
// Grow the bucket array to the next prime capacity (bounded by max_cap)
// and redistribute every node. Nodes are relinked, never reallocated, so
// hash_node_t pointers handed out by hash_table_find stay valid.
static void rehash(hash_table_t* ht) {
    int old_cap = ht->cap;
    hash_node_t** old_buckets = ht->buckets;
    // Find the smallest prime strictly larger than the current capacity.
    // BUG FIX: the original bounded the loop with `new_cap_idx < ht->max_cap`,
    // comparing an INDEX against a CAPACITY VALUE (e.g. 25165843) — the scan
    // could run past the end of PRIME_CAPACITIES (out-of-bounds read).
    // Bound by the table length, and stop once the candidate reaches the
    // configured ceiling (max_cap is itself a prime from get_real_size).
    int new_cap_idx = 0;
    while (new_cap_idx < (int)SMCC_ARRLEN(PRIME_CAPACITIES) - 1 &&
           PRIME_CAPACITIES[new_cap_idx] <= old_cap &&
           PRIME_CAPACITIES[new_cap_idx] < ht->max_cap) {
        new_cap_idx++;
    }
    ht->cap = PRIME_CAPACITIES[new_cap_idx];
    // Allocate and zero the new bucket array.
    ht->buckets = salloc_alloc(sizeof(hash_node_t*) * ht->cap);
    memset(ht->buckets, 0, sizeof(hash_node_t*) * ht->cap);
    // Relink every node into its new bucket (chains end up reversed,
    // which is fine — bucket order carries no meaning).
    for (int i = 0; i < old_cap; i++) {
        hash_node_t* node = old_buckets[i];
        while (node) {
            hash_node_t* next = node->next;
            int new_bucket = node->hash % ht->cap;
            node->next = ht->buckets[new_bucket];
            ht->buckets[new_bucket] = node;
            node = next;
        }
    }
    salloc_free(old_buckets);
}
// Compute the 32-bit hash of `str` (delegates to rt_strhash, which uses the
// same algorithm as the HASH_FNV_1A macro).
// NOTE(review): rt_strhash appears to take only the string, ignoring `len` —
// presumably it hashes up to the NUL terminator. Confirm every inserted
// string is NUL-terminated, or switch to a length-aware hash.
static u32_t calc_hash(const char* str, int len) {
    (void)len; // see note above
    // BUG FIX: the original called rt_strhash() and then fell off the end of
    // a non-void function without returning — undefined behavior whenever the
    // result is used (it is, by insert and find).
    return rt_strhash(str);
}
// Free every node and the bucket array, leaving the table empty but valid.
// The hash_table_t struct itself is NOT freed (it may be stack-allocated),
// and the interned strings are untouched — the table never owned them.
void hash_table_destroy(hash_table_t* ht) {
    for (int bucket = 0; bucket < ht->cap; bucket++) {
        hash_node_t* cur = ht->buckets[bucket];
        // Walk the chain, grabbing the successor before freeing each node.
        while (cur != NULL) {
            hash_node_t* victim = cur;
            cur = cur->next;
            salloc_free(victim);
        }
    }
    salloc_free(ht->buckets);
    ht->buckets = NULL;
    ht->size = 0;
    ht->cap = 0;
}