feat(frontend): 重构词法分析器

- 添加 .gitignore 文件,忽略编译器生成的二进制文件
- 重构 lexer.c 文件,改进了关键字处理和字符串处理
- 更新前端、解析器和 AST 相关文件,以适应新的词法分析器
- 优化了 token 相关的定义和函数,引入了新的 token 类型
This commit is contained in:
ZZY
2025-03-23 12:13:16 +08:00
parent 05c637e594
commit 2b4857001c
33 changed files with 532 additions and 624 deletions

View File

@ -1,142 +1,129 @@
#include "hashtable.h"
#define LOAD_FACTOR 0.75f
// Prime table used when growing the bucket array
// (the last element is the maximum capacity ever allowed).
static const int PRIME_CAPACITIES[] = {
    11, 23, 47, 97, 193, 389, 769, 1543, 3079,
    6151, 12289, 24593, 49157, 98317, 196613, 393241,
    786433, 1572869, 3145739, 6291469, 12582917, 25165843
};
#define INIT_HASH_TABLE_SIZE (32)
// private helper declarations
static u32_t calc_hash(const char* str, int len);
static void rehash(hash_table_t* ht);
// Heap-allocating constructor for the legacy chain-based API.
// NOTE(review): the closing brace of this function is not visible in this
// diff view — the lines below are truncated residue; code left untouched.
hash_table_t* new_hash_table(int init_size, int max_cap) {
    hash_table_t* ht = salloc_alloc(sizeof(hash_table_t));
    hash_table_init(ht, init_size, max_cap);
    return ht;
// Prepare an open-addressing table for use. The caller must have installed
// both the hash_func and key_cmp callbacks beforehand (checked below).
void hashtable_init(hash_table_t* ht) {
    vector_init(ht->entries);
    ht->count = 0u;
    ht->tombstone_count = 0u;
    // Fail fast if the mandatory callbacks were not supplied.
    Assert(ht->key_cmp != NULL && ht->hash_func != NULL);
}
// Map a requested size onto the prime-capacity table.
// BUG FIX: the original declaration omitted the return type (implicit int
// is invalid since C99); the closing brace was also lost in this diff view.
// A negative request is treated as "give me the maximum allowed capacity".
static inline int get_real_size(int size) {
    int cap_idx = 0;
    if (size < 0) {
        return PRIME_CAPACITIES[SMCC_ARRLEN(PRIME_CAPACITIES)-1];
    }
    // First prime not smaller than size, clamped to the last table entry.
    while (PRIME_CAPACITIES[cap_idx] < size && cap_idx < SMCC_ARRLEN(PRIME_CAPACITIES)-1) {
        cap_idx++;
    }
    return PRIME_CAPACITIES[cap_idx];
}
// Round n up to the nearest power of two (5 -> 8, 32 -> 32).
// BUG FIX: for n <= 0 the old code right-shifted a negative value
// (implementation-defined) and returned 0; a zero capacity would break the
// `hash & (cap - 1)` index masking used by the table. Non-positive inputs
// are now clamped to 1, and the bit-smearing runs on an unsigned value.
static int next_power_of_two(int n) {
    if (n <= 1) {
        return 1;
    }
    unsigned int v = (unsigned int)n - 1u;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    return (int)(v + 1u);
}
// Legacy initializer: normalize both requested capacities onto the prime
// table, then allocate an empty bucket array of the chosen capacity.
void hash_table_init(hash_table_t* ht, int init_size, int max_cap) {
    ht->max_cap = get_real_size(max_cap);
    ht->cap = get_real_size(init_size);
    ht->size = 0;
    // salloc_realloc with a NULL pointer behaves as a fresh allocation.
    ht->buckets = salloc_realloc(NULL, sizeof(hash_node_t*) * ht->cap);
}
// NOTE(review): this span is interleaved diff residue — the NEW
// open-addressing find_entry() is mixed line-by-line with the OLD
// chain-based hash_table_insert()/hash_table_find() that this commit
// deletes. Only comments are changed here; code bytes are untouched.
static hash_entry_t* find_entry(hash_table_t* ht, const void* key, u32_t hash) {
    if (ht->entries.cap == 0) return NULL;
    u32_t index = hash & (ht->entries.cap - 1); // capacity is a power of two
    u32_t probe = 0;
// OLD chain-based insert (removed by this commit):
void hash_table_insert(hash_table_t* ht, const char* str, int len) {
    // automatic growth check
    if (ht->size >= ht->cap * LOAD_FACTOR && ht->cap < ht->max_cap) {
        rehash(ht);
    }
    if (ht->size >= ht->cap) {
        LOG_TRACE("Hash table size exceeds maximum capacity. Consider increasing max_capacity.");
    }
    // compute the hash value
    u32_t hash = calc_hash(str, len);
    int bucket_idx = hash % ht->cap;
    // check for an existing duplicate
    hash_node_t* node = ht->buckets[bucket_idx];
    while (node) {
        if (node->hash == hash &&
            node->len == len &&
            memcmp(node->str, str, len) == 0) {
            return; // already present
    hash_entry_t* tombstone = NULL;
    while (1) {
        hash_entry_t* entry = &vector_at(ht->entries, index);
        if (entry->state == ENTRY_EMPTY) {
            // an empty slot ends the probe chain; prefer reusing a tombstone
            return tombstone ? tombstone : entry;
        }
        node = node->next;
    }
    // create a new chain node
    hash_node_t* new_node = salloc_alloc(sizeof(hash_node_t));
    new_node->str = str;
    new_node->len = len;
    new_node->hash = hash;
    new_node->next = ht->buckets[bucket_idx];
    ht->buckets[bucket_idx] = new_node;
    ht->size++;
}
hash_node_t* hash_table_find(hash_table_t* ht, const char* str, int len) {
    u32_t hash = calc_hash(str, len);
    int bucket_idx = hash % ht->cap;
    hash_node_t* node = ht->buckets[bucket_idx];
    while (node) {
        if (node->hash == hash &&
            node->len == len &&
            memcmp(node->str, str, len) == 0) {
            return node;
        if (entry->state == ENTRY_TOMBSTONE) {
            if (!tombstone) tombstone = entry;
        } else if (entry->hash == hash && ht->key_cmp(entry->key, key) == 0) {
            return entry;
        }
        node = node->next;
        // linear probing
        index = (index + 1) & (ht->entries.cap - 1);
        probe++;
        if (probe >= ht->entries.cap) break;
    }
    LOG_ERROR("hashset_find: hash table is full");
    return NULL;
}
// NOTE(review): interleaved diff residue — the OLD chain-based rehash() is
// mixed with the NEW open-addressing adjust_capacity(). Comments only;
// code bytes are untouched.
static void rehash(hash_table_t* ht) {
    int old_cap = ht->cap;
    hash_node_t** old_buckets = ht->buckets;
// NEW: grow the entry vector to a power-of-two capacity and re-slot all
// active entries (tombstones are dropped in the process).
static void adjust_capacity(hash_table_t* ht, int new_cap) {
    new_cap = next_power_of_two(new_cap);
    Assert(new_cap >= ht->entries.cap);
    // find the next prime capacity
    int new_cap_idx = 0;
    while (PRIME_CAPACITIES[new_cap_idx] <= old_cap &&
        new_cap_idx < ht->max_cap) {
        new_cap_idx++;
    }
    ht->cap = PRIME_CAPACITIES[new_cap_idx];
    // snapshot the old entry storage before reallocating
    vector_header(old_entries, hash_entry_t);
    old_entries.data = ht->entries.data;
    old_entries.cap = ht->entries.cap;
    // allocate the new bucket array
    ht->buckets = salloc_alloc(sizeof(hash_node_t*) * ht->cap);
    memset(ht->buckets, 0, sizeof(hash_node_t*) * ht->cap);
    // size is unused here, but kept in sync for the gdb python extension debug helper
    ht->entries.size = new_cap;
    ht->entries.cap = new_cap;
    ht->entries.data = salloc_realloc(NULL, new_cap * sizeof(hash_entry_t));
    rt_memset(ht->entries.data, 0, new_cap * sizeof(hash_entry_t));
    // re-hash all chain nodes into the new buckets
    for (int i = 0; i < old_cap; i++) {
        hash_node_t* node = old_buckets[i];
        while (node) {
            hash_node_t* next = node->next;
            int new_bucket = node->hash % ht->cap;
            node->next = ht->buckets[new_bucket];
            ht->buckets[new_bucket] = node;
            node = next;
    // rehash all of the old data
    for (rt_size_t i = 0; i < old_entries.cap; i++) {
        hash_entry_t* entry = &vector_at(old_entries, i);
        if (entry->state == ENTRY_ACTIVE) {
            hash_entry_t* dest = find_entry(ht, entry->key, entry->hash);
            *dest = *entry;
        }
    }
    salloc_free(old_buckets);
    vector_free(old_entries);
    ht->tombstone_count = 0;
}
// Hash a string, staying consistent with the HASH_FNV_1A macro by
// delegating to rt_strhash.
// BUG FIX: the result of rt_strhash() was computed but never returned, so
// the function fell off the end of a non-void body — callers read an
// indeterminate value (undefined behavior).
// NOTE(review): rt_strhash appears to take only the string, so `len` is
// unused here — confirm it hashes up to the NUL terminator.
static u32_t calc_hash(const char* str, int len) {
    (void)len; // kept in the signature for the existing callers
    return rt_strhash(str);
}
// NOTE(review): interleaved diff residue — the OLD hash_table_destroy() is
// mixed with the NEW hashtable_set(). Comments only; code bytes untouched.
void hash_table_destroy(hash_table_t* ht) {
    for (int i = 0; i < ht->cap; i++) {
        hash_node_t* node = ht->buckets[i];
        while (node) {
            hash_node_t* next = node->next;
            salloc_free(node);
            node = next;
        }
// NEW: insert or update `key`; returns the previous value, or NULL when
// the key was newly added.
void* hashtable_set(hash_table_t* ht, const void* key, void* value) {
    // grow when live entries + tombstones cross the 75% load factor
    if (ht->count + ht->tombstone_count >= ht->entries.cap * 0.75) {
        int new_cap = ht->entries.cap < INIT_HASH_TABLE_SIZE ? INIT_HASH_TABLE_SIZE : ht->entries.cap * 2;
        adjust_capacity(ht, new_cap);
    }
    salloc_free(ht->buckets);
    ht->buckets = NULL;
    ht->size = ht->cap = 0;
}
    u32_t hash = ht->hash_func(key);
    hash_entry_t* entry = find_entry(ht, key, hash);
    void* old_value = NULL;
    if (entry->state == ENTRY_ACTIVE) {
        old_value = entry->value;
    } else {
        // reusing a tombstone slot: one fewer tombstone, one more live entry
        if (entry->state == ENTRY_TOMBSTONE) ht->tombstone_count--;
        ht->count++;
    }
    entry->key = key;
    entry->value = value;
    entry->hash = hash;
    entry->state = ENTRY_ACTIVE;
    return old_value;
}
// Look up `key`; returns the stored value, or NULL when absent.
void* hashtable_get(hash_table_t* ht, const void* key) {
    // An unallocated table cannot contain anything.
    if (ht->entries.cap == 0) {
        return NULL;
    }
    hash_entry_t* slot = find_entry(ht, key, ht->hash_func(key));
    if (slot == NULL || slot->state != ENTRY_ACTIVE) {
        return NULL;
    }
    return slot->value;
}
// Remove `key` from the table; returns the removed value, or NULL when the
// key was not present.
void* hashtable_del(hash_table_t* ht, const void* key) {
    if (ht->entries.cap == 0) {
        return NULL;
    }
    hash_entry_t* slot = find_entry(ht, key, ht->hash_func(key));
    if (slot == NULL || slot->state != ENTRY_ACTIVE) {
        return NULL;
    }
    void* removed = slot->value;
    // Leave a tombstone so linear-probe chains through this slot stay intact.
    slot->state = ENTRY_TOMBSTONE;
    ht->count--;
    ht->tombstone_count++;
    return removed;
}
// Free the entry storage and reset the counters.
// NOTE(review): name kept as "destory" (sic) to match the public header.
void hashtable_destory(hash_table_t* ht) {
    ht->count = 0u;
    ht->tombstone_count = 0u;
    vector_free(ht->entries);
}

View File

@ -1,27 +1,39 @@
#ifndef __SMCC_HASHTABLE_H__
#define __SMCC_HASHTABLE_H__
#include <lib/rt/rt.h>
#include <lib/rt/rt_alloc.h>
#include "vector.h"
// Legacy chain node from the old separate-chaining design (removed by this
// commit): one heap node per interned string.
typedef struct hash_node {
    const char* str;
    int len;
    u32_t hash;
    struct hash_node* next;
} hash_node_t;
// State tag for an open-addressing slot.
typedef enum hash_table_entry_state {
    ENTRY_EMPTY,     // never used; terminates a probe chain
    ENTRY_ACTIVE,    // holds a live key/value pair
    ENTRY_TOMBSTONE  // deleted; keeps probe chains intact
} ht_entry_state_t;
// A single table slot; the table does NOT own the key/value memory.
typedef struct hash_entry {
    const void* key;        // managed by the caller
    void* value;            // managed by the caller
    u32_t hash;             // precomputed hash of key
    ht_entry_state_t state; // slot state (empty/active/tombstone)
} hash_entry_t;
// Hash table body.
// NOTE(review): diff residue — the four legacy chain fields
// (buckets/size/cap/max_cap) appear here alongside the new
// open-addressing fields; only the latter survive in the new version.
typedef struct hash_table {
    hash_node_t** buckets;
    int size;
    int cap;
    int max_cap;
    vector_header(entries, hash_entry_t); // entry slots managed via the vector helpers
    u32_t count;           // live entries (tombstones excluded)
    u32_t tombstone_count; // number of tombstones
    u32_t (*hash_func)(const void* key); // caller-supplied hash callback
    int(*key_cmp)(const void* key1, const void* key2); // returns 0 when keys are equal
} hash_table_t;
// ---- legacy chain-based API (removed by this commit; kept as residue) ----
hash_table_t* new_hash_table(int init_size, int max_cap);
void hash_table_init(hash_table_t* ht, int init_size, int max_cap);
void hash_table_destroy(hash_table_t* ht);
// WARN: you must set hash_func and key_cmp before calling hashtable_init.
void hashtable_init(hash_table_t* ht) ;
void hash_table_insert(hash_table_t* ht, const char* str, int len);
hash_node_t* hash_table_find(hash_table_t* ht, const char* str, int len);
// Returns the previous value for `key`, or NULL when newly inserted.
void* hashtable_set(hash_table_t* ht, const void* key, void* value);
void* hashtable_get(hash_table_t* ht, const void* key);
// BUG FIX: this line was a duplicated hashtable_get declaration;
// hashtable_del is defined in hashtable.c but was never declared here.
void* hashtable_del(hash_table_t* ht, const void* key);
void hashtable_destory(hash_table_t* ht);
#endif // __SMCC_HASHTABLE_H__

View File

@ -0,0 +1,32 @@
#include "strpool.h"
void init_strpool(strpool_t* pool) {
lalloc_init(&pool->stralloc);
pool->ht.hash_func = (u32_t(*)(const void*))rt_strhash;
pool->ht.key_cmp = (int(*)(const void*, const void*))rt_strcmp;
hashtable_init(&pool->ht);
}
// Intern `str`: return the canonical pooled copy, allocating one on the
// first occurrence. Returns NULL if the pool allocator is exhausted.
const char* strpool_intern(strpool_t* pool, const char* str) {
    // Fast path: the string has been interned before.
    void* cached = hashtable_get(&pool->ht, str);
    if (cached != NULL) {
        return cached;
    }
    // Slow path: copy the string (including its NUL) into the pool storage.
    rt_size_t bytes = rt_strlen(str) + 1;
    char* copy = lalloc_alloc(&pool->stralloc, bytes);
    if (copy == NULL) {
        LOG_ERROR("strpool: Failed to allocate memory for string");
        return NULL;
    }
    rt_memcpy(copy, str, bytes);
    // The pooled copy is both key and value in the lookup table.
    hashtable_set(&pool->ht, copy, copy);
    return copy;
}
// Tear down the pool: the lookup table first, then the backing string
// storage (table keys point into stralloc, so this order avoids dangling
// reads if hashtable_destory were ever to inspect keys).
void strpool_destroy(strpool_t* pool) {
    hashtable_destory(&pool->ht);
    lalloc_destroy(&pool->stralloc);
}

View File

@ -2,11 +2,16 @@
#define __SMCC_STRPOOL_H__
#include <lib/core.h>
#include "../ds/hash.h"
// NOTE(review): diff residue — the OLD strpool struct and the leftover
// `new_strpool` declaration appear above their NEW replacements, so
// `struct strpool` is textually defined twice in this view. Comments only.
typedef struct strpool {
    long_alloc_t *long_alloc;
} strpool_t;
#include <lib/rt/rt_alloc.h>
#include <lib/utils/ds/hashtable.h>
void new_strpool();
typedef struct strpool {
    hash_table_t ht;       // fast lookup of already-interned strings
    long_alloc_t stralloc; // dedicated allocator backing the string storage
} strpool_t;
void init_strpool(strpool_t* pool);
const char* strpool_intern(strpool_t* pool, const char* str);
void strpool_destroy(strpool_t* pool);
#endif // __SMCC_STRPOOL_H__

View File

@ -0,0 +1,6 @@
#ifndef __SMCC_SYMTABL_H__
#define __SMCC_SYMTABL_H__
// Placeholder header for the symbol table; no declarations yet.
// NOTE(review): guard is spelled "SYMTABL" (missing E?) and identifiers
// starting with a double underscore are reserved for the implementation.
#endif

View File

@ -7,18 +7,20 @@ typedef struct loc {
    // Source location of a token.
    const char *fname; // file name (not owned)
    int line;
    int col;
    // NOTE(review): diff residue — both the old `short len;` and the new
    // `int len;` are present; only `int len;` survives in the new version.
    short len;
    int len;
} loc_t;
// NOTE(review): diff residue — both the old and new typedef/name lines are
// present (rename: tok_type_t -> tok_basic_type_t). Comments only.
typedef enum tok_type {
typedef enum tok_basic_type {
    TK_BASIC_INVALID,    // error placeholder
    TK_BASIC_KEYWORD,    // keyword
    TK_BASIC_OPERATOR,   // operator
    TK_BASIC_IDENTIFIER, // identifier
    TK_BASIC_LITERAL,    // literal
    TK_BASIC_PUNCTUATOR, // punctuation
    TK_BASIC_WHITESPACE, // whitespace
    TK_BASIC_COMMENT,    // comment
    TK_BASIC_EOF         // end-of-input marker
} tok_type_t;
} tok_basic_type_t;
// Token value payload.
// NOTE(review): this union is split by a diff hunk header, and the struct
// below is cut off before its closing brace — truncated view residue.
typedef union ctype {
    u8_t u8;
@ -34,10 +36,15 @@ typedef union ctype {
    iptr_t iptr;
    uptr_t uptr;
    void* ptr;
    char ch; // added in this commit
    int i;   // added in this commit
    // MUST BE a strpool pointer (interned; compare by identity)
    const char* str;
} ctype_t;
// Token record. NOTE(review): both the old `tok_type_t type;` and the new
// `tok_basic_type_t type;` lines are present (diff residue); the closing
// `} tok_t;` lies outside this view.
typedef struct tok {
    tok_type_t type;
    tok_basic_type_t type;
    int sub_type;
    loc_t loc;
    ctype_t val;

8
lib/utils/utils.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef __SMCC_LIB_UTILS_H__
#define __SMCC_LIB_UTILS_H__
#include "strpool/strpool.h"
#include "symtab/symtab.h"
#include "tokbuf/tokbuf.h"
#endif