feat(frontend): 重构词法分析器

- 添加 .gitignore 文件,忽略编译器生成的二进制文件
- 重构 lexer.c 文件,改进了关键字处理和字符串处理
- 更新前端、解析器和 AST 相关文件,以适应新的词法分析器
- 优化了 token 相关的定义和函数,引入了新的 token 类型
This commit is contained in:
ZZY
2025-03-23 12:13:16 +08:00
parent 05c637e594
commit 2b4857001c
33 changed files with 532 additions and 624 deletions

View File

@ -1,142 +1,129 @@
#include "hashtable.h"
#define LOAD_FACTOR 0.75f
// Prime table used when growing the bucket array
// (the last element is the maximum capacity ever allowed).
static const int PRIME_CAPACITIES[] = {
    11, 23, 47, 97, 193, 389, 769, 1543, 3079,
    6151, 12289, 24593, 49157, 98317, 196613, 393241,
    786433, 1572869, 3145739, 6291469, 12582917, 25165843
};
#define INIT_HASH_TABLE_SIZE (32)
// private helper declarations
static u32_t calc_hash(const char* str, int len);
static void rehash(hash_table_t* ht);
// Heap-allocating constructor for the legacy chain-based API.
// NOTE(review): the closing brace of this function is not visible in this
// diff view — the lines below are truncated residue; code left untouched.
hash_table_t* new_hash_table(int init_size, int max_cap) {
    hash_table_t* ht = salloc_alloc(sizeof(hash_table_t));
    hash_table_init(ht, init_size, max_cap);
    return ht;
// Prepare an open-addressing table for use. The caller must have installed
// both the hash_func and key_cmp callbacks beforehand (checked below).
void hashtable_init(hash_table_t* ht) {
    vector_init(ht->entries);
    ht->count = 0u;
    ht->tombstone_count = 0u;
    // Fail fast if the mandatory callbacks were not supplied.
    Assert(ht->key_cmp != NULL && ht->hash_func != NULL);
}
// Map a requested size onto the prime-capacity table.
// BUG FIX: the original declaration omitted the return type (implicit int
// is invalid since C99); the closing brace was also lost in this diff view.
// A negative request is treated as "give me the maximum allowed capacity".
static inline int get_real_size(int size) {
    int cap_idx = 0;
    if (size < 0) {
        return PRIME_CAPACITIES[SMCC_ARRLEN(PRIME_CAPACITIES)-1];
    }
    // First prime not smaller than size, clamped to the last table entry.
    while (PRIME_CAPACITIES[cap_idx] < size && cap_idx < SMCC_ARRLEN(PRIME_CAPACITIES)-1) {
        cap_idx++;
    }
    return PRIME_CAPACITIES[cap_idx];
}
// Round n up to the nearest power of two (5 -> 8, 32 -> 32).
// BUG FIX: for n <= 0 the old code right-shifted a negative value
// (implementation-defined) and returned 0; a zero capacity would break the
// `hash & (cap - 1)` index masking used by the table. Non-positive inputs
// are now clamped to 1, and the bit-smearing runs on an unsigned value.
static int next_power_of_two(int n) {
    if (n <= 1) {
        return 1;
    }
    unsigned int v = (unsigned int)n - 1u;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    return (int)(v + 1u);
}
// Legacy initializer: normalize both requested capacities onto the prime
// table, then allocate an empty bucket array of the chosen capacity.
void hash_table_init(hash_table_t* ht, int init_size, int max_cap) {
    ht->max_cap = get_real_size(max_cap);
    ht->cap = get_real_size(init_size);
    ht->size = 0;
    // salloc_realloc with a NULL pointer behaves as a fresh allocation.
    ht->buckets = salloc_realloc(NULL, sizeof(hash_node_t*) * ht->cap);
}
// NOTE(review): this span is interleaved diff residue — the NEW
// open-addressing find_entry() is mixed line-by-line with the OLD
// chain-based hash_table_insert()/hash_table_find() that this commit
// deletes. Only comments are changed here; code bytes are untouched.
static hash_entry_t* find_entry(hash_table_t* ht, const void* key, u32_t hash) {
    if (ht->entries.cap == 0) return NULL;
    u32_t index = hash & (ht->entries.cap - 1); // capacity is a power of two
    u32_t probe = 0;
// OLD chain-based insert (removed by this commit):
void hash_table_insert(hash_table_t* ht, const char* str, int len) {
    // automatic growth check
    if (ht->size >= ht->cap * LOAD_FACTOR && ht->cap < ht->max_cap) {
        rehash(ht);
    }
    if (ht->size >= ht->cap) {
        LOG_TRACE("Hash table size exceeds maximum capacity. Consider increasing max_capacity.");
    }
    // compute the hash value
    u32_t hash = calc_hash(str, len);
    int bucket_idx = hash % ht->cap;
    // check for an existing duplicate
    hash_node_t* node = ht->buckets[bucket_idx];
    while (node) {
        if (node->hash == hash &&
            node->len == len &&
            memcmp(node->str, str, len) == 0) {
            return; // already present
    hash_entry_t* tombstone = NULL;
    while (1) {
        hash_entry_t* entry = &vector_at(ht->entries, index);
        if (entry->state == ENTRY_EMPTY) {
            // an empty slot ends the probe chain; prefer reusing a tombstone
            return tombstone ? tombstone : entry;
        }
        node = node->next;
    }
    // create a new chain node
    hash_node_t* new_node = salloc_alloc(sizeof(hash_node_t));
    new_node->str = str;
    new_node->len = len;
    new_node->hash = hash;
    new_node->next = ht->buckets[bucket_idx];
    ht->buckets[bucket_idx] = new_node;
    ht->size++;
}
hash_node_t* hash_table_find(hash_table_t* ht, const char* str, int len) {
    u32_t hash = calc_hash(str, len);
    int bucket_idx = hash % ht->cap;
    hash_node_t* node = ht->buckets[bucket_idx];
    while (node) {
        if (node->hash == hash &&
            node->len == len &&
            memcmp(node->str, str, len) == 0) {
            return node;
        if (entry->state == ENTRY_TOMBSTONE) {
            if (!tombstone) tombstone = entry;
        } else if (entry->hash == hash && ht->key_cmp(entry->key, key) == 0) {
            return entry;
        }
        node = node->next;
        // linear probing
        index = (index + 1) & (ht->entries.cap - 1);
        probe++;
        if (probe >= ht->entries.cap) break;
    }
    LOG_ERROR("hashset_find: hash table is full");
    return NULL;
}
// NOTE(review): interleaved diff residue — the OLD chain-based rehash() is
// mixed with the NEW open-addressing adjust_capacity(). Comments only;
// code bytes are untouched.
static void rehash(hash_table_t* ht) {
    int old_cap = ht->cap;
    hash_node_t** old_buckets = ht->buckets;
// NEW: grow the entry vector to a power-of-two capacity and re-slot all
// active entries (tombstones are dropped in the process).
static void adjust_capacity(hash_table_t* ht, int new_cap) {
    new_cap = next_power_of_two(new_cap);
    Assert(new_cap >= ht->entries.cap);
    // find the next prime capacity
    int new_cap_idx = 0;
    while (PRIME_CAPACITIES[new_cap_idx] <= old_cap &&
        new_cap_idx < ht->max_cap) {
        new_cap_idx++;
    }
    ht->cap = PRIME_CAPACITIES[new_cap_idx];
    // snapshot the old entry storage before reallocating
    vector_header(old_entries, hash_entry_t);
    old_entries.data = ht->entries.data;
    old_entries.cap = ht->entries.cap;
    // allocate the new bucket array
    ht->buckets = salloc_alloc(sizeof(hash_node_t*) * ht->cap);
    memset(ht->buckets, 0, sizeof(hash_node_t*) * ht->cap);
    // size is unused here, but kept in sync for the gdb python extension debug helper
    ht->entries.size = new_cap;
    ht->entries.cap = new_cap;
    ht->entries.data = salloc_realloc(NULL, new_cap * sizeof(hash_entry_t));
    rt_memset(ht->entries.data, 0, new_cap * sizeof(hash_entry_t));
    // re-hash all chain nodes into the new buckets
    for (int i = 0; i < old_cap; i++) {
        hash_node_t* node = old_buckets[i];
        while (node) {
            hash_node_t* next = node->next;
            int new_bucket = node->hash % ht->cap;
            node->next = ht->buckets[new_bucket];
            ht->buckets[new_bucket] = node;
            node = next;
    // rehash all of the old data
    for (rt_size_t i = 0; i < old_entries.cap; i++) {
        hash_entry_t* entry = &vector_at(old_entries, i);
        if (entry->state == ENTRY_ACTIVE) {
            hash_entry_t* dest = find_entry(ht, entry->key, entry->hash);
            *dest = *entry;
        }
    }
    salloc_free(old_buckets);
    vector_free(old_entries);
    ht->tombstone_count = 0;
}
// Hash a string, staying consistent with the HASH_FNV_1A macro by
// delegating to rt_strhash.
// BUG FIX: the result of rt_strhash() was computed but never returned, so
// the function fell off the end of a non-void body — callers read an
// indeterminate value (undefined behavior).
// NOTE(review): rt_strhash appears to take only the string, so `len` is
// unused here — confirm it hashes up to the NUL terminator.
static u32_t calc_hash(const char* str, int len) {
    (void)len; // kept in the signature for the existing callers
    return rt_strhash(str);
}
// NOTE(review): interleaved diff residue — the OLD hash_table_destroy() is
// mixed with the NEW hashtable_set(). Comments only; code bytes untouched.
void hash_table_destroy(hash_table_t* ht) {
    for (int i = 0; i < ht->cap; i++) {
        hash_node_t* node = ht->buckets[i];
        while (node) {
            hash_node_t* next = node->next;
            salloc_free(node);
            node = next;
        }
// NEW: insert or update `key`; returns the previous value, or NULL when
// the key was newly added.
void* hashtable_set(hash_table_t* ht, const void* key, void* value) {
    // grow when live entries + tombstones cross the 75% load factor
    if (ht->count + ht->tombstone_count >= ht->entries.cap * 0.75) {
        int new_cap = ht->entries.cap < INIT_HASH_TABLE_SIZE ? INIT_HASH_TABLE_SIZE : ht->entries.cap * 2;
        adjust_capacity(ht, new_cap);
    }
    salloc_free(ht->buckets);
    ht->buckets = NULL;
    ht->size = ht->cap = 0;
}
    u32_t hash = ht->hash_func(key);
    hash_entry_t* entry = find_entry(ht, key, hash);
    void* old_value = NULL;
    if (entry->state == ENTRY_ACTIVE) {
        old_value = entry->value;
    } else {
        // reusing a tombstone slot: one fewer tombstone, one more live entry
        if (entry->state == ENTRY_TOMBSTONE) ht->tombstone_count--;
        ht->count++;
    }
    entry->key = key;
    entry->value = value;
    entry->hash = hash;
    entry->state = ENTRY_ACTIVE;
    return old_value;
}
// Look up `key`; returns the stored value, or NULL when absent.
void* hashtable_get(hash_table_t* ht, const void* key) {
    // An unallocated table cannot contain anything.
    if (ht->entries.cap == 0) {
        return NULL;
    }
    hash_entry_t* slot = find_entry(ht, key, ht->hash_func(key));
    if (slot == NULL || slot->state != ENTRY_ACTIVE) {
        return NULL;
    }
    return slot->value;
}
// Remove `key` from the table; returns the removed value, or NULL when the
// key was not present.
void* hashtable_del(hash_table_t* ht, const void* key) {
    if (ht->entries.cap == 0) {
        return NULL;
    }
    hash_entry_t* slot = find_entry(ht, key, ht->hash_func(key));
    if (slot == NULL || slot->state != ENTRY_ACTIVE) {
        return NULL;
    }
    void* removed = slot->value;
    // Leave a tombstone so linear-probe chains through this slot stay intact.
    slot->state = ENTRY_TOMBSTONE;
    ht->count--;
    ht->tombstone_count++;
    return removed;
}
// Free the entry storage and reset the counters.
// NOTE(review): name kept as "destory" (sic) to match the public header.
void hashtable_destory(hash_table_t* ht) {
    ht->count = 0u;
    ht->tombstone_count = 0u;
    vector_free(ht->entries);
}

View File

@ -1,27 +1,39 @@
#ifndef __SMCC_HASHTABLE_H__
#define __SMCC_HASHTABLE_H__
#include <lib/rt/rt.h>
#include <lib/rt/rt_alloc.h>
#include "vector.h"
// Legacy chain node from the old separate-chaining design (removed by this
// commit): one heap node per interned string.
typedef struct hash_node {
    const char* str;
    int len;
    u32_t hash;
    struct hash_node* next;
} hash_node_t;
// State tag for an open-addressing slot.
typedef enum hash_table_entry_state {
    ENTRY_EMPTY,     // never used; terminates a probe chain
    ENTRY_ACTIVE,    // holds a live key/value pair
    ENTRY_TOMBSTONE  // deleted; keeps probe chains intact
} ht_entry_state_t;
// A single table slot; the table does NOT own the key/value memory.
typedef struct hash_entry {
    const void* key;        // managed by the caller
    void* value;            // managed by the caller
    u32_t hash;             // precomputed hash of key
    ht_entry_state_t state; // slot state (empty/active/tombstone)
} hash_entry_t;
// Hash table body.
// NOTE(review): diff residue — the four legacy chain fields
// (buckets/size/cap/max_cap) appear here alongside the new
// open-addressing fields; only the latter survive in the new version.
typedef struct hash_table {
    hash_node_t** buckets;
    int size;
    int cap;
    int max_cap;
    vector_header(entries, hash_entry_t); // entry slots managed via the vector helpers
    u32_t count;           // live entries (tombstones excluded)
    u32_t tombstone_count; // number of tombstones
    u32_t (*hash_func)(const void* key); // caller-supplied hash callback
    int(*key_cmp)(const void* key1, const void* key2); // returns 0 when keys are equal
} hash_table_t;
// ---- legacy chain-based API (removed by this commit; kept as residue) ----
hash_table_t* new_hash_table(int init_size, int max_cap);
void hash_table_init(hash_table_t* ht, int init_size, int max_cap);
void hash_table_destroy(hash_table_t* ht);
// WARN: you must set hash_func and key_cmp before calling hashtable_init.
void hashtable_init(hash_table_t* ht) ;
void hash_table_insert(hash_table_t* ht, const char* str, int len);
hash_node_t* hash_table_find(hash_table_t* ht, const char* str, int len);
// Returns the previous value for `key`, or NULL when newly inserted.
void* hashtable_set(hash_table_t* ht, const void* key, void* value);
void* hashtable_get(hash_table_t* ht, const void* key);
// BUG FIX: this line was a duplicated hashtable_get declaration;
// hashtable_del is defined in hashtable.c but was never declared here.
void* hashtable_del(hash_table_t* ht, const void* key);
void hashtable_destory(hash_table_t* ht);
#endif // __SMCC_HASHTABLE_H__

View File

@ -0,0 +1,32 @@
#include "strpool.h"
void init_strpool(strpool_t* pool) {
lalloc_init(&pool->stralloc);
pool->ht.hash_func = (u32_t(*)(const void*))rt_strhash;
pool->ht.key_cmp = (int(*)(const void*, const void*))rt_strcmp;
hashtable_init(&pool->ht);
}
// Intern `str`: return the canonical pooled copy, allocating one on the
// first occurrence. Returns NULL if the pool allocator is exhausted.
const char* strpool_intern(strpool_t* pool, const char* str) {
    // Fast path: the string has been interned before.
    void* cached = hashtable_get(&pool->ht, str);
    if (cached != NULL) {
        return cached;
    }
    // Slow path: copy the string (including its NUL) into the pool storage.
    rt_size_t bytes = rt_strlen(str) + 1;
    char* copy = lalloc_alloc(&pool->stralloc, bytes);
    if (copy == NULL) {
        LOG_ERROR("strpool: Failed to allocate memory for string");
        return NULL;
    }
    rt_memcpy(copy, str, bytes);
    // The pooled copy is both key and value in the lookup table.
    hashtable_set(&pool->ht, copy, copy);
    return copy;
}
// Tear down the pool: the lookup table first, then the backing string
// storage (table keys point into stralloc, so this order avoids dangling
// reads if hashtable_destory were ever to inspect keys).
void strpool_destroy(strpool_t* pool) {
    hashtable_destory(&pool->ht);
    lalloc_destroy(&pool->stralloc);
}

View File

@ -2,11 +2,16 @@
#define __SMCC_STRPOOL_H__
#include <lib/core.h>
#include "../ds/hash.h"
// NOTE(review): diff residue — the OLD strpool struct and the leftover
// `new_strpool` declaration appear above their NEW replacements, so
// `struct strpool` is textually defined twice in this view. Comments only.
typedef struct strpool {
    long_alloc_t *long_alloc;
} strpool_t;
#include <lib/rt/rt_alloc.h>
#include <lib/utils/ds/hashtable.h>
void new_strpool();
typedef struct strpool {
    hash_table_t ht;       // fast lookup of already-interned strings
    long_alloc_t stralloc; // dedicated allocator backing the string storage
} strpool_t;
void init_strpool(strpool_t* pool);
const char* strpool_intern(strpool_t* pool, const char* str);
void strpool_destroy(strpool_t* pool);
#endif // __SMCC_STRPOOL_H__

View File

@ -0,0 +1,6 @@
#ifndef __SMCC_SYMTABL_H__
#define __SMCC_SYMTABL_H__
// Placeholder header for the symbol table; no declarations yet.
// NOTE(review): guard is spelled "SYMTABL" (missing E?) and identifiers
// starting with a double underscore are reserved for the implementation.
#endif

View File

@ -7,18 +7,20 @@ typedef struct loc {
    // Source location of a token.
    const char *fname; // file name (not owned)
    int line;
    int col;
    // NOTE(review): diff residue — both the old `short len;` and the new
    // `int len;` are present; only `int len;` survives in the new version.
    short len;
    int len;
} loc_t;
// NOTE(review): diff residue — both the old and new typedef/name lines are
// present (rename: tok_type_t -> tok_basic_type_t). Comments only.
typedef enum tok_type {
typedef enum tok_basic_type {
    TK_BASIC_INVALID,    // error placeholder
    TK_BASIC_KEYWORD,    // keyword
    TK_BASIC_OPERATOR,   // operator
    TK_BASIC_IDENTIFIER, // identifier
    TK_BASIC_LITERAL,    // literal
    TK_BASIC_PUNCTUATOR, // punctuation
    TK_BASIC_WHITESPACE, // whitespace
    TK_BASIC_COMMENT,    // comment
    TK_BASIC_EOF         // end-of-input marker
} tok_type_t;
} tok_basic_type_t;
// Token value payload.
// NOTE(review): this union is split by a diff hunk header, and the struct
// below is cut off before its closing brace — truncated view residue.
typedef union ctype {
    u8_t u8;
@ -34,10 +36,15 @@ typedef union ctype {
    iptr_t iptr;
    uptr_t uptr;
    void* ptr;
    char ch; // added in this commit
    int i;   // added in this commit
    // MUST BE a strpool pointer (interned; compare by identity)
    const char* str;
} ctype_t;
// Token record. NOTE(review): both the old `tok_type_t type;` and the new
// `tok_basic_type_t type;` lines are present (diff residue); the closing
// `} tok_t;` lies outside this view.
typedef struct tok {
    tok_type_t type;
    tok_basic_type_t type;
    int sub_type;
    loc_t loc;
    ctype_t val;

8
lib/utils/utils.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef __SMCC_LIB_UTILS_H__
#define __SMCC_LIB_UTILS_H__
#include "strpool/strpool.h"
#include "symtab/symtab.h"
#include "tokbuf/tokbuf.h"
#endif