feat(frontend): 重构词法分析器
- 添加 .gitignore 文件,忽略编译器生成的二进制文件
- 重构 lexer.c 文件,改进了关键字处理和字符串处理
- 更新前端的前端、解析器和 AST 相关文件,以适应新的词法分析器
- 优化了 token 相关的定义和函数,引入了新的 token 类型
This commit is contained in:
@ -1,142 +1,129 @@
|
||||
#include "hashtable.h"
|
||||
|
||||
#define LOAD_FACTOR 0.75f
|
||||
// 素数表用于桶扩容(最后一个元素为最大允许容量)
|
||||
static const int PRIME_CAPACITIES[] = {
|
||||
11, 23, 47, 97, 193, 389, 769, 1543, 3079,
|
||||
6151, 12289, 24593, 49157, 98317, 196613, 393241,
|
||||
786433, 1572869, 3145739, 6291469, 12582917, 25165843
|
||||
};
|
||||
#define INIT_HASH_TABLE_SIZE (32)
|
||||
|
||||
// 私有函数声明
|
||||
static u32_t calc_hash(const char* str, int len);
|
||||
static void rehash(hash_table_t* ht);
|
||||
|
||||
hash_table_t* new_hash_table(int init_size, int max_cap) {
|
||||
hash_table_t* ht = salloc_alloc(sizeof(hash_table_t));
|
||||
hash_table_init(ht, init_size, max_cap);
|
||||
return ht;
|
||||
// Prepare an open-addressing hash table for first use.
//
// The caller must have assigned ht->hash_func and ht->key_cmp beforehand;
// the trailing Assert verifies that both callbacks are present.
// The entry vector is reset through vector_init and both counters start at 0.
void hashtable_init(hash_table_t* ht) {
    vector_init(ht->entries);

    // No live entries and no tombstones yet.
    ht->tombstone_count = 0;
    ht->count = 0;

    Assert(ht->key_cmp != NULL && ht->hash_func != NULL);
}
|
||||
|
||||
static inline get_real_size(int size) {
|
||||
// 查找第一个不小于size的素数容量
|
||||
int cap_idx = 0;
|
||||
if (size < 0) {
|
||||
return PRIME_CAPACITIES[SMCC_ARRLEN(PRIME_CAPACITIES)-1];
|
||||
}
|
||||
while (PRIME_CAPACITIES[cap_idx] < size && cap_idx < SMCC_ARRLEN(PRIME_CAPACITIES)-1) {
|
||||
cap_idx++;
|
||||
}
|
||||
return PRIME_CAPACITIES[cap_idx];
|
||||
// Round n up to the nearest power of two.
//
// - n <= 1 yields 1 (the smallest power of two), so the result is never 0
//   and is always safe to use as a `cap - 1` index mask.
// - Values above the largest power of two representable in int are clamped
//   to that power instead of overflowing.
//
// All bit manipulation is done on unsigned values to avoid signed overflow
// and right shifts of negative numbers.
static int next_power_of_two(int n) {
    // (~0u >> 1) equals INT_MAX on the usual no-padding representation,
    // hence this is 2^(bits-2): the largest power of two an int can hold.
    const unsigned int max_pow2 = (~0u >> 1) / 2u + 1u;
    unsigned int v;
    unsigned int shift;

    if (n <= 1) {
        return 1;
    }
    if ((unsigned int)n >= max_pow2) {
        return (int)max_pow2;
    }

    // Smear the highest set bit of (n - 1) into every lower position,
    // then add one to reach the next power of two.
    v = (unsigned int)n - 1u;
    for (shift = 1u; shift < sizeof(v) * 8u; shift <<= 1) {
        v |= v >> shift;
    }
    return (int)(v + 1u);
}
|
||||
|
||||
// Initialise a chained hash table in place.
//
// ht        - table to initialise (storage owned by the caller)
// init_size - requested initial bucket count; rounded by get_real_size
// max_cap   - requested capacity limit; rounded by get_real_size
//
// The bucket array is allocated here and every bucket is cleared to NULL:
// insert/find walk each bucket as a NULL-terminated chain, so the array
// must not be left with indeterminate contents.
void hash_table_init(hash_table_t* ht, int init_size, int max_cap) {
    // Resolve the capacity limit first, then the actual starting capacity.
    ht->max_cap = get_real_size(max_cap);
    ht->cap = get_real_size(init_size);
    ht->size = 0;

    ht->buckets = NULL;
    ht->buckets = salloc_realloc(ht->buckets, sizeof(hash_node_t*) * ht->cap);

    // Start with every chain empty.
    for (int i = 0; i < ht->cap; i++) {
        ht->buckets[i] = NULL;
    }
}
|
||||
static hash_entry_t* find_entry(hash_table_t* ht, const void* key, u32_t hash) {
|
||||
if (ht->entries.cap == 0) return NULL;
|
||||
|
||||
u32_t index = hash & (ht->entries.cap - 1); // 容量是2的幂
|
||||
u32_t probe = 0;
|
||||
|
||||
void hash_table_insert(hash_table_t* ht, const char* str, int len) {
|
||||
// 自动扩容检查
|
||||
if (ht->size >= ht->cap * LOAD_FACTOR && ht->cap < ht->max_cap) {
|
||||
rehash(ht);
|
||||
}
|
||||
|
||||
if (ht->size >= ht->cap) {
|
||||
LOG_TRACE("Hash table size exceeds maximum capacity. Consider increasing max_capacity.");
|
||||
}
|
||||
|
||||
// 计算哈希值
|
||||
u32_t hash = calc_hash(str, len);
|
||||
int bucket_idx = hash % ht->cap;
|
||||
|
||||
// 检查重复
|
||||
hash_node_t* node = ht->buckets[bucket_idx];
|
||||
while (node) {
|
||||
if (node->hash == hash &&
|
||||
node->len == len &&
|
||||
memcmp(node->str, str, len) == 0) {
|
||||
return; // 已存在
|
||||
hash_entry_t* tombstone = NULL;
|
||||
|
||||
while (1) {
|
||||
hash_entry_t* entry = &vector_at(ht->entries, index);
|
||||
if (entry->state == ENTRY_EMPTY) {
|
||||
return tombstone ? tombstone : entry;
|
||||
}
|
||||
node = node->next;
|
||||
}
|
||||
|
||||
// 创建新节点
|
||||
hash_node_t* new_node = salloc_alloc(sizeof(hash_node_t));
|
||||
new_node->str = str;
|
||||
new_node->len = len;
|
||||
new_node->hash = hash;
|
||||
new_node->next = ht->buckets[bucket_idx];
|
||||
ht->buckets[bucket_idx] = new_node;
|
||||
ht->size++;
|
||||
}
|
||||
|
||||
hash_node_t* hash_table_find(hash_table_t* ht, const char* str, int len) {
|
||||
u32_t hash = calc_hash(str, len);
|
||||
int bucket_idx = hash % ht->cap;
|
||||
|
||||
hash_node_t* node = ht->buckets[bucket_idx];
|
||||
while (node) {
|
||||
if (node->hash == hash &&
|
||||
node->len == len &&
|
||||
memcmp(node->str, str, len) == 0) {
|
||||
return node;
|
||||
|
||||
if (entry->state == ENTRY_TOMBSTONE) {
|
||||
if (!tombstone) tombstone = entry;
|
||||
} else if (entry->hash == hash && ht->key_cmp(entry->key, key) == 0) {
|
||||
return entry;
|
||||
}
|
||||
node = node->next;
|
||||
|
||||
// Linear probing: step to the next slot, wrapping around at capacity
|
||||
index = (index + 1) & (ht->entries.cap - 1);
|
||||
probe++;
|
||||
if (probe >= ht->entries.cap) break;
|
||||
}
|
||||
LOG_ERROR("hashset_find: hash table is full");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void rehash(hash_table_t* ht) {
|
||||
int old_cap = ht->cap;
|
||||
hash_node_t** old_buckets = ht->buckets;
|
||||
static void adjust_capacity(hash_table_t* ht, int new_cap) {
|
||||
new_cap = next_power_of_two(new_cap);
|
||||
Assert(new_cap >= ht->entries.cap);
|
||||
|
||||
// 查找下一个素数容量
|
||||
int new_cap_idx = 0;
|
||||
while (PRIME_CAPACITIES[new_cap_idx] <= old_cap &&
|
||||
new_cap_idx < ht->max_cap) {
|
||||
new_cap_idx++;
|
||||
}
|
||||
ht->cap = PRIME_CAPACITIES[new_cap_idx];
|
||||
vector_header(old_entries, hash_entry_t);
|
||||
old_entries.data = ht->entries.data;
|
||||
old_entries.cap = ht->entries.cap;
|
||||
|
||||
// 分配新桶数组
|
||||
ht->buckets = salloc_alloc(sizeof(hash_node_t*) * ht->cap);
|
||||
memset(ht->buckets, 0, sizeof(hash_node_t*) * ht->cap);
|
||||
// size is not used by the table itself; it is set only for the gdb Python extension's debug view
|
||||
ht->entries.size = new_cap;
|
||||
ht->entries.cap = new_cap;
|
||||
ht->entries.data = salloc_realloc(NULL, new_cap * sizeof(hash_entry_t));
|
||||
rt_memset(ht->entries.data, 0, new_cap * sizeof(hash_entry_t));
|
||||
|
||||
// 重新哈希所有节点
|
||||
for (int i = 0; i < old_cap; i++) {
|
||||
hash_node_t* node = old_buckets[i];
|
||||
while (node) {
|
||||
hash_node_t* next = node->next;
|
||||
int new_bucket = node->hash % ht->cap;
|
||||
node->next = ht->buckets[new_bucket];
|
||||
ht->buckets[new_bucket] = node;
|
||||
node = next;
|
||||
// rehash all of the old data
|
||||
for (rt_size_t i = 0; i < old_entries.cap; i++) {
|
||||
hash_entry_t* entry = &vector_at(old_entries, i);
|
||||
if (entry->state == ENTRY_ACTIVE) {
|
||||
hash_entry_t* dest = find_entry(ht, entry->key, entry->hash);
|
||||
*dest = *entry;
|
||||
}
|
||||
}
|
||||
|
||||
salloc_free(old_buckets);
|
||||
vector_free(old_entries);
|
||||
ht->tombstone_count = 0;
|
||||
}
|
||||
|
||||
// Compute the hash of a string key.
//
// Uses the same algorithm as the HASH_FNV_1A macro (via rt_strhash).
// str - string to hash
// len - currently unused: rt_strhash receives only the pointer.
//       NOTE(review): presumably rt_strhash reads up to a NUL terminator,
//       so str is expected to be NUL-terminated — confirm.
// Returns the value produced by rt_strhash.
static u32_t calc_hash(const char* str, int len) {
    (void)len;
    // The computed hash must actually be returned to the caller.
    return rt_strhash(str);
}
|
||||
|
||||
void hash_table_destroy(hash_table_t* ht) {
|
||||
for (int i = 0; i < ht->cap; i++) {
|
||||
hash_node_t* node = ht->buckets[i];
|
||||
while (node) {
|
||||
hash_node_t* next = node->next;
|
||||
salloc_free(node);
|
||||
node = next;
|
||||
}
|
||||
void* hashtable_set(hash_table_t* ht, const void* key, void* value) {
|
||||
if (ht->count + ht->tombstone_count >= ht->entries.cap * 0.75) {
|
||||
int new_cap = ht->entries.cap < INIT_HASH_TABLE_SIZE ? INIT_HASH_TABLE_SIZE : ht->entries.cap * 2;
|
||||
adjust_capacity(ht, new_cap);
|
||||
}
|
||||
salloc_free(ht->buckets);
|
||||
ht->buckets = NULL;
|
||||
ht->size = ht->cap = 0;
|
||||
}
|
||||
|
||||
u32_t hash = ht->hash_func(key);
|
||||
hash_entry_t* entry = find_entry(ht, key, hash);
|
||||
|
||||
void* old_value = NULL;
|
||||
if (entry->state == ENTRY_ACTIVE) {
|
||||
old_value = entry->value;
|
||||
} else {
|
||||
if (entry->state == ENTRY_TOMBSTONE) ht->tombstone_count--;
|
||||
ht->count++;
|
||||
}
|
||||
|
||||
entry->key = key;
|
||||
entry->value = value;
|
||||
entry->hash = hash;
|
||||
entry->state = ENTRY_ACTIVE;
|
||||
return old_value;
|
||||
}
|
||||
|
||||
// Look up the value stored under key.
//
// Returns the stored value pointer when an active entry matches key,
// or NULL when the table has no storage yet, the probe finds no slot,
// or the slot found is not an active entry.
void* hashtable_get(hash_table_t* ht, const void* key) {
    // A table that has never been grown cannot contain anything.
    if (ht->entries.cap == 0) {
        return NULL;
    }

    hash_entry_t* slot = find_entry(ht, key, ht->hash_func(key));
    if (slot == NULL || slot->state != ENTRY_ACTIVE) {
        return NULL;
    }
    return slot->value;
}
|
||||
|
||||
// Remove the entry stored under key.
//
// The slot is not emptied; it is marked as a tombstone so that probe
// sequences passing through it remain intact. The value pointer is handed
// back to the caller unchanged (nothing is freed here).
//
// Returns the removed value, or NULL when the table has no storage or no
// active entry matches key.
void* hashtable_del(hash_table_t* ht, const void* key) {
    if (ht->entries.cap == 0) {
        return NULL;
    }

    hash_entry_t* slot = find_entry(ht, key, ht->hash_func(key));
    if (slot != NULL && slot->state == ENTRY_ACTIVE) {
        void* removed = slot->value;

        slot->state = ENTRY_TOMBSTONE;
        // One fewer live entry, one more tombstone.
        ht->tombstone_count++;
        ht->count--;
        return removed;
    }
    return NULL;
}
|
||||
|
||||
// Release the table's entry storage and reset its counters.
//
// Only the entry vector is freed; keys and values are not touched here.
// The hash_func / key_cmp callbacks are left as they are.
void hashtable_destory(hash_table_t* ht) {
    vector_free(ht->entries);

    ht->tombstone_count = 0;
    ht->count = 0;
}
|
||||
|
@ -1,27 +1,39 @@
|
||||
#ifndef __SMCC_HASHTABLE_H__
|
||||
#define __SMCC_HASHTABLE_H__
|
||||
|
||||
#include <lib/rt/rt.h>
|
||||
#include <lib/rt/rt_alloc.h>
|
||||
#include "vector.h"
|
||||
|
||||
typedef struct hash_node {
|
||||
const char* str;
|
||||
int len;
|
||||
u32_t hash;
|
||||
struct hash_node* next;
|
||||
} hash_node_t;
|
||||
// 哈希表条目状态标记
|
||||
typedef enum hash_table_entry_state {
|
||||
ENTRY_EMPTY,
|
||||
ENTRY_ACTIVE,
|
||||
ENTRY_TOMBSTONE
|
||||
} ht_entry_state_t;
|
||||
|
||||
// 哈希表条目结构(不管理key/value内存)
|
||||
typedef struct hash_entry {
|
||||
const void* key; // 由调用者管理
|
||||
void* value; // 由调用者管理
|
||||
u32_t hash; // 预计算哈希值
|
||||
ht_entry_state_t state; // 条目状态
|
||||
} hash_entry_t;
|
||||
|
||||
// 哈希表主体结构
|
||||
typedef struct hash_table {
|
||||
hash_node_t** buckets;
|
||||
int size;
|
||||
int cap;
|
||||
int max_cap;
|
||||
vector_header(entries, hash_entry_t); // 使用vector管理条目
|
||||
u32_t count; // 有效条目数(不含墓碑)
|
||||
u32_t tombstone_count; // 墓碑数量
|
||||
u32_t (*hash_func)(const void* key);
|
||||
int(*key_cmp)(const void* key1, const void* key2);
|
||||
} hash_table_t;
|
||||
|
||||
hash_table_t* new_hash_table(int init_size, int max_cap);
|
||||
void hash_table_init(hash_table_t* ht, int init_size, int max_cap);
|
||||
void hash_table_destroy(hash_table_t* ht);
|
||||
// WARNING: you must set hash_func and key_cmp before calling hashtable_init
|
||||
void hashtable_init(hash_table_t* ht) ;
|
||||
|
||||
void hash_table_insert(hash_table_t* ht, const char* str, int len);
|
||||
hash_node_t* hash_table_find(hash_table_t* ht, const char* str, int len);
|
||||
void* hashtable_set(hash_table_t* ht, const void* key, void* value);
|
||||
void* hashtable_get(hash_table_t* ht, const void* key);
|
||||
// Remove the entry stored under key; returns the removed value, or NULL
// when no active entry matches.
void* hashtable_del(hash_table_t* ht, const void* key);
|
||||
void hashtable_destory(hash_table_t* ht);
|
||||
|
||||
#endif // __SMCC_HASHTABLE_H__
|
||||
|
@ -0,0 +1,32 @@
|
||||
#include "strpool.h"
|
||||
|
||||
void init_strpool(strpool_t* pool) {
|
||||
lalloc_init(&pool->stralloc);
|
||||
|
||||
pool->ht.hash_func = (u32_t(*)(const void*))rt_strhash;
|
||||
pool->ht.key_cmp = (int(*)(const void*, const void*))rt_strcmp;
|
||||
hashtable_init(&pool->ht);
|
||||
}
|
||||
|
||||
// Intern a string: return the pool's copy of str, creating it on first use.
//
// If the table already holds an entry for str, that stored pointer is
// returned. Otherwise the string (including its terminator) is copied into
// memory from the pool's string allocator, registered in the table with the
// copy acting as both key and value, and the copy is returned.
//
// Returns NULL (after logging an error) when the allocator cannot provide
// memory for the copy.
const char* strpool_intern(strpool_t* pool, const char* str) {
    void* interned = hashtable_get(&pool->ht, str);
    if (interned != NULL) {
        return interned;
    }

    // +1 so the terminator is copied along with the characters.
    rt_size_t nbytes = rt_strlen(str) + 1;
    char* copy = lalloc_alloc(&pool->stralloc, nbytes);
    if (copy == NULL) {
        LOG_ERROR("strpool: Failed to allocate memory for string");
        return NULL;
    }

    rt_memcpy(copy, str, nbytes);
    hashtable_set(&pool->ht, copy, copy);
    return copy;
}
|
||||
|
||||
// Tear down a string pool.
//
// The lookup table is destroyed first, then the string allocator.
// NOTE(review): interned pointers presumably become invalid once the
// allocator is destroyed — confirm against lalloc_destroy.
void strpool_destroy(strpool_t* pool) {
    hashtable_destory(&pool->ht);
    lalloc_destroy(&pool->stralloc);
}
|
||||
|
@ -2,11 +2,16 @@
|
||||
#define __SMCC_STRPOOL_H__
|
||||
|
||||
#include <lib/core.h>
|
||||
#include "../ds/hash.h"
|
||||
typedef struct strpool {
|
||||
long_alloc_t *long_alloc;
|
||||
} strpool_t;
|
||||
#include <lib/rt/rt_alloc.h>
|
||||
#include <lib/utils/ds/hashtable.h>
|
||||
|
||||
void new_strpool();
|
||||
typedef struct strpool {
|
||||
hash_table_t ht; // 用于快速查找字符串
|
||||
long_alloc_t stralloc; // 专门用于字符串存储的分配器
|
||||
} strpool_t;
|
||||
|
||||
void init_strpool(strpool_t* pool);
|
||||
const char* strpool_intern(strpool_t* pool, const char* str);
|
||||
void strpool_destroy(strpool_t* pool);
|
||||
|
||||
#endif // __SMCC_STRPOOL_H__
|
||||
|
@ -0,0 +1,6 @@
|
||||
#ifndef __SMCC_SYMTABL_H__
|
||||
#define __SMCC_SYMTABL_H__
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -7,18 +7,20 @@ typedef struct loc {
|
||||
const char *fname;
|
||||
int line;
|
||||
int col;
|
||||
short len;
|
||||
int len;
|
||||
} loc_t;
|
||||
|
||||
typedef enum tok_type {
|
||||
typedef enum tok_basic_type {
|
||||
TK_BASIC_INVALID, // 错误占位
|
||||
TK_BASIC_KEYWORD, // 关键字
|
||||
TK_BASIC_OPERATOR, // 操作符
|
||||
TK_BASIC_IDENTIFIER, // 标识符
|
||||
TK_BASIC_LITERAL, // 字面量
|
||||
TK_BASIC_PUNCTUATOR, // 标点符号
|
||||
|
||||
TK_BASIC_WHITESPACE, // 空白
|
||||
TK_BASIC_COMMENT, // 注释
|
||||
TK_BASIC_EOF // 结束标记
|
||||
} tok_type_t;
|
||||
} tok_basic_type_t;
|
||||
|
||||
typedef union ctype {
|
||||
u8_t u8;
|
||||
@ -34,10 +36,15 @@ typedef union ctype {
|
||||
iptr_t iptr;
|
||||
uptr_t uptr;
|
||||
void* ptr;
|
||||
char ch;
|
||||
int i;
|
||||
|
||||
// MUST BE strpool ptr
|
||||
const char* str;
|
||||
} ctype_t;
|
||||
|
||||
typedef struct tok {
|
||||
tok_type_t type;
|
||||
tok_basic_type_t type;
|
||||
int sub_type;
|
||||
loc_t loc;
|
||||
ctype_t val;
|
8
lib/utils/utils.h
Normal file
8
lib/utils/utils.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef __SMCC_LIB_UTILS_H__
|
||||
#define __SMCC_LIB_UTILS_H__
|
||||
|
||||
#include "strpool/strpool.h"
|
||||
#include "symtab/symtab.h"
|
||||
#include "tokbuf/tokbuf.h"
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user