feat(frontend): 重构词法分析器

- 添加 .gitignore 文件,忽略编译器生成的二进制文件
- 重构 lexer.c 文件,改进了关键字处理和字符串处理
- 更新前端的前端、解析器和 AST 相关文件,以适应新的词法分析器
- 优化了 token 相关的定义和函数,引入了新的 token 类型
This commit is contained in:
ZZY
2025-03-23 12:13:16 +08:00
parent 05c637e594
commit 2b4857001c
33 changed files with 532 additions and 624 deletions

View File

@ -19,7 +19,7 @@ ast_node_t* parse_block(parser_t* parser) {
symtab_enter_scope(parser->symtab);
tok_stream_t *tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype;
cc_tktype_t ttype;
ast_node_t* node = new_ast_node_block();
expect_pop_tok(tokbuf, TOKEN_L_BRACE);

View File

@ -37,7 +37,7 @@ int peek_decl(tok_stream_t* tokbuf) {
ast_node_t* parse_decl_val(parser_t* parser) {
tok_stream_t* tokbuf = &parser->tokbuf;
tok_type_t ttype;
cc_tktype_t ttype;
flush_peek_tok(tokbuf);
ast_node_t* node;
@ -69,7 +69,7 @@ ast_node_t* parse_decl_val(parser_t* parser) {
ast_node_t* parse_decl(parser_t* parser) {
tok_stream_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype;
cc_tktype_t ttype;
ast_node_t* node;
if (peek_decl(tokbuf) == 0) {

View File

@ -82,7 +82,7 @@ static ast_node_t* parse_comma(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_
static ast_node_t* parse_assign(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
cc_tktype_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
ast_node_t* node = new_ast_node();
node->type = NT_ASSIGN;
@ -133,7 +133,7 @@ static ast_node_t* parse_assign(tok_stream_t* tokbuf, symtab_t *symtab, ast_node
static ast_node_t* parse_cmp(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
cc_tktype_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
ast_node_t* node = new_ast_node();
// saved left
@ -171,7 +171,7 @@ static ast_node_t* parse_cmp(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t*
static ast_node_t* parse_cal(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
cc_tktype_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
ast_node_t* node = new_ast_node();
node->expr.left = left;
@ -238,7 +238,7 @@ static ast_node_t* parse_call(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t
vector_init(node->call.params->params.params);
pop_tok(tokbuf); // 跳过 '('
tok_type_t ttype;
cc_tktype_t ttype;
while (1) {
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
@ -330,7 +330,7 @@ static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symt
node->type = NT_TERM_VAL;
node->syms.tok = *tok;
switch (tok->type) {
switch (tok->sub_type) {
case TOKEN_INT_LITERAL:
// node->data.data_type = TYPE_INT;
break;
@ -344,7 +344,7 @@ static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symt
// node->data.data_type = TYPE_POINTER;
case TOKEN_IDENT:
node = expect_pop_ident(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
cc_tktype_t ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_L_PAREN) {
node = parse_call(tokbuf, symtab, node);
} else {
@ -365,7 +365,7 @@ END:
}
static ast_node_t *parse_subexpression(tok_stream_t* tokbuf, symtab_t *symtab, enum Precedence prec) {
tok_type_t ttype;
cc_tktype_t ttype;
struct expr_prec_table_t* work;
ast_node_t* left;
@ -400,7 +400,7 @@ ast_node_t* parse_expr(parser_t* parser) {
tok_stream_t* tokbuf = &(parser->tokbuf);
symtab_t *symtab = parser->symtab;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
cc_tktype_t ttype = peek_tok_type(tokbuf);
switch (ttype) {
case TOKEN_NOT:
case TOKEN_AND:

View File

@ -9,7 +9,7 @@
// TODO 语义分析压入符号表
static void parse_params(parser_t* parser, tok_stream_t* cache, ast_node_t* node) {
flush_peek_tok(cache);
tok_type_t ttype;
cc_tktype_t ttype;
ast_node_t *params = new_ast_node();
node->decl_func.params = params;
vector_init(params->params.params);
@ -79,7 +79,7 @@ ast_type_t check_is_func_decl(tok_stream_t* tokbuf, tok_stream_t* cache) {
LOG_ERROR("function parameter list too long");
}
cache->buf[cache->size++] = *tok;
switch (tok->type) {
switch (tok->sub_type) {
case TOKEN_L_PAREN:
depth++;
break;

View File

@ -4,7 +4,7 @@
ast_node_t* parse_stmt(parser_t* parser) {
tok_stream_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
cc_tktype_t ttype = peek_tok_type(tokbuf);
ast_node_t* node = new_ast_node();
switch (ttype) {
case TOKEN_IF: {

View File

@ -3,8 +3,8 @@
#include "../type.h"
ast_node_t* new_ast_ident_node(tok_t* tok) {
if (tok->type != TOKEN_IDENT) {
LOG_ERROR("syntax error: want identifier but got %d", tok->type);
if (tok->sub_type != TOKEN_IDENT) {
LOG_ERROR("syntax error: want identifier but got %d", tok->sub_type);
}
ast_node_t* node = new_ast_node();
node->type = NT_TERM_IDENT;
@ -24,7 +24,7 @@ ast_node_t* expect_pop_ident(tok_stream_t* tokbuf) {
ast_node_t* parse_type(parser_t* parser) {
tok_stream_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
cc_tktype_t ttype = peek_tok_type(tokbuf);
data_type_t dtype;
switch(ttype) {
case TOKEN_VOID: dtype = TYPE_VOID; break;

View File

@ -1,53 +0,0 @@
// hashmap.c
#include "hashmap.h"
#include <stdlib.h>
#include <string.h>
// DJB2哈希算法
static unsigned long hash(const char* str) {
unsigned long hash = 5381;
int c;
while ((c = *str++))
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
return hash % HMAP_SIZE;
}
void hmap_init(HashMap* map) {
memset(map->buckets, 0, sizeof(map->buckets));
}
void hmap_put(HashMap* map, const char* key, void* value) {
unsigned long idx = hash(key);
HashMapEntry* entry = malloc(sizeof(HashMapEntry));
entry->key = strdup(key);
entry->value = value;
entry->next = map->buckets[idx];
map->buckets[idx] = entry;
}
void* hmap_get(HashMap* map, const char* key) {
unsigned long idx = hash(key);
HashMapEntry* entry = map->buckets[idx];
while (entry) {
if (strcmp(entry->key, key) == 0)
return entry->value;
entry = entry->next;
}
return NULL;
}
int hmap_contains(HashMap* map, const char* key) {
return hmap_get(map, key) != NULL;
}
void hmap_destroy(HashMap* map) {
for (int i = 0; i < HMAP_SIZE; i++) {
HashMapEntry* entry = map->buckets[i];
while (entry) {
HashMapEntry* next = entry->next;
free(entry->key);
free(entry);
entry = next;
}
}
}

View File

@ -1,31 +0,0 @@
#ifndef HASHMAP_H
#define HASHMAP_H
#define HMAP_SIZE 64
typedef struct HashMapEntry {
char* key;
void* value;
struct HashMapEntry* next;
} HashMapEntry;
typedef struct {
HashMapEntry* buckets[HMAP_SIZE];
} HashMap;
// 初始化哈希表
void hmap_init(HashMap* map);
// 插入键值对
void hmap_put(HashMap* map, const char* key, void* value);
// 查找键值
void* hmap_get(HashMap* map, const char* key);
// 检查键是否存在
int hmap_contains(HashMap* map, const char* key);
// 释放哈希表内存不释放value
void hmap_destroy(HashMap* map);
#endif

View File

@ -1,43 +0,0 @@
// scope.c
#include "scope.h"
#include <stdio.h>
#include <stdlib.h>
typedef struct Scope Scope;
Scope* scope_create(Scope* parent) {
Scope* scope = malloc(sizeof(Scope));
hmap_init(&scope->symbols);
scope->parent = parent;
scope->base_offset = 0;
scope->cur_offset = 0;
return scope;
}
void scope_destroy(Scope* scope) {
hmap_destroy(&scope->symbols);
free(scope);
}
void scope_insert(Scope* scope, const char* name, void* symbol) {
if (hmap_contains(&scope->symbols, name)) {
// 处理重复定义错误
fprintf(stderr, "Error: Symbol '%s' already defined\n", name);
exit(EXIT_FAILURE);
}
hmap_put(&scope->symbols, name, symbol);
}
void* scope_lookup(Scope* scope, const char* name) {
void* symbol = NULL;
while (scope) {
symbol = hmap_get(&scope->symbols, name);
if (symbol) break;
scope = scope->parent;
}
return symbol;
}
void* scope_lookup_current(Scope* scope, const char* name) {
return hmap_get(&scope->symbols, name);
}

View File

@ -1,28 +0,0 @@
#ifndef SCOPE_H
#define SCOPE_H
#include "hashmap.h"
struct Scope {
HashMap symbols; // 当前作用域符号表
struct Scope* parent; // 上层作用域
int base_offset;
int cur_offset;
};
// 创建新作用域父作用域可为NULL
struct Scope* scope_create(struct Scope* parent);
// 销毁作用域
void scope_destroy(struct Scope* scope);
// 在当前作用域插入符号
void scope_insert(struct Scope* scope, const char* name, void* symbol);
// 逐级查找符号
void* scope_lookup(struct Scope* scope, const char* name);
// 仅在当前作用域查找
void* scope_lookup_current(struct Scope* scope, const char* name);
#endif

View File

@ -1,50 +0,0 @@
// symtab.c
#include "../../frontend.h"
#include <lib/core.h>
#include "scope.h"
#include "symtab.h"
typedef symtab_t symtab_t;
typedef struct Scope Scope;
void init_symtab(symtab_t* symtab) {
symtab->global_scope = scope_create(NULL);
symtab->cur_scope = symtab->global_scope;
}
void del_symtab(symtab_t* symtab) {
scope_destroy(symtab->global_scope);
}
void symtab_enter_scope(symtab_t* symtab) {
struct Scope* scope = scope_create(symtab->cur_scope);
scope->base_offset = symtab->cur_scope->base_offset + symtab->cur_scope->cur_offset;
symtab->cur_scope = scope;
}
void symtab_leave_scope(symtab_t* symtab) {
Scope * scope = symtab->cur_scope;
if (scope == NULL) {
LOG_ERROR("cannot leave NULL scope or global scope");
}
symtab->cur_scope = symtab->cur_scope->parent;
scope_destroy(scope);
}
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate) {
struct Scope* scope = symtab->cur_scope;
void* node = scope_lookup_current(scope, name);
if (node != NULL) {
if (!can_duplicate) {
LOG_ERROR("duplicate symbol %s", name);
}
return node;
}
scope_insert(scope, name, ast_node);
return node;
}
void* symtab_lookup_symbol(symtab_t* symtab, const char* name) {
return scope_lookup(symtab->cur_scope, name);
}

View File

@ -1,18 +0,0 @@
// symtab.h
#ifndef __SYMTAB_H__
#define __SYMTAB_H__
typedef struct symtab {
struct Scope* cur_scope;
struct Scope* global_scope;
} symtab_t;
void init_symtab(symtab_t* symtab);
void del_symtab(symtab_t* symtab);
void symtab_enter_scope(symtab_t* symtab);
void symtab_leave_scope(symtab_t* symtab);
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate);
void* symtab_lookup_symbol(symtab_t* symtab, const char* name);
#endif

View File

@ -6,6 +6,7 @@
// gcc -g ../parser.c ../../lexer/lexer.c ../ast/ast.c ../ast/block.c ../ast/decl.c ../ast/expr.c ../ast/func.c ../ast/program.c ../ast/stmt.c ../ast/term.c ../symtab/hashmap.c ../symtab/scope.c ../symtab/symtab.c test_parser.c -o test_parser
// gcc -g test_parser.c -L../.. -lfrontend -o test_parser
int main(int argc, char** argv) {
init_lib_core();
const char* file_name = "test_file.c";
if (argc == 2) {
file_name = argv[1];
@ -17,8 +18,10 @@ int main(int argc, char** argv) {
}
printf("open file success\n");
struct Lexer lexer;
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);
lexer_t lexer;
strpool_t strpool;
init_strpool(&strpool);
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s, &strpool);
struct SymbolTable symtab;
init_symtab(&symtab);