feat(frontend): 重构词法分析器
- 添加 .gitignore 文件,忽略编译器生成的二进制文件
- 重构 lexer.c 文件,改进了关键字处理和字符串处理
- 更新前端的前端、解析器和 AST 相关文件,以适应新的词法分析器
- 优化了 token 相关的定义和函数,引入了新的 token 类型
This commit is contained in:
@ -19,7 +19,7 @@ ast_node_t* parse_block(parser_t* parser) {
|
||||
symtab_enter_scope(parser->symtab);
|
||||
tok_stream_t *tokbuf = &parser->tokbuf;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
ast_node_t* node = new_ast_node_block();
|
||||
|
||||
expect_pop_tok(tokbuf, TOKEN_L_BRACE);
|
||||
|
@ -37,7 +37,7 @@ int peek_decl(tok_stream_t* tokbuf) {
|
||||
|
||||
ast_node_t* parse_decl_val(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &parser->tokbuf;
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
flush_peek_tok(tokbuf);
|
||||
|
||||
ast_node_t* node;
|
||||
@ -69,7 +69,7 @@ ast_node_t* parse_decl_val(parser_t* parser) {
|
||||
ast_node_t* parse_decl(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &parser->tokbuf;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
ast_node_t* node;
|
||||
|
||||
if (peek_decl(tokbuf) == 0) {
|
||||
|
@ -82,7 +82,7 @@ static ast_node_t* parse_comma(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_
|
||||
|
||||
static ast_node_t* parse_assign(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
pop_tok(tokbuf);
|
||||
ast_node_t* node = new_ast_node();
|
||||
node->type = NT_ASSIGN;
|
||||
@ -133,7 +133,7 @@ static ast_node_t* parse_assign(tok_stream_t* tokbuf, symtab_t *symtab, ast_node
|
||||
|
||||
static ast_node_t* parse_cmp(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
pop_tok(tokbuf);
|
||||
ast_node_t* node = new_ast_node();
|
||||
// saved left
|
||||
@ -171,7 +171,7 @@ static ast_node_t* parse_cmp(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t*
|
||||
|
||||
static ast_node_t* parse_cal(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
pop_tok(tokbuf);
|
||||
ast_node_t* node = new_ast_node();
|
||||
node->expr.left = left;
|
||||
@ -238,7 +238,7 @@ static ast_node_t* parse_call(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t
|
||||
vector_init(node->call.params->params.params);
|
||||
pop_tok(tokbuf); // 跳过 '('
|
||||
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
while (1) {
|
||||
flush_peek_tok(tokbuf);
|
||||
ttype = peek_tok_type(tokbuf);
|
||||
@ -330,7 +330,7 @@ static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symt
|
||||
node->type = NT_TERM_VAL;
|
||||
node->syms.tok = *tok;
|
||||
|
||||
switch (tok->type) {
|
||||
switch (tok->sub_type) {
|
||||
case TOKEN_INT_LITERAL:
|
||||
// node->data.data_type = TYPE_INT;
|
||||
break;
|
||||
@ -344,7 +344,7 @@ static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symt
|
||||
// node->data.data_type = TYPE_POINTER;
|
||||
case TOKEN_IDENT:
|
||||
node = expect_pop_ident(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
if (ttype == TOKEN_L_PAREN) {
|
||||
node = parse_call(tokbuf, symtab, node);
|
||||
} else {
|
||||
@ -365,7 +365,7 @@ END:
|
||||
}
|
||||
|
||||
static ast_node_t *parse_subexpression(tok_stream_t* tokbuf, symtab_t *symtab, enum Precedence prec) {
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
struct expr_prec_table_t* work;
|
||||
ast_node_t* left;
|
||||
|
||||
@ -400,7 +400,7 @@ ast_node_t* parse_expr(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &(parser->tokbuf);
|
||||
symtab_t *symtab = parser->symtab;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
switch (ttype) {
|
||||
case TOKEN_NOT:
|
||||
case TOKEN_AND:
|
||||
|
@ -9,7 +9,7 @@
|
||||
// TODO 语义分析压入符号表
|
||||
static void parse_params(parser_t* parser, tok_stream_t* cache, ast_node_t* node) {
|
||||
flush_peek_tok(cache);
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
ast_node_t *params = new_ast_node();
|
||||
node->decl_func.params = params;
|
||||
vector_init(params->params.params);
|
||||
@ -79,7 +79,7 @@ ast_type_t check_is_func_decl(tok_stream_t* tokbuf, tok_stream_t* cache) {
|
||||
LOG_ERROR("function parameter list too long");
|
||||
}
|
||||
cache->buf[cache->size++] = *tok;
|
||||
switch (tok->type) {
|
||||
switch (tok->sub_type) {
|
||||
case TOKEN_L_PAREN:
|
||||
depth++;
|
||||
break;
|
||||
|
@ -4,7 +4,7 @@
|
||||
ast_node_t* parse_stmt(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &parser->tokbuf;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
ast_node_t* node = new_ast_node();
|
||||
switch (ttype) {
|
||||
case TOKEN_IF: {
|
||||
|
@ -3,8 +3,8 @@
|
||||
#include "../type.h"
|
||||
|
||||
ast_node_t* new_ast_ident_node(tok_t* tok) {
|
||||
if (tok->type != TOKEN_IDENT) {
|
||||
LOG_ERROR("syntax error: want identifier but got %d", tok->type);
|
||||
if (tok->sub_type != TOKEN_IDENT) {
|
||||
LOG_ERROR("syntax error: want identifier but got %d", tok->sub_type);
|
||||
}
|
||||
ast_node_t* node = new_ast_node();
|
||||
node->type = NT_TERM_IDENT;
|
||||
@ -24,7 +24,7 @@ ast_node_t* expect_pop_ident(tok_stream_t* tokbuf) {
|
||||
ast_node_t* parse_type(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &parser->tokbuf;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
data_type_t dtype;
|
||||
switch(ttype) {
|
||||
case TOKEN_VOID: dtype = TYPE_VOID; break;
|
||||
|
@ -1,53 +0,0 @@
|
||||
// hashmap.c
|
||||
#include "hashmap.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// DJB2哈希算法
|
||||
static unsigned long hash(const char* str) {
|
||||
unsigned long hash = 5381;
|
||||
int c;
|
||||
while ((c = *str++))
|
||||
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
|
||||
return hash % HMAP_SIZE;
|
||||
}
|
||||
|
||||
/* Put `map` into the empty state by clearing every bucket head. */
void hmap_init(HashMap* map) {
    for (int i = 0; i < HMAP_SIZE; i++) {
        map->buckets[i] = NULL;
    }
}
|
||||
|
||||
/*
 * Associate `value` with `key` in `map`.
 *
 * The key string is copied, so the caller keeps ownership of `key`.
 * `value` is stored as-is. If `key` is already present, its value is
 * replaced in place rather than pushing a second entry that shadows the
 * old one. Aborts the process if memory cannot be allocated.
 */
void hmap_put(HashMap* map, const char* key, void* value) {
    unsigned long idx = hash(key);

    /* Reuse the existing entry when the key is already stored. */
    for (HashMapEntry* cur = map->buckets[idx]; cur != NULL; cur = cur->next) {
        if (strcmp(cur->key, key) == 0) {
            cur->value = value;
            return;
        }
    }

    /* Copy the key by hand: strdup() is POSIX, not ISO C11. */
    size_t key_size = strlen(key) + 1;
    HashMapEntry* entry = malloc(sizeof *entry);
    char* key_copy = malloc(key_size);
    if (entry == NULL || key_copy == NULL) {
        /* The function returns void, so there is no way to report failure. */
        abort();
    }
    memcpy(key_copy, key, key_size);

    /* Push the new entry at the head of the bucket's chain. */
    entry->key = key_copy;
    entry->value = value;
    entry->next = map->buckets[idx];
    map->buckets[idx] = entry;
}
|
||||
|
||||
/*
 * Return the value stored for `key`, or NULL when no entry in the key's
 * bucket matches. A key stored with a NULL value therefore looks the same
 * as a missing key to the caller.
 */
void* hmap_get(HashMap* map, const char* key) {
    HashMapEntry* cur = map->buckets[hash(key)];

    for (; cur != NULL; cur = cur->next) {
        if (strcmp(cur->key, key) == 0) {
            return cur->value;
        }
    }

    return NULL;
}
|
||||
|
||||
/*
 * Return 1 if `key` has an entry in `map`, 0 otherwise.
 *
 * The bucket chain is searched directly instead of testing the result of
 * hmap_get(), so a key that was stored with a NULL value is still
 * reported as present.
 */
int hmap_contains(HashMap* map, const char* key) {
    for (HashMapEntry* cur = map->buckets[hash(key)]; cur != NULL; cur = cur->next) {
        if (strcmp(cur->key, key) == 0) {
            return 1;
        }
    }
    return 0;
}
|
||||
|
||||
/*
 * Free every entry of `map` together with its copied key.
 *
 * Stored values are not freed. Each bucket head is reset to NULL after
 * its chain is released, so the map is left empty: it can be reused or
 * destroyed again without touching freed memory.
 */
void hmap_destroy(HashMap* map) {
    for (int i = 0; i < HMAP_SIZE; i++) {
        HashMapEntry* entry = map->buckets[i];
        while (entry != NULL) {
            /* Save the link before the node holding it is released. */
            HashMapEntry* next = entry->next;
            free(entry->key);
            free(entry);
            entry = next;
        }
        map->buckets[i] = NULL;
    }
}
|
@ -1,31 +0,0 @@
|
||||
#ifndef HASHMAP_H
|
||||
#define HASHMAP_H
|
||||
|
||||
#define HMAP_SIZE 64
|
||||
|
||||
// One key/value entry in a bucket's singly linked chain.
typedef struct HashMapEntry {
|
||||
char* key; // key string of this entry
|
||||
void* value; // opaque pointer stored for the key
|
||||
struct HashMapEntry* next; // next entry in the same chain
|
||||
} HashMapEntry;
|
||||
|
||||
// Hash map made of HMAP_SIZE bucket heads, each pointing at a chain of entries.
typedef struct {
|
||||
HashMapEntry* buckets[HMAP_SIZE]; // head of each bucket's entry chain
|
||||
} HashMap;
|
||||
|
||||
// Initialize the hash map
|
||||
void hmap_init(HashMap* map);
|
||||
|
||||
// Insert a key/value pair
|
||||
void hmap_put(HashMap* map, const char* key, void* value);
|
||||
|
||||
// Look up the value stored for a key
|
||||
void* hmap_get(HashMap* map, const char* key);
|
||||
|
||||
// Check whether a key exists
|
||||
int hmap_contains(HashMap* map, const char* key);
|
||||
|
||||
// Free the hash map's memory (stored values are not freed)
|
||||
void hmap_destroy(HashMap* map);
|
||||
|
||||
#endif
|
@ -1,43 +0,0 @@
|
||||
// scope.c
|
||||
#include "scope.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct Scope Scope;
|
||||
|
||||
/*
 * Allocate a new scope whose enclosing scope is `parent` (may be NULL).
 *
 * The symbol map starts empty and both offsets start at 0. If the
 * allocation fails, an error is printed to stderr and the process exits
 * with EXIT_FAILURE, the same way scope_insert() reports fatal errors,
 * instead of writing through a NULL pointer.
 */
Scope* scope_create(Scope* parent) {
    Scope* scope = malloc(sizeof *scope);
    if (scope == NULL) {
        fprintf(stderr, "Error: out of memory while creating scope\n");
        exit(EXIT_FAILURE);
    }

    hmap_init(&scope->symbols);
    scope->parent = parent;
    scope->base_offset = 0;
    scope->cur_offset = 0;
    return scope;
}
|
||||
|
||||
/*
 * Release `scope` and the entries of its symbol map.
 *
 * The parent scope is left untouched. A NULL `scope` is accepted and
 * ignored, mirroring free(NULL), so cleanup paths need no guard.
 */
void scope_destroy(Scope* scope) {
    if (scope == NULL) {
        return;
    }
    hmap_destroy(&scope->symbols);
    free(scope);
}
|
||||
|
||||
/*
 * Register `symbol` under `name` in `scope`.
 *
 * If hmap_contains() reports `name` as already present in this scope,
 * an error is printed to stderr and the process exits with EXIT_FAILURE.
 */
void scope_insert(Scope* scope, const char* name, void* symbol) {
    if (!hmap_contains(&scope->symbols, name)) {
        hmap_put(&scope->symbols, name, symbol);
        return;
    }

    // Redefinition inside the same scope is treated as fatal.
    fprintf(stderr, "Error: Symbol '%s' already defined\n", name);
    exit(EXIT_FAILURE);
}
|
||||
|
||||
/*
 * Search for `name` starting in `scope` and continuing through each
 * parent scope in turn. Returns the first non-NULL value found, or NULL
 * when the chain is exhausted (including when `scope` itself is NULL).
 */
void* scope_lookup(Scope* scope, const char* name) {
    for (Scope* cur = scope; cur != NULL; cur = cur->parent) {
        void* found = hmap_get(&cur->symbols, name);
        if (found != NULL) {
            return found;
        }
    }
    return NULL;
}
|
||||
|
||||
/*
 * Look `name` up in `scope` only, without consulting parent scopes.
 * Returns whatever hmap_get() yields for the scope's symbol map.
 */
void* scope_lookup_current(Scope* scope, const char* name) {
    HashMap* table = &scope->symbols;
    return hmap_get(table, name);
}
|
@ -1,28 +0,0 @@
|
||||
#ifndef SCOPE_H
|
||||
#define SCOPE_H
|
||||
|
||||
#include "hashmap.h"
|
||||
|
||||
// One scope: its own symbol map plus a link to the enclosing scope.
struct Scope {
|
||||
HashMap symbols; // symbol table of this scope
|
||||
struct Scope* parent; // enclosing (outer) scope
|
||||
int base_offset; // NOTE(review): presumably the offset at which this scope's storage starts - confirm
|
||||
int cur_offset; // NOTE(review): presumably the offset consumed so far within this scope - confirm
|
||||
};
|
||||
|
||||
// Create a new scope (the parent scope may be NULL)
|
||||
struct Scope* scope_create(struct Scope* parent);
|
||||
|
||||
// Destroy a scope
|
||||
void scope_destroy(struct Scope* scope);
|
||||
|
||||
// Insert a symbol into the current scope
|
||||
void scope_insert(struct Scope* scope, const char* name, void* symbol);
|
||||
|
||||
// Look up a symbol, walking outward through the enclosing scopes
|
||||
void* scope_lookup(struct Scope* scope, const char* name);
|
||||
|
||||
// Look up a symbol in the current scope only
|
||||
void* scope_lookup_current(struct Scope* scope, const char* name);
|
||||
|
||||
#endif
|
@ -1,50 +0,0 @@
|
||||
// symtab.c
|
||||
#include "../../frontend.h"
|
||||
#include <lib/core.h>
|
||||
#include "scope.h"
|
||||
#include "symtab.h"
|
||||
|
||||
typedef symtab_t symtab_t;
|
||||
typedef struct Scope Scope;
|
||||
|
||||
/*
 * Initialise `symtab` with a freshly created, parentless scope that
 * serves as both the global scope and the current scope.
 */
void init_symtab(symtab_t* symtab) {
    struct Scope* root = scope_create(NULL);

    symtab->global_scope = root;
    symtab->cur_scope = root;
}
|
||||
|
||||
/*
 * Tear down `symtab`.
 *
 * Every scope on the chain from the current scope up to the root is
 * destroyed, so scopes that were entered but never left are not leaked.
 * Both scope pointers are then cleared so they cannot be used after the
 * memory is gone.
 */
void del_symtab(symtab_t* symtab) {
    Scope* scope = symtab->cur_scope;

    while (scope != NULL) {
        /* Read the link before the scope that holds it is freed. */
        Scope* parent = scope->parent;
        scope_destroy(scope);
        scope = parent;
    }

    symtab->cur_scope = NULL;
    symtab->global_scope = NULL;
}
|
||||
|
||||
/*
 * Open a new scope nested inside the current one and make it current.
 * The new scope's base_offset is the enclosing scope's
 * base_offset + cur_offset.
 */
void symtab_enter_scope(symtab_t* symtab) {
    Scope* outer = symtab->cur_scope;
    Scope* inner = scope_create(outer);

    inner->base_offset = outer->base_offset + outer->cur_offset;
    symtab->cur_scope = inner;
}
|
||||
|
||||
/*
 * Close the current scope: its parent becomes current and the closed
 * scope is destroyed.
 *
 * Leaving is refused when there is no current scope or when the current
 * scope is the global one, as the error message states. The global scope
 * stays owned by the symbol table until del_symtab().
 */
void symtab_leave_scope(symtab_t* symtab) {
    Scope* scope = symtab->cur_scope;

    if (scope == NULL || scope == symtab->global_scope) {
        LOG_ERROR("cannot leave NULL scope or global scope");
        /* Whether LOG_ERROR terminates is not visible here; never fall
         * through to dereference or free an invalid scope. */
        return;
    }

    symtab->cur_scope = scope->parent;
    scope_destroy(scope);
}
|
||||
|
||||
/*
 * Add `ast_node` under `name` to the current scope.
 *
 * If `name` is not yet in the current scope, the symbol is inserted and
 * NULL is returned. If it already exists, nothing is inserted and the
 * existing symbol is returned; in that case an error is logged unless
 * `can_duplicate` is non-zero.
 */
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate) {
    struct Scope* scope = symtab->cur_scope;
    void* existing = scope_lookup_current(scope, name);

    if (existing == NULL) {
        scope_insert(scope, name, ast_node);
        // A fresh insertion is reported as NULL, not as the new node.
        return NULL;
    }

    if (!can_duplicate) {
        LOG_ERROR("duplicate symbol %s", name);
    }
    return existing;
}
|
||||
|
||||
/*
 * Resolve `name` starting from the current scope; the search itself is
 * delegated to scope_lookup(), whose result is returned unchanged.
 */
void* symtab_lookup_symbol(symtab_t* symtab, const char* name) {
    struct Scope* innermost = symtab->cur_scope;
    return scope_lookup(innermost, name);
}
|
@ -1,18 +0,0 @@
|
||||
// symtab.h
|
||||
#ifndef __SYMTAB_H__
|
||||
#define __SYMTAB_H__
|
||||
|
||||
// Symbol table handle: tracks the scope currently in effect and the global scope.
typedef struct symtab {
|
||||
struct Scope* cur_scope; // scope that new symbols are added to and lookups start from
|
||||
struct Scope* global_scope; // scope created by init_symtab()
|
||||
} symtab_t;
|
||||
|
||||
void init_symtab(symtab_t* symtab);
|
||||
void del_symtab(symtab_t* symtab);
|
||||
|
||||
void symtab_enter_scope(symtab_t* symtab);
|
||||
void symtab_leave_scope(symtab_t* symtab);
|
||||
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate);
|
||||
void* symtab_lookup_symbol(symtab_t* symtab, const char* name);
|
||||
|
||||
#endif
|
@ -6,6 +6,7 @@
|
||||
// gcc -g ../parser.c ../../lexer/lexer.c ../ast/ast.c ../ast/block.c ../ast/decl.c ../ast/expr.c ../ast/func.c ../ast/program.c ../ast/stmt.c ../ast/term.c ../symtab/hashmap.c ../symtab/scope.c ../symtab/symtab.c test_parser.c -o test_parser
|
||||
// gcc -g test_parser.c -L../.. -lfrontend -o test_parser
|
||||
int main(int argc, char** argv) {
|
||||
init_lib_core();
|
||||
const char* file_name = "test_file.c";
|
||||
if (argc == 2) {
|
||||
file_name = argv[1];
|
||||
@ -17,8 +18,10 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
printf("open file success\n");
|
||||
|
||||
struct Lexer lexer;
|
||||
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);
|
||||
lexer_t lexer;
|
||||
strpool_t strpool;
|
||||
init_strpool(&strpool);
|
||||
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s, &strpool);
|
||||
|
||||
struct SymbolTable symtab;
|
||||
init_symtab(&symtab);
|
||||
|
Reference in New Issue
Block a user