feat(parser): 启用parser和ast模块并重构解析器结构

- 在cbuild.toml中启用parser和ast依赖项
- 将AST内置类型枚举重命名为SCC_AST_BUILTIN_TYPE_*前缀格式
- 修复ast_def.h中的类型字段命名,将builtin改为type
- 添加逗号操作符支持到表达式操作符枚举中
- 更新字面量表达式的lexeme字段为const char*指针和owned标志
- 重构解析器头文件结构,分离为parser.h、parser_utils.h、scc_sema.h等
- 实现新的解析器工具函数,包括预览、消费、回溯等功能
- 更新声明解析逻辑,使用新的解析器接口进行token处理
- 添加符号表语义分析功能框架
- 修复词法分析器中token移动时的空指针检查
- 统一使用scc_tree_dump_printf替代直接的scc_printf调用
This commit is contained in:
zzy
2026-03-09 15:25:12 +08:00
parent a805814d3f
commit 1fceeca011
28 changed files with 2759 additions and 1987 deletions

View File

@@ -0,0 +1,453 @@
#include <assert.h>
#include <scc_lexer.h>
#include <scc_parser.h>
#include <string.h>
#include <utest/acutest.h>
// Signature shared by the parser entry points exercised by these tests:
// takes the parser and returns the AST node it produced.
typedef scc_ast_node_t *(*scc_parse_node_func)(scc_parser_t *parser);

// Feed `input` through a fresh stream -> lexer -> token ring -> parser
// pipeline, call `parse_func` once on it, and hand back the node it returned.
//
// Before tearing the pipeline down, the flag filled in by scc_ring_not_eof is
// asserted to be false (presumably meaning the parser consumed every token --
// confirm against the ring API).
//
// The returned node is not released here.
static scc_ast_node_t *process_input(const char *input,
                                     scc_parse_node_func parse_func) {
    // In-memory character stream over the caller's string.
    scc_sstream_t source;
    int res = scc_sstream_init_by_buffer(&source, input, strlen(input), false,
                                         16);
    Assert(res == 0);

    // Lexer reading from that stream, exposed as a token ring.
    scc_lexer_t lexer;
    scc_lexer_init(&lexer, scc_sstream_to_ring(&source));
    scc_lexer_tok_ring_t *tokens = scc_lexer_to_ring(&lexer, 8, false);

    scc_parser_t parser;
    scc_parser_init(&parser, tokens, null);

    scc_ast_node_t *root = parse_func(&parser);

    cbool not_eof = false;
    scc_ring_not_eof(*parser.ring, not_eof);
    Assert(!not_eof == true);

    // Tear down in the same order as construction was unwound originally:
    // token ring, parser, lexer, then the backing stream.
    scc_lexer_drop_ring(parser.ring);
    scc_parser_drop(&parser);
    scc_lexer_drop(&lexer);
    scc_sstream_drop(&source);

    return root;
}
// Output callback shape: an opaque userdata pointer followed by a
// printf-style format string and its arguments. dump2buffer has this shape.
// NOTE(review): presumably mirrors the sink type declared by the tree-dump
// header -- confirm that this redeclaration is still needed.
typedef void (*scc_tree_dump_output_t)(void *userdata, const char *fmt, ...);
// Capacity, in bytes, of each of the two dump buffers below.
#define BUFFER_SIZE (4096)
// Receives the text dump of the hand-built expected tree (see SCC_CHECK_AST).
char expect_buffer[BUFFER_SIZE];
// Receives the text dump of the tree returned by the parser (see SCC_CHECK_AST).
char output_buffer[BUFFER_SIZE];
// Tree-dump sink that appends formatted text to a NUL-terminated buffer.
//
// `_buffer` must point at a BUFFER_SIZE-byte character array that already
// holds a terminated string; the formatted output is written starting at its
// current end, with the size limit passed to scc_vsnprintf being the space
// left minus one byte.
static void dump2buffer(void *_buffer, const char *fmt, ...) {
    char *dest = _buffer;
    size_t used = strlen(dest); // measured once, before anything is appended

    va_list ap;
    va_start(ap, fmt);
    scc_vsnprintf(dest + used, BUFFER_SIZE - used - 1, fmt, ap);
    va_end(ap);
}
// SCC_CHECK_AST(expect_node_ptr, str, parse_func)
//
// Parses the string `str` with `parse_func` (via process_input), dumps both
// the resulting node and the hand-built `expect_node_ptr` as text into
// output_buffer / expect_buffer, and reports a test failure through
// TEST_CHECK when the two dumps differ. Both dumps are attached to the
// failure with TEST_MSG.
//
// Each buffer is reset to "\n" before dumping, so every dump starts with a
// newline.
//
// The expansion is a single statement WITHOUT a trailing semicolon: write
// `SCC_CHECK_AST(...);` at the call site. Arguments are parenthesized and the
// macro's own locals carry a `scc_check_ast_` prefix so they cannot capture
// identifiers used in the arguments.
#define SCC_CHECK_AST(expect_node_ptr, str, parse_func)                        \
    do {                                                                       \
        scc_ast_node_t *scc_check_ast_node_ =                                  \
            process_input((str), (scc_parse_node_func)(parse_func));           \
        scc_tree_dump_ctx_t scc_check_ast_ctx_;                                \
        expect_buffer[0] = '\n';                                               \
        expect_buffer[1] = '\0';                                               \
        scc_tree_dump_ctx_init(&scc_check_ast_ctx_, true, dump2buffer,         \
                               expect_buffer);                                 \
        scc_ast_dump_node(&scc_check_ast_ctx_, (expect_node_ptr));             \
        scc_tree_dump_ctx_drop(&scc_check_ast_ctx_);                           \
        output_buffer[0] = '\n';                                               \
        output_buffer[1] = '\0';                                               \
        scc_tree_dump_ctx_init(&scc_check_ast_ctx_, true, dump2buffer,         \
                               output_buffer);                                 \
        scc_ast_dump_node(&scc_check_ast_ctx_, scc_check_ast_node_);           \
        scc_tree_dump_ctx_drop(&scc_check_ast_ctx_);                           \
        TEST_CHECK(strcmp(output_buffer, expect_buffer) == 0);                 \
        TEST_MSG("Expected: %s", expect_buffer);                               \
        TEST_MSG("Produced: %s", output_buffer);                               \
    } while (0)
static void test_parser_unit(void) {
scc_ast_decl_t int_decl = {
.base.type = SCC_AST_DECL_VAR,
.var.name = "a",
.var.init = null,
.var.type = &(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN,
.builtin.type = SCC_AST_BUILTIN_TYPE_INT},
};
SCC_CHECK_AST(&int_decl.base, "int a;", scc_parse_declaration);
scc_ast_decl_t func_decl = {
.base.type = SCC_AST_DECL_FUNC,
.func.name = "main",
.func.body =
&(scc_ast_stmt_t){
.base.type = SCC_AST_STMT_COMPOUND,
.compound.block_items = {0},
},
.func.type =
&(scc_ast_type_t){
.base.type = SCC_AST_TYPE_FUNCTION,
.function.is_variadic = false,
.function.param_types = {0},
.function.return_type =
&(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN,
.builtin.type = SCC_AST_BUILTIN_TYPE_INT},
},
};
SCC_CHECK_AST(&func_decl.base, "int main(void) {}", scc_parse_declaration);
scc_ast_decl_t *decls[] = {&func_decl};
scc_ast_translation_unit_t tu = {
.base.type = SCC_AST_TRANSLATION_UNIT,
.declarations.data = decls,
.declarations.cap = 1,
.declarations.size = 1,
};
SCC_CHECK_AST(&tu.base, "int main(void) {}", scc_parse_translation_unit);
// SCC_CHECK_AST(&func_decl.base, "int main(void);", scc_parse_declaration);
{
scc_ast_node_t *items[] = {
(scc_ast_node_t *)&(scc_ast_stmt_t){
.base.type = SCC_AST_STMT_RETURN,
.return_stmt.expr =
&(scc_ast_expr_t){
.base.type = SCC_AST_EXPR_INT_LITERAL,
.literal.lexme = "0",
},
},
};
scc_ast_decl_t func_decl = {
.base.type = SCC_AST_DECL_FUNC,
.func.name = "main",
.func.body =
&(scc_ast_stmt_t){
.base.type = SCC_AST_STMT_COMPOUND,
.compound.block_items.cap = 1,
.compound.block_items.size = 1,
.compound.block_items.data = items,
},
.func.type =
&(scc_ast_type_t){
.base.type = SCC_AST_TYPE_FUNCTION,
.function.is_variadic = false,
.function.param_types = {0},
.function.return_type =
&(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN,
.builtin.type =
SCC_AST_BUILTIN_TYPE_INT},
},
};
scc_ast_decl_t *decls[] = {&func_decl};
scc_ast_translation_unit_t tu = {
.base.type = SCC_AST_TRANSLATION_UNIT,
.declarations.cap = 1,
.declarations.size = 1,
.declarations.data = decls,
};
SCC_CHECK_AST(&tu.base, "int main(void) { return 0; }",
scc_parse_translation_unit);
}
}
// Build (by value) a binary-expression node applying `op` to `lhs` and `rhs`.
// The operands are referenced, not copied, so they must outlive the result.
static scc_ast_expr_t make_binary(scc_ast_expr_op_t op, scc_ast_expr_t *lhs,
                                  scc_ast_expr_t *rhs) {
    return (scc_ast_expr_t){
        .base.type = SCC_AST_EXPR_BINARY,
        .binary.op = op,
        .binary.lhs = lhs,
        .binary.rhs = rhs,
    };
}
// Build (by value) a unary-expression node applying `op` to `operand`.
// The operand is referenced, not copied, so it must outlive the result.
static scc_ast_expr_t make_unary(scc_ast_expr_op_t op,
                                 scc_ast_expr_t *operand) {
    return (scc_ast_expr_t){
        .base.type = SCC_AST_EXPR_UNARY,
        .unary.op = op,
        .unary.operand = operand,
    };
}
static scc_ast_expr_t make_identifier(char *name) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_IDENTIFIER};
expr.identifier.name = name;
return expr;
}
static scc_ast_expr_t make_int_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_INT_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_float_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_FLOAT_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_string_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_STRING_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_char_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_CHAR_LITERAL};
expr.literal.lexme = val;
return expr;
}
// Build (by value) a conditional-expression node `cond ? then_expr : else_expr`.
// The three operands are referenced, not copied, so they must outlive the
// result.
static scc_ast_expr_t make_conditional(scc_ast_expr_t *cond,
                                       scc_ast_expr_t *then_expr,
                                       scc_ast_expr_t *else_expr) {
    return (scc_ast_expr_t){
        .base.type = SCC_AST_EXPR_COND,
        .cond.cond = cond,
        .cond.then_expr = then_expr,
        .cond.else_expr = else_expr,
    };
}
// Build (by value) a call-expression node invoking `callee`.
//
// `args` may be NULL, in which case the argument vector of the result is left
// zero-initialized. Otherwise the vector header `*args` is copied by plain
// struct assignment, so the result shares the caller's element storage; the
// caller must initialize and fill the vector beforehand and keep it alive
// while the node is in use.
//
// Previously `args` was accepted but silently ignored, so the returned node
// never carried any arguments.
static scc_ast_expr_t make_call(scc_ast_expr_t *callee,
                                scc_ast_expr_vec_t *args) {
    scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_CALL};
    expr.call.callee = callee;
    if (args != NULL) {
        expr.call.args = *args;
    }
    return expr;
}
// Expression-level checks. Each numbered section builds the expected tree by
// hand (operands first, then the node that refers to them) and compares it
// with what scc_parse_expression produces for the quoted source text.
static void test_parser_expression(void) {
// 1. Primary expressions: identifier, integer constant, string literal,
//    parentheses
{
scc_ast_expr_t ident = make_identifier("x");
SCC_CHECK_AST(&ident.base, "x", scc_parse_expression);
scc_ast_expr_t int_lit = make_int_literal("42");
SCC_CHECK_AST(&int_lit.base, "42", scc_parse_expression);
scc_ast_expr_t str_lit = make_string_literal("\"hello\"");
SCC_CHECK_AST(&str_lit.base, "\"hello\"", scc_parse_expression);
// Parenthesized expression: the expected tree is the bare identifier
scc_ast_expr_t paren_ident = make_identifier("y");
SCC_CHECK_AST(&paren_ident.base, "(y)", scc_parse_expression);
}
// 2. Postfix expressions
{
// Array subscript: a[10]
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t index = make_int_literal("10");
scc_ast_expr_t subscript = {.base.type = SCC_AST_EXPR_ARRAY_SUBSCRIPT};
subscript.subscript.array = &a;
subscript.subscript.index = &index;
SCC_CHECK_AST(&subscript.base, "a[10]", scc_parse_expression);
// Function call: f()
scc_ast_expr_t f = make_identifier("f");
scc_ast_expr_t call = {.base.type = SCC_AST_EXPR_CALL};
call.call.callee = &f;
scc_ast_expr_vec_t args;
scc_vec_init(args);
call.call.args = args; // empty argument list
SCC_CHECK_AST(&call.base, "f()", scc_parse_expression);
// Function call with arguments: f(1, x)
scc_ast_expr_t f2 = make_identifier("f");
scc_ast_expr_t arg1 = make_int_literal("1");
scc_ast_expr_t arg2 = make_identifier("x");
scc_ast_expr_vec_t args2;
scc_vec_init(args2);
scc_vec_push(args2, &arg1);
scc_vec_push(args2, &arg2);
scc_ast_expr_t call2 = {.base.type = SCC_AST_EXPR_CALL};
call2.call.callee = &f2;
// The vector header is copied by value after the pushes above.
// NOTE(review): args2 is never released in this function -- confirm
// whether scc_vec_push allocates and a matching free is needed.
call2.call.args = args2;
SCC_CHECK_AST(&call2.base, "f(1, x)", scc_parse_expression);
// Member access: . and ->
scc_ast_expr_t s = make_identifier("s");
scc_ast_expr_t dot = {.base.type = SCC_AST_EXPR_MEMBER};
dot.member.base = &s;
dot.member.member_name = "field";
SCC_CHECK_AST(&dot.base, "s.field", scc_parse_expression);
scc_ast_expr_t p = make_identifier("p");
scc_ast_expr_t arrow = {.base.type = SCC_AST_EXPR_PTR_MEMBER};
arrow.ptr_member.base = &p;
arrow.ptr_member.member_name = "field";
SCC_CHECK_AST(&arrow.base, "p->field", scc_parse_expression);
// Postfix ++ / --
scc_ast_expr_t x = make_identifier("x");
scc_ast_expr_t post_inc = make_unary(SCC_AST_OP_POSTFIX_INCREMENT, &x);
SCC_CHECK_AST(&post_inc.base, "x++", scc_parse_expression);
scc_ast_expr_t post_dec = make_unary(SCC_AST_OP_POSTFIX_DECREMENT, &x);
SCC_CHECK_AST(&post_dec.base, "x--", scc_parse_expression);
// Compound literal -- TODO: (int){1,2} needs a more elaborate expected
// tree; skipped for now
// SCC_CHECK_AST(..., "(int){1,2}", scc_parse_expression);
}
// 3. Unary expressions
{
scc_ast_expr_t x = make_identifier("x");
scc_ast_expr_t pre_inc = make_unary(SCC_AST_OP_PREFIX_INCREMENT, &x);
SCC_CHECK_AST(&pre_inc.base, "++x", scc_parse_expression);
scc_ast_expr_t pre_dec = make_unary(SCC_AST_OP_PREFIX_DECREMENT, &x);
SCC_CHECK_AST(&pre_dec.base, "--x", scc_parse_expression);
scc_ast_expr_t addr = make_unary(SCC_AST_OP_ADDRESS_OF, &x);
SCC_CHECK_AST(&addr.base, "&x", scc_parse_expression);
scc_ast_expr_t deref = make_unary(SCC_AST_OP_INDIRECTION, &x);
SCC_CHECK_AST(&deref.base, "*x", scc_parse_expression);
scc_ast_expr_t plus = make_unary(SCC_AST_OP_UNARY_PLUS, &x);
SCC_CHECK_AST(&plus.base, "+x", scc_parse_expression);
scc_ast_expr_t minus = make_unary(SCC_AST_OP_UNARY_MINUS, &x);
SCC_CHECK_AST(&minus.base, "-x", scc_parse_expression);
scc_ast_expr_t bit_not = make_unary(SCC_AST_OP_BITWISE_NOT, &x);
SCC_CHECK_AST(&bit_not.base, "~x", scc_parse_expression);
scc_ast_expr_t log_not = make_unary(SCC_AST_OP_LOGICAL_NOT, &x);
SCC_CHECK_AST(&log_not.base, "!x", scc_parse_expression);
// sizeof has two forms
// sizeof applied to an expression
scc_ast_expr_t sizeof_expr = {.base.type = SCC_AST_EXPR_SIZE_OF};
sizeof_expr.attr_of.expr = &x;
SCC_CHECK_AST(&sizeof_expr.base, "sizeof x", scc_parse_expression);
// sizeof(type-name) needs a type node to be built; left as TODO
// SCC_CHECK_AST(..., "sizeof(int)", scc_parse_expression);
}
// 4. Casts
{
// (int)x
// Needs a type node to be built; left as TODO. Disabled sketch:
// scc_ast_type_t int_type = { .base.type = SCC_AST_TYPE_BUILTIN,
//                             .builtin.type = SCC_AST_BUILTIN_TYPE_INT };
// scc_ast_expr_t x = make_identifier("x");
// scc_ast_expr_t cast = { .base.type = SCC_AST_EXPR_CAST };
// cast.cast.type = &int_type; cast.cast.expr = &x;
// SCC_CHECK_AST(&cast.base, "(int)x", scc_parse_expression);
}
// 5. Binary operators (checked by precedence)
{
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t b = make_identifier("b");
scc_ast_expr_t c = make_identifier("c");
scc_ast_expr_t d = make_identifier("d");
// Multiplicative operators bind tighter than additive ones
scc_ast_expr_t mul = make_binary(SCC_AST_OP_MUL, &a, &b);
scc_ast_expr_t add = make_binary(SCC_AST_OP_ADD, &mul, &c);
SCC_CHECK_AST(&add.base, "a * b + c", scc_parse_expression);
// Left associativity: a - b - c => (a - b) - c
scc_ast_expr_t sub1 = make_binary(SCC_AST_OP_SUB, &a, &b);
scc_ast_expr_t sub2 = make_binary(SCC_AST_OP_SUB, &sub1, &c);
SCC_CHECK_AST(&sub2.base, "a - b - c", scc_parse_expression);
// Shift
scc_ast_expr_t shift = make_binary(SCC_AST_OP_LEFT_SHIFT, &a, &b);
SCC_CHECK_AST(&shift.base, "a << b", scc_parse_expression);
// Relational
scc_ast_expr_t lt = make_binary(SCC_AST_OP_LESS, &a, &b);
SCC_CHECK_AST(&lt.base, "a < b", scc_parse_expression);
// Equality
scc_ast_expr_t eq = make_binary(SCC_AST_OP_EQUAL, &a, &b);
SCC_CHECK_AST(&eq.base, "a == b", scc_parse_expression);
// Bitwise AND, XOR, OR precedence: & above ^ above |
scc_ast_expr_t bitand = make_binary(SCC_AST_OP_BITWISE_AND, &a, &b);
scc_ast_expr_t bitxor =
make_binary(SCC_AST_OP_BITWISE_XOR, &bitand, &c);
scc_ast_expr_t bitor = make_binary(SCC_AST_OP_BITWISE_OR, &bitxor, &d);
SCC_CHECK_AST(&bitor.base, "a & b ^ c | d", scc_parse_expression);
// Logical AND / OR: && above ||
scc_ast_expr_t logand = make_binary(SCC_AST_OP_LOGICAL_AND, &a, &b);
scc_ast_expr_t logor = make_binary(SCC_AST_OP_LOGICAL_OR, &logand, &c);
SCC_CHECK_AST(&logor.base, "a && b || c", scc_parse_expression);
}
// 6. Conditional (ternary) operator
{
scc_ast_expr_t cond = make_identifier("a");
scc_ast_expr_t then_expr = make_identifier("b");
scc_ast_expr_t else_expr = make_identifier("c");
scc_ast_expr_t cond_expr =
make_conditional(&cond, &then_expr, &else_expr);
SCC_CHECK_AST(&cond_expr.base, "a ? b : c", scc_parse_expression);
// Right associativity: a ? b : c ? d : e => a ? b : (c ? d : e)
scc_ast_expr_t cond2 = make_identifier("c");
scc_ast_expr_t then2 = make_identifier("d");
scc_ast_expr_t else2 = make_identifier("e");
scc_ast_expr_t inner_cond = make_conditional(&cond2, &then2, &else2);
scc_ast_expr_t outer_cond =
make_conditional(&cond, &then_expr, &inner_cond);
SCC_CHECK_AST(&outer_cond.base, "a ? b : c ? d : e",
scc_parse_expression);
}
// 7. Assignment operators (right associative) -- currently disabled
{
// NOTE(review): as sketched, assign2 nests assign1 (a = b) on the LEFT,
// i.e. it describes (a = b) = c, which contradicts the intended
// right-associative a = (b = c). Fix the expected tree before enabling.
// scc_ast_expr_t a = make_identifier("a");
// scc_ast_expr_t b = make_identifier("b");
// scc_ast_expr_t c = make_identifier("c");
// scc_ast_expr_t assign1 = make_binary(SCC_AST_OP_ASSIGN, &a, &b);
// scc_ast_expr_t assign2 =
// make_binary(SCC_AST_OP_ASSIGN, &assign1, &c); // a = (b = c)
// SCC_CHECK_AST(&assign2.base, "a = b = c", scc_parse_expression);
// scc_ast_expr_t add_assign = make_binary(SCC_AST_OP_ASSIGN_ADD, &a, &b);
// SCC_CHECK_AST(&add_assign.base, "a += b", scc_parse_expression);
}
// 8. Comma operator
{
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t b = make_identifier("b");
scc_ast_expr_t comma1 = make_binary(SCC_AST_OP_COMMA, &a, &b);
SCC_CHECK_AST(&comma1.base, "a, b", scc_parse_expression);
}
// 9. Mixed-precedence checks
{
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t b = make_identifier("b");
scc_ast_expr_t c = make_identifier("c");
scc_ast_expr_t d = make_identifier("d");
// a + b * c - d => (a + (b * c)) - d
scc_ast_expr_t mul = make_binary(SCC_AST_OP_MUL, &b, &c);
scc_ast_expr_t add = make_binary(SCC_AST_OP_ADD, &a, &mul);
scc_ast_expr_t sub = make_binary(SCC_AST_OP_SUB, &add, &d);
SCC_CHECK_AST(&sub.base, "a + b * c - d", scc_parse_expression);
// *p++ => *(p++): postfix increment binds tighter than indirection
scc_ast_expr_t p = make_identifier("p");
scc_ast_expr_t post_inc = make_unary(SCC_AST_OP_POSTFIX_INCREMENT, &p);
scc_ast_expr_t deref = make_unary(SCC_AST_OP_INDIRECTION, &post_inc);
SCC_CHECK_AST(&deref.base, "*p++", scc_parse_expression);
}
}
// Test registration table: each entry pairs a test name with its function;
// the {NULL, NULL} entry terminates the list.
TEST_LIST = {
{"parser_unit", test_parser_unit},
{"parser_expression", test_parser_expression},
{NULL, NULL},
};