Files
scc/libs/parser/tests/test_parser_unit.c
zzy 1fceeca011 feat(parser): 启用parser和ast模块并重构解析器结构
- 在cbuild.toml中启用parser和ast依赖项
- 将AST内置类型枚举重命名为SCC_AST_BUILTIN_TYPE_*前缀格式
- 修复ast_def.h中的类型字段命名,将builtin改为type
- 添加逗号操作符支持到表达式操作符枚举中
- 更新字面量表达式的lexeme字段为const char*指针和owned标志
- 重构解析器头文件结构,分离为parser.h、parser_utils.h、scc_sema.h等
- 实现新的解析器工具函数,包括预览、消费、回溯等功能
- 更新声明解析逻辑,使用新的解析器接口进行token处理
- 添加符号表语义分析功能框架
- 修复词法分析器中token移动时的空指针检查
- 统一使用scc_tree_dump_printf替代直接的scc_printf调用
2026-03-09 15:25:12 +08:00

453 lines
18 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include <assert.h>
#include <scc_lexer.h>
#include <scc_parser.h>
#include <string.h>
#include <utest/acutest.h>
typedef scc_ast_node_t *(*scc_parse_node_func)(scc_parser_t *parser);
static scc_ast_node_t *process_input(const char *input,
scc_parse_node_func parse_func) {
int res = 0;
scc_sstream_t mem_stream;
res = scc_sstream_init_by_buffer(&mem_stream, input, strlen(input), false,
16);
Assert(res == 0);
scc_lexer_t lexer;
scc_lexer_init(&lexer, scc_sstream_to_ring(&mem_stream));
scc_lexer_tok_ring_t *tok_ring = scc_lexer_to_ring(&lexer, 8, false);
scc_parser_t parser;
scc_parser_init(&parser, tok_ring, null);
scc_ast_node_t *ret = parse_func(&parser);
cbool not_eof = false;
scc_ring_not_eof(*parser.ring, not_eof);
Assert(!not_eof == true);
scc_lexer_drop_ring(parser.ring);
scc_parser_drop(&parser);
scc_lexer_drop(&lexer);
scc_sstream_drop(&mem_stream);
return ret;
}
typedef void (*scc_tree_dump_output_t)(void *userdata, const char *fmt, ...);
#define BUFFER_SIZE (4096)
char expect_buffer[BUFFER_SIZE];
char output_buffer[BUFFER_SIZE];
static void dump2buffer(void *_buffer, const char *fmt, ...) {
char *buffer = _buffer;
va_list args;
va_start(args, fmt);
scc_vsnprintf(buffer + strlen(buffer), BUFFER_SIZE - strlen(buffer) - 1,
fmt, args);
va_end(args);
}
#define SCC_CHECK_AST(expect_node_ptr, str, parse_func) \
do { \
scc_ast_node_t *output_node_ptr = \
process_input(str, (scc_parse_node_func)parse_func); \
scc_tree_dump_ctx_t ctx; \
expect_buffer[0] = '\n', expect_buffer[1] = '\0'; \
scc_tree_dump_ctx_init(&ctx, true, dump2buffer, expect_buffer); \
scc_ast_dump_node(&ctx, expect_node_ptr); \
scc_tree_dump_ctx_drop(&ctx); \
output_buffer[0] = '\n', output_buffer[1] = '\0'; \
scc_tree_dump_ctx_init(&ctx, true, dump2buffer, output_buffer); \
scc_ast_dump_node(&ctx, output_node_ptr); \
scc_tree_dump_ctx_drop(&ctx); \
TEST_CHECK(strcmp(output_buffer, expect_buffer) == 0); \
TEST_MSG("Expected: %s", expect_buffer); \
TEST_MSG("Produced: %s", output_buffer); \
} while (0);
static void test_parser_unit(void) {
scc_ast_decl_t int_decl = {
.base.type = SCC_AST_DECL_VAR,
.var.name = "a",
.var.init = null,
.var.type = &(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN,
.builtin.type = SCC_AST_BUILTIN_TYPE_INT},
};
SCC_CHECK_AST(&int_decl.base, "int a;", scc_parse_declaration);
scc_ast_decl_t func_decl = {
.base.type = SCC_AST_DECL_FUNC,
.func.name = "main",
.func.body =
&(scc_ast_stmt_t){
.base.type = SCC_AST_STMT_COMPOUND,
.compound.block_items = {0},
},
.func.type =
&(scc_ast_type_t){
.base.type = SCC_AST_TYPE_FUNCTION,
.function.is_variadic = false,
.function.param_types = {0},
.function.return_type =
&(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN,
.builtin.type = SCC_AST_BUILTIN_TYPE_INT},
},
};
SCC_CHECK_AST(&func_decl.base, "int main(void) {}", scc_parse_declaration);
scc_ast_decl_t *decls[] = {&func_decl};
scc_ast_translation_unit_t tu = {
.base.type = SCC_AST_TRANSLATION_UNIT,
.declarations.data = decls,
.declarations.cap = 1,
.declarations.size = 1,
};
SCC_CHECK_AST(&tu.base, "int main(void) {}", scc_parse_translation_unit);
// SCC_CHECK_AST(&func_decl.base, "int main(void);", scc_parse_declaration);
{
scc_ast_node_t *items[] = {
(scc_ast_node_t *)&(scc_ast_stmt_t){
.base.type = SCC_AST_STMT_RETURN,
.return_stmt.expr =
&(scc_ast_expr_t){
.base.type = SCC_AST_EXPR_INT_LITERAL,
.literal.lexme = "0",
},
},
};
scc_ast_decl_t func_decl = {
.base.type = SCC_AST_DECL_FUNC,
.func.name = "main",
.func.body =
&(scc_ast_stmt_t){
.base.type = SCC_AST_STMT_COMPOUND,
.compound.block_items.cap = 1,
.compound.block_items.size = 1,
.compound.block_items.data = items,
},
.func.type =
&(scc_ast_type_t){
.base.type = SCC_AST_TYPE_FUNCTION,
.function.is_variadic = false,
.function.param_types = {0},
.function.return_type =
&(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN,
.builtin.type =
SCC_AST_BUILTIN_TYPE_INT},
},
};
scc_ast_decl_t *decls[] = {&func_decl};
scc_ast_translation_unit_t tu = {
.base.type = SCC_AST_TRANSLATION_UNIT,
.declarations.cap = 1,
.declarations.size = 1,
.declarations.data = decls,
};
SCC_CHECK_AST(&tu.base, "int main(void) { return 0; }",
scc_parse_translation_unit);
}
}
static scc_ast_expr_t make_binary(scc_ast_expr_op_t op, scc_ast_expr_t *lhs,
scc_ast_expr_t *rhs) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_BINARY};
expr.binary.op = op;
expr.binary.lhs = lhs;
expr.binary.rhs = rhs;
return expr;
}
static scc_ast_expr_t make_unary(scc_ast_expr_op_t op,
scc_ast_expr_t *operand) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_UNARY};
expr.unary.op = op;
expr.unary.operand = operand;
return expr;
}
static scc_ast_expr_t make_identifier(char *name) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_IDENTIFIER};
expr.identifier.name = name;
return expr;
}
static scc_ast_expr_t make_int_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_INT_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_float_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_FLOAT_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_string_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_STRING_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_char_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_CHAR_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_conditional(scc_ast_expr_t *cond,
scc_ast_expr_t *then_expr,
scc_ast_expr_t *else_expr) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_COND};
expr.cond.cond = cond;
expr.cond.then_expr = then_expr;
expr.cond.else_expr = else_expr;
return expr;
}
static scc_ast_expr_t make_call(scc_ast_expr_t *callee,
scc_ast_expr_vec_t *args) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_CALL};
expr.call.callee = callee;
// 注意args 需要提前初始化,此处简化处理,实际测试中可能需要动态分配
// 我们将在具体测试中手动初始化 args 数组
return expr;
}
static void test_parser_expression(void) {
// 1. 基本表达式:标识符、整数常量、字符串字面量、括号
{
scc_ast_expr_t ident = make_identifier("x");
SCC_CHECK_AST(&ident.base, "x", scc_parse_expression);
scc_ast_expr_t int_lit = make_int_literal("42");
SCC_CHECK_AST(&int_lit.base, "42", scc_parse_expression);
scc_ast_expr_t str_lit = make_string_literal("\"hello\"");
SCC_CHECK_AST(&str_lit.base, "\"hello\"", scc_parse_expression);
// 括号表达式
scc_ast_expr_t paren_ident = make_identifier("y");
SCC_CHECK_AST(&paren_ident.base, "(y)", scc_parse_expression);
}
// 2. 后缀表达式
{
// 数组下标a[10]
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t index = make_int_literal("10");
scc_ast_expr_t subscript = {.base.type = SCC_AST_EXPR_ARRAY_SUBSCRIPT};
subscript.subscript.array = &a;
subscript.subscript.index = &index;
SCC_CHECK_AST(&subscript.base, "a[10]", scc_parse_expression);
// 函数调用f()
scc_ast_expr_t f = make_identifier("f");
scc_ast_expr_t call = {.base.type = SCC_AST_EXPR_CALL};
call.call.callee = &f;
scc_ast_expr_vec_t args;
scc_vec_init(args);
call.call.args = args; // 空参数列表
SCC_CHECK_AST(&call.base, "f()", scc_parse_expression);
// 函数调用带参数f(1, x)
scc_ast_expr_t f2 = make_identifier("f");
scc_ast_expr_t arg1 = make_int_literal("1");
scc_ast_expr_t arg2 = make_identifier("x");
scc_ast_expr_vec_t args2;
scc_vec_init(args2);
scc_vec_push(args2, &arg1);
scc_vec_push(args2, &arg2);
scc_ast_expr_t call2 = {.base.type = SCC_AST_EXPR_CALL};
call2.call.callee = &f2;
call2.call.args = args2;
SCC_CHECK_AST(&call2.base, "f(1, x)", scc_parse_expression);
// 成员访问 . 和 ->
scc_ast_expr_t s = make_identifier("s");
scc_ast_expr_t dot = {.base.type = SCC_AST_EXPR_MEMBER};
dot.member.base = &s;
dot.member.member_name = "field";
SCC_CHECK_AST(&dot.base, "s.field", scc_parse_expression);
scc_ast_expr_t p = make_identifier("p");
scc_ast_expr_t arrow = {.base.type = SCC_AST_EXPR_PTR_MEMBER};
arrow.ptr_member.base = &p;
arrow.ptr_member.member_name = "field";
SCC_CHECK_AST(&arrow.base, "p->field", scc_parse_expression);
// 后缀 ++/--
scc_ast_expr_t x = make_identifier("x");
scc_ast_expr_t post_inc = make_unary(SCC_AST_OP_POSTFIX_INCREMENT, &x);
SCC_CHECK_AST(&post_inc.base, "x++", scc_parse_expression);
scc_ast_expr_t post_dec = make_unary(SCC_AST_OP_POSTFIX_DECREMENT, &x);
SCC_CHECK_AST(&post_dec.base, "x--", scc_parse_expression);
// 复合字面量 TODO: (int){1,2} 需要更复杂的构造,暂略
// SCC_CHECK_AST(..., "(int){1,2}", scc_parse_expression);
}
// 3. 一元表达式
{
scc_ast_expr_t x = make_identifier("x");
scc_ast_expr_t pre_inc = make_unary(SCC_AST_OP_PREFIX_INCREMENT, &x);
SCC_CHECK_AST(&pre_inc.base, "++x", scc_parse_expression);
scc_ast_expr_t pre_dec = make_unary(SCC_AST_OP_PREFIX_DECREMENT, &x);
SCC_CHECK_AST(&pre_dec.base, "--x", scc_parse_expression);
scc_ast_expr_t addr = make_unary(SCC_AST_OP_ADDRESS_OF, &x);
SCC_CHECK_AST(&addr.base, "&x", scc_parse_expression);
scc_ast_expr_t deref = make_unary(SCC_AST_OP_INDIRECTION, &x);
SCC_CHECK_AST(&deref.base, "*x", scc_parse_expression);
scc_ast_expr_t plus = make_unary(SCC_AST_OP_UNARY_PLUS, &x);
SCC_CHECK_AST(&plus.base, "+x", scc_parse_expression);
scc_ast_expr_t minus = make_unary(SCC_AST_OP_UNARY_MINUS, &x);
SCC_CHECK_AST(&minus.base, "-x", scc_parse_expression);
scc_ast_expr_t bit_not = make_unary(SCC_AST_OP_BITWISE_NOT, &x);
SCC_CHECK_AST(&bit_not.base, "~x", scc_parse_expression);
scc_ast_expr_t log_not = make_unary(SCC_AST_OP_LOGICAL_NOT, &x);
SCC_CHECK_AST(&log_not.base, "!x", scc_parse_expression);
// sizeof 两种形式
// sizeof 表达式
scc_ast_expr_t sizeof_expr = {.base.type = SCC_AST_EXPR_SIZE_OF};
sizeof_expr.attr_of.expr = &x;
SCC_CHECK_AST(&sizeof_expr.base, "sizeof x", scc_parse_expression);
// sizeof(类型名) 需要构造类型节点暂时略用TODO
// SCC_CHECK_AST(..., "sizeof(int)", scc_parse_expression);
}
// 4. 类型转换
{
// (int)x
// 需要构造类型节点这里简化用TODO
// scc_ast_type_t int_type = { .base.type = SCC_AST_TYPE_BUILTIN,
// .builtin.type = SCC_AST_BUILTIN_TYPE_INT }; scc_ast_expr_t x =
// make_identifier("x"); scc_ast_expr_t cast = { .base.type =
// SCC_AST_EXPR_CAST }; cast.cast.type = &int_type; cast.cast.expr = &x;
// SCC_CHECK_AST(&cast.base, "(int)x", scc_parse_expression);
}
// 5. 二元运算符(按优先级测试)
{
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t b = make_identifier("b");
scc_ast_expr_t c = make_identifier("c");
scc_ast_expr_t d = make_identifier("d");
// 乘除模优先级高于加减
scc_ast_expr_t mul = make_binary(SCC_AST_OP_MUL, &a, &b);
scc_ast_expr_t add = make_binary(SCC_AST_OP_ADD, &mul, &c);
SCC_CHECK_AST(&add.base, "a * b + c", scc_parse_expression);
// 左结合性 a - b - c => (a - b) - c
scc_ast_expr_t sub1 = make_binary(SCC_AST_OP_SUB, &a, &b);
scc_ast_expr_t sub2 = make_binary(SCC_AST_OP_SUB, &sub1, &c);
SCC_CHECK_AST(&sub2.base, "a - b - c", scc_parse_expression);
// 移位
scc_ast_expr_t shift = make_binary(SCC_AST_OP_LEFT_SHIFT, &a, &b);
SCC_CHECK_AST(&shift.base, "a << b", scc_parse_expression);
// 关系
scc_ast_expr_t lt = make_binary(SCC_AST_OP_LESS, &a, &b);
SCC_CHECK_AST(&lt.base, "a < b", scc_parse_expression);
// 相等
scc_ast_expr_t eq = make_binary(SCC_AST_OP_EQUAL, &a, &b);
SCC_CHECK_AST(&eq.base, "a == b", scc_parse_expression);
// 按位与、异或、或的优先级:& 高于 ^ 高于 |
scc_ast_expr_t bitand = make_binary(SCC_AST_OP_BITWISE_AND, &a, &b);
scc_ast_expr_t bitxor =
make_binary(SCC_AST_OP_BITWISE_XOR, &bitand, &c);
scc_ast_expr_t bitor = make_binary(SCC_AST_OP_BITWISE_OR, &bitxor, &d);
SCC_CHECK_AST(&bitor.base, "a & b ^ c | d", scc_parse_expression);
// 逻辑与、或:&& 高于 ||
scc_ast_expr_t logand = make_binary(SCC_AST_OP_LOGICAL_AND, &a, &b);
scc_ast_expr_t logor = make_binary(SCC_AST_OP_LOGICAL_OR, &logand, &c);
SCC_CHECK_AST(&logor.base, "a && b || c", scc_parse_expression);
}
// 6. 三元运算符
{
scc_ast_expr_t cond = make_identifier("a");
scc_ast_expr_t then_expr = make_identifier("b");
scc_ast_expr_t else_expr = make_identifier("c");
scc_ast_expr_t cond_expr =
make_conditional(&cond, &then_expr, &else_expr);
SCC_CHECK_AST(&cond_expr.base, "a ? b : c", scc_parse_expression);
// 右结合性 a ? b : c ? d : e => a ? b : (c ? d : e)
scc_ast_expr_t cond2 = make_identifier("c");
scc_ast_expr_t then2 = make_identifier("d");
scc_ast_expr_t else2 = make_identifier("e");
scc_ast_expr_t inner_cond = make_conditional(&cond2, &then2, &else2);
scc_ast_expr_t outer_cond =
make_conditional(&cond, &then_expr, &inner_cond);
SCC_CHECK_AST(&outer_cond.base, "a ? b : c ? d : e",
scc_parse_expression);
}
// 7. 赋值运算符(右结合)
{
// scc_ast_expr_t a = make_identifier("a");
// scc_ast_expr_t b = make_identifier("b");
// scc_ast_expr_t c = make_identifier("c");
// scc_ast_expr_t assign1 = make_binary(SCC_AST_OP_ASSIGN, &a, &b);
// scc_ast_expr_t assign2 =
// make_binary(SCC_AST_OP_ASSIGN, &assign1, &c); // a = (b = c)
// SCC_CHECK_AST(&assign2.base, "a = b = c", scc_parse_expression);
// scc_ast_expr_t add_assign = make_binary(SCC_AST_OP_ASSIGN_ADD, &a,
// &b); SCC_CHECK_AST(&add_assign.base, "a += b", scc_parse_expression);
}
// 8. 逗号运算符
{
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t b = make_identifier("b");
scc_ast_expr_t comma1 = make_binary(SCC_AST_OP_COMMA, &a, &b);
SCC_CHECK_AST(&comma1.base, "a, b", scc_parse_expression);
}
// 9. 混合优先级测试
{
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t b = make_identifier("b");
scc_ast_expr_t c = make_identifier("c");
scc_ast_expr_t d = make_identifier("d");
// a + b * c - d => (a + (b * c)) - d
scc_ast_expr_t mul = make_binary(SCC_AST_OP_MUL, &b, &c);
scc_ast_expr_t add = make_binary(SCC_AST_OP_ADD, &a, &mul);
scc_ast_expr_t sub = make_binary(SCC_AST_OP_SUB, &add, &d);
SCC_CHECK_AST(&sub.base, "a + b * c - d", scc_parse_expression);
// *p++ => *(p++)
scc_ast_expr_t p = make_identifier("p");
scc_ast_expr_t post_inc = make_unary(SCC_AST_OP_POSTFIX_INCREMENT, &p);
scc_ast_expr_t deref = make_unary(SCC_AST_OP_INDIRECTION, &post_inc);
SCC_CHECK_AST(&deref.base, "*p++", scc_parse_expression);
}
}
TEST_LIST = {
{"parser_unit", test_parser_unit},
{"parser_expression", test_parser_expression},
{NULL, NULL},
};