From 80714fe7e53f383fa05f60529703933c370783c7 Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Mon, 9 Mar 2026 22:45:18 +0800 Subject: [PATCH] =?UTF-8?q?feat(parser):=20=E5=AE=8C=E5=96=84=E7=B1=BB?= =?UTF-8?q?=E5=9E=8B=E8=A7=A3=E6=9E=90=E5=92=8C=E8=A1=A8=E8=BE=BE=E5=BC=8F?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 完善了scc_parse_type函数以正确解析基本类型,修复了条件表达式解析逻辑, 实现了for循环中声明和表达式的混合处理,并添加了对赋值语句和复杂表达式的支持。 fix(parser): 修复内存泄漏和解析器状态管理问题 修复了当tok参数为null时的内存泄漏问题,在标签语句解析中正确处理解析器状态回退, 并改进了表达式和声明的错误处理机制。 test(parser): 更新单元测试以验证修复的功能 更新了返回语句的测试值,添加了包含变量声明、赋值语句和复杂表达式的综合测试用例, 验证了赋值运算符的右结合性和复杂表达式的解析正确性。 --- libs/parser/include/parser_utils.h | 6 +- libs/parser/src/parse_decl.c | 23 +--- libs/parser/src/parse_expr.c | 3 - libs/parser/src/parse_stmt.c | 8 ++ libs/parser/src/parse_type.c | 18 ++- libs/parser/tests/test_parser_unit.c | 163 ++++++++++++++++++++++++--- 6 files changed, 182 insertions(+), 39 deletions(-) diff --git a/libs/parser/include/parser_utils.h b/libs/parser/include/parser_utils.h index c845631..6ea8486 100644 --- a/libs/parser/include/parser_utils.h +++ b/libs/parser/include/parser_utils.h @@ -53,7 +53,11 @@ static inline cbool scc_parser_next_consume(scc_parser_t *parser, cbool ok = false; scc_lexer_tok_t *raw_tok_ref = null; scc_ring_unsafe_next_ref_consume(*parser->ring, raw_tok_ref, ok); - scc_lexer_tok_move(tok, raw_tok_ref); + if (tok == null) { + scc_lexer_tok_drop(raw_tok_ref); + } else { + scc_lexer_tok_move(tok, raw_tok_ref); + } return ok; } diff --git a/libs/parser/src/parse_decl.c b/libs/parser/src/parse_decl.c index 17e8182..7375791 100644 --- a/libs/parser/src/parse_decl.c +++ b/libs/parser/src/parse_decl.c @@ -175,30 +175,13 @@ scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) { * declarator = initializer */ cbool ok; - const scc_lexer_tok_t *tok_ptr = scc_parser_next(parser); scc_lexer_tok_t tok; - if (tok_ptr == null) { - return null; - } - scc_ast_type_t *type = scc_malloc(sizeof(scc_ast_type_t)); + scc_ast_type_t *type = scc_parse_type(parser); if (type == null) { - LOG_FATAL("out of memory"); return null; } - if (tok_ptr->type != SCC_TOK_INT) { - // TODO back it - scc_parser_reset(parser); - return null; - } else { - type->base.type = SCC_AST_TYPE_BUILTIN; - type->base.loc = tok_ptr->loc; - type->builtin.type = SCC_AST_BUILTIN_TYPE_INT; - type->builtin.quals = (scc_ast_decl_specifier_t){0}; - } - - scc_parser_commit(parser); ok = scc_parser_next_consume(parser, &tok); if (ok == false) { return null; @@ -231,7 +214,7 @@ scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) { decl->base.type = SCC_AST_DECL_VAR; decl->var.type = type; decl->var.name = scc_cstring_as_cstr(&tok.lexeme); - decl->var.init = null; // scc_parse_expression(parser); + decl->var.init = scc_parse_expression(parser); goto RETURN; } // TODO @@ -255,7 +238,7 @@ scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) { return null; } - tok_ptr = scc_parser_peek(parser); + const scc_lexer_tok_t *tok_ptr = scc_parser_peek(parser); if (tok_ptr == null) { return null; } diff --git a/libs/parser/src/parse_expr.c b/libs/parser/src/parse_expr.c index ef44e44..6a9a58a 100644 --- a/libs/parser/src/parse_expr.c +++ b/libs/parser/src/parse_expr.c @@ -525,9 +525,6 @@ static scc_ast_expr_t *parse_assignment_expression(scc_parser_t *parser) { // unary-expression) scc_ast_expr_t *left = null; left = parse_conditional_expression(parser); - if (left) - return left; - left = parse_unary_expression(parser); if (!left) return null; diff --git a/libs/parser/src/parse_stmt.c b/libs/parser/src/parse_stmt.c index 77844ca..8628636 100644 --- a/libs/parser/src/parse_stmt.c +++ b/libs/parser/src/parse_stmt.c @@ -260,6 +260,12 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) { // TODO use decl or expr stmt->for_stmt.init = (scc_ast_type_t *)scc_parse_expression(parser); + if (stmt->for_stmt.init == null) { + stmt->for_stmt.init = (scc_ast_type_t *)scc_parse_declaration(parser); + } + if (stmt->for_stmt.init == null) { + LOG_ERROR("Expected expression or declaration in for statement."); + } if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) { LOG_ERROR("Expected semicolon in for statement."); @@ -348,8 +354,10 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) { default : statement */ case SCC_TOK_IDENT: + scc_parser_next(parser); tok_ref = scc_parser_next(parser); if (tok_ref == null || tok_ref->type != SCC_TOK_COLON) { + scc_parser_reset(parser); break; } stmt = parse_label_statement(parser); diff --git a/libs/parser/src/parse_type.c b/libs/parser/src/parse_type.c index f960d79..6456a50 100644 --- a/libs/parser/src/parse_type.c +++ b/libs/parser/src/parse_type.c @@ -235,8 +235,22 @@ cbool scc_parse_is_storage_class_start(scc_parser_t *parser) { } } scc_ast_type_t *scc_parse_type(scc_parser_t *parser) { - TODO(); - return null; + const scc_lexer_tok_t *tok_ptr = scc_parser_peek(parser); + scc_ast_type_t *ret = null; + if (tok_ptr->type == SCC_TOK_INT) { + scc_lexer_tok_t tok; + scc_parser_next_consume(parser, &tok); + ret = scc_malloc(sizeof(scc_ast_type_t)); + if (ret == null) { + LOG_FATAL("memory alloc failed"); + return ret; + } + ret->base.type = SCC_AST_TYPE_BUILTIN; + ret->base.loc = tok_ptr->loc; + ret->builtin.type = SCC_AST_BUILTIN_TYPE_INT; + scc_lexer_tok_drop(&tok); + } + return ret; } // // 前向声明辅助函数 diff --git a/libs/parser/tests/test_parser_unit.c b/libs/parser/tests/test_parser_unit.c index 82a5ca7..82bc31b 100644 --- a/libs/parser/tests/test_parser_unit.c +++ b/libs/parser/tests/test_parser_unit.c @@ -115,7 +115,7 @@ static void test_parser_unit(void) { .return_stmt.expr = &(scc_ast_expr_t){ .base.type = SCC_AST_EXPR_INT_LITERAL, - .literal.lexme = "0", + .literal.lexme = "65536", }, }, }; @@ -147,7 +147,134 @@ static void test_parser_unit(void) { .declarations.size = 1, .declarations.data = decls, }; - SCC_CHECK_AST(&tu.base, "int main(void) { return 0; }", + SCC_CHECK_AST(&tu.base, "int main(void) { return 65536; }", + scc_parse_translation_unit); + } + + { + // 修复后的测试用例:正确表示多语句函数 + // 创建变量声明: int a; + scc_ast_type_t a_type = {.base.type = SCC_AST_TYPE_BUILTIN, + .builtin.type = SCC_AST_BUILTIN_TYPE_INT}; + scc_ast_decl_t a_decl = {.base.type = SCC_AST_DECL_VAR, + .var.name = "a", + .var.type = &a_type}; + + // 创建变量声明: int b; + scc_ast_type_t b_type = {.base.type = SCC_AST_TYPE_BUILTIN, + .builtin.type = SCC_AST_BUILTIN_TYPE_INT}; + scc_ast_decl_t b_decl = {.base.type = SCC_AST_DECL_VAR, + .var.name = "b", + .var.type = &b_type}; + + // 创建表达式: 1 + 2 * 3 + scc_ast_expr_t expr1_3 = {.base.type = SCC_AST_EXPR_INT_LITERAL, + .literal.lexme = "3"}; + scc_ast_expr_t expr1_2 = {.base.type = SCC_AST_EXPR_INT_LITERAL, + .literal.lexme = "2"}; + scc_ast_expr_t expr1_mul = {.base.type = SCC_AST_EXPR_BINARY, + .binary.op = SCC_AST_OP_MUL, + .binary.lhs = &expr1_2, + .binary.rhs = &expr1_3}; + scc_ast_expr_t expr1_1 = {.base.type = SCC_AST_EXPR_INT_LITERAL, + .literal.lexme = "1"}; + scc_ast_expr_t expr1_add = {.base.type = SCC_AST_EXPR_BINARY, + .binary.op = SCC_AST_OP_ADD, + .binary.lhs = &expr1_1, + .binary.rhs = &expr1_mul}; + + // 创建赋值语句: a = 1 + 2 * 3; + scc_ast_expr_t a_expr1 = {.base.type = SCC_AST_EXPR_IDENTIFIER, + .identifier.name = "a"}; + scc_ast_expr_t assign1 = {.base.type = SCC_AST_EXPR_BINARY, + .binary.op = SCC_AST_OP_ASSIGN, + .binary.lhs = &a_expr1, + .binary.rhs = &expr1_add}; + scc_ast_stmt_t assign1_stmt = {.base.type = SCC_AST_STMT_EXPR, + .expr.expr = &assign1}; + + // 创建赋值语句: b = 7; + scc_ast_expr_t expr2_7 = {.base.type = SCC_AST_EXPR_INT_LITERAL, + .literal.lexme = "7"}; + scc_ast_expr_t b_expr1 = {.base.type = SCC_AST_EXPR_IDENTIFIER, + .identifier.name = "b"}; + scc_ast_expr_t assign2 = {.base.type = SCC_AST_EXPR_BINARY, + .binary.op = SCC_AST_OP_ASSIGN, + .binary.lhs = &b_expr1, + .binary.rhs = &expr2_7}; + scc_ast_stmt_t assign2_stmt = {.base.type = SCC_AST_STMT_EXPR, + .expr.expr = &assign2}; + + // 创建表达式: a - b + 1 + scc_ast_expr_t a_expr2 = {.base.type = SCC_AST_EXPR_IDENTIFIER, + .identifier.name = "a"}; + scc_ast_expr_t b_expr2 = {.base.type = SCC_AST_EXPR_IDENTIFIER, + .identifier.name = "b"}; + scc_ast_expr_t sub_expr = {.base.type = SCC_AST_EXPR_BINARY, + .binary.op = SCC_AST_OP_SUB, + .binary.lhs = &a_expr2, + .binary.rhs = &b_expr2}; + scc_ast_expr_t expr3_1 = {.base.type = SCC_AST_EXPR_INT_LITERAL, + .literal.lexme = "1"}; + scc_ast_expr_t add_expr = {.base.type = SCC_AST_EXPR_BINARY, + .binary.op = SCC_AST_OP_ADD, + .binary.lhs = &sub_expr, + .binary.rhs = &expr3_1}; + + // 创建赋值语句: a = a - b + 1; + scc_ast_expr_t a_expr3 = {.base.type = SCC_AST_EXPR_IDENTIFIER, + .identifier.name = "a"}; + scc_ast_expr_t assign3 = {.base.type = SCC_AST_EXPR_BINARY, + .binary.op = SCC_AST_OP_ASSIGN, + .binary.lhs = &a_expr3, + .binary.rhs = &add_expr}; + scc_ast_stmt_t assign3_stmt = {.base.type = SCC_AST_STMT_EXPR, + .expr.expr = &assign3}; + + // 创建return语句: return a; + scc_ast_expr_t return_expr = {.base.type = SCC_AST_EXPR_IDENTIFIER, + .identifier.name = "a"}; + scc_ast_stmt_t return_stmt = {.base.type = SCC_AST_STMT_RETURN, + .return_stmt.expr = &return_expr}; + + // 创建复合语句块 + scc_ast_node_t *items[] = { + (scc_ast_node_t *)&a_decl, (scc_ast_node_t *)&b_decl, + (scc_ast_node_t *)&assign1_stmt, (scc_ast_node_t *)&assign2_stmt, + (scc_ast_node_t *)&assign3_stmt, (scc_ast_node_t *)&return_stmt}; + + scc_ast_type_t return_type = {.base.type = SCC_AST_TYPE_BUILTIN, + .builtin.type = SCC_AST_BUILTIN_TYPE_INT}; + + scc_ast_type_t func_type = {.base.type = SCC_AST_TYPE_FUNCTION, + .function.is_variadic = false, + .function.param_types = {0}, + .function.return_type = &return_type}; + + scc_ast_decl_t func_decl = { + .base.type = SCC_AST_DECL_FUNC, + .func.name = "main", + .func.body = &(scc_ast_stmt_t){.base.type = SCC_AST_STMT_COMPOUND, + .compound.block_items.cap = 6, + .compound.block_items.size = 6, + .compound.block_items.data = items}, + .func.type = &func_type}; + + scc_ast_decl_t *decls[] = {&func_decl}; + scc_ast_translation_unit_t tu = {.base.type = SCC_AST_TRANSLATION_UNIT, + .declarations.cap = 1, + .declarations.size = 1, + .declarations.data = decls}; + + SCC_CHECK_AST(&tu.base, + "int main() {\n" + " int a;\n" + " int b;\n" + " a = 1 + 2 * 3;\n" + " b = 7;\n" + " a = a - b + 1;\n" + " return a;\n" + "}\n", scc_parse_translation_unit); } } @@ -337,8 +464,8 @@ static void test_parser_expression(void) { // scc_ast_type_t int_type = { .base.type = SCC_AST_TYPE_BUILTIN, // .builtin.type = SCC_AST_BUILTIN_TYPE_INT }; scc_ast_expr_t x = // make_identifier("x"); scc_ast_expr_t cast = { .base.type = - // SCC_AST_EXPR_CAST }; cast.cast.type = &int_type; cast.cast.expr = &x; - // SCC_CHECK_AST(&cast.base, "(int)x", scc_parse_expression); + // SCC_AST_EXPR_CAST }; cast.cast.type = &int_type; cast.cast.expr = + // &x; SCC_CHECK_AST(&cast.base, "(int)x", scc_parse_expression); } // 5. 二元运算符(按优先级测试) @@ -405,16 +532,26 @@ static void test_parser_expression(void) { // 7. 赋值运算符(右结合) { - // scc_ast_expr_t a = make_identifier("a"); - // scc_ast_expr_t b = make_identifier("b"); - // scc_ast_expr_t c = make_identifier("c"); - // scc_ast_expr_t assign1 = make_binary(SCC_AST_OP_ASSIGN, &a, &b); - // scc_ast_expr_t assign2 = - // make_binary(SCC_AST_OP_ASSIGN, &assign1, &c); // a = (b = c) - // SCC_CHECK_AST(&assign2.base, "a = b = c", scc_parse_expression); + scc_ast_expr_t a = make_identifier("a"); + scc_ast_expr_t b = make_identifier("b"); + scc_ast_expr_t c = make_identifier("c"); + scc_ast_expr_t int_lit = make_int_literal("42"); + scc_ast_expr_t assign1 = make_binary(SCC_AST_OP_ASSIGN, &b, &c); + scc_ast_expr_t assign2 = + make_binary(SCC_AST_OP_ASSIGN, &a, &assign1); // a = (b = c) + SCC_CHECK_AST(&assign2.base, "a = b = c", scc_parse_expression); - // scc_ast_expr_t add_assign = make_binary(SCC_AST_OP_ASSIGN_ADD, &a, - // &b); SCC_CHECK_AST(&add_assign.base, "a += b", scc_parse_expression); + scc_ast_expr_t assign3 = make_binary(SCC_AST_OP_ASSIGN, &a, &int_lit); + SCC_CHECK_AST(&assign3.base, "a = 42", scc_parse_expression); + + scc_ast_expr_t assign4 = make_binary(SCC_AST_OP_SUB, &a, &b); + scc_ast_expr_t assign5 = + make_binary(SCC_AST_OP_ADD, &assign4, &int_lit); + scc_ast_expr_t assign6 = make_binary(SCC_AST_OP_ASSIGN, &a, &assign5); + SCC_CHECK_AST(&assign6.base, "a = a - b + 42", scc_parse_expression); + + scc_ast_expr_t add_assign = make_binary(SCC_AST_OP_ASSIGN_ADD, &a, &b); + SCC_CHECK_AST(&add_assign.base, "a += b", scc_parse_expression); } // 8. 逗号运算符