feat(parser): 启用parser和ast模块并重构解析器结构

- 在cbuild.toml中启用parser和ast依赖项
- 将AST内置类型枚举重命名为SCC_AST_BUILTIN_TYPE_*前缀格式
- 修复ast_def.h中的类型字段命名,将builtin改为type
- 添加逗号操作符支持到表达式操作符枚举中
- 更新字面量表达式的lexeme字段为const char*指针和owned标志
- 重构解析器头文件结构,分离为parser.h、parser_utils.h、scc_sema.h等
- 实现新的解析器工具函数,包括预览、消费、回溯等功能
- 更新声明解析逻辑,使用新的解析器接口进行token处理
- 添加符号表语义分析功能框架
- 修复词法分析器中token移动时的空指针检查
- 统一使用scc_tree_dump_printf替代直接的scc_printf调用
This commit is contained in:
zzy
2026-03-09 15:25:12 +08:00
parent a805814d3f
commit 1fceeca011
28 changed files with 2759 additions and 1987 deletions

View File

@@ -1,10 +1,6 @@
/*
415
ISO/IEC 9899:TC3
Committee Draft — September 7, 2007
WG14/N1256
A.2.3 Statements
(6.8)
statement:
labeled-statement
@@ -50,37 +46,37 @@ A.2.3 Statements
break ;
return expression(opt) ;
*/
#include <parser.h>
#include <parser_utils.h>
#include <scc_parser.h>
static inline scc_ast_stmt_t *ast_stmt_alloc() {
scc_ast_stmt_t *stmt = (scc_ast_stmt_t *)scc_malloc(sizeof(scc_ast_stmt_t));
Assert(stmt != null);
stmt->base.type = SCC_AST_TRANSLATION_UNIT;
stmt->base.type = SCC_AST_UNKNOWN;
stmt->base.loc = scc_pos_create();
return stmt;
}
static inline scc_ast_expr_t *ast_parse_paren_expression(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) {
if (!scc_parser_consume_if(parser, SCC_TOK_L_PAREN)) {
LOG_ERROR("Expected '(' before like `( expression )` .");
}
scc_ast_expr_t *ret = scc_parse_expression(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) {
LOG_ERROR("Expected ')' after like `( expression )` .");
if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) {
LOG_ERROR("Expected ')' before like `( expression )` .");
}
return ret;
}
static scc_ast_stmt_t *parse_label_statement(scc_parser_t *parser) {
const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_IDENT)) {
scc_lexer_tok_t tok = {0};
if (!scc_parser_next_consume(parser, &tok)) {
return null;
}
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) {
LOG_ERROR("Expected constant expression after case.");
if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) {
return null;
}
@@ -93,21 +89,20 @@ static scc_ast_stmt_t *parse_label_statement(scc_parser_t *parser) {
Assert(stmt != null);
stmt->base.type = SCC_AST_STMT_LABEL;
// TODO maybe use cstring
stmt->label_stmt.label = tok->value.cstr.data;
stmt->label_stmt.label = scc_cstring_as_cstr(&tok.lexeme);
stmt->label_stmt.stmt = statement;
return stmt;
}
static scc_ast_stmt_t *parse_case_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_CASE)) {
if (!scc_parser_consume_if(parser, SCC_TOK_CASE)) {
return null;
}
scc_ast_expr_t *expr = null;
// TODO = scc_parser_constant_expression();
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) {
LOG_ERROR("Expected constant expression after case.");
return null;
}
@@ -126,11 +121,11 @@ static scc_ast_stmt_t *parse_case_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_default_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_DEFAULT)) {
if (!scc_parser_consume_if(parser, SCC_TOK_DEFAULT)) {
return null;
}
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) {
LOG_ERROR("Expected constant expression after case.");
return null;
}
@@ -148,14 +143,14 @@ static scc_ast_stmt_t *parse_default_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_compound_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_BRACE)) {
if (!scc_parser_consume_if(parser, SCC_TOK_L_BRACE)) {
return null;
}
scc_ast_stmt_t *stmt = ast_stmt_alloc();
stmt->base.type = SCC_AST_STMT_COMPOUND;
scc_vec_init(stmt->compound.block_items);
while (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_BRACE)) {
while (!scc_parser_consume_if(parser, SCC_TOK_R_BRACE)) {
/// TODO
// scc_parse_is_decl();
scc_ast_node_t *ret = null;
@@ -175,7 +170,7 @@ static scc_ast_stmt_t *parse_compound_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_IF)) {
if (!scc_parser_consume_if(parser, SCC_TOK_IF)) {
return null;
}
@@ -186,7 +181,7 @@ static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) {
stmt->base.type = SCC_AST_STMT_IF;
stmt->if_stmt.cond = expression;
stmt->if_stmt.then_stmt = statement;
if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_ELSE)) {
if (scc_parser_consume_if(parser, SCC_TOK_ELSE)) {
stmt->if_stmt.opt_else_stmt = scc_parse_statement(parser);
} else {
stmt->if_stmt.opt_else_stmt = null;
@@ -195,7 +190,7 @@ static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_switch_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SWITCH)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SWITCH)) {
return null;
}
@@ -210,7 +205,7 @@ static scc_ast_stmt_t *parse_switch_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_while_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_WHILE)) {
if (!scc_parser_consume_if(parser, SCC_TOK_WHILE)) {
return null;
}
@@ -225,13 +220,13 @@ static scc_ast_stmt_t *parse_while_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_do_while_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_DO)) {
if (!scc_parser_consume_if(parser, SCC_TOK_DO)) {
return null;
}
scc_ast_stmt_t *statement = scc_parse_statement(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_WHILE)) {
if (!scc_parser_consume_if(parser, SCC_TOK_WHILE)) {
LOG_ERROR("Expected 'while' after do.");
// TODO 使用更好的错误处理,未来应当采用更好的内存管理器
scc_free(statement);
@@ -247,7 +242,7 @@ static scc_ast_stmt_t *parse_do_while_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_FOR)) {
if (!scc_parser_consume_if(parser, SCC_TOK_FOR)) {
return null;
}
@@ -256,7 +251,7 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) {
for ( declaration expression(opt) ; expression(opt) ) statement
*/
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) {
if (!scc_parser_consume_if(parser, SCC_TOK_L_PAREN)) {
LOG_ERROR("Expected '(' before like `( expression )` .");
}
@@ -266,19 +261,19 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) {
// TODO use decl or expr
stmt->for_stmt.init = (scc_ast_type_t *)scc_parse_expression(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
LOG_ERROR("Expected semicolon in for statement.");
}
stmt->for_stmt.cond = scc_parse_expression(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
LOG_ERROR("Expected semicolon in for statement.");
}
stmt->for_stmt.iter = scc_parse_expression(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) {
if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) {
LOG_ERROR("Expected ')' after like `( expression )` .");
}
@@ -290,28 +285,26 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) {
static scc_ast_stmt_t *parse_jump_statement(scc_parser_t *parser) {
scc_ast_stmt_t *stmt = ast_stmt_alloc();
if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_GOTO)) {
if (scc_parser_consume_if(parser, SCC_TOK_GOTO)) {
stmt->base.type = SCC_AST_STMT_GOTO;
if (scc_parse_is(parser->lex_stream, SCC_TOK_IDENT)) {
const scc_lexer_tok_t *tok =
scc_lexer_stream_current(parser->lex_stream);
stmt->goto_stmt.label = tok->value.cstr.data;
scc_lexer_stream_consume(parser->lex_stream);
scc_lexer_tok_t tok = {0};
if (scc_parser_next_consume(parser, &tok)) {
stmt->goto_stmt.label = scc_cstring_as_cstr(&tok.lexeme);
} else {
LOG_ERROR("Expected label after goto.");
}
} else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_CONTINUE)) {
} else if (scc_parser_consume_if(parser, SCC_TOK_CONTINUE)) {
stmt->base.type = SCC_AST_STMT_CONTINUE;
} else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_BREAK)) {
} else if (scc_parser_consume_if(parser, SCC_TOK_BREAK)) {
stmt->base.type = SCC_AST_STMT_BREAK;
} else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_RETURN)) {
} else if (scc_parser_consume_if(parser, SCC_TOK_RETURN)) {
stmt->base.type = SCC_AST_STMT_RETURN;
stmt->return_stmt.expr = scc_parse_expression(parser);
} else {
UNREACHABLE();
}
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
LOG_ERROR("Expected semicolon after jump statement.");
}
return stmt;
@@ -321,7 +314,7 @@ static scc_ast_stmt_t *parse_expression_statement(scc_parser_t *parser) {
scc_ast_stmt_t *stmt = ast_stmt_alloc();
stmt->base.type = SCC_AST_STMT_EXPR;
if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
stmt->expr.expr = null;
return stmt;
}
@@ -333,15 +326,20 @@ static scc_ast_stmt_t *parse_expression_statement(scc_parser_t *parser) {
return null;
}
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
LOG_ERROR("Expected semicolon after expression.");
}
return stmt;
}
scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream);
switch (tok->type) {
scc_ast_stmt_t *stmt;
const scc_lexer_tok_t *tok_ref;
tok_ref = scc_parser_peek(parser);
if (!tok_ref) {
return null;
}
switch (tok_ref->type) {
/*
(6.8.1)
labeled-statement:
@@ -350,15 +348,19 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
default : statement
*/
case SCC_TOK_IDENT:
// 注意需要检测下一个 token 是否为冒号,否则将需要判定成表达式语句
if (!scc_parse_peek_is(parser->lex_stream, 1, SCC_TOK_COLON)) {
tok_ref = scc_parser_next(parser);
if (tok_ref == null || tok_ref->type != SCC_TOK_COLON) {
break;
}
return parse_label_statement(parser);
case SCC_TOK_CASE:
return parse_case_statement(parser);
stmt = parse_label_statement(parser);
goto RETURN;
case SCC_TOK_CASE: {
stmt = parse_case_statement(parser);
goto RETURN;
}
case SCC_TOK_DEFAULT:
return parse_default_statement(parser);
stmt = parse_default_statement(parser);
goto RETURN;
/*
(6.8.2)
compound-statement:
@@ -373,7 +375,8 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
statement
*/
case SCC_TOK_L_BRACE:
return parse_compound_statement(parser);
stmt = parse_compound_statement(parser);
goto RETURN;
/*
(6.8.4)
selection-statement:
@@ -382,9 +385,11 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
switch ( expression ) statement
*/
case SCC_TOK_IF:
return parse_if_statement(parser);
stmt = parse_if_statement(parser);
goto RETURN;
case SCC_TOK_SWITCH:
return parse_switch_statement(parser);
stmt = parse_switch_statement(parser);
goto RETURN;
/*
(6.8.5)
iteration-statement:
@@ -396,11 +401,14 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
statement
*/
case SCC_TOK_WHILE:
return parse_while_statement(parser);
stmt = parse_while_statement(parser);
goto RETURN;
case SCC_TOK_DO:
return parse_do_while_statement(parser);
stmt = parse_do_while_statement(parser);
goto RETURN;
case SCC_TOK_FOR:
return parse_for_statement(parser);
stmt = parse_for_statement(parser);
goto RETURN;
/*
(6.8.6)
jump-statement:
@@ -413,7 +421,8 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
case SCC_TOK_CONTINUE:
case SCC_TOK_BREAK:
case SCC_TOK_RETURN:
return parse_jump_statement(parser);
stmt = parse_jump_statement(parser);
goto RETURN;
default:
break;
}
@@ -422,5 +431,10 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
expression-statement:
expression(opt) ;
*/
return parse_expression_statement(parser);
stmt = parse_expression_statement(parser);
RETURN:
scc_parser_reset(parser);
parser->sema_callbacks.on_stmt(parser->sema_callbacks.context,
stmt->base.type, stmt);
return stmt;
}