From 1fceeca011fe938bb4e13cbe78b4516daeabbed0 Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Mon, 9 Mar 2026 15:25:12 +0800 Subject: [PATCH] =?UTF-8?q?feat(parser):=20=E5=90=AF=E7=94=A8parser?= =?UTF-8?q?=E5=92=8Cast=E6=A8=A1=E5=9D=97=E5=B9=B6=E9=87=8D=E6=9E=84?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E5=99=A8=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在cbuild.toml中启用parser和ast依赖项 - 将AST内置类型枚举重命名为SCC_AST_BUILTIN_TYPE_*前缀格式 - 修复ast_def.h中的类型字段命名,将builtin改为type - 添加逗号操作符支持到表达式操作符枚举中 - 更新字面量表达式的lexeme字段为const char*指针和owned标志 - 重构解析器头文件结构,分离为parser.h、parser_utils.h、scc_sema.h等 - 实现新的解析器工具函数,包括预览、消费、回溯等功能 - 更新声明解析逻辑,使用新的解析器接口进行token处理 - 添加符号表语义分析功能框架 - 修复词法分析器中token移动时的空指针检查 - 统一使用scc_tree_dump_printf替代直接的scc_printf调用 --- cbuild.toml | 4 +- libs/ast/include/ast_def.h | 45 +- libs/ast/include/scc_ast.h | 1 + libs/ast/src/ast_dump.c | 51 +- libs/ast/src/scc_ast.c | 0 libs/lexer/include/scc_lexer_token.h | 5 +- libs/lexer/src/lexer.c | 4 +- libs/parser/include/parser.h | 113 -- libs/parser/include/parser_utils.h | 69 + libs/parser/include/scc_parser.h | 75 + libs/parser/include/scc_sema.h | 25 + libs/parser/include/sema_symtab.h | 28 + libs/parser/src/parse_decl.c | 364 +++-- libs/parser/src/parse_expr.c | 1395 +++++++++-------- libs/parser/src/parse_stmt.c | 136 +- libs/parser/src/parse_type.c | 1636 ++++++++++---------- libs/parser/src/{parser.c => scc_parser.c} | 39 +- libs/parser/src/scc_sema.c | 15 + libs/parser/src/sema_symtab.c | 58 + libs/parser/tests/test_parser_unit.c | 453 ++++++ libs/pproc/include/scc_pproc.h | 5 +- libs/pproc/src/scc_pproc.c | 21 +- libs/tree_dump/include/tree_dump.h | 47 +- runtime/scc_core/include/scc_core_impl.h | 2 + runtime/scc_core/include/scc_core_ring.h | 108 +- runtime/scc_core/src/core_impl.c | 3 - src/main.c | 43 +- tests/simple/00_main.c | 1 + 28 files changed, 2759 insertions(+), 1987 deletions(-) create mode 100644 libs/ast/src/scc_ast.c delete mode 100644 
libs/parser/include/parser.h create mode 100644 libs/parser/include/parser_utils.h create mode 100644 libs/parser/include/scc_parser.h create mode 100644 libs/parser/include/scc_sema.h create mode 100644 libs/parser/include/sema_symtab.h rename libs/parser/src/{parser.c => scc_parser.c} (77%) create mode 100644 libs/parser/src/scc_sema.c create mode 100644 libs/parser/src/sema_symtab.c create mode 100644 libs/parser/tests/test_parser_unit.c create mode 100644 tests/simple/00_main.c diff --git a/cbuild.toml b/cbuild.toml index 6bbad87..0c30497 100644 --- a/cbuild.toml +++ b/cbuild.toml @@ -6,8 +6,8 @@ dependencies = [ { name = "argparse", path = "./libs/argparse" }, { name = "lexer", path = "./libs/lexer" }, { name = "pproc", path = "./libs/pproc" }, - # { name = "parser", path = "./libs/parser" }, - # { name = "ast", path = "./libs/ast" }, + { name = "parser", path = "./libs/parser" }, + { name = "ast", path = "./libs/ast" }, # { name = "ast2ir", path = "./libs/ast2ir" }, # { name = "ir", path = "./libs/ir" }, ] diff --git a/libs/ast/include/ast_def.h b/libs/ast/include/ast_def.h index cb3bf78..4c1ad3d 100644 --- a/libs/ast/include/ast_def.h +++ b/libs/ast/include/ast_def.h @@ -95,19 +95,19 @@ typedef struct { * @brief 内置类型枚举 */ typedef enum { - TYPE_VOID, - TYPE_CHAR, - TYPE_SHORT, - TYPE_INT, - TYPE_LONG, - TYPE_LONG_LONG, - TYPE_FLOAT, - TYPE_DOUBLE, - TYPE_LONG_DOUBLE, - TYPE_BOOL, - TYPE_COMPLEX_FLOAT, - TYPE_COMPLEX_DOUBLE, - TYPE_COMPLEX_LONG_DOUBLE, + SCC_AST_BUILTIN_TYPE_VOID, + SCC_AST_BUILTIN_TYPE_CHAR, + SCC_AST_BUILTIN_TYPE_SHORT, + SCC_AST_BUILTIN_TYPE_INT, + SCC_AST_BUILTIN_TYPE_LONG, + SCC_AST_BUILTIN_TYPE_LONG_LONG, + SCC_AST_BUILTIN_TYPE_FLOAT, + SCC_AST_BUILTIN_TYPE_DOUBLE, + SCC_AST_BUILTIN_TYPE_LONG_DOUBLE, + SCC_AST_BUILTIN_TYPE_BOOL, + SCC_AST_BUILTIN_TYPE_COMPLEX_FLOAT, + SCC_AST_BUILTIN_TYPE_COMPLEX_DOUBLE, + SCC_AST_BUILTIN_TYPE_COMPLEX_LONG_DOUBLE, } scc_ast_builtin_type_t; /** @@ -150,7 +150,7 @@ struct scc_ast_type { scc_ast_node_t 
base; union { struct { - scc_ast_builtin_type_t builtin; + scc_ast_builtin_type_t type; scc_ast_decl_specifier_t quals; } builtin; struct { @@ -188,7 +188,6 @@ struct scc_ast_type { typedef enum scc_ast_expr_op { /* 无操作符 */ SCC_AST_OP_NONE = 0, - /* 赋值操作符 */ SCC_AST_OP_ASSIGN, // = SCC_AST_OP_ASSIGN_ADD, // += @@ -201,40 +200,34 @@ typedef enum scc_ast_expr_op { SCC_AST_OP_ASSIGN_OR, // |= SCC_AST_OP_ASSIGN_LSHIFT, // <<= SCC_AST_OP_ASSIGN_RSHIFT, // >>= - /* 条件操作符 */ SCC_AST_OP_CONDITIONAL, // ?: - + /* 逗号操作符 */ + SCC_AST_OP_COMMA, // , /* 逻辑操作符 */ SCC_AST_OP_LOGICAL_OR, // || SCC_AST_OP_LOGICAL_AND, // && - /* 位操作符 */ SCC_AST_OP_BITWISE_OR, // | SCC_AST_OP_BITWISE_XOR, // ^ SCC_AST_OP_BITWISE_AND, // & - /* 相等性操作符 */ SCC_AST_OP_EQUAL, // == SCC_AST_OP_NOT_EQUAL, // != - /* 关系操作符 */ SCC_AST_OP_LESS, // < SCC_AST_OP_GREATER, // > SCC_AST_OP_LESS_EQUAL, // <= SCC_AST_OP_GREATER_EQUAL, // >= - /* 移位操作符 */ SCC_AST_OP_LEFT_SHIFT, // << SCC_AST_OP_RIGHT_SHIFT, // >> - /* 算术操作符 */ SCC_AST_OP_ADD, // + SCC_AST_OP_SUB, // - SCC_AST_OP_MUL, // * SCC_AST_OP_DIV, // / SCC_AST_OP_MOD, // % - /* 一元操作符 */ SCC_AST_OP_UNARY_PLUS, // + (一元) SCC_AST_OP_UNARY_MINUS, // - (一元) @@ -246,7 +239,6 @@ typedef enum scc_ast_expr_op { SCC_AST_OP_PREFIX_DECREMENT, // -- (前缀) SCC_AST_OP_POSTFIX_INCREMENT, // ++ (后缀) SCC_AST_OP_POSTFIX_DECREMENT, // -- (后缀) - /* 成员访问 */ SCC_AST_OP_MEMBER_ACCESS, // . 
SCC_AST_OP_PTR_MEMBER_ACCESS, // -> @@ -312,7 +304,8 @@ struct scc_ast_expr { } compound_literal; // 字面量 struct { - scc_cstring_t lexme; + const char *lexme; + cbool owned; } literal; // 标识符 struct { @@ -408,7 +401,7 @@ struct scc_ast_decl { struct { const char *name; scc_ast_type_t *type; // 函数类型 - scc_ast_stmt_t *body; // 可为 NULL(只有声明) or + scc_ast_stmt_t *body; // 可为 null 表示只有声明 } func; // 参数声明 struct { diff --git a/libs/ast/include/scc_ast.h b/libs/ast/include/scc_ast.h index feb4425..7d6d56c 100644 --- a/libs/ast/include/scc_ast.h +++ b/libs/ast/include/scc_ast.h @@ -2,5 +2,6 @@ #define __SCC_AST_H__ #include "ast_def.h" +#include "ast_dump.h" #endif /* __SCC_AST_H__ */ diff --git a/libs/ast/src/ast_dump.c b/libs/ast/src/ast_dump.c index 38853d5..d2b946a 100644 --- a/libs/ast/src/ast_dump.c +++ b/libs/ast/src/ast_dump.c @@ -127,31 +127,31 @@ static const char *get_node_type_str(scc_ast_node_type_t type) { // 获取内置类型名称 static const char *get_builtin_type_str(scc_ast_builtin_type_t type) { switch (type) { - case TYPE_VOID: + case SCC_AST_BUILTIN_TYPE_VOID: return "void"; - case TYPE_CHAR: + case SCC_AST_BUILTIN_TYPE_CHAR: return "char"; - case TYPE_SHORT: + case SCC_AST_BUILTIN_TYPE_SHORT: return "short"; - case TYPE_INT: + case SCC_AST_BUILTIN_TYPE_INT: return "int"; - case TYPE_LONG: + case SCC_AST_BUILTIN_TYPE_LONG: return "long"; - case TYPE_LONG_LONG: + case SCC_AST_BUILTIN_TYPE_LONG_LONG: return "long long"; - case TYPE_FLOAT: + case SCC_AST_BUILTIN_TYPE_FLOAT: return "float"; - case TYPE_DOUBLE: + case SCC_AST_BUILTIN_TYPE_DOUBLE: return "double"; - case TYPE_LONG_DOUBLE: + case SCC_AST_BUILTIN_TYPE_LONG_DOUBLE: return "long double"; - case TYPE_BOOL: + case SCC_AST_BUILTIN_TYPE_BOOL: return "_Bool"; - case TYPE_COMPLEX_FLOAT: + case SCC_AST_BUILTIN_TYPE_COMPLEX_FLOAT: return "float _Complex"; - case TYPE_COMPLEX_DOUBLE: + case SCC_AST_BUILTIN_TYPE_COMPLEX_DOUBLE: return "double _Complex"; - case TYPE_COMPLEX_LONG_DOUBLE: + case 
SCC_AST_BUILTIN_TYPE_COMPLEX_LONG_DOUBLE: return "long double _Complex"; default: return ""; @@ -259,7 +259,9 @@ static inline void start_node_dump(scc_ast_node_t *node, } // 通用的结束节点打印函数 -static inline void end_node_dump(scc_tree_dump_ctx_t *ctx) { scc_printf("\n"); } +static inline void end_node_dump(scc_tree_dump_ctx_t *ctx) { + scc_tree_dump_printf(ctx, "\n"); +} // 通用的递归转储辅助函数 static inline void dump_child_node(scc_ast_node_t *child, @@ -276,10 +278,11 @@ static inline void dump_child_node(scc_ast_node_t *child, #define BUILD_TYPE_NAME(ctx, prefix, name) \ do { \ if (ctx->use_color) { \ - scc_printf("%s'%s%s%s'%s", ctx->value_color, prefix, name, \ - ctx->reset_color, ctx->reset_color); \ + scc_tree_dump_printf(ctx, "%s'%s%s%s'%s", ctx->value_color, \ + prefix, name, ctx->reset_color, \ + ctx->reset_color); \ } else { \ - scc_printf("'%s%s'", prefix, name); \ + scc_tree_dump_printf(ctx, "'%s%s'", prefix, name); \ } \ } while (0) @@ -293,18 +296,18 @@ static void dump_type_impl(scc_ast_type_t *type, scc_tree_dump_ctx_t *ctx) { // 根据类型输出特定信息 switch (type->base.type) { case SCC_AST_TYPE_BUILTIN: - PRINT_QUOTED_VALUE(ctx, get_builtin_type_str(type->builtin.builtin)); + PRINT_QUOTED_VALUE(ctx, get_builtin_type_str(type->builtin.type)); break; case SCC_AST_TYPE_POINTER: if (type->pointer.pointee && type->pointer.pointee->base.type == SCC_AST_TYPE_BUILTIN) { const char *base_type = - get_builtin_type_str(type->pointer.pointee->builtin.builtin); + get_builtin_type_str(type->pointer.pointee->builtin.type); if (ctx->use_color) { - scc_printf("%s'%s *'%s", ctx->value_color, base_type, - ctx->reset_color); + scc_tree_dump_printf(ctx, "%s'%s *'%s", ctx->value_color, + base_type, ctx->reset_color); } else { - scc_printf("'%s *'", base_type); + scc_tree_dump_printf(ctx, "'%s *'", base_type); } } else { PRINT_QUOTED_VALUE(ctx, "pointer"); @@ -315,7 +318,7 @@ static void dump_type_impl(scc_ast_type_t *type, scc_tree_dump_ctx_t *ctx) { break; case SCC_AST_TYPE_FUNCTION: 
PRINT_QUOTED_VALUE(ctx, "function"); - scc_printf("\n"); + scc_tree_dump_printf(ctx, "\n"); if (type->function.return_type) { dump_type_impl(type->function.return_type, ctx); } @@ -451,7 +454,7 @@ static void dump_expr_impl(scc_ast_expr_t *expr, scc_tree_dump_ctx_t *ctx) { // 打印成员访问信息 scc_tree_print_indent(ctx); PRINT_NODE(ctx, "Member [\"%s\"]", expr->member.member_name); - scc_printf("\n"); + scc_tree_dump_printf(ctx, "\n"); break; case SCC_AST_EXPR_CAST: diff --git a/libs/ast/src/scc_ast.c b/libs/ast/src/scc_ast.c new file mode 100644 index 0000000..e69de29 diff --git a/libs/lexer/include/scc_lexer_token.h b/libs/lexer/include/scc_lexer_token.h index 456c968..21474e0 100644 --- a/libs/lexer/include/scc_lexer_token.h +++ b/libs/lexer/include/scc_lexer_token.h @@ -146,8 +146,8 @@ typedef enum scc_cstd { // END /* clang-format on */ -typedef enum scc_tok_type { /* clang-format off */ +typedef enum scc_tok_type { // must first becase the unknown token must be 0 #define X(str, subtype, tok) tok, SCC_CTOK_TABLE @@ -160,8 +160,8 @@ typedef enum scc_tok_type { #define X(name, subtype, tok, std) tok, SCC_CKEYWORD_TABLE #undef X -/* clang-format on*/ } scc_tok_type_t; +/* clang-format on */ typedef enum scc_tok_subtype { SCC_TOK_SUBTYPE_INVALID, // 错误占位 @@ -212,6 +212,7 @@ static inline scc_lexer_tok_t scc_lexer_tok_copy(const scc_lexer_tok_t *src) { // 移动 token(源 token 不再拥有 lexeme) static inline void scc_lexer_tok_move(scc_lexer_tok_t *dst, scc_lexer_tok_t *src) { + Assert(src != null); *dst = *src; src->lexeme.data = null; src->lexeme.size = 0; diff --git a/libs/lexer/src/lexer.c b/libs/lexer/src/lexer.c index 3cbd230..af0e509 100644 --- a/libs/lexer/src/lexer.c +++ b/libs/lexer/src/lexer.c @@ -474,8 +474,10 @@ void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) { if (subtype == SCC_TOK_SUBTYPE_EMPTYSPACE || subtype == SCC_TOK_SUBTYPE_COMMENT) { scc_lexer_tok_drop(token); + continue; + } else { + break; } - break; }; } diff --git 
a/libs/parser/include/parser.h b/libs/parser/include/parser.h deleted file mode 100644 index 996bde4..0000000 --- a/libs/parser/include/parser.h +++ /dev/null @@ -1,113 +0,0 @@ -/** - * @file parser.h - */ - -#ifndef __SCC_PARSER_H__ -#define __SCC_PARSER_H__ - -#include "scc_ast.h" -#include - -/** - * @brief 解析器状态 - */ -typedef struct scc_parser { - scc_lexer_stream_t *lex_stream; // 词法分析器 - scc_sema_callbacks_t sema_callbacks; // 语义分析回调 - scc_ast_translation_unit_t *translation_unit; // 翻译单元(根节点) - cbool has_error; // 是否有错误 -} scc_parser_t; - -/** - * @brief 检查当前 token 类型 - */ -static inline cbool scc_parse_is(scc_lexer_stream_t *stream, - scc_tok_type_t type) { - const scc_lexer_tok_t *tok = scc_lexer_stream_current(stream); - return tok->type == type; -} - -/** - * @brief 检查前瞻 token 类型 - */ -static inline cbool scc_parse_peek_is(scc_lexer_stream_t *stream, usize n, - scc_tok_type_t type) { - const scc_lexer_tok_t *tok = scc_lexer_stream_peek(stream, n); - return tok->type == type; -} - -/** - * @brief 如果当前 token 匹配则消费 - */ -static inline cbool scc_parse_consume_if(scc_lexer_stream_t *stream, - scc_tok_type_t type) { - if (scc_parse_is(stream, type)) { - scc_lexer_stream_consume(stream); - return true; - } - return false; -} - -/** - * @brief 消费当前 token 并返回它 - */ -static inline const scc_lexer_tok_t * -scc_parse_consume(scc_lexer_stream_t *stream) { - const scc_lexer_tok_t *tok = scc_lexer_stream_current(stream); - scc_lexer_stream_consume(stream); - return tok; -} - -/** - * @brief 初始化解析器 - * @param parser 解析器实例 - * @param lexer 词法分析器实例 - * @param callbacks 语义分析回调(可为 null) - */ -void scc_parser_init(scc_parser_t *parser, scc_lexer_stream_t *lexer, - scc_sema_callbacks_t *callbacks); - -/** - * @brief 销毁解析器 - * @param parser 解析器实例 - */ -void scc_parser_drop(scc_parser_t *parser); - -/** - * @brief 解析整个翻译单元 - * @param parser 解析器实例 - * @return 翻译单元 AST 节点 - */ -scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser); - -/** - * @brief 解析声明 - 
* @param parser 解析器实例 - * @return 声明 AST 节点 - */ -scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser); - -/** - * @brief 解析语句 - * @param parser 解析器实例 - * @return 语句 AST 节点 - */ -scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser); - -/** - * @brief 解析表达式 - * @param parser 解析器实例 - * @return 表达式 AST 节点 - */ -scc_ast_expr_t *scc_parse_expression(scc_parser_t *parser); - -/** - * @brief 解析类型 - * @param parser 解析器实例 - * @return 类型 AST 节点 - */ -scc_ast_type_t *scc_parse_type(scc_parser_t *parser); - -cbool scc_parse_is_declaration_start(scc_parser_t *parser, usize offset); - -#endif /* __SCC_PARSER_H__ */ diff --git a/libs/parser/include/parser_utils.h b/libs/parser/include/parser_utils.h new file mode 100644 index 0000000..c845631 --- /dev/null +++ b/libs/parser/include/parser_utils.h @@ -0,0 +1,69 @@ +#ifndef __SCC_PARSER_UTILS_H__ +#define __SCC_PARSER_UTILS_H__ + +#include "scc_parser.h" + +static inline const scc_lexer_tok_t *scc_parser_peek(scc_parser_t *parser) { + cbool ok = false; + const scc_lexer_tok_t *tok = null; + scc_ring_unsafe_peek_ref(*parser->ring, tok, ok); + if (ok == false) { + return null; + } + return tok; +} + +static inline const scc_lexer_tok_t *scc_parser_next(scc_parser_t *parser) { + cbool ok = false; + const scc_lexer_tok_t *tok = null; + scc_ring_unsafe_next_ref(*parser->ring, tok, ok); + if (ok == false) { + return null; + } + return tok; +} + +static inline cbool scc_parser_consume_if(scc_parser_t *parser, + scc_tok_type_t type) { + cbool ok = false; + scc_lexer_tok_t *tok = null; + scc_ring_unsafe_peek_ref(*parser->ring, tok, ok); + if (ok == false) { + return null; + } + if (tok->type == type) { + scc_lexer_tok_drop(tok); + scc_ring_unsafe_pure_next_consume(*parser->ring); + return true; + } else { + return false; + } +} + +static inline void scc_parser_store(scc_parser_t *parser) { + parser->checkpoint = _scc_ring_probe(*parser->ring); +} + +static inline void scc_parser_restore(scc_parser_t *parser) { + 
_scc_ring_probe(*parser->ring) = parser->checkpoint; +} + +static inline cbool scc_parser_next_consume(scc_parser_t *parser, + scc_lexer_tok_t *tok) { + cbool ok = false; + scc_lexer_tok_t *raw_tok_ref = null; + scc_ring_unsafe_next_ref_consume(*parser->ring, raw_tok_ref, ok); + scc_lexer_tok_move(tok, raw_tok_ref); + return ok; +} + +static inline void scc_parser_commit(scc_parser_t *parser) { + // Memory leak + scc_ring_consume(*parser->ring); +} + +static inline void scc_parser_reset(scc_parser_t *parser) { + scc_ring_reset(*parser->ring); +} + +#endif /* __SCC_PARSER_UTILS_H__ */ diff --git a/libs/parser/include/scc_parser.h b/libs/parser/include/scc_parser.h new file mode 100644 index 0000000..28b37e3 --- /dev/null +++ b/libs/parser/include/scc_parser.h @@ -0,0 +1,75 @@ +#ifndef __SCC_PARSER_H__ +#define __SCC_PARSER_H__ + +#include "scc_sema.h" +#include +#include +#include + +/** + * @brief 解析器状态 + */ +typedef struct scc_parser { + scc_lexer_tok_ring_t *ring; + usize checkpoint; + + scc_sema_callbacks_t sema_callbacks; + scc_ast_translation_unit_t *translation_unit; + int errcode; +} scc_parser_t; + +/** + * @brief 初始化解析器 + * @param parser 解析器实例 + * @param lexer 词法分析器实例 + * @param callbacks 语义分析回调(可为 null) + */ +void scc_parser_init(scc_parser_t *parser, scc_lexer_tok_ring_t *tok_ring, + scc_sema_callbacks_t *callbacks); + +/** + * @brief 销毁解析器 + * @param parser 解析器实例 + */ +void scc_parser_drop(scc_parser_t *parser); + +/** + * @brief 解析整个翻译单元 + * @param parser 解析器实例 + * @return 翻译单元 AST 节点 + */ +scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser); + +/** + * @brief 解析声明 + * @param parser 解析器实例 + * @return 声明 AST 节点 + */ +scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser); + +/** + * @brief 解析语句 + * @param parser 解析器实例 + * @return 语句 AST 节点 + */ +scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser); + +/** + * @brief 解析表达式 + * @param parser 解析器实例 + * @return 表达式 AST 节点 + */ +scc_ast_expr_t 
/**
 * @brief Semantic-analysis callback function type.
 *
 * The parser invokes a callback as
 * `cb(callbacks.context, node->base.type, node)`; see the on_decl call at
 * the end of scc_parse_declaration().
 *
 * @param context   the `context` pointer stored in scc_sema_callbacks_t
 * @param node_type AST node type tag of @p node
 * @param node      the AST node, passed as an untyped pointer
 */
typedef void (*scc_sema_callback_t)(void *context,
                                    scc_ast_node_type_t node_type, void *node);

/**
 * @brief Set of semantic-analysis callbacks.
 *
 * NOTE(review): scc_parse_declaration() calls on_decl without a null check,
 * so on_decl apparently must be non-null; whether the other hooks may be
 * null is not visible here.
 */
typedef struct scc_sema_callbacks {
    scc_sema_callback_t on_decl; // called with each parsed declaration
    scc_sema_callback_t on_stmt; // presumably the statement hook; call site not visible here
    scc_sema_callback_t on_expr; // presumably the expression hook; call site not visible here
    scc_sema_callback_t on_type; // presumably the type hook; call site not visible here
    void *context;               // passed back as the first callback argument
} scc_sema_callbacks_t;

/**
 * @brief Initialise a callback set.
 * @param callbacks callback set to fill in
 *
 * NOTE(review): the implementation (scc_sema.c) is not visible here; which
 * hooks it installs must be confirmed there.
 */
void scc_sema_init(scc_sema_callbacks_t *callbacks);
__SCC_SEMA_SYMTAB_H__ */ diff --git a/libs/parser/src/parse_decl.c b/libs/parser/src/parse_decl.c index 20b8805..17e8182 100644 --- a/libs/parser/src/parse_decl.c +++ b/libs/parser/src/parse_decl.c @@ -1,156 +1,159 @@ -#include - /* A.2.2 Declarations - (6.7) declaration: - declaration-specifiers init-declarator-list(opt) ; - (6.7) declaration-specifiers: - storage-class-specifier declaration-specifiers(opt) - type-specifier declaration-specifiers(opt) - type-qualifier declaration-specifiers(opt) - function-specifier declaration-specifiers(opt) - (6.7) init-declarator-list: - init-declarator - init-declarator-list , init-declarator - (6.7) init-declarator: - declarator - declarator = initializer - (6.7.1) storage-class-specifier: - typedef - extern - static - auto - register - (6.7.2) type-specifier: - void - char - short - int - long - float - double - signed - unsigned - _Bool - _Complex - struct-or-union-specifier - enum-specifier - typedef-name - (6.7.2.1) struct-or-union-specifier: - struct-or-union identifier(opt) { struct-declaration-list } - struct-or-union identifier - (6.7.2.1) struct-or-union: - struct - union - (6.7.2.1) struct-declaration-list: - struct-declaration - struct-declaration-list struct-declaration - (6.7.2.1) struct-declaration: - specifier-qualifier-list struct-declarator-list ; - (6.7.2.1) specifier-qualifier-list: - type-specifier specifier-qualifier-list(opt) - type-qualifier specifier-qualifier-list(opt) - (6.7.2.1) struct-declarator-list: - struct-declarator - struct-declarator-list , struct-declarator - (6.7.2.1) struct-declarator: - declarator - declarator(opt) : constant-expression - (6.7.2.2) enum-specifier: - enum identifier(opt) { enumerator-list } - enum identifier(opt) { enumerator-list ,} - enum identifier - (6.7.2.2) enumerator-list: - enumerator - enumerator-list , enumerator - (6.7.2.2) enumerator: - enumeration-constant - enumeration-constant = constant-expression - (6.7.3) type-qualifier: - const - restrict - volatile - 
(6.7.4) function-specifier: - inline - (6.7.5) declarator: - pointer(opt) direct-declarator - (6.7.5) direct-declarator: - identifier - ( declarator ) - direct-declarator [ type-qualifier-list(opt) - assignment-expression(opt) ] - direct-declarator [ static type-qualifier-list(opt) - assignment-expression ] - direct-declarator [ type-qualifier-list static - assignment-expression ] - direct-declarator [ type-qualifier-list(opt) *] - direct-declarator ( parameter-type-list ) - direct-declarator ( identifier-list(opt) ) - (6.7.5) pointer: - * type-qualifier-list(opt) - * type-qualifier-list(opt) pointer - (6.7.5) type-qualifier-list: - type-qualifier - type-qualifier-list type-qualifier - (6.7.5) parameter-type-list: - parameter-list - parameter-list , ... - (6.7.5) parameter-list: - parameter-declaration - parameter-list , parameter-declaration - (6.7.5) parameter-declaration: - declaration-specifiers declarator - declaration-specifiers abstract-declarator(opt) - (6.7.5) identifier-list: - identifier - identifier-list , identifier - (6.7.6) type-name: - specifier-qualifier-list abstract-declarator(opt) - (6.7.6) abstract-declarator: - pointer - pointer(opt) direct-abstract-declarator - (6.7.6) direct-abstract-declarator: - ( abstract-declarator ) - direct-abstract-declarator(opt) [ type-qualifier-list (opt) - assignment-expression(opt) ] - direct-abstract-declarator(opt) [static type-qualifier-list(opt) - assignment-expression ] - direct-abstract-declaratoropt [ type-qualifier-list static - assignment-expression ] - direct-abstract-declarator(opt) [ * ] - direct-abstract-declarator(opt) ( parameter-type-list(opt) ) - (6.7.7) typedef-name: - identifier - (6.7.8) initializer: - assignment-expression - { initializer-list } - { initializer-list , } - (6.7.8) initializer-list: - designation(opt) initializer - initializer-list , designation(opt) initializer - (6.7.8) designation: - designator-list = - (6.7.8) designator-list: - designator - designator-list designator - 
(6.7.8) designator: - [ constant-expression ] - . identifier + +(6.7) declaration: + declaration-specifiers init-declarator-list(opt) ; +(6.7) declaration-specifiers: + storage-class-specifier declaration-specifiers(opt) + type-specifier declaration-specifiers(opt) + type-qualifier declaration-specifiers(opt) + function-specifier declaration-specifiers(opt) +(6.7) init-declarator-list: + init-declarator + init-declarator-list , init-declarator +(6.7) init-declarator: + declarator + declarator = initializer +(6.7.1) storage-class-specifier: + typedef + extern + static + auto + register +(6.7.2) type-specifier: + void + char + short + int + long + float + double + signed + unsigned + _Bool + _Complex + struct-or-union-specifier + enum-specifier + typedef-name +(6.7.2.1) struct-or-union-specifier: + struct-or-union identifier(opt) { struct-declaration-list } + struct-or-union identifier +(6.7.2.1) struct-or-union: + struct + union +(6.7.2.1) struct-declaration-list: + struct-declaration + struct-declaration-list struct-declaration +(6.7.2.1) struct-declaration: + specifier-qualifier-list struct-declarator-list ; +(6.7.2.1) specifier-qualifier-list: + type-specifier specifier-qualifier-list(opt) + type-qualifier specifier-qualifier-list(opt) +(6.7.2.1) struct-declarator-list: + struct-declarator + struct-declarator-list , struct-declarator +(6.7.2.1) struct-declarator: + declarator + declarator(opt) : constant-expression +(6.7.2.2) enum-specifier: + enum identifier(opt) { enumerator-list } + enum identifier(opt) { enumerator-list ,} + enum identifier +(6.7.2.2) enumerator-list: + enumerator + enumerator-list , enumerator +(6.7.2.2) enumerator: + enumeration-constant + enumeration-constant = constant-expression +(6.7.3) type-qualifier: + const + restrict + volatile +(6.7.4) function-specifier: + inline +(6.7.5) declarator: + pointer(opt) direct-declarator +(6.7.5) direct-declarator: + identifier + ( declarator ) + direct-declarator [ type-qualifier-list(opt) + 
assignment-expression(opt) ] + direct-declarator [ static type-qualifier-list(opt) + assignment-expression ] + direct-declarator [ type-qualifier-list static + assignment-expression ] + direct-declarator [ type-qualifier-list(opt) *] + direct-declarator ( parameter-type-list ) + direct-declarator ( identifier-list(opt) ) +(6.7.5) pointer: + * type-qualifier-list(opt) + * type-qualifier-list(opt) pointer +(6.7.5) type-qualifier-list: + type-qualifier + type-qualifier-list type-qualifier +(6.7.5) parameter-type-list: + parameter-list + parameter-list , ... +(6.7.5) parameter-list: + parameter-declaration + parameter-list , parameter-declaration +(6.7.5) parameter-declaration: + declaration-specifiers declarator + declaration-specifiers abstract-declarator(opt) +(6.7.5) identifier-list: + identifier + identifier-list , identifier +(6.7.6) type-name: + specifier-qualifier-list abstract-declarator(opt) +(6.7.6) abstract-declarator: + pointer + pointer(opt) direct-abstract-declarator +(6.7.6) direct-abstract-declarator: + ( abstract-declarator ) + direct-abstract-declarator(opt) [ type-qualifier-list (opt) + assignment-expression(opt) ] + direct-abstract-declarator(opt) [static type-qualifier-list(opt) + assignment-expression ] + direct-abstract-declaratoropt [ type-qualifier-list static + assignment-expression ] + direct-abstract-declarator(opt) [ * ] + direct-abstract-declarator(opt) ( parameter-type-list(opt) ) +(6.7.7) typedef-name: + identifier +(6.7.8) initializer: + assignment-expression + { initializer-list } + { initializer-list , } +(6.7.8) initializer-list: + designation(opt) initializer + initializer-list , designation(opt) initializer +(6.7.8) designation: + designator-list = +(6.7.8) designator-list: + designator + designator-list designator +(6.7.8) designator: + [ constant-expression ] + . 
identifier + A.2.4 External definitions - (6.9) translation-unit: - external-declaration - translation-unit external-declaration - (6.9) external-declaration: - function-definition - declaration - (6.9.1) function-definition: - declaration-specifiers declarator declaration-list(opt) - compound-statement - (6.9.1) declaration-list: - declaration - declaration-list declaration + +(6.9) translation-unit: + external-declaration + translation-unit external-declaration +(6.9) external-declaration: + function-definition + declaration +(6.9.1) function-definition: + declaration-specifiers declarator declaration-listopt compound-statement +(6.9.1) declaration-list: + declaration + declaration-list declaration */ +#include +#include + scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) { /** * ISO/IEC 9899:TC3 @@ -171,22 +174,36 @@ scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) { * declarator * declarator = initializer */ - if (!scc_parse_is_declaration_start(parser, 0)) { + cbool ok; + const scc_lexer_tok_t *tok_ptr = scc_parser_next(parser); + scc_lexer_tok_t tok; + if (tok_ptr == null) { return null; } - scc_ast_type_t *type = scc_parse_type(parser); + + scc_ast_type_t *type = scc_malloc(sizeof(scc_ast_type_t)); if (type == null) { - LOG_ERROR("Failed to parse type"); + LOG_FATAL("out of memory"); return null; } - const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); - if (!scc_lexer_tok_match(tok, SCC_TOK_IDENT)) { - LOG_ERROR("Expected identifier, got %s", scc_get_tok_name(tok->type)); + if (tok_ptr->type != SCC_TOK_INT) { + // TODO back it + scc_parser_reset(parser); + return null; + } else { + type->base.type = SCC_AST_TYPE_BUILTIN; + type->base.loc = tok_ptr->loc; + type->builtin.type = SCC_AST_BUILTIN_TYPE_INT; + type->builtin.quals = (scc_ast_decl_specifier_t){0}; + } + + scc_parser_commit(parser); + ok = scc_parser_next_consume(parser, &tok); + if (ok == false) { return null; } - 
scc_lexer_stream_consume(parser->lex_stream); scc_ast_decl_t *decl = scc_malloc(sizeof(scc_ast_decl_t)); /* @@ -202,27 +219,28 @@ scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) { direct-declarator ( parameter-type-list ) direct-declarator ( identifier-listopt ) */ - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) { + if (!scc_parser_consume_if(parser, SCC_TOK_L_PAREN)) { // TODO - if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + if (scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) { decl->base.type = SCC_AST_DECL_VAR; decl->var.type = type; - decl->var.name = tok->value.cstr.data; + decl->var.name = scc_cstring_as_cstr(&tok.lexeme); decl->var.init = null; - return decl; - } else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_ASSIGN)) { + goto RETURN; + } else if (scc_parser_consume_if(parser, SCC_TOK_ASSIGN)) { decl->base.type = SCC_AST_DECL_VAR; decl->var.type = type; - decl->var.name = tok->value.cstr.data; - decl->var.init = scc_parse_expression(parser); - return decl; + decl->var.name = scc_cstring_as_cstr(&tok.lexeme); + decl->var.init = null; // scc_parse_expression(parser); + goto RETURN; } + // TODO return null; } // function decl decl->base.type = SCC_AST_DECL_FUNC; - decl->func.name = tok->value.cstr.data; + decl->func.name = scc_cstring_as_cstr(&tok.lexeme); decl->func.type = scc_malloc(sizeof(scc_ast_type_t)); decl->func.type->base.type = SCC_AST_TYPE_FUNCTION; scc_vec_init(decl->func.type->function.param_types); @@ -231,20 +249,32 @@ scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) { decl->func.type->function.is_variadic = false; // TODO param type - scc_parse_consume_if(parser->lex_stream, SCC_TOK_VOID); + scc_parser_consume_if(parser, SCC_TOK_VOID); - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { + if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) { return null; } - if (!scc_parse_is(parser->lex_stream, SCC_TOK_L_BRACE)) { + tok_ptr = scc_parser_peek(parser); + 
if (tok_ptr == null) { return null; } + if (tok_ptr->type != SCC_TOK_L_BRACE) { + if (tok_ptr->type == SCC_TOK_SEMICOLON) { + decl->func.body = null; + } else { + return null; + } + } decl->func.body = scc_parse_statement(parser); Assert(decl->func.type != null); Assert(decl->func.type->base.type == SCC_AST_TYPE_FUNCTION); Assert(decl->func.body != null); Assert(decl->func.body->base.type == SCC_AST_STMT_COMPOUND); + +RETURN: + parser->sema_callbacks.on_decl(parser->sema_callbacks.context, + decl->base.type, decl); return decl; } diff --git a/libs/parser/src/parse_expr.c b/libs/parser/src/parse_expr.c index 3c325ff..ef44e44 100644 --- a/libs/parser/src/parse_expr.c +++ b/libs/parser/src/parse_expr.c @@ -1,15 +1,3 @@ -/** - * @file parse_expr.c - * @author your name (you@domain.com) - * @brief Pratt Parser表达式解析器 - * @version 0.1 - * @date 2026-01-09 - * - * @copyright Copyright (c) 2026 - * - */ -#include - /* A.2.1 Expressions @@ -107,113 +95,18 @@ A.2.1 Expressions unary-expression assignment-operator assignment-expression (6.5.16) assignment-operator: one of - = *= /= %= +=-= <<= >>= &= ^= |= + = *= /= %= += -= <<= >>= &= ^= |= (6.5.17) expression: assignment-expression expression , assignment-expression (6.6) -constant-expression: - conditional-expression + constant-expression: + conditional-expression */ -/** - * @brief 从token映射到AST操作符 - * @param tok_type 词法token类型 - * @param is_unary 是否为一元操作符上下文 - * @return AST操作符类型 - */ -static scc_ast_expr_op_t scc_ast_token_to_operator(scc_tok_type_t tok_type, - cbool is_unary) { - switch (tok_type) { - /* 赋值操作符 */ - case SCC_TOK_ASSIGN: - return SCC_AST_OP_ASSIGN; - case SCC_TOK_ASSIGN_ADD: - return SCC_AST_OP_ASSIGN_ADD; - case SCC_TOK_ASSIGN_SUB: - return SCC_AST_OP_ASSIGN_SUB; - case SCC_TOK_ASSIGN_MUL: - return SCC_AST_OP_ASSIGN_MUL; - case SCC_TOK_ASSIGN_DIV: - return SCC_AST_OP_ASSIGN_DIV; - case SCC_TOK_ASSIGN_MOD: - return SCC_AST_OP_ASSIGN_MOD; - case SCC_TOK_ASSIGN_AND: - return SCC_AST_OP_ASSIGN_AND; - case 
SCC_TOK_ASSIGN_XOR: - return SCC_AST_OP_ASSIGN_XOR; - case SCC_TOK_ASSIGN_OR: - return SCC_AST_OP_ASSIGN_OR; - case SCC_TOK_ASSIGN_L_SH: - return SCC_AST_OP_ASSIGN_LSHIFT; - case SCC_TOK_ASSIGN_R_SH: - return SCC_AST_OP_ASSIGN_RSHIFT; - - /* 逻辑操作符 */ - case SCC_TOK_OR_OR: - return SCC_AST_OP_LOGICAL_OR; - case SCC_TOK_AND_AND: - return SCC_AST_OP_LOGICAL_AND; - - /* 位操作符 */ - case SCC_TOK_OR: - return SCC_AST_OP_BITWISE_OR; - case SCC_TOK_XOR: - return SCC_AST_OP_BITWISE_XOR; - case SCC_TOK_AND: - return is_unary ? SCC_AST_OP_ADDRESS_OF : SCC_AST_OP_BITWISE_AND; - - /* 相等性操作符 */ - case SCC_TOK_EQ: - return SCC_AST_OP_EQUAL; - case SCC_TOK_NEQ: - return SCC_AST_OP_NOT_EQUAL; - - /* 关系操作符 */ - case SCC_TOK_LT: - return SCC_AST_OP_LESS; - case SCC_TOK_GT: - return SCC_AST_OP_GREATER; - case SCC_TOK_LE: - return SCC_AST_OP_LESS_EQUAL; - case SCC_TOK_GE: - return SCC_AST_OP_GREATER_EQUAL; - - /* 移位操作符 */ - case SCC_TOK_L_SH: - return SCC_AST_OP_LEFT_SHIFT; - case SCC_TOK_R_SH: - return SCC_AST_OP_RIGHT_SHIFT; - - /* 算术操作符 */ - case SCC_TOK_ADD: - return is_unary ? SCC_AST_OP_UNARY_PLUS : SCC_AST_OP_ADD; - case SCC_TOK_SUB: - return is_unary ? SCC_AST_OP_UNARY_MINUS : SCC_AST_OP_SUB; - case SCC_TOK_MUL: - return is_unary ? SCC_AST_OP_INDIRECTION : SCC_AST_OP_MUL; - case SCC_TOK_DIV: - return SCC_AST_OP_DIV; - case SCC_TOK_MOD: - return SCC_AST_OP_MOD; - - /* 一元操作符 */ - case SCC_TOK_NOT: - return SCC_AST_OP_LOGICAL_NOT; - case SCC_TOK_BIT_NOT: - return SCC_AST_OP_BITWISE_NOT; - case SCC_TOK_ADD_ADD: - return is_unary ? SCC_AST_OP_PREFIX_INCREMENT - : SCC_AST_OP_POSTFIX_INCREMENT; - case SCC_TOK_SUB_SUB: - return is_unary ? 
SCC_AST_OP_PREFIX_DECREMENT - : SCC_AST_OP_POSTFIX_DECREMENT; - - default: - return SCC_AST_OP_NONE; - } -} +#include +#include /** * @brief 运算符优先级定义 @@ -239,13 +132,64 @@ typedef enum { PREC_PRIMARY = 17, // 最高优先级 } scc_precedence_t; -/** - * @brief 获取二元运算符优先级 - */ -static scc_precedence_t get_binary_precedence(scc_tok_type_t op) { - switch (op) { +static inline scc_ast_expr_t *expr_create(scc_parser_t *parser, + scc_ast_node_type_t type) { + scc_ast_expr_t *expr = (scc_ast_expr_t *)scc_malloc(sizeof(scc_ast_expr_t)); + Assert(expr != null); + expr->base.type = type; + expr->base.loc = scc_pos_create(); + return expr; +} + +/* ---------------------------- 函数前向声明 ---------------------------- */ +static scc_ast_expr_t *expr_create(scc_parser_t *parser, + scc_ast_node_type_t type); +static scc_ast_expr_t *parse_expression_with_precedence(scc_parser_t *parser, + int min_prec); + +// 通用二元解析器(用于左结合各层) +typedef scc_ast_expr_t *(*parse_sub_expr_func)(scc_parser_t *parser); +static scc_ast_expr_t *parse_binary_expression(scc_parser_t *parser, + parse_sub_expr_func parse_sub, + int this_prec); + +// 各优先级层(除特殊的外,均调用通用解析器) +static scc_ast_expr_t *parse_multiplicative_expression(scc_parser_t *parser); +static scc_ast_expr_t *parse_additive_expression(scc_parser_t *parser); +static scc_ast_expr_t *parse_shift_expression(scc_parser_t *parser); +static scc_ast_expr_t *parse_relational_expression(scc_parser_t *parser); +static scc_ast_expr_t *parse_equality_expression(scc_parser_t *parser); +static scc_ast_expr_t *parse_bitwise_and_expression(scc_parser_t *parser); +static scc_ast_expr_t *parse_bitwise_xor_expression(scc_parser_t *parser); +static scc_ast_expr_t *parse_bitwise_or_expression(scc_parser_t *parser); +static scc_ast_expr_t *parse_logical_and_expression(scc_parser_t *parser); +static scc_ast_expr_t *parse_logical_or_expression(scc_parser_t *parser); + +// 特殊结构:独立解析(右结合、条件、一元、后缀、基本) +static scc_ast_expr_t * +parse_assignment_expression(scc_parser_t *parser); // 右结合 
+static scc_ast_expr_t * +parse_conditional_expression(scc_parser_t *parser); // 右结合 +static scc_ast_expr_t *parse_cast_expression(scc_parser_t *parser); // 类型转换 +static scc_ast_expr_t * +parse_unary_expression(scc_parser_t *parser); // 一元运算符 +static scc_ast_expr_t * +parse_postfix_expression(scc_parser_t *parser); // 后缀运算符 +static scc_ast_expr_t * +parse_primary_expression(scc_parser_t *parser); // 基本表达式 + +// 特殊结构的内部辅助函数 +static scc_ast_expr_t *parse_sizeof_expression(scc_parser_t *parser); +static scc_ast_expr_t * +parse_paren_expression(scc_parser_t *parser); // 处理括号的三种情况 + +/* ---------------------------- 工具函数 ---------------------------- */ +// 获取 token 的优先级(用于二元运算符) +static int get_token_precedence(scc_tok_type_t type) { + switch (type) { case SCC_TOK_COMMA: return PREC_COMMA; + // 赋值运算符(右结合,但优先级相同,统一处理) case SCC_TOK_ASSIGN: case SCC_TOK_ASSIGN_ADD: case SCC_TOK_ASSIGN_SUB: @@ -253,8 +197,8 @@ static scc_precedence_t get_binary_precedence(scc_tok_type_t op) { case SCC_TOK_ASSIGN_DIV: case SCC_TOK_ASSIGN_MOD: case SCC_TOK_ASSIGN_AND: - case SCC_TOK_ASSIGN_XOR: case SCC_TOK_ASSIGN_OR: + case SCC_TOK_ASSIGN_XOR: case SCC_TOK_ASSIGN_L_SH: case SCC_TOK_ASSIGN_R_SH: return PREC_ASSIGNMENT; @@ -293,626 +237,743 @@ static scc_precedence_t get_binary_precedence(scc_tok_type_t op) { } } -/** - * @brief 检查是否是赋值运算符 - */ -static cbool is_assignment_operator(scc_tok_type_t op) { - switch (op) { - case SCC_TOK_ASSIGN: - case SCC_TOK_ASSIGN_ADD: - case SCC_TOK_ASSIGN_SUB: - case SCC_TOK_ASSIGN_MUL: - case SCC_TOK_ASSIGN_DIV: - case SCC_TOK_ASSIGN_MOD: - case SCC_TOK_ASSIGN_AND: - case SCC_TOK_ASSIGN_XOR: - case SCC_TOK_ASSIGN_OR: - case SCC_TOK_ASSIGN_L_SH: - case SCC_TOK_ASSIGN_R_SH: - return true; +// 判断 token 是否为二元运算符 +static cbool is_binary_operator(scc_tok_type_t type) { + return get_token_precedence(type) > PREC_NONE; +} + +// 将 token 类型映射为二元操作符(用于 AST) +static scc_ast_expr_op_t map_token_to_binary_op(scc_tok_type_t type) { + switch (type) { + case SCC_TOK_ADD: + 
return SCC_AST_OP_ADD; + case SCC_TOK_SUB: + return SCC_AST_OP_SUB; + case SCC_TOK_MUL: + return SCC_AST_OP_MUL; + case SCC_TOK_DIV: + return SCC_AST_OP_DIV; + case SCC_TOK_MOD: + return SCC_AST_OP_MOD; + case SCC_TOK_L_SH: + return SCC_AST_OP_LEFT_SHIFT; + case SCC_TOK_R_SH: + return SCC_AST_OP_RIGHT_SHIFT; + case SCC_TOK_LT: + return SCC_AST_OP_LESS; + case SCC_TOK_GT: + return SCC_AST_OP_GREATER; + case SCC_TOK_LE: + return SCC_AST_OP_LESS_EQUAL; + case SCC_TOK_GE: + return SCC_AST_OP_GREATER_EQUAL; + case SCC_TOK_EQ: + return SCC_AST_OP_EQUAL; + case SCC_TOK_NEQ: + return SCC_AST_OP_NOT_EQUAL; + case SCC_TOK_AND: + return SCC_AST_OP_BITWISE_AND; + case SCC_TOK_XOR: + return SCC_AST_OP_BITWISE_XOR; + case SCC_TOK_OR: + return SCC_AST_OP_BITWISE_OR; + case SCC_TOK_AND_AND: + return SCC_AST_OP_LOGICAL_AND; + case SCC_TOK_OR_OR: + return SCC_AST_OP_LOGICAL_OR; + case SCC_TOK_COMMA: + return SCC_AST_OP_COMMA; default: - return false; + return SCC_AST_OP_NONE; } } -/** - * @brief 检查是否是二元运算符 - */ -static cbool is_binary_operator(scc_tok_type_t op) { - return get_binary_precedence(op) != PREC_NONE; +// 将 token 类型映射为一元操作符 +static scc_ast_expr_op_t map_token_to_unary_op(scc_tok_type_t type, + cbool is_prefix) { + if (is_prefix) { + switch (type) { + case SCC_TOK_ADD: + return SCC_AST_OP_UNARY_PLUS; + case SCC_TOK_SUB: + return SCC_AST_OP_UNARY_MINUS; + case SCC_TOK_AND: + return SCC_AST_OP_ADDRESS_OF; + case SCC_TOK_MUL: + return SCC_AST_OP_INDIRECTION; + case SCC_TOK_BIT_NOT: + return SCC_AST_OP_BITWISE_NOT; + case SCC_TOK_NOT: + return SCC_AST_OP_LOGICAL_NOT; + case SCC_TOK_ADD_ADD: + return SCC_AST_OP_PREFIX_INCREMENT; + case SCC_TOK_SUB_SUB: + return SCC_AST_OP_PREFIX_DECREMENT; + default: + return SCC_AST_OP_NONE; + } + } else { + switch (type) { + case SCC_TOK_ADD_ADD: + return SCC_AST_OP_POSTFIX_INCREMENT; + case SCC_TOK_SUB_SUB: + return SCC_AST_OP_POSTFIX_DECREMENT; + default: + return SCC_AST_OP_NONE; + } + } } -static inline scc_ast_expr_t 
*expr_create(scc_parser_t *parser, - scc_ast_node_type_t type) { - scc_ast_expr_t *expr = (scc_ast_expr_t *)scc_malloc(sizeof(scc_ast_expr_t)); - Assert(expr != null); - expr->base.type = type; - expr->base.loc = scc_pos_create(); +// 将 token 类型映射为赋值操作符 +static scc_ast_expr_op_t map_token_to_assign_op(scc_tok_type_t type) { + switch (type) { + case SCC_TOK_ASSIGN: + return SCC_AST_OP_ASSIGN; + case SCC_TOK_ASSIGN_ADD: + return SCC_AST_OP_ASSIGN_ADD; + case SCC_TOK_ASSIGN_SUB: + return SCC_AST_OP_ASSIGN_SUB; + case SCC_TOK_ASSIGN_MUL: + return SCC_AST_OP_ASSIGN_MUL; + case SCC_TOK_ASSIGN_DIV: + return SCC_AST_OP_ASSIGN_DIV; + case SCC_TOK_ASSIGN_MOD: + return SCC_AST_OP_ASSIGN_MOD; + case SCC_TOK_ASSIGN_AND: + return SCC_AST_OP_ASSIGN_AND; + case SCC_TOK_ASSIGN_OR: + return SCC_AST_OP_ASSIGN_OR; + case SCC_TOK_ASSIGN_XOR: + return SCC_AST_OP_ASSIGN_XOR; + case SCC_TOK_ASSIGN_L_SH: + return SCC_AST_OP_ASSIGN_LSHIFT; + case SCC_TOK_ASSIGN_R_SH: + return SCC_AST_OP_ASSIGN_RSHIFT; + default: + return SCC_AST_OP_NONE; + } +} + +/* ---------------------------- 错误恢复辅助 ---------------------------- */ +// 跳过直到遇到同步 token(分号、右括号、逗号、EOF) +static void parser_sync(scc_parser_t *parser) { + const scc_lexer_tok_t *tok; + while ((tok = scc_parser_peek(parser)) != null) { + scc_tok_type_t type = tok->type; + if (type == SCC_TOK_SEMICOLON || type == SCC_TOK_R_PAREN || + type == SCC_TOK_R_BRACE || type == SCC_TOK_COMMA || + type == SCC_TOK_EOF) { + break; + } + // 消耗并丢弃当前 token + scc_lexer_tok_t discard; + if (scc_parser_next_consume(parser, &discard)) { + scc_lexer_tok_drop(&discard); + } + } +} + +static scc_ast_expr_t *create_binary_expr(scc_parser_t *parser, + scc_ast_expr_t *left, + scc_ast_expr_t *right, + scc_ast_expr_op_t op) { + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_BINARY); + if (!expr) + return null; + expr->binary.op = op; + expr->binary.lhs = left; + expr->binary.rhs = right; return expr; } -/** - * @brief 解析基本表达式 - * - * (6.5.1) primary-expression: - * 
identifier - * constant - * string-literal - * ( expression ) - */ -static scc_ast_expr_t *parse_primary_expression(scc_parser_t *parser) { - const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); - - switch (tok->type) { - case SCC_TOK_IDENT: { - scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_IDENTIFIER); - if (!expr) - return null; - expr->identifier.name = tok->value.cstr.data; - scc_lexer_stream_consume(parser->lex_stream); - - // 调用语义回调 - if (parser->sema_callbacks.on_expr) { - parser->sema_callbacks.on_expr(parser->sema_callbacks.context, - expr->base.type, expr); - } - return expr; - } - - case SCC_TOK_INT_LITERAL: - case SCC_TOK_FLOAT_LITERAL: - case SCC_TOK_CHAR_LITERAL: { - scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_INT_LITERAL); - if (!expr) - return null; - expr->literal.value = tok->value; - scc_lexer_stream_consume(parser->lex_stream); - - if (parser->sema_callbacks.on_expr) { - parser->sema_callbacks.on_expr(parser->sema_callbacks.context, - expr->base.type, expr); - } - return expr; - } - - case SCC_TOK_STRING_LITERAL: { - scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_STRING_LITERAL); - if (!expr) - return null; - expr->literal.value = tok->value; - scc_lexer_stream_consume(parser->lex_stream); - - if (parser->sema_callbacks.on_expr) { - parser->sema_callbacks.on_expr(parser->sema_callbacks.context, - expr->base.type, expr); - } - return expr; - } - - case SCC_TOK_L_PAREN: { - TODO(); - // // 保存当前位置,用于区分类型转换和括号表达式 - // usize save_pos = parser->lex_stream->curr_pos; - - // // 跳过 '(' - // scc_lexer_stream_consume(parser->lex_stream); - - // // 尝试解析类型转换 - // if (parser_is_type_start(parser)) { - // scc_ast_type_t *type = scc_parse_type_name(parser); - // if (type && parser_consume_if(parser, SCC_TOK_R_PAREN)) { - // // 成功解析类型转换 - // scc_ast_expr_t *cast = expr_create(parser, - // SCC_AST_EXPR_CAST); if (!cast) { - // scc_free(type); - // return null; - // } - // cast->cast.type = type; - // 
cast->cast.expr = scc_parse_expression(parser, 0); // - // 递归解析 - - // if (parser->sema_callbacks.on_expr) { - // parser->sema_callbacks.on_expr( - // parser->sema_callbacks.context, cast->node_type, - // cast); - // } - // return cast; - // } - // // 解析失败,清理 - // if (type) - // scc_free(type); - // } - - // // 不是类型转换,恢复为括号表达式 - // parser->lex_stream->curr_pos = save_pos; - // scc_lexer_stream_consume(parser->lex_stream); // 跳过 '(' - - // scc_ast_expr_t *expr = scc_parse_expression(parser, 0); - // if (!expr) { - // return null; - // } - - // if (!parser_consume_if(parser, SCC_TOK_R_PAREN)) { - // PARSER_ERROR(parser, "expected ')' after expression"); - // scc_free(expr); - // return null; - // } - - // 括号表达式不需要特殊节点,直接返回内部表达式 - // return expr; - } - - default: - LOG_ERROR("expected primary expression, got %s", - scc_get_tok_name(tok->type)); +static scc_ast_expr_t *create_unary_expr(scc_parser_t *parser, + scc_ast_expr_op_t op, + scc_ast_expr_t *operand) { + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_UNARY); + if (!expr) return null; + expr->unary.op = op; + expr->unary.operand = operand; + return expr; +} + +static scc_ast_expr_t *create_conditional_expr(scc_parser_t *parser, + scc_ast_expr_t *cond, + scc_ast_expr_t *then_expr, + scc_ast_expr_t *else_expr) { + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_COND); + if (!expr) + return null; + expr->cond.cond = cond; + expr->cond.then_expr = then_expr; + expr->cond.else_expr = else_expr; + return expr; +} + +// 其他创建函数根据需要添加(如 call、subscript、member 等) + +/* ---------------------------- 通用二元解析器 ---------------------------- */ +/** + * 解析左结合的二元表达式 + * @param parser 解析器 + * @param parse_sub 解析下一级(优先级更低)的函数 + * @param this_prec 当前层的优先级 + * @return 表达式节点 + */ +static scc_ast_expr_t *parse_binary_expression(scc_parser_t *parser, + parse_sub_expr_func parse_sub, + int this_prec) { + scc_ast_expr_t *left = parse_sub(parser); + if (!left) { + // 左侧解析失败,尝试同步后返回 null + parser_sync(parser); + return null; 
+ } + + while (1) { + const scc_lexer_tok_t *tok = scc_parser_peek(parser); + if (!tok) + break; + int prec = get_token_precedence(tok->type); + if (prec < this_prec) + break; + // 必须是二元运算符(但赋值和条件除外,它们已在高层处理,这里不会出现) + if (!is_binary_operator(tok->type)) + break; + + // 消费运算符 + scc_lexer_tok_t op_tok; + if (!scc_parser_next_consume(parser, &op_tok)) { + // 消费失败,可能是流结束,退出循环 + break; + } + scc_ast_expr_op_t op = map_token_to_binary_op(op_tok.type); + scc_lexer_tok_drop(&op_tok); + + // 解析右侧(注意左结合:传入 + // this_prec+1,确保优先级相同的不再结合,避免右结合) + scc_ast_expr_t *right = + parse_binary_expression(parser, parse_sub, this_prec + 1); + if (!right) { + // 右侧解析失败,尝试恢复 + parser_sync(parser); + // 释放已构建的 left?这里 left 可能已被使用,简单的做法是向上返回 + // null + return null; + } + + left = create_binary_expr(parser, left, right, op); + if (!left) + return null; + } + return left; +} + +static scc_ast_expr_t *parse_multiplicative_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, parse_cast_expression, + PREC_MULTIPLICATIVE); +} + +static scc_ast_expr_t *parse_additive_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, parse_multiplicative_expression, + PREC_ADDITIVE); +} + +static scc_ast_expr_t *parse_shift_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, parse_additive_expression, + PREC_SHIFT); +} + +static scc_ast_expr_t *parse_relational_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, parse_shift_expression, + PREC_RELATIONAL); +} + +static scc_ast_expr_t *parse_equality_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, parse_relational_expression, + PREC_EQUALITY); +} + +static scc_ast_expr_t *parse_bitwise_and_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, parse_equality_expression, + PREC_BITWISE_AND); +} + +static scc_ast_expr_t *parse_bitwise_xor_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, 
parse_bitwise_and_expression, + PREC_BITWISE_XOR); +} + +static scc_ast_expr_t *parse_bitwise_or_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, parse_bitwise_xor_expression, + PREC_BITWISE_OR); +} + +static scc_ast_expr_t *parse_logical_and_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, parse_bitwise_or_expression, + PREC_LOGICAL_AND); +} + +static scc_ast_expr_t *parse_logical_or_expression(scc_parser_t *parser) { + return parse_binary_expression(parser, parse_logical_and_expression, + PREC_LOGICAL_OR); +} + +// 赋值表达式(右结合) +static scc_ast_expr_t *parse_assignment_expression(scc_parser_t *parser) { + // 先解析左侧的 unary-expression(C 标准规定赋值左边必须是 + // unary-expression) + scc_ast_expr_t *left = null; + left = parse_conditional_expression(parser); + if (left) + return left; + left = parse_unary_expression(parser); + if (!left) + return null; + + const scc_lexer_tok_t *tok = scc_parser_peek(parser); + if (!tok) + return left; + + int prec = get_token_precedence(tok->type); + if (prec == PREC_ASSIGNMENT && is_binary_operator(tok->type)) { + // 消费赋值运算符 + scc_lexer_tok_t op_tok; + if (!scc_parser_next_consume(parser, &op_tok)) + return left; + scc_ast_expr_op_t op = map_token_to_assign_op(op_tok.type); + scc_lexer_tok_drop(&op_tok); + + // 解析右侧(右结合:继续调用 parse_assignment_expression) + scc_ast_expr_t *right = parse_assignment_expression(parser); + if (!right) { + // 错误恢复 + parser_sync(parser); + return null; + } + + left = create_binary_expr(parser, left, right, op); + } + return left; +} + +// 条件表达式(右结合) +static scc_ast_expr_t *parse_conditional_expression(scc_parser_t *parser) { + scc_ast_expr_t *cond = parse_logical_or_expression(parser); + if (!cond) + return null; + + const scc_lexer_tok_t *tok = scc_parser_peek(parser); + if (tok && tok->type == SCC_TOK_COND) { + // 消耗 '?' 
+ scc_lexer_tok_t q_tok; + if (!scc_parser_next_consume(parser, &q_tok)) + return cond; + scc_lexer_tok_drop(&q_tok); + + // 解析中间表达式(可以是任何表达式,包括逗号) + scc_ast_expr_t *then_expr = scc_parse_expression(parser); + if (!then_expr) { + parser_sync(parser); + return null; + } + + // 消耗 ':' + if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) { + LOG_ERROR("Expected ':' after '?'"); + parser_sync(parser); + return null; + } + + // 解析 else 部分(右结合,再次调用 parse_conditional_expression) + scc_ast_expr_t *else_expr = parse_conditional_expression(parser); + if (!else_expr) { + parser_sync(parser); + return null; + } + + cond = create_conditional_expr(parser, cond, then_expr, else_expr); + } + return cond; +} + +// 类型转换表达式 (type-name) cast-expression +static scc_ast_expr_t *parse_cast_expression(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_parser_peek(parser); + if (tok && tok->type == SCC_TOK_L_PAREN) { + // 尝试解析类型名 + scc_parser_store(parser); + scc_ast_type_t *type = scc_parse_type_name(parser); // 需要外部实现 + if (type) { + // 消耗了类型名后,下一个应该是 ')' + if (scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) { + // 是类型转换,解析后面的 cast-expression(注意 cast-expression + // 可以嵌套) + scc_ast_expr_t *operand = parse_cast_expression(parser); + if (!operand) { + // 释放 type + // TODO: 需要 scc_ast_type_drop(type); + return null; + } + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_CAST); + expr->cast.type = type; + expr->cast.expr = operand; + return expr; + } else { + // 不是类型转换,回退 + scc_parser_restore(parser); + // 释放 type(假设 scc_parse_type_name 分配了) + // TODO: scc_ast_type_drop(type); + } + } else { + // 解析类型名失败,回退 + scc_parser_restore(parser); + } + } + // 否则作为一元表达式 + return parse_unary_expression(parser); +} + +// 一元表达式 +static scc_ast_expr_t *parse_unary_expression(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_parser_peek(parser); + if (!tok) + return null; + + // 处理一元运算符 + switch (tok->type) { + case SCC_TOK_ADD_ADD: // ++x + case SCC_TOK_SUB_SUB: // --x + case 
SCC_TOK_AND: // &x + case SCC_TOK_MUL: // *x + case SCC_TOK_ADD: // +x + case SCC_TOK_SUB: // -x + case SCC_TOK_BIT_NOT: // ~x + case SCC_TOK_NOT: // !x + { + scc_lexer_tok_t op_tok; + if (!scc_parser_next_consume(parser, &op_tok)) + return null; + scc_ast_expr_op_t op = map_token_to_unary_op(op_tok.type, true); + scc_lexer_tok_drop(&op_tok); + + // 一元运算符右结合,递归调用 parse_unary_expression + scc_ast_expr_t *operand = parse_unary_expression(parser); + if (!operand) { + parser_sync(parser); + return null; + } + return create_unary_expr(parser, op, operand); + } + case SCC_TOK_SIZEOF: + return parse_sizeof_expression(parser); + default: + return parse_postfix_expression(parser); } } -/** - * @brief 解析后缀表达式 - * - * (6.5.2) postfix-expression: - * primary-expression - * postfix-expression [ expression ] - * postfix-expression ( argument-expression-list(opt) ) - * postfix-expression . identifier - * postfix-expression -> identifier - * postfix-expression ++ - * postfix-expression -- - * ( type-name ) { initializer-list } - * ( type-name ) { initializer-list , } - */ -static scc_ast_expr_t *parse_postfix_expression(scc_parser_t *parser, - scc_ast_expr_t *lhs) { - scc_ast_expr_t *expr = lhs; +// sizeof 表达式(特殊处理两种形式) +static scc_ast_expr_t *parse_sizeof_expression(scc_parser_t *parser) { + scc_lexer_tok_t tok; + if (!scc_parser_next_consume(parser, &tok) || tok.type != SCC_TOK_SIZEOF) { + return null; + } + scc_lexer_tok_drop(&tok); - while (true) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_current(parser->lex_stream); + const scc_lexer_tok_t *next = scc_parser_peek(parser); + if (!next) { + LOG_ERROR("Unexpected end after sizeof"); + return null; + } + + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_SIZE_OF); + + // 尝试解析 sizeof(type-name) + if (next->type == SCC_TOK_L_PAREN) { + scc_parser_store(parser); + scc_ast_type_t *type = scc_parse_type_name(parser); + if (type) { + // 消耗 ')' + if (scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) { + expr->attr_of.type 
= type; + return expr; + } else { + // 不是有效的 sizeof(type-name),回退 + scc_parser_restore(parser); + // 释放 type + // TODO: scc_ast_type_drop(type); + } + } else { + scc_parser_restore(parser); + } + } + + // 否则作为 sizeof unary-expression + scc_ast_expr_t *operand = parse_unary_expression(parser); + if (!operand) { + scc_free(expr); + parser_sync(parser); + return null; + } + expr->attr_of.expr = operand; + return expr; +} + +// 后缀表达式 +static scc_ast_expr_t *parse_postfix_expression(scc_parser_t *parser) { + scc_ast_expr_t *left = parse_primary_expression(parser); + if (!left) + return null; + + while (1) { + const scc_lexer_tok_t *tok = scc_parser_peek(parser); + if (!tok) + break; switch (tok->type) { - case SCC_TOK_L_BRACKET: { // 数组下标 - scc_lexer_stream_consume(parser->lex_stream); // 跳过 '[' + case SCC_TOK_L_BRACKET: // left[expr] + { + scc_lexer_tok_t lb; + if (!scc_parser_next_consume(parser, &lb)) + return left; + scc_lexer_tok_drop(&lb); + scc_ast_expr_t *index = scc_parse_expression(parser); + if (!index) { + parser_sync(parser); + return null; + } + if (!scc_parser_consume_if(parser, SCC_TOK_R_BRACKET)) { + LOG_ERROR("Expected ']' after subscript"); + parser_sync(parser); + return null; + } scc_ast_expr_t *subscript = expr_create(parser, SCC_AST_EXPR_ARRAY_SUBSCRIPT); - if (!subscript) { - scc_free(expr); - return null; - } - - subscript->subscript.array = expr; - subscript->subscript.index = scc_parse_expression(parser); - - if (!subscript->subscript.index) { - scc_free(subscript); - return null; - } - - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_BRACKET)) { - LOG_ERROR("expected ']' after array index"); - scc_free(subscript); - return null; - } - - expr = subscript; + subscript->subscript.array = left; + subscript->subscript.index = index; + left = subscript; break; } - - case SCC_TOK_L_PAREN: { // 函数调用 - scc_lexer_stream_consume(parser->lex_stream); // 跳过 '(' + case SCC_TOK_L_PAREN: // left(args) + { + scc_lexer_tok_t lp; + if 
(!scc_parser_next_consume(parser, &lp)) + return left; + scc_lexer_tok_drop(&lp); scc_ast_expr_t *call = expr_create(parser, SCC_AST_EXPR_CALL); - if (!call) { - scc_free(expr); - return null; - } - - call->call.callee = expr; + call->call.callee = left; scc_vec_init(call->call.args); // 解析参数列表 - if (!scc_parse_is(parser->lex_stream, SCC_TOK_R_PAREN)) { - do { - scc_ast_expr_t *arg = scc_parse_expression(parser); + if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) { + while (1) { + scc_ast_expr_t *arg = parse_assignment_expression(parser); if (!arg) { - // 清理已解析的参数 - scc_vec_foreach(call->call.args, i) { - scc_free(scc_vec_at(call->call.args, i)); - } - scc_vec_free(call->call.args); + parser_sync(parser); + // 释放已解析的参数 + // TODO: 释放 call->call.args 中的表达式 scc_free(call); return null; } scc_vec_push(call->call.args, arg); - if (!scc_parse_consume_if(parser->lex_stream, - SCC_TOK_COMMA)) { + if (scc_parser_consume_if(parser, SCC_TOK_COMMA)) { + continue; + } else if (scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) { break; + } else { + LOG_ERROR("Expected ',' or ')' in function call"); + parser_sync(parser); + // 释放资源 + scc_free(call); + return null; } - } while (true); - } - - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { - LOG_ERROR("expected ')' after argument list"); - // 清理 - scc_vec_foreach(call->call.args, i) { - scc_free(scc_vec_at(call->call.args, i)); } - scc_vec_free(call->call.args); - scc_free(call); - return null; } - - expr = call; + left = call; break; } - - case SCC_TOK_DOT: { // 成员访问 - scc_lexer_stream_consume(parser->lex_stream); // 跳过 '.' 
- - scc_ast_expr_t *member = expr_create(parser, SCC_AST_EXPR_MEMBER); - if (!member) { - scc_free(expr); + case SCC_TOK_DOT: + case SCC_TOK_DEREF: { + scc_lexer_tok_t op_tok; + if (!scc_parser_next_consume(parser, &op_tok)) + return left; + scc_lexer_tok_t ident_tok; + if (!scc_parser_next_consume(parser, &ident_tok) || + ident_tok.type != SCC_TOK_IDENT) { + LOG_ERROR("Expected identifier after member access"); + scc_lexer_tok_drop(&op_tok); + parser_sync(parser); return null; } + const char *name = scc_cstring_as_cstr(&ident_tok.lexeme); + scc_lexer_tok_drop(&ident_tok); - member->member.base = expr; - - if (!scc_parse_is(parser->lex_stream, SCC_TOK_IDENT)) { - LOG_ERROR("expected identifier after '.'"); - scc_free(member); - return null; + scc_ast_expr_t *member = expr_create( + parser, op_tok.type == SCC_TOK_DOT ? SCC_AST_EXPR_MEMBER + : SCC_AST_EXPR_PTR_MEMBER); + if (op_tok.type == SCC_TOK_DOT) { + member->member.base = left; + member->member.member_name = name; + } else { + member->ptr_member.base = left; + member->ptr_member.member_name = name; } - - member->member.member_name = tok->value.cstr.data; - scc_lexer_stream_consume(parser->lex_stream); // 跳过标识符 - - expr = member; + scc_lexer_tok_drop(&op_tok); + left = member; break; } - - case SCC_TOK_DEREF: { // 指针成员访问 -> - scc_lexer_stream_consume(parser->lex_stream); // 跳过 '->' - - scc_ast_expr_t *ptr_member = - expr_create(parser, SCC_AST_EXPR_PTR_MEMBER); - if (!ptr_member) { - scc_free(expr); - return null; - } - - ptr_member->ptr_member.base = expr; - - if (!scc_parse_is(parser->lex_stream, SCC_TOK_IDENT)) { - LOG_ERROR("expected identifier after '->'"); - scc_free(ptr_member); - return null; - } - - ptr_member->ptr_member.member_name = tok->value.cstr.data; - scc_lexer_stream_consume(parser->lex_stream); // 跳过标识符 - - expr = ptr_member; + case SCC_TOK_ADD_ADD: // left++ + case SCC_TOK_SUB_SUB: // left-- + { + scc_lexer_tok_t op_tok; + if (!scc_parser_next_consume(parser, &op_tok)) + return left; + 
scc_ast_expr_op_t op = map_token_to_unary_op(op_tok.type, false); + scc_lexer_tok_drop(&op_tok); + left = create_unary_expr(parser, op, left); break; } - - case SCC_TOK_ADD_ADD: // 后缀++ - case SCC_TOK_SUB_SUB: { // 后缀-- - // 跳过操作符 - scc_lexer_stream_consume(parser->lex_stream); - - scc_ast_expr_t *unary = expr_create(parser, SCC_AST_EXPR_UNARY); - if (!unary) { - scc_free(expr); - return null; - } - - unary->unary.op = scc_ast_token_to_operator(tok->type, false); - unary->unary.operand = expr; - - expr = unary; - break; - } - default: - // 不是后缀操作符,返回当前表达式 - return expr; + goto done; } - - // 调用语义回调 - // if (parser->sema_callbacks.on_expr) { - // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, - // expr->base.type, expr); - // } } +done: + return left; } -/** - * @brief 解析一元表达式 - * - * (6.5.3) unary-expression: - * postfix-expression - * ++ unary-expression - * -- unary-expression - * unary-operator cast-expression - * sizeof unary-expression - * sizeof ( type-name ) - */ -static scc_ast_expr_t *parse_unary_expression(scc_parser_t *parser) { - const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); +// 基本表达式 +static scc_ast_expr_t *parse_primary_expression(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_parser_peek(parser); + if (!tok) + return null; switch (tok->type) { - case SCC_TOK_ADD_ADD: // 前缀++ - case SCC_TOK_SUB_SUB: { // 前缀-- - scc_lexer_stream_consume(parser->lex_stream); // 跳过操作符 - - scc_ast_expr_t *unary = expr_create(parser, SCC_AST_EXPR_UNARY); - if (!unary) + case SCC_TOK_IDENT: { + scc_lexer_tok_t ident; + if (!scc_parser_next_consume(parser, &ident)) return null; - - unary->unary.op = scc_ast_token_to_operator(tok->type, true); - unary->unary.operand = parse_unary_expression(parser); - - if (!unary->unary.operand) { - scc_free(unary); - return null; - } - - // if (parser->sema_callbacks.on_expr) { - // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, - // unary->node_type, unary); - // } - 
return unary; + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_IDENTIFIER); + expr->identifier.name = scc_cstring_as_cstr(&ident.lexeme); + ident.lexeme.data = null; + scc_lexer_tok_drop(&ident); + return expr; } - - case SCC_TOK_ADD: // + - case SCC_TOK_SUB: // - - case SCC_TOK_MUL: // * - case SCC_TOK_AND: // & - case SCC_TOK_NOT: // ! - case SCC_TOK_BIT_NOT: { // ~ - // 跳过操作符 - scc_lexer_stream_consume(parser->lex_stream); - - scc_ast_expr_t *unary = expr_create(parser, SCC_AST_EXPR_UNARY); - if (!unary) + case SCC_TOK_INT_LITERAL: { + scc_lexer_tok_t lit; + if (!scc_parser_next_consume(parser, &lit)) return null; - - unary->unary.op = scc_ast_token_to_operator(tok->type, true); - unary->unary.operand = parse_unary_expression(parser); - - if (!unary->unary.operand) { - scc_free(unary); + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_INT_LITERAL); + expr->literal.lexme = scc_cstring_as_cstr(&lit.lexeme); + lit.lexeme.data = null; // 转移所有权 + scc_lexer_tok_drop(&lit); + return expr; + } + case SCC_TOK_FLOAT_LITERAL: { + scc_lexer_tok_t lit; + if (!scc_parser_next_consume(parser, &lit)) return null; - } - - // if (parser->sema_callbacks.on_expr) { - // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, - // unary->node_type, unary); - // } - return unary; + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_FLOAT_LITERAL); + expr->literal.lexme = scc_cstring_as_cstr(&lit.lexeme); + lit.lexeme.data = null; + scc_lexer_tok_drop(&lit); + return expr; } - - case SCC_TOK_SIZEOF: { // sizeof - // scc_lexer_stream_consume(parser->lex_stream); // 跳过 'sizeof' - - // scc_ast_expr_t *size_of = expr_create(parser, SCC_AST_EXPR_SIZE_OF); - // if (!size_of) - // return null; - - // size_of->size_align.is_size_of = true; - - // // 检查是否是 sizeof(type) 或 sizeof expr - // if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) { - // // 检查是否是类型 - // // if (parser_is_type_start(parser)) { - // // size_of->size_align.type = 
scc_parse_type_name(parser); - // // if (!size_of->size_align.type) { - // // scc_free(size_of); - // // return null; - // // } - // // } else { - // size_of->size_align.expr = scc_parse_expression(parser); - // if (!size_of->size_align.expr) { - // scc_free(size_of); - // return null; - // } - // // } - - // if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { - // LOG_ERROR("expected ')' after sizeof"); - // // if (size_of->size_align.type) - // // scc_free(size_of->size_align.type); - // // if (size_of->size_align.expr) - // // scc_free(size_of->size_align.expr); - // // scc_free(size_of); - // // return null; - // } - // } else { - // size_of->size_align.expr = parse_unary_expression(parser); - // if (!size_of->size_align.expr) { - // scc_free(size_of); - // return null; - // } - // } - - // // if (parser->sema_callbacks.on_expr) { - // // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, - // // size_of->node_type, size_of); - // // } - // return size_of; + case SCC_TOK_CHAR_LITERAL: { + scc_lexer_tok_t lit; + if (!scc_parser_next_consume(parser, &lit)) + return null; + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_CHAR_LITERAL); + expr->literal.lexme = scc_cstring_as_cstr(&lit.lexeme); + lit.lexeme.data = null; + scc_lexer_tok_drop(&lit); + return expr; } - + case SCC_TOK_STRING_LITERAL: { + scc_lexer_tok_t lit; + if (!scc_parser_next_consume(parser, &lit)) + return null; + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_STRING_LITERAL); + expr->literal.lexme = scc_cstring_as_cstr(&lit.lexeme); + lit.lexeme.data = null; + scc_lexer_tok_drop(&lit); + return expr; + } + case SCC_TOK_L_PAREN: + return parse_paren_expression(parser); default: - // 不是一元操作符,解析基本表达式 - scc_ast_expr_t *primary = parse_primary_expression(parser); - if (!primary) - return null; - - // 应用后缀操作符 - return parse_postfix_expression(parser, primary); + LOG_ERROR("Unexpected token in primary expression: %s", + scc_get_tok_name(tok->type)); + 
parser_sync(parser); + return null; } } -/** - * @brief 解析强制转换表达式 - * - * (6.5.4) cast-expression: - * unary-expression - * ( type-name ) cast-expression - */ -static scc_ast_expr_t *parse_cast_expression(scc_parser_t *parser) { - // 检查是否是类型转换 - // if (parser_is_cast_expression(parser)) { - // // 我们已经知道是 ( type-name ) 格式 - // scc_lexer_stream_consume(parser->lex_stream); // 跳过 '(' +// 处理括号表达式、类型转换、复合字面量(目前只实现括号表达式和类型转换) +static scc_ast_expr_t *parse_paren_expression(scc_parser_t *parser) { + // 保存位置以便回退 + scc_parser_store(parser); - // scc_ast_type_t *type = scc_parse_type_name(parser); - // if (!type) { - // return null; - // } + // 尝试解析类型名 + scc_ast_type_t *type = scc_parse_type_name(parser); + if (type) { + // 如果成功,下一个应该是 ')' + if (scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) { + // 是类型转换,解析后面的 cast-expression + scc_ast_expr_t *operand = parse_cast_expression(parser); + if (!operand) { + // 释放 type + // TODO: scc_ast_type_drop(type); + return null; + } + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_CAST); + expr->cast.type = type; + expr->cast.expr = operand; + return expr; + } else { + // 不是类型转换,回退并释放 type + scc_parser_restore(parser); + // TODO: scc_ast_type_drop(type); + } + } else { + scc_parser_restore(parser); + } - // if (!parser_consume_if(parser, SCC_TOK_R_PAREN)) { - // PARSER_ERROR(parser, "expected ')' after type name"); - // scc_free(type); - // return null; - // } + // 否则作为括号表达式 + scc_lexer_tok_t lp; + if (!scc_parser_next_consume(parser, &lp) || lp.type != SCC_TOK_L_PAREN) { + return null; + } + scc_lexer_tok_drop(&lp); - // scc_ast_expr_t *cast = expr_create(parser, SCC_AST_EXPR_CAST); - // if (!cast) { - // scc_free(type); - // return null; - // } - - // cast->cast.type = type; - // cast->cast.expr = parse_cast_expression(parser); // 递归解析 - - // if (!cast->cast.expr) { - // scc_free(cast); - // return null; - // } - - // if (parser->sema_callbacks.on_expr) { - // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, - // 
cast->node_type, cast); - // } - // return cast; - // } - - // 不是类型转换,解析一元表达式 - return parse_unary_expression(parser); -} - -/** - * @brief Pratt Parser核心:解析表达式 - * @param parser 解析器 - * @param min_prec 最小优先级 - * @return 表达式AST节点 - */ -static scc_ast_expr_t * -parse_expression_with_precedence(scc_parser_t *parser, - scc_precedence_t min_prec) { - // 解析左侧表达式(一元表达式或基本表达式) - scc_ast_expr_t *lhs = parse_cast_expression(parser); - if (!lhs) { + scc_ast_expr_t *inner = scc_parse_expression(parser); + if (!inner) { + parser_sync(parser); return null; } - while (true) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_current(parser->lex_stream); - scc_tok_type_t op = tok->type; - scc_precedence_t prec = get_binary_precedence(op); - - // 检查是否达到最小优先级或不是二元运算符 - if (prec < min_prec || prec == PREC_NONE) { - break; - } - - // 特殊处理条件表达式 ?: - if (op == SCC_TOK_COND) { - scc_lexer_stream_consume(parser->lex_stream); // 跳过 '?' - - scc_ast_expr_t *cond_expr = expr_create(parser, SCC_AST_EXPR_COND); - if (!cond_expr) { - scc_free(lhs); - return null; - } - - cond_expr->cond.cond = lhs; - - // 解析then表达式 - cond_expr->cond.then_expr = - parse_expression_with_precedence(parser, PREC_NONE); - if (!cond_expr->cond.then_expr) { - scc_free(cond_expr); - scc_free(lhs); - return null; - } - - // 期望 ':' - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) { - LOG_ERROR("expected ':' in conditional expression"); - scc_free(cond_expr); - scc_free(lhs); - return null; - } - - // 解析else表达式(条件表达式,右结合) - cond_expr->cond.else_expr = - parse_expression_with_precedence(parser, PREC_CONDITIONAL - 1); - if (!cond_expr->cond.else_expr) { - scc_free(cond_expr); - scc_free(lhs); - return null; - } - - lhs = cond_expr; - continue; - } - - // 对于赋值运算符,右侧优先级需要减1(右结合性) - scc_precedence_t next_min_prec; - if (is_assignment_operator(op)) { - next_min_prec = (scc_precedence_t)(prec - 1); // 右结合 - } else { - next_min_prec = (scc_precedence_t)(prec + 1); // 左结合 - } - - 
scc_lexer_stream_consume(parser->lex_stream); // 跳过操作符 - - // 解析右侧表达式 - scc_ast_expr_t *rhs = - parse_expression_with_precedence(parser, next_min_prec); - if (!rhs) { - scc_free(lhs); - return null; - } - - // 创建二元表达式节点 - scc_ast_expr_t *binary = expr_create(parser, SCC_AST_EXPR_BINARY); - if (!binary) { - scc_free(lhs); - scc_free(rhs); - return null; - } - - binary->binary.op = scc_ast_token_to_operator(op, false); - binary->binary.lhs = lhs; - binary->binary.rhs = rhs; - - lhs = binary; + if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) { + LOG_ERROR("Expected ')' after expression"); + parser_sync(parser); + return null; } - - // if (parser->sema_callbacks.on_expr) { - // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, - // binary->node_type, binary); - // } - return lhs; + return inner; } scc_ast_expr_t *scc_parse_expression(scc_parser_t *parser) { - return parse_expression_with_precedence(parser, PREC_NONE); + scc_ast_expr_t *left = parse_assignment_expression(parser); + if (!left) + return null; + + while (scc_parser_consume_if(parser, SCC_TOK_COMMA)) { + scc_ast_expr_t *right = parse_assignment_expression(parser); + if (!right) { + parser_sync(parser); + return null; + } + left = create_binary_expr(parser, left, right, SCC_AST_OP_COMMA); + } + return left; } diff --git a/libs/parser/src/parse_stmt.c b/libs/parser/src/parse_stmt.c index b07faa8..77844ca 100644 --- a/libs/parser/src/parse_stmt.c +++ b/libs/parser/src/parse_stmt.c @@ -1,10 +1,6 @@ /* -415 -ISO/IEC 9899:TC3 -Committee Draft — Septermber 7, 2007 -WG14/N1256 - A.2.3 Statements + (6.8) statement: labeled-statement @@ -50,37 +46,37 @@ A.2.3 Statements break ; return expression(opt) ; */ -#include +#include +#include static inline scc_ast_stmt_t *ast_stmt_alloc() { scc_ast_stmt_t *stmt = (scc_ast_stmt_t *)scc_malloc(sizeof(scc_ast_stmt_t)); Assert(stmt != null); - stmt->base.type = SCC_AST_TRANSLATION_UNIT; + stmt->base.type = SCC_AST_UNKNOWN; stmt->base.loc = scc_pos_create(); 
return stmt; } static inline scc_ast_expr_t *ast_parse_paren_expression(scc_parser_t *parser) { - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) { + if (!scc_parser_consume_if(parser, SCC_TOK_L_PAREN)) { LOG_ERROR("Expected '(' before like `( expression )` ."); } scc_ast_expr_t *ret = scc_parse_expression(parser); - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { - LOG_ERROR("Expected ')' after like `( expression )` ."); + if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) { + LOG_ERROR("Expected ')' before like `( expression )` ."); } return ret; } static scc_ast_stmt_t *parse_label_statement(scc_parser_t *parser) { - const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_IDENT)) { + scc_lexer_tok_t tok = {0}; + if (!scc_parser_next_consume(parser, &tok)) { return null; } - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) { - LOG_ERROR("Expected constant expression after case."); + if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) { return null; } @@ -93,21 +89,20 @@ static scc_ast_stmt_t *parse_label_statement(scc_parser_t *parser) { Assert(stmt != null); stmt->base.type = SCC_AST_STMT_LABEL; - // TODO maybe use cstring - stmt->label_stmt.label = tok->value.cstr.data; + stmt->label_stmt.label = scc_cstring_as_cstr(&tok.lexeme); stmt->label_stmt.stmt = statement; return stmt; } static scc_ast_stmt_t *parse_case_statement(scc_parser_t *parser) { - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_CASE)) { + if (!scc_parser_consume_if(parser, SCC_TOK_CASE)) { return null; } scc_ast_expr_t *expr = null; // TODO = scc_parser_constant_expression(); - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) { + if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) { LOG_ERROR("Expected constant expression after case."); return null; } @@ -126,11 +121,11 @@ static scc_ast_stmt_t *parse_case_statement(scc_parser_t *parser) { } static 
scc_ast_stmt_t *parse_default_statement(scc_parser_t *parser) { - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_DEFAULT)) { + if (!scc_parser_consume_if(parser, SCC_TOK_DEFAULT)) { return null; } - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) { + if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) { LOG_ERROR("Expected constant expression after case."); return null; } @@ -148,14 +143,14 @@ static scc_ast_stmt_t *parse_default_statement(scc_parser_t *parser) { } static scc_ast_stmt_t *parse_compound_statement(scc_parser_t *parser) { - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_BRACE)) { + if (!scc_parser_consume_if(parser, SCC_TOK_L_BRACE)) { return null; } scc_ast_stmt_t *stmt = ast_stmt_alloc(); stmt->base.type = SCC_AST_STMT_COMPOUND; scc_vec_init(stmt->compound.block_items); - while (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_BRACE)) { + while (!scc_parser_consume_if(parser, SCC_TOK_R_BRACE)) { /// TODO // scc_parse_is_decl(); scc_ast_node_t *ret = null; @@ -175,7 +170,7 @@ static scc_ast_stmt_t *parse_compound_statement(scc_parser_t *parser) { } static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) { - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_IF)) { + if (!scc_parser_consume_if(parser, SCC_TOK_IF)) { return null; } @@ -186,7 +181,7 @@ static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) { stmt->base.type = SCC_AST_STMT_IF; stmt->if_stmt.cond = expression; stmt->if_stmt.then_stmt = statement; - if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_ELSE)) { + if (scc_parser_consume_if(parser, SCC_TOK_ELSE)) { stmt->if_stmt.opt_else_stmt = scc_parse_statement(parser); } else { stmt->if_stmt.opt_else_stmt = null; @@ -195,7 +190,7 @@ static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) { } static scc_ast_stmt_t *parse_switch_statement(scc_parser_t *parser) { - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SWITCH)) { + if (!scc_parser_consume_if(parser, SCC_TOK_SWITCH)) 
{ return null; } @@ -210,7 +205,7 @@ static scc_ast_stmt_t *parse_switch_statement(scc_parser_t *parser) { } static scc_ast_stmt_t *parse_while_statement(scc_parser_t *parser) { - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_WHILE)) { + if (!scc_parser_consume_if(parser, SCC_TOK_WHILE)) { return null; } @@ -225,13 +220,13 @@ static scc_ast_stmt_t *parse_while_statement(scc_parser_t *parser) { } static scc_ast_stmt_t *parse_do_while_statement(scc_parser_t *parser) { - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_DO)) { + if (!scc_parser_consume_if(parser, SCC_TOK_DO)) { return null; } scc_ast_stmt_t *statement = scc_parse_statement(parser); - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_WHILE)) { + if (!scc_parser_consume_if(parser, SCC_TOK_WHILE)) { LOG_ERROR("Expected 'while' after do."); // TODO 使用更好的错误处理,未来应当采用更好的内存管理器 scc_free(statement); @@ -247,7 +242,7 @@ static scc_ast_stmt_t *parse_do_while_statement(scc_parser_t *parser) { } static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) { - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_FOR)) { + if (!scc_parser_consume_if(parser, SCC_TOK_FOR)) { return null; } @@ -256,7 +251,7 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) { for ( declaration expression(opt) ; expression(opt) ) statement */ - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) { + if (!scc_parser_consume_if(parser, SCC_TOK_L_PAREN)) { LOG_ERROR("Expected '(' before like `( expression )` ."); } @@ -266,19 +261,19 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) { // TODO use decl or expr stmt->for_stmt.init = (scc_ast_type_t *)scc_parse_expression(parser); - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) { LOG_ERROR("Expected semicolon in for statement."); } stmt->for_stmt.cond = scc_parse_expression(parser); - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) 
{ + if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) { LOG_ERROR("Expected semicolon in for statement."); } stmt->for_stmt.iter = scc_parse_expression(parser); - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { + if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) { LOG_ERROR("Expected ')' after like `( expression )` ."); } @@ -290,28 +285,26 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) { static scc_ast_stmt_t *parse_jump_statement(scc_parser_t *parser) { scc_ast_stmt_t *stmt = ast_stmt_alloc(); - if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_GOTO)) { + if (scc_parser_consume_if(parser, SCC_TOK_GOTO)) { stmt->base.type = SCC_AST_STMT_GOTO; - if (scc_parse_is(parser->lex_stream, SCC_TOK_IDENT)) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_current(parser->lex_stream); - stmt->goto_stmt.label = tok->value.cstr.data; - scc_lexer_stream_consume(parser->lex_stream); + scc_lexer_tok_t tok = {0}; + if (scc_parser_next_consume(parser, &tok)) { + stmt->goto_stmt.label = scc_cstring_as_cstr(&tok.lexeme); } else { LOG_ERROR("Expected label after goto."); } - } else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_CONTINUE)) { + } else if (scc_parser_consume_if(parser, SCC_TOK_CONTINUE)) { stmt->base.type = SCC_AST_STMT_CONTINUE; - } else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_BREAK)) { + } else if (scc_parser_consume_if(parser, SCC_TOK_BREAK)) { stmt->base.type = SCC_AST_STMT_BREAK; - } else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_RETURN)) { + } else if (scc_parser_consume_if(parser, SCC_TOK_RETURN)) { stmt->base.type = SCC_AST_STMT_RETURN; stmt->return_stmt.expr = scc_parse_expression(parser); } else { UNREACHABLE(); } - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) { LOG_ERROR("Expected semicolon after jump statement."); } return stmt; @@ -321,7 +314,7 @@ static scc_ast_stmt_t 
*parse_expression_statement(scc_parser_t *parser) { scc_ast_stmt_t *stmt = ast_stmt_alloc(); stmt->base.type = SCC_AST_STMT_EXPR; - if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + if (scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) { stmt->expr.expr = null; return stmt; } @@ -333,15 +326,20 @@ static scc_ast_stmt_t *parse_expression_statement(scc_parser_t *parser) { return null; } - if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) { LOG_ERROR("Expected semicolon after expression."); } return stmt; } scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) { - const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); - switch (tok->type) { + scc_ast_stmt_t *stmt; + const scc_lexer_tok_t *tok_ref; + tok_ref = scc_parser_peek(parser); + if (!tok_ref) { + return null; + } + switch (tok_ref->type) { /* (6.8.1) labeled-statement: @@ -350,15 +348,19 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) { default : statement */ case SCC_TOK_IDENT: - // 注意需要检测下一个 token 是否为冒号,否则将需要判定成表达式语句 - if (!scc_parse_peek_is(parser->lex_stream, 1, SCC_TOK_COLON)) { + tok_ref = scc_parser_next(parser); + if (tok_ref == null || tok_ref->type != SCC_TOK_COLON) { break; } - return parse_label_statement(parser); - case SCC_TOK_CASE: - return parse_case_statement(parser); + stmt = parse_label_statement(parser); + goto RETURN; + case SCC_TOK_CASE: { + stmt = parse_case_statement(parser); + goto RETURN; + } case SCC_TOK_DEFAULT: - return parse_default_statement(parser); + stmt = parse_default_statement(parser); + goto RETURN; /* (6.8.2) compound-statement: @@ -373,7 +375,8 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) { statement */ case SCC_TOK_L_BRACE: - return parse_compound_statement(parser); + stmt = parse_compound_statement(parser); + goto RETURN; /* (6.8.4) selection-statement: @@ -382,9 +385,11 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t 
*parser) { switch ( expression ) statement */ case SCC_TOK_IF: - return parse_if_statement(parser); + stmt = parse_if_statement(parser); + goto RETURN; case SCC_TOK_SWITCH: - return parse_switch_statement(parser); + stmt = parse_switch_statement(parser); + goto RETURN; /* (6.8.5) iteration-statement: @@ -396,11 +401,14 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) { statement */ case SCC_TOK_WHILE: - return parse_while_statement(parser); + stmt = parse_while_statement(parser); + goto RETURN; case SCC_TOK_DO: - return parse_do_while_statement(parser); + stmt = parse_do_while_statement(parser); + goto RETURN; case SCC_TOK_FOR: - return parse_for_statement(parser); + stmt = parse_for_statement(parser); + goto RETURN; /* (6.8.6) jump-statement: @@ -413,7 +421,8 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) { case SCC_TOK_CONTINUE: case SCC_TOK_BREAK: case SCC_TOK_RETURN: - return parse_jump_statement(parser); + stmt = parse_jump_statement(parser); + goto RETURN; default: break; } @@ -422,5 +431,10 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) { expression-statement: expression(opt) ; */ - return parse_expression_statement(parser); + stmt = parse_expression_statement(parser); +RETURN: + scc_parser_reset(parser); + parser->sema_callbacks.on_stmt(parser->sema_callbacks.context, + stmt->base.type, stmt); + return stmt; } diff --git a/libs/parser/src/parse_type.c b/libs/parser/src/parse_type.c index 537c49e..f960d79 100644 --- a/libs/parser/src/parse_type.c +++ b/libs/parser/src/parse_type.c @@ -115,7 +115,8 @@ EXAMPLE The constructions identifier-list , identifier */ -#include +#include +#include /** * @brief 判断 token 是否为声明说明符的开始 @@ -126,9 +127,8 @@ EXAMPLE The constructions * - 类型限定符 (const, volatile, restrict, atomic) * - 函数说明符 (inline) */ -cbool scc_parse_is_decl_specifier_start(scc_parser_t *parser, usize offset) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, offset); +cbool 
scc_parse_is_decl_specifier_start(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_parser_peek(parser); switch (tok->type) { // 存储类说明符 @@ -174,12 +174,8 @@ cbool scc_parse_is_decl_specifier_start(scc_parser_t *parser, usize offset) { } } -/** - * @brief 判断 token 是否为类型说明符的开始 - */ -cbool scc_parse_is_type_specifier_start(scc_parser_t *parser, usize offset) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, offset); +cbool scc_parse_is_type_specifier_start(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_parser_peek(parser); switch (tok->type) { // 基本类型说明符 @@ -210,12 +206,8 @@ cbool scc_parse_is_type_specifier_start(scc_parser_t *parser, usize offset) { } } -/** - * @brief 判断 token 是否为类型限定符的开始 - */ -cbool scc_parse_is_type_qualifier_start(scc_parser_t *parser, usize offset) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, offset); +cbool scc_parse_is_type_qualifier_start(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_parser_peek(parser); switch (tok->type) { case SCC_TOK_CONST: @@ -228,12 +220,8 @@ cbool scc_parse_is_type_qualifier_start(scc_parser_t *parser, usize offset) { } } -/** - * @brief 判断 token 是否为存储类说明符的开始 - */ -cbool scc_parse_is_storage_class_start(scc_parser_t *parser, usize offset) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, offset); +cbool scc_parse_is_storage_class_start(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_parser_peek(parser); switch (tok->type) { case SCC_TOK_TYPEDEF: @@ -246,810 +234,808 @@ cbool scc_parse_is_storage_class_start(scc_parser_t *parser, usize offset) { return false; } } - -/** - * @brief 判断当前位置是否可以开始一个声明 - * - * 声明以声明说明符开始,包括: - * - 存储类说明符 - * - 类型说明符 - * - 类型限定符 - * - 函数说明符 - */ -cbool scc_parse_is_declaration_start(scc_parser_t *parser, usize offset) { - // 检查是否是声明说明符的开始 - return scc_parse_is_decl_specifier_start(parser, offset); -} - -// 前向声明辅助函数 -static scc_ast_type_t 
*parse_specifier_qualifier_list(scc_parser_t *parser, - usize *offset); -static scc_ast_type_t *parse_abstract_declarator(scc_parser_t *parser, - usize *offset, - scc_ast_type_t *base_type); -static scc_ast_type_t * -parse_direct_abstract_declarator(scc_parser_t *parser, usize *offset, - scc_ast_type_t *base_type); -static scc_ast_type_t *parse_pointer(scc_parser_t *parser, usize *offset); -static scc_ast_decl_specifier_t parse_type_qualifier_list(scc_parser_t *parser, - usize *offset); -static scc_ast_type_t *parse_struct_or_union_specifier(scc_parser_t *parser, - usize *offset); -static scc_ast_type_t *parse_enum_specifier(scc_parser_t *parser, - usize *offset); - -// 创建内置类型节点的辅助函数 -static scc_ast_type_t *create_builtin_type(scc_ast_builtin_type_t builtin, - scc_ast_decl_specifier_t quals) { - scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); - if (!type) - return null; - - type->base.type = SCC_AST_TYPE_BUILTIN; - type->builtin.builtin = builtin; - type->builtin.quals = quals; - return type; -} - -// 创建指针类型节点的辅助函数 -static scc_ast_type_t *create_pointer_type(scc_ast_type_t *pointee, - scc_ast_decl_specifier_t quals) { - scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); - if (!type) - return null; - - type->base.type = SCC_AST_TYPE_POINTER; - type->pointer.pointee = pointee; - type->pointer.quals = quals; - return type; -} - -// 创建数组类型节点的辅助函数 -static scc_ast_type_t *create_array_type(scc_ast_type_t *element, - scc_ast_expr_t *size) { - scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); - if (!type) - return null; - - type->base.type = SCC_AST_TYPE_ARRAY; - type->array.element = element; - type->array.size = size; - return type; -} - -// 创建函数类型节点的辅助函数 -static scc_ast_type_t *create_function_type(scc_ast_type_t *return_type, - scc_ast_type_vec_t param_types, - cbool is_variadic) { - scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); - if (!type) - return null; - 
- type->base.type = SCC_AST_TYPE_FUNCTION; - type->function.return_type = return_type; - type->function.param_types = param_types; - type->function.is_variadic = is_variadic; - return type; -} - -/** - * @brief 解析类型名 - */ -scc_ast_type_t *scc_parse_type_name(scc_parser_t *parser, usize *offset) { - usize start_offset = *offset; - - // 解析 specifier-qualifier-list - scc_ast_type_t *type = parse_specifier_qualifier_list(parser, offset); - if (type == null) { - // TODO - // LOG_ERROR("Failed to parse specifier-qualifier-list at offset %zu", - // start_offset); - return null; - } - - // 解析可选的 abstract-declarator - scc_ast_type_t *full_type = parse_abstract_declarator(parser, offset, type); - if (full_type == null) { - return type; - } - - return full_type; -} - scc_ast_type_t *scc_parse_type(scc_parser_t *parser) { - usize offset = 0; - scc_ast_type_t *ret = scc_parse_type_name(parser, &offset); - if (ret == null) { - return null; - } - scc_lexer_stream_advance(parser->lex_stream, offset); - return ret; -} - -/** - * @brief 检查是否为类型限定符 token - */ -static cbool is_type_qualifier_token(const scc_lexer_tok_t *tok) { - return tok->type == SCC_TOK_CONST || tok->type == SCC_TOK_RESTRICT || - tok->type == SCC_TOK_VOLATILE || tok->type == SCC_TOK_ATOMIC; -} - -/** - * @brief 解析类型限定符 - */ -static scc_ast_decl_specifier_t parse_type_qualifier(scc_parser_t *parser, - usize *offset) { - scc_ast_decl_specifier_t quals = {0}; - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - - switch (tok->type) { - case SCC_TOK_CONST: - quals.is_const = true; - (*offset)++; - break; - case SCC_TOK_RESTRICT: - quals.is_restrict = true; - (*offset)++; - break; - case SCC_TOK_VOLATILE: - quals.is_volatile = true; - (*offset)++; - break; - case SCC_TOK_ATOMIC: - quals.is_atomic = true; - (*offset)++; - break; - default: - // 不是限定符 - break; - } - - return quals; -} - -/** - * @brief 解析类型限定符列表 - */ -static scc_ast_decl_specifier_t parse_type_qualifier_list(scc_parser_t 
*parser, - usize *offset) { - scc_ast_decl_specifier_t quals = {0}; - - while (true) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - - if (!is_type_qualifier_token(tok)) { - break; - } - - scc_ast_decl_specifier_t new_qual = - parse_type_qualifier(parser, offset); - - // 合并限定符 - quals.is_const = quals.is_const || new_qual.is_const; - quals.is_restrict = quals.is_restrict || new_qual.is_restrict; - quals.is_volatile = quals.is_volatile || new_qual.is_volatile; - quals.is_atomic = quals.is_atomic || new_qual.is_atomic; - } - - return quals; -} - -/** - * @brief 检查是否为类型说明符 token - */ -static cbool is_type_specifier_token(const scc_lexer_tok_t *tok) { - switch (tok->type) { - case SCC_TOK_VOID: - case SCC_TOK_CHAR: - case SCC_TOK_SHORT: - case SCC_TOK_INT: - case SCC_TOK_LONG: - case SCC_TOK_FLOAT: - case SCC_TOK_DOUBLE: - case SCC_TOK_SIGNED: - case SCC_TOK_UNSIGNED: - case SCC_TOK_BOOL: - case SCC_TOK_COMPLEX: - case SCC_TOK_STRUCT: - case SCC_TOK_UNION: - case SCC_TOK_ENUM: - return true; - default: - // 可能是 typedef 名称 - return tok->type == SCC_TOK_IDENT; - } -} - -/** - * @brief 解析类型说明符 - */ -static scc_ast_type_t *parse_type_specifier(scc_parser_t *parser, - usize *offset) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - - // 处理简单内置类型 - if (tok->type == SCC_TOK_VOID) { - (*offset)++; - return create_builtin_type(TYPE_VOID, (scc_ast_decl_specifier_t){0}); - } else if (tok->type == SCC_TOK_CHAR) { - (*offset)++; - return create_builtin_type(TYPE_CHAR, (scc_ast_decl_specifier_t){0}); - } else if (tok->type == SCC_TOK_SHORT) { - (*offset)++; - return create_builtin_type(TYPE_SHORT, (scc_ast_decl_specifier_t){0}); - } else if (tok->type == SCC_TOK_INT) { - (*offset)++; - return create_builtin_type(TYPE_INT, (scc_ast_decl_specifier_t){0}); - } else if (tok->type == SCC_TOK_LONG) { - // 检查是否为 long long - const scc_lexer_tok_t *next_tok = - scc_lexer_stream_peek(parser->lex_stream, *offset + 
1); - if (next_tok->type == SCC_TOK_LONG) { - (*offset) += 2; // 跳过两个 long - return create_builtin_type(TYPE_LONG_LONG, - (scc_ast_decl_specifier_t){0}); - } else { - (*offset)++; - return create_builtin_type(TYPE_LONG, - (scc_ast_decl_specifier_t){0}); - } - } else if (tok->type == SCC_TOK_FLOAT) { - (*offset)++; - return create_builtin_type(TYPE_FLOAT, (scc_ast_decl_specifier_t){0}); - } else if (tok->type == SCC_TOK_DOUBLE) { - // 检查是否为 long double - const scc_lexer_tok_t *next_tok = - scc_lexer_stream_peek(parser->lex_stream, *offset + 1); - if (next_tok->type == SCC_TOK_LONG) { - (*offset) += 2; // 跳过 double long - return create_builtin_type(TYPE_LONG_DOUBLE, - (scc_ast_decl_specifier_t){0}); - } else { - (*offset)++; - return create_builtin_type(TYPE_DOUBLE, - (scc_ast_decl_specifier_t){0}); - } - } else if (tok->type == SCC_TOK_BOOL) { - (*offset)++; - return create_builtin_type(TYPE_BOOL, (scc_ast_decl_specifier_t){0}); - } else if (tok->type == SCC_TOK_SIGNED || tok->type == SCC_TOK_UNSIGNED) { - // signed/unsigned 需要与后续类型组合 - // 这里简化处理,默认为 int - (*offset)++; - return create_builtin_type(TYPE_INT, (scc_ast_decl_specifier_t){0}); - } else if (tok->type == SCC_TOK_COMPLEX) { - // _Complex 需要与浮点类型组合 - // 这里简化处理 - (*offset)++; - return create_builtin_type(TYPE_COMPLEX_FLOAT, - (scc_ast_decl_specifier_t){0}); - } - - // 处理结构体/联合体 - if (tok->type == SCC_TOK_STRUCT || tok->type == SCC_TOK_UNION) { - return parse_struct_or_union_specifier(parser, offset); - } - - // 处理枚举 - if (tok->type == SCC_TOK_ENUM) { - return parse_enum_specifier(parser, offset); - } - - // 处理 typedef 名称 - if (tok->type == SCC_TOK_IDENT) { - // TODO - return null; - scc_ast_type_t *type = - (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); - if (!type) - return null; - - type->base.type = SCC_AST_TYPE_TYPEDEF; - type->typedef_type.name = tok->value.cstr.data; - type->typedef_type.underlying = null; // 需要从符号表解析 - - (*offset)++; - return type; - } - + TODO(); return null; } -/** - * @brief 
解析结构体或联合体说明符 - */ -static scc_ast_type_t *parse_struct_or_union_specifier(scc_parser_t *parser, - usize *offset) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - cbool is_struct = (tok->type == SCC_TOK_STRUCT); - - (*offset)++; // 跳过 struct 或 union - - scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); - if (!type) - return null; - - type->base.type = is_struct ? SCC_AST_TYPE_STRUCT : SCC_AST_TYPE_UNION; - type->record.name = null; - scc_vec_init(type->record.fields); - - // 检查是否有标识符 - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type == SCC_TOK_IDENT) { - type->record.name = tok->value.cstr.data; - (*offset)++; - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - } - - // 如果有 '{',解析结构体定义 - if (tok->type == SCC_TOK_L_BRACE) { - (*offset)++; // 跳过 '{' - - // TODO: 解析 struct-declaration-list - // 这是一个复杂的子解析器,需要单独实现 - - // 临时:跳过所有声明直到遇到 '}' - while (true) { - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type == SCC_TOK_R_BRACE) - break; - if (tok->type == SCC_TOK_EOF) { - LOG_ERROR("Unclosed struct/union definition"); - scc_free(type); - return null; - } - (*offset)++; - } - - (*offset)++; // 跳过 '}' - } - - return type; -} - -/** - * @brief 解析枚举说明符 - */ -static scc_ast_type_t *parse_enum_specifier(scc_parser_t *parser, - usize *offset) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type != SCC_TOK_ENUM) - return null; - - (*offset)++; // 跳过 enum - - scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); - if (!type) - return null; - - type->base.type = SCC_AST_TYPE_ENUM; - type->enumeration.name = null; - scc_vec_init(type->enumeration.enumerators); - - // 检查是否有标识符 - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type == SCC_TOK_IDENT) { - type->enumeration.name = tok->value.cstr.data; - (*offset)++; - tok = scc_lexer_stream_peek(parser->lex_stream, 
*offset); - } - - // 如果有 '{',解析枚举定义 - if (tok->type == SCC_TOK_L_BRACE) { - (*offset)++; // 跳过 '{' - - // TODO: 解析 enumerator-list - // 这是一个复杂的子解析器,需要单独实现 - - // 临时:跳过所有枚举项直到遇到 '}' - while (true) { - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type == SCC_TOK_R_BRACE) - break; - if (tok->type == SCC_TOK_EOF) { - LOG_ERROR("Unclosed enum definition"); - scc_free(type); - return null; - } - (*offset)++; - } - - (*offset)++; // 跳过 '}' - } - - return type; -} - -/** - * @brief 解析指定符-限定符列表 - */ -static scc_ast_type_t *parse_specifier_qualifier_list(scc_parser_t *parser, - usize *offset) { - scc_ast_decl_specifier_t quals = {0}; - - // 收集类型限定符(可能出现在前面) - while (true) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - - if (!is_type_qualifier_token(tok)) { - break; - } - - scc_ast_decl_specifier_t new_quals = - parse_type_qualifier(parser, offset); - quals.is_const |= new_quals.is_const; - quals.is_restrict |= new_quals.is_restrict; - quals.is_volatile |= new_quals.is_volatile; - quals.is_atomic |= new_quals.is_atomic; - } - - // 解析类型说明符 - scc_ast_type_t *type_specifier = parse_type_specifier(parser, offset); - if (!type_specifier) { - // TODO - // LOG_ERROR("Expected type specifier"); - return null; - } - - // 收集可能出现在类型说明符后面的限定符 - while (true) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - - if (!is_type_qualifier_token(tok)) { - break; - } - - scc_ast_decl_specifier_t new_quals = - parse_type_qualifier(parser, offset); - quals.is_const |= new_quals.is_const; - quals.is_restrict |= new_quals.is_restrict; - quals.is_volatile |= new_quals.is_volatile; - quals.is_atomic |= new_quals.is_atomic; - } - - // 将限定符应用到类型上 - if (type_specifier->base.type == SCC_AST_TYPE_BUILTIN) { - type_specifier->builtin.quals = quals; - } else if (type_specifier->base.type == SCC_AST_TYPE_POINTER) { - // 对于指针类型,这里的限定符应该应用到指针本身 - type_specifier->pointer.quals = quals; - } else if 
(type_specifier->base.type == SCC_AST_TYPE_TYPEDEF) { - // typedef 类型可能也有限定符,但需要保存在其他地方 - // 这里简化处理 - } - - return type_specifier; -} - -/** - * @brief 解析指针 - */ -static scc_ast_type_t *parse_pointer(scc_parser_t *parser, usize *offset) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - - if (tok->type != SCC_TOK_MUL) { - return null; // 不是指针 - } - - (*offset)++; // 跳过 '*' - - // 解析可选的类型限定符列表 - scc_ast_decl_specifier_t ptr_quals = - parse_type_qualifier_list(parser, offset); - - // 递归解析更多指针(多重指针) - scc_ast_type_t *inner_pointer = parse_pointer(parser, offset); - - // 创建指针类型 - scc_ast_type_t *ptr_type = create_pointer_type(null, ptr_quals); - if (!ptr_type) - return null; - - if (inner_pointer) { - // 有更多指针,将它们链接起来 - scc_ast_type_t *current = ptr_type; - while (current->base.type == SCC_AST_TYPE_POINTER && - current->pointer.pointee && - current->pointer.pointee->base.type == SCC_AST_TYPE_POINTER) { - current = current->pointer.pointee; - } - current->pointer.pointee = inner_pointer; - } - - return ptr_type; -} - -/** - * @brief 解析抽象声明符 - */ -static scc_ast_type_t *parse_abstract_declarator(scc_parser_t *parser, - usize *offset, - scc_ast_type_t *base_type) { - // 解析可选的指针 - scc_ast_type_t *ptr_type = parse_pointer(parser, offset); - - if (ptr_type) { - // 将指针的 pointee 指向解析出的直接抽象声明符 - scc_ast_type_t *direct_type = - parse_direct_abstract_declarator(parser, offset, base_type); - - // 找到最内层的指针 - scc_ast_type_t *current = ptr_type; - while (current->base.type == SCC_AST_TYPE_POINTER && - current->pointer.pointee && - current->pointer.pointee->base.type == SCC_AST_TYPE_POINTER) { - current = current->pointer.pointee; - } - - if (current->base.type == SCC_AST_TYPE_POINTER) { - current->pointer.pointee = direct_type; - } - - return ptr_type; - } else { - // 没有指针,直接解析直接抽象声明符 - return parse_direct_abstract_declarator(parser, offset, base_type); - } -} - -/** - * @brief 解析直接抽象声明符 - */ -static scc_ast_type_t * 
-parse_direct_abstract_declarator(scc_parser_t *parser, usize *offset, - scc_ast_type_t *base_type) { - scc_ast_type_t *current_type = base_type; - - while (true) { - const scc_lexer_tok_t *tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - - // 情况1: ( abstract-declarator ) - if (tok->type == SCC_TOK_L_PAREN) { - (*offset)++; // 跳过 '(' - - // 解析括号内的抽象声明符 - scc_ast_type_t *inner_type = - parse_abstract_declarator(parser, offset, current_type); - - // 期望右括号 - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type != SCC_TOK_R_PAREN) { - LOG_ERROR("Expected ')' after abstract-declarator"); - return current_type; - } - (*offset)++; // 跳过 ')' - - current_type = inner_type; - continue; - } - - // 情况2: 数组声明符 [ ... ] - if (tok->type == SCC_TOK_L_BRACKET) { - (*offset)++; // 跳过 '[' - - // 检查是否是 [ * ] (可变长度数组) - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type == SCC_TOK_MUL) { - // 可变长度数组 - scc_ast_type_t *array_type = - create_array_type(current_type, null); - if (!array_type) - return current_type; - - (*offset)++; // 跳过 '*' - } else { - // 解析可选的 static 关键字 - cbool has_static = false; - if (tok->type == SCC_TOK_STATIC) { - has_static = true; - (*offset)++; - } - - // 解析可选的类型限定符列表 - parse_type_qualifier_list(parser, offset); - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - - // 如果之前没有 static,但有限定符,再检查一次 static - if (!has_static && tok->type == SCC_TOK_STATIC) { - has_static = true; - (*offset)++; - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - } - - // 解析可选的赋值表达式(数组大小) - scc_ast_expr_t *size_expr = null; - if (tok->type != SCC_TOK_R_BRACKET) { - // TODO: 解析 assignment-expression - // 简化:设置 size 为 null - size_expr = null; - - // 跳过表达式 - while (tok->type != SCC_TOK_R_BRACKET && - tok->type != SCC_TOK_EOF) { - (*offset)++; - tok = - scc_lexer_stream_peek(parser->lex_stream, *offset); - } - } - - scc_ast_type_t *array_type = - create_array_type(current_type, size_expr); - if (!array_type) - 
return current_type; - - current_type = array_type; - } - - // 期望右括号 - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type != SCC_TOK_R_BRACKET) { - LOG_ERROR("Expected ']' after array declarator"); - return current_type; - } - (*offset)++; // 跳过 ']' - continue; - } - - // 情况3: 函数声明符 ( parameter-type-list(opt) ) - if (tok->type == SCC_TOK_L_PAREN) { - (*offset)++; // 跳过 '(' - - // 检查是否为空参数列表 () - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type == SCC_TOK_R_PAREN) { - // 空参数列表 - (*offset)++; // 跳过 ')' - scc_ast_type_t *func_type = create_function_type( - current_type, (scc_ast_type_vec_t){0}, false); - if (!func_type) - return current_type; - current_type = func_type; - continue; - } else if (tok->type == SCC_TOK_VOID) { - // void 参数列表 - (*offset)++; // 跳过 void - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type == SCC_TOK_R_PAREN) { - (*offset)++; // 跳过 ')' - scc_ast_type_vec_t param_types; - scc_vec_init(param_types); - scc_ast_type_t *func_type = - create_function_type(current_type, param_types, false); - if (!func_type) - return current_type; - current_type = func_type; - continue; - } else { - LOG_ERROR("Expected ')' after void parameter list"); - return current_type; - } - } else { - // TODO: 解析 parameter-type-list - // 这是一个复杂的子解析器,需要单独实现 - - scc_ast_type_vec_t param_types; - scc_vec_init(param_types); - cbool is_variadic = false; - - // 临时:跳过所有参数直到遇到 ')' 或 '...' 
- while (true) { - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type == SCC_TOK_R_PAREN) - break; - if (tok->type == SCC_TOK_ELLIPSIS) { - is_variadic = true; - (*offset)++; - break; - } - if (tok->type == SCC_TOK_EOF) { - LOG_ERROR("Unclosed function parameter list"); - return current_type; - } - (*offset)++; - } - - tok = scc_lexer_stream_peek(parser->lex_stream, *offset); - if (tok->type == SCC_TOK_R_PAREN) { - (*offset)++; // 跳过 ')' - } - - scc_ast_type_t *func_type = create_function_type( - current_type, param_types, is_variadic); - if (!func_type) - return current_type; - current_type = func_type; - continue; - } - } - - // 没有更多的直接抽象声明符 - break; - } - - return current_type; -} - -// 提供类型检查接口 -cbool scc_ast_type_is_builtin(const scc_ast_type_t *type) { - return type && type->base.type == SCC_AST_TYPE_BUILTIN; -} - -cbool scc_ast_type_is_pointer(const scc_ast_type_t *type) { - return type && type->base.type == SCC_AST_TYPE_POINTER; -} - -cbool scc_ast_type_is_array(const scc_ast_type_t *type) { - return type && type->base.type == SCC_AST_TYPE_ARRAY; -} - -cbool scc_ast_type_is_function(const scc_ast_type_t *type) { - return type && type->base.type == SCC_AST_TYPE_FUNCTION; -} - -cbool scc_ast_type_is_struct(const scc_ast_type_t *type) { - return type && type->base.type == SCC_AST_TYPE_STRUCT; -} - -cbool scc_ast_type_is_union(const scc_ast_type_t *type) { - return type && type->base.type == SCC_AST_TYPE_UNION; -} - -cbool scc_ast_type_is_enum(const scc_ast_type_t *type) { - return type && type->base.type == SCC_AST_TYPE_ENUM; -} - -cbool scc_ast_type_is_typedef(const scc_ast_type_t *type) { - return type && type->base.type == SCC_AST_TYPE_TYPEDEF; -} - -// 获取内置类型 -scc_ast_builtin_type_t scc_ast_type_get_builtin(const scc_ast_type_t *type) { - if (!scc_ast_type_is_builtin(type)) - return TYPE_VOID; - return type->builtin.builtin; -} - -// 获取指针指向的类型 -scc_ast_type_t *scc_ast_type_get_pointee(const scc_ast_type_t *type) { - if 
(!scc_ast_type_is_pointer(type)) - return null; - return type->pointer.pointee; -} - -// 获取数组元素类型 -scc_ast_type_t *scc_ast_type_get_element(const scc_ast_type_t *type) { - if (!scc_ast_type_is_array(type)) - return null; - return type->array.element; -} - -// 获取函数返回类型 -scc_ast_type_t *scc_ast_type_get_return(const scc_ast_type_t *type) { - if (!scc_ast_type_is_function(type)) - return null; - return type->function.return_type; -} - -// 获取类型限定符 -scc_ast_decl_specifier_t scc_ast_type_get_quals(const scc_ast_type_t *type) { - if (!type) - return (scc_ast_decl_specifier_t){0}; - - switch (type->base.type) { - case SCC_AST_TYPE_BUILTIN: - return type->builtin.quals; - case SCC_AST_TYPE_POINTER: - return type->pointer.quals; - default: - return (scc_ast_decl_specifier_t){0}; - } -} \ No newline at end of file +// // 前向声明辅助函数 +// static scc_ast_type_t *parse_specifier_qualifier_list(scc_parser_t *parser, +// usize *offset); +// static scc_ast_type_t *parse_abstract_declarator(scc_parser_t *parser, +// usize *offset, +// scc_ast_type_t *base_type); +// static scc_ast_type_t * +// parse_direct_abstract_declarator(scc_parser_t *parser, usize *offset, +// scc_ast_type_t *base_type); +// static scc_ast_type_t *parse_pointer(scc_parser_t *parser, usize *offset); +// static scc_ast_decl_specifier_t parse_type_qualifier_list(scc_parser_t +// *parser, +// usize *offset); +// static scc_ast_type_t *parse_struct_or_union_specifier(scc_parser_t *parser, +// usize *offset); +// static scc_ast_type_t *parse_enum_specifier(scc_parser_t *parser, +// usize *offset); + +// // 创建内置类型节点的辅助函数 +// static scc_ast_type_t *create_builtin_type(scc_ast_builtin_type_t builtin, +// scc_ast_decl_specifier_t quals) { +// scc_ast_type_t *type = (scc_ast_type_t +// *)scc_malloc(sizeof(scc_ast_type_t)); if (!type) +// return null; + +// type->base.type = SCC_AST_TYPE_BUILTIN; +// type->builtin.type = builtin; +// type->builtin.quals = quals; +// return type; +// } + +// // 创建指针类型节点的辅助函数 +// static 
scc_ast_type_t *create_pointer_type(scc_ast_type_t *pointee, +// scc_ast_decl_specifier_t quals) { +// scc_ast_type_t *type = (scc_ast_type_t +// *)scc_malloc(sizeof(scc_ast_type_t)); if (!type) +// return null; + +// type->base.type = SCC_AST_TYPE_POINTER; +// type->pointer.pointee = pointee; +// type->pointer.quals = quals; +// return type; +// } + +// // 创建数组类型节点的辅助函数 +// static scc_ast_type_t *create_array_type(scc_ast_type_t *element, +// scc_ast_expr_t *size) { +// scc_ast_type_t *type = (scc_ast_type_t +// *)scc_malloc(sizeof(scc_ast_type_t)); if (!type) +// return null; + +// type->base.type = SCC_AST_TYPE_ARRAY; +// type->array.element = element; +// type->array.size = size; +// return type; +// } + +// // 创建函数类型节点的辅助函数 +// static scc_ast_type_t *create_function_type(scc_ast_type_t *return_type, +// scc_ast_type_vec_t param_types, +// cbool is_variadic) { +// scc_ast_type_t *type = (scc_ast_type_t +// *)scc_malloc(sizeof(scc_ast_type_t)); if (!type) +// return null; + +// type->base.type = SCC_AST_TYPE_FUNCTION; +// type->function.return_type = return_type; +// type->function.param_types = param_types; +// type->function.is_variadic = is_variadic; +// return type; +// } + +// /** +// * @brief 解析类型名 +// */ +// scc_ast_type_t *scc_parse_type_name(scc_parser_t *parser, usize *offset) { +// usize start_offset = *offset; + +// // 解析 specifier-qualifier-list +// scc_ast_type_t *type = parse_specifier_qualifier_list(parser, offset); +// if (type == null) { +// // TODO +// // LOG_ERROR("Failed to parse specifier-qualifier-list at offset +// %zu", +// // start_offset); +// return null; +// } + +// // 解析可选的 abstract-declarator +// scc_ast_type_t *full_type = parse_abstract_declarator(parser, offset, +// type); if (full_type == null) { +// return type; +// } + +// return full_type; +// } + +// scc_ast_type_t *scc_parse_type(scc_parser_t *parser) { +// usize offset = 0; +// scc_ast_type_t *ret = scc_parse_type_name(parser, &offset); +// if (ret == null) { +// return 
null; +// } +// scc_lexer_stream_advance(parser->lex_stream, offset); +// return ret; +// } + +// /** +// * @brief 检查是否为类型限定符 token +// */ +// static cbool is_type_qualifier_token(const scc_lexer_tok_t *tok) { +// return tok->type == SCC_TOK_CONST || tok->type == SCC_TOK_RESTRICT || +// tok->type == SCC_TOK_VOLATILE || tok->type == SCC_TOK_ATOMIC; +// } + +// /** +// * @brief 解析类型限定符 +// */ +// static scc_ast_decl_specifier_t parse_type_qualifier(scc_parser_t *parser, +// usize *offset) { +// scc_ast_decl_specifier_t quals = {0}; +// const scc_lexer_tok_t *tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset); + +// switch (tok->type) { +// case SCC_TOK_CONST: +// quals.is_const = true; +// (*offset)++; +// break; +// case SCC_TOK_RESTRICT: +// quals.is_restrict = true; +// (*offset)++; +// break; +// case SCC_TOK_VOLATILE: +// quals.is_volatile = true; +// (*offset)++; +// break; +// case SCC_TOK_ATOMIC: +// quals.is_atomic = true; +// (*offset)++; +// break; +// default: +// // 不是限定符 +// break; +// } + +// return quals; +// } + +// /** +// * @brief 解析类型限定符列表 +// */ +// static scc_ast_decl_specifier_t parse_type_qualifier_list(scc_parser_t +// *parser, +// usize *offset) { +// scc_ast_decl_specifier_t quals = {0}; + +// while (true) { +// const scc_lexer_tok_t *tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset); + +// if (!is_type_qualifier_token(tok)) { +// break; +// } + +// scc_ast_decl_specifier_t new_qual = +// parse_type_qualifier(parser, offset); + +// // 合并限定符 +// quals.is_const = quals.is_const || new_qual.is_const; +// quals.is_restrict = quals.is_restrict || new_qual.is_restrict; +// quals.is_volatile = quals.is_volatile || new_qual.is_volatile; +// quals.is_atomic = quals.is_atomic || new_qual.is_atomic; +// } + +// return quals; +// } + +// /** +// * @brief 检查是否为类型说明符 token +// */ +// static cbool is_type_specifier_token(const scc_lexer_tok_t *tok) { +// switch (tok->type) { +// case SCC_TOK_VOID: +// case SCC_TOK_CHAR: +// case 
SCC_TOK_SHORT: +// case SCC_TOK_INT: +// case SCC_TOK_LONG: +// case SCC_TOK_FLOAT: +// case SCC_TOK_DOUBLE: +// case SCC_TOK_SIGNED: +// case SCC_TOK_UNSIGNED: +// case SCC_TOK_BOOL: +// case SCC_TOK_COMPLEX: +// case SCC_TOK_STRUCT: +// case SCC_TOK_UNION: +// case SCC_TOK_ENUM: +// return true; +// default: +// // 可能是 typedef 名称 +// return tok->type == SCC_TOK_IDENT; +// } +// } + +// /** +// * @brief 解析类型说明符 +// */ +// static scc_ast_type_t *parse_type_specifier(scc_parser_t *parser, +// usize *offset) { +// const scc_lexer_tok_t *tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset); + +// // 处理简单内置类型 +// if (tok->type == SCC_TOK_VOID) { +// (*offset)++; +// return create_builtin_type(TYPE_VOID, (scc_ast_decl_specifier_t){0}); +// } else if (tok->type == SCC_TOK_CHAR) { +// (*offset)++; +// return create_builtin_type(TYPE_CHAR, (scc_ast_decl_specifier_t){0}); +// } else if (tok->type == SCC_TOK_SHORT) { +// (*offset)++; +// return create_builtin_type(TYPE_SHORT, +// (scc_ast_decl_specifier_t){0}); +// } else if (tok->type == SCC_TOK_INT) { +// (*offset)++; +// return create_builtin_type(TYPE_INT, (scc_ast_decl_specifier_t){0}); +// } else if (tok->type == SCC_TOK_LONG) { +// // 检查是否为 long long +// const scc_lexer_tok_t *next_tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset + 1); +// if (next_tok->type == SCC_TOK_LONG) { +// (*offset) += 2; // 跳过两个 long +// return create_builtin_type(TYPE_LONG_LONG, +// (scc_ast_decl_specifier_t){0}); +// } else { +// (*offset)++; +// return create_builtin_type(TYPE_LONG, +// (scc_ast_decl_specifier_t){0}); +// } +// } else if (tok->type == SCC_TOK_FLOAT) { +// (*offset)++; +// return create_builtin_type(TYPE_FLOAT, +// (scc_ast_decl_specifier_t){0}); +// } else if (tok->type == SCC_TOK_DOUBLE) { +// // 检查是否为 long double +// const scc_lexer_tok_t *next_tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset + 1); +// if (next_tok->type == SCC_TOK_LONG) { +// (*offset) += 2; // 跳过 double long +// return 
create_builtin_type(TYPE_LONG_DOUBLE, +// (scc_ast_decl_specifier_t){0}); +// } else { +// (*offset)++; +// return create_builtin_type(TYPE_DOUBLE, +// (scc_ast_decl_specifier_t){0}); +// } +// } else if (tok->type == SCC_TOK_BOOL) { +// (*offset)++; +// return create_builtin_type(TYPE_BOOL, (scc_ast_decl_specifier_t){0}); +// } else if (tok->type == SCC_TOK_SIGNED || tok->type == SCC_TOK_UNSIGNED) +// { +// // signed/unsigned 需要与后续类型组合 +// // 这里简化处理,默认为 int +// (*offset)++; +// return create_builtin_type(TYPE_INT, (scc_ast_decl_specifier_t){0}); +// } else if (tok->type == SCC_TOK_COMPLEX) { +// // _Complex 需要与浮点类型组合 +// // 这里简化处理 +// (*offset)++; +// return create_builtin_type(TYPE_COMPLEX_FLOAT, +// (scc_ast_decl_specifier_t){0}); +// } + +// // 处理结构体/联合体 +// if (tok->type == SCC_TOK_STRUCT || tok->type == SCC_TOK_UNION) { +// return parse_struct_or_union_specifier(parser, offset); +// } + +// // 处理枚举 +// if (tok->type == SCC_TOK_ENUM) { +// return parse_enum_specifier(parser, offset); +// } + +// // 处理 typedef 名称 +// if (tok->type == SCC_TOK_IDENT) { +// // TODO +// return null; +// scc_ast_type_t *type = +// (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); +// if (!type) +// return null; + +// type->base.type = SCC_AST_TYPE_TYPEDEF; +// type->typedef_type.name = tok->value.cstr.data; +// type->typedef_type.underlying = null; // 需要从符号表解析 + +// (*offset)++; +// return type; +// } + +// return null; +// } + +// /** +// * @brief 解析结构体或联合体说明符 +// */ +// static scc_ast_type_t *parse_struct_or_union_specifier(scc_parser_t *parser, +// usize *offset) { +// const scc_lexer_tok_t *tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset); +// cbool is_struct = (tok->type == SCC_TOK_STRUCT); + +// (*offset)++; // 跳过 struct 或 union + +// scc_ast_type_t *type = (scc_ast_type_t +// *)scc_malloc(sizeof(scc_ast_type_t)); if (!type) +// return null; + +// type->base.type = is_struct ? 
SCC_AST_TYPE_STRUCT : SCC_AST_TYPE_UNION; +// type->record.name = null; +// scc_vec_init(type->record.fields); + +// // 检查是否有标识符 +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type == SCC_TOK_IDENT) { +// type->record.name = tok->value.cstr.data; +// (*offset)++; +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// } + +// // 如果有 '{',解析结构体定义 +// if (tok->type == SCC_TOK_L_BRACE) { +// (*offset)++; // 跳过 '{' + +// // TODO: 解析 struct-declaration-list +// // 这是一个复杂的子解析器,需要单独实现 + +// // 临时:跳过所有声明直到遇到 '}' +// while (true) { +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type == SCC_TOK_R_BRACE) +// break; +// if (tok->type == SCC_TOK_EOF) { +// LOG_ERROR("Unclosed struct/union definition"); +// scc_free(type); +// return null; +// } +// (*offset)++; +// } + +// (*offset)++; // 跳过 '}' +// } + +// return type; +// } + +// /** +// * @brief 解析枚举说明符 +// */ +// static scc_ast_type_t *parse_enum_specifier(scc_parser_t *parser, +// usize *offset) { +// const scc_lexer_tok_t *tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type != SCC_TOK_ENUM) +// return null; + +// (*offset)++; // 跳过 enum + +// scc_ast_type_t *type = (scc_ast_type_t +// *)scc_malloc(sizeof(scc_ast_type_t)); if (!type) +// return null; + +// type->base.type = SCC_AST_TYPE_ENUM; +// type->enumeration.name = null; +// scc_vec_init(type->enumeration.enumerators); + +// // 检查是否有标识符 +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type == SCC_TOK_IDENT) { +// type->enumeration.name = tok->value.cstr.data; +// (*offset)++; +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// } + +// // 如果有 '{',解析枚举定义 +// if (tok->type == SCC_TOK_L_BRACE) { +// (*offset)++; // 跳过 '{' + +// // TODO: 解析 enumerator-list +// // 这是一个复杂的子解析器,需要单独实现 + +// // 临时:跳过所有枚举项直到遇到 '}' +// while (true) { +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type == SCC_TOK_R_BRACE) +// break; +// 
if (tok->type == SCC_TOK_EOF) { +// LOG_ERROR("Unclosed enum definition"); +// scc_free(type); +// return null; +// } +// (*offset)++; +// } + +// (*offset)++; // 跳过 '}' +// } + +// return type; +// } + +// /** +// * @brief 解析指定符-限定符列表 +// */ +// static scc_ast_type_t *parse_specifier_qualifier_list(scc_parser_t *parser, +// usize *offset) { +// scc_ast_decl_specifier_t quals = {0}; + +// // 收集类型限定符(可能出现在前面) +// while (true) { +// const scc_lexer_tok_t *tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset); + +// if (!is_type_qualifier_token(tok)) { +// break; +// } + +// scc_ast_decl_specifier_t new_quals = +// parse_type_qualifier(parser, offset); +// quals.is_const |= new_quals.is_const; +// quals.is_restrict |= new_quals.is_restrict; +// quals.is_volatile |= new_quals.is_volatile; +// quals.is_atomic |= new_quals.is_atomic; +// } + +// // 解析类型说明符 +// scc_ast_type_t *type_specifier = parse_type_specifier(parser, offset); +// if (!type_specifier) { +// // TODO +// // LOG_ERROR("Expected type specifier"); +// return null; +// } + +// // 收集可能出现在类型说明符后面的限定符 +// while (true) { +// const scc_lexer_tok_t *tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset); + +// if (!is_type_qualifier_token(tok)) { +// break; +// } + +// scc_ast_decl_specifier_t new_quals = +// parse_type_qualifier(parser, offset); +// quals.is_const |= new_quals.is_const; +// quals.is_restrict |= new_quals.is_restrict; +// quals.is_volatile |= new_quals.is_volatile; +// quals.is_atomic |= new_quals.is_atomic; +// } + +// // 将限定符应用到类型上 +// if (type_specifier->base.type == SCC_AST_TYPE_BUILTIN) { +// type_specifier->builtin.quals = quals; +// } else if (type_specifier->base.type == SCC_AST_TYPE_POINTER) { +// // 对于指针类型,这里的限定符应该应用到指针本身 +// type_specifier->pointer.quals = quals; +// } else if (type_specifier->base.type == SCC_AST_TYPE_TYPEDEF) { +// // typedef 类型可能也有限定符,但需要保存在其他地方 +// // 这里简化处理 +// } + +// return type_specifier; +// } + +// /** +// * @brief 解析指针 +// */ +// static 
scc_ast_type_t *parse_pointer(scc_parser_t *parser, usize *offset) { +// const scc_lexer_tok_t *tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset); + +// if (tok->type != SCC_TOK_MUL) { +// return null; // 不是指针 +// } + +// (*offset)++; // 跳过 '*' + +// // 解析可选的类型限定符列表 +// scc_ast_decl_specifier_t ptr_quals = +// parse_type_qualifier_list(parser, offset); + +// // 递归解析更多指针(多重指针) +// scc_ast_type_t *inner_pointer = parse_pointer(parser, offset); + +// // 创建指针类型 +// scc_ast_type_t *ptr_type = create_pointer_type(null, ptr_quals); +// if (!ptr_type) +// return null; + +// if (inner_pointer) { +// // 有更多指针,将它们链接起来 +// scc_ast_type_t *current = ptr_type; +// while (current->base.type == SCC_AST_TYPE_POINTER && +// current->pointer.pointee && +// current->pointer.pointee->base.type == SCC_AST_TYPE_POINTER) { +// current = current->pointer.pointee; +// } +// current->pointer.pointee = inner_pointer; +// } + +// return ptr_type; +// } + +// /** +// * @brief 解析抽象声明符 +// */ +// static scc_ast_type_t *parse_abstract_declarator(scc_parser_t *parser, +// usize *offset, +// scc_ast_type_t *base_type) { +// // 解析可选的指针 +// scc_ast_type_t *ptr_type = parse_pointer(parser, offset); + +// if (ptr_type) { +// // 将指针的 pointee 指向解析出的直接抽象声明符 +// scc_ast_type_t *direct_type = +// parse_direct_abstract_declarator(parser, offset, base_type); + +// // 找到最内层的指针 +// scc_ast_type_t *current = ptr_type; +// while (current->base.type == SCC_AST_TYPE_POINTER && +// current->pointer.pointee && +// current->pointer.pointee->base.type == SCC_AST_TYPE_POINTER) { +// current = current->pointer.pointee; +// } + +// if (current->base.type == SCC_AST_TYPE_POINTER) { +// current->pointer.pointee = direct_type; +// } + +// return ptr_type; +// } else { +// // 没有指针,直接解析直接抽象声明符 +// return parse_direct_abstract_declarator(parser, offset, base_type); +// } +// } + +// /** +// * @brief 解析直接抽象声明符 +// */ +// static scc_ast_type_t * +// parse_direct_abstract_declarator(scc_parser_t *parser, usize *offset, 
+// scc_ast_type_t *base_type) { +// scc_ast_type_t *current_type = base_type; + +// while (true) { +// const scc_lexer_tok_t *tok = +// scc_lexer_stream_peek(parser->lex_stream, *offset); + +// // 情况1: ( abstract-declarator ) +// if (tok->type == SCC_TOK_L_PAREN) { +// (*offset)++; // 跳过 '(' + +// // 解析括号内的抽象声明符 +// scc_ast_type_t *inner_type = +// parse_abstract_declarator(parser, offset, current_type); + +// // 期望右括号 +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type != SCC_TOK_R_PAREN) { +// LOG_ERROR("Expected ')' after abstract-declarator"); +// return current_type; +// } +// (*offset)++; // 跳过 ')' + +// current_type = inner_type; +// continue; +// } + +// // 情况2: 数组声明符 [ ... ] +// if (tok->type == SCC_TOK_L_BRACKET) { +// (*offset)++; // 跳过 '[' + +// // 检查是否是 [ * ] (可变长度数组) +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type == SCC_TOK_MUL) { +// // 可变长度数组 +// scc_ast_type_t *array_type = +// create_array_type(current_type, null); +// if (!array_type) +// return current_type; + +// (*offset)++; // 跳过 '*' +// } else { +// // 解析可选的 static 关键字 +// cbool has_static = false; +// if (tok->type == SCC_TOK_STATIC) { +// has_static = true; +// (*offset)++; +// } + +// // 解析可选的类型限定符列表 +// parse_type_qualifier_list(parser, offset); +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + +// // 如果之前没有 static,但有限定符,再检查一次 static +// if (!has_static && tok->type == SCC_TOK_STATIC) { +// has_static = true; +// (*offset)++; +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// } + +// // 解析可选的赋值表达式(数组大小) +// scc_ast_expr_t *size_expr = null; +// if (tok->type != SCC_TOK_R_BRACKET) { +// // TODO: 解析 assignment-expression +// // 简化:设置 size 为 null +// size_expr = null; + +// // 跳过表达式 +// while (tok->type != SCC_TOK_R_BRACKET && +// tok->type != SCC_TOK_EOF) { +// (*offset)++; +// tok = +// scc_lexer_stream_peek(parser->lex_stream, +// *offset); +// } +// } + +// scc_ast_type_t *array_type = +// 
create_array_type(current_type, size_expr); +// if (!array_type) +// return current_type; + +// current_type = array_type; +// } + +// // 期望右括号 +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type != SCC_TOK_R_BRACKET) { +// LOG_ERROR("Expected ']' after array declarator"); +// return current_type; +// } +// (*offset)++; // 跳过 ']' +// continue; +// } + +// // 情况3: 函数声明符 ( parameter-type-list(opt) ) +// if (tok->type == SCC_TOK_L_PAREN) { +// (*offset)++; // 跳过 '(' + +// // 检查是否为空参数列表 () +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type == SCC_TOK_R_PAREN) { +// // 空参数列表 +// (*offset)++; // 跳过 ')' +// scc_ast_type_t *func_type = create_function_type( +// current_type, (scc_ast_type_vec_t){0}, false); +// if (!func_type) +// return current_type; +// current_type = func_type; +// continue; +// } else if (tok->type == SCC_TOK_VOID) { +// // void 参数列表 +// (*offset)++; // 跳过 void +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type == SCC_TOK_R_PAREN) { +// (*offset)++; // 跳过 ')' +// scc_ast_type_vec_t param_types; +// scc_vec_init(param_types); +// scc_ast_type_t *func_type = +// create_function_type(current_type, param_types, +// false); +// if (!func_type) +// return current_type; +// current_type = func_type; +// continue; +// } else { +// LOG_ERROR("Expected ')' after void parameter list"); +// return current_type; +// } +// } else { +// // TODO: 解析 parameter-type-list +// // 这是一个复杂的子解析器,需要单独实现 + +// scc_ast_type_vec_t param_types; +// scc_vec_init(param_types); +// cbool is_variadic = false; + +// // 临时:跳过所有参数直到遇到 ')' 或 '...' 
+// while (true) { +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type == SCC_TOK_R_PAREN) +// break; +// if (tok->type == SCC_TOK_ELLIPSIS) { +// is_variadic = true; +// (*offset)++; +// break; +// } +// if (tok->type == SCC_TOK_EOF) { +// LOG_ERROR("Unclosed function parameter list"); +// return current_type; +// } +// (*offset)++; +// } + +// tok = scc_lexer_stream_peek(parser->lex_stream, *offset); +// if (tok->type == SCC_TOK_R_PAREN) { +// (*offset)++; // 跳过 ')' +// } + +// scc_ast_type_t *func_type = create_function_type( +// current_type, param_types, is_variadic); +// if (!func_type) +// return current_type; +// current_type = func_type; +// continue; +// } +// } + +// // 没有更多的直接抽象声明符 +// break; +// } + +// return current_type; +// } + +// // 提供类型检查接口 +// cbool scc_ast_type_is_builtin(const scc_ast_type_t *type) { +// return type && type->base.type == SCC_AST_TYPE_BUILTIN; +// } + +// cbool scc_ast_type_is_pointer(const scc_ast_type_t *type) { +// return type && type->base.type == SCC_AST_TYPE_POINTER; +// } + +// cbool scc_ast_type_is_array(const scc_ast_type_t *type) { +// return type && type->base.type == SCC_AST_TYPE_ARRAY; +// } + +// cbool scc_ast_type_is_function(const scc_ast_type_t *type) { +// return type && type->base.type == SCC_AST_TYPE_FUNCTION; +// } + +// cbool scc_ast_type_is_struct(const scc_ast_type_t *type) { +// return type && type->base.type == SCC_AST_TYPE_STRUCT; +// } + +// cbool scc_ast_type_is_union(const scc_ast_type_t *type) { +// return type && type->base.type == SCC_AST_TYPE_UNION; +// } + +// cbool scc_ast_type_is_enum(const scc_ast_type_t *type) { +// return type && type->base.type == SCC_AST_TYPE_ENUM; +// } + +// cbool scc_ast_type_is_typedef(const scc_ast_type_t *type) { +// return type && type->base.type == SCC_AST_TYPE_TYPEDEF; +// } + +// // 获取内置类型 +// scc_ast_builtin_type_t scc_ast_type_get_builtin(const scc_ast_type_t *type) { +// if (!scc_ast_type_is_builtin(type)) +// return TYPE_VOID; 
+// return type->builtin.type; +// } + +// // 获取指针指向的类型 +// scc_ast_type_t *scc_ast_type_get_pointee(const scc_ast_type_t *type) { +// if (!scc_ast_type_is_pointer(type)) +// return null; +// return type->pointer.pointee; +// } + +// // 获取数组元素类型 +// scc_ast_type_t *scc_ast_type_get_element(const scc_ast_type_t *type) { +// if (!scc_ast_type_is_array(type)) +// return null; +// return type->array.element; +// } + +// // 获取函数返回类型 +// scc_ast_type_t *scc_ast_type_get_return(const scc_ast_type_t *type) { +// if (!scc_ast_type_is_function(type)) +// return null; +// return type->function.return_type; +// } + +// // 获取类型限定符 +// scc_ast_decl_specifier_t scc_ast_type_get_quals(const scc_ast_type_t *type) { +// if (!type) +// return (scc_ast_decl_specifier_t){0}; + +// switch (type->base.type) { +// case SCC_AST_TYPE_BUILTIN: +// return type->builtin.quals; +// case SCC_AST_TYPE_POINTER: +// return type->pointer.quals; +// default: +// return (scc_ast_decl_specifier_t){0}; +// } +// } diff --git a/libs/parser/src/parser.c b/libs/parser/src/scc_parser.c similarity index 77% rename from libs/parser/src/parser.c rename to libs/parser/src/scc_parser.c index 9dfeac3..18450b1 100644 --- a/libs/parser/src/parser.c +++ b/libs/parser/src/scc_parser.c @@ -1,11 +1,5 @@ -/** - * @file parser.c - * @brief 新的解析器实现 - */ - -#include "parser.h" -#include - +#include +#include static void dummy_sema_callback(void *context, scc_ast_node_type_t node_type, void *node) { (void)context; @@ -17,11 +11,11 @@ static void dummy_sema_callback(void *context, scc_ast_node_type_t node_type, #define ASSIGN_PTR_OR_DEFAULT(assigned_val, value, default) \ assigned_val = value ? 
value : default -void scc_parser_init(scc_parser_t *parser, scc_lexer_stream_t *lexer, +void scc_parser_init(scc_parser_t *parser, scc_lexer_tok_ring_t *tok_ring, scc_sema_callbacks_t *callbacks) { - Assert(parser != null && lexer != null); - parser->lex_stream = lexer; - parser->has_error = false; + Assert(parser != null && tok_ring != null); + parser->ring = tok_ring; + parser->errcode = 0; parser->translation_unit = null; if (callbacks) { ASSIGN_PTR_OR_DEFAULT(parser->sema_callbacks.on_decl, @@ -38,15 +32,12 @@ void scc_parser_init(scc_parser_t *parser, scc_lexer_stream_t *lexer, parser->sema_callbacks.on_stmt = dummy_sema_callback; parser->sema_callbacks.on_expr = dummy_sema_callback; parser->sema_callbacks.on_type = dummy_sema_callback; - parser->sema_callbacks.context = dummy_sema_callback; + parser->sema_callbacks.context = null; } - // // ONLY FOR INIT TYPE - // parser->current_token.type = SCC_TOK_UNKNOWN; } void scc_parser_drop(scc_parser_t *parser) { // TODO: 释放 AST 内存 - (void)parser; } scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser) { @@ -62,7 +53,8 @@ scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser) { * same as * Program := Declaration* Definition* */ - do { + cbool matched = false; + while (1) { scc_ast_decl_t *decl = scc_parse_declaration(parser); if (decl != null) { scc_vec_push(unit->declarations, decl); @@ -70,11 +62,16 @@ scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser) { break; // MAYBE return or next } - } while (!scc_lexer_tok_match(scc_lexer_stream_current(parser->lex_stream), - SCC_TOK_EOF) && - !parser->has_error); + if (parser->errcode != 0) { // FIXME errcode + break; + } + const scc_lexer_tok_t *tok = scc_parser_peek(parser); + if (tok == null || tok->type == SCC_TOK_EOF) { + break; + } + } - if (parser->has_error) { + if (parser->errcode) { // TODO: 清理 scc_free(unit); return null; diff --git a/libs/parser/src/scc_sema.c b/libs/parser/src/scc_sema.c new 
file mode 100644
index 0000000..fd26198
--- /dev/null
+++ b/libs/parser/src/scc_sema.c
@@ -0,0 +1,23 @@
+#include
+#include
+
+/**
+ * @brief Allocate a symbol table and store it as the callbacks' context.
+ *
+ * All four node callbacks are set to null. If allocation fails,
+ * LOG_FATAL is called and callbacks is left unmodified.
+ * NOTE(review): the table is not passed to scc_sema_symtab_init() here;
+ * confirm it is initialized before first use.
+ */
+void scc_sema_init(scc_sema_callbacks_t *callbacks) {
+    scc_sema_symtab_t *sema_symtab = scc_malloc(sizeof(scc_sema_symtab_t));
+    if (sema_symtab == null) {
+        LOG_FATAL("out of memory");
+        return;
+    }
+    callbacks->context = sema_symtab;
+    callbacks->on_decl = null;
+    callbacks->on_expr = null;
+    callbacks->on_stmt = null;
+    callbacks->on_type = null;
+}
diff --git a/libs/parser/src/sema_symtab.c b/libs/parser/src/sema_symtab.c
new file mode 100644
index 0000000..6e3d599
--- /dev/null
+++ b/libs/parser/src/sema_symtab.c
@@ -0,0 +1,83 @@
+#include
+
+/**
+ * @brief Set up the root scope and make it the current scope.
+ */
+void scc_sema_symtab_init(scc_sema_symtab_t *symtab) {
+    symtab->root_scope.parent = null;
+
+    // Keys are symbol names: scc_strhash32 fills the hash slot and
+    // scc_strcmp the equality slot, matching the cast on each argument.
+    scc_hashtable_init(&symtab->root_scope.symbols,
+                       (scc_hashtable_hash_func_t)scc_strhash32,
+                       (scc_hashtable_equal_func_t)scc_strcmp);
+    symtab->current_scope = &symtab->root_scope;
+}
+
+/**
+ * @brief Drop the symbol hashtable of the current scope and every ancestor.
+ *
+ * NOTE(review): scope objects from scc_sema_symtab_enter_scope() are not
+ * freed here; only their hashtables are dropped.
+ */
+void scc_sema_symtab_drop(scc_sema_symtab_t *symtab) {
+    while (symtab->current_scope != null) {
+        scc_hashtable_drop(&symtab->current_scope->symbols);
+        symtab->current_scope = symtab->current_scope->parent;
+    }
+}
+
+/**
+ * @brief Push a new scope whose parent is the current scope.
+ *
+ * On allocation failure LOG_FATAL is called and the current scope is
+ * left unchanged.
+ */
+void scc_sema_symtab_enter_scope(scc_sema_symtab_t *symtab) {
+    scc_sema_scope_t *scope = scc_malloc(sizeof(scc_sema_scope_t));
+    if (scope == null) {
+        LOG_FATAL("out of memory");
+        return;
+    }
+    scope->parent = symtab->current_scope;
+    scc_hashtable_init(&scope->symbols,
+                       (scc_hashtable_hash_func_t)scc_strhash32,
+                       (scc_hashtable_equal_func_t)scc_strcmp);
+    symtab->current_scope = scope;
+}
+
+/**
+ * @brief Pop the current scope and make its parent current.
+ *
+ * Leaving the root scope is rejected with LOG_ERROR.
+ * NOTE(review): the popped scope's hashtable is dropped but the scope
+ * object itself is not freed here.
+ */
+void scc_sema_symtab_leave_scope(scc_sema_symtab_t *symtab) {
+    if (symtab->current_scope == &symtab->root_scope) {
+        LOG_ERROR("out of scope");
+        return;
+    }
+    scc_hashtable_drop(&symtab->current_scope->symbols);
+    symtab->current_scope = symtab->current_scope->parent;
+}
+
+scc_ast_node_t *scc_sema_symtab_add_symbol(scc_sema_symtab_t *symtab,
+                                           const char *name,
+                                           scc_ast_node_t *ast_node_ref) {
+    return
scc_hashtable_set(&symtab->current_scope->symbols, name, + ast_node_ref); +} + +scc_ast_node_t *scc_sema_symtab_lookup_symbol(scc_sema_symtab_t *symtab, + const char *name) { + scc_ast_node_t *node = null; + for (scc_sema_scope_t *scope = symtab->current_scope; scope != null; + scope = scope->parent) { + node = scc_hashtable_get(&scope->symbols, name); + if (node != null) { + return node; + } + } + return null; +} diff --git a/libs/parser/tests/test_parser_unit.c b/libs/parser/tests/test_parser_unit.c new file mode 100644 index 0000000..82a5ca7 --- /dev/null +++ b/libs/parser/tests/test_parser_unit.c @@ -0,0 +1,453 @@ +#include +#include +#include +#include +#include + +typedef scc_ast_node_t *(*scc_parse_node_func)(scc_parser_t *parser); + +static scc_ast_node_t *process_input(const char *input, + scc_parse_node_func parse_func) { + int res = 0; + scc_sstream_t mem_stream; + res = scc_sstream_init_by_buffer(&mem_stream, input, strlen(input), false, + 16); + Assert(res == 0); + + scc_lexer_t lexer; + scc_lexer_init(&lexer, scc_sstream_to_ring(&mem_stream)); + + scc_lexer_tok_ring_t *tok_ring = scc_lexer_to_ring(&lexer, 8, false); + + scc_parser_t parser; + scc_parser_init(&parser, tok_ring, null); + + scc_ast_node_t *ret = parse_func(&parser); + + cbool not_eof = false; + scc_ring_not_eof(*parser.ring, not_eof); + Assert(!not_eof == true); + + scc_lexer_drop_ring(parser.ring); + scc_parser_drop(&parser); + scc_lexer_drop(&lexer); + scc_sstream_drop(&mem_stream); + return ret; +} + +typedef void (*scc_tree_dump_output_t)(void *userdata, const char *fmt, ...); + +#define BUFFER_SIZE (4096) +char expect_buffer[BUFFER_SIZE]; +char output_buffer[BUFFER_SIZE]; + +static void dump2buffer(void *_buffer, const char *fmt, ...) 
{ + char *buffer = _buffer; + va_list args; + va_start(args, fmt); + scc_vsnprintf(buffer + strlen(buffer), BUFFER_SIZE - strlen(buffer) - 1, + fmt, args); + va_end(args); +} + +#define SCC_CHECK_AST(expect_node_ptr, str, parse_func) \ + do { \ + scc_ast_node_t *output_node_ptr = \ + process_input(str, (scc_parse_node_func)parse_func); \ + scc_tree_dump_ctx_t ctx; \ + expect_buffer[0] = '\n', expect_buffer[1] = '\0'; \ + scc_tree_dump_ctx_init(&ctx, true, dump2buffer, expect_buffer); \ + scc_ast_dump_node(&ctx, expect_node_ptr); \ + scc_tree_dump_ctx_drop(&ctx); \ + output_buffer[0] = '\n', output_buffer[1] = '\0'; \ + scc_tree_dump_ctx_init(&ctx, true, dump2buffer, output_buffer); \ + scc_ast_dump_node(&ctx, output_node_ptr); \ + scc_tree_dump_ctx_drop(&ctx); \ + TEST_CHECK(strcmp(output_buffer, expect_buffer) == 0); \ + TEST_MSG("Expected: %s", expect_buffer); \ + TEST_MSG("Produced: %s", output_buffer); \ + } while (0); + +static void test_parser_unit(void) { + scc_ast_decl_t int_decl = { + .base.type = SCC_AST_DECL_VAR, + .var.name = "a", + .var.init = null, + .var.type = &(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN, + .builtin.type = SCC_AST_BUILTIN_TYPE_INT}, + }; + SCC_CHECK_AST(&int_decl.base, "int a;", scc_parse_declaration); + + scc_ast_decl_t func_decl = { + .base.type = SCC_AST_DECL_FUNC, + .func.name = "main", + .func.body = + &(scc_ast_stmt_t){ + .base.type = SCC_AST_STMT_COMPOUND, + .compound.block_items = {0}, + }, + .func.type = + &(scc_ast_type_t){ + .base.type = SCC_AST_TYPE_FUNCTION, + .function.is_variadic = false, + .function.param_types = {0}, + .function.return_type = + &(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN, + .builtin.type = SCC_AST_BUILTIN_TYPE_INT}, + }, + }; + SCC_CHECK_AST(&func_decl.base, "int main(void) {}", scc_parse_declaration); + + scc_ast_decl_t *decls[] = {&func_decl}; + scc_ast_translation_unit_t tu = { + .base.type = SCC_AST_TRANSLATION_UNIT, + .declarations.data = decls, + .declarations.cap = 1, + 
.declarations.size = 1, + }; + SCC_CHECK_AST(&tu.base, "int main(void) {}", scc_parse_translation_unit); + // SCC_CHECK_AST(&func_decl.base, "int main(void);", scc_parse_declaration); + + { + scc_ast_node_t *items[] = { + (scc_ast_node_t *)&(scc_ast_stmt_t){ + .base.type = SCC_AST_STMT_RETURN, + .return_stmt.expr = + &(scc_ast_expr_t){ + .base.type = SCC_AST_EXPR_INT_LITERAL, + .literal.lexme = "0", + }, + }, + }; + scc_ast_decl_t func_decl = { + .base.type = SCC_AST_DECL_FUNC, + .func.name = "main", + .func.body = + &(scc_ast_stmt_t){ + .base.type = SCC_AST_STMT_COMPOUND, + .compound.block_items.cap = 1, + .compound.block_items.size = 1, + .compound.block_items.data = items, + }, + .func.type = + &(scc_ast_type_t){ + .base.type = SCC_AST_TYPE_FUNCTION, + .function.is_variadic = false, + .function.param_types = {0}, + .function.return_type = + &(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN, + .builtin.type = + SCC_AST_BUILTIN_TYPE_INT}, + }, + }; + scc_ast_decl_t *decls[] = {&func_decl}; + scc_ast_translation_unit_t tu = { + .base.type = SCC_AST_TRANSLATION_UNIT, + .declarations.cap = 1, + .declarations.size = 1, + .declarations.data = decls, + }; + SCC_CHECK_AST(&tu.base, "int main(void) { return 0; }", + scc_parse_translation_unit); + } +} + +static scc_ast_expr_t make_binary(scc_ast_expr_op_t op, scc_ast_expr_t *lhs, + scc_ast_expr_t *rhs) { + scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_BINARY}; + expr.binary.op = op; + expr.binary.lhs = lhs; + expr.binary.rhs = rhs; + return expr; +} + +static scc_ast_expr_t make_unary(scc_ast_expr_op_t op, + scc_ast_expr_t *operand) { + scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_UNARY}; + expr.unary.op = op; + expr.unary.operand = operand; + return expr; +} + +static scc_ast_expr_t make_identifier(char *name) { + scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_IDENTIFIER}; + expr.identifier.name = name; + return expr; +} + +static scc_ast_expr_t make_int_literal(char *val) { + scc_ast_expr_t expr = {.base.type 
= SCC_AST_EXPR_INT_LITERAL}; + expr.literal.lexme = val; + return expr; +} + +static scc_ast_expr_t make_float_literal(char *val) { + scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_FLOAT_LITERAL}; + expr.literal.lexme = val; + return expr; +} + +static scc_ast_expr_t make_string_literal(char *val) { + scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_STRING_LITERAL}; + expr.literal.lexme = val; + return expr; +} + +static scc_ast_expr_t make_char_literal(char *val) { + scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_CHAR_LITERAL}; + expr.literal.lexme = val; + return expr; +} + +static scc_ast_expr_t make_conditional(scc_ast_expr_t *cond, + scc_ast_expr_t *then_expr, + scc_ast_expr_t *else_expr) { + scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_COND}; + expr.cond.cond = cond; + expr.cond.then_expr = then_expr; + expr.cond.else_expr = else_expr; + return expr; +} + +static scc_ast_expr_t make_call(scc_ast_expr_t *callee, + scc_ast_expr_vec_t *args) { + scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_CALL}; + expr.call.callee = callee; + // 注意:args 需要提前初始化,此处简化处理,实际测试中可能需要动态分配 + // 我们将在具体测试中手动初始化 args 数组 + return expr; +} + +static void test_parser_expression(void) { + // 1. 基本表达式:标识符、整数常量、字符串字面量、括号 + { + scc_ast_expr_t ident = make_identifier("x"); + SCC_CHECK_AST(&ident.base, "x", scc_parse_expression); + + scc_ast_expr_t int_lit = make_int_literal("42"); + SCC_CHECK_AST(&int_lit.base, "42", scc_parse_expression); + + scc_ast_expr_t str_lit = make_string_literal("\"hello\""); + SCC_CHECK_AST(&str_lit.base, "\"hello\"", scc_parse_expression); + + // 括号表达式 + scc_ast_expr_t paren_ident = make_identifier("y"); + SCC_CHECK_AST(&paren_ident.base, "(y)", scc_parse_expression); + } + + // 2. 
后缀表达式 + { + // 数组下标:a[10] + scc_ast_expr_t a = make_identifier("a"); + scc_ast_expr_t index = make_int_literal("10"); + scc_ast_expr_t subscript = {.base.type = SCC_AST_EXPR_ARRAY_SUBSCRIPT}; + subscript.subscript.array = &a; + subscript.subscript.index = &index; + SCC_CHECK_AST(&subscript.base, "a[10]", scc_parse_expression); + + // 函数调用:f() + scc_ast_expr_t f = make_identifier("f"); + scc_ast_expr_t call = {.base.type = SCC_AST_EXPR_CALL}; + call.call.callee = &f; + scc_ast_expr_vec_t args; + scc_vec_init(args); + call.call.args = args; // 空参数列表 + SCC_CHECK_AST(&call.base, "f()", scc_parse_expression); + + // 函数调用带参数:f(1, x) + scc_ast_expr_t f2 = make_identifier("f"); + scc_ast_expr_t arg1 = make_int_literal("1"); + scc_ast_expr_t arg2 = make_identifier("x"); + scc_ast_expr_vec_t args2; + scc_vec_init(args2); + scc_vec_push(args2, &arg1); + scc_vec_push(args2, &arg2); + scc_ast_expr_t call2 = {.base.type = SCC_AST_EXPR_CALL}; + call2.call.callee = &f2; + call2.call.args = args2; + SCC_CHECK_AST(&call2.base, "f(1, x)", scc_parse_expression); + + // 成员访问 . 和 -> + scc_ast_expr_t s = make_identifier("s"); + scc_ast_expr_t dot = {.base.type = SCC_AST_EXPR_MEMBER}; + dot.member.base = &s; + dot.member.member_name = "field"; + SCC_CHECK_AST(&dot.base, "s.field", scc_parse_expression); + + scc_ast_expr_t p = make_identifier("p"); + scc_ast_expr_t arrow = {.base.type = SCC_AST_EXPR_PTR_MEMBER}; + arrow.ptr_member.base = &p; + arrow.ptr_member.member_name = "field"; + SCC_CHECK_AST(&arrow.base, "p->field", scc_parse_expression); + + // 后缀 ++/-- + scc_ast_expr_t x = make_identifier("x"); + scc_ast_expr_t post_inc = make_unary(SCC_AST_OP_POSTFIX_INCREMENT, &x); + SCC_CHECK_AST(&post_inc.base, "x++", scc_parse_expression); + + scc_ast_expr_t post_dec = make_unary(SCC_AST_OP_POSTFIX_DECREMENT, &x); + SCC_CHECK_AST(&post_dec.base, "x--", scc_parse_expression); + + // 复合字面量 TODO: (int){1,2} 需要更复杂的构造,暂略 + // SCC_CHECK_AST(..., "(int){1,2}", scc_parse_expression); + } + + // 3. 
一元表达式 + { + scc_ast_expr_t x = make_identifier("x"); + + scc_ast_expr_t pre_inc = make_unary(SCC_AST_OP_PREFIX_INCREMENT, &x); + SCC_CHECK_AST(&pre_inc.base, "++x", scc_parse_expression); + + scc_ast_expr_t pre_dec = make_unary(SCC_AST_OP_PREFIX_DECREMENT, &x); + SCC_CHECK_AST(&pre_dec.base, "--x", scc_parse_expression); + + scc_ast_expr_t addr = make_unary(SCC_AST_OP_ADDRESS_OF, &x); + SCC_CHECK_AST(&addr.base, "&x", scc_parse_expression); + + scc_ast_expr_t deref = make_unary(SCC_AST_OP_INDIRECTION, &x); + SCC_CHECK_AST(&deref.base, "*x", scc_parse_expression); + + scc_ast_expr_t plus = make_unary(SCC_AST_OP_UNARY_PLUS, &x); + SCC_CHECK_AST(&plus.base, "+x", scc_parse_expression); + + scc_ast_expr_t minus = make_unary(SCC_AST_OP_UNARY_MINUS, &x); + SCC_CHECK_AST(&minus.base, "-x", scc_parse_expression); + + scc_ast_expr_t bit_not = make_unary(SCC_AST_OP_BITWISE_NOT, &x); + SCC_CHECK_AST(&bit_not.base, "~x", scc_parse_expression); + + scc_ast_expr_t log_not = make_unary(SCC_AST_OP_LOGICAL_NOT, &x); + SCC_CHECK_AST(&log_not.base, "!x", scc_parse_expression); + + // sizeof 两种形式 + // sizeof 表达式 + scc_ast_expr_t sizeof_expr = {.base.type = SCC_AST_EXPR_SIZE_OF}; + sizeof_expr.attr_of.expr = &x; + SCC_CHECK_AST(&sizeof_expr.base, "sizeof x", scc_parse_expression); + + // sizeof(类型名) 需要构造类型节点,暂时略,用TODO + // SCC_CHECK_AST(..., "sizeof(int)", scc_parse_expression); + } + + // 4. 类型转换 + { + // (int)x + // 需要构造类型节点,这里简化,用TODO + // scc_ast_type_t int_type = { .base.type = SCC_AST_TYPE_BUILTIN, + // .builtin.type = SCC_AST_BUILTIN_TYPE_INT }; scc_ast_expr_t x = + // make_identifier("x"); scc_ast_expr_t cast = { .base.type = + // SCC_AST_EXPR_CAST }; cast.cast.type = &int_type; cast.cast.expr = &x; + // SCC_CHECK_AST(&cast.base, "(int)x", scc_parse_expression); + } + + // 5. 
二元运算符（按优先级测试） + { + scc_ast_expr_t a = make_identifier("a"); + scc_ast_expr_t b = make_identifier("b"); + scc_ast_expr_t c = make_identifier("c"); + scc_ast_expr_t d = make_identifier("d"); + + // 乘除模优先级高于加减 + scc_ast_expr_t mul = make_binary(SCC_AST_OP_MUL, &a, &b); + scc_ast_expr_t add = make_binary(SCC_AST_OP_ADD, &mul, &c); + SCC_CHECK_AST(&add.base, "a * b + c", scc_parse_expression); + + // 左结合性 a - b - c => (a - b) - c + scc_ast_expr_t sub1 = make_binary(SCC_AST_OP_SUB, &a, &b); + scc_ast_expr_t sub2 = make_binary(SCC_AST_OP_SUB, &sub1, &c); + SCC_CHECK_AST(&sub2.base, "a - b - c", scc_parse_expression); + + // 移位 + scc_ast_expr_t shift = make_binary(SCC_AST_OP_LEFT_SHIFT, &a, &b); + SCC_CHECK_AST(&shift.base, "a << b", scc_parse_expression); + + // 关系 + scc_ast_expr_t lt = make_binary(SCC_AST_OP_LESS, &a, &b); + SCC_CHECK_AST(&lt.base, "a < b", scc_parse_expression); + + // 相等 + scc_ast_expr_t eq = make_binary(SCC_AST_OP_EQUAL, &a, &b); + SCC_CHECK_AST(&eq.base, "a == b", scc_parse_expression); + + // 按位与、异或、或的优先级：& 高于 ^ 高于 | + scc_ast_expr_t bitand = make_binary(SCC_AST_OP_BITWISE_AND, &a, &b); + scc_ast_expr_t bitxor = + make_binary(SCC_AST_OP_BITWISE_XOR, &bitand, &c); + scc_ast_expr_t bitor = make_binary(SCC_AST_OP_BITWISE_OR, &bitxor, &d); + SCC_CHECK_AST(&bitor.base, "a & b ^ c | d", scc_parse_expression); + + // 逻辑与、或：&& 高于 || + scc_ast_expr_t logand = make_binary(SCC_AST_OP_LOGICAL_AND, &a, &b); + scc_ast_expr_t logor = make_binary(SCC_AST_OP_LOGICAL_OR, &logand, &c); + SCC_CHECK_AST(&logor.base, "a && b || c", scc_parse_expression); + } + + // 6. 三元运算符 + { + scc_ast_expr_t cond = make_identifier("a"); + scc_ast_expr_t then_expr = make_identifier("b"); + scc_ast_expr_t else_expr = make_identifier("c"); + scc_ast_expr_t cond_expr = + make_conditional(&cond, &then_expr, &else_expr); + SCC_CHECK_AST(&cond_expr.base, "a ? b : c", scc_parse_expression); + + // 右结合性 a ? b : c ? d : e => a ? b : (c ?
d : e) + scc_ast_expr_t cond2 = make_identifier("c"); + scc_ast_expr_t then2 = make_identifier("d"); + scc_ast_expr_t else2 = make_identifier("e"); + scc_ast_expr_t inner_cond = make_conditional(&cond2, &then2, &else2); + scc_ast_expr_t outer_cond = + make_conditional(&cond, &then_expr, &inner_cond); + SCC_CHECK_AST(&outer_cond.base, "a ? b : c ? d : e", + scc_parse_expression); + } + + // 7. 赋值运算符(右结合) + { + // scc_ast_expr_t a = make_identifier("a"); + // scc_ast_expr_t b = make_identifier("b"); + // scc_ast_expr_t c = make_identifier("c"); + // scc_ast_expr_t assign1 = make_binary(SCC_AST_OP_ASSIGN, &a, &b); + // scc_ast_expr_t assign2 = + // make_binary(SCC_AST_OP_ASSIGN, &assign1, &c); // a = (b = c) + // SCC_CHECK_AST(&assign2.base, "a = b = c", scc_parse_expression); + + // scc_ast_expr_t add_assign = make_binary(SCC_AST_OP_ASSIGN_ADD, &a, + // &b); SCC_CHECK_AST(&add_assign.base, "a += b", scc_parse_expression); + } + + // 8. 逗号运算符 + { + scc_ast_expr_t a = make_identifier("a"); + scc_ast_expr_t b = make_identifier("b"); + scc_ast_expr_t comma1 = make_binary(SCC_AST_OP_COMMA, &a, &b); + SCC_CHECK_AST(&comma1.base, "a, b", scc_parse_expression); + } + + // 9. 
混合优先级测试 + { + scc_ast_expr_t a = make_identifier("a"); + scc_ast_expr_t b = make_identifier("b"); + scc_ast_expr_t c = make_identifier("c"); + scc_ast_expr_t d = make_identifier("d"); + + // a + b * c - d => (a + (b * c)) - d + scc_ast_expr_t mul = make_binary(SCC_AST_OP_MUL, &b, &c); + scc_ast_expr_t add = make_binary(SCC_AST_OP_ADD, &a, &mul); + scc_ast_expr_t sub = make_binary(SCC_AST_OP_SUB, &add, &d); + SCC_CHECK_AST(&sub.base, "a + b * c - d", scc_parse_expression); + + // *p++ => *(p++) + scc_ast_expr_t p = make_identifier("p"); + scc_ast_expr_t post_inc = make_unary(SCC_AST_OP_POSTFIX_INCREMENT, &p); + scc_ast_expr_t deref = make_unary(SCC_AST_OP_INDIRECTION, &post_inc); + SCC_CHECK_AST(&deref.base, "*p++", scc_parse_expression); + } +} + +TEST_LIST = { + {"parser_unit", test_parser_unit}, + {"parser_expression", test_parser_expression}, + {NULL, NULL}, +}; \ No newline at end of file diff --git a/libs/pproc/include/scc_pproc.h b/libs/pproc/include/scc_pproc.h index 3f41d19..c8681d5 100644 --- a/libs/pproc/include/scc_pproc.h +++ b/libs/pproc/include/scc_pproc.h @@ -46,6 +46,8 @@ typedef struct scc_pproc { scc_lexer_tok_ring_t ring; int ring_ref_count; + cbool ring_need_comment; + cbool ring_need_empty; struct { int max_include_depth; @@ -53,7 +55,8 @@ typedef struct scc_pproc { } scc_pproc_t; void scc_pproc_init(scc_pproc_t *pp, scc_lexer_tok_ring_t *input); -scc_lexer_tok_ring_t *scc_pproc_to_ring(scc_pproc_t *pp, int ring_size); +scc_lexer_tok_ring_t *scc_pproc_to_ring(scc_pproc_t *pp, int ring_size, + cbool need_empty, cbool need_comment); void scc_pproc_drop(scc_pproc_t *pp); static inline void scc_pproc_add_include_path(scc_pproc_t *pp, diff --git a/libs/pproc/src/scc_pproc.c b/libs/pproc/src/scc_pproc.c index 9575f06..af5dd31 100644 --- a/libs/pproc/src/scc_pproc.c +++ b/libs/pproc/src/scc_pproc.c @@ -1,4 +1,5 @@ #include +#include #include #include @@ -120,11 +121,27 @@ void scc_pproc_add_builtin_macros() { static cbool fill_token(scc_lexer_tok_t 
*tok, void *userdata) { scc_pproc_t *pp = userdata; - return pproc_next(pp, tok); + cbool ret = false; +CONTINUE: + ret = pproc_next(pp, tok); + if (ret && !pp->ring_need_comment && + scc_get_tok_subtype(tok->type) == SCC_TOK_SUBTYPE_COMMENT) { + scc_lexer_tok_drop(tok); + goto CONTINUE; + } + if (ret && !pp->ring_need_empty && + scc_get_tok_subtype(tok->type) == SCC_TOK_SUBTYPE_EMPTYSPACE) { + scc_lexer_tok_drop(tok); + goto CONTINUE; + } + return ret; } -scc_lexer_tok_ring_t *scc_pproc_to_ring(scc_pproc_t *pp, int ring_size) { +scc_lexer_tok_ring_t *scc_pproc_to_ring(scc_pproc_t *pp, int ring_size, + cbool need_empty, cbool need_comment) { scc_ring_init(pp->ring, ring_size, fill_token, pp); + pp->ring_need_comment = need_comment; + pp->ring_need_empty = need_empty; pp->ring_ref_count++; return &pp->ring; } diff --git a/libs/tree_dump/include/tree_dump.h b/libs/tree_dump/include/tree_dump.h index ca9c0dd..d3fc624 100644 --- a/libs/tree_dump/include/tree_dump.h +++ b/libs/tree_dump/include/tree_dump.h @@ -15,21 +15,10 @@ // #define ANSI_FMT -#define SCC_TREE_DUMP_PRINT_COLORED(ctx, color, before_str, fmt, after_str, \ - ...) \ - scc_printf(before_str "%s" fmt "%s" after_str, \ - ctx->use_color ? color : "", ##__VA_ARGS__, \ - ctx->use_color ? ANSI_NONE : ""); - -#define SCC_TREE_DUMP_PRINT_AROUND(ctx, color, around_str, fmt, ...) \ - SCC_TREE_DUMP_PRINT_COLORED(ctx, color, around_str, fmt, around_str, \ - ##__VA_ARGS__) - -#define SCC_TREE_DUMP_PRINT_PURE(ctx, color, fmt, ...) 
\ - SCC_TREE_DUMP_PRINT_COLORED(ctx, color, "", fmt, "", ##__VA_ARGS__) - typedef SCC_VEC(cbool) scc_ast_dump_stack_t; +typedef void (*scc_tree_dump_output_t)(void *userdata, const char *fmt, ...); + typedef struct { scc_ast_dump_stack_t stack; ///< 每层是否为最后子节点 cbool use_color; ///< 是否使用颜色输出 @@ -43,10 +32,15 @@ typedef struct { const char *value_color; ///< 值颜色 const char *branch_color; ///< 分支符号颜色 const char *reset_color; ///< 重置颜色 + + scc_tree_dump_output_t output_func; + void *output_userdata; } scc_tree_dump_ctx_t; static inline void scc_tree_dump_ctx_init(scc_tree_dump_ctx_t *ctx, - cbool use_color) { + cbool use_color, + scc_tree_dump_output_t output_func, + void *output_userdata) { ctx->use_color = use_color; scc_vec_init(ctx->stack); @@ -59,8 +53,14 @@ static inline void scc_tree_dump_ctx_init(scc_tree_dump_ctx_t *ctx, ctx->value_color = use_color ? SCC_TREE_DUMP_VALUE_COLOR : ""; ctx->branch_color = use_color ? SCC_TREE_DUMP_BRANCH_COLOR : ""; ctx->reset_color = use_color ? SCC_TREE_DUMP_RESET_COLOR : ""; + + ctx->output_func = output_func; + ctx->output_userdata = output_userdata; } +#define scc_tree_dump_printf(ctx, fmt, ...) \ + (ctx)->output_func((ctx)->output_userdata, fmt, ##__VA_ARGS__) + static inline void scc_tree_dump_ctx_drop(scc_tree_dump_ctx_t *ctx) { scc_vec_free(ctx->stack); } @@ -78,13 +78,28 @@ static void scc_tree_print_indent(scc_tree_dump_ctx_t *ctx) { } Assert(data != null); if (ctx->use_color) { - scc_printf("%s%s%s", ctx->branch_color, data, ctx->reset_color); + ctx->output_func(ctx->output_userdata, "%s%s%s", ctx->branch_color, + data, ctx->reset_color); } else { - scc_printf("%s", data); + ctx->output_func(ctx->output_userdata, "%s", data); } } } +#define SCC_TREE_DUMP_PRINT_COLORED(ctx, color, before_str, fmt, after_str, \ + ...) \ + (ctx)->output_func((ctx)->output_userdata, \ + before_str "%s" fmt "%s" after_str, \ + (ctx)->use_color ? color : "", ##__VA_ARGS__, \ + (ctx)->use_color ? 
ANSI_NONE : ""); + +#define SCC_TREE_DUMP_PRINT_AROUND(ctx, color, around_str, fmt, ...) \ + SCC_TREE_DUMP_PRINT_COLORED(ctx, color, around_str, fmt, around_str, \ + ##__VA_ARGS__) + +#define SCC_TREE_DUMP_PRINT_PURE(ctx, color, fmt, ...) \ + SCC_TREE_DUMP_PRINT_COLORED(ctx, color, "", fmt, "", ##__VA_ARGS__) + // 推入新的层级到栈中 static inline void scc_tree_dump_push_level(scc_tree_dump_ctx_t *ctx, cbool is_last_child) { diff --git a/runtime/scc_core/include/scc_core_impl.h b/runtime/scc_core/include/scc_core_impl.h index 9078a19..c748e49 100644 --- a/runtime/scc_core/include/scc_core_impl.h +++ b/runtime/scc_core/include/scc_core_impl.h @@ -16,6 +16,8 @@ typedef enum { SCC_FILE_WRITE, SCC_FILE_APPEND, } scc_fmode_t; +#define scc_stdout 1 +#define scc_stderr 2 scc_file_t scc_fopen(const char *path, scc_fmode_t mode); void scc_fclose(scc_file_t file); usize scc_fsize(scc_file_t file); diff --git a/runtime/scc_core/include/scc_core_ring.h b/runtime/scc_core/include/scc_core_ring.h index 0d28b63..3edb4ab 100644 --- a/runtime/scc_core/include/scc_core_ring.h +++ b/runtime/scc_core/include/scc_core_ring.h @@ -65,13 +65,39 @@ (ring).tail++; \ } while (0) +/** + * @brief 环形缓冲区核心操作模板 + * @param ring 环形缓冲区变量 + * @param ok 状态输出变量 + * @param op 具体操作代码块(可包含多条语句) + * + * 封装了以下公共逻辑: + * 1. 确保缓冲区有数据可用 + * 2. 检查probe是否越界 + * 3. 计算物理索引 + * 4. 
执行具体操作 + */ +#define _SCC_RING_OP(ring, ok, op) \ + do { \ + _scc_ring_ensure(ring, ok); \ + if (!(ok)) \ + break; \ + if ((ring).probe >= (ring).tail) { \ + ok = 0; \ + break; \ + } \ + usize _phys = _scc_ring_phys(ring, (ring).probe); \ + (void)_phys; \ + op; \ + } while (0) + // ==================== 用户操作宏 ==================== /** * @brief 初始化环形缓冲区 * @param ring 环形缓冲区变量 * @param cap 容量 - * @param fill_func 填充回调函数 (可传 NULL) + * @param fill_func 填充回调函数 (可传 NULL) 返回true表示成功 * * 内存分配失败由 scc_malloc 内部处理 (如 LOG_FATAL) */ @@ -116,17 +142,16 @@ * @param ok 变量名,用于接收成功状态 (cbool 类型) */ #define scc_ring_peek(ring, val, ok) \ - do { \ - _scc_ring_ensure(ring, ok); \ - if (!(ok)) \ - break; \ - if ((ring).probe >= (ring).tail) { \ - ok = 0; \ - break; \ - } \ - usize _phys = _scc_ring_phys(ring, (ring).probe); \ - val = (ring).data[_phys]; \ - } while (0) + _SCC_RING_OP(ring, ok, val = (ring).data[_phys]) + +/** + * @brief 预览 probe 位置的引用 (不移动 probe) + * @param ring 环形缓冲区变量 + * @param val_ref 引用变量接收地址 + * @param ok 变量名,用于接收成功状态 + */ +#define scc_ring_unsafe_peek_ref(ring, val_ref, ok) \ + _SCC_RING_OP(ring, ok, val_ref = &((ring).data[_phys])) /** * @brief 获取 probe 位置的元素,并将 probe 前进一步 @@ -135,18 +160,41 @@ * @param ok 变量名,用于接收成功状态 (cbool 类型) */ #define scc_ring_next(ring, val, ok) \ - do { \ - _scc_ring_ensure(ring, ok); \ - if (!(ok)) \ - break; \ - if ((ring).probe >= (ring).tail) { \ - ok = 0; \ - break; \ - } \ - usize _phys = _scc_ring_phys(ring, (ring).probe); \ - val = (ring).data[_phys]; \ - (ring).probe++; \ - } while (0) + _SCC_RING_OP(ring, ok, val = (ring).data[_phys]; (ring).probe++) + +/** + * @brief 获取 probe 位置的引用,并将 probe 前进一步 + * @param ring 环形缓冲区变量 + * @param val 引用变量接收地址 + * @param ok 变量名,用于接收成功状态 + */ +#define scc_ring_unsafe_next_ref(ring, val, ok) \ + _SCC_RING_OP(ring, ok, val = &((ring).data[_phys]); (ring).probe++) + +/** + * @brief 获取元素并消费(移动 probe 和 head) + * @param ring 环形缓冲区变量 + * @param val 变量名,用于接收元素值 + * @param ok 变量名,用于接收成功状态 + */ +#define 
scc_ring_next_consume(ring, val, ok) \ + _SCC_RING_OP(ring, ok, val = (ring).data[_phys]; (ring).probe++; \ + (ring).head = (ring).probe) + +#define scc_ring_unsafe_pure_next_consume(ring) \ + _SCC_RING_OP(ring, ok, (ring).probe++; (ring).head = (ring).probe) + +/** + * @brief 获取元素并消费(移动 probe 和 head) + * @param ring 环形缓冲区变量 + * @param val 变量名,用于接收元素值 + * @param ok 变量名,用于接收成功状态 + */ +#define scc_ring_unsafe_next_ref_consume(ring, val, ok) \ + _SCC_RING_OP(ring, ok, val = &((ring).data[_phys]); (ring).probe++; \ + (ring).head = (ring).probe) + +#define scc_ring_not_eof(ring, ok) _SCC_RING_OP(ring, ok, ) /** * @brief 将 probe 后退一步 (不能低于 head) @@ -182,16 +230,4 @@ */ #define scc_ring_available(ring) ((ring).tail - (ring).probe) -/** - * @brief 获取 probe 位置的元素,并将 probe 前进一步同时标记为已消费 - * @param ring 环形缓冲区变量 - * @param val 变量名,用于接收元素值 (例如 int ch) - * @param ok 变量名,用于接收成功状态 (cbool 类型) - */ -#define scc_ring_next_consume(ring, val, ok) \ - do { \ - scc_ring_next(ring, val, ok); \ - scc_ring_consume(ring); \ - } while (0) - #endif /* __SCC_CORE_RING_H__ */ diff --git a/runtime/scc_core/src/core_impl.c b/runtime/scc_core/src/core_impl.c index 4c8d5a0..e00c188 100644 --- a/runtime/scc_core/src/core_impl.c +++ b/runtime/scc_core/src/core_impl.c @@ -5,9 +5,6 @@ #define __SCC_LOG_IMPL_IMPORT_SRC__ #include -#define scc_stdout 1 -#define scc_stderr 2 - void putchar_(char ch) { LOG_FATAL("you can't use printf.c directly"); } scc_file_t scc_fopen(const char *path, scc_fmode_t mode) { diff --git a/src/main.c b/src/main.c index b633ceb..bd6dd2b 100644 --- a/src/main.c +++ b/src/main.c @@ -2,8 +2,8 @@ #include #include -// #include -// #include +#include +#include // #include // #include @@ -195,12 +195,11 @@ int main(int argc, const char **argv, const char **envp) { SetConsoleCP(CP_UTF8); #endif -#ifdef _WIN32 -#define OUTPUT_DEFAULT_FILE "a.exe" -#else -#define OUTPUT_DEFAULT_FILE "a.out" +#ifndef SCC_DEFAULT_ARGPARSE_LANG +#define SCC_DEFAULT_ARGPARSE_LANG SCC_ARGPARSE_LANG_ZH #endif 
- scc_argparse_lang_t argparse_lang = SCC_ARGPARSE_LANG_EN; + + scc_argparse_lang_t argparse_lang = SCC_DEFAULT_ARGPARSE_LANG; for (const char **env = envp; *env != null; env++) { const char *env_str = *env; if (scc_strcmp(env_str, "LANG=zh_CN.UTF-8") == 0) { @@ -256,7 +255,8 @@ int main(int argc, const char **argv, const char **envp) { scc_pproc_add_object_macro(&(pproc.macro_table), &pproc_macro_name, &pproc_tok_vec); if (config.emit_pp) { - scc_lexer_tok_ring_t *tok_ring = scc_pproc_to_ring(&pproc, 8); + scc_lexer_tok_ring_t *tok_ring = + scc_pproc_to_ring(&pproc, 8, true, true); if (config.output_file == null) { print_ring(tok_ring, config.verbose); } else { @@ -265,22 +265,25 @@ int main(int argc, const char **argv, const char **envp) { return 0; } + scc_lexer_tok_ring_t *tok_ring = scc_pproc_to_ring(&pproc, 8, false, false); + scc_parser_t parser; + scc_parser_init(&parser, tok_ring, null); + scc_ast_translation_unit_t *translation_unit = + scc_parse_translation_unit(&parser); + + scc_parser_drop(&parser); scc_pproc_drop(&pproc); scc_lexer_drop(&lexer); scc_sstream_drop(&sstream); - // scc_parser_t parser; - // scc_parser_init(&parser, &lexer_stream, null); - // scc_ast_translation_unit_t *translation_unit = - // scc_parse_translation_unit(&parser); - - // if (config.emit_ast) { - // scc_tree_dump_ctx_t tree_dump; - // scc_tree_dump_ctx_init(&tree_dump, true); - // scc_ast_dump_node(&tree_dump, (scc_ast_node_t *)translation_unit); - // scc_tree_dump_ctx_drop(&tree_dump); - // return 0; - // } + if (config.emit_ast) { + scc_tree_dump_ctx_t tree_dump; + scc_tree_dump_ctx_init(&tree_dump, true, (void *)scc_fprintf, + (void *)scc_stdout); + scc_ast_dump_node(&tree_dump, (scc_ast_node_t *)translation_unit); + scc_tree_dump_ctx_drop(&tree_dump); + return 0; + } // scc_ir_builder_t ir_builder; // scc_ast2ir(translation_unit, &ir_builder); diff --git a/tests/simple/00_main.c b/tests/simple/00_main.c new file mode 100644 index 0000000..3fb1295 --- /dev/null +++ 
b/tests/simple/00_main.c @@ -0,0 +1 @@ +int main(void) {} \ No newline at end of file