From 79ee7a657a079d09ad8bd0c23fef1ef0c1cb0505 Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Wed, 28 Jan 2026 15:44:59 +0800 Subject: [PATCH] =?UTF-8?q?feat(ast):=20=E6=B7=BB=E5=8A=A0AST=E5=AE=9A?= =?UTF-8?q?=E4=B9=89=E5=92=8Cdump=E5=B7=A5=E5=85=B7=E5=A4=B4=E6=96=87?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增libs/ast模块的基础定义文件,包括: - AST节点类型枚举定义,涵盖声明、语句、表达式、类型等各类节点 - AST操作符枚举,定义所有二元、一元、逻辑、算术等操作符 - AST节点结构体定义,包含表达式、语句、声明、类型等具体实现 - AST dump工具接口,支持树形结构输出和颜色显示 - 语义分析回调函数类型定义,为后续语义分析提供基础 --- libs/ast/cbuild.toml | 9 + libs/ast/include/ast_def.h | 442 ++++++++++++++ libs/ast/include/ast_dump.h | 37 ++ libs/ast/include/scc_ast.h | 23 + libs/ast/src/ast_dump.c | 803 ++++++++++++++++++++++++++ libs/ast/tests/test_main.c | 10 + libs/parser/cbuild.toml | 10 + libs/parser/include/parser.h | 113 ++++ libs/parser/src/parse_decl.c | 250 ++++++++ libs/parser/src/parse_expr.c | 918 +++++++++++++++++++++++++++++ libs/parser/src/parse_stmt.c | 426 ++++++++++++++ libs/parser/src/parse_type.c | 1055 ++++++++++++++++++++++++++++++++++ libs/parser/src/parser.c | 85 +++ 13 files changed, 4181 insertions(+) create mode 100644 libs/ast/cbuild.toml create mode 100644 libs/ast/include/ast_def.h create mode 100644 libs/ast/include/ast_dump.h create mode 100644 libs/ast/include/scc_ast.h create mode 100644 libs/ast/src/ast_dump.c create mode 100644 libs/ast/tests/test_main.c create mode 100644 libs/parser/cbuild.toml create mode 100644 libs/parser/include/parser.h create mode 100644 libs/parser/src/parse_decl.c create mode 100644 libs/parser/src/parse_expr.c create mode 100644 libs/parser/src/parse_stmt.c create mode 100644 libs/parser/src/parse_type.c create mode 100644 libs/parser/src/parser.c diff --git a/libs/ast/cbuild.toml b/libs/ast/cbuild.toml new file mode 100644 index 0000000..9736a2d --- /dev/null +++ b/libs/ast/cbuild.toml @@ -0,0 +1,9 @@ +[package] +name = "scc_ast" +version = "0.1.0" +authors = [] 
+description = "" + +# dependencies = [] +# features = {} +# default_features = [] diff --git a/libs/ast/include/ast_def.h b/libs/ast/include/ast_def.h new file mode 100644 index 0000000..04297a2 --- /dev/null +++ b/libs/ast/include/ast_def.h @@ -0,0 +1,442 @@ +#ifndef __SCC_AST_DEF_H__ +#define __SCC_AST_DEF_H__ + +#include + +/** + * @brief AST 节点类型枚举 + */ +typedef enum { + // 声明 + scc_ast_decl_t_BEGIN, // 声明开始 + SCC_AST_DECL_VAR, // 变量声明 + SCC_AST_DECL_FUNC, // 函数声明 + SCC_AST_DECL_PARAM, // 参数声明 + SCC_AST_DECL_STRUCT, // 结构体声明 + SCC_AST_DECL_UNION, // 联合声明 + SCC_AST_DECL_ENUM, // 枚举声明 + SCC_AST_DECL_TYPEDEF, // typedef 声明 + scc_ast_decl_t_END, // 声明结束 + + // 语句 + scc_ast_stmt_t_BEGIN, // 语句开始 + SCC_AST_STMT_COMPOUND, // 复合语句 { ... } + SCC_AST_STMT_EXPR, // 表达式语句 + SCC_AST_STMT_IF, // if 语句 + SCC_AST_STMT_WHILE, // while 语句 + SCC_AST_STMT_DO_WHILE, // do-while 语句 + SCC_AST_STMT_FOR, // for 语句 + SCC_AST_STMT_SWITCH, // switch 语句 + SCC_AST_STMT_CASE, // case 语句 + SCC_AST_STMT_DEFAULT, // default 语句 + SCC_AST_STMT_BREAK, // break 语句 + SCC_AST_STMT_CONTINUE, // continue 语句 + SCC_AST_STMT_RETURN, // return 语句 + SCC_AST_STMT_GOTO, // goto 语句 + SCC_AST_STMT_LABEL, // 标签语句 + scc_ast_stmt_t_END, // 结束语句 + + // 表达式 + scc_ast_expr_t_BEGIN, // 表达式开始 + SCC_AST_EXPR_BINARY, // 二元运算 + SCC_AST_EXPR_UNARY, // 一元运算 + SCC_AST_EXPR_COND, // 条件表达式 ?: + SCC_AST_EXPR_CALL, // 函数调用 + SCC_AST_EXPR_ARRAY_SUBSCRIPT, // 数组下标 + SCC_AST_EXPR_MEMBER, // 成员访问 . 
+ SCC_AST_EXPR_PTR_MEMBER, // 指针成员访问 -> + SCC_AST_EXPR_CAST, // 类型转换 + SCC_AST_EXPR_SIZE_OF, // sizeof + SCC_AST_EXPR_ALIGN_OF, // _Alignof + SCC_AST_EXPR_COMPOUND_LITERAL, // 复合字面量 + // 字面量 + SCC_AST_EXPR_INT_LITERAL, // 整数字面量 + SCC_AST_EXPR_FLOAT_LITERAL, // 浮点字面量 + SCC_AST_EXPR_CHAR_LITERAL, // 字符字面量 + SCC_AST_EXPR_STRING_LITERAL, // 字符串字面量 + // 标识符 + SCC_AST_EXPR_IDENTIFIER, // 标识符 + scc_ast_expr_t_END, // 表达式结束 + + // 类型 + scc_ast_type_t_BEGIN, // 类型开始 + SCC_AST_TYPE_BUILTIN, // 内置类型 + SCC_AST_TYPE_POINTER, // 指针类型 + SCC_AST_TYPE_ARRAY, // 数组类型 + SCC_AST_TYPE_FUNCTION, // 函数类型 + SCC_AST_TYPE_STRUCT, // 结构体类型 + SCC_AST_TYPE_UNION, // 联合类型 + SCC_AST_TYPE_ENUM, // 枚举类型 + SCC_AST_TYPE_TYPEDEF, // typedef 类型 + scc_ast_type_t_END, // 类型结束 + + // 其他 + scc_ast_translation_unit_t_BEGIN, + SCC_AST_TRANSLATION_UNIT, // 翻译单元(根节点) + scc_ast_translation_unit_t_END, +} scc_ast_node_type_t; + +typedef struct { + scc_ast_node_type_t type; + scc_pos_t loc; +} scc_ast_node_t; + +#define SCC_AST_CAST_TO(kind, expr) \ + ((kind *)(Assert(((scc_ast_node_t *)expr)->type > kind##_BEGIN && \ + ((scc_ast_node_t *)expr)->type < kind##_END), \ + (expr))) +#define SCC_AST_IS_A(kind, expr) \ + ((expr) && (((scc_ast_node_t *)expr)->type > kind##_BEGIN && \ + ((scc_ast_node_t *)expr)->type < kind##_END)) + +/** + * @brief 内置类型枚举 + */ +typedef enum { + TYPE_VOID, + TYPE_CHAR, + TYPE_SHORT, + TYPE_INT, + TYPE_LONG, + TYPE_LONG_LONG, + TYPE_FLOAT, + TYPE_DOUBLE, + TYPE_LONG_DOUBLE, + TYPE_BOOL, + TYPE_COMPLEX_FLOAT, + TYPE_COMPLEX_DOUBLE, + TYPE_COMPLEX_LONG_DOUBLE, +} scc_ast_builtin_type_t; + +/** + * @brief 限定符 + */ +typedef struct { + // storage-class-specifier + cbool is_typedef; + cbool is_extern; + cbool is_static; + cbool is_auto; + cbool is_register; + // type-qualifier + cbool is_const; + cbool is_volatile; + cbool is_restrict; + cbool is_atomic; + // function-specifier + cbool is_inline; +} scc_ast_decl_specifier_t; + +// 前向声明 +typedef struct scc_ast_type scc_ast_type_t; +typedef 
struct scc_ast_expr scc_ast_expr_t; +typedef struct scc_ast_stmt scc_ast_stmt_t; +typedef struct scc_ast_decl scc_ast_decl_t; + +typedef SCC_VEC(scc_ast_type_t *) scc_ast_type_vec_t; +typedef SCC_VEC(scc_ast_expr_t *) scc_ast_expr_vec_t; +typedef SCC_VEC(scc_ast_stmt_t *) scc_ast_stmt_vec_t; +typedef SCC_VEC(scc_ast_decl_t *) scc_ast_decl_vec_t; + +// 通过指针实现泛型 +typedef SCC_VEC(scc_ast_node_type_t *) scc_ast_block_item_vec_t; + +/** + * @brief 类型表示 + */ +struct scc_ast_type { + scc_ast_node_t base; + union { + struct { + scc_ast_builtin_type_t builtin; + scc_ast_decl_specifier_t quals; + } builtin; + struct { + scc_ast_type_t *pointee; + scc_ast_decl_specifier_t quals; + } pointer; + struct { + scc_ast_type_t *element; + scc_ast_expr_t *size; // 可为 null <=> 不定长数组 + } array; + struct { + scc_ast_type_t *return_type; + scc_ast_type_vec_t param_types; + cbool is_variadic; + } function; + struct { + const char *name; + scc_ast_decl_vec_t fields; // 结构体/联合字段 + } record; + struct { + const char *name; + scc_ast_expr_vec_t enumerators; // 枚举项 + } enumeration; + struct { + const char *name; + scc_ast_type_t *underlying; + } typedef_type; + }; +}; + +/** + * @brief AST 操作符枚举 + * 这个枚举定义了所有在AST中使用的操作符,与词法token分离 + */ +typedef enum scc_ast_expr_op { + /* 无操作符 */ + SCC_AST_OP_NONE = 0, + + /* 赋值操作符 */ + SCC_AST_OP_ASSIGN, // = + SCC_AST_OP_ASSIGN_ADD, // += + SCC_AST_OP_ASSIGN_SUB, // -= + SCC_AST_OP_ASSIGN_MUL, // *= + SCC_AST_OP_ASSIGN_DIV, // /= + SCC_AST_OP_ASSIGN_MOD, // %= + SCC_AST_OP_ASSIGN_AND, // &= + SCC_AST_OP_ASSIGN_XOR, // ^= + SCC_AST_OP_ASSIGN_OR, // |= + SCC_AST_OP_ASSIGN_LSHIFT, // <<= + SCC_AST_OP_ASSIGN_RSHIFT, // >>= + + /* 条件操作符 */ + SCC_AST_OP_CONDITIONAL, // ?: + + /* 逻辑操作符 */ + SCC_AST_OP_LOGICAL_OR, // || + SCC_AST_OP_LOGICAL_AND, // && + + /* 位操作符 */ + SCC_AST_OP_BITWISE_OR, // | + SCC_AST_OP_BITWISE_XOR, // ^ + SCC_AST_OP_BITWISE_AND, // & + + /* 相等性操作符 */ + SCC_AST_OP_EQUAL, // == + SCC_AST_OP_NOT_EQUAL, // != + + /* 关系操作符 */ + SCC_AST_OP_LESS, // < 
+ SCC_AST_OP_GREATER, // > + SCC_AST_OP_LESS_EQUAL, // <= + SCC_AST_OP_GREATER_EQUAL, // >= + + /* 移位操作符 */ + SCC_AST_OP_LEFT_SHIFT, // << + SCC_AST_OP_RIGHT_SHIFT, // >> + + /* 算术操作符 */ + SCC_AST_OP_ADD, // + + SCC_AST_OP_SUB, // - + SCC_AST_OP_MUL, // * + SCC_AST_OP_DIV, // / + SCC_AST_OP_MOD, // % + + /* 一元操作符 */ + SCC_AST_OP_UNARY_PLUS, // + (一元) + SCC_AST_OP_UNARY_MINUS, // - (一元) + SCC_AST_OP_ADDRESS_OF, // & + SCC_AST_OP_INDIRECTION, // * + SCC_AST_OP_BITWISE_NOT, // ~ + SCC_AST_OP_LOGICAL_NOT, // ! + SCC_AST_OP_PREFIX_INCREMENT, // ++ (前缀) + SCC_AST_OP_PREFIX_DECREMENT, // -- (前缀) + SCC_AST_OP_POSTFIX_INCREMENT, // ++ (后缀) + SCC_AST_OP_POSTFIX_DECREMENT, // -- (后缀) + + /* 成员访问 */ + SCC_AST_OP_MEMBER_ACCESS, // . + SCC_AST_OP_PTR_MEMBER_ACCESS, // -> +} scc_ast_expr_op_t; + +/** + * @brief 表达式节点 + */ +struct scc_ast_expr { + scc_ast_node_t base; + union { + // 二元运算 + struct { + scc_ast_expr_op_t op; + scc_ast_expr_t *lhs; + scc_ast_expr_t *rhs; + } binary; + // 一元运算 + struct { + scc_ast_expr_op_t op; + scc_ast_expr_t *operand; + } unary; + // 条件表达式 + struct { + scc_ast_expr_t *cond; + scc_ast_expr_t *then_expr; + scc_ast_expr_t *else_expr; + } cond; + // 函数调用 + struct { + scc_ast_expr_t *callee; + scc_ast_expr_vec_t args; + } call; + // 数组下标 + struct { + scc_ast_expr_t *array; + scc_ast_expr_t *index; + } subscript; + // 成员访问 + struct { + scc_ast_expr_t *base; + const char *member_name; + } member; + // 指针成员访问 + struct { + scc_ast_expr_t *base; + const char *member_name; + } ptr_member; + // 类型转换 + struct { + scc_ast_type_t *type; + scc_ast_expr_t *expr; + } cast; + // sizeof / _Alignof / ... 
+ union { + scc_ast_type_t *type; + scc_ast_expr_t *expr; + } attr_of; + // 复合字面量 + struct { + scc_ast_type_t *type; + scc_ast_expr_vec_t init_list; + } compound_literal; + // 字面量 + struct { + scc_cvalue_t value; + } literal; + // 标识符 + struct { + const char *name; + } identifier; + }; +}; + +/** + * @brief 语句节点 + */ +struct scc_ast_stmt { + scc_ast_node_t base; + union { + // 复合语句 + struct { + scc_ast_block_item_vec_t block_items; // decl or stmt + } compound; + // 表达式语句 + struct { + scc_ast_expr_t *expr; + } expr; + // if 语句 + struct { + scc_ast_expr_t *cond; + scc_ast_stmt_t *then_stmt; + scc_ast_stmt_t *opt_else_stmt; // stmt or null + } if_stmt; + // while 语句 + struct { + scc_ast_expr_t *cond; + scc_ast_stmt_t *body; + } while_stmt; + // do-while 语句 + struct { + scc_ast_stmt_t *body; + scc_ast_expr_t *cond; + } do_while_stmt; + // for 语句 + struct { + scc_ast_type_t *init; // expr or decl or null + scc_ast_expr_t *cond; // 可为 null + scc_ast_expr_t *iter; // 可为 null + scc_ast_stmt_t *body; + } for_stmt; + // switch 语句 + struct { + scc_ast_expr_t *cond; + scc_ast_stmt_t *body; + } switch_stmt; + // case 语句 + struct { + scc_ast_expr_t *expr; + scc_ast_stmt_t *stmt; + } case_stmt; + // default 语句 + struct { + scc_ast_stmt_t *stmt; + } default_stmt; + // break/continue + struct { + // 无额外字段 + } jump; + // return 语句 + struct { + scc_ast_expr_t *expr; // 可为 NULL + } return_stmt; + // goto 语句 + struct { + const char *label; + } goto_stmt; + // 标签语句 + struct { + const char *label; + scc_ast_stmt_t *stmt; + } label_stmt; + }; +}; + +/** + * @brief 声明节点 + */ +struct scc_ast_decl { + scc_ast_node_t base; + union { + // 变量声明 + struct { + const char *name; + scc_ast_type_t *type; + scc_ast_expr_t *init; // 可为 NULL + } var; + // 函数声明 + struct { + const char *name; + scc_ast_type_t *type; // 函数类型 + scc_ast_stmt_t *body; // 可为 NULL(只有声明) or + } func; + // 参数声明 + struct { + const char *name; + scc_ast_type_t *type; + } param; + // 结构体/联合声明 + struct { + const char *name; + 
scc_ast_decl_vec_t fields; + } record; + // 枚举声明 + struct { + const char *name; + scc_ast_expr_vec_t enumerators; + } enumeration; + // typedef 声明 + struct { + const char *name; + scc_ast_type_t *type; + } typedef_decl; + }; +}; + +/** + * @brief 翻译单元节点(根节点) + */ +typedef struct scc_ast_translation_unit { + scc_ast_node_t base; + scc_ast_decl_vec_t declarations; +} scc_ast_translation_unit_t; + +#endif /* __SCC_AST_DEF_H__ */ diff --git a/libs/ast/include/ast_dump.h b/libs/ast/include/ast_dump.h new file mode 100644 index 0000000..2a3cf5a --- /dev/null +++ b/libs/ast/include/ast_dump.h @@ -0,0 +1,37 @@ +/** + * @file ast_dump.h + * @brief AST dump 工具,支持多种输出格式(插件化设计) + */ + +#ifndef __SCC_AST_DUMP_H__ +#define __SCC_AST_DUMP_H__ + +#include "ast_def.h" + +typedef SCC_VEC(u8) scc_ast_dump_stack_t; +/** + * @brief AST dump 上下文结构 + */ +typedef struct { + int depth; ///< 当前深度 + cbool *is_last_child; ///< 每层是否为最后子节点 + cbool use_color; ///< 是否使用颜色输出 + size_t max_depth; ///< 分配的最大深度 + const char *node_color; ///< 节点类型颜色 + const char *value_color; ///< 值颜色 + const char *branch_color; ///< 分支符号颜色 + const char *reset_color; ///< 重置颜色 +} scc_ast_dump_ctx_t; + +/** + * @brief 以指定格式 dump AST + * + * @param node AST 节点(可以是任意类型的节点) + * @param ctx dump 上下文 + */ +void scc_ast_dump_node(scc_ast_node_t *node, scc_ast_dump_ctx_t *ctx); + +void scc_ast_dump_ctx_init(scc_ast_dump_ctx_t *ctx, cbool use_color); +void scc_ast_dump_ctx_drop(scc_ast_dump_ctx_t *ctx); + +#endif /* __SCC_AST_DUMP_H__ */ diff --git a/libs/ast/include/scc_ast.h b/libs/ast/include/scc_ast.h new file mode 100644 index 0000000..2e1d6e8 --- /dev/null +++ b/libs/ast/include/scc_ast.h @@ -0,0 +1,23 @@ +#ifndef __SCC_AST_H__ +#define __SCC_AST_H__ + +#include "ast_def.h" + +/** + * @brief 语义分析回调函数类型 + */ +typedef void (*scc_sema_callback_t)(void *context, + scc_ast_node_type_t node_type, void *node); + +/** + * @brief 语义分析回调集合 + */ +typedef struct scc_sema_callbacks { + scc_sema_callback_t on_decl; + scc_sema_callback_t 
on_stmt; + scc_sema_callback_t on_expr; + scc_sema_callback_t on_type; + void *context; +} scc_sema_callbacks_t; + +#endif /* __SCC_AST_H__ */ diff --git a/libs/ast/src/ast_dump.c b/libs/ast/src/ast_dump.c new file mode 100644 index 0000000..330f03e --- /dev/null +++ b/libs/ast/src/ast_dump.c @@ -0,0 +1,803 @@ +/** + * @file ast_dump.c + * @brief AST dump 实现 + */ + +#include + +#define VERTICAL "| " +#define BRANCH "|-" +#define LAST_BRANCH "`-" +#define SPACE " " + +// 默认颜色配置 +#define DEFAULT_NODE_COLOR ANSI_FG_BLUE +#define DEFAULT_VALUE_COLOR ANSI_FG_GREEN +#define DEFAULT_BRANCH_COLOR ANSI_FG_YELLOW +#define DEFAULT_RESET_COLOR ANSI_NONE + +// 通用宏定义 +#define PRINT_COLORED(ctx, color_field, fmt, ...) \ + do { \ + if (ctx->use_color) { \ + scc_printf("%s" fmt "%s", ctx->color_field, ##__VA_ARGS__, \ + ctx->reset_color); \ + } else { \ + scc_printf(fmt, ##__VA_ARGS__); \ + } \ + } while (0) + +#define PRINT_VALUE(ctx, fmt, ...) \ + PRINT_COLORED(ctx, value_color, fmt, ##__VA_ARGS__) + +#define PRINT_NODE_TYPE(ctx, node) \ + PRINT_COLORED(ctx, node_color, "%s", get_node_type_str(node->type)) + +#define PRINT_QUOTED_VALUE(ctx, value) \ + do { \ + PRINT_VALUE(ctx, "'%s'", value); \ + } while (0) + +// 扩展上下文深度 +static void ensure_context_depth(scc_ast_dump_ctx_t *ctx, int new_depth) { + if ((size_t)new_depth >= ctx->max_depth) { + size_t old_size = ctx->max_depth * sizeof(cbool); + ctx->max_depth = new_depth + 16; // 预分配更多空间 + ctx->is_last_child = (cbool *)scc_realloc( + ctx->is_last_child, ctx->max_depth * sizeof(cbool)); + scc_memset((char *)ctx->is_last_child + old_size, 0, + ctx->max_depth * sizeof(cbool) - old_size); + } +} + +// 打印缩进 +static void print_indent(scc_ast_dump_ctx_t *ctx) { + for (int i = 0; i < ctx->depth; i++) { + if (i == ctx->depth - 1) { + // 最后一层打印分支符号 + if (ctx->use_color) { + scc_printf("%s%s%s", ctx->branch_color, + ctx->is_last_child[i] ? LAST_BRANCH : BRANCH, + ctx->reset_color); + } else { + scc_printf("%s", ctx->is_last_child[i] ? 
LAST_BRANCH : BRANCH); + } + } else { + // 中间层根据是否是最后一个子节点决定是否打印垂直线 + if (ctx->use_color) { + scc_printf("%s%s%s", ctx->branch_color, + ctx->is_last_child[i] ? SPACE : VERTICAL, + ctx->reset_color); + } else { + scc_printf("%s", ctx->is_last_child[i] ? SPACE : VERTICAL); + } + } + } +} + +// 获取节点类型的字符串表示 +static const char *get_node_type_str(scc_ast_node_type_t type) { + switch (type) { + // 声明类型 + case SCC_AST_DECL_VAR: + return "VarDecl"; + case SCC_AST_DECL_FUNC: + return "FuncDecl"; + case SCC_AST_DECL_PARAM: + return "ParamDecl"; + case SCC_AST_DECL_STRUCT: + return "StructDecl"; + case SCC_AST_DECL_UNION: + return "UnionDecl"; + case SCC_AST_DECL_ENUM: + return "EnumDecl"; + case SCC_AST_DECL_TYPEDEF: + return "TypedefDecl"; + + // 语句类型 + case SCC_AST_STMT_COMPOUND: + return "CompoundStmt"; + case SCC_AST_STMT_EXPR: + return "ExprStmt"; + case SCC_AST_STMT_IF: + return "IfStmt"; + case SCC_AST_STMT_WHILE: + return "WhileStmt"; + case SCC_AST_STMT_DO_WHILE: + return "DoStmt"; + case SCC_AST_STMT_FOR: + return "ForStmt"; + case SCC_AST_STMT_SWITCH: + return "SwitchStmt"; + case SCC_AST_STMT_CASE: + return "CaseStmt"; + case SCC_AST_STMT_DEFAULT: + return "DefaultStmt"; + case SCC_AST_STMT_BREAK: + return "BreakStmt"; + case SCC_AST_STMT_CONTINUE: + return "ContinueStmt"; + case SCC_AST_STMT_RETURN: + return "ReturnStmt"; + case SCC_AST_STMT_GOTO: + return "GotoStmt"; + case SCC_AST_STMT_LABEL: + return "LabelStmt"; + + // 表达式类型 + case SCC_AST_EXPR_BINARY: + return "BinaryOperator"; + case SCC_AST_EXPR_UNARY: + return "UnaryOperator"; + case SCC_AST_EXPR_COND: + return "ConditionalOperator"; + case SCC_AST_EXPR_CALL: + return "CallExpr"; + case SCC_AST_EXPR_ARRAY_SUBSCRIPT: + return "ArraySubscriptExpr"; + case SCC_AST_EXPR_MEMBER: + return "MemberExpr"; + case SCC_AST_EXPR_PTR_MEMBER: + return "PtrMemberExpr"; + case SCC_AST_EXPR_CAST: + return "CastExpr"; + case SCC_AST_EXPR_SIZE_OF: + return "SizeOfExpr"; + case SCC_AST_EXPR_ALIGN_OF: + return "AlignOfExpr"; 
+ case SCC_AST_EXPR_COMPOUND_LITERAL: + return "CompoundLiteralExpr"; + case SCC_AST_EXPR_INT_LITERAL: + return "IntegerLiteral"; + case SCC_AST_EXPR_FLOAT_LITERAL: + return "FloatingLiteral"; + case SCC_AST_EXPR_CHAR_LITERAL: + return "CharacterLiteral"; + case SCC_AST_EXPR_STRING_LITERAL: + return "StringLiteral"; + case SCC_AST_EXPR_IDENTIFIER: + return "DeclRefExpr"; + + // 类型类型 + case SCC_AST_TYPE_BUILTIN: + return "BuiltinType"; + case SCC_AST_TYPE_POINTER: + return "PointerType"; + case SCC_AST_TYPE_ARRAY: + return "ArrayType"; + case SCC_AST_TYPE_FUNCTION: + return "FunctionType"; + case SCC_AST_TYPE_STRUCT: + return "RecordType"; + case SCC_AST_TYPE_UNION: + return "RecordType"; + case SCC_AST_TYPE_ENUM: + return "EnumType"; + case SCC_AST_TYPE_TYPEDEF: + return "TypedefType"; + + // 根节点 + case SCC_AST_TRANSLATION_UNIT: + return "TranslationUnitDecl"; + + default: + return "UnknownNode"; + } +} + +// 获取内置类型名称 +static const char *get_builtin_type_str(scc_ast_builtin_type_t type) { + switch (type) { + case TYPE_VOID: + return "void"; + case TYPE_CHAR: + return "char"; + case TYPE_SHORT: + return "short"; + case TYPE_INT: + return "int"; + case TYPE_LONG: + return "long"; + case TYPE_LONG_LONG: + return "long long"; + case TYPE_FLOAT: + return "float"; + case TYPE_DOUBLE: + return "double"; + case TYPE_LONG_DOUBLE: + return "long double"; + case TYPE_BOOL: + return "_Bool"; + case TYPE_COMPLEX_FLOAT: + return "float _Complex"; + case TYPE_COMPLEX_DOUBLE: + return "double _Complex"; + case TYPE_COMPLEX_LONG_DOUBLE: + return "long double _Complex"; + default: + return ""; + } +} + +// 获取操作符字符串 +static const char *get_op_str(scc_ast_expr_op_t op) { + switch (op) { + case SCC_AST_OP_ASSIGN: + return "="; + case SCC_AST_OP_ASSIGN_ADD: + return "+="; + case SCC_AST_OP_ASSIGN_SUB: + return "-="; + case SCC_AST_OP_ASSIGN_MUL: + return "*="; + case SCC_AST_OP_ASSIGN_DIV: + return "/="; + case SCC_AST_OP_ASSIGN_MOD: + return "%="; + case SCC_AST_OP_ASSIGN_AND: + return 
"&="; + case SCC_AST_OP_ASSIGN_XOR: + return "^="; + case SCC_AST_OP_ASSIGN_OR: + return "|="; + case SCC_AST_OP_ASSIGN_LSHIFT: + return "<<="; + case SCC_AST_OP_ASSIGN_RSHIFT: + return ">>="; + case SCC_AST_OP_CONDITIONAL: + return "? :"; + case SCC_AST_OP_LOGICAL_OR: + return "||"; + case SCC_AST_OP_LOGICAL_AND: + return "&&"; + case SCC_AST_OP_BITWISE_OR: + return "|"; + case SCC_AST_OP_BITWISE_XOR: + return "^"; + case SCC_AST_OP_BITWISE_AND: + return "&"; + case SCC_AST_OP_EQUAL: + return "=="; + case SCC_AST_OP_NOT_EQUAL: + return "!="; + case SCC_AST_OP_LESS: + return "<"; + case SCC_AST_OP_GREATER: + return ">"; + case SCC_AST_OP_LESS_EQUAL: + return "<="; + case SCC_AST_OP_GREATER_EQUAL: + return ">="; + case SCC_AST_OP_LEFT_SHIFT: + return "<<"; + case SCC_AST_OP_RIGHT_SHIFT: + return ">>"; + case SCC_AST_OP_ADD: + return "+"; + case SCC_AST_OP_SUB: + return "-"; + case SCC_AST_OP_MUL: + return "*"; + case SCC_AST_OP_DIV: + return "/"; + case SCC_AST_OP_MOD: + return "%"; + case SCC_AST_OP_UNARY_PLUS: + return "+"; + case SCC_AST_OP_UNARY_MINUS: + return "-"; + case SCC_AST_OP_ADDRESS_OF: + return "&"; + case SCC_AST_OP_INDIRECTION: + return "*"; + case SCC_AST_OP_BITWISE_NOT: + return "~"; + case SCC_AST_OP_LOGICAL_NOT: + return "!"; + case SCC_AST_OP_PREFIX_INCREMENT: + return "++"; + case SCC_AST_OP_PREFIX_DECREMENT: + return "--"; + case SCC_AST_OP_POSTFIX_INCREMENT: + return "++"; + case SCC_AST_OP_POSTFIX_DECREMENT: + return "--"; + case SCC_AST_OP_MEMBER_ACCESS: + return "."; + case SCC_AST_OP_PTR_MEMBER_ACCESS: + return "->"; + default: + return ""; + } +} + +// 通用的开始节点打印函数 +static inline void start_node_dump(scc_ast_node_t *node, + scc_ast_dump_ctx_t *ctx) { + print_indent(ctx); + PRINT_NODE_TYPE(ctx, node); +} + +// 通用的结束节点打印函数 +static inline void end_node_dump(scc_ast_dump_ctx_t *ctx) { scc_printf("\n"); } + +// 通用的递归转储辅助函数 +static inline void dump_child_node(scc_ast_node_t *child, + scc_ast_dump_ctx_t *ctx, cbool is_last) { + if (!child) + 
return; + + ctx->depth++; + ensure_context_depth(ctx, ctx->depth); + ctx->is_last_child[ctx->depth - 1] = is_last; + + scc_ast_dump_node(child, ctx); + + ctx->depth--; +} + +// Print a quoted '<prefix><name>' type label; the whole label, both quotes included, is value-colored +#define BUILD_TYPE_NAME(ctx, prefix, name) \ + do { \ + if (ctx->use_color) { \ + scc_printf("%s'%s%s'%s", ctx->value_color, prefix, name, \ + ctx->reset_color); \ + } else { \ + scc_printf("'%s%s'", prefix, name); \ + } \ + } while (0) + +// Dump a type node: header line first, then its child types +static void dump_type_impl(scc_ast_type_t *type, scc_ast_dump_ctx_t *ctx) { + if (!type) + return; + + start_node_dump(&type->base, ctx); + + // Print the type-specific label on the node's header line + switch (type->base.type) { + case SCC_AST_TYPE_BUILTIN: + PRINT_QUOTED_VALUE(ctx, get_builtin_type_str(type->builtin.builtin)); + break; + case SCC_AST_TYPE_POINTER: + if (type->pointer.pointee && + type->pointer.pointee->base.type == SCC_AST_TYPE_BUILTIN) { + const char *base_type = + get_builtin_type_str(type->pointer.pointee->builtin.builtin); + if (ctx->use_color) { + scc_printf("%s'%s *'%s", ctx->value_color, base_type, + ctx->reset_color); + } else { + scc_printf("'%s *'", base_type); + } + } else { + PRINT_QUOTED_VALUE(ctx, "pointer"); + } + break; + case SCC_AST_TYPE_ARRAY: + PRINT_QUOTED_VALUE(ctx, "array"); + break; + case SCC_AST_TYPE_FUNCTION: + PRINT_QUOTED_VALUE(ctx, "function"); + break; + case SCC_AST_TYPE_STRUCT: + if (type->record.name) { + BUILD_TYPE_NAME(ctx, "struct ", type->record.name); + } else { + PRINT_QUOTED_VALUE(ctx, "anonymous struct"); + } + break; + case SCC_AST_TYPE_UNION: + if (type->record.name) { + BUILD_TYPE_NAME(ctx, "union ", type->record.name); + } else { + PRINT_QUOTED_VALUE(ctx, "anonymous union"); + } + break; + case SCC_AST_TYPE_ENUM: + if (type->enumeration.name) { + BUILD_TYPE_NAME(ctx, "enum ", type->enumeration.name); + } else { + PRINT_QUOTED_VALUE(ctx, "anonymous enum"); + } + break; + case SCC_AST_TYPE_TYPEDEF: + PRINT_QUOTED_VALUE(ctx, type->typedef_type.name); + break; + default: + break; + } + + 
end_node_dump(ctx); + + // Recurse into the child nodes + switch (type->base.type) { + case SCC_AST_TYPE_POINTER: + dump_child_node((scc_ast_node_t *)type->pointer.pointee, ctx, true); + break; + case SCC_AST_TYPE_ARRAY: + dump_child_node((scc_ast_node_t *)type->array.element, ctx, + type->array.size == NULL); + if (type->array.size) { + dump_child_node((scc_ast_node_t *)type->array.size, ctx, true); + } + break; + default: + break; + } +} + +// Dump an expression node: header line first, then its operands as children +static void dump_expr_impl(scc_ast_expr_t *expr, scc_ast_dump_ctx_t *ctx) { + if (!expr) + return; + + start_node_dump(&expr->base, ctx); + + // Print the expression-specific value on the node's header line + switch (expr->base.type) { + case SCC_AST_EXPR_BINARY: + PRINT_QUOTED_VALUE(ctx, get_op_str(expr->binary.op)); + break; + case SCC_AST_EXPR_UNARY: + PRINT_QUOTED_VALUE(ctx, get_op_str(expr->unary.op)); + break; + case SCC_AST_EXPR_INT_LITERAL: + PRINT_VALUE(ctx, " %lld", expr->literal.value.i); + break; + case SCC_AST_EXPR_FLOAT_LITERAL: + PRINT_VALUE(ctx, " %f", expr->literal.value.f); + break; + case SCC_AST_EXPR_CHAR_LITERAL: + PRINT_VALUE(ctx, " '%c'", (char)expr->literal.value.ch); + break; + case SCC_AST_EXPR_STRING_LITERAL: + PRINT_VALUE(ctx, " \"%s\"", expr->literal.value.cstr.data); + break; + case SCC_AST_EXPR_IDENTIFIER: + if (expr->identifier.name) { + PRINT_QUOTED_VALUE(ctx, expr->identifier.name); + } + break; + case SCC_AST_EXPR_SIZE_OF: + case SCC_AST_EXPR_ALIGN_OF: + PRINT_QUOTED_VALUE(ctx, (expr->base.type == SCC_AST_EXPR_SIZE_OF) + ? 
"sizeof" + : "alignof"); + break; + default: + break; + } + + end_node_dump(ctx); + + // Dump the child nodes through dump_child_node + switch (expr->base.type) { + case SCC_AST_EXPR_BINARY: + dump_child_node((scc_ast_node_t *)expr->binary.lhs, ctx, false); + dump_child_node((scc_ast_node_t *)expr->binary.rhs, ctx, true); + break; + + case SCC_AST_EXPR_UNARY: + dump_child_node((scc_ast_node_t *)expr->unary.operand, ctx, true); + break; + + case SCC_AST_EXPR_COND: + dump_child_node((scc_ast_node_t *)expr->cond.cond, ctx, false); + dump_child_node((scc_ast_node_t *)expr->cond.then_expr, ctx, false); + dump_child_node((scc_ast_node_t *)expr->cond.else_expr, ctx, true); + break; + + case SCC_AST_EXPR_CALL: + // With no arguments the callee is the last child, so it must close the branch + dump_child_node((scc_ast_node_t *)expr->call.callee, ctx, expr->call.args.size == 0); + for (size_t i = 0; i < expr->call.args.size; i++) { + dump_child_node((scc_ast_node_t *)expr->call.args.data[i], ctx, + i == expr->call.args.size - 1); + } + break; + + case SCC_AST_EXPR_ARRAY_SUBSCRIPT: + dump_child_node((scc_ast_node_t *)expr->subscript.array, ctx, false); + dump_child_node((scc_ast_node_t *)expr->subscript.index, ctx, true); + break; + + case SCC_AST_EXPR_MEMBER: + case SCC_AST_EXPR_PTR_MEMBER: + dump_child_node((scc_ast_node_t *)expr->member.base, ctx, false); + // Print the accessed member name; NOTE(review): print_indent runs at this node's own depth, not one level deeper - confirm intended + print_indent(ctx); + PRINT_COLORED(ctx, node_color, "Member [\"%s\"]", + expr->member.member_name); + scc_printf("\n"); + break; + + case SCC_AST_EXPR_CAST: + dump_child_node((scc_ast_node_t *)expr->cast.type, ctx, false); + dump_child_node((scc_ast_node_t *)expr->cast.expr, ctx, true); + break; + + case SCC_AST_EXPR_SIZE_OF: + case SCC_AST_EXPR_ALIGN_OF: + if (expr->attr_of.expr) { + dump_child_node((scc_ast_node_t *)expr->attr_of.expr, ctx, true); + } else if (expr->attr_of.type) { + dump_child_node((scc_ast_node_t *)expr->attr_of.type, ctx, true); + } + break; + + case SCC_AST_EXPR_COMPOUND_LITERAL: + dump_child_node((scc_ast_node_t *)expr->compound_literal.type, ctx, + expr->compound_literal.init_list.size == 0); + // Initializer list; the type above is the last child when this list is empty + for (size_t i = 0; i < 
expr->compound_literal.init_list.size; i++) { + dump_child_node( + (scc_ast_node_t *)expr->compound_literal.init_list.data[i], ctx, + i == expr->compound_literal.init_list.size - 1); + } + break; + + default: + break; + } +} + +// 递归转储语句 +static void dump_stmt_impl(scc_ast_stmt_t *stmt, scc_ast_dump_ctx_t *ctx) { + if (!stmt) + return; + + start_node_dump(&stmt->base, ctx); + + // 根据语句类型输出特定信息 + switch (stmt->base.type) { + case SCC_AST_STMT_IF: + scc_printf("\n"); // if语句总是换行显示子节点 + dump_child_node((scc_ast_node_t *)stmt->if_stmt.cond, ctx, false); + dump_child_node((scc_ast_node_t *)stmt->if_stmt.then_stmt, ctx, + !stmt->if_stmt.opt_else_stmt); + if (stmt->if_stmt.opt_else_stmt) { + dump_child_node((scc_ast_node_t *)stmt->if_stmt.opt_else_stmt, ctx, + true); + } + return; + case SCC_AST_STMT_WHILE: + scc_printf("\n"); // 循环和switch语句换行显示子节点 + dump_child_node((scc_ast_node_t *)stmt->while_stmt.cond, ctx, false); + dump_child_node((scc_ast_node_t *)stmt->while_stmt.body, ctx, true); + return; + case SCC_AST_STMT_DO_WHILE: + scc_printf("\n"); // 循环和switch语句换行显示子节点 + dump_child_node((scc_ast_node_t *)stmt->do_while_stmt.body, ctx, false); + dump_child_node((scc_ast_node_t *)stmt->do_while_stmt.cond, ctx, true); + return; + case SCC_AST_STMT_SWITCH: + scc_printf("\n"); // 循环和switch语句换行显示子节点 + dump_child_node((scc_ast_node_t *)stmt->switch_stmt.cond, ctx, false); + dump_child_node((scc_ast_node_t *)stmt->switch_stmt.body, ctx, true); + return; + case SCC_AST_STMT_FOR: + scc_printf("\n"); // for语句换行显示子节点 + if (stmt->for_stmt.init) { + dump_child_node((scc_ast_node_t *)stmt->for_stmt.init, ctx, false); + } + if (stmt->for_stmt.cond) { + dump_child_node((scc_ast_node_t *)stmt->for_stmt.cond, ctx, false); + } + if (stmt->for_stmt.iter) { + dump_child_node((scc_ast_node_t *)stmt->for_stmt.iter, ctx, false); + } + dump_child_node((scc_ast_node_t *)stmt->for_stmt.body, ctx, true); + return; + case SCC_AST_STMT_RETURN: + if (stmt->return_stmt.expr) { + scc_printf("\n"); + 
dump_child_node((scc_ast_node_t *)stmt->return_stmt.expr, ctx, + true); + return; + } + break; + case SCC_AST_STMT_GOTO: + if (stmt->goto_stmt.label) { + PRINT_VALUE(ctx, " Label: %s", stmt->goto_stmt.label); + } + break; + case SCC_AST_STMT_LABEL: + if (stmt->label_stmt.label) { + PRINT_VALUE(ctx, " %s", stmt->label_stmt.label); + } + break; + default: + break; + } + + end_node_dump(ctx); + + // 递归转储其他子节点 + switch (stmt->base.type) { + case SCC_AST_STMT_COMPOUND: + for (size_t i = 0; i < stmt->compound.block_items.size; i++) { + scc_ast_node_t *item = + (scc_ast_node_t *)stmt->compound.block_items.data[i]; + dump_child_node(item, ctx, + i == stmt->compound.block_items.size - 1); + } + break; + + case SCC_AST_STMT_EXPR: + if (stmt->expr.expr) { + dump_child_node((scc_ast_node_t *)stmt->expr.expr, ctx, true); + } + break; + + case SCC_AST_STMT_CASE: + dump_child_node((scc_ast_node_t *)stmt->case_stmt.expr, ctx, false); + dump_child_node((scc_ast_node_t *)stmt->case_stmt.stmt, ctx, true); + break; + + case SCC_AST_STMT_DEFAULT: + dump_child_node((scc_ast_node_t *)stmt->default_stmt.stmt, ctx, true); + break; + + default: + break; + } +} +

// Recursively dump a declaration node.
//
// The node header is printed first (with the declared name appended when the
// declaration has one), then every child node is dumped through
// dump_child_node(). A NULL `decl` is ignored.
static void dump_decl_impl(scc_ast_decl_t *decl, scc_ast_dump_ctx_t *ctx) {
    if (!decl)
        return;

    start_node_dump(&decl->base, ctx);

    // Append the declared name, if any, to the header line.
    switch (decl->base.type) {
    case SCC_AST_DECL_VAR:
        if (decl->var.name) {
            PRINT_QUOTED_VALUE(ctx, decl->var.name);
        }
        break;
    case SCC_AST_DECL_FUNC:
        if (decl->func.name) {
            PRINT_QUOTED_VALUE(ctx, decl->func.name);
        }
        break;
    case SCC_AST_DECL_PARAM:
        if (decl->param.name) {
            PRINT_QUOTED_VALUE(ctx, decl->param.name);
        }
        break;
    case SCC_AST_DECL_STRUCT:
    case SCC_AST_DECL_UNION:
        // Both record kinds read their tag from `record.name`.
        if (decl->record.name) {
            PRINT_QUOTED_VALUE(ctx, decl->record.name);
        }
        break;
    case SCC_AST_DECL_ENUM:
        if (decl->enumeration.name) {
            PRINT_QUOTED_VALUE(ctx, decl->enumeration.name);
        }
        break;
    case SCC_AST_DECL_TYPEDEF:
        if (decl->typedef_decl.name) {
            PRINT_QUOTED_VALUE(ctx, decl->typedef_decl.name);
        }
        break;
    default:
        break;
    }

    end_node_dump(ctx);

    // Dump the children. The last argument of dump_child_node() marks the
    // final child of this node.
    switch (decl->base.type) {
    case SCC_AST_DECL_VAR: {
        // Type and initializer are handled independently so that an
        // initializer is still dumped when the type pointer is NULL.
        scc_ast_node_t *type = (scc_ast_node_t *)decl->var.type;
        scc_ast_node_t *init = (scc_ast_node_t *)decl->var.init;
        if (type) {
            dump_child_node(type, ctx, init == NULL);
        }
        if (init) {
            dump_child_node(init, ctx, true);
        }
        break;
    }

    case SCC_AST_DECL_FUNC: {
        // Same rule as above: a body is dumped even without a type node.
        scc_ast_node_t *type = (scc_ast_node_t *)decl->func.type;
        scc_ast_node_t *body = (scc_ast_node_t *)decl->func.body;
        if (type) {
            dump_child_node(type, ctx, body == NULL);
        }
        if (body) {
            dump_child_node(body, ctx, true);
        }
        break;
    }

    case SCC_AST_DECL_PARAM:
        if (decl->param.type) {
            dump_child_node((scc_ast_node_t *)decl->param.type, ctx, true);
        }
        break;

    case SCC_AST_DECL_STRUCT:
    case SCC_AST_DECL_UNION: {
        const size_t count = decl->record.fields.size;
        for (size_t i = 0; i < count; i++) {
            dump_child_node((scc_ast_node_t *)decl->record.fields.data[i], ctx,
                            i + 1 == count);
        }
        break;
    }

    case SCC_AST_DECL_ENUM: {
        const size_t count = decl->enumeration.enumerators.size;
        for (size_t i = 0; i < count; i++) {
            dump_child_node(
                (scc_ast_node_t *)decl->enumeration.enumerators.data[i], ctx,
                i + 1 == count);
        }
        break;
    }

    case SCC_AST_DECL_TYPEDEF:
        if (decl->typedef_decl.type) {
            dump_child_node((scc_ast_node_t *)decl->typedef_decl.type, ctx,
                            true);
        }
        break;

    default:
        break;
    }
}

// Recursively dump a translation unit: its header line followed by every
// top-level declaration. A NULL `unit` is ignored.
static void dump_unit_impl(scc_ast_translation_unit_t *unit,
                           scc_ast_dump_ctx_t *ctx) {
    if (!unit)
        return;

    start_node_dump(&unit->base, ctx);
    scc_printf("\n");

    const size_t count = unit->declarations.size;
    for (size_t i = 0; i < count; i++) {
        dump_child_node((scc_ast_node_t *)unit->declarations.data[i], ctx,
                        i + 1 == count);
    }
}

// Context management.

// Reset `ctx` and select the colour strings: the DEFAULT_* escape sequences
// when `use_color` is set, empty strings otherwise. Depth 0 is prepared and
// flagged as a last child. A NULL `ctx` is ignored.
void scc_ast_dump_ctx_init(scc_ast_dump_ctx_t *ctx, cbool use_color) {
    if (!ctx)
        return;

    scc_memset(ctx, 0, sizeof(*ctx));
    ctx->use_color = use_color;
    ctx->node_color = use_color ? DEFAULT_NODE_COLOR : "";
    ctx->value_color = use_color ? DEFAULT_VALUE_COLOR : "";
    ctx->branch_color = use_color ? DEFAULT_BRANCH_COLOR : "";
    ctx->reset_color = use_color ? DEFAULT_RESET_COLOR : "";

    ensure_context_depth(ctx, 0);
    ctx->is_last_child[0] = true;
}

// Release the `is_last_child` buffer owned by `ctx` and clear the pointer so
// the context can be dropped twice safely. A NULL `ctx` is ignored.
void scc_ast_dump_ctx_drop(scc_ast_dump_ctx_t *ctx) {
    if (!ctx)
        return;

    if (ctx->is_last_child) {
        scc_free(ctx->is_last_child);
        ctx->is_last_child = NULL;
    }
}

// Public entry point: dispatch on the node category and call the matching
// dump routine. NULL nodes and nodes of no known category are ignored.
void scc_ast_dump_node(scc_ast_node_t *node, scc_ast_dump_ctx_t *ctx) {
    if (!node)
        return;

    if (SCC_AST_IS_A(scc_ast_expr_t, node)) {
        dump_expr_impl(SCC_AST_CAST_TO(scc_ast_expr_t, node), ctx);
    } else if (SCC_AST_IS_A(scc_ast_stmt_t, node)) {
        dump_stmt_impl(SCC_AST_CAST_TO(scc_ast_stmt_t, node), ctx);
    } else if (SCC_AST_IS_A(scc_ast_decl_t, node)) {
        dump_decl_impl(SCC_AST_CAST_TO(scc_ast_decl_t, node), ctx);
    } else if (SCC_AST_IS_A(scc_ast_type_t, node)) {
        dump_type_impl(SCC_AST_CAST_TO(scc_ast_type_t, node), ctx);
    } else if (SCC_AST_IS_A(scc_ast_translation_unit_t, node)) {
        dump_unit_impl(SCC_AST_CAST_TO(scc_ast_translation_unit_t, node), ctx);
    }
}
\ No newline at end of file diff --git a/libs/ast/tests/test_main.c b/libs/ast/tests/test_main.c new file mode 100644 index 0000000..1616237 --- /dev/null +++ b/libs/ast/tests/test_main.c @@ -0,0 +1,10 @@ +#include + +void test_example() { + printf("Test passed!\n"); +} + +int main() { + test_example(); + return 0; +} diff --git a/libs/parser/cbuild.toml b/libs/parser/cbuild.toml new file mode 100644 index 0000000..fb5624d --- /dev/null +++ b/libs/parser/cbuild.toml @@ -0,0 +1,10 @@ +[package] +name = "scc_parser" +version = "0.1.0" + +dependencies = [ + { name = "scc_core", path = "../../runtime/scc_core" }, + { name = "scc_utils", path = "../../runtime/scc_utils" }, + { name = "lexer", path = "../lexer" }, + { name = "ast", path = "../ast" }, +] diff --git a/libs/parser/include/parser.h 
b/libs/parser/include/parser.h new file mode 100644 index 0000000..996bde4 --- /dev/null +++ b/libs/parser/include/parser.h @@ -0,0 +1,113 @@ +/** + * @file parser.h + */ + +#ifndef __SCC_PARSER_H__ +#define __SCC_PARSER_H__ + +#include "scc_ast.h" +#include + +/** + * @brief 解析器状态 + */ +typedef struct scc_parser { + scc_lexer_stream_t *lex_stream; // 词法分析器 + scc_sema_callbacks_t sema_callbacks; // 语义分析回调 + scc_ast_translation_unit_t *translation_unit; // 翻译单元(根节点) + cbool has_error; // 是否有错误 +} scc_parser_t; + +/** + * @brief 检查当前 token 类型 + */ +static inline cbool scc_parse_is(scc_lexer_stream_t *stream, + scc_tok_type_t type) { + const scc_lexer_tok_t *tok = scc_lexer_stream_current(stream); + return tok->type == type; +} + +/** + * @brief 检查前瞻 token 类型 + */ +static inline cbool scc_parse_peek_is(scc_lexer_stream_t *stream, usize n, + scc_tok_type_t type) { + const scc_lexer_tok_t *tok = scc_lexer_stream_peek(stream, n); + return tok->type == type; +} + +/** + * @brief 如果当前 token 匹配则消费 + */ +static inline cbool scc_parse_consume_if(scc_lexer_stream_t *stream, + scc_tok_type_t type) { + if (scc_parse_is(stream, type)) { + scc_lexer_stream_consume(stream); + return true; + } + return false; +} + +/** + * @brief 消费当前 token 并返回它 + */ +static inline const scc_lexer_tok_t * +scc_parse_consume(scc_lexer_stream_t *stream) { + const scc_lexer_tok_t *tok = scc_lexer_stream_current(stream); + scc_lexer_stream_consume(stream); + return tok; +} + +/** + * @brief 初始化解析器 + * @param parser 解析器实例 + * @param lexer 词法分析器实例 + * @param callbacks 语义分析回调(可为 null) + */ +void scc_parser_init(scc_parser_t *parser, scc_lexer_stream_t *lexer, + scc_sema_callbacks_t *callbacks); + +/** + * @brief 销毁解析器 + * @param parser 解析器实例 + */ +void scc_parser_drop(scc_parser_t *parser); + +/** + * @brief 解析整个翻译单元 + * @param parser 解析器实例 + * @return 翻译单元 AST 节点 + */ +scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser); + +/** + * @brief 解析声明 + * @param parser 解析器实例 + * @return 
声明 AST 节点 + */ +scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser); + +/** + * @brief 解析语句 + * @param parser 解析器实例 + * @return 语句 AST 节点 + */ +scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser); + +/** + * @brief 解析表达式 + * @param parser 解析器实例 + * @return 表达式 AST 节点 + */ +scc_ast_expr_t *scc_parse_expression(scc_parser_t *parser); + +/** + * @brief 解析类型 + * @param parser 解析器实例 + * @return 类型 AST 节点 + */ +scc_ast_type_t *scc_parse_type(scc_parser_t *parser); + +cbool scc_parse_is_declaration_start(scc_parser_t *parser, usize offset); + +#endif /* __SCC_PARSER_H__ */ diff --git a/libs/parser/src/parse_decl.c b/libs/parser/src/parse_decl.c new file mode 100644 index 0000000..20b8805 --- /dev/null +++ b/libs/parser/src/parse_decl.c @@ -0,0 +1,250 @@ +#include + +/* +A.2.2 Declarations + (6.7) declaration: + declaration-specifiers init-declarator-list(opt) ; + (6.7) declaration-specifiers: + storage-class-specifier declaration-specifiers(opt) + type-specifier declaration-specifiers(opt) + type-qualifier declaration-specifiers(opt) + function-specifier declaration-specifiers(opt) + (6.7) init-declarator-list: + init-declarator + init-declarator-list , init-declarator + (6.7) init-declarator: + declarator + declarator = initializer + (6.7.1) storage-class-specifier: + typedef + extern + static + auto + register + (6.7.2) type-specifier: + void + char + short + int + long + float + double + signed + unsigned + _Bool + _Complex + struct-or-union-specifier + enum-specifier + typedef-name + (6.7.2.1) struct-or-union-specifier: + struct-or-union identifier(opt) { struct-declaration-list } + struct-or-union identifier + (6.7.2.1) struct-or-union: + struct + union + (6.7.2.1) struct-declaration-list: + struct-declaration + struct-declaration-list struct-declaration + (6.7.2.1) struct-declaration: + specifier-qualifier-list struct-declarator-list ; + (6.7.2.1) specifier-qualifier-list: + type-specifier specifier-qualifier-list(opt) + type-qualifier 
specifier-qualifier-list(opt) + (6.7.2.1) struct-declarator-list: + struct-declarator + struct-declarator-list , struct-declarator + (6.7.2.1) struct-declarator: + declarator + declarator(opt) : constant-expression + (6.7.2.2) enum-specifier: + enum identifier(opt) { enumerator-list } + enum identifier(opt) { enumerator-list ,} + enum identifier + (6.7.2.2) enumerator-list: + enumerator + enumerator-list , enumerator + (6.7.2.2) enumerator: + enumeration-constant + enumeration-constant = constant-expression + (6.7.3) type-qualifier: + const + restrict + volatile + (6.7.4) function-specifier: + inline + (6.7.5) declarator: + pointer(opt) direct-declarator + (6.7.5) direct-declarator: + identifier + ( declarator ) + direct-declarator [ type-qualifier-list(opt) + assignment-expression(opt) ] + direct-declarator [ static type-qualifier-list(opt) + assignment-expression ] + direct-declarator [ type-qualifier-list static + assignment-expression ] + direct-declarator [ type-qualifier-list(opt) *] + direct-declarator ( parameter-type-list ) + direct-declarator ( identifier-list(opt) ) + (6.7.5) pointer: + * type-qualifier-list(opt) + * type-qualifier-list(opt) pointer + (6.7.5) type-qualifier-list: + type-qualifier + type-qualifier-list type-qualifier + (6.7.5) parameter-type-list: + parameter-list + parameter-list , ... 
+ (6.7.5) parameter-list: + parameter-declaration + parameter-list , parameter-declaration + (6.7.5) parameter-declaration: + declaration-specifiers declarator + declaration-specifiers abstract-declarator(opt) + (6.7.5) identifier-list: + identifier + identifier-list , identifier + (6.7.6) type-name: + specifier-qualifier-list abstract-declarator(opt) + (6.7.6) abstract-declarator: + pointer + pointer(opt) direct-abstract-declarator + (6.7.6) direct-abstract-declarator: + ( abstract-declarator ) + direct-abstract-declarator(opt) [ type-qualifier-list(opt) + assignment-expression(opt) ] + direct-abstract-declarator(opt) [ static type-qualifier-list(opt) + assignment-expression ] + direct-abstract-declarator(opt) [ type-qualifier-list static + assignment-expression ] + direct-abstract-declarator(opt) [ * ] + direct-abstract-declarator(opt) ( parameter-type-list(opt) ) + (6.7.7) typedef-name: + identifier + (6.7.8) initializer: + assignment-expression + { initializer-list } + { initializer-list , } + (6.7.8) initializer-list: + designation(opt) initializer + initializer-list , designation(opt) initializer + (6.7.8) designation: + designator-list = + (6.7.8) designator-list: + designator + designator-list designator + (6.7.8) designator: + [ constant-expression ] + . 
identifier +A.2.4 External definitions + (6.9) translation-unit: + external-declaration + translation-unit external-declaration + (6.9) external-declaration: + function-definition + declaration + (6.9.1) function-definition: + declaration-specifiers declarator declaration-list(opt) + compound-statement + (6.9.1) declaration-list: + declaration + declaration-list declaration +*/ + +scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) { + /** + * ISO/IEC 9899:TC3 + * 6.7 Declarations + * Syntax + * + * declaration: + * declaration-specifiers init-declarator-list(opt) ; + * declaration-specifiers: + * storage-class-specifier declaration-specifiers(opt) + * type-specifier declaration-specifiers(opt) + * type-qualifier declaration-specifiers(opt) + * function-specifier declaration-specifiers(opt) + * init-declarator-list: + * init-declarator + * init-declarator-list , init-declarator + * init-declarator: + * declarator + * declarator = initializer + */ + if (!scc_parse_is_declaration_start(parser, 0)) { + return null; + } + scc_ast_type_t *type = scc_parse_type(parser); + if (type == null) { + LOG_ERROR("Failed to parse type"); + return null; + } + + const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); + if (!scc_lexer_tok_match(tok, SCC_TOK_IDENT)) { + LOG_ERROR("Expected identifier, got %s", scc_get_tok_name(tok->type)); + return null; + } + + scc_lexer_stream_consume(parser->lex_stream); + scc_ast_decl_t *decl = scc_malloc(sizeof(scc_ast_decl_t)); + + /* + (6.7.5) declarator: + pointeropt direct-declarator + (6.7.5) direct-declarator: + identifier + ( declarator ) + direct-declarator [ type-qualifier-listopt assignment-expressionopt ] + direct-declarator [static type-qualifier-listopt assignment-expression ] + direct-declarator [ type-qualifier-list static assignment-expression ] + direct-declarator [ type-qualifier-listopt *] + direct-declarator ( parameter-type-list ) + direct-declarator ( identifier-listopt ) + */ + if 
(!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) { + // TODO + if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + decl->base.type = SCC_AST_DECL_VAR; + decl->var.type = type; + decl->var.name = tok->value.cstr.data; + decl->var.init = null; + return decl; + } else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_ASSIGN)) { + decl->base.type = SCC_AST_DECL_VAR; + decl->var.type = type; + decl->var.name = tok->value.cstr.data; + decl->var.init = scc_parse_expression(parser); + return decl; + } + return null; + } + + // function decl + decl->base.type = SCC_AST_DECL_FUNC; + decl->func.name = tok->value.cstr.data; + decl->func.type = scc_malloc(sizeof(scc_ast_type_t)); + decl->func.type->base.type = SCC_AST_TYPE_FUNCTION; + scc_vec_init(decl->func.type->function.param_types); + decl->func.type->function.return_type = type; + // TODO + decl->func.type->function.is_variadic = false; + + // TODO param type + scc_parse_consume_if(parser->lex_stream, SCC_TOK_VOID); + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { + return null; + } + + if (!scc_parse_is(parser->lex_stream, SCC_TOK_L_BRACE)) { + return null; + } + + decl->func.body = scc_parse_statement(parser); + Assert(decl->func.type != null); + Assert(decl->func.type->base.type == SCC_AST_TYPE_FUNCTION); + Assert(decl->func.body != null); + Assert(decl->func.body->base.type == SCC_AST_STMT_COMPOUND); + return decl; +} diff --git a/libs/parser/src/parse_expr.c b/libs/parser/src/parse_expr.c new file mode 100644 index 0000000..3c325ff --- /dev/null +++ b/libs/parser/src/parse_expr.c @@ -0,0 +1,918 @@ +/** + * @file parse_expr.c + * @author your name (you@domain.com) + * @brief Pratt Parser表达式解析器 + * @version 0.1 + * @date 2026-01-09 + * + * @copyright Copyright (c) 2026 + * + */ +#include + +/* +A.2.1 Expressions + +(6.5.1) + primary-expression: + identifier + constant + string-literal + ( expression ) +(6.5.2) + postfix-expression: + primary-expression + 
postfix-expression [ expression ] + postfix-expression ( argument-expression-list(opt) ) + postfix-expression . identifier + postfix-expression -> identifier + postfix-expression ++ + postfix-expression -- + ( type-name ) { initializer-list } + ( type-name ) { initializer-list , } +(6.5.2) + argument-expression-list: + assignment-expression + argument-expression-list , assignment-expression +(6.5.3) + unary-expression: + postfix-expression + ++ unary-expression + -- unary-expression + unary-operator cast-expression + sizeof unary-expression + sizeof ( type-name ) +(6.5.3) + unary-operator: one of + & * + - ~ ! +(6.5.4) + cast-expression: + unary-expression + ( type-name ) cast-expression +(6.5.5) + multiplicative-expression: + cast-expression + multiplicative-expression * cast-expression + multiplicative-expression / cast-expression + multiplicative-expression % cast-expression +(6.5.6) + additive-expression: + multiplicative-expression + additive-expression + multiplicative-expression + additive-expression - multiplicative-expression +(6.5.7) + shift-expression: + additive-expression + shift-expression << additive-expression + shift-expression >> additive-expression +(6.5.8) + relational-expression: + shift-expression + relational-expression < shift-expression + relational-expression > shift-expression + relational-expression <= shift-expression + relational-expression >= shift-expression +(6.5.9) + equality-expression: + relational-expression + equality-expression == relational-expression + equality-expression != relational-expression +(6.5.10) + AND-expression: + equality-expression + AND-expression & equality-expression +(6.5.11) + exclusive-OR-expression: + AND-expression + exclusive-OR-expression ^ AND-expression +(6.5.12) + inclusive-OR-expression: + exclusive-OR-expression + inclusive-OR-expression | exclusive-OR-expression +(6.5.13) + logical-AND-expression: + inclusive-OR-expression + logical-AND-expression && inclusive-OR-expression +(6.5.14) + 
logical-OR-expression: + logical-AND-expression + logical-OR-expression || logical-AND-expression +(6.5.15) + conditional-expression: + logical-OR-expression + logical-OR-expression ? expression : conditional-expression +(6.5.16) + assignment-expression: + conditional-expression + unary-expression assignment-operator assignment-expression +(6.5.16) + assignment-operator: one of + = *= /= %= +=-= <<= >>= &= ^= |= +(6.5.17) + expression: + assignment-expression + expression , assignment-expression +(6.6) +constant-expression: + conditional-expression +*/ + +/** + * @brief 从token映射到AST操作符 + * @param tok_type 词法token类型 + * @param is_unary 是否为一元操作符上下文 + * @return AST操作符类型 + */ +static scc_ast_expr_op_t scc_ast_token_to_operator(scc_tok_type_t tok_type, + cbool is_unary) { + switch (tok_type) { + /* 赋值操作符 */ + case SCC_TOK_ASSIGN: + return SCC_AST_OP_ASSIGN; + case SCC_TOK_ASSIGN_ADD: + return SCC_AST_OP_ASSIGN_ADD; + case SCC_TOK_ASSIGN_SUB: + return SCC_AST_OP_ASSIGN_SUB; + case SCC_TOK_ASSIGN_MUL: + return SCC_AST_OP_ASSIGN_MUL; + case SCC_TOK_ASSIGN_DIV: + return SCC_AST_OP_ASSIGN_DIV; + case SCC_TOK_ASSIGN_MOD: + return SCC_AST_OP_ASSIGN_MOD; + case SCC_TOK_ASSIGN_AND: + return SCC_AST_OP_ASSIGN_AND; + case SCC_TOK_ASSIGN_XOR: + return SCC_AST_OP_ASSIGN_XOR; + case SCC_TOK_ASSIGN_OR: + return SCC_AST_OP_ASSIGN_OR; + case SCC_TOK_ASSIGN_L_SH: + return SCC_AST_OP_ASSIGN_LSHIFT; + case SCC_TOK_ASSIGN_R_SH: + return SCC_AST_OP_ASSIGN_RSHIFT; + + /* 逻辑操作符 */ + case SCC_TOK_OR_OR: + return SCC_AST_OP_LOGICAL_OR; + case SCC_TOK_AND_AND: + return SCC_AST_OP_LOGICAL_AND; + + /* 位操作符 */ + case SCC_TOK_OR: + return SCC_AST_OP_BITWISE_OR; + case SCC_TOK_XOR: + return SCC_AST_OP_BITWISE_XOR; + case SCC_TOK_AND: + return is_unary ? 
SCC_AST_OP_ADDRESS_OF : SCC_AST_OP_BITWISE_AND; + + /* 相等性操作符 */ + case SCC_TOK_EQ: + return SCC_AST_OP_EQUAL; + case SCC_TOK_NEQ: + return SCC_AST_OP_NOT_EQUAL; + + /* 关系操作符 */ + case SCC_TOK_LT: + return SCC_AST_OP_LESS; + case SCC_TOK_GT: + return SCC_AST_OP_GREATER; + case SCC_TOK_LE: + return SCC_AST_OP_LESS_EQUAL; + case SCC_TOK_GE: + return SCC_AST_OP_GREATER_EQUAL; + + /* 移位操作符 */ + case SCC_TOK_L_SH: + return SCC_AST_OP_LEFT_SHIFT; + case SCC_TOK_R_SH: + return SCC_AST_OP_RIGHT_SHIFT; + + /* 算术操作符 */ + case SCC_TOK_ADD: + return is_unary ? SCC_AST_OP_UNARY_PLUS : SCC_AST_OP_ADD; + case SCC_TOK_SUB: + return is_unary ? SCC_AST_OP_UNARY_MINUS : SCC_AST_OP_SUB; + case SCC_TOK_MUL: + return is_unary ? SCC_AST_OP_INDIRECTION : SCC_AST_OP_MUL; + case SCC_TOK_DIV: + return SCC_AST_OP_DIV; + case SCC_TOK_MOD: + return SCC_AST_OP_MOD; + + /* 一元操作符 */ + case SCC_TOK_NOT: + return SCC_AST_OP_LOGICAL_NOT; + case SCC_TOK_BIT_NOT: + return SCC_AST_OP_BITWISE_NOT; + case SCC_TOK_ADD_ADD: + return is_unary ? SCC_AST_OP_PREFIX_INCREMENT + : SCC_AST_OP_POSTFIX_INCREMENT; + case SCC_TOK_SUB_SUB: + return is_unary ? SCC_AST_OP_PREFIX_DECREMENT + : SCC_AST_OP_POSTFIX_DECREMENT; + + default: + return SCC_AST_OP_NONE; + } +} + +/** + * @brief 运算符优先级定义 + */ +typedef enum { + PREC_NONE = 0, // 无优先级 + PREC_COMMA = 1, // , + PREC_ASSIGNMENT = 2, // = += -= *= /= %= &= ^= |= <<= >>= + PREC_CONDITIONAL = 3, // ?: + PREC_LOGICAL_OR = 4, // || + PREC_LOGICAL_AND = 5, // && + PREC_BITWISE_OR = 6, // | + PREC_BITWISE_XOR = 7, // ^ + PREC_BITWISE_AND = 8, // & + PREC_EQUALITY = 9, // == != + PREC_RELATIONAL = 10, // < > <= >= + PREC_SHIFT = 11, // << >> + PREC_ADDITIVE = 12, // + - + PREC_MULTIPLICATIVE = 13, // * / % + PREC_CAST = 14, // 类型转换 + PREC_UNARY = 15, // ++ -- + - * & ~ ! sizeof + PREC_POSTFIX = 16, // [] () . 
-> ++ -- + PREC_PRIMARY = 17, // 最高优先级 +} scc_precedence_t; + +/** + * @brief 获取二元运算符优先级 + */ +static scc_precedence_t get_binary_precedence(scc_tok_type_t op) { + switch (op) { + case SCC_TOK_COMMA: + return PREC_COMMA; + case SCC_TOK_ASSIGN: + case SCC_TOK_ASSIGN_ADD: + case SCC_TOK_ASSIGN_SUB: + case SCC_TOK_ASSIGN_MUL: + case SCC_TOK_ASSIGN_DIV: + case SCC_TOK_ASSIGN_MOD: + case SCC_TOK_ASSIGN_AND: + case SCC_TOK_ASSIGN_XOR: + case SCC_TOK_ASSIGN_OR: + case SCC_TOK_ASSIGN_L_SH: + case SCC_TOK_ASSIGN_R_SH: + return PREC_ASSIGNMENT; + case SCC_TOK_COND: + return PREC_CONDITIONAL; + case SCC_TOK_OR_OR: + return PREC_LOGICAL_OR; + case SCC_TOK_AND_AND: + return PREC_LOGICAL_AND; + case SCC_TOK_OR: + return PREC_BITWISE_OR; + case SCC_TOK_XOR: + return PREC_BITWISE_XOR; + case SCC_TOK_AND: + return PREC_BITWISE_AND; + case SCC_TOK_EQ: + case SCC_TOK_NEQ: + return PREC_EQUALITY; + case SCC_TOK_LT: + case SCC_TOK_GT: + case SCC_TOK_LE: + case SCC_TOK_GE: + return PREC_RELATIONAL; + case SCC_TOK_L_SH: + case SCC_TOK_R_SH: + return PREC_SHIFT; + case SCC_TOK_ADD: + case SCC_TOK_SUB: + return PREC_ADDITIVE; + case SCC_TOK_MUL: + case SCC_TOK_DIV: + case SCC_TOK_MOD: + return PREC_MULTIPLICATIVE; + default: + return PREC_NONE; + } +} + +/** + * @brief 检查是否是赋值运算符 + */ +static cbool is_assignment_operator(scc_tok_type_t op) { + switch (op) { + case SCC_TOK_ASSIGN: + case SCC_TOK_ASSIGN_ADD: + case SCC_TOK_ASSIGN_SUB: + case SCC_TOK_ASSIGN_MUL: + case SCC_TOK_ASSIGN_DIV: + case SCC_TOK_ASSIGN_MOD: + case SCC_TOK_ASSIGN_AND: + case SCC_TOK_ASSIGN_XOR: + case SCC_TOK_ASSIGN_OR: + case SCC_TOK_ASSIGN_L_SH: + case SCC_TOK_ASSIGN_R_SH: + return true; + default: + return false; + } +} + +/** + * @brief 检查是否是二元运算符 + */ +static cbool is_binary_operator(scc_tok_type_t op) { + return get_binary_precedence(op) != PREC_NONE; +} + +static inline scc_ast_expr_t *expr_create(scc_parser_t *parser, + scc_ast_node_type_t type) { + scc_ast_expr_t *expr = (scc_ast_expr_t 
*)scc_malloc(sizeof(scc_ast_expr_t)); + Assert(expr != null); + expr->base.type = type; + expr->base.loc = scc_pos_create(); + return expr; +} + +/** + * @brief 解析基本表达式 + * + * (6.5.1) primary-expression: + * identifier + * constant + * string-literal + * ( expression ) + */ +static scc_ast_expr_t *parse_primary_expression(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); + + switch (tok->type) { + case SCC_TOK_IDENT: { + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_IDENTIFIER); + if (!expr) + return null; + expr->identifier.name = tok->value.cstr.data; + scc_lexer_stream_consume(parser->lex_stream); + + // 调用语义回调 + if (parser->sema_callbacks.on_expr) { + parser->sema_callbacks.on_expr(parser->sema_callbacks.context, + expr->base.type, expr); + } + return expr; + } + + case SCC_TOK_INT_LITERAL: + case SCC_TOK_FLOAT_LITERAL: + case SCC_TOK_CHAR_LITERAL: { + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_INT_LITERAL); + if (!expr) + return null; + expr->literal.value = tok->value; + scc_lexer_stream_consume(parser->lex_stream); + + if (parser->sema_callbacks.on_expr) { + parser->sema_callbacks.on_expr(parser->sema_callbacks.context, + expr->base.type, expr); + } + return expr; + } + + case SCC_TOK_STRING_LITERAL: { + scc_ast_expr_t *expr = expr_create(parser, SCC_AST_EXPR_STRING_LITERAL); + if (!expr) + return null; + expr->literal.value = tok->value; + scc_lexer_stream_consume(parser->lex_stream); + + if (parser->sema_callbacks.on_expr) { + parser->sema_callbacks.on_expr(parser->sema_callbacks.context, + expr->base.type, expr); + } + return expr; + } + + case SCC_TOK_L_PAREN: { + TODO(); + // // 保存当前位置,用于区分类型转换和括号表达式 + // usize save_pos = parser->lex_stream->curr_pos; + + // // 跳过 '(' + // scc_lexer_stream_consume(parser->lex_stream); + + // // 尝试解析类型转换 + // if (parser_is_type_start(parser)) { + // scc_ast_type_t *type = scc_parse_type_name(parser); + // if (type && parser_consume_if(parser, 
SCC_TOK_R_PAREN)) { + // // 成功解析类型转换 + // scc_ast_expr_t *cast = expr_create(parser, + // SCC_AST_EXPR_CAST); if (!cast) { + // scc_free(type); + // return null; + // } + // cast->cast.type = type; + // cast->cast.expr = scc_parse_expression(parser, 0); // + // 递归解析 + + // if (parser->sema_callbacks.on_expr) { + // parser->sema_callbacks.on_expr( + // parser->sema_callbacks.context, cast->node_type, + // cast); + // } + // return cast; + // } + // // 解析失败,清理 + // if (type) + // scc_free(type); + // } + + // // 不是类型转换,恢复为括号表达式 + // parser->lex_stream->curr_pos = save_pos; + // scc_lexer_stream_consume(parser->lex_stream); // 跳过 '(' + + // scc_ast_expr_t *expr = scc_parse_expression(parser, 0); + // if (!expr) { + // return null; + // } + + // if (!parser_consume_if(parser, SCC_TOK_R_PAREN)) { + // PARSER_ERROR(parser, "expected ')' after expression"); + // scc_free(expr); + // return null; + // } + + // 括号表达式不需要特殊节点,直接返回内部表达式 + // return expr; + } + + default: + LOG_ERROR("expected primary expression, got %s", + scc_get_tok_name(tok->type)); + return null; + } +} + +/** + * @brief 解析后缀表达式 + * + * (6.5.2) postfix-expression: + * primary-expression + * postfix-expression [ expression ] + * postfix-expression ( argument-expression-list(opt) ) + * postfix-expression . 
identifier + * postfix-expression -> identifier + * postfix-expression ++ + * postfix-expression -- + * ( type-name ) { initializer-list } + * ( type-name ) { initializer-list , } + */ +static scc_ast_expr_t *parse_postfix_expression(scc_parser_t *parser, + scc_ast_expr_t *lhs) { + scc_ast_expr_t *expr = lhs; + + while (true) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_current(parser->lex_stream); + + switch (tok->type) { + case SCC_TOK_L_BRACKET: { // 数组下标 + scc_lexer_stream_consume(parser->lex_stream); // 跳过 '[' + + scc_ast_expr_t *subscript = + expr_create(parser, SCC_AST_EXPR_ARRAY_SUBSCRIPT); + if (!subscript) { + scc_free(expr); + return null; + } + + subscript->subscript.array = expr; + subscript->subscript.index = scc_parse_expression(parser); + + if (!subscript->subscript.index) { + scc_free(subscript); + return null; + } + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_BRACKET)) { + LOG_ERROR("expected ']' after array index"); + scc_free(subscript); + return null; + } + + expr = subscript; + break; + } + + case SCC_TOK_L_PAREN: { // 函数调用 + scc_lexer_stream_consume(parser->lex_stream); // 跳过 '(' + + scc_ast_expr_t *call = expr_create(parser, SCC_AST_EXPR_CALL); + if (!call) { + scc_free(expr); + return null; + } + + call->call.callee = expr; + scc_vec_init(call->call.args); + + // 解析参数列表 + if (!scc_parse_is(parser->lex_stream, SCC_TOK_R_PAREN)) { + do { + scc_ast_expr_t *arg = scc_parse_expression(parser); + if (!arg) { + // 清理已解析的参数 + scc_vec_foreach(call->call.args, i) { + scc_free(scc_vec_at(call->call.args, i)); + } + scc_vec_free(call->call.args); + scc_free(call); + return null; + } + scc_vec_push(call->call.args, arg); + + if (!scc_parse_consume_if(parser->lex_stream, + SCC_TOK_COMMA)) { + break; + } + } while (true); + } + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { + LOG_ERROR("expected ')' after argument list"); + // 清理 + scc_vec_foreach(call->call.args, i) { + scc_free(scc_vec_at(call->call.args, i)); + 
} + scc_vec_free(call->call.args); + scc_free(call); + return null; + } + + expr = call; + break; + } + + case SCC_TOK_DOT: { // 成员访问 + scc_lexer_stream_consume(parser->lex_stream); // 跳过 '.' + + scc_ast_expr_t *member = expr_create(parser, SCC_AST_EXPR_MEMBER); + if (!member) { + scc_free(expr); + return null; + } + + member->member.base = expr; + + if (!scc_parse_is(parser->lex_stream, SCC_TOK_IDENT)) { + LOG_ERROR("expected identifier after '.'"); + scc_free(member); + return null; + } + + member->member.member_name = tok->value.cstr.data; + scc_lexer_stream_consume(parser->lex_stream); // 跳过标识符 + + expr = member; + break; + } + + case SCC_TOK_DEREF: { // 指针成员访问 -> + scc_lexer_stream_consume(parser->lex_stream); // 跳过 '->' + + scc_ast_expr_t *ptr_member = + expr_create(parser, SCC_AST_EXPR_PTR_MEMBER); + if (!ptr_member) { + scc_free(expr); + return null; + } + + ptr_member->ptr_member.base = expr; + + if (!scc_parse_is(parser->lex_stream, SCC_TOK_IDENT)) { + LOG_ERROR("expected identifier after '->'"); + scc_free(ptr_member); + return null; + } + + ptr_member->ptr_member.member_name = tok->value.cstr.data; + scc_lexer_stream_consume(parser->lex_stream); // 跳过标识符 + + expr = ptr_member; + break; + } + + case SCC_TOK_ADD_ADD: // 后缀++ + case SCC_TOK_SUB_SUB: { // 后缀-- + // 跳过操作符 + scc_lexer_stream_consume(parser->lex_stream); + + scc_ast_expr_t *unary = expr_create(parser, SCC_AST_EXPR_UNARY); + if (!unary) { + scc_free(expr); + return null; + } + + unary->unary.op = scc_ast_token_to_operator(tok->type, false); + unary->unary.operand = expr; + + expr = unary; + break; + } + + default: + // 不是后缀操作符,返回当前表达式 + return expr; + } + + // 调用语义回调 + // if (parser->sema_callbacks.on_expr) { + // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, + // expr->base.type, expr); + // } + } +} + +/** + * @brief 解析一元表达式 + * + * (6.5.3) unary-expression: + * postfix-expression + * ++ unary-expression + * -- unary-expression + * unary-operator cast-expression + * sizeof 
unary-expression + * sizeof ( type-name ) + */ +static scc_ast_expr_t *parse_unary_expression(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); + + switch (tok->type) { + case SCC_TOK_ADD_ADD: // 前缀++ + case SCC_TOK_SUB_SUB: { // 前缀-- + scc_lexer_stream_consume(parser->lex_stream); // 跳过操作符 + + scc_ast_expr_t *unary = expr_create(parser, SCC_AST_EXPR_UNARY); + if (!unary) + return null; + + unary->unary.op = scc_ast_token_to_operator(tok->type, true); + unary->unary.operand = parse_unary_expression(parser); + + if (!unary->unary.operand) { + scc_free(unary); + return null; + } + + // if (parser->sema_callbacks.on_expr) { + // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, + // unary->node_type, unary); + // } + return unary; + } + + case SCC_TOK_ADD: // + + case SCC_TOK_SUB: // - + case SCC_TOK_MUL: // * + case SCC_TOK_AND: // & + case SCC_TOK_NOT: // ! + case SCC_TOK_BIT_NOT: { // ~ + // 跳过操作符 + scc_lexer_stream_consume(parser->lex_stream); + + scc_ast_expr_t *unary = expr_create(parser, SCC_AST_EXPR_UNARY); + if (!unary) + return null; + + unary->unary.op = scc_ast_token_to_operator(tok->type, true); + unary->unary.operand = parse_unary_expression(parser); + + if (!unary->unary.operand) { + scc_free(unary); + return null; + } + + // if (parser->sema_callbacks.on_expr) { + // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, + // unary->node_type, unary); + // } + return unary; + } + + case SCC_TOK_SIZEOF: { // sizeof + // scc_lexer_stream_consume(parser->lex_stream); // 跳过 'sizeof' + + // scc_ast_expr_t *size_of = expr_create(parser, SCC_AST_EXPR_SIZE_OF); + // if (!size_of) + // return null; + + // size_of->size_align.is_size_of = true; + + // // 检查是否是 sizeof(type) 或 sizeof expr + // if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) { + // // 检查是否是类型 + // // if (parser_is_type_start(parser)) { + // // size_of->size_align.type = scc_parse_type_name(parser); + // // if 
(!size_of->size_align.type) { + // // scc_free(size_of); + // // return null; + // // } + // // } else { + // size_of->size_align.expr = scc_parse_expression(parser); + // if (!size_of->size_align.expr) { + // scc_free(size_of); + // return null; + // } + // // } + + // if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { + // LOG_ERROR("expected ')' after sizeof"); + // // if (size_of->size_align.type) + // // scc_free(size_of->size_align.type); + // // if (size_of->size_align.expr) + // // scc_free(size_of->size_align.expr); + // // scc_free(size_of); + // // return null; + // } + // } else { + // size_of->size_align.expr = parse_unary_expression(parser); + // if (!size_of->size_align.expr) { + // scc_free(size_of); + // return null; + // } + // } + + // // if (parser->sema_callbacks.on_expr) { + // // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, + // // size_of->node_type, size_of); + // // } + // return size_of; + } + + default: + // 不是一元操作符,解析基本表达式 + scc_ast_expr_t *primary = parse_primary_expression(parser); + if (!primary) + return null; + + // 应用后缀操作符 + return parse_postfix_expression(parser, primary); + } +} + +/** + * @brief 解析强制转换表达式 + * + * (6.5.4) cast-expression: + * unary-expression + * ( type-name ) cast-expression + */ +static scc_ast_expr_t *parse_cast_expression(scc_parser_t *parser) { + // 检查是否是类型转换 + // if (parser_is_cast_expression(parser)) { + // // 我们已经知道是 ( type-name ) 格式 + // scc_lexer_stream_consume(parser->lex_stream); // 跳过 '(' + + // scc_ast_type_t *type = scc_parse_type_name(parser); + // if (!type) { + // return null; + // } + + // if (!parser_consume_if(parser, SCC_TOK_R_PAREN)) { + // PARSER_ERROR(parser, "expected ')' after type name"); + // scc_free(type); + // return null; + // } + + // scc_ast_expr_t *cast = expr_create(parser, SCC_AST_EXPR_CAST); + // if (!cast) { + // scc_free(type); + // return null; + // } + + // cast->cast.type = type; + // cast->cast.expr = parse_cast_expression(parser); // 
递归解析 + + // if (!cast->cast.expr) { + // scc_free(cast); + // return null; + // } + + // if (parser->sema_callbacks.on_expr) { + // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, + // cast->node_type, cast); + // } + // return cast; + // } + + // 不是类型转换,解析一元表达式 + return parse_unary_expression(parser); +} + +/** + * @brief Pratt Parser核心:解析表达式 + * @param parser 解析器 + * @param min_prec 最小优先级 + * @return 表达式AST节点 + */ +static scc_ast_expr_t * +parse_expression_with_precedence(scc_parser_t *parser, + scc_precedence_t min_prec) { + // 解析左侧表达式(一元表达式或基本表达式) + scc_ast_expr_t *lhs = parse_cast_expression(parser); + if (!lhs) { + return null; + } + + while (true) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_current(parser->lex_stream); + scc_tok_type_t op = tok->type; + scc_precedence_t prec = get_binary_precedence(op); + + // 检查是否达到最小优先级或不是二元运算符 + if (prec < min_prec || prec == PREC_NONE) { + break; + } + + // 特殊处理条件表达式 ?: + if (op == SCC_TOK_COND) { + scc_lexer_stream_consume(parser->lex_stream); // 跳过 '?' 
+ + scc_ast_expr_t *cond_expr = expr_create(parser, SCC_AST_EXPR_COND); + if (!cond_expr) { + scc_free(lhs); + return null; + } + + cond_expr->cond.cond = lhs; + + // 解析then表达式 + cond_expr->cond.then_expr = + parse_expression_with_precedence(parser, PREC_NONE); + if (!cond_expr->cond.then_expr) { + scc_free(cond_expr); + scc_free(lhs); + return null; + } + + // 期望 ':' + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) { + LOG_ERROR("expected ':' in conditional expression"); + scc_free(cond_expr); + scc_free(lhs); + return null; + } + + // 解析else表达式(条件表达式,右结合) + cond_expr->cond.else_expr = + parse_expression_with_precedence(parser, PREC_CONDITIONAL - 1); + if (!cond_expr->cond.else_expr) { + scc_free(cond_expr); + scc_free(lhs); + return null; + } + + lhs = cond_expr; + continue; + } + + // 对于赋值运算符,右侧优先级需要减1(右结合性) + scc_precedence_t next_min_prec; + if (is_assignment_operator(op)) { + next_min_prec = (scc_precedence_t)(prec - 1); // 右结合 + } else { + next_min_prec = (scc_precedence_t)(prec + 1); // 左结合 + } + + scc_lexer_stream_consume(parser->lex_stream); // 跳过操作符 + + // 解析右侧表达式 + scc_ast_expr_t *rhs = + parse_expression_with_precedence(parser, next_min_prec); + if (!rhs) { + scc_free(lhs); + return null; + } + + // 创建二元表达式节点 + scc_ast_expr_t *binary = expr_create(parser, SCC_AST_EXPR_BINARY); + if (!binary) { + scc_free(lhs); + scc_free(rhs); + return null; + } + + binary->binary.op = scc_ast_token_to_operator(op, false); + binary->binary.lhs = lhs; + binary->binary.rhs = rhs; + + lhs = binary; + } + + // if (parser->sema_callbacks.on_expr) { + // parser->sema_callbacks.on_expr(parser->sema_callbacks.context, + // binary->node_type, binary); + // } + return lhs; +} + +scc_ast_expr_t *scc_parse_expression(scc_parser_t *parser) { + return parse_expression_with_precedence(parser, PREC_NONE); +} diff --git a/libs/parser/src/parse_stmt.c b/libs/parser/src/parse_stmt.c new file mode 100644 index 0000000..bc9fe87 --- /dev/null +++ 
b/libs/parser/src/parse_stmt.c @@ -0,0 +1,426 @@ +/* +415 +ISO/IEC 9899:TC3 +Committee Draft — September 7, 2007 +WG14/N1256 + +A.2.3 Statements +(6.8) + statement: + labeled-statement + compound-statement + expression-statement + selection-statement + iteration-statement + jump-statement +(6.8.1) + labeled-statement: + identifier : statement + case constant-expression : statement + default : statement +(6.8.2) + compound-statement: + { block-item-list(opt) } +(6.8.2) + block-item-list: + block-item + block-item-list block-item +(6.8.2) + block-item: + declaration + statement +(6.8.3) + expression-statement: + expression(opt) ; +(6.8.4) + selection-statement: + if ( expression ) statement + if ( expression ) statement else statement + switch ( expression ) statement +(6.8.5) + iteration-statement: + while ( expression ) statement + do statement while ( expression ); + for ( expression(opt) ; expression(opt) ; expression(opt) ) statement + for ( declaration expression(opt) ; expression(opt) ) statement +(6.8.6) + jump-statement: + goto identifier ; + continue ; + break ; + return expression(opt) ; +*/ +#include + +/** + * @brief Allocate a statement node with scc_malloc and give it a fresh source position. + * + * base.type is set to SCC_AST_TRANSLATION_UNIT only as a placeholder; the + * parse functions in this file assign the real statement kind right after + * the allocation. No other field is initialised here. + */ +static inline scc_ast_stmt_t *ast_stmt_alloc() { + scc_ast_stmt_t *stmt = (scc_ast_stmt_t *)scc_malloc(sizeof(scc_ast_stmt_t)); + Assert(stmt != null); + stmt->base.type = SCC_AST_TRANSLATION_UNIT; + stmt->base.loc = scc_pos_create(); + return stmt; +} + +/** + * @brief Parse a parenthesised expression: ( expression ) + * + * A missing '(' or ')' is only logged; parsing carries on and whatever + * scc_parse_expression returned is handed back unchanged. + */ +static inline scc_ast_expr_t *ast_parse_paren_expression(scc_parser_t *parser) { + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) { + LOG_ERROR("Expected '(' before like `( expression )` ."); + } + + scc_ast_expr_t *ret = scc_parse_expression(parser); + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { + LOG_ERROR("Expected ')' after like `( expression )` ."); + } + return ret; +} + +/** + * @brief Parse a labeled statement: identifier : statement + */ +static scc_ast_stmt_t *parse_label_statement(scc_parser_t *parser) { + // Remember the identifier token before it is consumed; its text becomes the label. + const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); + if 
(!scc_parse_consume_if(parser->lex_stream, SCC_TOK_IDENT)) { + return null; + } + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) { + // This path handles `identifier :`, so report the label rather than a case. + LOG_ERROR("Expected ':' after label."); + return null; + } + + scc_ast_stmt_t *statement = scc_parse_statement(parser); + if (statement == null) { + Panic("expect stmt"); + } + + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + Assert(stmt != null); + + stmt->base.type = SCC_AST_STMT_LABEL; + // TODO maybe use cstring + // NOTE(review): tok was fetched before the identifier was consumed; confirm the lexer stream keeps it valid at this point. + stmt->label_stmt.label = tok->value.cstr.data; + stmt->label_stmt.stmt = statement; + return stmt; +} + +/** + * @brief Parse `case constant-expression : statement`. + * + * The constant expression is not parsed yet (see the TODO below), so + * case_stmt.expr is always null for now. + * + * @return the new SCC_AST_STMT_CASE node, or null when `case` or ':' is missing. + */ +static scc_ast_stmt_t *parse_case_statement(scc_parser_t *parser) { + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_CASE)) { + return null; + } + + scc_ast_expr_t *expr = null; + // TODO = scc_parser_constant_expression(); + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) { + LOG_ERROR("Expected constant expression after case."); + return null; + } + + scc_ast_stmt_t *statement = scc_parse_statement(parser); + if (statement == null) { + Panic("expect stmt"); + } + + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + Assert(stmt != null); + stmt->case_stmt.expr = expr; + stmt->base.type = SCC_AST_STMT_CASE; + stmt->case_stmt.stmt = statement; + return stmt; +} + +/** + * @brief Parse `default : statement`. + * + * @return the new SCC_AST_STMT_DEFAULT node, or null when `default` or ':' is missing. + */ +static scc_ast_stmt_t *parse_default_statement(scc_parser_t *parser) { + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_DEFAULT)) { + return null; + } + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) { + // `default` takes no expression; the only thing that can be missing is ':'. + LOG_ERROR("Expected ':' after default."); + return null; + } + + scc_ast_stmt_t *statement = scc_parse_statement(parser); + if (statement == null) { + Panic("expect stmt"); + } + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + Assert(stmt != null); + stmt->base.type = SCC_AST_STMT_DEFAULT; + stmt->default_stmt.stmt = statement; + + return stmt; +} + +static scc_ast_stmt_t *parse_compound_statement(scc_parser_t *parser) { + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_BRACE)) 
{ + return null; + } + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + stmt->base.type = SCC_AST_STMT_COMPOUND; + + scc_vec_init(stmt->compound.block_items); + while (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_BRACE)) { + /// TODO + // scc_parse_is_decl(); + scc_ast_node_type_t *ret = null; + ret = (scc_ast_node_type_t *)scc_parse_declaration(parser); + if (ret == null) { + ret = (scc_ast_node_type_t *)scc_parse_statement(parser); + } + if (ret == null) { + LOG_ERROR("Invalid statement"); + // TODO + scc_free(stmt); + return null; + } + scc_vec_push(stmt->compound.block_items, ret); + } + return stmt; +} + +static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) { + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_IF)) { + return null; + } + + scc_ast_expr_t *expression = ast_parse_paren_expression(parser); + scc_ast_stmt_t *statement = scc_parse_statement(parser); + + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + stmt->base.type = SCC_AST_STMT_IF; + stmt->if_stmt.cond = expression; + stmt->if_stmt.then_stmt = statement; + if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_ELSE)) { + stmt->if_stmt.opt_else_stmt = scc_parse_statement(parser); + } else { + stmt->if_stmt.opt_else_stmt = null; + } + return stmt; +} + +static scc_ast_stmt_t *parse_switch_statement(scc_parser_t *parser) { + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SWITCH)) { + return null; + } + + scc_ast_expr_t *expression = ast_parse_paren_expression(parser); + scc_ast_stmt_t *statement = scc_parse_statement(parser); + + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + stmt->base.type = SCC_AST_STMT_SWITCH; + stmt->switch_stmt.cond = expression; + stmt->switch_stmt.body = statement; + return stmt; +} + +static scc_ast_stmt_t *parse_while_statement(scc_parser_t *parser) { + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_WHILE)) { + return null; + } + + scc_ast_expr_t *expression = ast_parse_paren_expression(parser); + scc_ast_stmt_t *statement = scc_parse_statement(parser); 
+ + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + stmt->base.type = SCC_AST_STMT_WHILE; + stmt->while_stmt.cond = expression; + stmt->while_stmt.body = statement; + return stmt; +} + +/** + * @brief Parse `do statement while ( expression ) ;`. + * + * @return the new SCC_AST_STMT_DO_WHILE node, or null when `do` or `while` is missing. + */ +static scc_ast_stmt_t *parse_do_while_statement(scc_parser_t *parser) { + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_DO)) { + return null; + } + + scc_ast_stmt_t *statement = scc_parse_statement(parser); + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_WHILE)) { + LOG_ERROR("Expected 'while' after do."); + // TODO: use better error handling; a proper memory manager should be adopted in the future + scc_free(statement); + return null; + } + scc_ast_expr_t *expression = ast_parse_paren_expression(parser); + + // The grammar terminates a do-while with ';'. Consume it here so it is not + // left behind and later parsed as a separate empty expression statement. + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + LOG_ERROR("Expected semicolon after do-while statement."); + } + + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + stmt->base.type = SCC_AST_STMT_DO_WHILE; + stmt->do_while_stmt.cond = expression; + stmt->do_while_stmt.body = statement; + return stmt; +} + +/** + * @brief Parse a for statement. + * + * Only the expression form of the first clause is handled; the declaration + * form is still a TODO. Missing punctuation is logged and parsing continues. + */ +static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) { + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_FOR)) { + return null; + } + + /* + for ( expression(opt) ; expression(opt) ; expression(opt) ) statement + for ( declaration expression(opt) ; expression(opt) ) statement + */ + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) { + LOG_ERROR("Expected '(' before like `( expression )` ."); + } + + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + stmt->base.type = SCC_AST_STMT_FOR; + + // All three clauses are optional: an empty clause stays null and the + // expression parser is only invoked when there is something to parse, + // so `for (;;)` is accepted without going through the expression parser. + // TODO use decl or expr + stmt->for_stmt.init = null; + if (!scc_parse_is(parser->lex_stream, SCC_TOK_SEMICOLON)) { + stmt->for_stmt.init = (scc_ast_type_t *)scc_parse_expression(parser); + } + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + LOG_ERROR("Expected semicolon in for statement."); + } + + stmt->for_stmt.cond = null; + if (!scc_parse_is(parser->lex_stream, SCC_TOK_SEMICOLON)) { + stmt->for_stmt.cond = scc_parse_expression(parser); + } + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + LOG_ERROR("Expected semicolon in for statement."); + } + + stmt->for_stmt.iter = null; + if (!scc_parse_is(parser->lex_stream, SCC_TOK_R_PAREN)) { + stmt->for_stmt.iter = scc_parse_expression(parser); + } + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) { + LOG_ERROR("Expected ')' after like `( expression )` ."); + } + + stmt->for_stmt.body = 
scc_parse_statement(parser); + + return stmt; +} + +/** + * @brief Parse a jump statement: goto / continue / break / return. + * + * The current token must be one of those four keywords; any other token + * reaches UNREACHABLE(). A missing label or ';' is logged and the node is + * still returned. + */ +static scc_ast_stmt_t *parse_jump_statement(scc_parser_t *parser) { + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + + if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_GOTO)) { + stmt->base.type = SCC_AST_STMT_GOTO; + if (scc_parse_is(parser->lex_stream, SCC_TOK_IDENT)) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_current(parser->lex_stream); + stmt->goto_stmt.label = tok->value.cstr.data; + scc_lexer_stream_consume(parser->lex_stream); + } else { + LOG_ERROR("Expected label after goto."); + // ast_stmt_alloc() does not initialise the label; give it a defined value. + stmt->goto_stmt.label = null; + } + } else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_CONTINUE)) { + stmt->base.type = SCC_AST_STMT_CONTINUE; + } else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_BREAK)) { + stmt->base.type = SCC_AST_STMT_BREAK; + } else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_RETURN)) { + stmt->base.type = SCC_AST_STMT_RETURN; + // The operand is optional (return expression(opt) ;): for a bare + // `return ;` keep expr null instead of calling the expression parser. + stmt->return_stmt.expr = null; + if (!scc_parse_is(parser->lex_stream, SCC_TOK_SEMICOLON)) { + stmt->return_stmt.expr = scc_parse_expression(parser); + } + } else { + UNREACHABLE(); + } + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + LOG_ERROR("Expected semicolon after jump statement."); + } + return stmt; +} + +/** + * @brief Parse an expression statement: expression(opt) ; + * + * @return a SCC_AST_STMT_EXPR node (expr is null for a lone ';'), or null + * when the expression fails to parse. + */ +static scc_ast_stmt_t *parse_expression_statement(scc_parser_t *parser) { + scc_ast_stmt_t *stmt = ast_stmt_alloc(); + stmt->base.type = SCC_AST_STMT_EXPR; + + if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + stmt->expr.expr = null; + return stmt; + } + + stmt->expr.expr = scc_parse_expression(parser); + if (stmt->expr.expr == null) { + // TODO + scc_free(stmt); + return null; + } + + if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) { + LOG_ERROR("Expected semicolon after expression."); + } + return stmt; +} + +/** + * @brief Parse one statement, dispatching on the current token. + */ +scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) { + const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream); + switch (tok->type) { + /* + (6.8.1) + labeled-statement: + identifier : statement + case constant-expression : statement + default : statement + */ + case 
SCC_TOK_IDENT: + // 注意需要检测下一个 token 是否为冒号,否则将需要判定成表达式语句 + if (!scc_parse_peek_is(parser->lex_stream, 1, SCC_TOK_COLON)) { + break; + } + return parse_label_statement(parser); + case SCC_TOK_CASE: + return parse_case_statement(parser); + case SCC_TOK_DEFAULT: + return parse_default_statement(parser); + /* + (6.8.2) + compound-statement: + { block-item-list(opt) } + (6.8.2) + block-item-list: + block-item + block-item-list block-item + (6.8.2) + block-item: + declaration + statement + */ + case SCC_TOK_L_BRACE: + return parse_compound_statement(parser); + /* + (6.8.4) + selection-statement: + if ( expression ) statement + if ( expression ) statement else statement + switch ( expression ) statement + */ + case SCC_TOK_IF: + return parse_if_statement(parser); + case SCC_TOK_SWITCH: + return parse_switch_statement(parser); + /* + (6.8.5) + iteration-statement: + while ( expression ) statement + do statement while ( expression ); + for ( expression(opt) ; expression(opt) ; expression(opt) ) + statement + for ( declaration expression(opt) ; expression(opt) ) + statement + */ + case SCC_TOK_WHILE: + return parse_while_statement(parser); + case SCC_TOK_DO: + return parse_do_while_statement(parser); + case SCC_TOK_FOR: + return parse_for_statement(parser); + /* + (6.8.6) + jump-statement: + goto identifier ; + continue ; + break ; + return expression(opt) ; + */ + case SCC_TOK_GOTO: + case SCC_TOK_CONTINUE: + case SCC_TOK_BREAK: + case SCC_TOK_RETURN: + return parse_jump_statement(parser); + default: + break; + } + /* + (6.8.3) + expression-statement: + expression(opt) ; + */ + return parse_expression_statement(parser); +} diff --git a/libs/parser/src/parse_type.c b/libs/parser/src/parse_type.c new file mode 100644 index 0000000..537c49e --- /dev/null +++ b/libs/parser/src/parse_type.c @@ -0,0 +1,1055 @@ +/* +6.7.6 Type names +Syntax + +type-name: + specifier-qualifier-list abstract-declarator(opt) +abstract-declarator: + pointer + pointer(opt) direct-abstract-declarator 
+direct-abstract-declarator: + ( abstract-declarator ) + direct-abstract-declarator(opt) [ type-qualifier-list(opt) + assignment-expression(opt) ] + direct-abstract-declarator(opt) [ static type-qualifier-list(opt) + assignment-expression ] + direct-abstract-declarator(opt) [ type-qualifier-list static + assignment-expression ] + direct-abstract-declarator(opt) [ * ] + direct-abstract-declarator(opt) ( parameter-type-list(opt) ) + +EXAMPLE The constructions +(a) int +(b) int * +(c) int *[3] +(d) int (*)[3] +(e) int (*)[*] +(f) int *() +(g) int (*)(void) +(h) int (*const [])(unsigned int, ...) + +(6.7.2) + type-specifier: + void + char + short + int + long + float + double + signed + unsigned + _Bool + _Complex + struct-or-union-specifier + enum-specifier + typedef-name +(6.7.2.1) + struct-or-union-specifier: + struct-or-union identifieropt { struct-declaration-list } + struct-or-union identifier +(6.7.2.1) + struct-or-union: + struct + union +(6.7.2.1) + struct-declaration-list: + struct-declaration + struct-declaration-list struct-declaration +(6.7.2.1) + struct-declaration: + specifier-qualifier-list struct-declarator-list ; +(6.7.2.1) + specifier-qualifier-list: + type-specifier specifier-qualifier-list(opt) + type-qualifier specifier-qualifier-list(opt) +(6.7.2.1) + struct-declarator-list: + struct-declarator + struct-declarator-list , struct-declarator +(6.7.2.1) + struct-declarator: + declarator + declarator(opt) : constant-expression +(6.7.2.2) + enum-specifier: + enum identifieropt { enumerator-list } + enum identifieropt { enumerator-list ,} + enum identifier +(6.7.2.2) + enumerator-list: + enumerator + enumerator-list , enumerator +(6.7.2.2) + enumerator: + enumeration-constant + enumeration-constant = constant-expression +(6.7.3) + type-qualifier: + const + restrict + volatile +(6.7.5) + pointer: + * type-qualifier-list(opt) + * type-qualifier-list(opt) pointer +(6.7.5) + type-qualifier-list: + type-qualifier + type-qualifier-list type-qualifier +(6.7.5) 
+ parameter-type-list: + parameter-list + parameter-list , ... +(6.7.5) + parameter-list: + parameter-declaration + parameter-list , parameter-declaration +(6.7.5) + parameter-declaration: + declaration-specifiers declarator + declaration-specifiers abstract-declaratoropt +(6.7.5) + identifier-list: + identifier + identifier-list , identifier +*/ + +#include + +/** + * @brief 判断 token 是否为声明说明符的开始 + * + * 声明说明符可以是: + * - 存储类说明符 (typedef, extern, static, auto, register) + * - 类型说明符 (void, char, int, float, struct, union, enum 等) + * - 类型限定符 (const, volatile, restrict, atomic) + * - 函数说明符 (inline) + */ +cbool scc_parse_is_decl_specifier_start(scc_parser_t *parser, usize offset) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, offset); + + switch (tok->type) { + // 存储类说明符 + case SCC_TOK_TYPEDEF: + case SCC_TOK_EXTERN: + case SCC_TOK_STATIC: + case SCC_TOK_AUTO: + case SCC_TOK_REGISTER: + // 类型说明符 + case SCC_TOK_VOID: + case SCC_TOK_CHAR: + case SCC_TOK_SHORT: + case SCC_TOK_INT: + case SCC_TOK_LONG: + case SCC_TOK_FLOAT: + case SCC_TOK_DOUBLE: + case SCC_TOK_SIGNED: + case SCC_TOK_UNSIGNED: + case SCC_TOK_BOOL: + case SCC_TOK_COMPLEX: + case SCC_TOK_STRUCT: + case SCC_TOK_UNION: + case SCC_TOK_ENUM: + // 类型限定符 + case SCC_TOK_CONST: + case SCC_TOK_VOLATILE: + case SCC_TOK_RESTRICT: + case SCC_TOK_ATOMIC: + // 函数说明符 + case SCC_TOK_INLINE: + return true; + + // typedef 名称(标识符也可能是类型说明符) + case SCC_TOK_IDENT: + // 需要检查标识符是否在符号表中定义为 typedef + // 这里简化处理:假设所有标识符都可能是 typedef + // 在实际解析器中,需要查询符号表 + // TODO + return false; + + default: + return false; + } +} + +/** + * @brief 判断 token 是否为类型说明符的开始 + */ +cbool scc_parse_is_type_specifier_start(scc_parser_t *parser, usize offset) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, offset); + + switch (tok->type) { + // 基本类型说明符 + case SCC_TOK_VOID: + case SCC_TOK_CHAR: + case SCC_TOK_SHORT: + case SCC_TOK_INT: + case SCC_TOK_LONG: + case SCC_TOK_FLOAT: + case SCC_TOK_DOUBLE: + case 
SCC_TOK_SIGNED: + case SCC_TOK_UNSIGNED: + case SCC_TOK_BOOL: + case SCC_TOK_COMPLEX: + // 复合类型说明符 + case SCC_TOK_STRUCT: + case SCC_TOK_UNION: + case SCC_TOK_ENUM: + return true; + + // typedef 名称 + case SCC_TOK_IDENT: + // 需要检查标识符是否在符号表中定义为 typedef + return true; + + default: + return false; + } +} + +/** + * @brief 判断 token 是否为类型限定符的开始 + */ +cbool scc_parse_is_type_qualifier_start(scc_parser_t *parser, usize offset) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, offset); + + switch (tok->type) { + case SCC_TOK_CONST: + case SCC_TOK_VOLATILE: + case SCC_TOK_RESTRICT: + case SCC_TOK_ATOMIC: + return true; + default: + return false; + } +} + +/** + * @brief 判断 token 是否为存储类说明符的开始 + */ +cbool scc_parse_is_storage_class_start(scc_parser_t *parser, usize offset) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, offset); + + switch (tok->type) { + case SCC_TOK_TYPEDEF: + case SCC_TOK_EXTERN: + case SCC_TOK_STATIC: + case SCC_TOK_AUTO: + case SCC_TOK_REGISTER: + return true; + default: + return false; + } +} + +/** + * @brief 判断当前位置是否可以开始一个声明 + * + * 声明以声明说明符开始,包括: + * - 存储类说明符 + * - 类型说明符 + * - 类型限定符 + * - 函数说明符 + */ +cbool scc_parse_is_declaration_start(scc_parser_t *parser, usize offset) { + // 检查是否是声明说明符的开始 + return scc_parse_is_decl_specifier_start(parser, offset); +} + +// 前向声明辅助函数 +static scc_ast_type_t *parse_specifier_qualifier_list(scc_parser_t *parser, + usize *offset); +static scc_ast_type_t *parse_abstract_declarator(scc_parser_t *parser, + usize *offset, + scc_ast_type_t *base_type); +static scc_ast_type_t * +parse_direct_abstract_declarator(scc_parser_t *parser, usize *offset, + scc_ast_type_t *base_type); +static scc_ast_type_t *parse_pointer(scc_parser_t *parser, usize *offset); +static scc_ast_decl_specifier_t parse_type_qualifier_list(scc_parser_t *parser, + usize *offset); +static scc_ast_type_t *parse_struct_or_union_specifier(scc_parser_t *parser, + usize *offset); +static scc_ast_type_t 
*parse_enum_specifier(scc_parser_t *parser, + usize *offset); + +// 创建内置类型节点的辅助函数 +static scc_ast_type_t *create_builtin_type(scc_ast_builtin_type_t builtin, + scc_ast_decl_specifier_t quals) { + scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); + if (!type) + return null; + + type->base.type = SCC_AST_TYPE_BUILTIN; + type->builtin.builtin = builtin; + type->builtin.quals = quals; + return type; +} + +// 创建指针类型节点的辅助函数 +static scc_ast_type_t *create_pointer_type(scc_ast_type_t *pointee, + scc_ast_decl_specifier_t quals) { + scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); + if (!type) + return null; + + type->base.type = SCC_AST_TYPE_POINTER; + type->pointer.pointee = pointee; + type->pointer.quals = quals; + return type; +} + +// 创建数组类型节点的辅助函数 +static scc_ast_type_t *create_array_type(scc_ast_type_t *element, + scc_ast_expr_t *size) { + scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); + if (!type) + return null; + + type->base.type = SCC_AST_TYPE_ARRAY; + type->array.element = element; + type->array.size = size; + return type; +} + +// 创建函数类型节点的辅助函数 +static scc_ast_type_t *create_function_type(scc_ast_type_t *return_type, + scc_ast_type_vec_t param_types, + cbool is_variadic) { + scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); + if (!type) + return null; + + type->base.type = SCC_AST_TYPE_FUNCTION; + type->function.return_type = return_type; + type->function.param_types = param_types; + type->function.is_variadic = is_variadic; + return type; +} + +/** + * @brief 解析类型名 + */ +scc_ast_type_t *scc_parse_type_name(scc_parser_t *parser, usize *offset) { + usize start_offset = *offset; + + // 解析 specifier-qualifier-list + scc_ast_type_t *type = parse_specifier_qualifier_list(parser, offset); + if (type == null) { + // TODO + // LOG_ERROR("Failed to parse specifier-qualifier-list at offset %zu", + // start_offset); + return null; + } + + // 解析可选的 
abstract-declarator + scc_ast_type_t *full_type = parse_abstract_declarator(parser, offset, type); + if (full_type == null) { + return type; + } + + return full_type; +} + +scc_ast_type_t *scc_parse_type(scc_parser_t *parser) { + usize offset = 0; + scc_ast_type_t *ret = scc_parse_type_name(parser, &offset); + if (ret == null) { + return null; + } + scc_lexer_stream_advance(parser->lex_stream, offset); + return ret; +} + +/** + * @brief 检查是否为类型限定符 token + */ +static cbool is_type_qualifier_token(const scc_lexer_tok_t *tok) { + return tok->type == SCC_TOK_CONST || tok->type == SCC_TOK_RESTRICT || + tok->type == SCC_TOK_VOLATILE || tok->type == SCC_TOK_ATOMIC; +} + +/** + * @brief 解析类型限定符 + */ +static scc_ast_decl_specifier_t parse_type_qualifier(scc_parser_t *parser, + usize *offset) { + scc_ast_decl_specifier_t quals = {0}; + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + + switch (tok->type) { + case SCC_TOK_CONST: + quals.is_const = true; + (*offset)++; + break; + case SCC_TOK_RESTRICT: + quals.is_restrict = true; + (*offset)++; + break; + case SCC_TOK_VOLATILE: + quals.is_volatile = true; + (*offset)++; + break; + case SCC_TOK_ATOMIC: + quals.is_atomic = true; + (*offset)++; + break; + default: + // 不是限定符 + break; + } + + return quals; +} + +/** + * @brief 解析类型限定符列表 + */ +static scc_ast_decl_specifier_t parse_type_qualifier_list(scc_parser_t *parser, + usize *offset) { + scc_ast_decl_specifier_t quals = {0}; + + while (true) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + + if (!is_type_qualifier_token(tok)) { + break; + } + + scc_ast_decl_specifier_t new_qual = + parse_type_qualifier(parser, offset); + + // 合并限定符 + quals.is_const = quals.is_const || new_qual.is_const; + quals.is_restrict = quals.is_restrict || new_qual.is_restrict; + quals.is_volatile = quals.is_volatile || new_qual.is_volatile; + quals.is_atomic = quals.is_atomic || new_qual.is_atomic; + } + + return quals; +} + 
+/** + * @brief 检查是否为类型说明符 token + */ +static cbool is_type_specifier_token(const scc_lexer_tok_t *tok) { + switch (tok->type) { + case SCC_TOK_VOID: + case SCC_TOK_CHAR: + case SCC_TOK_SHORT: + case SCC_TOK_INT: + case SCC_TOK_LONG: + case SCC_TOK_FLOAT: + case SCC_TOK_DOUBLE: + case SCC_TOK_SIGNED: + case SCC_TOK_UNSIGNED: + case SCC_TOK_BOOL: + case SCC_TOK_COMPLEX: + case SCC_TOK_STRUCT: + case SCC_TOK_UNION: + case SCC_TOK_ENUM: + return true; + default: + // 可能是 typedef 名称 + return tok->type == SCC_TOK_IDENT; + } +} + +/** + * @brief 解析类型说明符 + */ +static scc_ast_type_t *parse_type_specifier(scc_parser_t *parser, + usize *offset) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + + // 处理简单内置类型 + if (tok->type == SCC_TOK_VOID) { + (*offset)++; + return create_builtin_type(TYPE_VOID, (scc_ast_decl_specifier_t){0}); + } else if (tok->type == SCC_TOK_CHAR) { + (*offset)++; + return create_builtin_type(TYPE_CHAR, (scc_ast_decl_specifier_t){0}); + } else if (tok->type == SCC_TOK_SHORT) { + (*offset)++; + return create_builtin_type(TYPE_SHORT, (scc_ast_decl_specifier_t){0}); + } else if (tok->type == SCC_TOK_INT) { + (*offset)++; + return create_builtin_type(TYPE_INT, (scc_ast_decl_specifier_t){0}); + } else if (tok->type == SCC_TOK_LONG) { + // 检查是否为 long long + const scc_lexer_tok_t *next_tok = + scc_lexer_stream_peek(parser->lex_stream, *offset + 1); + if (next_tok->type == SCC_TOK_LONG) { + (*offset) += 2; // 跳过两个 long + return create_builtin_type(TYPE_LONG_LONG, + (scc_ast_decl_specifier_t){0}); + } else { + (*offset)++; + return create_builtin_type(TYPE_LONG, + (scc_ast_decl_specifier_t){0}); + } + } else if (tok->type == SCC_TOK_FLOAT) { + (*offset)++; + return create_builtin_type(TYPE_FLOAT, (scc_ast_decl_specifier_t){0}); + } else if (tok->type == SCC_TOK_DOUBLE) { + // 检查是否为 long double + const scc_lexer_tok_t *next_tok = + scc_lexer_stream_peek(parser->lex_stream, *offset + 1); + if (next_tok->type == SCC_TOK_LONG) { + 
(*offset) += 2; // 跳过 double long + return create_builtin_type(TYPE_LONG_DOUBLE, + (scc_ast_decl_specifier_t){0}); + } else { + (*offset)++; + return create_builtin_type(TYPE_DOUBLE, + (scc_ast_decl_specifier_t){0}); + } + } else if (tok->type == SCC_TOK_BOOL) { + (*offset)++; + return create_builtin_type(TYPE_BOOL, (scc_ast_decl_specifier_t){0}); + } else if (tok->type == SCC_TOK_SIGNED || tok->type == SCC_TOK_UNSIGNED) { + // signed/unsigned 需要与后续类型组合 + // 这里简化处理,默认为 int + (*offset)++; + return create_builtin_type(TYPE_INT, (scc_ast_decl_specifier_t){0}); + } else if (tok->type == SCC_TOK_COMPLEX) { + // _Complex 需要与浮点类型组合 + // 这里简化处理 + (*offset)++; + return create_builtin_type(TYPE_COMPLEX_FLOAT, + (scc_ast_decl_specifier_t){0}); + } + + // 处理结构体/联合体 + if (tok->type == SCC_TOK_STRUCT || tok->type == SCC_TOK_UNION) { + return parse_struct_or_union_specifier(parser, offset); + } + + // 处理枚举 + if (tok->type == SCC_TOK_ENUM) { + return parse_enum_specifier(parser, offset); + } + + // 处理 typedef 名称 + if (tok->type == SCC_TOK_IDENT) { + // TODO + return null; + scc_ast_type_t *type = + (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); + if (!type) + return null; + + type->base.type = SCC_AST_TYPE_TYPEDEF; + type->typedef_type.name = tok->value.cstr.data; + type->typedef_type.underlying = null; // 需要从符号表解析 + + (*offset)++; + return type; + } + + return null; +} + +/** + * @brief 解析结构体或联合体说明符 + */ +static scc_ast_type_t *parse_struct_or_union_specifier(scc_parser_t *parser, + usize *offset) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + cbool is_struct = (tok->type == SCC_TOK_STRUCT); + + (*offset)++; // 跳过 struct 或 union + + scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); + if (!type) + return null; + + type->base.type = is_struct ? 
SCC_AST_TYPE_STRUCT : SCC_AST_TYPE_UNION; + type->record.name = null; + scc_vec_init(type->record.fields); + + // 检查是否有标识符 + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type == SCC_TOK_IDENT) { + type->record.name = tok->value.cstr.data; + (*offset)++; + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + } + + // 如果有 '{',解析结构体定义 + if (tok->type == SCC_TOK_L_BRACE) { + (*offset)++; // 跳过 '{' + + // TODO: 解析 struct-declaration-list + // 这是一个复杂的子解析器,需要单独实现 + + // 临时:跳过所有声明直到遇到 '}' + while (true) { + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type == SCC_TOK_R_BRACE) + break; + if (tok->type == SCC_TOK_EOF) { + LOG_ERROR("Unclosed struct/union definition"); + scc_free(type); + return null; + } + (*offset)++; + } + + (*offset)++; // 跳过 '}' + } + + return type; +} + +/** + * @brief 解析枚举说明符 + */ +static scc_ast_type_t *parse_enum_specifier(scc_parser_t *parser, + usize *offset) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type != SCC_TOK_ENUM) + return null; + + (*offset)++; // 跳过 enum + + scc_ast_type_t *type = (scc_ast_type_t *)scc_malloc(sizeof(scc_ast_type_t)); + if (!type) + return null; + + type->base.type = SCC_AST_TYPE_ENUM; + type->enumeration.name = null; + scc_vec_init(type->enumeration.enumerators); + + // 检查是否有标识符 + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type == SCC_TOK_IDENT) { + type->enumeration.name = tok->value.cstr.data; + (*offset)++; + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + } + + // 如果有 '{',解析枚举定义 + if (tok->type == SCC_TOK_L_BRACE) { + (*offset)++; // 跳过 '{' + + // TODO: 解析 enumerator-list + // 这是一个复杂的子解析器,需要单独实现 + + // 临时:跳过所有枚举项直到遇到 '}' + while (true) { + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type == SCC_TOK_R_BRACE) + break; + if (tok->type == SCC_TOK_EOF) { + LOG_ERROR("Unclosed enum definition"); + scc_free(type); + return null; + } + (*offset)++; + } + 
+ (*offset)++; // 跳过 '}' + } + + return type; +} + +/** + * @brief 解析指定符-限定符列表 + */ +static scc_ast_type_t *parse_specifier_qualifier_list(scc_parser_t *parser, + usize *offset) { + scc_ast_decl_specifier_t quals = {0}; + + // 收集类型限定符(可能出现在前面) + while (true) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + + if (!is_type_qualifier_token(tok)) { + break; + } + + scc_ast_decl_specifier_t new_quals = + parse_type_qualifier(parser, offset); + quals.is_const |= new_quals.is_const; + quals.is_restrict |= new_quals.is_restrict; + quals.is_volatile |= new_quals.is_volatile; + quals.is_atomic |= new_quals.is_atomic; + } + + // 解析类型说明符 + scc_ast_type_t *type_specifier = parse_type_specifier(parser, offset); + if (!type_specifier) { + // TODO + // LOG_ERROR("Expected type specifier"); + return null; + } + + // 收集可能出现在类型说明符后面的限定符 + while (true) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + + if (!is_type_qualifier_token(tok)) { + break; + } + + scc_ast_decl_specifier_t new_quals = + parse_type_qualifier(parser, offset); + quals.is_const |= new_quals.is_const; + quals.is_restrict |= new_quals.is_restrict; + quals.is_volatile |= new_quals.is_volatile; + quals.is_atomic |= new_quals.is_atomic; + } + + // 将限定符应用到类型上 + if (type_specifier->base.type == SCC_AST_TYPE_BUILTIN) { + type_specifier->builtin.quals = quals; + } else if (type_specifier->base.type == SCC_AST_TYPE_POINTER) { + // 对于指针类型,这里的限定符应该应用到指针本身 + type_specifier->pointer.quals = quals; + } else if (type_specifier->base.type == SCC_AST_TYPE_TYPEDEF) { + // typedef 类型可能也有限定符,但需要保存在其他地方 + // 这里简化处理 + } + + return type_specifier; +} + +/** + * @brief 解析指针 + */ +static scc_ast_type_t *parse_pointer(scc_parser_t *parser, usize *offset) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + + if (tok->type != SCC_TOK_MUL) { + return null; // 不是指针 + } + + (*offset)++; // 跳过 '*' + + // 解析可选的类型限定符列表 + 
scc_ast_decl_specifier_t ptr_quals = + parse_type_qualifier_list(parser, offset); + + // 递归解析更多指针(多重指针) + scc_ast_type_t *inner_pointer = parse_pointer(parser, offset); + + // 创建指针类型 + scc_ast_type_t *ptr_type = create_pointer_type(null, ptr_quals); + if (!ptr_type) + return null; + + if (inner_pointer) { + // 有更多指针,将它们链接起来 + scc_ast_type_t *current = ptr_type; + while (current->base.type == SCC_AST_TYPE_POINTER && + current->pointer.pointee && + current->pointer.pointee->base.type == SCC_AST_TYPE_POINTER) { + current = current->pointer.pointee; + } + current->pointer.pointee = inner_pointer; + } + + return ptr_type; +} + +/** + * @brief 解析抽象声明符 + */ +static scc_ast_type_t *parse_abstract_declarator(scc_parser_t *parser, + usize *offset, + scc_ast_type_t *base_type) { + // 解析可选的指针 + scc_ast_type_t *ptr_type = parse_pointer(parser, offset); + + if (ptr_type) { + // 将指针的 pointee 指向解析出的直接抽象声明符 + scc_ast_type_t *direct_type = + parse_direct_abstract_declarator(parser, offset, base_type); + + // 找到最内层的指针 + scc_ast_type_t *current = ptr_type; + while (current->base.type == SCC_AST_TYPE_POINTER && + current->pointer.pointee && + current->pointer.pointee->base.type == SCC_AST_TYPE_POINTER) { + current = current->pointer.pointee; + } + + if (current->base.type == SCC_AST_TYPE_POINTER) { + current->pointer.pointee = direct_type; + } + + return ptr_type; + } else { + // 没有指针,直接解析直接抽象声明符 + return parse_direct_abstract_declarator(parser, offset, base_type); + } +} + +/** + * @brief 解析直接抽象声明符 + */ +static scc_ast_type_t * +parse_direct_abstract_declarator(scc_parser_t *parser, usize *offset, + scc_ast_type_t *base_type) { + scc_ast_type_t *current_type = base_type; + + while (true) { + const scc_lexer_tok_t *tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + + // 情况1: ( abstract-declarator ) + if (tok->type == SCC_TOK_L_PAREN) { + (*offset)++; // 跳过 '(' + + // 解析括号内的抽象声明符 + scc_ast_type_t *inner_type = + parse_abstract_declarator(parser, offset, current_type); + + // 
期望右括号 + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type != SCC_TOK_R_PAREN) { + LOG_ERROR("Expected ')' after abstract-declarator"); + return current_type; + } + (*offset)++; // 跳过 ')' + + current_type = inner_type; + continue; + } + + // 情况2: 数组声明符 [ ... ] + if (tok->type == SCC_TOK_L_BRACKET) { + (*offset)++; // 跳过 '[' + + // 检查是否是 [ * ] (可变长度数组) + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type == SCC_TOK_MUL) { + // 可变长度数组 + scc_ast_type_t *array_type = + create_array_type(current_type, null); + if (!array_type) + return current_type; + + (*offset)++; // 跳过 '*' + } else { + // 解析可选的 static 关键字 + cbool has_static = false; + if (tok->type == SCC_TOK_STATIC) { + has_static = true; + (*offset)++; + } + + // 解析可选的类型限定符列表 + parse_type_qualifier_list(parser, offset); + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + + // 如果之前没有 static,但有限定符,再检查一次 static + if (!has_static && tok->type == SCC_TOK_STATIC) { + has_static = true; + (*offset)++; + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + } + + // 解析可选的赋值表达式(数组大小) + scc_ast_expr_t *size_expr = null; + if (tok->type != SCC_TOK_R_BRACKET) { + // TODO: 解析 assignment-expression + // 简化:设置 size 为 null + size_expr = null; + + // 跳过表达式 + while (tok->type != SCC_TOK_R_BRACKET && + tok->type != SCC_TOK_EOF) { + (*offset)++; + tok = + scc_lexer_stream_peek(parser->lex_stream, *offset); + } + } + + scc_ast_type_t *array_type = + create_array_type(current_type, size_expr); + if (!array_type) + return current_type; + + current_type = array_type; + } + + // 期望右括号 + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type != SCC_TOK_R_BRACKET) { + LOG_ERROR("Expected ']' after array declarator"); + return current_type; + } + (*offset)++; // 跳过 ']' + continue; + } + + // 情况3: 函数声明符 ( parameter-type-list(opt) ) + if (tok->type == SCC_TOK_L_PAREN) { + (*offset)++; // 跳过 '(' + + // 检查是否为空参数列表 () + tok = scc_lexer_stream_peek(parser->lex_stream, 
*offset); + if (tok->type == SCC_TOK_R_PAREN) { + // 空参数列表 + (*offset)++; // 跳过 ')' + scc_ast_type_t *func_type = create_function_type( + current_type, (scc_ast_type_vec_t){0}, false); + if (!func_type) + return current_type; + current_type = func_type; + continue; + } else if (tok->type == SCC_TOK_VOID) { + // void 参数列表 + (*offset)++; // 跳过 void + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type == SCC_TOK_R_PAREN) { + (*offset)++; // 跳过 ')' + scc_ast_type_vec_t param_types; + scc_vec_init(param_types); + scc_ast_type_t *func_type = + create_function_type(current_type, param_types, false); + if (!func_type) + return current_type; + current_type = func_type; + continue; + } else { + LOG_ERROR("Expected ')' after void parameter list"); + return current_type; + } + } else { + // TODO: 解析 parameter-type-list + // 这是一个复杂的子解析器,需要单独实现 + + scc_ast_type_vec_t param_types; + scc_vec_init(param_types); + cbool is_variadic = false; + + // 临时:跳过所有参数直到遇到 ')' 或 '...' + while (true) { + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type == SCC_TOK_R_PAREN) + break; + if (tok->type == SCC_TOK_ELLIPSIS) { + is_variadic = true; + (*offset)++; + break; + } + if (tok->type == SCC_TOK_EOF) { + LOG_ERROR("Unclosed function parameter list"); + return current_type; + } + (*offset)++; + } + + tok = scc_lexer_stream_peek(parser->lex_stream, *offset); + if (tok->type == SCC_TOK_R_PAREN) { + (*offset)++; // 跳过 ')' + } + + scc_ast_type_t *func_type = create_function_type( + current_type, param_types, is_variadic); + if (!func_type) + return current_type; + current_type = func_type; + continue; + } + } + + // 没有更多的直接抽象声明符 + break; + } + + return current_type; +} + +// 提供类型检查接口 +cbool scc_ast_type_is_builtin(const scc_ast_type_t *type) { + return type && type->base.type == SCC_AST_TYPE_BUILTIN; +} + +cbool scc_ast_type_is_pointer(const scc_ast_type_t *type) { + return type && type->base.type == SCC_AST_TYPE_POINTER; +} + +cbool 
scc_ast_type_is_array(const scc_ast_type_t *type) { + return type && type->base.type == SCC_AST_TYPE_ARRAY; +} + +cbool scc_ast_type_is_function(const scc_ast_type_t *type) { + return type && type->base.type == SCC_AST_TYPE_FUNCTION; +} + +cbool scc_ast_type_is_struct(const scc_ast_type_t *type) { + return type && type->base.type == SCC_AST_TYPE_STRUCT; +} + +cbool scc_ast_type_is_union(const scc_ast_type_t *type) { + return type && type->base.type == SCC_AST_TYPE_UNION; +} + +cbool scc_ast_type_is_enum(const scc_ast_type_t *type) { + return type && type->base.type == SCC_AST_TYPE_ENUM; +} + +cbool scc_ast_type_is_typedef(const scc_ast_type_t *type) { + return type && type->base.type == SCC_AST_TYPE_TYPEDEF; +} + +// 获取内置类型 +scc_ast_builtin_type_t scc_ast_type_get_builtin(const scc_ast_type_t *type) { + if (!scc_ast_type_is_builtin(type)) + return TYPE_VOID; + return type->builtin.builtin; +} + +// 获取指针指向的类型 +scc_ast_type_t *scc_ast_type_get_pointee(const scc_ast_type_t *type) { + if (!scc_ast_type_is_pointer(type)) + return null; + return type->pointer.pointee; +} + +// 获取数组元素类型 +scc_ast_type_t *scc_ast_type_get_element(const scc_ast_type_t *type) { + if (!scc_ast_type_is_array(type)) + return null; + return type->array.element; +} + +// 获取函数返回类型 +scc_ast_type_t *scc_ast_type_get_return(const scc_ast_type_t *type) { + if (!scc_ast_type_is_function(type)) + return null; + return type->function.return_type; +} + +// 获取类型限定符 +scc_ast_decl_specifier_t scc_ast_type_get_quals(const scc_ast_type_t *type) { + if (!type) + return (scc_ast_decl_specifier_t){0}; + + switch (type->base.type) { + case SCC_AST_TYPE_BUILTIN: + return type->builtin.quals; + case SCC_AST_TYPE_POINTER: + return type->pointer.quals; + default: + return (scc_ast_decl_specifier_t){0}; + } +} \ No newline at end of file diff --git a/libs/parser/src/parser.c b/libs/parser/src/parser.c new file mode 100644 index 0000000..9dfeac3 --- /dev/null +++ b/libs/parser/src/parser.c @@ -0,0 +1,85 @@ +/** + * @file 
parser.c + * @brief 新的解析器实现 + */ + +#include "parser.h" +#include + +static void dummy_sema_callback(void *context, scc_ast_node_type_t node_type, + void *node) { + (void)context; + (void)node_type; + (void)node; + return; +} + +#define ASSIGN_PTR_OR_DEFAULT(assigned_val, value, default) \ + assigned_val = value ? value : default + +void scc_parser_init(scc_parser_t *parser, scc_lexer_stream_t *lexer, + scc_sema_callbacks_t *callbacks) { + Assert(parser != null && lexer != null); + parser->lex_stream = lexer; + parser->has_error = false; + parser->translation_unit = null; + if (callbacks) { + ASSIGN_PTR_OR_DEFAULT(parser->sema_callbacks.on_decl, + callbacks->on_decl, dummy_sema_callback); + ASSIGN_PTR_OR_DEFAULT(parser->sema_callbacks.on_stmt, + callbacks->on_stmt, dummy_sema_callback); + ASSIGN_PTR_OR_DEFAULT(parser->sema_callbacks.on_expr, + callbacks->on_expr, dummy_sema_callback); + ASSIGN_PTR_OR_DEFAULT(parser->sema_callbacks.on_type, + callbacks->on_type, dummy_sema_callback); + parser->sema_callbacks.context = callbacks->context; + } else { + parser->sema_callbacks.on_decl = dummy_sema_callback; + parser->sema_callbacks.on_stmt = dummy_sema_callback; + parser->sema_callbacks.on_expr = dummy_sema_callback; + parser->sema_callbacks.on_type = dummy_sema_callback; + parser->sema_callbacks.context = dummy_sema_callback; + } + // // ONLY FOR INIT TYPE + // parser->current_token.type = SCC_TOK_UNKNOWN; +} + +void scc_parser_drop(scc_parser_t *parser) { + // TODO: 释放 AST 内存 + (void)parser; +} + +scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser) { + scc_ast_translation_unit_t *unit = + scc_malloc(sizeof(scc_ast_translation_unit_t)); + if (!unit) + return null; + unit->base.type = SCC_AST_TRANSLATION_UNIT; + scc_vec_init(unit->declarations); + + /** + * Program := (Declaration | Definition)* + * same as + * Program := Declaration* Definition* + */ + do { + scc_ast_decl_t *decl = scc_parse_declaration(parser); + if (decl != null) { + 
scc_vec_push(unit->declarations, decl); + } else { + break; + // MAYBE return or next + } + } while (!scc_lexer_tok_match(scc_lexer_stream_current(parser->lex_stream), + SCC_TOK_EOF) && + !parser->has_error); + + if (parser->has_error) { + // TODO: 清理 + scc_free(unit); + return null; + } + + Assert(unit->base.type == SCC_AST_TRANSLATION_UNIT); + return unit; +}