From 088050c9030a1b254eaa1cfec3c68151a8644343 Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Fri, 13 Feb 2026 17:26:50 +0800 Subject: [PATCH] =?UTF-8?q?feat(argparse):=20=E6=B7=BB=E5=8A=A0=E9=80=89?= =?UTF-8?q?=E6=8B=A9=E7=B1=BB=E5=9E=8B=E6=94=AF=E6=8C=81=E5=92=8C=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E5=A4=84=E7=90=86=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加了 SCC_ARGPARSE_ERR_PNT_DEFAULT 错误类型用于默认操作处理, 实现了 scc_argparse_spec_setup_choices 函数支持枚举选择, 重构了错误处理流程使返回值更加一致。 修复了长选项名称匹配的逻辑错误。 feat(lexer): 添加换行符和注释符号的词法标记 新增 SCC_TOK_ENDLINE 和 SCC_TOK_SHARP 标记类型, 改进词法分析器对换行符和井号的识别处理。 feat(scc_core): 添加常用宏定义 添加 scc_min 和 scc_max 宏定义提供基础数值比较功能。 feat(main): 实现编译器主程序和命令行接口 创建主程序入口实现完整的编译流程, 集成预处理器、词法分析、语法分析和IR生成模块, 添加AST和IR输出功能支持调试查看中间表示。 chore(build): 配置项目构建依赖关系 创建 cbuild.toml 配置文件定义项目包信息和依赖库, 建立编译器各组件库之间的依赖关系管理。 --- cbuild.toml | 13 ++ libs/argparse/example/main.c | 112 ++++++++++++ libs/argparse/include/argparse.h | 9 + libs/argparse/src/argparse.c | 45 ++--- libs/argparse/src/argparse_print.c | 3 + libs/argparse/src/optparse.c | 2 +- libs/lexer/include/lexer_token.h | 2 + libs/lexer/src/lexer.c | 4 +- runtime/scc_core/include/scc_core_macro.h | 3 + src/main.c | 199 ++++++++++++++++++++++ 10 files changed, 370 insertions(+), 22 deletions(-) create mode 100644 cbuild.toml create mode 100644 libs/argparse/example/main.c create mode 100644 src/main.c diff --git a/cbuild.toml b/cbuild.toml new file mode 100644 index 0000000..e6460ad --- /dev/null +++ b/cbuild.toml @@ -0,0 +1,13 @@ +[package] +name = "scc" +version = "0.1.0" + +dependencies = [ + { name = "argparse", path = "./libs/argparse" }, + { name = "pprocessor", path = "./libs/pprocessor" }, + { name = "lexer", path = "./libs/lexer" }, + { name = "parser", path = "./libs/parser" }, + { name = "ast", path = "./libs/ast" }, + { name = "ast2ir", path = "./libs/ast2ir" }, + { name = "ir", path = "./libs/ir" }, +] diff --git 
a/libs/argparse/example/main.c b/libs/argparse/example/main.c
new file mode 100644
index 0000000..ea4571f
--- /dev/null
+++ b/libs/argparse/example/main.c
@@ -0,0 +1,118 @@
+#include
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include
+#endif
+
+typedef struct {
+    int verbose;            // -v, --verbose
+    const char *output;     // -o, --output
+    const char *config;     // --config
+    const char *input;      // input argument
+    const char *output_dir; // output_dir argument
+    cbool force;            // -f, --force (for subcommand)
+    const char *path;       // path argument (for subcommand)
+} parsed_args_t;
+
+/**
+ * Demo: registers options, positional arguments and an `init` subcommand,
+ * parses argv and, when parsing returns 0, prints every stored value.
+ */
+int main(int argc, const char **argv, const char **envp) {
+#ifdef _WIN32
+    SetConsoleOutputCP(CP_UTF8);
+    SetConsoleCP(CP_UTF8);
+#endif
+
+    // Initialise the struct that receives the parsed values
+    parsed_args_t parsed = {0};
+    parsed.output = "a.out"; // default
+    parsed.output_dir = "."; // default
+    parsed.path = ".";       // default
+
+    // Initialise argparse
+    scc_argparse_t argparse;
+    scc_argparse_init(&argparse, "demo", "一个命令行参数解析演示工具");
+    argparse.need_debug = true;
+    argparse.lang = SCC_ARGPARSE_LANG_ZH; // Chinese help text
+
+    scc_argparse_cmd_t *root = scc_argparse_get_root(&argparse);
+
+    // ---------- options ----------
+    // 1. --verbose / -v (counted)
+    scc_argparse_opt_t verbose;
+    scc_argparse_opt_init(&verbose, 'v', "verbose",
+                          "增加详细程度(可多次使用)");
+    scc_argparse_spec_setup_count(&verbose.spec, &parsed.verbose);
+    scc_argparse_cmd_add_opt(root, &verbose);
+
+    // 2. --output / -o (string)
+    scc_argparse_opt_t output;
+    scc_argparse_opt_init(&output, 'o', "output", "输出文件路径");
+    scc_argparse_spec_setup_string(&output.spec, &parsed.output);
+    scc_argparse_cmd_add_opt(root, &output);
+
+    // 3. --config (string)
+    scc_argparse_opt_t config;
+    scc_argparse_opt_init(&config, 0, "config", "配置文件路径");
+    scc_argparse_spec_setup_string(&config.spec, &parsed.config);
+    scc_argparse_cmd_add_opt(root, &config);
+
+    // ---------- positional arguments ----------
+    // required argument: input
+    scc_argparse_arg_t input;
+    scc_argparse_arg_init(&input, "input", "输入文件");
+    scc_argparse_spec_setup_string(&input.spec, &parsed.input);
+    scc_argparse_spec_set_required(&input.spec, true);
+    scc_argparse_cmd_add_arg(root, &input);
+
+    // optional argument: output_dir
+    scc_argparse_arg_t output_dir;
+    scc_argparse_arg_init(&output_dir, "output_dir", "输出目录");
+    scc_argparse_spec_setup_string(&output_dir.spec, &parsed.output_dir);
+    scc_argparse_spec_set_required(&output_dir.spec, false);
+    scc_argparse_cmd_add_arg(root, &output_dir);
+
+    // ---------- subcommand init ----------
+    scc_argparse_cmd_t init_cmd;
+    scc_argparse_cmd_init(&init_cmd, "init", "初始化仓库");
+
+    // subcommand option: --force / -f
+    scc_argparse_opt_t force;
+    scc_argparse_opt_init(&force, 'f', "force", "强制覆盖");
+    scc_argparse_spec_setup_bool(&force.spec, &parsed.force);
+    scc_argparse_cmd_add_opt(&init_cmd, &force);
+
+    // subcommand positional argument: path (optional)
+    scc_argparse_arg_t path;
+    scc_argparse_arg_init(&path, "path", "初始化路径");
+    scc_argparse_spec_setup_string(&path.spec, &parsed.path);
+    scc_argparse_spec_set_required(&path.spec, false);
+    scc_argparse_cmd_add_arg(&init_cmd, &path);
+
+    // attach the subcommand to the root command
+    scc_argparse_cmd_add_subcmd(root, &init_cmd);
+
+    // parse the command line
+    int ret = scc_argparse_parse(&argparse, argc, argv);
+
+    // print what was parsed
+    if (ret == 0) {
+        scc_printf("\n解析成功\n");
+        scc_printf("解析到的参数和选项:\n");
+        scc_printf(" verbose: %d\n", parsed.verbose);
+        scc_printf(" output: %s\n", parsed.output);
+        scc_printf(" config: %s\n",
+                   parsed.config ? parsed.config : "(未指定)");
+        // input stays NULL when the `init` subcommand is used: only the
+        // active command's required arguments are validated.
+        scc_printf(" input: %s\n", parsed.input ? parsed.input : "(未指定)");
+        scc_printf(" output_dir: %s\n", parsed.output_dir);
+        scc_printf(" force: %s\n", parsed.force ? "true" : "false");
+        scc_printf(" path: %s\n", parsed.path);
+    }
+
+    scc_argparse_drop(&argparse);
+    return ret;
+}
\ No newline at end of file
diff --git a/libs/argparse/include/argparse.h b/libs/argparse/include/argparse.h
index 05641ec..37788d2 100644
--- a/libs/argparse/include/argparse.h
+++ b/libs/argparse/include/argparse.h
@@ -33,6 +33,7 @@ typedef enum scc_argparse_val_type {
 
 typedef enum scc_argparse_err {
     SCC_ARGPARSE_ERR_NONE,
+    SCC_ARGPARSE_ERR_PNT_DEFAULT,
     SCC_ARGPARSE_ERR_UNKNOWN_ERR,
     SCC_ARGPARSE_ERR_INVALID_ARG,
     SCC_ARGPARSE_ERR_INVALID_VALUE,
@@ -206,6 +207,18 @@ static inline void scc_argparse_spec_setup_float(scc_argparse_spec_t *spec,
     spec->store.float_store = store;
 }
 
+/**
+ * Set `spec` to the enum value type and record `values` (with `count`
+ * entries) as its choice list. The array is referenced, not copied.
+ */
+static inline void scc_argparse_spec_setup_choices(scc_argparse_spec_t *spec,
+                                                   const char **values,
+                                                   int count) {
+    spec->value_type = SCC_ARGPARSE_VAL_TYPE_ENUM;
+    spec->choices.values = values;
+    spec->choices.count = count;
+}
+
 #define SCC_ARGPARSE_MACRO_SETTER(attr)                                        \
     static inline void scc_argparse_spec_set_##attr(scc_argparse_spec_t *spec, \
                                                     cbool flag) {              \
diff --git a/libs/argparse/src/argparse.c b/libs/argparse/src/argparse.c
index 6c6645d..df84f36 100644
--- a/libs/argparse/src/argparse.c
+++ b/libs/argparse/src/argparse.c
@@ -132,29 +132,33 @@ static int handle_parse_error(scc_argparse_t *parser,
     return error;
 }
 
-static void validate_and_cleanup(scc_argparse_context_t *ctx,
-                                 scc_argparse_t *parser) {
-    // 检查必需参数是否都已提供
-    scc_vec_foreach(ctx->current_cmd->args, i) {
-        scc_argparse_arg_t *arg = &scc_vec_at(ctx->current_cmd->args, i);
-        if (arg->spec.flag_required && *arg->spec.store.str_store == NULL) {
-            scc_argparse_print_error(ctx, SCC_ARGPARSE_ERR_MISSING_ARG);
-            break;
+static int validate_and_cleanup(scc_argparse_context_t *ctx,
+                                scc_argparse_t *parser, int errcode) {
+    if (errcode == SCC_ARGPARSE_ERR_NONE) {
+        // Check that every required argument has been provided
+        scc_vec_foreach(ctx->current_cmd->args, i) {
+            scc_argparse_arg_t *arg = &scc_vec_at(ctx->current_cmd->args, i);
+            if
(arg->spec.flag_required && *arg->spec.store.str_store == NULL) {
+                errcode = SCC_ARGPARSE_ERR_MISSING_ARG;
+                scc_argparse_print_error(ctx, errcode);
+                break;
+            }
         }
     }
 
     // 清理资源
     scc_vec_free(ctx->opts);
     scc_optparse_drop(&ctx->optparse);
+    return errcode; // hand the final status back to the caller
 }
 
-static void handle_option(scc_argparse_context_t *ctx, scc_argparse_t *parser) {
+static int handle_option(scc_argparse_context_t *ctx, scc_argparse_t *parser) {
     scc_argparse_opt_t *opt = (scc_argparse_opt_t *)ctx->result.opt->user_data;
 
     if (parser->need_help && scc_strcmp(opt->long_name, "help") == 0) {
         scc_argparse_print_help(parser, ctx->current_cmd);
         ctx->parsing_done = true;
-        return;
+        return SCC_ARGPARSE_ERR_PNT_DEFAULT; // help was printed; parsing stops here
     }
 
     if (opt->spec.flag_store_as_count) {
@@ -190,16 +194,17 @@ static void handle_option(scc_argparse_context_t *ctx, scc_argparse_t *parser) {
     //     // TODO
     //     *org_opt->spec.store.str_store = opt_res.value;
     // }
+    return SCC_ARGPARSE_ERR_NONE;
 }
 
-static void handle_positional_arg(scc_argparse_context_t *ctx,
-                                  scc_argparse_t *parser) {
+static int handle_positional_arg(scc_argparse_context_t *ctx,
+                                 scc_argparse_t *parser) {
     scc_argparse_cmd_t *subcmd =
         is_subcommand(ctx->current_cmd, ctx->result.value);
     if (subcmd != NULL) {
         ctx->current_cmd = subcmd;
         parse_cmd(&ctx->optparse, &ctx->opts, ctx->current_cmd);
-        return;
+        return SCC_ARGPARSE_ERR_NONE; // switching to a subcommand is not an error
     }
 
     if (ctx->positional_index < scc_vec_size(ctx->current_cmd->args)) {
@@ -231,11 +236,13 @@ static void handle_positional_arg(scc_argparse_context_t *ctx,
     //         }
     //     }
     // }
+    return SCC_ARGPARSE_ERR_NONE;
 }
 
 int scc_argparse_parse(scc_argparse_t *parser, int argc, const char **argv) {
     scc_argparse_context_t ctx = {0};
     init_context(&ctx, parser, argc, argv); // 初始化上下文
+    int errcode = SCC_ARGPARSE_ERR_NONE; // status of the most recent handler call
 
     while (!ctx.parsing_done &&
            scc_optparse_parse(&ctx.optparse, &ctx.result)) {
@@ -245,22 +252,22 @@ int scc_argparse_parse(scc_argparse_t *parser, int argc, const char **argv) {
                        ctx.result.opt ? ctx.result.opt->long_name : "--",
                        ctx.result.error, ctx.result.value);
         }
+
         if (ctx.result.error) {
-            handle_parse_error(parser, &ctx);
-            return 1;
+            errcode = handle_parse_error(parser, &ctx);
+            break; // leave the loop so validate_and_cleanup below still frees ctx
         }
 
         if (ctx.result.opt != null) {
-            handle_option(&ctx, parser);
+            errcode = handle_option(&ctx, parser);
         } else if (ctx.result.value != null) {
-            handle_positional_arg(&ctx, parser);
+            errcode = handle_positional_arg(&ctx, parser);
         } else {
             UNREACHABLE(); // 不应到达此处
         }
     }
 
-    validate_and_cleanup(&ctx, parser);
-    return 0;
+    return validate_and_cleanup(&ctx, parser, errcode);
 }
 
 void scc_argparse_cmd_init(scc_argparse_cmd_t *cmd, const char *name,
diff --git a/libs/argparse/src/argparse_print.c b/libs/argparse/src/argparse_print.c
index 467b081..5e0b782 100644
--- a/libs/argparse/src/argparse_print.c
+++ b/libs/argparse/src/argparse_print.c
@@ -174,6 +174,9 @@ void scc_argparse_print_help(scc_argparse_t *parser, scc_argparse_cmd_t *cmd) {
 
 const char *scc_argparse_find_similar_arg(scc_argparse_cmd_t *cmd,
                                           const char *arg) {
+    if (arg == null || cmd == null) { // guard: arg[0] and cmd->opts are read below
+        return null;
+    }
     if (arg[0] == '-' && arg[1] == '-' && arg[2] != '\0') {
         // opt arg
         scc_vec_foreach(cmd->opts, i) {
diff --git a/libs/argparse/src/optparse.c b/libs/argparse/src/optparse.c
index 9ad8d12..37d6a4d 100644
--- a/libs/argparse/src/optparse.c
+++ b/libs/argparse/src/optparse.c
@@ -43,7 +43,7 @@ scc_optparse_get_long_name(const scc_optparse_opt_t *opts, const char *name,
                 break;
             }
         }
-        if (name[i] == '\0' || name[i] == end)
+        if (opt->long_name[i] == '\0' && (name[i] == '\0' || name[i] == end)) // the option's own name must be fully consumed too
             return opt;
     }
     return 0;
diff --git a/libs/lexer/include/lexer_token.h b/libs/lexer/include/lexer_token.h
index 0269e68..ca09d36 100644
--- a/libs/lexer/include/lexer_token.h
+++ b/libs/lexer/include/lexer_token.h
@@ -57,6 +57,8 @@ typedef enum scc_cstd {
     X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
     X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
     X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
+    X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \
+    X("#" , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_SHARP ) \
     X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
     X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
     X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
diff --git a/libs/lexer/src/lexer.c b/libs/lexer/src/lexer.c
index f264019..4ef0771 100644
--- a/libs/lexer/src/lexer.c
+++ b/libs/lexer/src/lexer.c
@@ -334,11 +334,11 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
         scc_probe_stream_back(stream);
         scc_lex_parse_skip_endline(stream, &lexer->pos);
         scc_probe_stream_sync(stream);
-        token->type = SCC_TOK_BLANK;
+        token->type = SCC_TOK_ENDLINE; // line breaks are no longer reported as blank
         goto END;
     case '#':
         parse_line(lexer, token);
-        token->type = SCC_TOK_BLANK;
+        token->type = SCC_TOK_SHARP; // '#' input is no longer reported as blank
         goto END;
     case '\0':
     case scc_stream_eof:
diff --git a/runtime/scc_core/include/scc_core_macro.h b/runtime/scc_core/include/scc_core_macro.h
index c108fac..49102d9 100644
--- a/runtime/scc_core/include/scc_core_macro.h
+++ b/runtime/scc_core/include/scc_core_macro.h
@@ -8,4 +8,7 @@
 
 #define SCC_FUNC
 
+#define scc_min(a, b) ((a) < (b) ? (a) : (b)) // NOTE: each argument is evaluated twice
+#define scc_max(a, b) ((a) > (b) ?
(a) : (b))
+
 #endif // __SCC_CORE_MACRO_H__
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..e972f47
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,244 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+/**
+ * Read the entire contents of `fp` into a heap buffer and wrap it in a memory
+ * probe stream. `fp` is closed on every path, success or failure.
+ *
+ * @param fp open, seekable input file
+ * @return the new stream, or NULL after reporting the failure with perror
+ */
+static scc_probe_stream_t *from_file_stream(FILE *fp) {
+    if (fseek(fp, 0, SEEK_END) != 0) {
+        perror("fseek failed");
+        fclose(fp);
+        return NULL;
+    }
+    // ftell signals failure with -1, so test it before converting to usize.
+    long end_pos = ftell(fp);
+    if (end_pos < 0) {
+        perror("ftell failed");
+        fclose(fp);
+        return NULL;
+    }
+    if (fseek(fp, 0, SEEK_SET) != 0) {
+        perror("fseek failed");
+        fclose(fp);
+        return NULL;
+    }
+
+    usize fsize = (usize)end_pos;
+    char *buffer = (char *)scc_malloc(fsize);
+    if (buffer == NULL && fsize != 0) {
+        perror("scc_malloc failed");
+        fclose(fp);
+        return NULL;
+    }
+    if (fsize != 0) {
+        scc_memset(buffer, 0, fsize);
+    }
+    // fread may deliver fewer bytes than fsize (e.g. newline translation in
+    // text mode), so the stream is sized by the count actually read.
+    usize read_ret = fread(buffer, 1, fsize, fp);
+    fclose(fp);
+
+    return scc_mem_probe_stream_alloc(buffer, read_ret, true);
+}
+
+/* Settings collected from the command line by setup_argparse(). */
+typedef struct {
+    const char *input_file;  // positional "input" argument
+    const char *output_file; // -o, --output
+    int verbose;             // -v, --verbose (occurrence count)
+    cbool dump_ast;          // -T, --emit-ast
+    cbool dump_ir;           // -R, --emit-ir
+} scc_config_t;
+
+/**
+ * Initialise `argparse` and register the compiler's options on its root
+ * command, storing parsed values into `config`.
+ *
+ * @param argparse parser to initialise
+ * @param config   destination for the parsed values
+ * @param lang     language of the help texts; unknown values fall back to
+ *                 English
+ */
+static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config,
+                           scc_argparse_lang_t lang) {
+    enum {
+        SCC_HINT_PROG_NAME,
+        SCC_HINT_DESCRIPTION,
+        SCC_HINT_OUTPUT_FILE,
+        SCC_HINT_INPUT_FILE,
+        SCC_HINT_VERBOSE,
+        SCC_HINT_EMIT_AST,
+        SCC_HINT_EMIT_IR,
+    };
+
+    static const char *scc_hints_en[] = {
+        [SCC_HINT_PROG_NAME] = "scc",
+        [SCC_HINT_DESCRIPTION] = "A simple C compiler",
+        [SCC_HINT_OUTPUT_FILE] = "Output file",
+        [SCC_HINT_INPUT_FILE] = "Input source file",
+        [SCC_HINT_VERBOSE] = "Increase verbosity (can be used multiple times)",
+        [SCC_HINT_EMIT_AST] = "Generate AST and exit",
+        [SCC_HINT_EMIT_IR] = "Generate IR and exit",
+    };
+    static const char *scc_hints_zh[] = {
+        [SCC_HINT_PROG_NAME] = "scc",
+        [SCC_HINT_DESCRIPTION] = "一个简单的C编译器",
+        [SCC_HINT_OUTPUT_FILE] = "输出文件",
+        [SCC_HINT_INPUT_FILE] = "输入源文件",
+        [SCC_HINT_VERBOSE] = "增加详细输出(可多次使用)",
+        [SCC_HINT_EMIT_AST] = "生成 AST 并退出",
+        [SCC_HINT_EMIT_IR] = "生成 IR 并退出",
+    };
+
+    const char **scc_hints;
+    switch (lang) {
+    case SCC_ARGPARSE_LANG_EN:
+        scc_hints = scc_hints_en;
+        break;
+    case SCC_ARGPARSE_LANG_ZH:
+        scc_hints = scc_hints_zh;
+        break;
+    default:
+        scc_hints = scc_hints_en;
+        break;
+    }
+
+    scc_argparse_init(argparse, scc_hints[SCC_HINT_PROG_NAME],
+                      scc_hints[SCC_HINT_DESCRIPTION]);
+    argparse->lang = lang;
+    scc_argparse_cmd_t *root = scc_argparse_get_root(argparse);
+
+    // -o, --output
+    scc_argparse_opt_t opt_output;
+    scc_argparse_opt_init(&opt_output, 'o', "output",
+                          scc_hints[SCC_HINT_OUTPUT_FILE]);
+    scc_argparse_spec_setup_string(&opt_output.spec, &(config->output_file));
+    scc_argparse_cmd_add_opt(root, &opt_output);
+
+    // input file (required)
+    scc_argparse_arg_t arg_input;
+    scc_argparse_arg_init(&arg_input, "input", scc_hints[SCC_HINT_INPUT_FILE]);
+    scc_argparse_spec_setup_string(&arg_input.spec, &(config->input_file));
+    scc_argparse_spec_set_required(&arg_input.spec, true);
+    scc_argparse_cmd_add_arg(root, &arg_input);
+
+    // -v, --verbose (counted)
+    scc_argparse_opt_t opt_verbose;
+    scc_argparse_opt_init(&opt_verbose, 'v', "verbose",
+                          scc_hints[SCC_HINT_VERBOSE]);
+    scc_argparse_spec_setup_count(&opt_verbose.spec, &(config->verbose));
+    scc_argparse_cmd_add_opt(root, &opt_verbose);
+
+    // -T, --emit-ast
+    scc_argparse_opt_t opt_ast;
+    scc_argparse_opt_init(&opt_ast, 'T', "emit-ast",
+                          scc_hints[SCC_HINT_EMIT_AST]);
+    scc_argparse_spec_setup_bool(&opt_ast.spec, &(config->dump_ast));
+    scc_argparse_cmd_add_opt(root, &opt_ast);
+
+    // -R, --emit-ir
+    scc_argparse_opt_t opt_ir;
+    scc_argparse_opt_init(&opt_ir, 'R', "emit-ir", scc_hints[SCC_HINT_EMIT_IR]);
+    scc_argparse_spec_setup_bool(&opt_ir.spec, &(config->dump_ir));
+    scc_argparse_cmd_add_opt(root, &opt_ir);
+}
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include
+#endif
+
+/**
+ * Compiler driver: parse the command line, lex and parse the input file, then
+ * dump the AST, or lower to IR and dump it, or print the output path.
+ *
+ * @return 0 on success or after printing help, non-zero on failure
+ */
+int main(int argc, const char **argv, const char **envp) {
+#ifdef _WIN32
+    SetConsoleOutputCP(CP_UTF8);
+    SetConsoleCP(CP_UTF8);
+#endif
+    scc_config_t config = {
+        .input_file = NULL,
+        .output_file = "a.exe",
+        .verbose = 0,
+        .dump_ast = false,
+        .dump_ir = false,
+    };
+    scc_argparse_t argparse;
+    setup_argparse(&argparse, &config, SCC_ARGPARSE_LANG_ZH);
+    int ret = scc_argparse_parse(&argparse, argc, argv);
+    // The parser is not used past this point, so release it exactly once here.
+    scc_argparse_drop(&argparse);
+    if (ret == SCC_ARGPARSE_ERR_PNT_DEFAULT) {
+        // Help was printed; there is nothing to compile and nothing failed.
+        return 0;
+    }
+    if (ret != 0) {
+        return ret;
+    }
+
+    setbuf(stdout, NULL);
+    FILE *fp = fopen(config.input_file, "r");
+    if (!fp) {
+        perror("fopen");
+        return 1;
+    }
+
+    scc_pproc_t pproc;
+    scc_probe_stream_t *source_code_stream = from_file_stream(fp);
+    if (source_code_stream == NULL) {
+        return 1;
+    }
+    // scc_probe_stream_t *pprocessed_code_stream =
+    //     scc_pproc_init(&pproc, source_code_stream);
+
+    scc_lexer_t lexer;
+    scc_lexer_init(&lexer, source_code_stream);
+    scc_lexer_stream_t lexer_stream;
+    scc_lexer_to_stream(&lexer, &lexer_stream, false);
+
+    scc_parser_t parser;
+    scc_parser_init(&parser, &lexer_stream, null);
+    scc_ast_translation_unit_t *translation_unit =
+        scc_parse_translation_unit(&parser);
+
+    if (config.dump_ast) {
+        scc_tree_dump_ctx_t tree_dump;
+        scc_tree_dump_ctx_init(&tree_dump, true);
+        scc_ast_dump_node(&tree_dump, (scc_ast_node_t *)translation_unit);
+        scc_tree_dump_ctx_drop(&tree_dump);
+        return 0;
+    }
+
+    scc_ir_builder_t ir_builder;
+    scc_ast2ir(translation_unit, &ir_builder);
+
+    if (config.dump_ir) {
+        scc_ir_dump_ctx_t ir_dump_ctx;
+        scc_tree_dump_ctx_t tree_dump; // helper used only by the IR dump
+        scc_tree_dump_ctx_init(&tree_dump, true);
+        scc_ir_dump_ctx_init(&ir_dump_ctx, &tree_dump, &ir_builder.cprog,
+                             &ir_builder.ctx);
+        // scc_ir_dump_cprog(&ir_dump_ctx);
+        scc_ir_dump_cprog_linear(&ir_dump_ctx);
+        scc_tree_dump_ctx_drop(&tree_dump);
+        return 0;
+    }
+
+    scc_printf("output exe at %s", config.output_file);
+    return 0;
+}