- 在cbuild.toml中启用parser和ast依赖项 - 将AST内置类型枚举重命名为SCC_AST_BUILTIN_TYPE_*前缀格式 - 修复ast_def.h中的类型字段命名,将builtin改为type - 添加逗号操作符支持到表达式操作符枚举中 - 更新字面量表达式的lexeme字段为const char*指针和owned标志 - 重构解析器头文件结构,分离为parser.h、parser_utils.h、scc_sema.h等 - 实现新的解析器工具函数,包括预览、消费、回溯等功能 - 更新声明解析逻辑,使用新的解析器接口进行token处理 - 添加符号表语义分析功能框架 - 修复词法分析器中token移动时的空指针检查 - 统一使用scc_tree_dump_printf替代直接的scc_printf调用
306 lines
10 KiB
C
306 lines
10 KiB
C
#include <argparse.h>
|
|
#include <scc_lexer.h>
|
|
#include <scc_pproc.h>
|
|
|
|
#include <ast_dump.h>
|
|
#include <scc_parser.h>
|
|
// #include <ir_dump.h>
|
|
// #include <scc_ast2ir.h>
|
|
|
|
typedef struct {
|
|
const char *input_file;
|
|
const char *output_file;
|
|
int verbose;
|
|
scc_argparse_list_t include_paths;
|
|
cbool emit_lex;
|
|
cbool emit_pp;
|
|
cbool emit_ast;
|
|
cbool emit_ir;
|
|
} scc_config_t;
|
|
|
|
static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config,
|
|
scc_argparse_lang_t lang) {
|
|
enum {
|
|
SCC_HINT_PROG_NAME,
|
|
SCC_HINT_DESCRIPTION,
|
|
SCC_HINT_OUTPUT_FILE,
|
|
SCC_HINT_INPUT_FILE,
|
|
SCC_HINT_INCLUDE_PATH,
|
|
SCC_HINT_VERBOSE,
|
|
|
|
SCC_HINT_EMIT_LEX,
|
|
SCC_HINT_EMIT_PP,
|
|
SCC_HINT_EMIT_AST,
|
|
SCC_HINT_EMIT_IR,
|
|
};
|
|
static const char *scc_hints_en[] = {
|
|
[SCC_HINT_PROG_NAME] = "scc",
|
|
[SCC_HINT_DESCRIPTION] = "A simple C compiler",
|
|
[SCC_HINT_OUTPUT_FILE] =
|
|
"Output file (`-` means standard output stream file)",
|
|
[SCC_HINT_INPUT_FILE] = "Input source file",
|
|
[SCC_HINT_INCLUDE_PATH] = "Add directory to the include search paths",
|
|
[SCC_HINT_VERBOSE] = "Increase verbosity (can be used multiple times)",
|
|
[SCC_HINT_EMIT_LEX] = "Generate lexer sources tokens and exit",
|
|
[SCC_HINT_EMIT_PP] = "Generate preprocessed tokens and exit",
|
|
[SCC_HINT_EMIT_AST] = "Generate AST and exit",
|
|
[SCC_HINT_EMIT_IR] = "Generate IR and exit",
|
|
};
|
|
static const char *scc_hints_zh[] = {
|
|
[SCC_HINT_PROG_NAME] = "scc",
|
|
[SCC_HINT_DESCRIPTION] = "一个简单的C编译器",
|
|
[SCC_HINT_OUTPUT_FILE] = "输出文件(`-`表示标准输出流文件)",
|
|
[SCC_HINT_INPUT_FILE] = "输入源文件",
|
|
[SCC_HINT_INCLUDE_PATH] = "添加系统头文件到搜索路径",
|
|
[SCC_HINT_VERBOSE] = "增加详细输出(可多次使用)",
|
|
[SCC_HINT_EMIT_LEX] = "生成`源代码的词法单元`并退出",
|
|
[SCC_HINT_EMIT_PP] = "生成`预处理后的词法单元`并退出",
|
|
[SCC_HINT_EMIT_AST] = "生成`抽象语法树`并退出",
|
|
[SCC_HINT_EMIT_IR] = "生成`中间代码`并退出",
|
|
};
|
|
|
|
const char **scc_hints;
|
|
switch (lang) {
|
|
case SCC_ARGPARSE_LANG_EN:
|
|
scc_hints = scc_hints_en;
|
|
break;
|
|
case SCC_ARGPARSE_LANG_ZH:
|
|
scc_hints = scc_hints_zh;
|
|
break;
|
|
default:
|
|
scc_hints = scc_hints_en;
|
|
break;
|
|
}
|
|
|
|
scc_argparse_init(argparse, scc_hints[SCC_HINT_PROG_NAME],
|
|
scc_hints[SCC_HINT_DESCRIPTION]);
|
|
argparse->lang = lang;
|
|
scc_argparse_cmd_t *root = scc_argparse_get_root(argparse);
|
|
|
|
// -o, --output
|
|
scc_argparse_opt_t opt_output;
|
|
scc_argparse_opt_init(&opt_output, 'o', "output",
|
|
scc_hints[SCC_HINT_OUTPUT_FILE]);
|
|
scc_argparse_spec_setup_string(&opt_output.spec, &(config->output_file));
|
|
scc_argparse_cmd_add_opt(root, &opt_output);
|
|
|
|
// input file (必需)
|
|
scc_argparse_arg_t arg_input;
|
|
scc_argparse_arg_init(&arg_input, "input", scc_hints[SCC_HINT_INPUT_FILE]);
|
|
scc_argparse_spec_setup_string(&arg_input.spec, &(config->input_file));
|
|
scc_argparse_spec_set_required(&arg_input.spec, true);
|
|
scc_argparse_cmd_add_arg(root, &arg_input);
|
|
|
|
// -I, --include (添加额外的系统头文件搜索路径)
|
|
scc_argparse_opt_t opt_include;
|
|
scc_argparse_opt_init(&opt_include, 'I', "include",
|
|
scc_hints[SCC_HINT_INCLUDE_PATH]);
|
|
scc_argparse_spec_setup_list(&opt_include.spec, &(config->include_paths));
|
|
scc_argparse_cmd_add_opt(root, &opt_include);
|
|
|
|
// -v, --verbose (计数)
|
|
scc_argparse_opt_t opt_verbose;
|
|
scc_argparse_opt_init(&opt_verbose, 'V', "verbose",
|
|
scc_hints[SCC_HINT_VERBOSE]);
|
|
scc_argparse_spec_setup_count(&opt_verbose.spec, &(config->verbose));
|
|
scc_argparse_cmd_add_opt(root, &opt_verbose);
|
|
|
|
// --emit-lex
|
|
scc_argparse_opt_t opt_lex;
|
|
scc_argparse_opt_init(&opt_lex, 0, "emit-lex",
|
|
scc_hints[SCC_HINT_EMIT_LEX]);
|
|
scc_argparse_spec_setup_bool(&opt_lex.spec, &(config->emit_lex));
|
|
scc_argparse_cmd_add_opt(root, &opt_lex);
|
|
|
|
// --emit-pp
|
|
scc_argparse_opt_t opt_pp;
|
|
scc_argparse_opt_init(&opt_pp, 0, "emit-pp", scc_hints[SCC_HINT_EMIT_PP]);
|
|
scc_argparse_spec_setup_bool(&opt_pp.spec, &(config->emit_pp));
|
|
scc_argparse_cmd_add_opt(root, &opt_pp);
|
|
|
|
// -T, --emit-ast
|
|
scc_argparse_opt_t opt_ast;
|
|
scc_argparse_opt_init(&opt_ast, 'T', "emit-ast",
|
|
scc_hints[SCC_HINT_EMIT_AST]);
|
|
scc_argparse_spec_setup_bool(&opt_ast.spec, &(config->emit_ast));
|
|
scc_argparse_cmd_add_opt(root, &opt_ast);
|
|
|
|
// -R, --emit-ir
|
|
scc_argparse_opt_t opt_ir;
|
|
scc_argparse_opt_init(&opt_ir, 'R', "emit-ir", scc_hints[SCC_HINT_EMIT_IR]);
|
|
scc_argparse_spec_setup_bool(&opt_ir.spec, &(config->emit_ir));
|
|
scc_argparse_cmd_add_opt(root, &opt_ir);
|
|
}
|
|
|
|
#ifdef _WIN32
|
|
#define WIN32_LEAN_AND_MEAN
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
/**
 * Consume a token ring and print a description of each token via scc_printf.
 *
 * @param ring    Token ring to drain; reading stops when the ring yields
 *                nothing or an SCC_TOK_EOF token is reached.
 * @param verbose 0 prints only token names separated by spaces; 1 or more
 *                prints one line per token with its name, lexeme and source
 *                location. Negative values print nothing.
 */
static void print_ring(scc_lexer_tok_ring_t *ring, int verbose) {
    scc_lexer_tok_t tok = {0};
    int got = 0;

    for (;;) {
        scc_ring_next_consume(*ring, tok, got);
        if (!got || tok.type == SCC_TOK_EOF) {
            break;
        }

        if (verbose >= 1) {
            /* Show an end-of-line token as an escaped "\n", not its lexeme. */
            const char *text = "\\n";
            if (tok.type != SCC_TOK_ENDLINE) {
                text = scc_cstring_as_cstr(&tok.lexeme);
            }
            scc_printf("token [%-8s] `%s` at %s:%d:%d\n",
                       scc_get_tok_name(tok.type), text, tok.loc.name,
                       tok.loc.line, tok.loc.col);
        } else if (verbose == 0) {
            scc_printf("%s ", scc_get_tok_name(tok.type));
        }

        /* Release the token now that it has been reported. */
        scc_lexer_tok_drop(&tok);
    }
}
|
|
|
|
/**
 * Consume a token ring and write every token's lexeme, back to back, to a
 * file or to standard output.
 *
 * @param ring      Token ring to drain; reading stops when the ring yields
 *                  nothing or an SCC_TOK_EOF token is reached.
 * @param file_name Destination path. The string "-" selects standard output
 *                  (written through scc_printf); anything else is opened
 *                  with SCC_FILE_WRITE.
 *
 * Failure to open the file is logged and the function returns without
 * consuming the ring. A short write is logged for each affected token.
 */
static void print_file(scc_lexer_tok_ring_t *ring, const char *file_name) {
    scc_lexer_tok_t tok = {0};
    int ret = 0;
    scc_file_t fp = null;
    cbool is_stdout = scc_strcmp(file_name, "-") == 0;

    if (!is_stdout) {
        fp = scc_fopen(file_name, SCC_FILE_WRITE);
        if (fp == null) {
            LOG_FATAL("Failed to open file %s", file_name);
            return;
        }
    }

    while (1) {
        scc_ring_next_consume(*ring, tok, ret);
        if (ret == false || tok.type == SCC_TOK_EOF) {
            break;
        }

        if (is_stdout) {
            scc_printf("%s", scc_cstring_as_cstr(&tok.lexeme));
        } else {
            /* Distinct name: the previous inner `ret` shadowed the ring
             * status variable declared above. */
            usize len = scc_cstring_len(&tok.lexeme);
            usize written =
                scc_fwrite(fp, scc_cstring_as_cstr(&tok.lexeme), len);
            if (written != len) {
                LOG_FATAL("Failed to write to file %s", file_name);
            }
        }

        /* Release the token now that its text has been emitted. */
        scc_lexer_tok_drop(&tok);
    }

    /* No file is opened in stdout mode, so `fp` is still null there; only
     * close a handle that was actually obtained from scc_fopen. */
    if (fp != null) {
        scc_fclose(fp);
    }
}
|
|
|
|
int main(int argc, const char **argv, const char **envp) {
|
|
#ifdef _WIN32
|
|
SetConsoleOutputCP(CP_UTF8);
|
|
SetConsoleCP(CP_UTF8);
|
|
#endif
|
|
|
|
#ifndef SCC_DEFAULT_ARGPARSE_LANG
|
|
#define SCC_DEFAULT_ARGPARSE_LANG SCC_ARGPARSE_LANG_ZH
|
|
#endif
|
|
|
|
scc_argparse_lang_t argparse_lang = SCC_DEFAULT_ARGPARSE_LANG;
|
|
for (const char **env = envp; *env != null; env++) {
|
|
const char *env_str = *env;
|
|
if (scc_strcmp(env_str, "LANG=zh_CN.UTF-8") == 0) {
|
|
argparse_lang = SCC_ARGPARSE_LANG_ZH;
|
|
}
|
|
}
|
|
|
|
scc_config_t config = {
|
|
.input_file = null,
|
|
.verbose = 0,
|
|
.output_file = null,
|
|
.emit_ast = false,
|
|
.emit_ir = false,
|
|
};
|
|
scc_vec_init(config.include_paths);
|
|
|
|
scc_argparse_t argparse;
|
|
setup_argparse(&argparse, &config, argparse_lang);
|
|
int ret = scc_argparse_parse(&argparse, argc, argv);
|
|
if (ret != 0) {
|
|
scc_argparse_drop(&argparse);
|
|
return 0;
|
|
}
|
|
scc_argparse_drop(&argparse);
|
|
|
|
scc_sstream_t sstream;
|
|
if (scc_sstream_init(&sstream, config.input_file, 1024)) {
|
|
return 0;
|
|
}
|
|
|
|
scc_lexer_t lexer;
|
|
scc_lexer_init(&lexer, scc_sstream_to_ring(&sstream));
|
|
if (config.emit_lex) {
|
|
scc_lexer_tok_ring_t *tok_ring = scc_lexer_to_ring(
|
|
&lexer, 8, config.output_file == null ? false : true);
|
|
if (config.output_file == null) {
|
|
print_ring(tok_ring, config.verbose);
|
|
} else {
|
|
print_file(tok_ring, config.output_file);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
scc_pproc_t pproc;
|
|
scc_pproc_init(&pproc, scc_lexer_to_ring(&lexer, 8, true));
|
|
scc_vec_foreach(config.include_paths, i) {
|
|
scc_pproc_add_include_path_cstr(&pproc,
|
|
scc_vec_at(config.include_paths, i));
|
|
}
|
|
scc_lexer_tok_vec_t pproc_tok_vec;
|
|
scc_vec_init(pproc_tok_vec);
|
|
scc_cstring_t pproc_macro_name = scc_cstring_from_cstr("__SCC__");
|
|
scc_pproc_add_object_macro(&(pproc.macro_table), &pproc_macro_name,
|
|
&pproc_tok_vec);
|
|
if (config.emit_pp) {
|
|
scc_lexer_tok_ring_t *tok_ring =
|
|
scc_pproc_to_ring(&pproc, 8, true, true);
|
|
if (config.output_file == null) {
|
|
print_ring(tok_ring, config.verbose);
|
|
} else {
|
|
print_file(tok_ring, config.output_file);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
scc_lexer_tok_ring_t *tok_ring = scc_pproc_to_ring(&pproc, 8, false, false);
|
|
scc_parser_t parser;
|
|
scc_parser_init(&parser, tok_ring, null);
|
|
scc_ast_translation_unit_t *translation_unit =
|
|
scc_parse_translation_unit(&parser);
|
|
|
|
scc_parser_drop(&parser);
|
|
scc_pproc_drop(&pproc);
|
|
scc_lexer_drop(&lexer);
|
|
scc_sstream_drop(&sstream);
|
|
|
|
if (config.emit_ast) {
|
|
scc_tree_dump_ctx_t tree_dump;
|
|
scc_tree_dump_ctx_init(&tree_dump, true, (void *)scc_fprintf,
|
|
(void *)scc_stdout);
|
|
scc_ast_dump_node(&tree_dump, (scc_ast_node_t *)translation_unit);
|
|
scc_tree_dump_ctx_drop(&tree_dump);
|
|
return 0;
|
|
}
|
|
|
|
// scc_ir_builder_t ir_builder;
|
|
// scc_ast2ir(translation_unit, &ir_builder);
|
|
|
|
// if (config.emit_ir) {
|
|
// scc_ir_dump_ctx_t ir_dump_ctx;
|
|
// scc_tree_dump_ctx_t tree_dump; // 仅为 ir dump 辅助
|
|
// scc_tree_dump_ctx_init(&tree_dump, true);
|
|
// scc_ir_dump_ctx_init(&ir_dump_ctx, &tree_dump, &ir_builder.cprog,
|
|
// &ir_builder.ctx);
|
|
// // scc_ir_dump_cprog(&ir_dump_ctx);
|
|
// scc_ir_dump_cprog_linear(&ir_dump_ctx);
|
|
// scc_tree_dump_ctx_drop(&tree_dump);
|
|
// return 0;
|
|
// }
|
|
|
|
scc_printf("output exe at %s", config.output_file);
|
|
return 0;
|
|
}
|