diff --git a/cbuild.toml b/cbuild.toml index e6460ad..6bbad87 100644 --- a/cbuild.toml +++ b/cbuild.toml @@ -4,10 +4,10 @@ version = "0.1.0" dependencies = [ { name = "argparse", path = "./libs/argparse" }, - { name = "pprocessor", path = "./libs/pprocessor" }, { name = "lexer", path = "./libs/lexer" }, - { name = "parser", path = "./libs/parser" }, - { name = "ast", path = "./libs/ast" }, - { name = "ast2ir", path = "./libs/ast2ir" }, - { name = "ir", path = "./libs/ir" }, + { name = "pproc", path = "./libs/pproc" }, + # { name = "parser", path = "./libs/parser" }, + # { name = "ast", path = "./libs/ast" }, + # { name = "ast2ir", path = "./libs/ast2ir" }, + # { name = "ir", path = "./libs/ir" }, ] diff --git a/libs/lexer/include/lexer_token.h b/libs/lexer/include/lexer_token.h index 92cbd0d..d646574 100644 --- a/libs/lexer/include/lexer_token.h +++ b/libs/lexer/include/lexer_token.h @@ -140,7 +140,7 @@ typedef enum scc_cstd { // 定义TokenType枚举 typedef enum scc_tok_type { - +// must first becase the unknown token must be 0 #define X(str, subtype, tok) tok, SCC_CTOK_TABLE #undef X diff --git a/libs/lexer/include/scc_lexer_utils.h b/libs/lexer/include/scc_lexer_utils.h new file mode 100644 index 0000000..21a4bbc --- /dev/null +++ b/libs/lexer/include/scc_lexer_utils.h @@ -0,0 +1,39 @@ +#ifndef __SCC_LEXER_UTILS_H__ +#define __SCC_LEXER_UTILS_H__ + +#include "scc_lexer.h" + +static inline cbool scc_lexer_peek_non_blank(scc_lexer_tok_ring_t *stream, + scc_lexer_tok_t *out) { + cbool ok; + while (1) { + scc_ring_peek(*stream, *out, ok); + if (!ok || out->type != SCC_TOK_BLANK) + break; + scc_ring_next_consume(*stream, *out, ok); + scc_lexer_tok_drop(out); + } + return ok; +} + +static inline cbool scc_lexer_next_non_blank(scc_lexer_tok_ring_t *stream, + scc_lexer_tok_t *out) { + cbool ok; + if (!scc_lexer_peek_non_blank(stream, out)) + return false; + scc_ring_next_consume(*stream, *out, ok); + return true; +} + +static inline void 
scc_lexer_skip_until_newline(scc_lexer_tok_ring_t *stream) { + scc_lexer_tok_t tok; + cbool ok; + while (scc_lexer_peek_non_blank(stream, &tok)) { + if (tok.type == SCC_TOK_ENDLINE) + break; + scc_ring_next_consume(*stream, tok, ok); + scc_lexer_tok_drop(&tok); + } +} + +#endif /* __SCC_LEXER_UTILS_H__ */ diff --git a/libs/pproc/cbuild.toml b/libs/pproc/cbuild.toml new file mode 100644 index 0000000..395a9ca --- /dev/null +++ b/libs/pproc/cbuild.toml @@ -0,0 +1,7 @@ +[package] +name = "scc_pprocesser" + +dependencies = [ + { name = "scc_utils", path = "../../runtime/scc_utils" }, + { name = "lexer", path = "../lexer" }, +] diff --git a/libs/pproc/include/pproc_macro.h b/libs/pproc/include/pproc_macro.h new file mode 100644 index 0000000..fe22d6d --- /dev/null +++ b/libs/pproc/include/pproc_macro.h @@ -0,0 +1,97 @@ +#ifndef __SCC_PP_MACRO_H__ +#define __SCC_PP_MACRO_H__ + +#include +#include +#include + +// 宏定义类型 +typedef enum { + SCC_PP_MACRO_NONE, // 不是宏 + SCC_PP_MACRO_OBJECT, // 对象宏 + SCC_PP_MACRO_FUNCTION, // 函数宏 +} scc_pp_macro_type_t; + +typedef scc_lexer_tok_vec_t scc_pproc_macro_list_t; + +// 宏定义结构 +typedef struct scc_macro { + scc_cstring_t name; // 宏名称 + scc_pp_macro_type_t type; // 宏类型 + scc_lexer_tok_vec_t replaces; // 替换列表 + scc_pproc_macro_list_t params; // 参数列表(仅函数宏) +} scc_pp_macro_t; + +typedef struct scc_macro_table { + scc_hashtable_t table; // 宏定义表 +} scc_pp_macro_table_t; + +/** + * @brief 创建宏对象 + * @param name 宏名称 + * @param type 宏类型 + * @return 创建的宏对象指针,失败返回NULL + */ +scc_pp_macro_t *scc_pp_macro_new(const scc_cstring_t *name, + scc_pp_macro_type_t type); + +/** + * @brief 销毁宏对象 + * @param macro 要销毁的宏对象 + */ +void scc_pp_macro_drop(scc_pp_macro_t *macro); + +/** + * @brief 添加对象宏 + * @param pp 预处理器实例 + * @param name 宏名称 + * @param replacement 替换文本列表 + * @return 成功返回true,失败返回false + */ +cbool scc_pp_add_object_macro(scc_pp_macro_table_t *pp, + const scc_cstring_t *name, + const scc_pproc_macro_list_t *replacement); + +/** + * @brief 添加函数宏 + * 
@param pp 预处理器实例 + * @param name 宏名称 + * @param params 参数列表 + * @param replacement 替换文本列表 + * @return 成功返回true,失败返回false + */ +cbool scc_pp_add_function_macro(scc_pp_macro_table_t *pp, + const scc_cstring_t *name, + const scc_pproc_macro_list_t *params, + const scc_pproc_macro_list_t *replacement); +/** + * @brief + * + * @param pp + * @param macro + * @return scc_pp_macro_t* + */ +scc_pp_macro_t *scc_pp_macro_table_set(scc_pp_macro_table_t *pp, + scc_pp_macro_t *macro); + +/** + * @brief 查找宏定义 + * @param pp 预处理器实例 + * @param name 宏名称 + * @return 找到的宏对象指针,未找到返回NULL + */ +scc_pp_macro_t *scc_pp_macro_table_get(scc_pp_macro_table_t *pp, + const scc_cstring_t *name); + +/** + * @brief 从预处理器中删除宏 + * @param pp 预处理器实例 + * @param name 宏名称 + * @return 成功删除返回true,未找到返回false + */ +cbool scc_pp_macro_table_remove(scc_pp_macro_table_t *pp, + const scc_cstring_t *name); + +void scc_pp_marco_table_init(scc_pp_macro_table_t *macros); +void scc_pp_macro_table_drop(scc_pp_macro_table_t *macros); +#endif /* __SCC_PP_MACRO_H__ */ diff --git a/libs/pproc/include/scc_pproc.h b/libs/pproc/include/scc_pproc.h new file mode 100644 index 0000000..255d1b4 --- /dev/null +++ b/libs/pproc/include/scc_pproc.h @@ -0,0 +1,60 @@ +/** + * @file pprocessor.h + * @brief C语言预处理器核心数据结构与接口 + */ + +#ifndef __SCC_PPROC_H__ +#define __SCC_PPROC_H__ + +#include "pproc_macro.h" +#include +#include +#include + +// 预处理器状态结构 + +// 条件编译状态栈 +typedef struct { + int active; // 当前层级是否有效(即应该输出 token) + int skip; // 当前层级是否跳过(即不输出 token) + // 可根据需要增加状态,如 #if 的结果、#elif 已执行等 +} scc_pproc_if_state_t; +typedef SCC_VEC(scc_pproc_if_state_t) scc_pproc_if_stack_t; + +// 文件包含栈 +typedef struct { + scc_lexer_t *lexer; // 当前文件的 lexer + scc_lexer_tok_ring_t *tok_ring; // 当前文件的 token 环(由 lexer 提供) + // 可能还需要保存当前位置等 +} scc_pproc_file_state_t; +typedef SCC_VEC(scc_pproc_file_state_t) scc_pproc_file_stack_t; + +typedef SCC_VEC(scc_lexer_tok_ring_t *) scc_pproc_ring_vec_t; +typedef struct scc_pproc { + scc_lexer_tok_ring_t *cur_ring; + 
scc_strpool_t strpool; + + scc_pp_macro_table_t macro_table; + scc_pproc_if_stack_t if_stack; + scc_pproc_file_stack_t file_stack; + + scc_lexer_tok_vec_t cache; + int cache_pos; + scc_lexer_tok_ring_t ring; + int ring_ref_count; +} scc_pproc_t; + +void scc_pproc_init(scc_pproc_t *pp, scc_lexer_tok_ring_t *input); +scc_lexer_tok_ring_t *scc_pproc_to_ring(scc_pproc_t *pp, int ring_size); +void scc_pproc_drop(scc_pproc_t *pp); + +void scc_pproc_handle_directive(scc_pproc_t *pp); +void scc_pproc_expand_macro(scc_pproc_t *pp, const scc_pp_macro_t *macro); + +void scc_pproc_parse_macro_arguments(scc_pproc_t *pp, + scc_pproc_macro_list_t *args); +void scc_pproc_parse_function_macro(scc_pproc_t *pp, + const scc_lexer_tok_t *ident); +void scc_pproc_parse_object_macro(scc_pproc_t *pp, + const scc_lexer_tok_t *ident); +#endif /* __SCC_PPROC_H__ */ diff --git a/libs/pproc/src/pproc_directive.c b/libs/pproc/src/pproc_directive.c new file mode 100644 index 0000000..1942960 --- /dev/null +++ b/libs/pproc/src/pproc_directive.c @@ -0,0 +1,279 @@ +#include +#include +static const struct { + const char *name; + scc_tok_type_t tok_type; +} keywords[] = { +#define X(name, type, tok) {#name, tok}, + SCC_PPKEYWORD_TABLE +#undef X +}; + +// 使用二分查找查找关键字 +static inline int keyword_cmp(const char *name, int len) { + int low = 0; + int high = sizeof(keywords) / sizeof(keywords[0]) - 1; + while (low <= high) { + int mid = (low + high) / 2; + const char *key = keywords[mid].name; + int cmp = 0; + + // 自定义字符串比较逻辑 + for (int i = 0; i < len; i++) { + if (name[i] != key[i]) { + cmp = (unsigned char)name[i] - (unsigned char)key[i]; + break; + } + if (name[i] == '\0') + break; // 遇到终止符提前结束 + } + + if (cmp == 0) { + // 完全匹配检查(长度相同) + if (key[len] == '\0') + return mid; + cmp = -1; // 当前关键词比输入长 + } + + if (cmp < 0) { + high = mid - 1; + } else { + low = mid + 1; + } + } + return -1; // Not a keyword. 
+} + +void scc_pproc_parse_macro_arguments(scc_pproc_t *pp, + scc_pproc_macro_list_t *args) { + Assert(pp != null && args != null); + scc_lexer_tok_t tok = {0}; + scc_vec_init(*args); + int depth = 0; + do { + scc_lexer_next_non_blank(pp->cur_ring, &tok); + if (tok.type == SCC_TOK_L_PAREN) { + depth++; + } else if (tok.type == SCC_TOK_R_PAREN) { + depth--; + } + if (depth > 1) { + scc_vec_push(*args, tok); + } else { + scc_lexer_tok_drop(&tok); + } + } while (depth); +} + +static inline void fill_replacements(scc_pproc_t *pp, scc_pp_macro_t *macro) { + int ok; + scc_lexer_tok_t tok; + ok = scc_lexer_next_non_blank(pp->cur_ring, &tok); + if (!ok || tok.type == SCC_TOK_EOF || tok.type == SCC_TOK_ENDLINE) { + return; + } else { + scc_vec_push(macro->replaces, tok); + } + while (1) { + scc_ring_next_consume(*pp->cur_ring, tok, ok); + if (!ok) + break; + if (tok.type == SCC_TOK_EOF || tok.type == SCC_TOK_ENDLINE) { + scc_lexer_tok_drop(&tok); + break; + } + scc_vec_push(macro->replaces, tok); + } +} + +void scc_pproc_parse_function_macro(scc_pproc_t *pp, + const scc_lexer_tok_t *ident) { + scc_pproc_macro_list_t args; + scc_pproc_parse_macro_arguments(pp, &args); + scc_pp_macro_t *macro = + scc_pp_macro_new(&ident->lexeme, SCC_PP_MACRO_FUNCTION); + /* + check and set params + 1. identifier-list(opt) + 2. ... + 3. identifier-list , ... 
+ */ + scc_vec_foreach(args, i) { + scc_lexer_tok_t *arg = &scc_vec_at(args, i); + if (arg->type == SCC_TOK_COMMA) { + scc_lexer_tok_drop(arg); + if (i % 2 != 1) { + LOG_FATAL("ERROR"); + } + } else if (arg->type == SCC_TOK_IDENT) { + if (i % 2 != 0) { + LOG_FATAL("ERROR"); + } + scc_vec_push(macro->params, *arg); + } else if (arg->type == SCC_TOK_ELLIPSIS) { + if (i % 2 != 0) { + LOG_FATAL("ERROR"); + } + scc_cstring_t va_args = scc_cstring_from_cstr("__VA_ARGS__"); + scc_cstring_free(&arg->lexeme); + arg->lexeme = va_args; + scc_vec_push(macro->params, *arg); + } else { + LOG_FATAL("ERROR"); + } + } + fill_replacements(pp, macro); + scc_pp_macro_table_set(&pp->macro_table, macro); +} + +void scc_pproc_parse_object_macro(scc_pproc_t *pp, + const scc_lexer_tok_t *ident) { + scc_pp_macro_t *macro = + scc_pp_macro_new(&ident->lexeme, SCC_PP_MACRO_OBJECT); + fill_replacements(pp, macro); + scc_pp_macro_table_set(&pp->macro_table, macro); +} + +/* +```txt +6.10 Preprocessing directives + preprocessing-file: + group(opt) + group: + group-part + group group-part + group-part: + if-section + control-line + text-line + # non-directive + if-section: + if-group elif-groups(opt) else-group(opt) endif-line + if-group: + # if constant-expression new-line group(opt) + # ifdef identifier new-line group(opt) + # ifndef identifier new-line group(opt) + elif-groups: + elif-group + elif-groups elif-group + elif-group: + #elif constant-expression new-line group(opt) + else-group: + # else new-line group(opt) + endif-line: + # endif new-line + control-line: + # include pp-tokens new-line + # define identifier replacement-list new-line + # define identifier lparen identifier-list(opt) ) + replacement-list new-line + # define identifier lparen ... ) replacement-list new-line + # define identifier lparen identifier-list ,... 
) + replacement-list new-line + # undef identifier new-line + # line pp-tokens new-line + # error pp-tokens(opt) new-line + # pragma pp-tokens(opt) new-line + # new-line + text-line: + pp-tokens(opt) new-line + non-directive: + pp-tokens new-line + lparen: + `a ( character not immediately preceded by white-space` + replacement-list: + pp-tokens(opt) + pp-tokens: + preprocessing-token + pp-tokens preprocessing-token + new-line: + the new-line character +``` + */ +void scc_pproc_handle_directive(scc_pproc_t *pp) { + scc_lexer_tok_t tok = {0}; + int ok = 0; + scc_ring_next(*pp->cur_ring, tok, ok); + scc_lexer_tok_drop(&tok); + + if (!scc_lexer_next_non_blank(pp->cur_ring, &tok) || + tok.type != SCC_TOK_IDENT) { + scc_lexer_tok_drop(&tok); + LOG_ERROR("Invalid preprocessor directive"); + goto ERROR; + } + int ret = keyword_cmp(scc_cstring_as_cstr(&tok.lexeme), + scc_cstring_len(&tok.lexeme)); + if (ret == -1) { + scc_lexer_tok_drop(&tok); + LOG_ERROR("Expected preprocessor keyword, got %s", tok.lexeme); + goto ERROR; + } + + scc_tok_type_t type = keywords[ret].tok_type; + switch (type) { + case SCC_PP_TOK_DEFINE: { + scc_lexer_tok_drop(&tok); + scc_lexer_next_non_blank(pp->cur_ring, &tok); + if (tok.type != SCC_TOK_IDENT) { + scc_lexer_tok_drop(&tok); + LOG_ERROR("expected identifier"); + goto ERROR; + } + scc_lexer_tok_t next_tok; + scc_ring_peek(*pp->cur_ring, next_tok, ok); + if (!ok) { + LOG_ERROR("unexpected EOF"); + goto ERROR; + } + if (next_tok.type == SCC_TOK_L_PAREN) { + // function macro + scc_pproc_parse_function_macro(pp, &tok); + } else { + // object macro + scc_pproc_parse_object_macro(pp, &tok); + } + scc_lexer_tok_drop(&tok); + // FIXME + return; + } + case SCC_PP_TOK_UNDEF: { + scc_lexer_tok_drop(&tok); + scc_lexer_next_non_blank(pp->cur_ring, &tok); + if (tok.type != SCC_TOK_IDENT) { + scc_lexer_tok_drop(&tok); + LOG_ERROR("expected identifier"); + goto ERROR; + } + scc_pp_macro_table_remove(&pp->macro_table, &tok.lexeme); + scc_lexer_tok_drop(&tok); 
+ scc_lexer_next_non_blank(pp->cur_ring, &tok); + if (tok.type != SCC_TOK_ENDLINE) { + scc_lexer_tok_drop(&tok); + LOG_ERROR("expected newline"); + goto ERROR; + } + scc_lexer_tok_drop(&tok); + break; + } + case SCC_PP_TOK_INCLUDE: + case SCC_PP_TOK_IF: + case SCC_PP_TOK_IFDEF: + case SCC_PP_TOK_IFNDEF: + case SCC_PP_TOK_ELSE: + case SCC_PP_TOK_ELIF: + case SCC_PP_TOK_ELIFDEF: + case SCC_PP_TOK_ELIFNDEF: + case SCC_PP_TOK_ENDIF: + case SCC_PP_TOK_LINE: + case SCC_PP_TOK_EMBED: + case SCC_PP_TOK_ERROR: + case SCC_PP_TOK_WARNING: + case SCC_PP_TOK_PRAGMA: + default: + LOG_WARN("Unhandled directive: %s", scc_cstring_as_cstr(&tok.lexeme)); + break; + } +ERROR: + scc_lexer_skip_until_newline(pp->cur_ring); +} \ No newline at end of file diff --git a/libs/pproc/src/pproc_expand.c b/libs/pproc/src/pproc_expand.c new file mode 100644 index 0000000..8538ed2 --- /dev/null +++ b/libs/pproc/src/pproc_expand.c @@ -0,0 +1,32 @@ +#include + +typedef struct { + +} scc_expand_t; + +void scc_pproc_expand_macro(scc_pproc_t *pp, const scc_pp_macro_t *macro) { + if (macro->type == SCC_PP_MACRO_NONE) { + UNREACHABLE(); + } + + if (macro->type == SCC_PP_MACRO_OBJECT) { + scc_vec_foreach(macro->replaces, i) { + scc_lexer_tok_t tok = scc_vec_at(macro->replaces, i); + if (tok.type == SCC_TOK_BLANK) { + tok.lexeme = scc_cstring_from_cstr(" "); + } else { + tok.lexeme = scc_cstring_copy(&tok.lexeme); + } + scc_vec_push(pp->cache, tok); + } + pp->cache_pos = 0; + return; + } + Assert(macro->type == SCC_PP_MACRO_FUNCTION); + // Check params match + scc_pproc_macro_list_t args; + scc_pproc_parse_macro_arguments(pp, &args); + scc_vec_foreach(args, i) {} + scc_vec_foreach(macro->params, i) {} + scc_vec_foreach(macro->replaces, i) {} +} diff --git a/libs/pproc/src/pproc_include.c b/libs/pproc/src/pproc_include.c new file mode 100644 index 0000000..e69de29 diff --git a/libs/pproc/src/pproc_macro.c b/libs/pproc/src/pproc_macro.c new file mode 100644 index 0000000..813d2ad --- /dev/null +++ 
b/libs/pproc/src/pproc_macro.c @@ -0,0 +1,155 @@ +#include + +// 创建宏对象 +scc_pp_macro_t *scc_pp_macro_new(const scc_cstring_t *name, + scc_pp_macro_type_t type) { + scc_pp_macro_t *macro = scc_malloc(sizeof(scc_pp_macro_t)); + if (!macro) { + LOG_ERROR("Failed to allocate memory for macro"); + return null; + } + + macro->name = scc_cstring_copy(name); + macro->type = type; + scc_vec_init(macro->params); + scc_vec_init(macro->replaces); + + return macro; +} + +// 销毁宏对象 +void scc_pp_macro_drop(scc_pp_macro_t *macro) { + if (!macro) + return; + + scc_cstring_free(&macro->name); + + // 释放参数列表 + for (usize i = 0; i < macro->params.size; ++i) { + scc_lexer_tok_drop(&scc_vec_at(macro->params, i)); + } + scc_vec_free(macro->params); + + // 释放替换列表 + for (usize i = 0; i < macro->replaces.size; ++i) { + scc_lexer_tok_drop(&scc_vec_at(macro->replaces, i)); + } + scc_vec_free(macro->replaces); + + scc_free(macro); +} + +// 添加对象宏 +cbool scc_pp_add_object_macro(scc_pp_macro_table_t *macros, + const scc_cstring_t *name, + const scc_pproc_macro_list_t *replacement) { + if (!macros || !name || !replacement) + return false; + + scc_pp_macro_t *macro = scc_pp_macro_new(name, SCC_PP_MACRO_OBJECT); + if (!macro) + return false; + + macro->replaces = *replacement; + + // 检查是否已存在同名宏 + scc_pp_macro_t *existing = scc_hashtable_get(&macros->table, &macro->name); + if (existing) { + LOG_WARN("Redefining macro: %s", scc_cstring_as_cstr(&macro->name)); + scc_pp_macro_drop(existing); + } + + scc_hashtable_set(&macros->table, &macro->name, macro); + return true; +} + +// 添加函数宏 +cbool scc_pp_add_function_macro(scc_pp_macro_table_t *macros, + const scc_cstring_t *name, + const scc_pproc_macro_list_t *params, + const scc_pproc_macro_list_t *replacement) { + if (!macros || !name || !params || !replacement) + return false; + + scc_pp_macro_t *macro = scc_pp_macro_new(name, SCC_PP_MACRO_FUNCTION); + if (!macro) + return false; + + // 复制参数列表 + macro->params = *params; + macro->replaces = *replacement; + + // 检查是否已存在同名宏 + 
scc_pp_macro_t *existing = scc_hashtable_get(&macros->table, &macro->name); + if (existing) { + LOG_WARN("Redefining macro: %s", scc_cstring_as_cstr(&macro->name)); + scc_pp_macro_drop(existing); + } + + scc_hashtable_set(&macros->table, &macro->name, macro); + return true; +} + +/// marco_table + +scc_pp_macro_t *scc_pp_macro_table_set(scc_pp_macro_table_t *pp, + scc_pp_macro_t *macro) { + Assert(pp != null && macro != null); + return scc_hashtable_set(&pp->table, &macro->name, macro); +} + +// 查找宏定义 +scc_pp_macro_t *scc_pp_macro_table_get(scc_pp_macro_table_t *pp, + const scc_cstring_t *name) { + return scc_hashtable_get(&pp->table, name); +} + +// 从预处理器中删除宏 +cbool scc_pp_macro_table_remove(scc_pp_macro_table_t *pp, + const scc_cstring_t *name) { + if (!pp || !name) + return false; + + scc_pp_macro_t *macro = scc_hashtable_get(&pp->table, name); + if (!macro) + return false; + + scc_hashtable_del(&pp->table, name); + scc_pp_macro_drop(macro); + return true; +} + +static u32 hash_func(const void *key) { + const scc_cstring_t *string = (const scc_cstring_t *)key; + return scc_strhash32(scc_cstring_as_cstr(string)); +} + +static int hash_cmp(const void *key1, const void *key2) { + const scc_cstring_t *str1 = (const scc_cstring_t *)key1; + const scc_cstring_t *str2 = (const scc_cstring_t *)key2; + + if (str1->size != str2->size) { + return str1->size - str2->size; + } + return scc_strcmp(scc_cstring_as_cstr(str1), scc_cstring_as_cstr(str2)); +} + +void scc_pp_marco_table_init(scc_pp_macro_table_t *macros) { + Assert(macros != null); + macros->table.hash_func = hash_func; + macros->table.key_cmp = hash_cmp; + scc_hashtable_init(&macros->table); +} + +static int macro_free(const void *key, void *value, void *context) { + (void)key; + (void)context; + scc_pp_macro_drop(value); + return 0; +} + +void scc_pp_macro_table_drop(scc_pp_macro_table_t *macros) { + Assert(macros != null); + scc_hashtable_foreach(&macros->table, macro_free, null); + scc_hashtable_drop(&macros->table); +} diff --git 
a/libs/pproc/src/scc_pproc.c b/libs/pproc/src/scc_pproc.c new file mode 100644 index 0000000..5faf85e --- /dev/null +++ b/libs/pproc/src/scc_pproc.c @@ -0,0 +1,68 @@ +#include + +static int pproc_next(scc_pproc_t *pp, scc_lexer_tok_t *out) { + scc_lexer_tok_ring_t *stream = pp->cur_ring; + scc_lexer_tok_t tok = {0}; + int ok = 0; +CONTINUE: + if (scc_vec_size(pp->cache)) { + // use cache? + *out = scc_vec_at(pp->cache, pp->cache_pos); + pp->cache_pos++; + if (pp->cache_pos == scc_vec_size(pp->cache)) { + pp->cache_pos = 0; + scc_vec_free(pp->cache); + } + return true; + } + scc_ring_peek(*stream, tok, ok); + if (tok.type == SCC_TOK_SHARP && tok.loc.col == 1) { + // parse to # + scc_pproc_handle_directive(pp); + goto CONTINUE; + } else if (tok.type == SCC_TOK_IDENT) { + // maybe expanded + scc_pp_macro_t *macro = + scc_pp_macro_table_get(&pp->macro_table, &tok.lexeme); + scc_ring_next_consume(*stream, *out, ok); + if (macro == null) { + return ok; + } + scc_pproc_expand_macro(pp, macro); + goto CONTINUE; + } else { + // continue + scc_ring_next_consume(*stream, *out, ok); + return ok; + } + return false; +} + +void scc_pproc_init(scc_pproc_t *pp, scc_lexer_tok_ring_t *input) { + Assert(pp != null && input != null); + pp->cur_ring = input; + scc_pp_marco_table_init(&pp->macro_table); + scc_vec_init(pp->if_stack); + scc_vec_init(pp->file_stack); + scc_vec_init(pp->cache); + pp->cache_pos = 0; +} + +static cbool fill_token(scc_lexer_tok_t *tok, void *userdata) { + scc_pproc_t *pp = userdata; + return pproc_next(pp, tok); +} + +scc_lexer_tok_ring_t *scc_pproc_to_ring(scc_pproc_t *pp, int ring_size) { + scc_ring_init(pp->ring, ring_size, fill_token, pp); + pp->ring_ref_count++; + return &pp->ring; +} + +// 销毁预处理器 +void scc_pproc_drop(scc_pproc_t *pp) { + if (pp == null) + return; + scc_lexer_drop_ring(pp->cur_ring); + scc_pp_macro_table_drop(&pp->macro_table); +} diff --git a/libs/pproc/tests/test_unit.c b/libs/pproc/tests/test_unit.c new file mode 100644 index 
0000000..7c3b14e --- /dev/null +++ b/libs/pproc/tests/test_unit.c @@ -0,0 +1,231 @@ +#include +#include +#include +#include + +static cbool process_input(const char *input, scc_cstring_t *output) { + int ret = 0; + scc_sstream_t mem_stream; + ret = scc_sstream_init_by_buffer(&mem_stream, input, strlen(input), false, + 16); + Assert(ret == 0); + + scc_lexer_t lexer; + scc_lexer_init(&lexer, scc_sstream_to_ring(&mem_stream)); + + scc_pproc_t pp; + scc_pproc_init(&pp, scc_lexer_to_ring(&lexer, 8, true)); + + scc_lexer_tok_ring_t *tok_ring = scc_pproc_to_ring(&pp, 8); + *output = scc_cstring_create(); + scc_lexer_tok_t tok; + while (1) { + scc_ring_next_consume(*tok_ring, tok, ret); + if (!ret) { + break; + } + scc_cstring_append(output, &tok.lexeme); + scc_lexer_tok_drop(&tok); + } + + scc_pproc_drop(&pp); + scc_lexer_drop(&lexer); + scc_sstream_drop(&mem_stream); + + return true; +} + +#define CHECK_PP_OUTPUT_EXACT(input, expect) \ + do { \ + scc_cstring_t output; \ + process_input(input, &output); \ + assert(output.data != NULL); \ + TEST_CHECK(strcmp(output.data, expect) == 0); \ + TEST_MSG("Expected: %s", expect); \ + TEST_MSG("Produced: %s", output.data); \ + } while (0) + +#define CHECK_PP_OUTPUT_CONTAIN(input, expect) \ + do { \ + scc_cstring_t output; \ + process_input(input, &output); \ + assert(output.data != NULL); \ + TEST_CHECK(strstr(output.data, expect) != NULL); \ + TEST_MSG("Expected: %s", expect); \ + TEST_MSG("Produced: %s", output.data); \ + } while (0) + +static void test_define_simple_no_macro(void) { + TEST_CASE("simple no macro"); + CHECK_PP_OUTPUT_EXACT("a", "a"); + CHECK_PP_OUTPUT_EXACT("a()", "a()"); + CHECK_PP_OUTPUT_EXACT("a(b)", "a(b)"); + CHECK_PP_OUTPUT_EXACT("a(b, c)", "a(b, c)"); + CHECK_PP_OUTPUT_EXACT("a(b, c, d)", "a(b, c, d)"); +} + +static void test_define_simple_object_macro(void) { + TEST_CASE("simple object-like macro"); + CHECK_PP_OUTPUT_EXACT("#define MAX 100\nMAX\n", "100\n"); + CHECK_PP_OUTPUT_EXACT("#define NAME 
test\r\nNAME\n", "test\n"); +} + +static void test_define_complex_object_macro(void) { + TEST_CASE("complex object-like macro"); + CHECK_PP_OUTPUT_EXACT("#define VALUE (100 + 50)\nVALUE\n", "(100 + 50)\n"); + CHECK_PP_OUTPUT_EXACT("#define PI 3.14159\nPI\n", "3.14159\n"); +} + +static void test_define_object_macro_backspace(void) { + TEST_CASE("object-like macro check backspace"); + CHECK_PP_OUTPUT_EXACT("#define MAX 100\nMAX\n", "100\n"); + CHECK_PP_OUTPUT_EXACT("#define NAME \ttest\r\nNAME\n", "test\n"); + CHECK_PP_OUTPUT_EXACT("#define \tVALUE (100 \t+ 50)\nVALUE\n", + "(100 + 50)\n"); + CHECK_PP_OUTPUT_EXACT("#define \tPI \t 3.14159\nPI\n", "3.14159\n"); +} + +static void test_define_function_macro(void) { + TEST_CASE("function-like macro"); + CHECK_PP_OUTPUT_EXACT("#define ADD(a,b) a + b\nADD(1, 2)\n", "1 + 2\n"); + CHECK_PP_OUTPUT_EXACT( + "#define MAX(a,b) ((a) > (b) ? (a) : (b))\nMAX(10, 20)\n", + "((10) > (20) ? (10) : (20))\n"); +} + +static void test_define_stringify_operator(void) { + TEST_CASE("stringify operator (#)"); + CHECK_PP_OUTPUT_EXACT("#define STRINGIFY(x) #x\nSTRINGIFY(hello)\n", + "\"hello\"\n"); + CHECK_PP_OUTPUT_EXACT("#define STR(x) #x\nSTR(test value)\n", + "\"test value\"\n"); +} + +static void test_define_concat_operator(void) { + TEST_CASE("concatenation operator (##)"); + CHECK_PP_OUTPUT_EXACT("#define CONCAT(a,b) a##b\nCONCAT(hello,world)\n", + "helloworld\n"); + CHECK_PP_OUTPUT_EXACT("#define JOIN(pre,suf) pre ## suf\nJOIN(var, 123)\n", + "var123\n"); +} + +static void test_define_nested_macros(void) { + TEST_CASE("nested macros"); + CHECK_PP_OUTPUT_EXACT( + "#define MAX 100\n#define TWICE_MAX (MAX * 2)\nTWICE_MAX\n", + "(100 * 2)\n"); + CHECK_PP_OUTPUT_EXACT( + "#define A 1\n#define B (A + 1)\n#define C (B + 1)\nC\n", + "((1 + 1) + 1)\n"); +} + +static void test_undef_macros(void) { + TEST_CASE("test_undef_macros"); + CHECK_PP_OUTPUT_EXACT("#define x 1\n" + "x\n" + "#undef x\n" + "x\n" + "#define x 2\n" + "x\n", + "1\nx\n2\n"); +} 
+ +static void hard_test_define_func_macros(void) { + TEST_CASE("func_macros_hard with pp_01"); + CHECK_PP_OUTPUT_EXACT("#define hash_hash # ## #\n" + "#define mkstr(a) # a\n" + "#define in_between(a) mkstr(a)\n" + "#define join(c, d) in_between(c hash_hash d)\n" + "char p[] = join(x, y);\n", + "char p[] = \"x ## y\";\n"); + + TEST_CASE("func_macros_hard with recursive define"); + CHECK_PP_OUTPUT_EXACT("#define M1(x) M2(x + 1)\n" + "#define M2(x) M1(x * 2)\n" + "M1(5)\n", + "M1(5 + 1 * 2)\n"); + CHECK_PP_OUTPUT_EXACT("#define A B\n" + "#define B C\n" + "#define C 1\n" + "A\n", + "1\n"); + + TEST_CASE("func_macros_hard with self recursive call"); + CHECK_PP_OUTPUT_EXACT("#define M(x) x\n" + "M(M(10))\n", + "10\n"); + CHECK_PP_OUTPUT_EXACT("#define M(x) M(x)\n" + "#define N(x) x\n" + "N(M(1))\n", + "M(1)\n"); + + TEST_CASE("func_macros_hard with define by macro"); + CHECK_PP_OUTPUT_EXACT("#define M1(x) M1(x + 1)\n" + "#define M2 M1\n" + "#define M3(x) x\n" + "M3(M3(M2)(0))\n", + "M1(0 + 1)\n"); + + TEST_CASE("TODO"); + CHECK_PP_OUTPUT_EXACT("#define str(x) # x\n" + "str()\n", + "\"\"\n"); + + TEST_CASE("TODO"); + CHECK_PP_OUTPUT_EXACT("#define x 1\n" + "#define f(a) f(x * (a))\n" + "f(0)\n" + "f(x)", + "f(1 * (0))\n" + "f(1 * (1))"); + CHECK_PP_OUTPUT_EXACT("#define x x(0)\n" + "#define f(a) f(x * (a))\n" + "f(f(0))\n" + "f(f(x))\n" + "f(f(a))\n", + "f(x(0) * (f(x(0) * (0))))\n" + "f(x(0) * (f(x(0) * (x(0)))))\n" + "f(x(0) * (f(x(0) * (a))))\n"); +} + +static void test_conditional_compilation(void) { + TEST_CASE("conditional compilation"); + CHECK_PP_OUTPUT_EXACT("#if 1\ntrue\n#endif\n", "true\n"); + CHECK_PP_OUTPUT_EXACT("#if 0\nfalse\n#endif\n", ""); + CHECK_PP_OUTPUT_EXACT("#define FLAG 1\n#if FLAG\ntrue\n#endif\n", "true\n"); +} + +static void test_error_cases(void) { + TEST_CASE("macro redefinition"); + // 应检测到警告或错误 + // CHECK_PP_OUTPUT_CONTAIN("#define A 1\n#define A 2\n", "warning"); + + TEST_CASE("undefined macro"); + 
CHECK_PP_OUTPUT_EXACT("UNDEFINED_MACRO\n", "UNDEFINED_MACRO\n"); +} + +static void test_edge_cases(void) { + TEST_CASE("empty macro"); + CHECK_PP_OUTPUT_EXACT("#define EMPTY\nEMPTY\n", "\n"); + + TEST_CASE("macro with only spaces"); + CHECK_PP_OUTPUT_EXACT("#define SPACE \nSPACE\n", "\n"); + + TEST_CASE("deep nesting"); + CHECK_PP_OUTPUT_EXACT("#define A B\n#define B C\n#define C 1\nA\n", "1\n"); +} + +#define TEST_LIST_CASE(func_name) {#func_name, func_name} +TEST_LIST = { + TEST_LIST_CASE(test_define_simple_no_macro), + TEST_LIST_CASE(test_define_simple_object_macro), + TEST_LIST_CASE(test_define_complex_object_macro), + TEST_LIST_CASE(test_define_object_macro_backspace), + TEST_LIST_CASE(test_define_function_macro), + TEST_LIST_CASE(test_define_stringify_operator), + TEST_LIST_CASE(test_define_concat_operator), + TEST_LIST_CASE(test_define_nested_macros), + TEST_LIST_CASE(test_undef_macros), + TEST_LIST_CASE(hard_test_define_func_macros), + {NULL, NULL}, +}; \ No newline at end of file diff --git a/src/main.c b/src/main.c index e972f47..553be3b 100644 --- a/src/main.c +++ b/src/main.c @@ -1,41 +1,22 @@ #include -#include -#include -#include +#include +#include -#include -#include -#include +// #include +// #include +// #include +// #include #include -static scc_probe_stream_t *from_file_stream(FILE *fp) { - if (fseek(fp, 0, SEEK_END) != 0) { - perror("fseek failed"); - return NULL; - } - usize fsize = ftell(fp); - if (fseek(fp, 0, SEEK_SET)) { - perror("fseek failed"); - return NULL; - } - - char *buffer = (char *)scc_malloc(fsize); - scc_memset(buffer, 0, fsize); - usize read_ret = fread(buffer, 1, fsize, fp); - fclose(fp); - - scc_probe_stream_t *stream = - scc_mem_probe_stream_alloc(buffer, read_ret, true); - return stream; -} - typedef struct { const char *input_file; const char *output_file; int verbose; - cbool dump_ast; - cbool dump_ir; + cbool emit_lex; + cbool emit_pp; + cbool emit_ast; + cbool emit_ir; } scc_config_t; static void 
setup_argparse(scc_argparse_t *argparse, scc_config_t *config, @@ -46,16 +27,20 @@ static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config, SCC_HINT_OUTPUT_FILE, SCC_HINT_INPUT_FILE, SCC_HINT_VERBOSE, + + SCC_HINT_EMIT_LEX, + SCC_HINT_EMIT_PP, SCC_HINT_EMIT_AST, SCC_HINT_EMIT_IR, }; - static const char *scc_hints_en[] = { [SCC_HINT_PROG_NAME] = "scc", [SCC_HINT_DESCRIPTION] = "A simple C compiler", [SCC_HINT_OUTPUT_FILE] = "Output file", [SCC_HINT_INPUT_FILE] = "Input source file", [SCC_HINT_VERBOSE] = "Increase verbosity (can be used multiple times)", + [SCC_HINT_EMIT_LEX] = "Generate lexer sources tokens and exit", + [SCC_HINT_EMIT_PP] = "Generate preprocessed tokens and exit", [SCC_HINT_EMIT_AST] = "Generate AST and exit", [SCC_HINT_EMIT_IR] = "Generate IR and exit", }; @@ -65,8 +50,10 @@ static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config, [SCC_HINT_OUTPUT_FILE] = "输出文件", [SCC_HINT_INPUT_FILE] = "输入源文件", [SCC_HINT_VERBOSE] = "增加详细输出(可多次使用)", - [SCC_HINT_EMIT_AST] = "生成 AST 并退出", - [SCC_HINT_EMIT_IR] = "生成 IR 并退出", + [SCC_HINT_EMIT_LEX] = "生成`源代码的词法单元`并退出", + [SCC_HINT_EMIT_PP] = "生成`预处理后的词法单元`并退出", + [SCC_HINT_EMIT_AST] = "生成`抽象语法树`并退出", + [SCC_HINT_EMIT_IR] = "生成`中间代码`并退出", }; const char **scc_hints; @@ -103,22 +90,35 @@ static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config, // -v, --verbose (计数) scc_argparse_opt_t opt_verbose; - scc_argparse_opt_init(&opt_verbose, 'v', "verbose", + scc_argparse_opt_init(&opt_verbose, 'V', "verbose", scc_hints[SCC_HINT_VERBOSE]); scc_argparse_spec_setup_count(&opt_verbose.spec, &(config->verbose)); scc_argparse_cmd_add_opt(root, &opt_verbose); - // -T, --ast + // --emit-lex + scc_argparse_opt_t opt_lex; + scc_argparse_opt_init(&opt_lex, 0, "emit-lex", + scc_hints[SCC_HINT_EMIT_LEX]); + scc_argparse_spec_setup_bool(&opt_lex.spec, &(config->emit_lex)); + scc_argparse_cmd_add_opt(root, &opt_lex); + + // --emit-pp + scc_argparse_opt_t opt_pp; + 
scc_argparse_opt_init(&opt_pp, 0, "emit-pp", scc_hints[SCC_HINT_EMIT_PP]); + scc_argparse_spec_setup_bool(&opt_pp.spec, &(config->emit_pp)); + scc_argparse_cmd_add_opt(root, &opt_pp); + + // -T, --emit-ast scc_argparse_opt_t opt_ast; scc_argparse_opt_init(&opt_ast, 'T', "emit-ast", scc_hints[SCC_HINT_EMIT_AST]); - scc_argparse_spec_setup_bool(&opt_ast.spec, &(config->dump_ast)); + scc_argparse_spec_setup_bool(&opt_ast.spec, &(config->emit_ast)); scc_argparse_cmd_add_opt(root, &opt_ast); - // -R, --ir + // -R, --emit-ir scc_argparse_opt_t opt_ir; scc_argparse_opt_init(&opt_ir, 'R', "emit-ir", scc_hints[SCC_HINT_EMIT_IR]); - scc_argparse_spec_setup_bool(&opt_ir.spec, &(config->dump_ir)); + scc_argparse_spec_setup_bool(&opt_ir.spec, &(config->emit_ir)); scc_argparse_cmd_add_opt(root, &opt_ir); } @@ -127,17 +127,42 @@ static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config, #include #endif +static void print_ring(scc_lexer_tok_ring_t *ring, int verbose) { + scc_lexer_tok_t tok = {0}; + int ret = 0; + while (1) { + scc_ring_next_consume(*ring, tok, ret); + if (ret == false || tok.type == SCC_TOK_EOF) { + break; + } + if (verbose == 0) { + scc_printf("%s ", scc_get_tok_name(tok.type)); + } else if (verbose >= 1) { + scc_printf("token [%-8s] `%s` at %s:%d:%d\n", + scc_get_tok_name(tok.type), + scc_cstring_as_cstr(&tok.lexeme), tok.loc.name, + tok.loc.line, tok.loc.col); + } + } +} + int main(int argc, const char **argv, const char **envp) { #ifdef _WIN32 SetConsoleOutputCP(CP_UTF8); SetConsoleCP(CP_UTF8); #endif + setbuf(stdout, NULL); + scc_config_t config = { .input_file = NULL, +#ifdef _WIN32 .output_file = "a.exe", +#else + .output_file = "a.out", +#endif .verbose = 0, - .dump_ast = false, - .dump_ir = false, + .emit_ast = false, + .emit_ir = false, }; scc_argparse_t argparse; setup_argparse(&argparse, &config, SCC_ARGPARSE_LANG_ZH); @@ -148,51 +173,57 @@ int main(int argc, const char **argv, const char **envp) { } scc_argparse_drop(&argparse); - 
setbuf(stdout, NULL); - FILE *fp = fopen(config.input_file, "r"); - if (!fp) { - perror("fopen"); - scc_argparse_drop(&argparse); - return 1; + scc_sstream_t sstream; + if (scc_sstream_init(&sstream, config.input_file, 1024)) { + return 0; + } + + scc_lexer_t lexer; + scc_lexer_init(&lexer, scc_sstream_to_ring(&sstream)); + if (config.emit_lex) { + scc_lexer_tok_ring_t *tok_ring = scc_lexer_to_ring(&lexer, 8, false); + print_ring(tok_ring, config.verbose); + return 0; } scc_pproc_t pproc; - scc_probe_stream_t *source_code_stream = from_file_stream(fp); - // scc_probe_stream_t *pprocessed_code_stream = - // scc_pproc_init(&pproc, source_code_stream); - - scc_lexer_t lexer; - scc_lexer_init(&lexer, source_code_stream); - scc_lexer_stream_t lexer_stream; - scc_lexer_to_stream(&lexer, &lexer_stream, false); - - scc_parser_t parser; - scc_parser_init(&parser, &lexer_stream, null); - scc_ast_translation_unit_t *translation_unit = - scc_parse_translation_unit(&parser); - - if (config.dump_ast) { - scc_tree_dump_ctx_t tree_dump; - scc_tree_dump_ctx_init(&tree_dump, true); - scc_ast_dump_node(&tree_dump, (scc_ast_node_t *)translation_unit); - scc_tree_dump_ctx_drop(&tree_dump); - return 0; + scc_pproc_init(&pproc, scc_lexer_to_ring(&lexer, 8, false)); + if (config.emit_pp) { + scc_lexer_tok_ring_t *tok_ring = scc_pproc_to_ring(&pproc, 8); + print_ring(tok_ring, config.verbose); } - scc_ir_builder_t ir_builder; - scc_ast2ir(translation_unit, &ir_builder); + scc_pproc_drop(&pproc); + scc_lexer_drop(&lexer); + scc_sstream_drop(&sstream); - if (config.dump_ir) { - scc_ir_dump_ctx_t ir_dump_ctx; - scc_tree_dump_ctx_t tree_dump; // 仅为 ir dump 辅助 - scc_tree_dump_ctx_init(&tree_dump, true); - scc_ir_dump_ctx_init(&ir_dump_ctx, &tree_dump, &ir_builder.cprog, - &ir_builder.ctx); - // scc_ir_dump_cprog(&ir_dump_ctx); - scc_ir_dump_cprog_linear(&ir_dump_ctx); - scc_tree_dump_ctx_drop(&tree_dump); - return 0; - } + // scc_parser_t parser; + // scc_parser_init(&parser, &lexer_stream, 
null); + // scc_ast_translation_unit_t *translation_unit = + // scc_parse_translation_unit(&parser); + + // if (config.emit_ast) { + // scc_tree_dump_ctx_t tree_dump; + // scc_tree_dump_ctx_init(&tree_dump, true); + // scc_ast_dump_node(&tree_dump, (scc_ast_node_t *)translation_unit); + // scc_tree_dump_ctx_drop(&tree_dump); + // return 0; + // } + + // scc_ir_builder_t ir_builder; + // scc_ast2ir(translation_unit, &ir_builder); + + // if (config.emit_ir) { + // scc_ir_dump_ctx_t ir_dump_ctx; + // scc_tree_dump_ctx_t tree_dump; // 仅为 ir dump 辅助 + // scc_tree_dump_ctx_init(&tree_dump, true); + // scc_ir_dump_ctx_init(&ir_dump_ctx, &tree_dump, &ir_builder.cprog, + // &ir_builder.ctx); + // // scc_ir_dump_cprog(&ir_dump_ctx); + // scc_ir_dump_cprog_linear(&ir_dump_ctx); + // scc_tree_dump_ctx_drop(&tree_dump); + // return 0; + // } scc_printf("output exe at %s", config.output_file); return 0;