From 07a76d82f4f25a5d1f868e16856b0a354d2e7d1f Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Sat, 13 Dec 2025 16:09:46 +0800 Subject: [PATCH] feat(lex_parser, pprocessor): rename identifier header check and add macro system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename `scc_lex_parse_is_identifier_header` to `scc_lex_parse_is_identifier_prefix` for clarity and add a TODO comment - Update lexer to use the renamed function for consistency - Fix package and dependency names in `cbuild.toml` (`smcc_pprocesser` → `scc_pprocesser`, `smcc_lex_parser` → `lex_parser`) - Introduce new macro system with header file `pp_macro.h` defining macro types, structures, and management functions - Refactor preprocessor initialization and cleanup in `pprocessor.c` to use new macro table and stream handling - Replace legacy `hashmap` with `scc_pp_macro_table_t` for macro storage - Improve error handling and resource management in preprocessor lifecycle --- libs/lex_parser/include/lex_parser.h | 3 +- libs/lex_parser/src/lex_parser.c | 3 +- libs/pprocessor/cbuild.toml | 4 +- libs/pprocessor/include/pp_macro.h | 90 ++++++ libs/pprocessor/include/pp_parse.h | 11 + libs/pprocessor/include/pp_token.h | 42 +-- libs/pprocessor/include/pprocessor.h | 60 ++-- libs/pprocessor/src/macro.c | 207 ++++++++++++ libs/pprocessor/src/parse.c | 411 +++++++++++++++++++++++ libs/pprocessor/src/pprocessor.c | 449 ++++---------------------- libs/pprocessor/tests/test_pp.c | 40 ++- libs/pprocessor/tests/test_run.c | 14 +- libs/pprocessor/tests/test_unit.c | 81 +++-- runtime/libcore/include/core_str.h | 8 +- runtime/libcore/include/core_stream.h | 12 +- runtime/libcore/src/stream.c | 25 +- 16 files changed, 970 insertions(+), 490 deletions(-) create mode 100644 libs/pprocessor/include/pp_macro.h create mode 100644 libs/pprocessor/include/pp_parse.h create mode 100644 libs/pprocessor/src/macro.c create mode 100644 libs/pprocessor/src/parse.c diff 
--git a/libs/lex_parser/include/lex_parser.h b/libs/lex_parser/include/lex_parser.h index 6424440..757e815 100644 --- a/libs/lex_parser/include/lex_parser.h +++ b/libs/lex_parser/include/lex_parser.h @@ -11,7 +11,8 @@ static inline cbool scc_lex_parse_is_whitespace(int ch) { return ch == ' ' || ch == '\t'; } -static inline cbool scc_lex_parse_is_identifier_header(int ch) { +// TODO identifier check is right? +static inline cbool scc_lex_parse_is_identifier_prefix(int ch) { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_'; } diff --git a/libs/lex_parser/src/lex_parser.c b/libs/lex_parser/src/lex_parser.c index 168d7a9..09f2c2c 100644 --- a/libs/lex_parser/src/lex_parser.c +++ b/libs/lex_parser/src/lex_parser.c @@ -417,8 +417,7 @@ cbool scc_lex_parse_identifier(scc_probe_stream_t *input, scc_pos_t *pos, if (ch == scc_stream_eof) { LOG_WARN("Unexpected EOF at begin"); - } else if (ch == '_' || (ch >= 'a' && ch <= 'z') || - (ch >= 'A' && ch <= 'Z')) { + } else if (scc_lex_parse_is_identifier_prefix(ch)) { while (1) { scc_cstring_append_ch(output, ch); scc_probe_stream_consume(stream); diff --git a/libs/pprocessor/cbuild.toml b/libs/pprocessor/cbuild.toml index 89f6d34..d42e612 100644 --- a/libs/pprocessor/cbuild.toml +++ b/libs/pprocessor/cbuild.toml @@ -1,8 +1,8 @@ [package] -name = "smcc_pprocesser" +name = "scc_pprocesser" dependencies = [ { name = "libcore", path = "../../runtime/libcore" }, { name = "libutils", path = "../../runtime/libutils" }, - { name = "smcc_lex_parser", path = "../lex_parser" }, + { name = "lex_parser", path = "../lex_parser" }, ] diff --git a/libs/pprocessor/include/pp_macro.h b/libs/pprocessor/include/pp_macro.h new file mode 100644 index 0000000..94baf8b --- /dev/null +++ b/libs/pprocessor/include/pp_macro.h @@ -0,0 +1,90 @@ +#ifndef __SCC_PP_MACRO_H__ +#define __SCC_PP_MACRO_H__ + +#include +#include + +// 宏定义类型 +typedef enum { + SCC_PP_MACRO_OBJECT, // 对象宏 + SCC_PP_MACRO_FUNCTION, // 函数宏 +} scc_pp_macro_type_t; + 
+typedef SCC_VEC(scc_cstring_t) scc_pp_macro_list_t;
+
+// Macro definition record
+typedef struct scc_macro {
+    scc_cstring_t name;           // macro name
+    scc_pp_macro_type_t type;     // macro kind
+    scc_pp_macro_list_t replaces; // replacement list
+    scc_pp_macro_list_t params;   // parameter list (function-like macros only)
+} scc_pp_macro_t;
+
+typedef struct scc_macro_table {
+    scc_hashtable_t table; // macro definition table
+} scc_macro_table_t;
+
+/**
+ * @brief Create a macro object
+ * @param name macro name
+ * @param type macro kind
+ * @return pointer to the created macro object, NULL on failure
+ */
+scc_pp_macro_t *scc_pp_macro_create(const scc_cstring_t *name,
+                                    scc_pp_macro_type_t type);
+
+/**
+ * @brief Destroy a macro object
+ * @param macro macro object to destroy
+ */
+void scc_pp_macro_drop(scc_pp_macro_t *macro);
+
+/**
+ * @brief Compress whitespace (NOTE: its definition in macro.c is commented out)
+ * @param tokens token list
+ * @return the compressed string
+ */
+scc_cstring_t scc_pp_compress_whitespace(const scc_pp_macro_list_t *tokens);
+
+/**
+ * @brief Add an object-like macro
+ * @param pp macro table that receives the macro
+ * @param name macro name
+ * @param replacement replacement text list
+ * @return true on success, false on failure
+ */
+cbool scc_pp_add_object_macro(scc_macro_table_t *pp, const scc_cstring_t *name,
+                              const scc_pp_macro_list_t *replacement);
+
+/**
+ * @brief Add a function-like macro
+ * @param pp macro table that receives the macro
+ * @param name macro name
+ * @param params parameter list
+ * @param replacement replacement text list
+ * @return true on success, false on failure
+ */
+cbool scc_pp_add_function_macro(scc_macro_table_t *pp,
+                                const scc_cstring_t *name,
+                                const scc_pp_macro_list_t *params,
+                                const scc_pp_macro_list_t *replacement);
+
+/**
+ * @brief Look up a macro definition
+ * @param pp macro table to search
+ * @param name macro name
+ * @return pointer to the macro found, NULL if not found
+ */
+scc_pp_macro_t *scc_pp_find_macro(scc_macro_table_t *pp, scc_cstring_t *name);
+
+/**
+ * @brief Remove a macro from the macro table
+ * @param pp macro table to remove from
+ * @param name macro name
+ * @return true if the macro was removed, false if it was not found
+ */
+cbool scc_pp_remove_macro(scc_macro_table_t *pp, const scc_cstring_t *name);
+
+void scc_pp_marco_table_init(scc_macro_table_t *macros); // NOTE: "marco" (sic) is the spelling macro.c defines
+void scc_pp_macro_table_drop(scc_macro_table_t *macros);
+#endif /* __SCC_PP_MACRO_H__ */
diff --git a/libs/pprocessor/include/pp_parse.h b/libs/pprocessor/include/pp_parse.h
new file mode 100644
index 0000000..f8232d0 --- /dev/null +++ b/libs/pprocessor/include/pp_parse.h @@ -0,0 +1,11 @@ +#ifndef __SCC_PP_PARSE_H__ +#define __SCC_PP_PARSE_H__ + +#include +#include +void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos, + scc_macro_table_t *macros); +cbool scc_pp_expand_macro(scc_probe_stream_t *stream, scc_macro_table_t *macros, + scc_probe_stream_t **out_stream, int depth); + +#endif /* __SCC_PP_PARSE_H__ */ diff --git a/libs/pprocessor/include/pp_token.h b/libs/pprocessor/include/pp_token.h index 02d680f..e751a6c 100644 --- a/libs/pprocessor/include/pp_token.h +++ b/libs/pprocessor/include/pp_token.h @@ -1,30 +1,30 @@ -#ifndef __SMCC_PP_TOKEN_H__ -#define __SMCC_PP_TOKEN_H__ +#ifndef __SCC_PP_TOKEN_H__ +#define __SCC_PP_TOKEN_H__ /* clang-format off */ /// https://cppreference.cn/w/c/preprocessor -#define PP_INST_TOKEN \ - X(define , PP_STD, PP_TOK_DEFINE ) \ - X(undef , PP_STD, PP_TOK_UNDEF ) \ - X(include , PP_STD, PP_TOK_INCLUDE ) \ - X(if , PP_STD, PP_TOK_IF ) \ - X(ifdef , PP_STD, PP_TOK_IFDEF ) \ - X(ifndef , PP_STD, PP_TOK_IFNDEF ) \ - X(else , PP_STD, PP_TOK_ELSE ) \ - X(elif , PP_STD, PP_TOK_ELIF ) \ - X(elifdef , PP_STD, PP_TOK_ELIFDEF ) \ - X(elifndef , PP_C23, PP_TOK_ELIFNDEF ) \ - X(endif , PP_STD, PP_TOK_ENDIF ) \ - X(line , PP_STD, PP_TOK_LINE ) \ - X(embed , PP_C23, PP_TOK_EMBED ) \ - X(error , PP_STD, PP_TOK_ERROR ) \ - X(warning , PP_C23, PP_TOK_WARNING ) \ - X(pragma , PP_STD, PP_TOK_PRAMA ) \ +#define SCC_PP_INST_TOKEN \ + X(define , SCC_PP_STD, SCC_PP_TOK_DEFINE ) \ + X(undef , SCC_PP_STD, SCC_PP_TOK_UNDEF ) \ + X(include , SCC_PP_STD, SCC_PP_TOK_INCLUDE ) \ + X(if , SCC_PP_STD, SCC_PP_TOK_IF ) \ + X(ifdef , SCC_PP_STD, SCC_PP_TOK_IFDEF ) \ + X(ifndef , SCC_PP_STD, SCC_PP_TOK_IFNDEF ) \ + X(else , SCC_PP_STD, SCC_PP_TOK_ELSE ) \ + X(elif , SCC_PP_STD, SCC_PP_TOK_ELIF ) \ + X(elifdef , SCC_PP_STD, SCC_PP_TOK_ELIFDEF ) \ + X(elifndef , SCC_PP_C23, SCC_PP_TOK_ELIFNDEF ) \ + X(endif , SCC_PP_STD, SCC_PP_TOK_ENDIF ) \ + 
X(line , SCC_PP_STD, SCC_PP_TOK_LINE ) \ + X(embed , SCC_PP_C23, SCC_PP_TOK_EMBED ) \ + X(error , SCC_PP_STD, SCC_PP_TOK_ERROR ) \ + X(warning , SCC_PP_C23, SCC_PP_TOK_WARNING ) \ + X(pragma , SCC_PP_STD, SCC_PP_TOK_PRAMA ) \ // END /* clang-format on */ #define X(name, type, tok) tok, -typedef enum pp_token { PP_INST_TOKEN } pp_token_t; +typedef enum scc_pp_token { SCC_PP_INST_TOKEN } scc_pp_token_t; #undef X -#endif /* __SMCC_PP_TOKEN_H__ */ +#endif /* __SCC_PP_TOKEN_H__ */ diff --git a/libs/pprocessor/include/pprocessor.h b/libs/pprocessor/include/pprocessor.h index 4f2a08d..cc676fb 100644 --- a/libs/pprocessor/include/pprocessor.h +++ b/libs/pprocessor/include/pprocessor.h @@ -1,30 +1,14 @@ -// pprocessor.h - 更新后的头文件 /** * @file pprocessor.h * @brief C语言预处理器核心数据结构与接口 */ -#ifndef __SMCC_PP_H__ -#define __SMCC_PP_H__ +#ifndef __SCC_PP_H__ +#define __SCC_PP_H__ #include #include - -// 宏定义类型 -typedef enum { - MACRO_OBJECT, // 对象宏 - MACRO_FUNCTION, // 函数宏 -} macro_type_t; - -typedef VEC(cstring_t) macro_list_t; - -// 宏定义结构 -typedef struct smcc_macro { - cstring_t name; // 宏名称 - macro_type_t type; // 宏类型 - macro_list_t replaces; // 替换列表 - macro_list_t params; // 参数列表(仅函数宏) -} smcc_macro_t; +#include // 条件编译状态 typedef enum { @@ -41,12 +25,12 @@ typedef struct if_stack_item { } if_stack_item_t; // 预处理器状态结构 -typedef struct smcc_preprocessor { - core_stream_t *stream; // 输出流 - strpool_t strpool; // 字符串池 - hashmap_t macros; // 宏定义表 - VEC(if_stack_item_t) if_stack; // 条件编译栈 -} smcc_pp_t; +typedef struct scc_pproc { + scc_probe_stream_t *stream; // 输出流 + scc_strpool_t strpool; // 字符串池 + scc_macro_table_t macro_table; + SCC_VEC(if_stack_item_t) if_stack; // 条件编译栈 +} scc_pproc_t; /** * @brief 初始化预处理器 @@ -54,19 +38,25 @@ typedef struct smcc_preprocessor { * @param[in] input 输入流对象指针 * @return output 输出流对象指针 */ -core_stream_t *pp_init(smcc_pp_t *pp, core_stream_t *input); - -/** - * @brief 执行预处理 - * @param[in] pp 预处理器实例 - * @return 处理结果 - */ -int pp_process(smcc_pp_t *pp); +// 
TODO memory release problem
+scc_probe_stream_t *scc_pproc_init(scc_pproc_t *pp, scc_probe_stream_t *input);
 
 /**
  * @brief 销毁预处理器
  * @param[in] pp 预处理器实例
  */
-void pp_drop(smcc_pp_t *pp);
+void scc_pproc_drop(scc_pproc_t *pp);
 
-#endif /* __SMCC_PP_H__ */
+/// inner private struct
+
+typedef SCC_VEC(u8) scc_pp_buffer_t;
+typedef struct pp_stream {
+    scc_probe_stream_t stream;
+    scc_probe_stream_t *input;
+    scc_pproc_t *self;
+
+    scc_pos_t pos;
+    scc_probe_stream_t *tmp_stream;
+} scc_pp_stream_t;
+
+#endif /* __SCC_PP_H__ */
diff --git a/libs/pprocessor/src/macro.c b/libs/pprocessor/src/macro.c
new file mode 100644
index 0000000..705c3b8
--- /dev/null
+++ b/libs/pprocessor/src/macro.c
@@ -0,0 +1,207 @@
+#include
+
+// Create a macro object; returns null when the allocation fails
+scc_pp_macro_t *scc_pp_macro_create(const scc_cstring_t *name,
+                                    scc_pp_macro_type_t type) {
+    scc_pp_macro_t *macro = scc_malloc(sizeof(scc_pp_macro_t));
+    if (!macro) {
+        LOG_ERROR("Failed to allocate memory for macro");
+        return null;
+    }
+
+    macro->name = scc_cstring_from_cstr(scc_cstring_as_cstr(name));
+    macro->type = type;
+    scc_vec_init(macro->params);
+    scc_vec_init(macro->replaces);
+
+    return macro;
+}
+
+// Destroy a macro object: its name, both lists and the object itself
+void scc_pp_macro_drop(scc_pp_macro_t *macro) {
+    if (!macro)
+        return;
+
+    scc_cstring_free(&macro->name);
+
+    // Free the parameter list
+    for (usize i = 0; i < macro->params.size; ++i) {
+        scc_cstring_free(&scc_vec_at(macro->params, i));
+    }
+    scc_vec_free(macro->params);
+
+    // Free the replacement list
+    for (usize i = 0; i < macro->replaces.size; ++i) {
+        scc_cstring_free(&scc_vec_at(macro->replaces, i));
+    }
+    scc_vec_free(macro->replaces);
+
+    scc_free(macro);
+}
+
+// // Compress whitespace
+// scc_cstring_t scc_pp_compress_whitespace(const scc_pp_macro_list_t *tokens) {
+//     scc_cstring_t combined = scc_cstring_new();
+//     cbool last_was_space = false;
+
+//     for (usize i = 0; i < tokens->size; ++i) {
+//         scc_cstring_t *token = &scc_vec_at(*tokens, i);
+//         const char *str = scc_cstring_as_cstr(token);
+//         usize len = scc_cstring_len(token);
+
+//         for (usize j = 0; j < len; ++j) {
+//             char ch = str[j];
+//             if (ch == ' ' || ch == '\t') {
+//                 if (!last_was_space && !scc_cstring_is_empty(&combined)) {
+//                     scc_cstring_append_ch(&combined, ' ');
+//                     last_was_space = true;
+//                 }
+//             } else {
+//                 scc_cstring_append_ch(&combined, ch);
+//                 last_was_space = false;
+//             }
+//         }
+
+//         // Add one space between tokens (unless there already is one)
+//         if (i + 1 < tokens->size && !last_was_space &&
+//             !scc_cstring_is_empty(&combined)) {
+//             scc_cstring_append_ch(&combined, ' ');
+//             last_was_space = true;
+//         }
+//     }
+
+//     // Strip trailing spaces
+//     while (!scc_cstring_is_empty(&combined) &&
+//            (combined.data[combined.size - 1] == ' ' ||
+//             combined.data[combined.size - 1] == '\t')) {
+//         combined.size--;
+//     }
+
+//     return combined;
+// }
+
+// Add an object-like macro; false on a null argument or failed allocation
+cbool scc_pp_add_object_macro(scc_macro_table_t *macros,
+                              const scc_cstring_t *name,
+                              const scc_pp_macro_list_t *replacement) {
+    if (!macros || !name || !replacement)
+        return false;
+
+    scc_pp_macro_t *macro = scc_pp_macro_create(name, SCC_PP_MACRO_OBJECT);
+    if (!macro)
+        return false;
+
+    // if (replacement->size > 0) {
+    //     scc_cstring_t combined = scc_pp_compress_whitespace(replacement);
+    //     scc_vec_push(macro->replaces, combined);
+    //     // free the original tokens
+    //     for (usize i = 0; i < replacement->size; ++i) {
+    //         scc_cstring_free(&scc_vec_at(*replacement, i));
+    //     }
+    // }
+    macro->replaces = *replacement; // shallow copy: the macro now owns the entries
+
+    // Redefinition: unlink the old entry before freeing it (its name is the key)
+    scc_pp_macro_t *existing = scc_hashtable_get(&macros->table, &macro->name);
+    if (existing) {
+        LOG_WARN("Redefining macro: %s", scc_cstring_as_cstr(&macro->name));
+        scc_hashtable_del(&macros->table, &existing->name);
+        scc_pp_macro_drop(existing);
+    }
+    scc_hashtable_set(&macros->table, &macro->name, macro);
+    return true;
+}
+
+// Add a function-like macro; false on a null argument or failed allocation
+cbool scc_pp_add_function_macro(scc_macro_table_t *macros,
+                                const scc_cstring_t *name,
+                                const scc_pp_macro_list_t *params,
+                                const scc_pp_macro_list_t *replacement) {
+    if (!macros || !name || !params || !replacement)
+        return false;
+
+    scc_pp_macro_t *macro = scc_pp_macro_create(name, SCC_PP_MACRO_FUNCTION);
+    if (!macro)
+        return false;
+
+    // Adopt both lists by shallow copy; scc_pp_macro_drop frees their entries
+    macro->params = *params;
+    macro->replaces = *replacement;
+
+    // if (replacement->size > 0) {
+    //     // a function-like macro stores the replacement text directly
+    //     scc_cstring_t combined = scc_pp_compress_whitespace(replacement);
+    //     scc_vec_push(macro->replaces, combined);
+
+    //     // free the original tokens
+    //     for (usize i = 0; i < replacement->size; ++i) {
+    //         scc_cstring_free(&scc_vec_at(*replacement, i));
+    //     }
+    // }
+
+    // Redefinition: unlink the old entry before freeing it (its name is the key)
+    scc_pp_macro_t *existing = scc_hashtable_get(&macros->table, &macro->name);
+    if (existing) {
+        LOG_WARN("Redefining macro: %s", scc_cstring_as_cstr(&macro->name));
+        scc_hashtable_del(&macros->table, &existing->name);
+        scc_pp_macro_drop(existing);
+    }
+    scc_hashtable_set(&macros->table, &macro->name, macro);
+    return true;
+}
+
+// Look up a macro definition; returns whatever the table holds for name
+scc_pp_macro_t *scc_pp_find_macro(scc_macro_table_t *pp, scc_cstring_t *name) {
+    return scc_hashtable_get(&pp->table, name);
+}
+
+// Remove a macro from the table; false when an argument is null or it is absent
+cbool scc_pp_remove_macro(scc_macro_table_t *pp, const scc_cstring_t *name) {
+    if (!pp || !name)
+        return false;
+
+    scc_pp_macro_t *macro = scc_hashtable_get(&pp->table, name);
+    if (!macro)
+        return false;
+
+    scc_hashtable_del(&pp->table, name);
+    scc_pp_macro_drop(macro);
+    return true;
+}
+
+/// macro table
+
+static u32 hash_func(const void *key) { // keys are scc_cstring_t macro names
+    const scc_cstring_t *string = (const scc_cstring_t *)key;
+    return scc_strhash32(scc_cstring_as_cstr(string));
+}
+
+static int hash_cmp(const void *key1, const void *key2) {
+    const scc_cstring_t *str1 = (const scc_cstring_t *)key1;
+    const scc_cstring_t *str2 = (const scc_cstring_t *)key2;
+
+    if (str1->size != str2->size) {
+        return str1->size < str2->size ? -1 : 1; // no size difference squeezed into int
+    }
+    return scc_strcmp(scc_cstring_as_cstr(str1), scc_cstring_as_cstr(str2));
+}
+
+void scc_pp_marco_table_init(scc_macro_table_t *macros) {
+    Assert(macros != null);
+    macros->table.hash_func = hash_func;
+    macros->table.key_cmp = hash_cmp;
+    scc_hashtable_init(&macros->table);
+}
+
+static int macro_free(const void *key, void *value, void *context) {
+    (void)key;
+    (void)context;
+    scc_pp_macro_drop(value);
+    return 0;
+}
+
+void 
scc_pp_macro_table_drop(scc_macro_table_t *macros) {
+    Assert(macros != null);
+    scc_hashtable_foreach(&macros->table, macro_free, null);
+    scc_hashtable_drop(&macros->table);
+}
diff --git a/libs/pprocessor/src/parse.c b/libs/pprocessor/src/parse.c
new file mode 100644
index 0000000..3a77d73
--- /dev/null
+++ b/libs/pprocessor/src/parse.c
@@ -0,0 +1,411 @@
+#include
+#include
+#include
+#include
+
+static const struct {
+    const char *name;
+    scc_pp_token_t tok;
+} keywords[] = {
+#define X(name, type, tok) {#name, tok},
+    SCC_PP_INST_TOKEN
+#undef X
+};
+
+/**
+ * @brief Look up a directive name in keywords[].
+ *
+ * keywords[] is generated from SCC_PP_INST_TOKEN in declaration order
+ * (define, undef, include, ...), which is not sorted, so a binary
+ * search would miss entries such as "undef". The table is small, so it
+ * is scanned linearly instead.
+ * @param name directive name; at most len bytes are read
+ * @param len  number of bytes of name to match
+ * @return index into keywords[], or -1 when name is not a directive
+ */
+static inline int keyword_cmp(const char *name, int len) {
+    const int count = (int)(sizeof(keywords) / sizeof(keywords[0]));
+    if (name == null || len <= 0) {
+        return -1;
+    }
+    for (int idx = 0; idx < count; idx++) {
+        const char *key = keywords[idx].name;
+        int i = 0;
+        // Walk both strings while they agree, never past the keyword's end.
+        while (i < len && key[i] != '\0' && name[i] == key[i]) {
+            i++;
+        }
+        // Exact match: all len bytes agreed and the keyword ends here too.
+        if (i == len && key[i] == '\0') {
+            return idx;
+        }
+    }
+    return -1; // Not a keyword.
+}
+
+// Push a non-empty pending entry onto list and start a fresh one
+static inline void try_to_cut_list(scc_pp_macro_list_t *list,
+                                   scc_cstring_t *buff) {
+    if (scc_cstring_len(buff) != 0) {
+        scc_vec_push(*list, *buff);
+        *buff = scc_cstring_new();
+    }
+}
+
+/**
+ * @brief Split the rest of the current line into replacement-list entries.
+ * Identifiers become separate entries; blanks and '#' only close the pending
+ * entry ('#' and '##' are not handled yet). Stops before the line ending.
+ * @return always true; *list is initialised here and then filled
+ */
+static cbool parse_macro_replace_list(scc_probe_stream_t *stream,
+                                      scc_pp_macro_list_t *list) {
+    Assert(stream != null && list != null);
+    scc_probe_stream_reset(stream);
+
+    scc_vec_init(*list);
+    scc_cstring_t replacement = scc_cstring_new();
+    int ch;
+    scc_pos_t pos = scc_pos_init();
+
+    while ((ch = scc_probe_stream_peek(stream)) != scc_stream_eof) {
+        if (scc_lex_parse_is_endline(ch)) {
+            break;
+        }
+        if (scc_lex_parse_is_identifier_prefix(ch)) {
+            try_to_cut_list(list, &replacement);
+            cbool ret = scc_lex_parse_identifier(stream, &pos, &replacement);
+            Assert(ret == true);
+            try_to_cut_list(list, &replacement);
+        } else if (ch == '#') {
+            // TODO for # ##
+            scc_probe_stream_consume(stream);
+            try_to_cut_list(list, &replacement);
+        } else if (scc_lex_parse_is_whitespace(ch)) {
+            scc_probe_stream_consume(stream);
+            try_to_cut_list(list, &replacement);
+        } else {
+            scc_probe_stream_consume(stream);
+            scc_cstring_append_ch(&replacement, (char)ch);
+        }
+    }
+
+    if (scc_cstring_len(&replacement) != 0) {
+        scc_vec_push(*list, replacement);
+        replacement = scc_cstring_new();
+    }
+
+    scc_cstring_free(&pos.name); // same cleanup as parse_macro_arguments
+    return true;
+}
+
+// Parse a macro argument list
+static cbool parse_macro_arguments(scc_probe_stream_t *stream,
+                                   scc_pp_macro_list_t *args) {
+    Assert(stream != null && args != null);
+
+    scc_vec_init(*args);
+    int ch;
+    scc_probe_stream_reset(stream);
+
+    // Skip '('
+    ch = scc_probe_stream_peek(stream);
+    if (ch != '(') {
+        return false;
+    }
+    scc_probe_stream_consume(stream); // consume '('
+
+    int paren_depth = 1;
+    scc_cstring_t current_arg = scc_cstring_new();
+    scc_pos_t pos = scc_pos_init();
+
+    while (paren_depth > 0) {
+        ch = scc_probe_stream_peek(stream);
+        if (ch == 
scc_stream_eof) {
+            scc_cstring_free(&current_arg);
+            scc_cstring_free(&pos.name);
+            return false;
+        }
+
+        if (ch == '(') {
+            paren_depth++;
+            scc_cstring_append_ch(&current_arg, (char)ch);
+            scc_probe_stream_consume(stream);
+        } else if (ch == ')') {
+            paren_depth--;
+            if (paren_depth > 0) {
+                scc_cstring_append_ch(&current_arg, (char)ch);
+            }
+            scc_probe_stream_consume(stream);
+        } else if (ch == ',' && paren_depth == 1) {
+            // Argument separator
+            scc_vec_push(*args, current_arg);
+            current_arg = scc_cstring_new();
+            scc_probe_stream_consume(stream);
+            // Skip blanks after the argument
+            scc_lex_parse_skip_whitespace(stream, &pos);
+        } else {
+            scc_cstring_append_ch(&current_arg, (char)ch);
+            scc_probe_stream_consume(stream);
+        }
+    }
+
+    // Add the last argument
+    if (!scc_cstring_is_empty(&current_arg)) {
+        scc_vec_push(*args, current_arg);
+    } else {
+        scc_cstring_free(&current_arg);
+    }
+
+    scc_cstring_free(&pos.name);
+    return true;
+}
+
+// Skip blanks at the current position. Returns true, with the line ending
+// consumed, when the line ends there; otherwise false (blanks consumed).
+static cbool safe_skip_backspace_if_endline(scc_probe_stream_t *stream,
+                                            scc_pos_t *pos) {
+    scc_probe_stream_reset(stream);
+    int ch = scc_probe_stream_peek(stream);
+    // FIXME maybe it not correct
+    while (ch == '\r' || ch == '\n' || ch == ' ' || ch == '\t') {
+        if (scc_lex_parse_is_endline(ch)) {
+            scc_lex_parse_skip_endline(stream, pos);
+            return true;
+        }
+        scc_probe_stream_consume(stream);
+        ch = scc_probe_stream_peek(stream);
+    }
+    scc_probe_stream_reset(stream);
+    return false;
+}
+
+// Parse one directive line; the stream must be positioned on '#'.
+// Only #define and #undef are acted on: other known directives hit TODO()
+// and unknown ones are logged. Unless the line ending was already consumed
+// while parsing, the rest of the line is skipped exactly once (label ERR).
+void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos,
+                            scc_macro_table_t *macros) {
+    Assert(stream != null);
+    scc_probe_stream_reset(stream);
+    // Consume '#'; a '#' alone on its line is a null directive.
+    if (scc_probe_stream_peek(stream) != '#') {
+        LOG_WARN("Invalid directive");
+        return;
+    }
+    scc_pos_next(pos);
+    scc_probe_stream_consume(stream);
+    if (safe_skip_backspace_if_endline(stream, pos))
+        return;
+
+    // Both strings are initialised before any goto; FREE releases them.
+    scc_cstring_t directive = scc_cstring_new();
+    scc_cstring_t name = scc_cstring_new();
+    if (!scc_lex_parse_identifier(stream, pos, &directive)) {
+        goto ERR;
+    }
+    if (safe_skip_backspace_if_endline(stream, pos))
+        goto FREE;
+    scc_pp_token_t token = keyword_cmp(scc_cstring_as_cstr(&directive),
+                                       scc_cstring_len(&directive));
+    switch (token) {
+    case SCC_PP_TOK_DEFINE: {
+        if (!scc_lex_parse_identifier(stream, pos, &name)) {
+            goto ERR;
+        }
+        // Function-like only when '(' follows the name with no blank.
+        scc_probe_stream_reset(stream);
+        int ch = scc_probe_stream_peek(stream);
+        cbool has_whitespace = scc_lex_parse_is_whitespace(ch);
+        cbool is_function = !has_whitespace && ch == '(';
+        // line_done: the line ending is already consumed (empty body).
+        cbool line_done =
+            has_whitespace && safe_skip_backspace_if_endline(stream, pos);
+        scc_pp_macro_list_t params;
+        if (is_function) {
+            if (!parse_macro_arguments(stream, &params)) {
+                goto ERR;
+            }
+            ch = scc_probe_stream_peek(stream);
+            if (ch == ')') {
+                scc_probe_stream_consume(stream); // consume ')'
+            }
+            line_done = safe_skip_backspace_if_endline(stream, pos);
+        }
+        // An empty body still defines the macro (empty replacement list).
+        scc_pp_macro_list_t replacement;
+        scc_vec_init(replacement);
+        if (!line_done) {
+            parse_macro_replace_list(stream, &replacement);
+        }
+        if (is_function) {
+            scc_pp_add_function_macro(macros, &name, &params, &replacement);
+        } else {
+            scc_pp_add_object_macro(macros, &name, &replacement);
+        }
+        if (line_done) {
+            goto FREE;
+        }
+        break;
+    }
+    case SCC_PP_TOK_UNDEF: {
+        if (scc_lex_parse_identifier(stream, pos, &name)) {
+            // TODO ret value
+            scc_pp_remove_macro(macros, &name);
+        }
+        break;
+    }
+    case SCC_PP_TOK_INCLUDE:
+    case SCC_PP_TOK_IF:
+    case SCC_PP_TOK_IFDEF:
+    case SCC_PP_TOK_IFNDEF:
+    case SCC_PP_TOK_ELSE:
+    case SCC_PP_TOK_ELIF:
+    case SCC_PP_TOK_ELIFDEF:
+    case SCC_PP_TOK_ELIFNDEF:
+    case SCC_PP_TOK_ENDIF:
+    case SCC_PP_TOK_LINE:
+    case SCC_PP_TOK_EMBED:
+    case SCC_PP_TOK_ERROR:
+    case SCC_PP_TOK_WARNING:
+    case SCC_PP_TOK_PRAMA:
+        // Not implemented yet; the line is skipped at ERR below.
+        TODO();
+        break;
+    default:
+        LOG_WARN("Unknown preprocessor directive: %s",
+                 scc_cstring_as_cstr(&directive));
+        break;
+    }
+ERR:
+    scc_lex_parse_skip_line(stream, pos);
+FREE:
+    scc_cstring_free(&directive);
+    scc_cstring_free(&name);
+}
+
+// Placeholder for the # and ## operators (string generation); not implemented
+cbool scc_pp_expand_string() { return false; }
+
+// Expand an object-like macro into out_buff, entries joined by one space
+cbool scc_pp_expand_object_macro(scc_pp_macro_t *macro,
+                                 scc_cstring_t *out_buff) {
+    Assert(macro->type == SCC_PP_MACRO_OBJECT && macro->params.size == 0);
+    // FIXME hack cstring to init and clean
+    scc_cstring_free(out_buff);
+    for (usize i = 0; i < macro->replaces.size; ++i) {
+        scc_cstring_append(out_buff, &scc_vec_at(macro->replaces, i));
+        // YOU MUST USE + 1 to cmp because we use unsigned integer
+        if (i + 1 < macro->replaces.size) {
+            scc_cstring_append_ch(out_buff, ' ');
+        }
+    }
+    return true;
+}
+
+// Expand a function-like macro: a replacement entry that names a parameter
+// is replaced by the matching call argument. TODO ... __VA_ARGS__
+cbool scc_pp_expand_function_macro(scc_pp_macro_t *macro,
+                                   scc_pp_macro_list_t *params,
+                                   scc_cstring_t *out_buff) {
+    Assert(macro->type == SCC_PP_MACRO_FUNCTION);
+    Assert(out_buff != null);
+    // FIXME hack cstring to init and clean
+    scc_cstring_free(out_buff);
+    for (usize i = 0; i < macro->replaces.size; ++i) {
+        scc_cstring_t *piece = &scc_vec_at(macro->replaces, i);
+        // j < params->size keeps a call with too few arguments in bounds.
+        for (usize j = 0; j < macro->params.size && j < params->size; ++j) {
+            if (scc_strcmp(scc_cstring_as_cstr(piece),
+                           scc_cstring_as_cstr(
+                               &scc_vec_at(macro->params, j))) == 0) {
+                piece = &scc_vec_at(*params, j);
+                break;
+            }
+        }
+        scc_cstring_append(out_buff, piece);
+        // Compare with i + 1: the size is unsigned.
+        if (i + 1 < macro->replaces.size) {
+            scc_cstring_append_ch(out_buff, ' ');
+        }
+    }
+    return true;
+}
+
+cbool scc_pp_expand_macro(scc_probe_stream_t *stream, scc_macro_table_t *macros,
+                          scc_probe_stream_t **out_stream, int depth) {
+    // TODO self position and it maybe is a stack on #include ?
+    // Expands the identifier at the head of stream into *out_stream (null on failure).
+    Assert(stream != null && macros != null && out_stream != null);
+    if (depth <= 0) { // recursion budget exhausted
+        *out_stream = null; // a failed call never leaves the result unset
+        return false;
+    }
+    scc_cstring_t identifier = scc_cstring_new();
+    scc_pos_t pos = scc_pos_init();
+    cbool ret = scc_lex_parse_identifier(stream, &pos, &identifier);
+    Assert(ret == true);
+    scc_pp_macro_t *macro = scc_pp_find_macro(macros, &identifier);
+    if (macro == null) {
+        // Not a macro: emit the identifier unchanged.
+        // NOTE(review): identifier is never freed on this path - confirm ownership.
+        *out_stream =
+            scc_mem_probe_stream_new(scc_cstring_as_cstr(&identifier),
+                                     scc_cstring_len(&identifier), false);
+        return true;
+    }
+    scc_cstring_free(&identifier);
+
+    // Expand according to the macro kind.
+    scc_cstring_t tmp_buff = scc_cstring_new();
+    if (macro->type == SCC_PP_MACRO_OBJECT) {
+        ret = scc_pp_expand_object_macro(macro, &tmp_buff);
+        Assert(ret == true);
+    } else if (macro->type == SCC_PP_MACRO_FUNCTION) {
+        // FIXME should blanks be skipped here?
+        scc_lex_parse_skip_whitespace(stream, &pos);
+        if (scc_probe_stream_peek(stream) != '(') {
+            LOG_ERROR("Not a function and skip it");
+            goto ERR;
+        }
+        scc_pp_macro_list_t params;
+        ret = parse_macro_arguments(stream, &params);
+        Assert(ret == true);
+        ret = scc_pp_expand_function_macro(macro, &params, &tmp_buff);
+        Assert(ret == true);
+        // The arguments were appended to tmp_buff; release the list.
+        for (usize i = 0; i < params.size; ++i) {
+            scc_cstring_free(&scc_vec_at(params, i));
+        }
+        scc_vec_free(params);
+    }
+
+    scc_probe_stream_t *tmp_stream = scc_mem_probe_stream_new(
+        scc_cstring_as_cstr(&tmp_buff), scc_cstring_len(&tmp_buff), false);
+    int ch;
+    scc_cstring_t real_buff = scc_cstring_new();
+    while ((ch = scc_probe_stream_peek(tmp_stream)) != scc_stream_eof) {
+        if (scc_lex_parse_is_identifier_prefix(ch)) {
+            scc_probe_stream_t *tmp_out_stream = null;
+            scc_pp_expand_macro(tmp_stream, macros, &tmp_out_stream, depth - 1);
+            Assert(tmp_out_stream != null);
+            while (scc_probe_stream_peek(tmp_out_stream) != scc_stream_eof) {
+                scc_cstring_append_ch(&real_buff,
+                                      scc_probe_stream_consume(tmp_out_stream));
+            }
+            Assert(tmp_out_stream != null && tmp_out_stream->drop != null);
+            scc_probe_stream_drop(tmp_out_stream);
+        } else {
+            scc_cstring_append_ch(&real_buff,
+                                  scc_probe_stream_consume(tmp_stream));
+        }
+    }
+    scc_cstring_free(&tmp_buff);
+    scc_probe_stream_drop(tmp_stream);
+    *out_stream = scc_mem_probe_stream_new(scc_cstring_as_cstr(&real_buff),
+                                           scc_cstring_len(&real_buff), false);
+    return true;
+ERR:
+    *out_stream = null;
+    return false;
+}
diff --git a/libs/pprocessor/src/pprocessor.c b/libs/pprocessor/src/pprocessor.c index 72ab98f..afb12b9 100644 --- a/libs/pprocessor/src/pprocessor.c +++ b/libs/pprocessor/src/pprocessor.c @@ -4,424 +4,109 @@ */ #include +#include +#include #include #include -#define PPROCESSER_BUFFER_SIZE (1024) -static u32 hash_func(cstring_t *string) { - return smcc_strhash32(cstring_as_cstr(string)); -} +#define MAX_MACRO_EXPANSION_DEPTH 64 // maximum expansion depth, guards against infinite recursion -static int hash_cmp(const cstring_t *str1, const cstring_t *str2) { - if (str1->size != str2->size) { - return str1->size - str2->size; - } - - return smcc_strcmp(cstring_as_cstr(str1), cstring_as_cstr(str2)); -} - -// 添加宏定义 -static void add_macro(smcc_pp_t *pp, const cstring_t *name, - const macro_list_t *replaces, const macro_list_t *params, - macro_type_t type) { - smcc_macro_t *macro = smcc_malloc(sizeof(smcc_macro_t)); - macro->name = *name; - macro->type = type; - - if (replaces) { - macro->replaces = *replaces; - } else { - vec_init(macro->replaces); - } - - if (params) { - macro->params = *params; - } else { - vec_init(macro->params); - } - - hashmap_set(&pp->macros, &macro->name, macro); -} - -// 查找宏定义 -static smcc_macro_t *find_macro(smcc_pp_t *pp, cstring_t *name) { - return hashmap_get(&pp->macros, name); -} - -// 条件编译处理框架 -static void handle_if(smcc_pp_t *pp, const char *condition) { - if_stack_item_t item; - int cond_value; - // cond_value = evaluate_condition(pp, condition); - - item.state = cond_value ? 
IFState_TRUE : IFState_FALSE; - item.skip = !cond_value; - vec_push(pp->if_stack, item); -} - -static void handle_else(smcc_pp_t *pp) { - if (pp->if_stack.size == 0) { - // 错误:没有匹配的#if - return; - } - - if_stack_item_t *top = &vec_at(pp->if_stack, pp->if_stack.size - 1); - if (top->state == IFState_ELSE) { - // 错误:#else重复出现 - return; - } - - top->skip = !top->skip; - top->state = IFState_ELSE; -} - -static void handle_include(smcc_pp_t *pp, const char *filename, - int system_header) { - // 查找文件路径逻辑 - // 创建新的输入流 - // 递归处理包含文件 -} - -// 解析标识符 -static cstring_t parse_identifier(core_stream_t *stream) { - cstring_t identifier = cstring_new(); - core_stream_reset_char(stream); - int ch = core_stream_peek_char(stream); - - // 标识符以字母或下划线开头 - if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')) { - LOG_WARN("Invalid identifier"); - return identifier; - } - do { - cstring_push(&identifier, (char)ch); - core_stream_next_char(stream); // 消费字符 - ch = core_stream_peek_char(stream); - } while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || - (ch >= '0' && ch <= '9') || ch == '_'); - - return identifier; -} - -// 跳过空白字符 ' ' and '\t' -static void skip_whitespace(core_stream_t *stream) { - int ch; - core_stream_reset_char(stream); - while ((ch = core_stream_peek_char(stream)) != core_stream_eof) { - if (ch == ' ' || ch == '\t') { - core_stream_next_char(stream); - } else { - break; - } - } -} - -#define X(name, type, tok) SMCC_STR(name), -static const char *token_strings[] = {PP_INST_TOKEN}; -#undef X - -static const struct { - const char *name; - pp_token_t tok; -} keywords[] = { -#define X(name, type, tok) {#name, tok}, - PP_INST_TOKEN -#undef X -}; - -// by using binary search to find the keyword -static inline int keyword_cmp(const char *name, int len) { - int low = 0; - int high = sizeof(keywords) / sizeof(keywords[0]) - 1; - while (low <= high) { - int mid = (low + high) / 2; - const char *key = keywords[mid].name; - int cmp = 0; - - // 自定义字符串比较逻辑 
- for (int i = 0; i < len; i++) { - if (name[i] != key[i]) { - cmp = (unsigned char)name[i] - (unsigned char)key[i]; - break; - } - if (name[i] == '\0') - break; // 遇到终止符提前结束 - } - - if (cmp == 0) { - // 完全匹配检查(长度相同) - if (key[len] == '\0') - return mid; - cmp = -1; // 当前关键词比输入长 - } - - if (cmp < 0) { - high = mid - 1; - } else { - low = mid + 1; - } - } - return -1; // Not a keyword. -} - -typedef struct pp_stream { - core_stream_t stream; - core_stream_t *input; - smcc_pp_t *self; - - usize size; - usize pos; - char buffer[PPROCESSER_BUFFER_SIZE]; -} pp_stream_t; - -static cbool parse_list(pp_stream_t *_stream, macro_list_t *list, - cbool is_param) { - Assert(_stream != null); - core_stream_t *stream = _stream->input; +static int pp_stream_read_char(scc_probe_stream_t *_stream) { + scc_pp_stream_t *stream = (scc_pp_stream_t *)_stream; Assert(stream != null); - core_stream_reset_char(stream); - - vec_init(*list); int ch; - cstring_t str = cstring_new(); - core_pos_t pos; - while ((ch = core_stream_peek_char(stream)) != core_stream_eof) { - if (is_param) { - // ( 参数 ) ( 参数, ... ) ( ... 
) - if (lex_parse_is_whitespace(ch)) { - // TODO #define ( A A , B ) need ERROR - lex_parse_skip_whitespace(stream, &pos); - core_stream_reset_char(stream); - } else if (ch == ',') { - vec_push(*list, str); - str = cstring_new(); - core_stream_next_char(stream); - continue; - } else if (ch == ')') { - break; - } else if (ch == core_stream_eof || lex_parse_is_endline(ch)) { - LOG_ERROR("Invalid parameter list"); - return false; - } - } else { - // 替换列表 - if (lex_parse_is_whitespace(ch)) { - lex_parse_skip_whitespace(stream, &pos); - vec_push(*list, str); - str = cstring_new(); - core_stream_reset_char(stream); - continue; - } else if (lex_parse_is_endline(ch)) { - break; - } - } - core_stream_next_char(stream); - cstring_push(&str, (char)ch); - } - vec_push(*list, str); - str = cstring_new(); - return true; -} - -// 解析预处理指令 -static void parse_directive(pp_stream_t *_stream) { - Assert(_stream != null); - core_stream_t *stream = _stream->input; - Assert(stream != null); - - int ch; - core_pos_t pos; - core_stream_reset_char(stream); - // 跳过 '#' 和后续空白 - if (core_stream_peek_char(stream) != '#') { - LOG_WARN("Invalid directive"); - return; - } - core_stream_next_char(stream); - - // TODO 允许空指令(# 后跟换行符),且无任何效果。 - skip_whitespace(stream); - // 解析指令名称 - cstring_t directive = parse_identifier(stream); - if (cstring_is_empty(&directive)) { - LOG_ERROR("expected indentifier"); - goto ERR; - } - skip_whitespace(stream); - core_stream_reset_char(stream); - - pp_token_t token = - keyword_cmp(cstring_as_cstr(&directive), cstring_len(&directive)); - switch (token) { - case PP_TOK_DEFINE: { - cstring_t name = parse_identifier(stream); - if (cstring_is_empty(&name)) { - LOG_ERROR("expected indentifier"); - goto ERR; - } - skip_whitespace(stream); - core_stream_reset_char(stream); - - int ch = core_stream_peek_char(stream); - if (ch == '(') { - macro_list_t params; - parse_list(_stream, ¶ms, true); - ch = core_stream_next_char(stream); - if (ch != ')') { - } - goto ERR; - } - 
macro_list_t replacement; - parse_list(_stream, &replacement, false); - add_macro(_stream->self, &name, &replacement, NULL, MACRO_OBJECT); - break; - } - case PP_TOK_UNDEF: - case PP_TOK_INCLUDE: - case PP_TOK_IF: - case PP_TOK_IFDEF: - case PP_TOK_IFNDEF: - case PP_TOK_ELSE: - case PP_TOK_ELIF: - case PP_TOK_ELIFDEF: - case PP_TOK_ELIFNDEF: - case PP_TOK_ENDIF: - case PP_TOK_LINE: - case PP_TOK_EMBED: - case PP_TOK_ERROR: - case PP_TOK_WARNING: - case PP_TOK_PRAMA: - TODO(); - break; - default: - LOG_WARN("Unknown preprocessor directive: %s", - cstring_as_cstr(&directive)); - } - - // TODO: win \r\n linux \n mac \r => all need transport to \n - core_stream_reset_char(stream); - lex_parse_skip_line(stream, &pos); - - cstring_free(&directive); - return; -ERR: - // TODO skip line - LOG_FATAL("Unhandled preprocessor directive"); -} - -static inline void stream_push_string(pp_stream_t *stream, cstring_t *str) { - stream->size += cstring_len(str); - Assert(stream->size <= PPROCESSER_BUFFER_SIZE); - smcc_memcpy(stream->buffer, cstring_as_cstr(str), stream->size); -} - -static inline void stream_push_char(pp_stream_t *stream, int ch) { - stream->buffer[stream->size++] = ch; - Assert(stream->size <= PPROCESSER_BUFFER_SIZE); -} - -static int next_char(core_stream_t *_stream) { - pp_stream_t *stream = (pp_stream_t *)_stream; - Assert(stream != null); READ_BUF: - if (stream->size != 0) { - if (stream->pos < stream->size) { - return stream->buffer[stream->pos++]; - } else { - stream->size = 0; - stream->pos = 0; - } + if (stream->tmp_stream != null && + (ch = scc_probe_stream_consume(stream->tmp_stream)) != scc_stream_eof) { + return ch; } RETRY: - core_stream_reset_char(stream->input); - int ch = core_stream_peek_char(stream->input); + scc_probe_stream_reset(stream->input); + ch = scc_probe_stream_peek(stream->input); + if (ch == '#') { - parse_directive(stream); + scc_pp_parse_directive(stream->input, &stream->pos, + &stream->self->macro_table); goto RETRY; - } else if ((ch 
>= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || - ch == '_') { - cstring_t identifier = parse_identifier(stream->input); - smcc_macro_t *macro = find_macro(stream->self, &identifier); - if (macro == null) { - stream_push_string(stream, &identifier); - cstring_free(&identifier); - goto READ_BUF; - } else { - cstring_free(&identifier); - } - if (macro->type == MACRO_OBJECT) { - for (usize i = 0; i < macro->replaces.size; ++i) { - stream_push_string(stream, &vec_at(macro->replaces, i)); - // usize never using `-` - if (i + 1 < macro->replaces.size) - stream_push_char(stream, ' '); - } - goto READ_BUF; - } else if (macro->type == MACRO_FUNCTION) { - TODO(); - } - UNREACHABLE(); + } else if (scc_lex_parse_is_identifier_prefix(ch)) { + scc_pp_expand_macro(stream->input, &stream->self->macro_table, + &stream->tmp_stream, MAX_MACRO_EXPANSION_DEPTH); + goto READ_BUF; } - return core_stream_next_char(stream->input); + + // 非标识符字符,直接消费并返回 + return scc_probe_stream_consume(stream->input); } -static core_stream_t *pp_stream_init(smcc_pp_t *self, core_stream_t *input) { - pp_stream_t *stream = smcc_malloc(sizeof(pp_stream_t)); +static void pp_stream_drop(scc_probe_stream_t *_stream) { + scc_pp_stream_t *stream = (scc_pp_stream_t *)_stream; + Assert(stream != null); + scc_cstring_free(&stream->stream.name); + if (stream->tmp_stream) { + scc_probe_stream_drop(stream->tmp_stream); + } + scc_free(_stream); +} + +static scc_probe_stream_t *pp_stream_init(scc_pproc_t *self, + scc_probe_stream_t *input) { + scc_pp_stream_t *stream = scc_malloc(sizeof(scc_pp_stream_t)); if (stream == null) { LOG_FATAL("Failed to allocate memory for output stream"); - } - if (stream == null || self == null) { return null; } + + if (self == null) { + scc_free(stream); + return null; + } + stream->self = self; stream->input = input; - stream->size = 0; - stream->pos = 0; - stream->stream.name = cstring_from_cstr("pipe_stream"); - stream->stream.free_stream = null; - stream->stream.next_char = 
next_char; - stream->stream.peek_char = null; - stream->stream.reset_char = null; + stream->tmp_stream = null; + stream->pos = scc_pos_init(); + + stream->stream.name = scc_cstring_from_cstr("pp_stream"); + stream->stream.consume = pp_stream_read_char; + stream->stream.peek = null; + stream->stream.next = null; + stream->stream.sync = null; + stream->stream.reset = null; + stream->stream.back = null; stream->stream.read_buf = null; - return (core_stream_t *)stream; + stream->stream.is_at_end = null; + stream->stream.drop = pp_stream_drop; + + return (scc_probe_stream_t *)stream; } -core_stream_t *pp_init(smcc_pp_t *pp, core_stream_t *input) { +scc_probe_stream_t *scc_pproc_init(scc_pproc_t *pp, scc_probe_stream_t *input) { if (pp == null || input == null) { return null; } - core_mem_stream_t *stream = smcc_malloc(sizeof(core_mem_stream_t)); - if (stream == null) { - LOG_FATAL("Failed to allocate memory for output stream"); - } pp->stream = pp_stream_init(pp, input); - Assert(pp->stream != null); + if (pp->stream == null) { + return null; + } + + scc_pp_marco_table_init(&pp->macro_table); - hashmap_init(&pp->macros); - pp->macros.hash_func = (u32 (*)(const void *))hash_func; - pp->macros.key_cmp = (int (*)(const void *, const void *))hash_cmp; return pp->stream; } // 销毁预处理器 -void pp_drop(smcc_pp_t *pp) { - if (pp == NULL) +void scc_pproc_drop(scc_pproc_t *pp) { + if (pp == null) return; - // 清理所有宏定义 - // 注意:需要实现 hashmap 的迭代和清理函数 - hashmap_drop(&pp->macros); + scc_pp_macro_table_drop(&pp->macro_table); - // 清理字符串池 - // strpool_destroy(&pp->strpool); - - // 清理条件编译栈 - // 需要释放栈中每个元素的资源(如果有的话) - // vec_free(pp->if_stack); - - // 清理文件名 - cstring_free(&pp->stream->name); + // 清理流 + if (pp->stream) { + scc_probe_stream_drop(pp->stream); + pp->stream = null; + } } diff --git a/libs/pprocessor/tests/test_pp.c b/libs/pprocessor/tests/test_pp.c index 749a61f..8814a01 100644 --- a/libs/pprocessor/tests/test_pp.c +++ b/libs/pprocessor/tests/test_pp.c @@ -4,7 +4,7 @@ #include 
#include -static core_stream_t *from_file_stream(FILE *fp) { +static scc_probe_stream_t *from_file_stream(FILE *fp) { if (fseek(fp, 0, SEEK_END) != 0) { perror("fseek failed"); return NULL; @@ -20,9 +20,9 @@ static core_stream_t *from_file_stream(FILE *fp) { usize read_ret = fread(buffer, 1, fsize, fp); fclose(fp); - core_mem_stream_t *mem_stream = malloc(sizeof(core_mem_stream_t)); - core_stream_t *stream = - core_mem_stream_init(mem_stream, buffer, fsize, true); + scc_mem_probe_stream_t *mem_stream = malloc(sizeof(scc_mem_probe_stream_t)); + scc_probe_stream_t *stream = + scc_mem_probe_stream_init(mem_stream, buffer, fsize, true); return stream; } @@ -37,28 +37,34 @@ static void test_file(const char *name) { FILE *fexpect = fopen(expected_fname, "r"); assert(fexpect != NULL); - smcc_pp_t pp; - core_mem_stream_t stream; - core_stream_t *output_stream = pp_init(&pp, from_file_stream(fsrc)); - core_stream_t *expect_stream = from_file_stream(fexpect); + scc_pproc_t pp; + scc_mem_probe_stream_t stream; + scc_probe_stream_t *output_stream = + scc_pproc_init(&pp, from_file_stream(fsrc)); + scc_probe_stream_t *expect_stream = from_file_stream(fexpect); + TEST_CASE(src_fname); while (1) { - int output_ch = core_stream_next_char(output_stream); - int expect_ch = core_stream_next_char(expect_stream); + int output_ch = scc_probe_stream_consume(output_stream); + int expect_ch = scc_probe_stream_consume(expect_stream); TEST_CHECK(output_ch == expect_ch); - TEST_MSG("output: %c, expect: %c", output_ch, expect_ch); - if (output_ch == core_stream_eof) { + TEST_MSG("output: %c %x, expect: %c %x", output_ch, output_ch, + expect_ch, expect_ch); + if (output_ch != expect_ch) { + break; + } + if (output_ch == scc_stream_eof) { break; } } - pp_drop(&pp); + scc_pproc_drop(&pp); } static void test_basic(void) { char name[32]; - // for (int i = 1; i <= 22; ++i) { - // snprintf(name, sizeof(name), "%02d", i); - // test_file(name); - // } + for (int i = 1; i <= 22; ++i) { + // 
snprintf(name, sizeof(name), "%02d", i); + // test_file(name); + } } TEST_LIST = { diff --git a/libs/pprocessor/tests/test_run.c b/libs/pprocessor/tests/test_run.c index f32ce4b..3392ad3 100644 --- a/libs/pprocessor/tests/test_run.c +++ b/libs/pprocessor/tests/test_run.c @@ -2,19 +2,19 @@ #include int main(void) { - smcc_pp_t pp; - core_mem_stream_t input; - core_stream_t *output; + scc_pproc_t pp; + scc_mem_probe_stream_t input; + scc_probe_stream_t *output; const char buf[] = "#define A 123 \"asd\"\nA A A\n"; - output = - pp_init(&pp, core_mem_stream_init(&input, buf, sizeof(buf) - 1, false)); + output = scc_pproc_init( + &pp, scc_mem_probe_stream_init(&input, buf, sizeof(buf) - 1, false)); int ch = 0; while (1) { - ch = core_stream_next_char(output); - if (ch == core_stream_eof) { + ch = scc_probe_stream_consume(output); + if (ch == scc_stream_eof) { break; } putc(ch, stdout); diff --git a/libs/pprocessor/tests/test_unit.c b/libs/pprocessor/tests/test_unit.c index 0571e1f..b46681d 100644 --- a/libs/pprocessor/tests/test_unit.c +++ b/libs/pprocessor/tests/test_unit.c @@ -3,45 +3,50 @@ #include #include -static cbool process_input(const char *input, cstring_t *output) { - smcc_pp_t pp; - core_mem_stream_t mem_stream; - core_stream_t *output_stream; +static cbool process_input(const char *input, scc_cstring_t *output) { + scc_pproc_t pp; + scc_mem_probe_stream_t mem_stream; + scc_probe_stream_t *output_stream; // 初始化预处理器 - output_stream = pp_init( - &pp, core_mem_stream_init(&mem_stream, input, strlen(input), false)); + output_stream = + scc_pproc_init(&pp, scc_mem_probe_stream_init(&mem_stream, input, + strlen(input), false)); // 获取输出结果 int ch; - *output = cstring_new(); + *output = scc_cstring_new(); while (1) { - ch = core_stream_next_char(output_stream); - if (ch == core_stream_eof) { + ch = scc_probe_stream_consume(output_stream); + if (ch == scc_stream_eof) { break; } - cstring_push(output, (char)ch); + scc_cstring_append_ch(output, (char)ch); } // 清理资源 - 
pp_drop(&pp); + scc_pproc_drop(&pp); return true; } #define CHECK_PP_OUTPUT_EXACT(input, expect) \ do { \ - cstring_t output; \ + scc_cstring_t output; \ process_input(input, &output); \ assert(output.data != NULL); \ TEST_CHECK(strcmp(output.data, expect) == 0); \ + TEST_MSG("Expected: %s", expect); \ + TEST_MSG("Produced: %s", output.data); \ } while (0) #define CHECK_PP_OUTPUT_CONTAIN(input, expect) \ do { \ - cstring_t output; \ + scc_cstring_t output; \ process_input(input, &output); \ assert(output.data != NULL); \ TEST_CHECK(strstr(output.data, expect) != NULL); \ + TEST_MSG("Expected: %s", expect); \ + TEST_MSG("Produced: %s", output.data); \ } while (0) static void test_define_simple_object_macro(void) { @@ -56,6 +61,15 @@ static void test_define_complex_object_macro(void) { CHECK_PP_OUTPUT_EXACT("#define PI 3.14159\nPI\n", "3.14159\n"); } +static void test_define_object_macro_backspace(void) { + TEST_CASE("object-like macro check backspace"); + CHECK_PP_OUTPUT_EXACT("#define MAX 100\nMAX\n", "100\n"); + CHECK_PP_OUTPUT_EXACT("#define NAME \ttest\r\nNAME\n", "test\n"); + CHECK_PP_OUTPUT_EXACT("#define \tVALUE (100 \t+ 50)\nVALUE\n", + "(100 + 50)\n"); + CHECK_PP_OUTPUT_EXACT("#define \tPI \t 3.14159\nPI\n", "3.14159\n"); +} + static void test_define_function_macro(void) { TEST_CASE("function-like macro"); CHECK_PP_OUTPUT_EXACT("#define ADD(a,b) a + b\nADD(1, 2)\n", "1 + 2\n"); @@ -90,12 +104,41 @@ static void test_define_nested_macros(void) { "((1 + 1) + 1)\n"); } +static void test_conditional_compilation(void) { + TEST_CASE("conditional compilation"); + CHECK_PP_OUTPUT_EXACT("#if 1\ntrue\n#endif\n", "true\n"); + CHECK_PP_OUTPUT_EXACT("#if 0\nfalse\n#endif\n", ""); + CHECK_PP_OUTPUT_EXACT("#define FLAG 1\n#if FLAG\ntrue\n#endif\n", "true\n"); +} + +static void test_error_cases(void) { + TEST_CASE("macro redefinition"); + // 应检测到警告或错误 + // CHECK_PP_OUTPUT_CONTAIN("#define A 1\n#define A 2\n", "warning"); + + TEST_CASE("undefined macro"); + 
CHECK_PP_OUTPUT_EXACT("UNDEFINED_MACRO\n", "UNDEFINED_MACRO\n"); +} + +static void test_edge_cases(void) { + TEST_CASE("empty macro"); + CHECK_PP_OUTPUT_EXACT("#define EMPTY\nEMPTY\n", "\n"); + + TEST_CASE("macro with only spaces"); + CHECK_PP_OUTPUT_EXACT("#define SPACE \nSPACE\n", "\n"); + + TEST_CASE("deep nesting"); + CHECK_PP_OUTPUT_EXACT("#define A B\n#define B C\n#define C 1\nA\n", "1\n"); +} + +#define TEST_LIST_CASE(func_name) {#func_name, func_name} TEST_LIST = { - {"test_define_simple_object_macro", test_define_simple_object_macro}, - {"test_define_complex_object_macro", test_define_complex_object_macro}, - {"test_define_function_macro", test_define_function_macro}, - {"test_define_stringify_operator", test_define_stringify_operator}, - {"test_define_concat_operator", test_define_concat_operator}, - {"test_define_nested_macros", test_define_nested_macros}, + TEST_LIST_CASE(test_define_simple_object_macro), + TEST_LIST_CASE(test_define_complex_object_macro), + TEST_LIST_CASE(test_define_object_macro_backspace), + TEST_LIST_CASE(test_define_function_macro), + TEST_LIST_CASE(test_define_stringify_operator), + TEST_LIST_CASE(test_define_concat_operator), + TEST_LIST_CASE(test_define_nested_macros), {NULL, NULL}, }; \ No newline at end of file diff --git a/runtime/libcore/include/core_str.h b/runtime/libcore/include/core_str.h index e1c0630..ce2380f 100644 --- a/runtime/libcore/include/core_str.h +++ b/runtime/libcore/include/core_str.h @@ -136,7 +136,13 @@ static inline void scc_cstring_append_ch(scc_cstring_t *str, char ch) { * @return usize 字符串实际长度 */ static inline usize scc_cstring_len(const scc_cstring_t *str) { - return str ? 
str->size - 1 : 0; + if (str == null) { + return 0; + } + if (str->size == 0) { + return 0; + } + return str->size - 1; } /** diff --git a/runtime/libcore/include/core_stream.h b/runtime/libcore/include/core_stream.h index 85c9327..8ea8a30 100644 --- a/runtime/libcore/include/core_stream.h +++ b/runtime/libcore/include/core_stream.h @@ -104,7 +104,7 @@ typedef struct scc_mem_probe_stream { } scc_mem_probe_stream_t; /** - * @brief 初始化内存探针流 + * @brief 初始化内存探针流(由你负责scc_mem_probe_stream_t的释放) * * @param stream 流结构指针 * @param data 数据指针 @@ -115,6 +115,16 @@ typedef struct scc_mem_probe_stream { scc_probe_stream_t *scc_mem_probe_stream_init(scc_mem_probe_stream_t *stream, const char *data, usize length, cbool need_copy); +/** + * @brief 构造内存探针流(其中drop会自动释放内存) + * + * @param data + * @param length + * @param need_copy + * @return scc_probe_stream_t* + */ +scc_probe_stream_t *scc_mem_probe_stream_new(const char *data, usize length, + cbool need_copy); #endif #endif /* __SMCC_CORE_PROBE_STREAM_H__ */ diff --git a/runtime/libcore/src/stream.c b/runtime/libcore/src/stream.c index ce161db..5950d2e 100644 --- a/runtime/libcore/src/stream.c +++ b/runtime/libcore/src/stream.c @@ -111,7 +111,7 @@ static cbool mem_probe_stream_is_at_end(scc_probe_stream_t *_stream) { return stream->curr_pos >= stream->data_length; } -static void mem_probe_stream_destroy(scc_probe_stream_t *_stream) { +static void mem_probe_stream_drop(scc_probe_stream_t *_stream) { Assert(_stream != null); scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; @@ -164,9 +164,30 @@ scc_probe_stream_t *scc_mem_probe_stream_init(scc_mem_probe_stream_t *stream, stream->stream.read_buf = mem_probe_stream_read_buf; stream->stream.reset = mem_probe_stream_reset; stream->stream.is_at_end = mem_probe_stream_is_at_end; - stream->stream.drop = mem_probe_stream_destroy; + stream->stream.drop = mem_probe_stream_drop; return (scc_probe_stream_t *)stream; } +static void scc_owned_mem_stream_drop(scc_probe_stream_t 
*_stream) { + scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; + mem_probe_stream_drop(_stream); + scc_free(stream); +} + +scc_probe_stream_t *scc_mem_probe_stream_new(const char *data, usize length, + cbool need_copy) { + scc_mem_probe_stream_t *stream = + (scc_mem_probe_stream_t *)scc_malloc(sizeof(scc_mem_probe_stream_t)); + if (stream == null) { + return null; + } + + scc_probe_stream_t *ret = + scc_mem_probe_stream_init(stream, data, length, need_copy); + stream->stream.drop = scc_owned_mem_stream_drop; + Assert(ret != null); + return ret; +} + #endif /* __SCC_CORE_NO_MEM_PROBE_STREAM__ */