From beb0b190260e26faca52c97328798f9d07841bc4 Mon Sep 17 00:00:00 2001 From: zzyxyz <2450266535@qq.com> Date: Mon, 2 Mar 2026 18:04:43 +0800 Subject: [PATCH] =?UTF-8?q?fix(lexer):=20=E5=9C=A8token=E6=8B=B7=E8=B4=9D?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E4=B8=AD=E6=B7=BB=E5=8A=A0=E7=A9=BA=E6=8C=87?= =?UTF-8?q?=E9=92=88=E6=A3=80=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加了对源指针的空值断言检查,防止在scc_lexer_tok_copy函数中 传入空指针导致崩溃 refactor(pproc): 优化数组到环形缓冲区转换函数的参数命名 修改scc_lexer_array_to_ring函数参数名从array改为move_array, 并在转换后初始化原数组结构,确保资源正确管理 fix(pproc): 修复宏处理中的内存泄漏问题 - 在fill_replacements函数中初始化token结构并正确释放 - 在scc_pproc_parse_function_macro中释放args向量 - 修改token消费逻辑以避免重复消费 - 在指令处理中添加token释放操作 fix(pproc): 改进宏展开过程中的资源管理 - 修复concatenate_tokens函数中的潜在内存泄漏 - 添加对输出向量的断言检查 - 正确释放输入环形缓冲区 - 重构参数拆分逻辑以避免内存泄漏 refactor(pproc): 优化条件解析和宏表设置逻辑 - 修正parse_constant_condition函数中的返回值初始化 - 在宏表设置时处理重复宏名称的情况 - 改进预处理器的整体资源清理流程 fix(pproc): 修复预处理器中的多个内存泄漏 - 在测试代码中添加正确的资源释放 - 修正格式化字符串中的类型匹配 - 优化环形缓冲区初始化逻辑以处理零容量情况 --- libs/lexer/include/scc_lexer_token.h | 1 + libs/pproc/include/pproc_expand.h | 5 +- libs/pproc/src/pproc_directive.c | 9 ++- libs/pproc/src/pproc_expand.c | 94 ++++++++++++++++-------- libs/pproc/src/pproc_if.c | 14 +++- libs/pproc/src/pproc_macro.c | 7 +- libs/pproc/src/scc_pproc.c | 20 ++++- libs/pproc/tests/test_pproc_unit.c | 3 + libs/sstream/include/scc_pos_log.h | 5 +- runtime/scc_core/include/scc_core_ring.h | 3 +- 10 files changed, 114 insertions(+), 47 deletions(-) diff --git a/libs/lexer/include/scc_lexer_token.h b/libs/lexer/include/scc_lexer_token.h index b4cb458..456c968 100644 --- a/libs/lexer/include/scc_lexer_token.h +++ b/libs/lexer/include/scc_lexer_token.h @@ -203,6 +203,7 @@ static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok, // 深拷贝 token static inline scc_lexer_tok_t scc_lexer_tok_copy(const scc_lexer_tok_t *src) { + Assert(src != null); scc_lexer_tok_t dst = *src; dst.lexeme = scc_cstring_copy(&src->lexeme); 
return dst; diff --git a/libs/pproc/include/pproc_expand.h b/libs/pproc/include/pproc_expand.h index e1de3c5..79b05c6 100644 --- a/libs/pproc/include/pproc_expand.h +++ b/libs/pproc/include/pproc_expand.h @@ -16,9 +16,10 @@ typedef struct { } scc_pproc_expand_t; static inline scc_lexer_tok_ring_t -scc_lexer_array_to_ring(scc_lexer_tok_vec_t *array) { +scc_lexer_array_to_ring(scc_lexer_tok_vec_t *move_array) { scc_lexer_tok_ring_t ret; - scc_ring_by_buffer(ret, array->data, array->size); + scc_ring_by_buffer(ret, move_array->data, move_array->size); + scc_vec_init(*move_array); return ret; } diff --git a/libs/pproc/src/pproc_directive.c b/libs/pproc/src/pproc_directive.c index 550bfda..549c7d1 100644 --- a/libs/pproc/src/pproc_directive.c +++ b/libs/pproc/src/pproc_directive.c @@ -78,9 +78,10 @@ void scc_pproc_parse_macro_arguments(scc_lexer_tok_ring_t *ring, static inline void fill_replacements(scc_pproc_t *pp, scc_pproc_macro_t *macro) { int ok; - scc_lexer_tok_t tok; + scc_lexer_tok_t tok = {0}; ok = scc_lexer_next_non_blank(pp->cur_ring, &tok); if (!ok || tok.type == SCC_TOK_EOF || tok.type == SCC_TOK_ENDLINE) { + scc_lexer_tok_drop(&tok); return; } else { scc_vec_push(macro->replaces, tok); @@ -139,6 +140,7 @@ void scc_pproc_parse_function_macro(scc_pproc_t *pp, LOG_FATAL("ERROR"); } } + scc_vec_free(args); fill_replacements(pp, macro); scc_pproc_macro_table_set(&pp->macro_table, macro); } @@ -160,10 +162,9 @@ static void scc_pproc_parse_line_and_expand(scc_pproc_t *pp, scc_lexer_tok_vec_t org_toks; scc_vec_init(org_toks); while (1) { - scc_ring_peek(*stream, tok, ok); + scc_ring_next_consume(*stream, tok, ok); if (ok == false) break; - scc_ring_next_consume(*stream, tok, ok); scc_vec_push(org_toks, tok); if (tok.type == SCC_TOK_ENDLINE) break; @@ -254,6 +255,7 @@ void scc_pproc_handle_directive(scc_pproc_t *pp) { scc_tok_type_t type = keywords[ret].tok_type; if (scc_pproc_parse_if_need_skip(pp, type)) { + scc_lexer_tok_drop(&tok); 
scc_lexer_skip_until_newline(pp->cur_ring); return; } @@ -273,6 +275,7 @@ void scc_pproc_handle_directive(scc_pproc_t *pp) { SCC_ERROR(tok.loc, "unexpected end of file in macro definition"); goto ERROR; } + if (next_tok.type == SCC_TOK_L_PAREN) { // function macro scc_pproc_parse_function_macro(pp, &tok); diff --git a/libs/pproc/src/pproc_expand.c b/libs/pproc/src/pproc_expand.c index 5c7f5f4..c81afb9 100644 --- a/libs/pproc/src/pproc_expand.c +++ b/libs/pproc/src/pproc_expand.c @@ -59,19 +59,28 @@ static scc_lexer_tok_t concatenate_tokens(const scc_lexer_tok_t *left, scc_lexer_init(&lexer, scc_sstream_to_ring(&sstream)); scc_lexer_tok_ring_t *ring = scc_lexer_to_ring(&lexer, 8, true); - scc_lexer_tok_t result; + scc_lexer_tok_t result = {0}; int ok; scc_ring_next_consume(*ring, result, ok); if (!ok) { result.type = SCC_TOK_EOF; - return result; + goto RETURN; // FIXME maybe memleak } scc_ring_next_consume(*ring, result, ok); if (ok) { scc_lexer_tok_drop(&result); - return result; + goto RETURN; // FIXME maybe memleak } + scc_lexer_tok_t dummy; +RETURN: + while (1) { + scc_ring_next_consume(*ring, dummy, ok); + if (ok == false) { + break; + } + scc_lexer_tok_drop(&dummy); + } scc_lexer_drop_ring(ring); scc_lexer_drop(&lexer); scc_sstream_drop(&sstream); @@ -122,6 +131,7 @@ void scc_pproc_expand_by_src(scc_pproc_macro_table_t *macro_table, scc_lexer_tok_vec_t output_vec; scc_pproc_expand_by_vec(macro_table, &expaned_buffer, &output_vec, false); + Assert(output->cap == 0 && output->data == null); // FIXME hack it *output = scc_lexer_array_to_ring(&output_vec); } @@ -142,6 +152,7 @@ void scc_pproc_expand_by_vec(scc_pproc_macro_table_t *macro_table, scc_pproc_marco_table_init(ctx.expanded_set); scc_pproc_expand_macro(&ctx); *output = ctx.output; + scc_ring_free(*ctx.input); scc_pproc_macro_table_drop(ctx.expanded_set); } @@ -169,8 +180,8 @@ static cbool need_skip(scc_pproc_expand_t *expand_ctx, static inline void split_arguments(scc_pproc_macro_extened_params_t 
*splited_params, scc_lexer_tok_vec_t *raw_args, const scc_pproc_macro_t *macro) { - scc_lexer_tok_vec_t arg; - scc_vec_init(arg); + scc_lexer_tok_vec_t args; + scc_vec_init(args); int named_count = (int)scc_vec_size(macro->params); cbool is_variadic = @@ -189,31 +200,35 @@ split_arguments(scc_pproc_macro_extened_params_t *splited_params, if (depth != 0 || raw_arg->type != SCC_TOK_COMMA || (is_variadic && (int)scc_vec_size(*splited_params) == named_count - 1)) { - if (scc_vec_size(arg) == 0 && raw_arg->type == SCC_TOK_BLANK) { + if (scc_vec_size(args) == 0 && raw_arg->type == SCC_TOK_BLANK) { scc_lexer_tok_drop(raw_arg); } else { - scc_vec_push(arg, *raw_arg); + scc_lexer_tok_t arg; + scc_lexer_tok_move(&arg, raw_arg); + scc_vec_push(args, arg); } - continue; } else { scc_lexer_tok_drop(raw_arg); - if (scc_vec_size(arg) && - scc_vec_at(arg, scc_vec_size(arg) - 1).type == SCC_TOK_BLANK) { - scc_lexer_tok_drop(&scc_vec_pop(arg)); + if (scc_vec_size(args) && + scc_vec_at(args, scc_vec_size(args) - 1).type == + SCC_TOK_BLANK) { + scc_lexer_tok_drop(&scc_vec_pop(args)); } - scc_vec_push(*splited_params, arg); - scc_vec_init(arg); + scc_vec_push(*splited_params, args); + scc_vec_init(args); } } - if (scc_vec_size(arg) && - scc_vec_at(arg, scc_vec_size(arg) - 1).type == SCC_TOK_BLANK) { - scc_lexer_tok_drop(&scc_vec_pop(arg)); + scc_vec_free(*raw_args); + + if (scc_vec_size(args) && + scc_vec_at(args, scc_vec_size(args) - 1).type == SCC_TOK_BLANK) { + scc_lexer_tok_drop(&scc_vec_pop(args)); } - scc_vec_push(*splited_params, arg); + scc_vec_push(*splited_params, args); if (is_variadic && (int)scc_vec_size(*splited_params) == named_count - 1) { - scc_vec_init(arg); - scc_vec_push(*splited_params, arg); + scc_vec_init(args); + scc_vec_push(*splited_params, args); } } @@ -264,26 +279,25 @@ static void rescan(scc_pproc_expand_t *expand_ctx, scc_pproc_expand_macro(&rescan_ctx); enable(expand_ctx, macro); - scc_ring_free(ring); scc_vec_foreach(rescan_ctx.output, i) { 
scc_vec_push(expand_ctx->output, scc_vec_at(rescan_ctx.output, i)); } if (scc_vec_size(expand_ctx->output) == 0) { - return; + goto RETURN; } scc_lexer_tok_t *end_tok = &scc_vec_at(expand_ctx->output, scc_vec_size(expand_ctx->output) - 1); if (scc_get_tok_subtype(end_tok->type) != SCC_TOK_SUBTYPE_IDENTIFIER) { - return; + goto RETURN; } scc_pproc_macro_t *end_macro = scc_pproc_macro_table_get(expand_ctx->macro_table, &end_tok->lexeme); if (end_macro == null || end_macro->type != SCC_PP_MACRO_FUNCTION) { - return; + goto RETURN; } int ok = false; @@ -301,7 +315,12 @@ static void rescan(scc_pproc_expand_t *expand_ctx, scc_vec_foreach(output, i) { scc_vec_push(expand_ctx->output, scc_vec_at(output, i)); } + scc_vec_free(output); } + +RETURN: + scc_ring_free(*rescan_ctx.input); + scc_vec_free(rescan_ctx.output); } static int find_params(const scc_lexer_tok_t *tok, @@ -378,6 +397,7 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx, scc_pproc_macro_extened_params_t splited_params; scc_vec_init(splited_params); split_arguments(&splited_params, &raw_args, macro); + Assert(raw_args.cap == 0); // FIXME Assert(scc_vec_size(splited_params) >= scc_vec_size(macro->params)); scc_pproc_macro_extened_params_t expanded_params; @@ -428,11 +448,15 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx, int idx = find_params(right_tok, macro); scc_lexer_tok_vec_t right_vec; + scc_vec_init(right_vec); if (idx != -1) { Assert(idx < (int)scc_vec_size(splited_params)); - right_vec = scc_vec_at(splited_params, idx); + scc_lexer_tok_vec_t *tok_vec = &scc_vec_at(splited_params, idx); + scc_vec_foreach(*tok_vec, j) { + scc_vec_push(right_vec, + scc_lexer_tok_copy(&scc_vec_at(*tok_vec, j))); + } } else { - scc_vec_init(right_vec); scc_vec_push(right_vec, scc_lexer_tok_copy(right_tok)); } @@ -451,14 +475,18 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx, concact(&tok_buffer, right, false); } - scc_vec_foreach(right_vec, k) 
{ - if (k == 0) { + scc_vec_foreach(right_vec, j) { + if (j == 0) { + scc_lexer_tok_drop(&scc_vec_at(right_vec, j)); continue; } - scc_lexer_tok_t tok = - scc_lexer_tok_copy(&scc_vec_at(right_vec, k)); + scc_lexer_tok_t tok = {0}; + scc_lexer_tok_move(&tok, &scc_vec_at(right_vec, j)); + scc_lexer_tok_drop(&scc_vec_at(right_vec, j)); scc_vec_push(tok_buffer, tok); } + scc_vec_free(right_vec); + i = right_idx; continue; } else { @@ -481,6 +509,7 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx, expanded_params_free(&expanded_params); rescan(expand_ctx, macro, &tok_buffer); + Assert(tok_buffer.cap == 0); // FIXME } static inline void expand_object_macro(scc_pproc_expand_t *expand_ctx, @@ -492,10 +521,12 @@ static inline void expand_object_macro(scc_pproc_expand_t *expand_ctx, scc_lexer_tok_t tok = scc_lexer_tok_copy(&scc_vec_at(macro->replaces, i)); if (tok.type == SCC_TOK_BLANK) { + // FIXME using function to warpper it scc_cstring_free(&tok.lexeme); tok.lexeme = scc_cstring_from_cstr(" "); } else if (tok.type == SCC_TOK_SHARP_SHARP) { // ## contact + scc_lexer_tok_drop(&tok); int right_idx = got_right_non_blank(i, &macro->replaces); scc_lexer_tok_t *right; @@ -570,7 +601,7 @@ void scc_pproc_expand_macro(scc_pproc_expand_t *expand_ctx) { if (!ok) { return; } - if (tok.type != SCC_TOK_IDENT) { + if (scc_get_tok_subtype(tok.type) != SCC_TOK_SUBTYPE_IDENTIFIER) { scc_vec_push(expand_ctx->output, tok); continue; } @@ -591,7 +622,6 @@ void scc_pproc_expand_macro(scc_pproc_expand_t *expand_ctx) { scc_pproc_macro_table_get(expand_ctx->macro_table, &tok.lexeme); if (macro == null || need_skip(expand_ctx, macro)) { - Assert(tok.type == SCC_TOK_IDENT); // FIXME maybe keyword is error or don't parse c keyword or number // 这个地方不太清楚正确的原因 tok.type = SCC_TOK_DISABLED; @@ -601,6 +631,7 @@ void scc_pproc_expand_macro(scc_pproc_expand_t *expand_ctx) { expand_ctx->need_rescan = true; if (macro->type == SCC_PP_MACRO_OBJECT) { + scc_lexer_tok_drop(&tok); 
expand_object_macro(expand_ctx, macro); } else if (macro->type == SCC_PP_MACRO_FUNCTION) { scc_lexer_tok_t expect_tok; @@ -610,6 +641,7 @@ void scc_pproc_expand_macro(scc_pproc_expand_t *expand_ctx) { continue; } + scc_lexer_tok_drop(&tok); expand_function_macro(expand_ctx, macro); } else { UNREACHABLE(); diff --git a/libs/pproc/src/pproc_if.c b/libs/pproc/src/pproc_if.c index 7cf19a4..a7563a1 100644 --- a/libs/pproc/src/pproc_if.c +++ b/libs/pproc/src/pproc_if.c @@ -143,7 +143,7 @@ static int parse_constant_condition(scc_pproc_t *pp, scc_lexer_tok_drop(&tok); LOG_FATAL("unexpected EOF"); } - int res = 0; + int res = false; if (tok.type == SCC_TOK_INT_LITERAL) { // got int @@ -151,11 +151,19 @@ static int parse_constant_condition(scc_pproc_t *pp, for (int i = scc_cstring_len(&tok.lexeme) - 1; i >= 0; i--) { res = res * 10 + intstr[i] - '0'; } - return res; + } else { + scc_lexer_tok_drop(&tok); } + while (1) { + scc_ring_next_consume(*tok_ring, tok, ok); + if (ok == false) { + break; + } + scc_lexer_tok_drop(&tok); + } scc_ring_free(*tok_ring); - return false; + return res; } cbool scc_pproc_parse_if_condition(scc_pproc_t *pp, scc_tok_type_t type, diff --git a/libs/pproc/src/pproc_macro.c b/libs/pproc/src/pproc_macro.c index 8560914..d2e0a89 100644 --- a/libs/pproc/src/pproc_macro.c +++ b/libs/pproc/src/pproc_macro.c @@ -97,7 +97,12 @@ cbool scc_pproc_add_function_macro(scc_pproc_macro_table_t *macros, scc_pproc_macro_t *scc_pproc_macro_table_set(scc_pproc_macro_table_t *pp, scc_pproc_macro_t *macro) { Assert(pp != null && macro != null); - return scc_hashtable_set(&pp->table, &macro->name, macro); + scc_pproc_macro_t *old = scc_hashtable_set(&pp->table, &macro->name, macro); + if (old && old != macro) { + LOG_WARN("same macro name"); + scc_pproc_macro_drop(old); + } + return macro; } // 查找宏定义 diff --git a/libs/pproc/src/scc_pproc.c b/libs/pproc/src/scc_pproc.c index 70968f1..a97f48e 100644 --- a/libs/pproc/src/scc_pproc.c +++ b/libs/pproc/src/scc_pproc.c @@ -3,10 +3,10 @@ 
#include static int pproc_next_one_file(scc_pproc_t *pp, scc_lexer_tok_t *out) { -CONTINUE: scc_lexer_tok_ring_t *stream = pp->cur_ring; scc_lexer_tok_t tok = {0}; int ok = 0; +CONTINUE: if (pp->expanded_ring.cap) { scc_ring_next_consume(pp->expanded_ring, *out, ok); if (ok == false) { @@ -31,6 +31,7 @@ CONTINUE: scc_ring_next(*stream, *out, ok); scc_ring_peek(*stream, tok, ok); if (ok && tok.type == SCC_TOK_SHARP) { + scc_lexer_tok_drop(out); scc_pproc_handle_directive(pp); pp->at_line_start = true; goto CONTINUE; @@ -70,8 +71,8 @@ CONTINUE: } static int pproc_next(scc_pproc_t *pp, scc_lexer_tok_t *tok) { -CONTINUE: int ret = pproc_next_one_file(pp, tok); +CONTINUE: if (ret != 0) { return true; } @@ -130,6 +131,19 @@ scc_lexer_tok_ring_t *scc_pproc_to_ring(scc_pproc_t *pp, int ring_size) { void scc_pproc_drop(scc_pproc_t *pp) { if (pp == null) return; - scc_lexer_drop_ring(pp->cur_ring); + Assert(pp->cur_ring == pp->org_ring); + scc_lexer_drop_ring(pp->org_ring); scc_pproc_macro_table_drop(&pp->macro_table); + scc_ring_free(pp->expanded_ring); + + scc_vec_foreach(pp->include_paths, i) { + scc_cstring_free(&scc_vec_at(pp->include_paths, i)); + } + scc_vec_free(pp->include_paths); + + Assert(scc_vec_size(pp->if_stack) == 0); + scc_vec_free(pp->if_stack); + + Assert(scc_vec_size(pp->file_stack) == 0); + scc_vec_free(pp->file_stack); } diff --git a/libs/pproc/tests/test_pproc_unit.c b/libs/pproc/tests/test_pproc_unit.c index 4750f40..5548e05 100644 --- a/libs/pproc/tests/test_pproc_unit.c +++ b/libs/pproc/tests/test_pproc_unit.c @@ -28,6 +28,7 @@ static cbool process_input(const char *input, scc_cstring_t *output) { scc_lexer_tok_drop(&tok); } + scc_ring_free(*tok_ring); scc_pproc_drop(&pp); scc_lexer_drop(&lexer); scc_sstream_drop(&mem_stream); @@ -42,6 +43,7 @@ static cbool process_input(const char *input, scc_cstring_t *output) { TEST_CHECK(strcmp(output.data, expect) == 0); \ TEST_MSG("Expected: %s", expect); \ TEST_MSG("Produced: %s", output.data); \ + 
scc_cstring_free(&output); \ } while (0) #define CHECK_PP_OUTPUT_CONTAIN(input, expect) \ @@ -52,6 +54,7 @@ static cbool process_input(const char *input, scc_cstring_t *output) { TEST_CHECK(strstr(output.data, expect) != NULL); \ TEST_MSG("Expected: %s", expect); \ TEST_MSG("Produced: %s", output.data); \ + scc_cstring_free(&output); \ } while (0) static void test_define_simple_no_macro(void) { diff --git a/libs/sstream/include/scc_pos_log.h b/libs/sstream/include/scc_pos_log.h index f3b1f61..fd9ae2c 100644 --- a/libs/sstream/include/scc_pos_log.h +++ b/libs/sstream/include/scc_pos_log.h @@ -9,9 +9,8 @@ extern logger_t __scc_usr_log; #define SCC_POS_LOG(level, pos, fmt, ...) \ do { \ char _full_msg[LOGGER_MAX_BUF_SIZE]; \ - int _n = \ - snprintf_(_full_msg, sizeof(_full_msg), \ - "%s:%llu:%llu: ", (pos).name, (pos).line, (pos).col); \ + int _n = snprintf_(_full_msg, sizeof(_full_msg), \ + "%s:%lu:%lu: ", (pos).name, (pos).line, (pos).col); \ snprintf_(_full_msg + _n, sizeof(_full_msg) - _n, fmt, ##__VA_ARGS__); \ __scc_usr_log.handler(&__scc_usr_log, level, null, 0, null, "%s", \ _full_msg); \ diff --git a/runtime/scc_core/include/scc_core_ring.h b/runtime/scc_core/include/scc_core_ring.h index 44fe57f..0d28b63 100644 --- a/runtime/scc_core/include/scc_core_ring.h +++ b/runtime/scc_core/include/scc_core_ring.h @@ -77,7 +77,8 @@ */ #define scc_ring_init(ring, _cap, fill_func, _userdata) \ do { \ - (ring).data = scc_malloc((_cap) * sizeof(*(ring).data)); \ + (ring).data = \ + (_cap) ? scc_malloc((_cap) * sizeof(*(ring).data)) : null; \ (ring).cap = (_cap); \ (ring).head = 0; \ (ring).probe = 0; \