From 0fede5f46e6d5c34053081b4d4d5737f2d953bfa Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Fri, 27 Feb 2026 21:00:14 +0800 Subject: [PATCH] =?UTF-8?q?feat(pproc):=20=E5=AE=9E=E7=8E=B0=E9=A2=84?= =?UTF-8?q?=E5=A4=84=E7=90=86=E5=99=A8=E5=AE=8F=E8=BF=9E=E6=8E=A5=E6=93=8D?= =?UTF-8?q?=E4=BD=9C=E7=AC=A6=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 修改concatenate_tokens函数以支持null参数检查,避免空指针访问 - 添加concact辅助函数来处理##连接操作的逻辑 - 重构expand_function_macro中##操作符的实现,支持GNU扩展特性 - 实现对可变参数宏中##操作的正确处理,包括逗号删除逻辑 - 改进object宏中的##连接操作处理 - 添加多个单元测试用例验证连接操作符的正确性 - 修复字符串连接时的边界条件处理 refactor(tests): 重命名预处理器单元测试文件 - 将test_unit.c重命名为test_pproc_unit.c以更明确标识测试范围 --- libs/pproc/src/pproc_expand.c | 135 ++++++++---------- .../tests/{test_unit.c => test_pproc_unit.c} | 63 ++++---- 2 files changed, 91 insertions(+), 107 deletions(-) rename libs/pproc/tests/{test_unit.c => test_pproc_unit.c} (94%) diff --git a/libs/pproc/src/pproc_expand.c b/libs/pproc/src/pproc_expand.c index 2746e7e..37814ef 100644 --- a/libs/pproc/src/pproc_expand.c +++ b/libs/pproc/src/pproc_expand.c @@ -39,10 +39,13 @@ static scc_lexer_tok_t stringify_argument(scc_lexer_tok_vec_t *arg_tokens) { static scc_lexer_tok_t concatenate_tokens(const scc_lexer_tok_t *left, const scc_lexer_tok_t *right) { - Assert(left != null && right != null); - scc_cstring_t new_lex = scc_cstring_create(); - scc_cstring_append(&new_lex, &left->lexeme); - scc_cstring_append(&new_lex, &right->lexeme); + scc_cstring_t new_lex = scc_cstring_from_cstr(""); + if (left != null) { + scc_cstring_append(&new_lex, &left->lexeme); + } + if (right != null) { + scc_cstring_append(&new_lex, &right->lexeme); + } scc_lexer_t lexer; scc_sstream_t sstream; @@ -56,7 +59,7 @@ static scc_lexer_tok_t concatenate_tokens(const scc_lexer_tok_t *left, int ok; scc_ring_next_consume(*ring, result, ok); if (!ok) { - scc_lexer_tok_drop(&result); + result.type = SCC_TOK_EOF; return result; } 
scc_ring_next_consume(*ring, result, ok); @@ -263,6 +266,36 @@ static inline int got_right_non_blank(int i, return right_idx; } +static void concact(scc_lexer_tok_vec_t *tok_buffer, scc_lexer_tok_t *right, + cbool gnu_va_arg_extend) { + // ## contact + int tok_buf_size = (int)scc_vec_size(*tok_buffer); + int left_idx = got_left_non_blank(tok_buf_size, tok_buffer); + + scc_lexer_tok_t *left; + if (left_idx < 0) { + left = null; + left_idx = 0; // FIXME for free tok_buffer + } else { + left = &scc_vec_at(*tok_buffer, left_idx); + if (gnu_va_arg_extend && left->type == SCC_TOK_COMMA) { + left = null; + } + } + + scc_lexer_tok_t concate_tok = concatenate_tokens(left, right); + + while (left_idx++ < tok_buf_size) { + scc_lexer_tok_drop(&scc_vec_pop(*tok_buffer)); + } + if (concate_tok.type == SCC_TOK_UNKNOWN) { + LOG_FATAL("Invalid ## token"); + } + if (concate_tok.type != SCC_TOK_EOF) { + scc_vec_push(*tok_buffer, concate_tok); + } +} + static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx, const scc_pproc_macro_t *macro) { scc_lexer_tok_vec_t tok_buffer; @@ -316,55 +349,16 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx, } else if (tok.type == SCC_TOK_SHARP_SHARP) { // ## contact scc_lexer_tok_drop(&tok); - int left_idx = got_left_non_blank(i, &macro->replaces); int right_idx = got_right_non_blank(i, &macro->replaces); - if (left_idx < 0 || - right_idx >= (int)scc_vec_size(macro->replaces)) { - LOG_FATAL("Invalid ## operator"); - } - while ((int)i++ < right_idx) { - scc_lexer_tok_drop(&scc_vec_pop(tok_buffer)); - } - - scc_lexer_tok_t *left_tok = &scc_vec_at(macro->replaces, left_idx); - scc_lexer_tok_t *right_tok = - &scc_vec_at(macro->replaces, right_idx); - - if (left_tok->type == SCC_TOK_COMMA && - scc_strcmp(scc_cstring_as_cstr(&(right_tok->lexeme)), - "__VA_ARGS__") == 0) { - // GNU 扩展:处理逗号删除 - int right_param_idx = find_params(right_tok, macro); - Assert(right_param_idx != -1); - scc_lexer_tok_vec_t right_vec = - 
scc_vec_at(expanded_params, right_param_idx); - if (scc_vec_size(right_vec) != 0) { - // 可变参数非空:输出逗号副本,然后输出右侧参数的展开 - scc_lexer_tok_t comma_tok = scc_lexer_tok_copy(left_tok); - scc_vec_push(tok_buffer, comma_tok); - } - scc_vec_foreach(right_vec, k) { - scc_lexer_tok_t tok = - scc_lexer_tok_copy(&scc_vec_at(right_vec, k)); - scc_vec_push(tok_buffer, tok); - } - i = right_idx; - continue; - } - - int idx; - idx = find_params(left_tok, macro); - scc_lexer_tok_vec_t left_vec; - if (idx != -1) { - Assert(idx < (int)scc_vec_size(splited_params)); - left_vec = scc_vec_at(splited_params, idx); + scc_lexer_tok_t *right_tok; + if (right_idx >= (int)scc_vec_size(macro->replaces)) { + right_tok = null; } else { - scc_vec_init(left_vec); - scc_vec_push(left_vec, scc_lexer_tok_copy(left_tok)); + right_tok = &scc_vec_at(macro->replaces, right_idx); } - idx = find_params(right_tok, macro); + int idx = find_params(right_tok, macro); scc_lexer_tok_vec_t right_vec; if (idx != -1) { Assert(idx < (int)scc_vec_size(splited_params)); @@ -374,26 +368,21 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx, scc_vec_push(right_vec, scc_lexer_tok_copy(right_tok)); } - scc_lexer_tok_t *left = - scc_vec_size(left_vec) - ? &scc_vec_at(left_vec, scc_vec_size(left_vec) - 1) - : null; scc_lexer_tok_t *right = scc_vec_size(right_vec) ? 
&scc_vec_at(right_vec, 0) : null; - scc_vec_foreach(left_vec, k) { - if (k + 1 >= scc_vec_size(left_vec)) { + // GNU ## extention + if (scc_strcmp(scc_cstring_as_cstr(&(right_tok->lexeme)), + "__VA_ARGS__") == 0) { + if (scc_vec_size(right_vec) == 0) { + concact(&tok_buffer, right, true); + } else { continue; } - scc_lexer_tok_t tok = - scc_lexer_tok_copy(&scc_vec_at(left_vec, k)); - scc_vec_push(tok_buffer, tok); + } else { + concact(&tok_buffer, right, false); } - scc_lexer_tok_t concate_tok = concatenate_tokens(left, right); - if (concate_tok.type == SCC_TOK_UNKNOWN) { - LOG_FATAL("Invalid ## token"); - } - scc_vec_push(tok_buffer, concate_tok); + scc_vec_foreach(right_vec, k) { if (k == 0) { continue; @@ -439,24 +428,16 @@ static inline void expand_object_macro(scc_pproc_expand_t *expand_ctx, tok.lexeme = scc_cstring_from_cstr(" "); } else if (tok.type == SCC_TOK_SHARP_SHARP) { // ## contact - int left_idx = got_left_non_blank(i, &macro->replaces); int right_idx = got_right_non_blank(i, &macro->replaces); - if (left_idx < 0 || - right_idx >= (int)scc_vec_size(macro->replaces)) { - LOG_FATAL("Invalid ## operator"); + scc_lexer_tok_t *right; + if (right_idx >= (int)scc_vec_size(macro->replaces)) { + right = null; + } else { + right = &scc_vec_at(macro->replaces, right_idx); } - scc_lexer_tok_t *left = &scc_vec_at(macro->replaces, left_idx); - scc_lexer_tok_t *right = &scc_vec_at(macro->replaces, right_idx); - scc_lexer_tok_t concate_tok = concatenate_tokens(left, right); - while ((int)i++ < right_idx) { - scc_lexer_tok_drop(&scc_vec_pop(tok_buffer)); - } - if (concate_tok.type == SCC_TOK_UNKNOWN) { - LOG_FATAL("Invalid ## token"); - } - scc_vec_push(tok_buffer, concate_tok); + concact(&tok_buffer, right, false); i = right_idx; continue; } diff --git a/libs/pproc/tests/test_unit.c b/libs/pproc/tests/test_pproc_unit.c similarity index 94% rename from libs/pproc/tests/test_unit.c rename to libs/pproc/tests/test_pproc_unit.c index 2ee9976..a692df2 100644 --- 
a/libs/pproc/tests/test_unit.c +++ b/libs/pproc/tests/test_pproc_unit.c @@ -116,6 +116,9 @@ static void test_define_concat_operator(void) { "helloworld\n"); CHECK_PP_OUTPUT_EXACT("#define JOIN(pre,suf) pre ## suf\nJOIN(var, 123)\n", "var123\n"); + CHECK_PP_OUTPUT_EXACT("#define CONCAT a ## b ## c\nCONCAT\n", "abc\n"); + CHECK_PP_OUTPUT_EXACT( + "#define CONCAT(a, b, c) a ## b ## c\nCONCAT(x, y, z)\n", "xyz\n"); } static void test_define_nested_macros(void) { @@ -131,7 +134,7 @@ static void test_define_nested_macros(void) { CHECK_PP_OUTPUT_EXACT("#undef A\n", ""); CHECK_PP_OUTPUT_EXACT(" # define A 1\nA", "1"); - // CHECK_PP_OUTPUT_EXACT(" # define A 1 \nA", "1"); // TODO + // CHECK_PP_OUTPUT_EXACT(" # define A 1 \nA", "1"); CHECK_PP_OUTPUT_EXACT("#define CONCAT(str) __scc_##str\nCONCAT(int)", "__scc_int"); @@ -140,6 +143,9 @@ static void test_define_nested_macros(void) { CHECK_PP_OUTPUT_EXACT("#define CONCAT(str) __scc_ ## str\nCONCAT(int)", "__scc_int"); + CHECK_PP_OUTPUT_EXACT( + "#define CONCAT(a, b) a ## b\nCONCAT(x, )\nCONCAT(,y)\nCONCAT(,)\n", + "x\ny\n\n"); // TEST_CASE("TODO"); /*FALSE*/ // CHECK_PP_OUTPUT_EXACT("#define str(x) # x\n" @@ -496,7 +502,7 @@ static void test_gnu_comma_variadic_deletion(void) { // 可变参数非空,逗号保留 CHECK_PP_OUTPUT_EXACT("#define FOO(fmt, ...) printf(fmt, ## __VA_ARGS__)\n" "FOO(\"%d\", 42)\n", - "printf(\"%d\",42)\n"); + "printf(\"%d\", 42)\n"); // 带空白变体 CHECK_PP_OUTPUT_EXACT("#define FOO(fmt,...) 
printf(fmt,##__VA_ARGS__)\n" "FOO(\"%d\", 42)\n", @@ -515,45 +521,42 @@ static void test_c99_docs(void) { // 6.10.3.5 Scope of macrodefinitions TEST_CASE("EXAMPLE 3 To illustrate the rules for redefinition and " "reexamination, the sequence"); - /* CHECK_PP_OUTPUT_EXACT( - "#define x 3\n" - "#define f(a) f(x * (a))\n" - "#undef x\n" - "#define x 2\n" - "#define g f\n" - "#define z z[0]\n" - "#define h g(~\n" - "#define m(a) a(w)\n" - "#define w 0,1\n" - "#define t(a) a\n" - "#define p() int\n" - "#define q(x) x\n" - "#define r(x,y) x ## y\n" - "#define str(x) # x\n" - "f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);\n" - "g(x+(3,4)-w) | h 5) & m\n" - " (f)^m(m);\n" - "p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };\n" - "char c[2][6] = { str(hello), str() };\n", - "f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);\n" - "f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);\n" - "int i[] = { 1, 23, 4, 5, };\n" - "char c[2][6] = { \"hello\", \"\" };\n"); - */ + "#define x 3\n" + "#define f(a) f(x * (a))\n" + "#undef x\n" + "#define x 2\n" + "#define g f\n" + "#define z z[0]\n" + "#define h g(~\n" + "#define m(a) a(w)\n" + "#define w 0,1\n" + "#define t(a) a\n" + "#define p() int\n" + "#define q(x) x\n" + "#define r(x,y) x ## y\n" + "#define str(x) # x\n" + "f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);\n" + "g(x+(3,4)-w) | h 5) & m\n" + " (f)^m(m);\n" + "p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };\n" + "char c[2][6] = { str(hello), str() };\n", + "f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);\n" + "f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);\n" + "int i[] = { 1, 23, 4, 5, };\n" + "char c[2][6] = { \"hello\", \"\" };\n"); TEST_CASE("EXAMPLE 4 To illustrate the rules for creating character string " "literals and concatenating tokens, the sequence"); TEST_CASE("EXAMPLE 5 To illustrate the rules for placemarker preprocessing " "tokens, the sequence"); - /* CHECK_PP_OUTPUT_EXACT("#define t(x,y,z) x ## y ## z\n" "int j[] = { t(1,2,3), 
t(,4,5), t(6,,7), t(8,9,),\n" "\t\t\tt(10,,), t(,11,), t(,,12), t(,,) };\n", + "int j[] = { 123, 45, 67, 89,\n" - "\t\t\t10, 11, 12, };\n"); - */ + "\t\t\t10, 11, 12, };\n"); TEST_CASE("EXAMPLE 6 To demonstrate the redefinition rules, the following " "sequence is valid.");