feat(pproc): 实现预处理器宏连接操作符功能

- 修改concatenate_tokens函数以支持null参数检查,避免空指针访问
- 添加concact辅助函数来处理##连接操作的逻辑
- 重构expand_function_macro中##操作符的实现,支持GNU扩展特性
- 实现对可变参数宏中##操作的正确处理,包括逗号删除逻辑
- 改进object宏中的##连接操作处理
- 添加多个单元测试用例验证连接操作符的正确性
- 修复字符串连接时的边界条件处理

refactor(tests): 重命名预处理器单元测试文件

- 将test_unit.c重命名为test_pproc_unit.c以更明确标识测试范围
This commit is contained in:
zzy
2026-02-27 21:00:14 +08:00
parent e79984592e
commit 0fede5f46e
2 changed files with 91 additions and 107 deletions

View File

@@ -39,10 +39,13 @@ static scc_lexer_tok_t stringify_argument(scc_lexer_tok_vec_t *arg_tokens) {
static scc_lexer_tok_t concatenate_tokens(const scc_lexer_tok_t *left,
const scc_lexer_tok_t *right) {
Assert(left != null && right != null);
scc_cstring_t new_lex = scc_cstring_create();
scc_cstring_t new_lex = scc_cstring_from_cstr("");
if (left != null) {
scc_cstring_append(&new_lex, &left->lexeme);
}
if (right != null) {
scc_cstring_append(&new_lex, &right->lexeme);
}
scc_lexer_t lexer;
scc_sstream_t sstream;
@@ -56,7 +59,7 @@ static scc_lexer_tok_t concatenate_tokens(const scc_lexer_tok_t *left,
int ok;
scc_ring_next_consume(*ring, result, ok);
if (!ok) {
scc_lexer_tok_drop(&result);
result.type = SCC_TOK_EOF;
return result;
}
scc_ring_next_consume(*ring, result, ok);
@@ -263,6 +266,36 @@ static inline int got_right_non_blank(int i,
return right_idx;
}
/*
 * Perform one `##` paste against the tokens already emitted into tok_buffer.
 *
 * tok_buffer         Output vector. Its tail, starting at the index reported
 *                    by got_left_non_blank(), is dropped and replaced by the
 *                    pasted token (if any).
 * right              Right-hand operand forwarded to concatenate_tokens();
 *                    callers pass null when there is no operand.
 * gnu_va_arg_extend  When true and the left operand is a comma, the comma is
 *                    not used as an operand (left becomes null) but is still
 *                    removed from tok_buffer together with the rest of the
 *                    tail.
 */
static void concact(scc_lexer_tok_vec_t *tok_buffer, scc_lexer_tok_t *right,
                    cbool gnu_va_arg_extend) {
    const int buffered = (int)scc_vec_size(*tok_buffer);
    /* NOTE(review): presumably the index of the nearest non-blank token left
     * of the buffer end, negative when there is none - confirm against
     * got_left_non_blank(). */
    int first_drop = got_left_non_blank(buffered, tok_buffer);
    scc_lexer_tok_t *lhs = null;

    if (first_drop < 0) {
        /* No left operand: every buffered token gets dropped below.
         * FIXME (carried over from the original): revisit how tok_buffer is
         * freed in this case. */
        first_drop = 0;
    } else {
        lhs = &scc_vec_at(*tok_buffer, first_drop);
        if (gnu_va_arg_extend && lhs->type == SCC_TOK_COMMA) {
            lhs = null;
        }
    }

    /* Paste first: lhs still points into tok_buffer, so the operand must be
     * consumed before the tail is popped. */
    scc_lexer_tok_t pasted = concatenate_tokens(lhs, right);

    /* Drop everything from first_drop up to the end of the buffer. */
    for (int remaining = buffered - first_drop; remaining > 0; --remaining) {
        scc_lexer_tok_drop(&scc_vec_pop(*tok_buffer));
    }

    if (pasted.type == SCC_TOK_UNKNOWN) {
        LOG_FATAL("Invalid ## token");
    }
    /* An EOF-typed result is not pushed, so nothing is emitted for it. */
    if (pasted.type != SCC_TOK_EOF) {
        scc_vec_push(*tok_buffer, pasted);
    }
}
static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
const scc_pproc_macro_t *macro) {
scc_lexer_tok_vec_t tok_buffer;
@@ -316,55 +349,16 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
} else if (tok.type == SCC_TOK_SHARP_SHARP) {
// ## concat
scc_lexer_tok_drop(&tok);
int left_idx = got_left_non_blank(i, &macro->replaces);
int right_idx = got_right_non_blank(i, &macro->replaces);
if (left_idx < 0 ||
right_idx >= (int)scc_vec_size(macro->replaces)) {
LOG_FATAL("Invalid ## operator");
}
while ((int)i++ < right_idx) {
scc_lexer_tok_drop(&scc_vec_pop(tok_buffer));
}
scc_lexer_tok_t *left_tok = &scc_vec_at(macro->replaces, left_idx);
scc_lexer_tok_t *right_tok =
&scc_vec_at(macro->replaces, right_idx);
if (left_tok->type == SCC_TOK_COMMA &&
scc_strcmp(scc_cstring_as_cstr(&(right_tok->lexeme)),
"__VA_ARGS__") == 0) {
// GNU 扩展:处理逗号删除
int right_param_idx = find_params(right_tok, macro);
Assert(right_param_idx != -1);
scc_lexer_tok_vec_t right_vec =
scc_vec_at(expanded_params, right_param_idx);
if (scc_vec_size(right_vec) != 0) {
// 可变参数非空:输出逗号副本,然后输出右侧参数的展开
scc_lexer_tok_t comma_tok = scc_lexer_tok_copy(left_tok);
scc_vec_push(tok_buffer, comma_tok);
}
scc_vec_foreach(right_vec, k) {
scc_lexer_tok_t tok =
scc_lexer_tok_copy(&scc_vec_at(right_vec, k));
scc_vec_push(tok_buffer, tok);
}
i = right_idx;
continue;
}
int idx;
idx = find_params(left_tok, macro);
scc_lexer_tok_vec_t left_vec;
if (idx != -1) {
Assert(idx < (int)scc_vec_size(splited_params));
left_vec = scc_vec_at(splited_params, idx);
scc_lexer_tok_t *right_tok;
if (right_idx >= (int)scc_vec_size(macro->replaces)) {
right_tok = null;
} else {
scc_vec_init(left_vec);
scc_vec_push(left_vec, scc_lexer_tok_copy(left_tok));
right_tok = &scc_vec_at(macro->replaces, right_idx);
}
idx = find_params(right_tok, macro);
int idx = find_params(right_tok, macro);
scc_lexer_tok_vec_t right_vec;
if (idx != -1) {
Assert(idx < (int)scc_vec_size(splited_params));
@@ -374,26 +368,21 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
scc_vec_push(right_vec, scc_lexer_tok_copy(right_tok));
}
scc_lexer_tok_t *left =
scc_vec_size(left_vec)
? &scc_vec_at(left_vec, scc_vec_size(left_vec) - 1)
: null;
scc_lexer_tok_t *right =
scc_vec_size(right_vec) ? &scc_vec_at(right_vec, 0) : null;
scc_vec_foreach(left_vec, k) {
if (k + 1 >= scc_vec_size(left_vec)) {
// GNU ## extension
if (scc_strcmp(scc_cstring_as_cstr(&(right_tok->lexeme)),
"__VA_ARGS__") == 0) {
if (scc_vec_size(right_vec) == 0) {
concact(&tok_buffer, right, true);
} else {
continue;
}
scc_lexer_tok_t tok =
scc_lexer_tok_copy(&scc_vec_at(left_vec, k));
scc_vec_push(tok_buffer, tok);
} else {
concact(&tok_buffer, right, false);
}
scc_lexer_tok_t concate_tok = concatenate_tokens(left, right);
if (concate_tok.type == SCC_TOK_UNKNOWN) {
LOG_FATAL("Invalid ## token");
}
scc_vec_push(tok_buffer, concate_tok);
scc_vec_foreach(right_vec, k) {
if (k == 0) {
continue;
@@ -439,24 +428,16 @@ static inline void expand_object_macro(scc_pproc_expand_t *expand_ctx,
tok.lexeme = scc_cstring_from_cstr(" ");
} else if (tok.type == SCC_TOK_SHARP_SHARP) {
// ## concat
int left_idx = got_left_non_blank(i, &macro->replaces);
int right_idx = got_right_non_blank(i, &macro->replaces);
if (left_idx < 0 ||
right_idx >= (int)scc_vec_size(macro->replaces)) {
LOG_FATAL("Invalid ## operator");
scc_lexer_tok_t *right;
if (right_idx >= (int)scc_vec_size(macro->replaces)) {
right = null;
} else {
right = &scc_vec_at(macro->replaces, right_idx);
}
scc_lexer_tok_t *left = &scc_vec_at(macro->replaces, left_idx);
scc_lexer_tok_t *right = &scc_vec_at(macro->replaces, right_idx);
scc_lexer_tok_t concate_tok = concatenate_tokens(left, right);
while ((int)i++ < right_idx) {
scc_lexer_tok_drop(&scc_vec_pop(tok_buffer));
}
if (concate_tok.type == SCC_TOK_UNKNOWN) {
LOG_FATAL("Invalid ## token");
}
scc_vec_push(tok_buffer, concate_tok);
concact(&tok_buffer, right, false);
i = right_idx;
continue;
}

View File

@@ -116,6 +116,9 @@ static void test_define_concat_operator(void) {
"helloworld\n");
CHECK_PP_OUTPUT_EXACT("#define JOIN(pre,suf) pre ## suf\nJOIN(var, 123)\n",
"var123\n");
CHECK_PP_OUTPUT_EXACT("#define CONCAT a ## b ## c\nCONCAT\n", "abc\n");
CHECK_PP_OUTPUT_EXACT(
"#define CONCAT(a, b, c) a ## b ## c\nCONCAT(x, y, z)\n", "xyz\n");
}
static void test_define_nested_macros(void) {
@@ -131,7 +134,7 @@ static void test_define_nested_macros(void) {
CHECK_PP_OUTPUT_EXACT("#undef A\n", "");
CHECK_PP_OUTPUT_EXACT(" # define A 1\nA", "1");
// CHECK_PP_OUTPUT_EXACT(" # define A 1 \nA", "1"); // TODO
// CHECK_PP_OUTPUT_EXACT(" # define A 1 \nA", "1");
CHECK_PP_OUTPUT_EXACT("#define CONCAT(str) __scc_##str\nCONCAT(int)",
"__scc_int");
@@ -140,6 +143,9 @@ static void test_define_nested_macros(void) {
CHECK_PP_OUTPUT_EXACT("#define CONCAT(str) __scc_ ## str\nCONCAT(int)",
"__scc_int");
CHECK_PP_OUTPUT_EXACT(
"#define CONCAT(a, b) a ## b\nCONCAT(x, )\nCONCAT(,y)\nCONCAT(,)\n",
"x\ny\n\n");
// TEST_CASE("TODO"); /*FALSE*/
// CHECK_PP_OUTPUT_EXACT("#define str(x) # x\n"
@@ -515,7 +521,6 @@ static void test_c99_docs(void) {
// 6.10.3.5 Scope of macrodefinitions
TEST_CASE("EXAMPLE 3 To illustrate the rules for redefinition and "
"reexamination, the sequence");
/*
CHECK_PP_OUTPUT_EXACT(
"#define x 3\n"
"#define f(a) f(x * (a))\n"
@@ -540,20 +545,18 @@ static void test_c99_docs(void) {
"f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);\n"
"int i[] = { 1, 23, 4, 5, };\n"
"char c[2][6] = { \"hello\", \"\" };\n");
*/
TEST_CASE("EXAMPLE 4 To illustrate the rules for creating character string "
"literals and concatenating tokens, the sequence");
TEST_CASE("EXAMPLE 5 To illustrate the rules for placemarker preprocessing "
"tokens, the sequence");
/*
CHECK_PP_OUTPUT_EXACT("#define t(x,y,z) x ## y ## z\n"
"int j[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,),\n"
"\t\t\tt(10,,), t(,11,), t(,,12), t(,,) };\n",
"int j[] = { 123, 45, 67, 89,\n"
"\t\t\t10, 11, 12, };\n");
*/
TEST_CASE("EXAMPLE 6 To demonstrate the redefinition rules, the following "
"sequence is valid.");