feat(parser): 改进解析器错误处理和表达式解析逻辑

- 在初始化解析中添加缺失的赋值操作符检查
- 改进后缀表达式解析逻辑,处理嵌套情况
- 添加数组下标初始化的赋值操作符验证
- 修复主表达式解析中的返回语句处理

refactor(pproc): 优化预处理器宏展开和位置追踪

- 添加token复制函数来保持原始位置信息
- 重构宏展开函数参数传递方式
- 修复字符串化参数的位置信息处理
- 改进可变参数宏的处理逻辑

test(parser): 增加标签语句和字符串字面量测试用例

- 添加带复合字面量的返回语句测试
- 增加标签继续语句的测试用例
- 添加字符串连接的解析测试

test(pproc): 添加预处理器位置追踪测试

- 增加双重宏定义位置追踪测试
- 添加带参数宏定义位置追踪测试
- 增加字符串化操作位置追踪测试

docs: 更新代码中的宏定义和注释

- 修正未定义标识符的拼写错误
- 添加必要的头文件包含
- 改进错误消息提示文本
This commit is contained in:
zzy
2026-03-13 13:48:55 +08:00
parent 2d1032c363
commit c99f64708e
14 changed files with 260 additions and 60 deletions

View File

@@ -6,6 +6,15 @@ static void rescan(scc_pproc_expand_t *expand_ctx,
const scc_pproc_macro_t *macro,
scc_lexer_tok_vec_t *tok_buffer);
/**
 * Duplicate a token for use while expanding a macro.
 *
 * The duplicate is produced by scc_lexer_tok_copy(). When the expansion
 * context has need_keep_org_pos set, the duplicate's location is overwritten
 * with ctx->call_pos; otherwise the location produced by the copy is kept.
 *
 * @param ctx expansion context supplying need_keep_org_pos and call_pos
 * @param src token to duplicate
 * @return the duplicated token, returned by value
 */
static inline scc_lexer_tok_t scc_pproc_tok_copy(scc_pproc_expand_t *ctx,
                                                 const scc_lexer_tok_t *src) {
    scc_lexer_tok_t copy = scc_lexer_tok_copy(src);
    if (!ctx->need_keep_org_pos) {
        return copy;
    }
    copy.loc = ctx->call_pos;
    return copy;
}
static scc_lexer_tok_t stringify_argument(scc_pproc_expand_t *ctx,
scc_lexer_tok_vec_t *arg_tokens) {
// WRITE BY AI
@@ -13,8 +22,9 @@ static scc_lexer_tok_t stringify_argument(scc_pproc_expand_t *ctx,
scc_cstring_append_ch(&str, '\"'); // 左引号
int need_space = 0; // 是否需要插入空格
scc_lexer_tok_t *tok = null;
scc_vec_foreach(*arg_tokens, i) {
scc_lexer_tok_t *tok = &scc_vec_at(*arg_tokens, i);
tok = &scc_vec_at(*arg_tokens, i);
if (tok->type == SCC_TOK_BLANK) {
need_space = 1; // 标记遇到空白
continue;
@@ -39,8 +49,11 @@ static scc_lexer_tok_t stringify_argument(scc_pproc_expand_t *ctx,
scc_lexer_tok_t result;
result.type = SCC_TOK_STRING_LITERAL;
result.lexeme = str;
if (ctx->need_keep_org_pos)
if (ctx->need_keep_org_pos) {
result.loc = ctx->call_pos;
} else {
result.loc = tok ? tok->loc : scc_pos_create();
}
return result;
}
@@ -88,8 +101,9 @@ RETURN:
scc_lexer_drop_ring(ring);
scc_lexer_drop(&lexer);
scc_sstream_drop(&sstream);
if (ctx->need_keep_org_pos)
if (ctx->need_keep_org_pos) {
result.loc = ctx->call_pos;
}
return result;
}
@@ -102,6 +116,7 @@ static inline void scc_copy_expand(scc_pproc_expand_t *expand_ctx,
copyed_ctx->macro_table = expand_ctx->macro_table;
copyed_ctx->need_rescan = false;
copyed_ctx->need_parse_defined = expand_ctx->need_parse_defined;
copyed_ctx->need_keep_org_pos = expand_ctx->need_keep_org_pos;
scc_vec_init(copyed_ctx->output);
}
@@ -394,7 +409,7 @@ static void concact(scc_pproc_expand_t *ctx, scc_lexer_tok_vec_t *tok_buffer,
}
}
static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
static inline void expand_function_macro(scc_pproc_expand_t *ctx,
const scc_pproc_macro_t *macro) {
scc_lexer_tok_vec_t tok_buffer;
@@ -403,7 +418,7 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
Assert(macro->type == SCC_PP_MACRO_FUNCTION);
scc_lexer_tok_vec_t raw_args;
scc_vec_init(raw_args);
scc_pproc_parse_macro_arguments(expand_ctx->input, &raw_args, false);
scc_pproc_parse_macro_arguments(ctx->input, &raw_args, false);
// collect, fill and expand arg
scc_pproc_macro_extened_params_t splited_params;
@@ -414,15 +429,13 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
scc_pproc_macro_extened_params_t expanded_params;
scc_vec_init(expanded_params);
expand_arguments(&expanded_params, &splited_params, expand_ctx);
expand_arguments(&expanded_params, &splited_params, ctx);
Assert(scc_vec_size(expanded_params) >= scc_vec_size(macro->params));
// replace
scc_vec_foreach(macro->replaces, i) {
scc_lexer_tok_t tok =
scc_lexer_tok_copy(&scc_vec_at(macro->replaces, i));
if (expand_ctx->need_keep_org_pos)
tok.loc = expand_ctx->call_pos;
scc_pproc_tok_copy(ctx, &scc_vec_at(macro->replaces, i));
if (tok.type == SCC_TOK_BLANK) {
scc_cstring_free(&tok.lexeme);
tok.lexeme = scc_cstring_from_cstr(" ");
@@ -444,8 +457,7 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
int j = find_params(&scc_vec_at(macro->replaces, right_idx), macro);
Assert(j != -1 && j < (int)scc_vec_size(splited_params));
tok =
stringify_argument(expand_ctx, &scc_vec_at(splited_params, j));
tok = stringify_argument(ctx, &scc_vec_at(splited_params, j));
scc_vec_push(tok_buffer, tok);
i = right_idx;
continue;
@@ -468,11 +480,11 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
Assert(idx < (int)scc_vec_size(splited_params));
scc_lexer_tok_vec_t *tok_vec = &scc_vec_at(splited_params, idx);
scc_vec_foreach(*tok_vec, j) {
scc_vec_push(right_vec,
scc_lexer_tok_copy(&scc_vec_at(*tok_vec, j)));
scc_vec_push(right_vec, scc_pproc_tok_copy(
ctx, &scc_vec_at(*tok_vec, j)));
}
} else {
scc_vec_push(right_vec, scc_lexer_tok_copy(right_tok));
scc_vec_push(right_vec, scc_pproc_tok_copy(ctx, right_tok));
}
scc_lexer_tok_t *right =
@@ -482,12 +494,12 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
if (scc_strcmp(scc_cstring_as_cstr(&(right_tok->lexeme)),
"__VA_ARGS__") == 0) {
if (scc_vec_size(right_vec) == 0) {
concact(expand_ctx, &tok_buffer, right, true);
concact(ctx, &tok_buffer, right, true);
} else {
continue;
}
} else {
concact(expand_ctx, &tok_buffer, right, false);
concact(ctx, &tok_buffer, right, false);
}
scc_vec_foreach(right_vec, j) {
@@ -512,7 +524,8 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
scc_vec_at(expanded_params, j);
scc_lexer_tok_drop(&tok);
scc_vec_foreach(expanded_param, k) {
tok = scc_lexer_tok_copy(&scc_vec_at(expanded_param, k));
tok =
scc_pproc_tok_copy(ctx, &scc_vec_at(expanded_param, k));
scc_vec_push(tok_buffer, tok);
}
continue;
@@ -523,20 +536,18 @@ static inline void expand_function_macro(scc_pproc_expand_t *expand_ctx,
expanded_params_free(&splited_params);
expanded_params_free(&expanded_params);
rescan(expand_ctx, macro, &tok_buffer);
rescan(ctx, macro, &tok_buffer);
Assert(tok_buffer.cap == 0); // FIXME
}
static inline void expand_object_macro(scc_pproc_expand_t *expand_ctx,
static inline void expand_object_macro(scc_pproc_expand_t *ctx,
const scc_pproc_macro_t *macro) {
scc_lexer_tok_vec_t tok_buffer;
scc_vec_init(tok_buffer);
scc_vec_foreach(macro->replaces, i) {
scc_lexer_tok_t tok =
scc_lexer_tok_copy(&scc_vec_at(macro->replaces, i));
if (expand_ctx->need_keep_org_pos)
tok.loc = expand_ctx->call_pos;
scc_pproc_tok_copy(ctx, &scc_vec_at(macro->replaces, i));
if (tok.type == SCC_TOK_BLANK) {
// FIXME using function to warpper it
scc_cstring_free(&tok.lexeme);
@@ -553,14 +564,14 @@ static inline void expand_object_macro(scc_pproc_expand_t *expand_ctx,
right = &scc_vec_at(macro->replaces, right_idx);
}
concact(expand_ctx, &tok_buffer, right, false);
concact(ctx, &tok_buffer, right, false);
i = right_idx;
continue;
}
scc_vec_push(tok_buffer, tok);
}
rescan(expand_ctx, macro, &tok_buffer);
rescan(ctx, macro, &tok_buffer);
}
static cbool parse_defined(scc_pproc_expand_t *expand_ctx, scc_pos_t *tok_pos) {
@@ -640,8 +651,7 @@ void scc_pproc_expand_macro(scc_pproc_expand_t *expand_ctx) {
if (macro == null || need_skip(expand_ctx, macro)) {
// FIXME maybe keyword is error or don't parse c keyword or number
// 这个地方不太清楚正确的原因
tok.type = SCC_TOK_DISABLED;
tok.type += SCC_TOK_DISABLED;
scc_vec_push(expand_ctx->output, tok);
continue;
}

View File

@@ -126,6 +126,14 @@ static cbool fill_token(scc_lexer_tok_t *tok, void *userdata) {
cbool ret = false;
CONTINUE:
ret = pproc_next(pp, tok);
// FIXME Hack the token
if (ret && tok->type >= SCC_TOK_DISABLED) {
tok->type -= SCC_TOK_DISABLED;
}
if (ret && scc_get_tok_subtype(tok->type) == SCC_TOK_SUBTYPE_INVALID) {
PanicFmt("Invalid token: %s", scc_cstring_as_cstr(&tok->lexeme));
}
if (ret && !pp->ring_need_comment &&
scc_get_tok_subtype(tok->type) == SCC_TOK_SUBTYPE_COMMENT) {
scc_lexer_tok_drop(tok);

View File

@@ -34,8 +34,105 @@ static void test_define_pos(void) {
scc_sstream_drop(&mem_stream);
}
/*
 * Position tracking through two chained object-like macros: OBJ is defined as
 * _OBJ, which is defined as 1. The first token read from the preprocessor
 * ring must have the lexeme "1" and be located at line 3, column 1, which is
 * where OBJ is written in the input.
 */
static void test_define_double_pos(void) {
    const char *source = "#define _OBJ 1\n#define OBJ _OBJ\nOBJ";
    scc_sstream_t stream;
    int ret = scc_sstream_init_by_buffer(&stream, source, strlen(source),
                                         false, 16);
    Assert(ret == 0);

    // Pipeline: character stream -> lexer -> preprocessor -> token ring.
    scc_lexer_t lex;
    scc_lexer_init(&lex, scc_sstream_to_ring(&stream));
    scc_pproc_t pproc;
    scc_pproc_init(&pproc, scc_lexer_to_ring(&lex, 8, true));
    scc_lexer_tok_ring_t *out_ring = scc_pproc_to_ring(&pproc, 8, true, true);

    // Only the first produced token is examined.
    // NOTE(review): tok is never handed to scc_lexer_tok_drop() in this test;
    // confirm whether the consumed token owns its lexeme.
    scc_lexer_tok_t tok = {0};
    scc_ring_next_consume(*out_ring, tok, ret);
    Assert(ret == true);
    TEST_CHECK(tok.loc.line == 3 && tok.loc.col == 1 &&
               scc_strcmp(tok.lexeme.data, "1") == 0);
    TEST_MSG("Expected: %d:%d:%s", 3, 1, "1");
    TEST_MSG("Produced: %zu:%zu:%s", tok.loc.line, tok.loc.col,
             tok.lexeme.data);

    // Release the ring, then the preprocessor, the lexer and the stream.
    scc_ring_free(*out_ring);
    scc_pproc_drop(&pproc);
    scc_lexer_drop(&lex);
    scc_sstream_drop(&stream);
}
/*
 * Position tracking for a macro passed as an argument to a function-like
 * macro: func(x) is defined as x and is invoked as func(OBJ), with OBJ defined
 * as 1. The first token read from the preprocessor ring must have the lexeme
 * "1" and be located at line 3, column 1, which is where func is written in
 * the input.
 */
static void test_define_param_pos(void) {
    const char *source = "#define OBJ 1\n#define func(x) x\nfunc(OBJ)";
    scc_sstream_t stream;
    int ret = scc_sstream_init_by_buffer(&stream, source, strlen(source),
                                         false, 16);
    Assert(ret == 0);

    // Pipeline: character stream -> lexer -> preprocessor -> token ring.
    scc_lexer_t lex;
    scc_lexer_init(&lex, scc_sstream_to_ring(&stream));
    scc_pproc_t pproc;
    scc_pproc_init(&pproc, scc_lexer_to_ring(&lex, 8, true));
    scc_lexer_tok_ring_t *out_ring = scc_pproc_to_ring(&pproc, 8, true, true);

    // Only the first produced token is examined.
    // NOTE(review): tok is never handed to scc_lexer_tok_drop() in this test;
    // confirm whether the consumed token owns its lexeme.
    scc_lexer_tok_t tok = {0};
    scc_ring_next_consume(*out_ring, tok, ret);
    Assert(ret == true);
    TEST_CHECK(tok.loc.line == 3 && tok.loc.col == 1 &&
               scc_strcmp(tok.lexeme.data, "1") == 0);
    TEST_MSG("Expected: %d:%d:%s", 3, 1, "1");
    TEST_MSG("Produced: %zu:%zu:%s", tok.loc.line, tok.loc.col,
             tok.lexeme.data);

    // Release the ring, then the preprocessor, the lexer and the stream.
    scc_ring_free(*out_ring);
    scc_pproc_drop(&pproc);
    scc_lexer_drop(&lex);
    scc_sstream_drop(&stream);
}
/*
 * Position tracking for the # (stringify) operator reached through a wrapper
 * macro: STR(x) is defined as _STR(x), _STR(x) as #x, and OBJ as 1. For the
 * input STR(OBJ), the first token read from the preprocessor ring must have
 * the lexeme "1" wrapped in double quotes and be located at line 4, column 1,
 * which is where STR is written in the input.
 */
static void test_define_stringify_pos(void) {
    const char *source =
        "#define _STR(x) #x\n#define STR(x) _STR(x)\n#define OBJ 1\nSTR(OBJ)";
    scc_sstream_t stream;
    int ret = scc_sstream_init_by_buffer(&stream, source, strlen(source),
                                         false, 16);
    Assert(ret == 0);

    // Pipeline: character stream -> lexer -> preprocessor -> token ring.
    scc_lexer_t lex;
    scc_lexer_init(&lex, scc_sstream_to_ring(&stream));
    scc_pproc_t pproc;
    scc_pproc_init(&pproc, scc_lexer_to_ring(&lex, 8, true));
    scc_lexer_tok_ring_t *out_ring = scc_pproc_to_ring(&pproc, 8, true, true);

    // Only the first produced token is examined.
    // NOTE(review): tok is never handed to scc_lexer_tok_drop() in this test;
    // confirm whether the consumed token owns its lexeme.
    scc_lexer_tok_t tok = {0};
    scc_ring_next_consume(*out_ring, tok, ret);
    Assert(ret == true);
    TEST_CHECK(tok.loc.line == 4 && tok.loc.col == 1 &&
               scc_strcmp(tok.lexeme.data, "\"1\"") == 0);
    TEST_MSG("Expected: %d:%d:%s", 4, 1, "\"1\"");
    TEST_MSG("Produced: %zu:%zu:%s", tok.loc.line, tok.loc.col,
             tok.lexeme.data);

    // Release the ring, then the preprocessor, the lexer and the stream.
    scc_ring_free(*out_ring);
    scc_pproc_drop(&pproc);
    scc_lexer_drop(&lex);
    scc_sstream_drop(&stream);
}
// Builds one {name, function} entry; the name is the stringified function
// identifier.
#define TEST_LIST_CASE(func_name) {#func_name, func_name}
// Test cases registered for this file. The trailing {NULL, NULL} entry is
// presumably the end-of-list marker expected by the test framework.
TEST_LIST = {
TEST_LIST_CASE(test_define_pos),
TEST_LIST_CASE(test_define_double_pos),
TEST_LIST_CASE(test_define_param_pos),
TEST_LIST_CASE(test_define_stringify_pos),
{NULL, NULL},
};

View File

@@ -541,6 +541,8 @@ static void test_gnu_comma_variadic_deletion(void) {
"printf(\"%d\",42)\n");
}
// Placeholder test case: the body is empty, so it currently performs no checks.
static void test_real_case(void) {
}
static void test_c99_docs(void) {
TEST_CASE("6.10.3.3 The ## operator EXAMPLE");
CHECK_PP_OUTPUT_EXACT("#define hash_hash # ## #\n"
@@ -644,6 +646,8 @@ TEST_LIST = {
TEST_LIST_CASE(test_variadic_macros),
TEST_LIST_CASE(test_gnu_comma_variadic_deletion),
TEST_LIST_CASE(test_real_case),
TEST_LIST_CASE(test_c99_docs),
{NULL, NULL},
};