feat(pproc): 实现C语言预处理器功能并重构项目依赖

- 新增预处理器库(pproc),替代原有的pprocessor模块
- 实现完整的宏定义解析功能,支持对象宏和函数宏
- 添加条件编译指令处理(#if、#ifdef、#ifndef、#else、#elif、#endif)
- 实现宏展开机制,包括嵌套宏和递归宏处理
- 添加宏定义测试用例,覆盖基本功能和复杂场景
- 在cbuild.toml中更新依赖配置,移除parser、ast、ast2ir、ir等未完成模块
- 新增lexer工具函数用于token流处理
- 添加宏定义表管理功能,支持宏的创建、查找、删除操作
- 实现宏参数解析和替换列表处理
This commit is contained in:
zzy
2026-02-17 22:47:25 +08:00
parent 681a15cb44
commit 2de5ae59f5
13 changed files with 1083 additions and 84 deletions

View File

@@ -0,0 +1,279 @@
#include <scc_lexer_utils.h>
#include <scc_pproc.h>
/*
 * Lookup table pairing each preprocessor directive name with its token type.
 * The rows are produced by expanding SCC_PPKEYWORD_TABLE through an X-macro
 * that stringifies the first column and keeps the third one.
 * keyword_cmp() binary-searches this table, so the rows must be in ascending
 * byte order of `name`.
 * NOTE(review): confirm SCC_PPKEYWORD_TABLE is declared in that order.
 */
static const struct {
const char *name;
scc_tok_type_t tok_type;
} keywords[] = {
#define X(name, type, tok) {#name, tok},
SCC_PPKEYWORD_TABLE
#undef X
};
/*
 * Binary-search the `keywords` table for a directive name.
 *
 * name: candidate spelling; only the first `len` bytes are examined and the
 *       comparison also stops at the first NUL byte inside that range.
 * len:  number of bytes of `name` to consider.
 *
 * Returns the index of the matching row in `keywords`, or -1 when the name
 * is not a known directive keyword.
 */
static inline int keyword_cmp(const char *name, int len) {
    int low = 0;
    int high = (int)(sizeof(keywords) / sizeof(keywords[0])) - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;
        const char *key = keywords[mid].name;
        int cmp = 0;
        int i = 0;

        // Byte-wise comparison limited to `len` bytes of `name`.
        for (; i < len; i++) {
            if (name[i] != key[i]) {
                cmp = (unsigned char)name[i] - (unsigned char)key[i];
                break;
            }
            if (name[i] == '\0')
                break; // both strings end here
        }

        if (cmp == 0) {
            // name[0..i) equals key[0..i). Test the key at the position where
            // the comparison stopped: indexing key[len] after an early NUL
            // stop would read past the end of the keyword.
            if (key[i] == '\0')
                return mid;
            cmp = -1; // the keyword is longer than the input
        }

        if (cmp < 0) {
            high = mid - 1;
        } else {
            low = mid + 1;
        }
    }
    return -1; // Not a keyword.
}
/*
 * Collect the tokens of a parenthesised list from pp->cur_ring into `args`.
 *
 * Reading starts at the next non-blank token. When that token is '(' every
 * following non-blank token up to the matching ')' is appended to `args`;
 * the outermost parentheses themselves are dropped, nested ones are kept.
 * When the first token is not '(' it is dropped and `args` stays empty.
 *
 * `args` is initialised here; the caller owns the collected tokens and the
 * vector storage afterwards.
 */
void scc_pproc_parse_macro_arguments(scc_pproc_t *pp,
                                     scc_pproc_macro_list_t *args) {
    Assert(pp != null && args != null);
    scc_vec_init(*args);

    int depth = 0;
    do {
        scc_lexer_tok_t tok = {0};
        if (!scc_lexer_next_non_blank(pp->cur_ring, &tok) ||
            tok.type == SCC_TOK_EOF) {
            // Input ended before the list was closed: stop instead of
            // looping forever on a token that never changes.
            scc_lexer_tok_drop(&tok);
            LOG_ERROR("unterminated macro argument list");
            break;
        }

        int is_outer_paren = 0;
        if (tok.type == SCC_TOK_L_PAREN) {
            is_outer_paren = (depth == 0);
            depth++;
        } else if (tok.type == SCC_TOK_R_PAREN) {
            depth--;
            is_outer_paren = (depth == 0);
        }

        // Keep everything inside the outermost pair, including the tokens
        // that sit directly at nesting depth 1.
        if (depth >= 1 && !is_outer_paren) {
            scc_vec_push(*args, tok);
        } else {
            scc_lexer_tok_drop(&tok);
        }
    } while (depth > 0);
}
/*
 * Read the rest of the current line from pp->cur_ring into macro->replaces.
 *
 * Blank tokens before the first replacement token are skipped; after that
 * every token (blanks included) is appended until an end-of-line or EOF
 * token is reached or the ring reports failure. The terminating token is
 * consumed and dropped, never stored.
 */
static inline void fill_replacements(scc_pproc_t *pp, scc_pp_macro_t *macro) {
    // Zero-initialised so the drop below is well defined even when the
    // very first read fails.
    scc_lexer_tok_t tok = {0};
    int ok = scc_lexer_next_non_blank(pp->cur_ring, &tok);
    if (!ok || tok.type == SCC_TOK_EOF || tok.type == SCC_TOK_ENDLINE) {
        // Empty replacement list: release the terminator just like the
        // loop below does.
        scc_lexer_tok_drop(&tok);
        return;
    }
    scc_vec_push(macro->replaces, tok);

    for (;;) {
        scc_ring_next_consume(*pp->cur_ring, tok, ok);
        if (!ok) {
            break;
        }
        if (tok.type == SCC_TOK_EOF || tok.type == SCC_TOK_ENDLINE) {
            scc_lexer_tok_drop(&tok);
            break;
        }
        scc_vec_push(macro->replaces, tok);
    }
}
/*
 * Parse the remainder of a function-like `#define` and register the macro.
 *
 * pp:    preprocessor whose cur_ring is positioned at the '(' that follows
 *        the macro name.
 * ident: the macro name token; its lexeme is copied, the token itself stays
 *        owned by the caller.
 *
 * The parameter list must alternate identifier / comma, with `...` allowed
 * in an identifier position; `...` is stored under the name `__VA_ARGS__`.
 * The rest of the line becomes the replacement list. An existing macro with
 * the same name is removed and released before the new one is stored.
 */
void scc_pproc_parse_function_macro(scc_pproc_t *pp,
                                    const scc_lexer_tok_t *ident) {
    scc_pproc_macro_list_t args;
    scc_pproc_parse_macro_arguments(pp, &args);

    scc_pp_macro_t *macro =
        scc_pp_macro_new(&ident->lexeme, SCC_PP_MACRO_FUNCTION);
    if (macro == null) {
        // Allocation failed (already logged by scc_pp_macro_new): release
        // what was collected instead of dereferencing a null macro.
        scc_vec_foreach(args, i) {
            scc_lexer_tok_drop(&scc_vec_at(args, i));
        }
        scc_vec_free(args);
        return;
    }

    /*
    check and set params
    1. identifier-list(opt)
    2. ...
    3. identifier-list , ...
    */
    scc_vec_foreach(args, i) {
        scc_lexer_tok_t *arg = &scc_vec_at(args, i);
        if (arg->type == SCC_TOK_COMMA) {
            // Separators are not stored; they must sit at odd positions.
            scc_lexer_tok_drop(arg);
            if (i % 2 != 1) {
                LOG_FATAL("ERROR");
            }
        } else if (arg->type == SCC_TOK_IDENT) {
            if (i % 2 != 0) {
                LOG_FATAL("ERROR");
            }
            // Ownership of the token moves into the parameter list.
            scc_vec_push(macro->params, *arg);
        } else if (arg->type == SCC_TOK_ELLIPSIS) {
            if (i % 2 != 0) {
                LOG_FATAL("ERROR");
            }
            scc_cstring_t va_args = scc_cstring_from_cstr("__VA_ARGS__");
            scc_cstring_free(&arg->lexeme);
            arg->lexeme = va_args;
            scc_vec_push(macro->params, *arg);
        } else {
            LOG_FATAL("ERROR");
            // Only reached if LOG_FATAL returns: the rejected token is not
            // stored anywhere, so release it here.
            scc_lexer_tok_drop(arg);
        }
    }
    // Every token was either moved into macro->params or dropped above;
    // only the vector's own storage is left to release.
    scc_vec_free(args);

    fill_replacements(pp, macro);

    // Release a previous definition first so a redefinition does not
    // orphan the old macro object.
    scc_pp_macro_table_remove(&pp->macro_table, &macro->name);
    scc_pp_macro_table_set(&pp->macro_table, macro);
}
/*
 * Parse the remainder of an object-like `#define` and register the macro.
 *
 * pp:    preprocessor whose cur_ring is positioned right after the macro
 *        name.
 * ident: the macro name token; its lexeme is copied, the token itself stays
 *        owned by the caller.
 *
 * The rest of the line becomes the replacement list. An existing macro with
 * the same name is removed and released before the new one is stored.
 */
void scc_pproc_parse_object_macro(scc_pproc_t *pp,
                                  const scc_lexer_tok_t *ident) {
    scc_pp_macro_t *macro =
        scc_pp_macro_new(&ident->lexeme, SCC_PP_MACRO_OBJECT);
    if (macro == null) {
        // Allocation failure was already logged by scc_pp_macro_new;
        // fill_replacements would dereference the null pointer.
        return;
    }

    fill_replacements(pp, macro);

    // Release a previous definition first so a redefinition does not
    // orphan the old macro object.
    scc_pp_macro_table_remove(&pp->macro_table, &macro->name);
    scc_pp_macro_table_set(&pp->macro_table, macro);
}
/*
```txt
6.10 Preprocessing directives
preprocessing-file:
group(opt)
group:
group-part
group group-part
group-part:
if-section
control-line
text-line
# non-directive
if-section:
if-group elif-groups(opt) else-group(opt) endif-line
if-group:
# if constant-expression new-line group(opt)
# ifdef identifier new-line group(opt)
# ifndef identifier new-line group(opt)
elif-groups:
elif-group
elif-groups elif-group
elif-group:
# elif constant-expression new-line group(opt)
else-group:
# else new-line group(opt)
endif-line:
# endif new-line
control-line:
# include pp-tokens new-line
# define identifier replacement-list new-line
# define identifier lparen identifier-list(opt) )
replacement-list new-line
# define identifier lparen ... ) replacement-list new-line
# define identifier lparen identifier-list ,... )
replacement-list new-line
# undef identifier new-line
# line pp-tokens new-line
# error pp-tokens(opt) new-line
# pragma pp-tokens(opt) new-line
# new-line
text-line:
pp-tokens(opt) new-line
non-directive:
pp-tokens new-line
lparen:
`a ( character not immediately preceded by white-space`
replacement-list:
pp-tokens(opt)
pp-tokens:
preprocessing-token
pp-tokens preprocessing-token
new-line:
the new-line character
```
*/
/*
 * Process one preprocessing directive.
 *
 * Expects pp->cur_ring to be positioned at the '#' token that starts the
 * directive. `#define` and `#undef` are implemented; every other recognised
 * directive is reported as unhandled and its line is skipped. On a
 * malformed directive an error is logged and the rest of the line is
 * skipped.
 */
void scc_pproc_handle_directive(scc_pproc_t *pp) {
    scc_lexer_tok_t tok = {0};
    int ok = 0;

    // Consume the leading '#'.
    scc_ring_next(*pp->cur_ring, tok, ok);
    scc_lexer_tok_drop(&tok);

    if (!scc_lexer_next_non_blank(pp->cur_ring, &tok) ||
        tok.type != SCC_TOK_IDENT) {
        scc_lexer_tok_drop(&tok);
        LOG_ERROR("Invalid preprocessor directive");
        goto ERROR;
    }

    int ret = keyword_cmp(scc_cstring_as_cstr(&tok.lexeme),
                          scc_cstring_len(&tok.lexeme));
    if (ret == -1) {
        // Log before dropping, and pass a C string (not the cstring
        // object) to %s.
        LOG_ERROR("Expected preprocessor keyword, got %s",
                  scc_cstring_as_cstr(&tok.lexeme));
        scc_lexer_tok_drop(&tok);
        goto ERROR;
    }

    scc_tok_type_t type = keywords[ret].tok_type;
    switch (type) {
    case SCC_PP_TOK_DEFINE: {
        scc_lexer_tok_drop(&tok);
        // A failed read must not be mistaken for the stale directive token.
        if (!scc_lexer_next_non_blank(pp->cur_ring, &tok) ||
            tok.type != SCC_TOK_IDENT) {
            scc_lexer_tok_drop(&tok);
            LOG_ERROR("expected identifier");
            goto ERROR;
        }

        // Peek without skipping blanks: only a '(' that immediately follows
        // the name starts a function-like macro.
        scc_lexer_tok_t next_tok;
        scc_ring_peek(*pp->cur_ring, next_tok, ok);
        if (!ok) {
            scc_lexer_tok_drop(&tok);
            LOG_ERROR("unexpected EOF");
            goto ERROR;
        }

        if (next_tok.type == SCC_TOK_L_PAREN) {
            // function macro
            scc_pproc_parse_function_macro(pp, &tok);
        } else {
            // object macro
            scc_pproc_parse_object_macro(pp, &tok);
        }
        scc_lexer_tok_drop(&tok);
        // FIXME
        // The replacement-list reader consumes through the end of the line,
        // so return without running the skip below.
        return;
    }
    case SCC_PP_TOK_UNDEF: {
        scc_lexer_tok_drop(&tok);
        if (!scc_lexer_next_non_blank(pp->cur_ring, &tok) ||
            tok.type != SCC_TOK_IDENT) {
            scc_lexer_tok_drop(&tok);
            LOG_ERROR("expected identifier");
            goto ERROR;
        }
        scc_pp_macro_table_remove(&pp->macro_table, &tok.lexeme);
        scc_lexer_tok_drop(&tok);

        if (!scc_lexer_next_non_blank(pp->cur_ring, &tok) ||
            tok.type != SCC_TOK_ENDLINE) {
            scc_lexer_tok_drop(&tok);
            LOG_ERROR("expected newline");
            goto ERROR;
        }
        scc_lexer_tok_drop(&tok);
        // The newline is already consumed; falling through to the skip
        // below would discard the following source line.
        return;
    }
    case SCC_PP_TOK_INCLUDE:
    case SCC_PP_TOK_IF:
    case SCC_PP_TOK_IFDEF:
    case SCC_PP_TOK_IFNDEF:
    case SCC_PP_TOK_ELSE:
    case SCC_PP_TOK_ELIF:
    case SCC_PP_TOK_ELIFDEF:
    case SCC_PP_TOK_ELIFNDEF:
    case SCC_PP_TOK_ENDIF:
    case SCC_PP_TOK_LINE:
    case SCC_PP_TOK_EMBED:
    case SCC_PP_TOK_ERROR:
    case SCC_PP_TOK_WARNING:
    case SCC_PP_TOK_PRAGMA:
    default:
        LOG_WARN("Unhandled directive: %s", scc_cstring_as_cstr(&tok.lexeme));
        // The directive-name token is not used past this point.
        scc_lexer_tok_drop(&tok);
        break;
    }
ERROR:
    scc_lexer_skip_until_newline(pp->cur_ring);
}

View File

@@ -0,0 +1,32 @@
#include <scc_pproc.h>
/*
 * Placeholder for macro-expansion state. It has no members yet and nothing
 * in the visible part of this file refers to it.
 * NOTE(review): an empty struct is a compiler extension in C, not ISO C.
 */
typedef struct {
} scc_expand_t;
/*
 * Expand one use of `macro` into pp->cache.
 *
 * Object-like macros: every replacement token is copied into pp->cache with
 * its own lexeme (blank tokens are normalised to a single space) and
 * pp->cache_pos is reset to 0.
 *
 * Function-like macros: the invocation's argument list is consumed from
 * pp->cur_ring, but substitution is not implemented yet, so nothing is
 * added to the cache.
 */
void scc_pproc_expand_macro(scc_pproc_t *pp, const scc_pp_macro_t *macro) {
    if (macro->type == SCC_PP_MACRO_NONE) {
        UNREACHABLE();
    }

    if (macro->type == SCC_PP_MACRO_OBJECT) {
        scc_vec_foreach(macro->replaces, i) {
            // Shallow copy first, then give the copy its own lexeme so the
            // macro definition keeps ownership of the original.
            scc_lexer_tok_t tok = scc_vec_at(macro->replaces, i);
            if (tok.type == SCC_TOK_BLANK) {
                tok.lexeme = scc_cstring_from_cstr(" ");
            } else {
                tok.lexeme = scc_cstring_copy(&tok.lexeme);
            }
            scc_vec_push(pp->cache, tok);
        }
        pp->cache_pos = 0;
        return;
    }

    Assert(macro->type == SCC_PP_MACRO_FUNCTION);

    scc_pproc_macro_list_t args;
    scc_pproc_parse_macro_arguments(pp, &args);

    // TODO: check the arguments against macro->params and substitute them
    // into macro->replaces.

    // Until substitution exists, release the collected argument tokens
    // instead of leaking them.
    scc_vec_foreach(args, i) {
        scc_lexer_tok_drop(&scc_vec_at(args, i));
    }
    scc_vec_free(args);
}

View File

View File

@@ -0,0 +1,155 @@
#include <pproc_macro.h>
/*
 * Allocate a macro object.
 *
 * name: macro name; a copy is stored in the new object.
 * type: macro kind stored in the new object.
 *
 * Returns the new macro with empty parameter and replacement lists, or null
 * (after logging an error) when allocation fails.
 */
scc_pp_macro_t *scc_pp_macro_new(const scc_cstring_t *name,
                                 scc_pp_macro_type_t type) {
    scc_pp_macro_t *result = scc_malloc(sizeof *result);
    if (result == null) {
        LOG_ERROR("Failed to allocate memory for macro");
        return null;
    }

    scc_vec_init(result->params);
    scc_vec_init(result->replaces);
    result->type = type;
    result->name = scc_cstring_copy(name);
    return result;
}
/*
 * Destroy a macro object: its name, every parameter and replacement token,
 * both list buffers, and finally the object itself. A null pointer is
 * accepted and ignored.
 */
void scc_pp_macro_drop(scc_pp_macro_t *macro) {
    if (macro == null) {
        return;
    }

    scc_cstring_free(&macro->name);

    // Parameter list.
    usize idx = 0;
    while (idx < macro->params.size) {
        scc_lexer_tok_drop(&scc_vec_at(macro->params, idx));
        ++idx;
    }
    scc_vec_free(macro->params);

    // Replacement list.
    idx = 0;
    while (idx < macro->replaces.size) {
        scc_lexer_tok_drop(&scc_vec_at(macro->replaces, idx));
        ++idx;
    }
    scc_vec_free(macro->replaces);

    scc_free(macro);
}
/*
 * Define an object-like macro in `macros`.
 *
 * name:        macro name; copied into the new macro.
 * replacement: replacement list; it is assigned by value (shallow), so the
 *              new macro takes over the list's tokens and storage and will
 *              release them in scc_pp_macro_drop.
 *
 * An existing macro with the same name is replaced with a warning.
 * Returns false on a null argument or allocation failure, true otherwise.
 */
cbool scc_pp_add_object_macro(scc_pp_macro_table_t *macros,
                              const scc_cstring_t *name,
                              const scc_pproc_macro_list_t *replacement) {
    if (!macros || !name || !replacement)
        return false;

    scc_pp_macro_t *macro = scc_pp_macro_new(name, SCC_PP_MACRO_OBJECT);
    if (!macro)
        return false;
    macro->replaces = *replacement;

    // Replace an existing definition of the same name.
    scc_pp_macro_t *existing = scc_hashtable_get(&macros->table, &macro->name);
    if (existing) {
        LOG_WARN("Redefining macro: %s", scc_cstring_as_cstr(&macro->name));
        // The table entry is keyed by a pointer into the macro object, so
        // take the entry out before freeing the old macro (same order as
        // scc_pp_macro_table_remove).
        scc_hashtable_del(&macros->table, &macro->name);
        scc_pp_macro_drop(existing);
    }

    scc_hashtable_set(&macros->table, &macro->name, macro);
    return true;
}
/*
 * Define a function-like macro in `macros`.
 *
 * name:        macro name; copied into the new macro.
 * params:      parameter list.
 * replacement: replacement list.
 *
 * Both lists are assigned by value (shallow), so the new macro takes over
 * their tokens and storage and will release them in scc_pp_macro_drop.
 *
 * An existing macro with the same name is replaced with a warning.
 * Returns false on a null argument or allocation failure, true otherwise.
 */
cbool scc_pp_add_function_macro(scc_pp_macro_table_t *macros,
                                const scc_cstring_t *name,
                                const scc_pproc_macro_list_t *params,
                                const scc_pproc_macro_list_t *replacement) {
    if (!macros || !name || !params || !replacement)
        return false;

    scc_pp_macro_t *macro = scc_pp_macro_new(name, SCC_PP_MACRO_FUNCTION);
    if (!macro)
        return false;

    // Shallow hand-over of both lists (no deep copy is made).
    macro->params = *params;
    macro->replaces = *replacement;

    // Replace an existing definition of the same name.
    scc_pp_macro_t *existing = scc_hashtable_get(&macros->table, &macro->name);
    if (existing) {
        LOG_WARN("Redefining macro: %s", scc_cstring_as_cstr(&macro->name));
        // The table entry is keyed by a pointer into the macro object, so
        // take the entry out before freeing the old macro (same order as
        // scc_pp_macro_table_remove).
        scc_hashtable_del(&macros->table, &macro->name);
        scc_pp_macro_drop(existing);
    }

    scc_hashtable_set(&macros->table, &macro->name, macro);
    return true;
}
/// macro_table

/*
 * Store `macro` in the table under its own name (the key is a pointer to
 * macro->name). Both arguments must be non-null.
 * Returns whatever scc_hashtable_set returns for this insertion.
 */
scc_pp_macro_t *scc_pp_macro_table_set(scc_pp_macro_table_t *pp,
                                       scc_pp_macro_t *macro) {
    Assert(pp != null && macro != null);
    scc_pp_macro_t *result = scc_hashtable_set(&pp->table, &macro->name, macro);
    return result;
}
/*
 * Look up a macro definition by name.
 * Returns the result of scc_hashtable_get for `name` in the table.
 */
scc_pp_macro_t *scc_pp_macro_table_get(scc_pp_macro_table_t *pp,
                                       const scc_cstring_t *name) {
    scc_pp_macro_t *found = scc_hashtable_get(&pp->table, name);
    return found;
}
/*
 * Remove the macro called `name` from the table and destroy it.
 * Returns true when a macro was removed, false when an argument is null or
 * no macro with that name is stored.
 */
cbool scc_pp_macro_table_remove(scc_pp_macro_table_t *pp,
                                const scc_cstring_t *name) {
    if (pp == null || name == null) {
        return false;
    }

    scc_pp_macro_t *victim = scc_hashtable_get(&pp->table, name);
    if (victim != null) {
        // Unlink from the table before the object is released.
        scc_hashtable_del(&pp->table, name);
        scc_pp_macro_drop(victim);
        return true;
    }
    return false;
}
/*
 * Hash callback for the macro table: `key` points to an scc_cstring_t and
 * the hash is scc_strhash32 of its C string.
 */
static u32 hash_func(const void *key) {
    const char *text = scc_cstring_as_cstr((const scc_cstring_t *)key);
    return scc_strhash32(text);
}
/*
 * Key comparison callback for the macro table. Both keys point to
 * scc_cstring_t objects.
 *
 * Keys with different `size` fields order by size; keys of equal size are
 * compared with scc_strcmp. Returns 0 only when both size and contents
 * match.
 */
static int hash_cmp(const void *key1, const void *key2) {
    const scc_cstring_t *lhs = (const scc_cstring_t *)key1;
    const scc_cstring_t *rhs = (const scc_cstring_t *)key2;

    if (lhs->size != rhs->size) {
        // Compare explicitly rather than returning the difference: the
        // subtraction can wrap or be truncated when narrowed to int.
        return lhs->size < rhs->size ? -1 : 1;
    }
    return scc_strcmp(scc_cstring_as_cstr(lhs), scc_cstring_as_cstr(rhs));
}
/*
 * Prepare a macro table for use: install the cstring hash and comparison
 * callbacks, then initialise the underlying hash table. `macros` must be
 * non-null.
 * NOTE(review): the callbacks are assigned before scc_hashtable_init runs;
 * this presumably relies on the init call leaving them untouched -- confirm.
 */
void scc_pp_marco_table_init(scc_pp_macro_table_t *macros) {
Assert(macros != null);
macros->table.hash_func = hash_func;
macros->table.key_cmp = hash_cmp;
scc_hashtable_init(&macros->table);
}
static int macro_free(const void *key, void *value, void *context) {
(void)key;
(void)context;
scc_pp_macro_drop(value);
return 0;
}
/*
 * Destroy a macro table: every stored macro is released through macro_free
 * first, then the hash table itself is dropped. `macros` must be non-null.
 */
void scc_pp_macro_table_drop(scc_pp_macro_table_t *macros) {
Assert(macros != null);
// Values must be released before the table that references them goes away.
scc_hashtable_foreach(&macros->table, macro_free, null);
scc_hashtable_drop(&macros->table);
}

View File

@@ -0,0 +1,68 @@
#include <scc_pproc.h>
/*
 * Produce the next preprocessed token in `*out`.
 *
 * Tokens waiting in pp->cache (the result of a macro expansion) are handed
 * out first. Otherwise the next input token is examined:
 *   - a '#' in column 1 starts a directive, which is processed and yields
 *     no token by itself;
 *   - an identifier that names a defined macro is consumed and expanded
 *     into the cache;
 *   - anything else is passed through unchanged.
 *
 * Returns non-zero when `*out` holds a token, zero when the input ring
 * cannot supply one.
 */
static int pproc_next(scc_pproc_t *pp, scc_lexer_tok_t *out) {
    scc_lexer_tok_ring_t *stream = pp->cur_ring;

    for (;;) {
        if (scc_vec_size(pp->cache)) {
            // Hand out cached expansion tokens one at a time; release the
            // cache storage once the last one has been taken.
            *out = scc_vec_at(pp->cache, pp->cache_pos);
            pp->cache_pos++;
            if (pp->cache_pos == scc_vec_size(pp->cache)) {
                pp->cache_pos = 0;
                scc_vec_free(pp->cache);
            }
            return true;
        }

        scc_lexer_tok_t tok = {0};
        int ok = 0;
        scc_ring_peek(*stream, tok, ok);
        if (!ok) {
            // Nothing to look at: report it instead of dispatching on a
            // token the peek never filled in.
            return false;
        }

        if (tok.type == SCC_TOK_SHARP && tok.loc.col == 1) {
            scc_pproc_handle_directive(pp);
            continue;
        }

        if (tok.type != SCC_TOK_IDENT) {
            scc_ring_next_consume(*stream, *out, ok);
            return ok;
        }

        // Identifier: look it up before consuming, while the peeked lexeme
        // is still the one in the ring.
        scc_pp_macro_t *macro =
            scc_pp_macro_table_get(&pp->macro_table, &tok.lexeme);
        scc_ring_next_consume(*stream, *out, ok);
        if (macro == null) {
            return ok;
        }

        // The macro name itself is not emitted; release it before `*out`
        // is overwritten by the expansion.
        scc_lexer_tok_drop(out);
        scc_pproc_expand_macro(pp, macro);
    }
}
/*
 * Initialise a preprocessor that reads its tokens from `input`.
 *
 * pp:    preprocessor object to set up; must be non-null.
 * input: token ring to read from; must be non-null. Only the pointer is
 *        stored.
 *
 * The macro table, the if/file stacks and the expansion cache start empty.
 */
void scc_pproc_init(scc_pproc_t *pp, scc_lexer_tok_ring_t *input) {
    Assert(pp != null && input != null);
    pp->cur_ring = input;
    scc_pp_marco_table_init(&pp->macro_table);
    scc_vec_init(pp->if_stack);
    scc_vec_init(pp->file_stack);
    scc_vec_init(pp->cache);
    pp->cache_pos = 0;
    // scc_pproc_to_ring increments this counter, so it needs a defined
    // starting value even when `pp` was not zero-filled by the caller.
    pp->ring_ref_count = 0;
}
/*
 * Ring fill callback: `userdata` is the preprocessor, and the next
 * preprocessed token is written to `tok`. Returns the result of pproc_next.
 */
static cbool fill_token(scc_lexer_tok_t *tok, void *userdata) {
    return pproc_next((scc_pproc_t *)userdata, tok);
}
/*
 * Expose the preprocessor's output as a token ring.
 *
 * Initialises pp->ring with capacity `ring_size` and fill_token as its
 * producer, bumps pp->ring_ref_count, and returns a pointer to pp->ring.
 */
scc_lexer_tok_ring_t *scc_pproc_to_ring(scc_pproc_t *pp, int ring_size) {
    scc_ring_init(pp->ring, ring_size, fill_token, pp);
    pp->ring_ref_count += 1;

    scc_lexer_tok_ring_t *output = &pp->ring;
    return output;
}
/*
 * Destroy a preprocessor: the input ring, the macro table, any expansion
 * tokens still waiting in the cache, and the storage of the vectors set up
 * by scc_pproc_init. A null pointer is accepted and ignored.
 */
void scc_pproc_drop(scc_pproc_t *pp) {
    if (pp == null)
        return;

    scc_lexer_drop_ring(pp->cur_ring);
    scc_pp_macro_table_drop(&pp->macro_table);

    // Cache entries before cache_pos were already handed to the consumer;
    // only the ones not yet handed out are still owned here.
    while (pp->cache_pos < scc_vec_size(pp->cache)) {
        scc_lexer_tok_drop(&scc_vec_at(pp->cache, pp->cache_pos));
        pp->cache_pos++;
    }
    scc_vec_free(pp->cache);
    pp->cache_pos = 0;

    // NOTE(review): nothing in the visible code pushes onto these stacks;
    // if their elements ever own resources, release those here first.
    scc_vec_free(pp->if_stack);
    scc_vec_free(pp->file_stack);
}