feat(pproc): 实现C语言预处理器功能并重构项目依赖
- 新增预处理器库(pproc),替代原有的pprocessor模块 - 实现完整的宏定义解析功能,支持对象宏和函数宏 - 添加条件编译指令处理(#if、#ifdef、#ifndef、#else、#elif、#endif) - 实现宏展开机制,包括嵌套宏和递归宏处理 - 添加宏定义测试用例,覆盖基本功能和复杂场景 - 在cbuild.toml中更新依赖配置,移除parser、ast、ast2ir、ir等未完成模块 - 新增lexer工具函数用于token流处理 - 添加宏定义表管理功能,支持宏的创建、查找、删除操作 - 实现宏参数解析和替换列表处理
This commit is contained in:
279
libs/pproc/src/pproc_directive.c
Normal file
279
libs/pproc/src/pproc_directive.c
Normal file
@@ -0,0 +1,279 @@
|
||||
#include <scc_lexer_utils.h>
|
||||
#include <scc_pproc.h>
|
||||
static const struct {
|
||||
const char *name;
|
||||
scc_tok_type_t tok_type;
|
||||
} keywords[] = {
|
||||
#define X(name, type, tok) {#name, tok},
|
||||
SCC_PPKEYWORD_TABLE
|
||||
#undef X
|
||||
};
|
||||
|
||||
// 使用二分查找查找关键字
|
||||
static inline int keyword_cmp(const char *name, int len) {
|
||||
int low = 0;
|
||||
int high = sizeof(keywords) / sizeof(keywords[0]) - 1;
|
||||
while (low <= high) {
|
||||
int mid = (low + high) / 2;
|
||||
const char *key = keywords[mid].name;
|
||||
int cmp = 0;
|
||||
|
||||
// 自定义字符串比较逻辑
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (name[i] != key[i]) {
|
||||
cmp = (unsigned char)name[i] - (unsigned char)key[i];
|
||||
break;
|
||||
}
|
||||
if (name[i] == '\0')
|
||||
break; // 遇到终止符提前结束
|
||||
}
|
||||
|
||||
if (cmp == 0) {
|
||||
// 完全匹配检查(长度相同)
|
||||
if (key[len] == '\0')
|
||||
return mid;
|
||||
cmp = -1; // 当前关键词比输入长
|
||||
}
|
||||
|
||||
if (cmp < 0) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
low = mid + 1;
|
||||
}
|
||||
}
|
||||
return -1; // Not a keyword.
|
||||
}
|
||||
|
||||
void scc_pproc_parse_macro_arguments(scc_pproc_t *pp,
|
||||
scc_pproc_macro_list_t *args) {
|
||||
Assert(pp != null && args != null);
|
||||
scc_lexer_tok_t tok = {0};
|
||||
scc_vec_init(*args);
|
||||
int depth = 0;
|
||||
do {
|
||||
scc_lexer_next_non_blank(pp->cur_ring, &tok);
|
||||
if (tok.type == SCC_TOK_L_PAREN) {
|
||||
depth++;
|
||||
} else if (tok.type == SCC_TOK_R_PAREN) {
|
||||
depth--;
|
||||
}
|
||||
if (depth > 1) {
|
||||
scc_vec_push(*args, tok);
|
||||
} else {
|
||||
scc_lexer_tok_drop(&tok);
|
||||
}
|
||||
} while (depth);
|
||||
}
|
||||
|
||||
/*
 * Read the replacement list of a #define into macro->replaces.
 *
 * Leading blanks are skipped (the first token is fetched with
 * scc_lexer_next_non_blank); all following tokens up to the end of the line
 * are appended as-is. The terminating ENDLINE/EOF token is consumed and
 * dropped, so the caller does not need to skip to the next line afterwards.
 *
 * pp:    preprocessor whose cur_ring is read.
 * macro: macro receiving the tokens; it takes ownership of them.
 */
static inline void fill_replacements(scc_pproc_t *pp, scc_pp_macro_t *macro) {
    int ok = 0;
    scc_lexer_tok_t tok = {0};

    ok = scc_lexer_next_non_blank(pp->cur_ring, &tok);
    if (!ok) {
        return;
    }
    if (tok.type == SCC_TOK_EOF || tok.type == SCC_TOK_ENDLINE) {
        // Empty replacement list: release the terminator like the loop does.
        scc_lexer_tok_drop(&tok);
        return;
    }
    scc_vec_push(macro->replaces, tok);

    while (1) {
        scc_ring_next_consume(*pp->cur_ring, tok, ok);
        if (!ok)
            break;
        if (tok.type == SCC_TOK_EOF || tok.type == SCC_TOK_ENDLINE) {
            scc_lexer_tok_drop(&tok);
            break;
        }
        scc_vec_push(macro->replaces, tok);
    }
}
|
||||
|
||||
void scc_pproc_parse_function_macro(scc_pproc_t *pp,
|
||||
const scc_lexer_tok_t *ident) {
|
||||
scc_pproc_macro_list_t args;
|
||||
scc_pproc_parse_macro_arguments(pp, &args);
|
||||
scc_pp_macro_t *macro =
|
||||
scc_pp_macro_new(&ident->lexeme, SCC_PP_MACRO_FUNCTION);
|
||||
/*
|
||||
check and set params
|
||||
1. identifier-list(opt)
|
||||
2. ...
|
||||
3. identifier-list , ...
|
||||
*/
|
||||
scc_vec_foreach(args, i) {
|
||||
scc_lexer_tok_t *arg = &scc_vec_at(args, i);
|
||||
if (arg->type == SCC_TOK_COMMA) {
|
||||
scc_lexer_tok_drop(arg);
|
||||
if (i % 2 != 1) {
|
||||
LOG_FATAL("ERROR");
|
||||
}
|
||||
} else if (arg->type == SCC_TOK_IDENT) {
|
||||
if (i % 2 != 0) {
|
||||
LOG_FATAL("ERROR");
|
||||
}
|
||||
scc_vec_push(macro->params, *arg);
|
||||
} else if (arg->type == SCC_TOK_ELLIPSIS) {
|
||||
if (i % 2 != 0) {
|
||||
LOG_FATAL("ERROR");
|
||||
}
|
||||
scc_cstring_t va_args = scc_cstring_from_cstr("__VA_ARGS__");
|
||||
scc_cstring_free(&arg->lexeme);
|
||||
arg->lexeme = va_args;
|
||||
scc_vec_push(macro->params, *arg);
|
||||
} else {
|
||||
LOG_FATAL("ERROR");
|
||||
}
|
||||
}
|
||||
fill_replacements(pp, macro);
|
||||
scc_pp_macro_table_set(&pp->macro_table, macro);
|
||||
}
|
||||
|
||||
void scc_pproc_parse_object_macro(scc_pproc_t *pp,
|
||||
const scc_lexer_tok_t *ident) {
|
||||
scc_pp_macro_t *macro =
|
||||
scc_pp_macro_new(&ident->lexeme, SCC_PP_MACRO_OBJECT);
|
||||
fill_replacements(pp, macro);
|
||||
scc_pp_macro_table_set(&pp->macro_table, macro);
|
||||
}
|
||||
|
||||
/*
|
||||
```txt
|
||||
6.10 Preprocessing directives
|
||||
preprocessing-file:
|
||||
group(opt)
|
||||
group:
|
||||
group-part
|
||||
group group-part
|
||||
group-part:
|
||||
if-section
|
||||
control-line
|
||||
text-line
|
||||
# non-directive
|
||||
if-section:
|
||||
if-group elif-groups(opt) else-group(opt) endif-line
|
||||
if-group:
|
||||
# if constant-expression new-line group(opt)
|
||||
# ifdef identifier new-line group(opt)
|
||||
# ifndef identifier new-line group(opt)
|
||||
elif-groups:
|
||||
elif-group
|
||||
elif-groups elif-group
|
||||
elif-group:
|
||||
# elif constant-expression new-line group(opt)
|
||||
else-group:
|
||||
# else new-line group(opt)
|
||||
endif-line:
|
||||
# endif new-line
|
||||
control-line:
|
||||
# include pp-tokens new-line
|
||||
# define identifier replacement-list new-line
|
||||
# define identifier lparen identifier-list(opt) )
|
||||
replacement-list new-line
|
||||
# define identifier lparen ... ) replacement-list new-line
|
||||
# define identifier lparen identifier-list ,... )
|
||||
replacement-list new-line
|
||||
# undef identifier new-line
|
||||
# line pp-tokens new-line
|
||||
# error pp-tokens(opt) new-line
|
||||
# pragma pp-tokens(opt) new-line
|
||||
# new-line
|
||||
text-line:
|
||||
pp-tokens(opt) new-line
|
||||
non-directive:
|
||||
pp-tokens new-line
|
||||
lparen:
|
||||
`a ( character not immediately preceded by white-space`
|
||||
replacement-list:
|
||||
pp-tokens(opt)
|
||||
pp-tokens:
|
||||
preprocessing-token
|
||||
pp-tokens preprocessing-token
|
||||
new-line:
|
||||
the new-line character
|
||||
```
|
||||
*/
|
||||
/**
 * Handle one preprocessing directive.
 *
 * Called with the ring positioned at the '#' that starts the directive.
 * Only `#define` and `#undef` are implemented; every other recognised
 * directive is reported with a warning and its line is skipped.
 *
 * On return the whole directive line has been consumed. Paths that already
 * consumed the terminating new-line return directly; all other paths skip
 * the rest of the line via scc_lexer_skip_until_newline.
 *
 * @param pp preprocessor being driven.
 */
void scc_pproc_handle_directive(scc_pproc_t *pp) {
    scc_lexer_tok_t tok = {0};
    int ok = 0;
    // Set when the offending token was the new-line itself, so that the
    // error path does not swallow the following source line.
    int line_consumed = 0;

    // Consume the leading '#'.
    scc_ring_next(*pp->cur_ring, tok, ok);
    scc_lexer_tok_drop(&tok);

    ok = scc_lexer_next_non_blank(pp->cur_ring, &tok);
    if (ok && tok.type == SCC_TOK_ENDLINE) {
        // "# new-line" is the null directive: valid, nothing to do.
        scc_lexer_tok_drop(&tok);
        return;
    }
    if (!ok || tok.type != SCC_TOK_IDENT) {
        scc_lexer_tok_drop(&tok);
        LOG_ERROR("Invalid preprocessor directive");
        goto ERROR;
    }

    int ret = keyword_cmp(scc_cstring_as_cstr(&tok.lexeme),
                          scc_cstring_len(&tok.lexeme));
    if (ret == -1) {
        // Log before dropping: the message needs the lexeme's C string.
        LOG_ERROR("Expected preprocessor keyword, got %s",
                  scc_cstring_as_cstr(&tok.lexeme));
        scc_lexer_tok_drop(&tok);
        goto ERROR;
    }

    scc_tok_type_t type = keywords[ret].tok_type;
    switch (type) {
    case SCC_PP_TOK_DEFINE: {
        scc_lexer_tok_drop(&tok);
        ok = scc_lexer_next_non_blank(pp->cur_ring, &tok);
        if (!ok || tok.type != SCC_TOK_IDENT) {
            line_consumed = ok && tok.type == SCC_TOK_ENDLINE;
            scc_lexer_tok_drop(&tok);
            LOG_ERROR("expected identifier");
            goto ERROR;
        }
        // A '(' immediately after the name (no blank token in between)
        // selects the function-like form.
        scc_lexer_tok_t next_tok;
        scc_ring_peek(*pp->cur_ring, next_tok, ok);
        if (!ok) {
            scc_lexer_tok_drop(&tok);
            LOG_ERROR("unexpected EOF");
            goto ERROR;
        }
        if (next_tok.type == SCC_TOK_L_PAREN) {
            // function macro
            scc_pproc_parse_function_macro(pp, &tok);
        } else {
            // object macro
            scc_pproc_parse_object_macro(pp, &tok);
        }
        scc_lexer_tok_drop(&tok);
        // The replacement-list reader consumed the line terminator, so the
        // skip at ERROR must not run.
        return;
    }
    case SCC_PP_TOK_UNDEF: {
        scc_lexer_tok_drop(&tok);
        ok = scc_lexer_next_non_blank(pp->cur_ring, &tok);
        if (!ok || tok.type != SCC_TOK_IDENT) {
            line_consumed = ok && tok.type == SCC_TOK_ENDLINE;
            scc_lexer_tok_drop(&tok);
            LOG_ERROR("expected identifier");
            goto ERROR;
        }
        scc_pp_macro_table_remove(&pp->macro_table, &tok.lexeme);
        scc_lexer_tok_drop(&tok);
        scc_lexer_next_non_blank(pp->cur_ring, &tok);
        if (tok.type != SCC_TOK_ENDLINE) {
            scc_lexer_tok_drop(&tok);
            LOG_ERROR("expected newline");
            goto ERROR;
        }
        scc_lexer_tok_drop(&tok);
        // The new-line is already consumed; skipping again would discard
        // the next source line.
        return;
    }
    case SCC_PP_TOK_INCLUDE:
    case SCC_PP_TOK_IF:
    case SCC_PP_TOK_IFDEF:
    case SCC_PP_TOK_IFNDEF:
    case SCC_PP_TOK_ELSE:
    case SCC_PP_TOK_ELIF:
    case SCC_PP_TOK_ELIFDEF:
    case SCC_PP_TOK_ELIFNDEF:
    case SCC_PP_TOK_ENDIF:
    case SCC_PP_TOK_LINE:
    case SCC_PP_TOK_EMBED:
    case SCC_PP_TOK_ERROR:
    case SCC_PP_TOK_WARNING:
    case SCC_PP_TOK_PRAGMA:
    default:
        LOG_WARN("Unhandled directive: %s", scc_cstring_as_cstr(&tok.lexeme));
        scc_lexer_tok_drop(&tok);
        break;
    }
ERROR:
    if (!line_consumed) {
        scc_lexer_skip_until_newline(pp->cur_ring);
    }
}
|
||||
32
libs/pproc/src/pproc_expand.c
Normal file
32
libs/pproc/src/pproc_expand.c
Normal file
@@ -0,0 +1,32 @@
|
||||
#include <scc_pproc.h>
|
||||
|
||||
/*
 * Placeholder for macro-expansion state; nothing in this file uses it yet.
 *
 * ISO C does not allow a struct without members (it is a compiler
 * extension), so a dummy member keeps the definition portable until real
 * fields are added.
 */
typedef struct {
    char unused_placeholder;
} scc_expand_t;
|
||||
|
||||
/**
 * Expand one macro invocation into pp->cache.
 *
 * Object-like macros: every replacement token is copied (with its own
 * lexeme) and appended to pp->cache, then pp->cache_pos is reset to 0 so the
 * reader starts at the first cached token. Blank tokens are normalised to a
 * single space.
 *
 * Function-like macros: the invocation's argument list is consumed from the
 * ring, but substitution is not implemented yet, so the invocation currently
 * expands to nothing.
 *
 * @param pp    preprocessor whose cache receives the expansion.
 * @param macro macro to expand; its type must not be SCC_PP_MACRO_NONE.
 */
void scc_pproc_expand_macro(scc_pproc_t *pp, const scc_pp_macro_t *macro) {
    if (macro->type == SCC_PP_MACRO_NONE) {
        UNREACHABLE();
    }

    if (macro->type == SCC_PP_MACRO_OBJECT) {
        scc_vec_foreach(macro->replaces, i) {
            // Shallow copy first, then give the copy its own lexeme so the
            // macro's stored token stays untouched.
            scc_lexer_tok_t tok = scc_vec_at(macro->replaces, i);
            if (tok.type == SCC_TOK_BLANK) {
                tok.lexeme = scc_cstring_from_cstr(" ");
            } else {
                tok.lexeme = scc_cstring_copy(&tok.lexeme);
            }
            scc_vec_push(pp->cache, tok);
        }
        pp->cache_pos = 0;
        return;
    }

    Assert(macro->type == SCC_PP_MACRO_FUNCTION);
    // TODO: check that the arguments match macro->params and substitute
    // them into macro->replaces.
    scc_pproc_macro_list_t args;
    scc_pproc_parse_macro_arguments(pp, &args);

    // Until substitution exists, release the collected argument tokens so
    // they are not leaked.
    scc_vec_foreach(args, i) {
        scc_lexer_tok_drop(&scc_vec_at(args, i));
    }
    scc_vec_free(args);
}
|
||||
0
libs/pproc/src/pproc_include.c
Normal file
0
libs/pproc/src/pproc_include.c
Normal file
155
libs/pproc/src/pproc_macro.c
Normal file
155
libs/pproc/src/pproc_macro.c
Normal file
@@ -0,0 +1,155 @@
|
||||
#include <pproc_macro.h>
|
||||
|
||||
// 创建宏对象
|
||||
scc_pp_macro_t *scc_pp_macro_new(const scc_cstring_t *name,
|
||||
scc_pp_macro_type_t type) {
|
||||
scc_pp_macro_t *macro = scc_malloc(sizeof(scc_pp_macro_t));
|
||||
if (!macro) {
|
||||
LOG_ERROR("Failed to allocate memory for macro");
|
||||
return null;
|
||||
}
|
||||
|
||||
macro->name = scc_cstring_copy(name);
|
||||
macro->type = type;
|
||||
scc_vec_init(macro->params);
|
||||
scc_vec_init(macro->replaces);
|
||||
|
||||
return macro;
|
||||
}
|
||||
|
||||
// 销毁宏对象
|
||||
void scc_pp_macro_drop(scc_pp_macro_t *macro) {
|
||||
if (!macro)
|
||||
return;
|
||||
|
||||
scc_cstring_free(¯o->name);
|
||||
|
||||
// 释放参数列表
|
||||
for (usize i = 0; i < macro->params.size; ++i) {
|
||||
scc_lexer_tok_drop(&scc_vec_at(macro->params, i));
|
||||
}
|
||||
scc_vec_free(macro->params);
|
||||
|
||||
// 释放替换列表
|
||||
for (usize i = 0; i < macro->replaces.size; ++i) {
|
||||
scc_lexer_tok_drop(&scc_vec_at(macro->replaces, i));
|
||||
}
|
||||
scc_vec_free(macro->replaces);
|
||||
|
||||
scc_free(macro);
|
||||
}
|
||||
|
||||
// 添加对象宏
|
||||
cbool scc_pp_add_object_macro(scc_pp_macro_table_t *macros,
|
||||
const scc_cstring_t *name,
|
||||
const scc_pproc_macro_list_t *replacement) {
|
||||
if (!macros || !name || !replacement)
|
||||
return false;
|
||||
|
||||
scc_pp_macro_t *macro = scc_pp_macro_new(name, SCC_PP_MACRO_OBJECT);
|
||||
if (!macro)
|
||||
return false;
|
||||
|
||||
macro->replaces = *replacement;
|
||||
|
||||
// 检查是否已存在同名宏
|
||||
scc_pp_macro_t *existing = scc_hashtable_get(¯os->table, ¯o->name);
|
||||
if (existing) {
|
||||
LOG_WARN("Redefining macro: %s", scc_cstring_as_cstr(¯o->name));
|
||||
scc_pp_macro_drop(existing);
|
||||
}
|
||||
|
||||
scc_hashtable_set(¯os->table, ¯o->name, macro);
|
||||
return true;
|
||||
}
|
||||
|
||||
// 添加函数宏
|
||||
cbool scc_pp_add_function_macro(scc_pp_macro_table_t *macros,
|
||||
const scc_cstring_t *name,
|
||||
const scc_pproc_macro_list_t *params,
|
||||
const scc_pproc_macro_list_t *replacement) {
|
||||
if (!macros || !name || !params || !replacement)
|
||||
return false;
|
||||
|
||||
scc_pp_macro_t *macro = scc_pp_macro_new(name, SCC_PP_MACRO_FUNCTION);
|
||||
if (!macro)
|
||||
return false;
|
||||
|
||||
// 复制参数列表
|
||||
macro->params = *params;
|
||||
macro->replaces = *replacement;
|
||||
|
||||
// 检查是否已存在同名宏
|
||||
scc_pp_macro_t *existing = scc_hashtable_get(¯os->table, ¯o->name);
|
||||
if (existing) {
|
||||
LOG_WARN("Redefining macro: %s", scc_cstring_as_cstr(¯o->name));
|
||||
scc_pp_macro_drop(existing);
|
||||
}
|
||||
|
||||
scc_hashtable_set(¯os->table, ¯o->name, macro);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// macro_table
|
||||
|
||||
/**
 * Store a macro in the table, keyed by a pointer to its own name.
 *
 * @param pp    macro table; must not be null.
 * @param macro macro to store; must not be null.
 * @return whatever scc_hashtable_set returns for this insertion.
 *         NOTE(review): presumably the previously stored value, if any —
 *         confirm against the hashtable API.
 */
scc_pp_macro_t *scc_pp_macro_table_set(scc_pp_macro_table_t *pp,
                                       scc_pp_macro_t *macro) {
    Assert(pp != null && macro != null);
    return scc_hashtable_set(&pp->table, &macro->name, macro);
}
|
||||
|
||||
// 查找宏定义
|
||||
scc_pp_macro_t *scc_pp_macro_table_get(scc_pp_macro_table_t *pp,
|
||||
const scc_cstring_t *name) {
|
||||
return scc_hashtable_get(&pp->table, name);
|
||||
}
|
||||
|
||||
// 从预处理器中删除宏
|
||||
cbool scc_pp_macro_table_remove(scc_pp_macro_table_t *pp,
|
||||
const scc_cstring_t *name) {
|
||||
if (!pp || !name)
|
||||
return false;
|
||||
|
||||
scc_pp_macro_t *macro = scc_hashtable_get(&pp->table, name);
|
||||
if (!macro)
|
||||
return false;
|
||||
|
||||
scc_hashtable_del(&pp->table, name);
|
||||
scc_pp_macro_drop(macro);
|
||||
return true;
|
||||
}
|
||||
|
||||
static u32 hash_func(const void *key) {
|
||||
const scc_cstring_t *string = (const scc_cstring_t *)key;
|
||||
return scc_strhash32(scc_cstring_as_cstr(string));
|
||||
}
|
||||
|
||||
static int hash_cmp(const void *key1, const void *key2) {
|
||||
const scc_cstring_t *str1 = (const scc_cstring_t *)key1;
|
||||
const scc_cstring_t *str2 = (const scc_cstring_t *)key2;
|
||||
|
||||
if (str1->size != str2->size) {
|
||||
return str1->size - str2->size;
|
||||
}
|
||||
return scc_strcmp(scc_cstring_as_cstr(str1), scc_cstring_as_cstr(str2));
|
||||
}
|
||||
|
||||
/**
 * Initialise a macro table.
 *
 * Installs the string hash and comparison callbacks and then initialises the
 * underlying hashtable.
 * NOTE(review): the callbacks are assigned before scc_hashtable_init runs;
 * confirm that init does not reset them.
 *
 * @param macros table to initialise; must not be null.
 */
void scc_pp_marco_table_init(scc_pp_macro_table_t *macros) {
    Assert(macros != null);
    macros->table.hash_func = hash_func;
    macros->table.key_cmp = hash_cmp;
    scc_hashtable_init(&macros->table);
}
|
||||
|
||||
static int macro_free(const void *key, void *value, void *context) {
|
||||
(void)key;
|
||||
(void)context;
|
||||
scc_pp_macro_drop(value);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Destroy a macro table: every stored macro is destroyed first, then the
 * hashtable itself is dropped.
 *
 * @param macros table to destroy; must not be null.
 */
void scc_pp_macro_table_drop(scc_pp_macro_table_t *macros) {
    Assert(macros != null);
    scc_hashtable_foreach(&macros->table, macro_free, null);
    scc_hashtable_drop(&macros->table);
}
|
||||
68
libs/pproc/src/scc_pproc.c
Normal file
68
libs/pproc/src/scc_pproc.c
Normal file
@@ -0,0 +1,68 @@
|
||||
#include <scc_pproc.h>
|
||||
|
||||
/*
 * Produce the next preprocessed token.
 *
 * Order of sources:
 *   1. tokens left in pp->cache by a macro expansion;
 *   2. the input ring, where a '#' in column 1 starts a directive and an
 *      identifier naming a known macro is replaced by its expansion.
 *
 * pp:  preprocessor state.
 * out: receives the token; the caller owns it afterwards.
 *
 * Returns non-zero when `*out` was filled, zero when no token is available.
 *
 * NOTE(review): cached expansion tokens are returned without being
 * rescanned, so macros inside a replacement list are not expanded here.
 */
static int pproc_next(scc_pproc_t *pp, scc_lexer_tok_t *out) {
    scc_lexer_tok_ring_t *stream = pp->cur_ring;
    scc_lexer_tok_t tok = {0};
    int ok = 0;
CONTINUE:
    if (scc_vec_size(pp->cache)) {
        // Hand out cached tokens first; ownership moves to the caller.
        *out = scc_vec_at(pp->cache, pp->cache_pos);
        pp->cache_pos++;
        if (pp->cache_pos == scc_vec_size(pp->cache)) {
            // Cache exhausted: reset it for the next expansion.
            pp->cache_pos = 0;
            scc_vec_free(pp->cache);
        }
        return true;
    }

    scc_ring_peek(*stream, tok, ok);
    if (!ok) {
        // Nothing to peek at. Without this check `tok` would keep the value
        // from a previous iteration and could re-trigger directive handling.
        return false;
    }

    if (tok.type == SCC_TOK_SHARP && tok.loc.col == 1) {
        // parse to #
        scc_pproc_handle_directive(pp);
        goto CONTINUE;
    }

    if (tok.type == SCC_TOK_IDENT) {
        // maybe expanded
        scc_pp_macro_t *macro =
            scc_pp_macro_table_get(&pp->macro_table, &tok.lexeme);
        scc_ring_next_consume(*stream, *out, ok);
        if (macro == null) {
            return ok;
        }
        // The macro name is replaced by its expansion, so the consumed
        // identifier token is released instead of being overwritten later.
        if (ok) {
            scc_lexer_tok_drop(out);
        }
        scc_pproc_expand_macro(pp, macro);
        goto CONTINUE;
    }

    // Ordinary token: pass it through unchanged.
    scc_ring_next_consume(*stream, *out, ok);
    return ok;
}
|
||||
|
||||
/**
 * Initialise a preprocessor that reads from `input`.
 *
 * Sets the current input ring, initialises the macro table, the if/file
 * stacks and the expansion cache, and zeroes the counters that other
 * functions in this file read or increment.
 *
 * @param pp    preprocessor to initialise; must not be null.
 * @param input token ring to read from; must not be null.
 */
void scc_pproc_init(scc_pproc_t *pp, scc_lexer_tok_ring_t *input) {
    Assert(pp != null && input != null);
    pp->cur_ring = input;
    scc_pp_marco_table_init(&pp->macro_table);
    scc_vec_init(pp->if_stack);
    scc_vec_init(pp->file_stack);
    scc_vec_init(pp->cache);
    pp->cache_pos = 0;
    // scc_pproc_to_ring() increments this counter, so it needs a defined
    // starting value.
    pp->ring_ref_count = 0;
}
|
||||
|
||||
/*
 * Ring fill callback: `userdata` is the preprocessor; the next preprocessed
 * token is written to `tok`. Returns the result of pproc_next.
 */
static cbool fill_token(scc_lexer_tok_t *tok, void *userdata) {
    return pproc_next((scc_pproc_t *)userdata, tok);
}
|
||||
|
||||
/**
 * Expose the preprocessor's output as a token ring.
 *
 * Initialises pp->ring with the requested size and with fill_token as its
 * fill callback, increments pp->ring_ref_count, and returns the ring.
 *
 * @param pp        preprocessor providing the tokens.
 * @param ring_size size passed to scc_ring_init.
 * @return pointer to pp->ring.
 */
scc_lexer_tok_ring_t *scc_pproc_to_ring(scc_pproc_t *pp, int ring_size) {
    scc_ring_init(pp->ring, ring_size, fill_token, pp);
    pp->ring_ref_count += 1;

    scc_lexer_tok_ring_t *output = &pp->ring;
    return output;
}
|
||||
|
||||
// 销毁预处理器
|
||||
void scc_pproc_drop(scc_pproc_t *pp) {
|
||||
if (pp == null)
|
||||
return;
|
||||
scc_lexer_drop_ring(pp->cur_ring);
|
||||
scc_pp_macro_table_drop(&pp->macro_table);
|
||||
}
|
||||
Reference in New Issue
Block a user