feat(lex_parser, pprocessor): rename identifier header check and add macro system

- Rename `scc_lex_parse_is_identifier_header` to `scc_lex_parse_is_identifier_prefix` for clarity and add a TODO comment
- Update lexer to use the renamed function for consistency
- Fix package and dependency names in `cbuild.toml` (`smcc_pprocesser` → `scc_pprocesser`, `smcc_lex_parser` → `lex_parser`)
- Introduce new macro system with header file `pp_macro.h` defining macro types, structures, and management functions
- Refactor preprocessor initialization and cleanup in `pprocessor.c` to use new macro table and stream handling
- Replace legacy `hashmap` with `scc_pp_macro_table_t` for macro storage
- Improve error handling and resource management in preprocessor lifecycle
This commit is contained in:
zzy
2025-12-13 16:09:46 +08:00
parent 874a58281f
commit 07a76d82f4
16 changed files with 970 additions and 490 deletions

411
libs/pprocessor/src/parse.c Normal file
View File

@@ -0,0 +1,411 @@
#include <lex_parser.h>
#include <pp_macro.h>
#include <pp_parse.h>
#include <pp_token.h>
// Lookup table with one {spelling, token} entry per X(name, type, tok) item
// listed by SCC_PP_INST_TOKEN; the spelling is the stringified `name`.
// keyword_cmp() binary-searches this array by spelling, so the entries must
// appear in ascending byte order of `name` for lookups to succeed.
static const struct {
const char *name;
scc_pp_token_t tok;
} keywords[] = {
#define X(name, type, tok) {#name, tok},
SCC_PP_INST_TOKEN
#undef X
};
// Binary search of the `keywords` table for the first `len` bytes of `name`.
// Returns the index of the matching table entry, or -1 when nothing matches.
static inline int keyword_cmp(const char *name, int len) {
    int lo = 0;
    int hi = sizeof(keywords) / sizeof(keywords[0]) - 1;

    while (lo <= hi) {
        const int probe = (lo + hi) / 2;
        const char *candidate = keywords[probe].name;

        // Hand-written comparison of `name` against `candidate`, limited to
        // `len` bytes; `order` keeps the sign of the first differing byte.
        int order = 0;
        int idx = 0;
        while (idx < len) {
            const char lhs = name[idx];
            const char rhs = candidate[idx];
            if (lhs != rhs) {
                order = (unsigned char)lhs - (unsigned char)rhs;
                break;
            }
            if (lhs == '\0') {
                break; // hit a terminator early: stop comparing
            }
            idx++;
        }

        if (order == 0) {
            // The compared prefix is equal; it is a full match only when the
            // table entry also ends at `len`.
            if (candidate[len] == '\0') {
                return probe;
            }
            order = -1; // the table entry is longer than the input
        }

        if (order < 0) {
            hi = probe - 1;
        } else {
            lo = probe + 1;
        }
    }

    return -1; // Not a keyword.
}
// Appends the text accumulated in `buff` to `list` as a new entry and restarts
// `buff` as a fresh string. Does nothing when `buff` holds no characters.
static inline void try_to_cut_list(scc_pp_macro_list_t *list,
                                   scc_cstring_t *buff) {
    if (scc_cstring_len(buff) == 0) {
        return;
    }
    // The string value itself is pushed, so `buff` must be replaced afterwards
    // rather than reused.
    scc_vec_push(*list, *buff);
    *buff = scc_cstring_new();
}
// Splits the rest of the current line into the pieces of a macro replacement
// list.
//
// `list` is (re)initialised and then filled with one string per piece:
//   - every identifier becomes its own piece;
//   - whitespace and '#' end the current piece and are themselves dropped;
//   - any other characters are accumulated into a piece until one of the
//     above is met.
// Scanning stops at the first end-of-line character (left unconsumed) or at
// end of stream. Always returns true.
static cbool parse_macro_replace_list(scc_probe_stream_t *stream,
                                      scc_pp_macro_list_t *list) {
    Assert(stream != null && list != null);
    scc_probe_stream_reset(stream);
    scc_vec_init(*list);

    scc_cstring_t replacement = scc_cstring_new();
    scc_pos_t pos = scc_pos_init();
    int ch;

    while ((ch = scc_probe_stream_peek(stream)) != scc_stream_eof) {
        if (scc_lex_parse_is_endline(ch)) {
            break;
        }

        if (scc_lex_parse_is_identifier_prefix(ch)) {
            // Close whatever was being accumulated so the identifier ends up
            // in a piece of its own.
            try_to_cut_list(list, &replacement);
            cbool ret = scc_lex_parse_identifier(stream, &pos, &replacement);
            Assert(ret == true);
            try_to_cut_list(list, &replacement);
        } else if (ch == '#' || scc_lex_parse_is_whitespace(ch)) {
            // TODO for # ##  ('#' is currently treated as a plain separator)
            scc_probe_stream_consume(stream);
            try_to_cut_list(list, &replacement);
        } else {
            scc_probe_stream_consume(stream);
            scc_cstring_append_ch(&replacement, (char)ch);
        }
    }

    // Flush the trailing piece, then release the scratch objects. The position
    // is only a throw-away required by the lexer helpers; its name string is
    // released the same way parse_macro_arguments() does.
    try_to_cut_list(list, &replacement);
    scc_cstring_free(&replacement);
    scc_cstring_free(&pos.name);
    return true;
}
// Parses a parenthesised, comma-separated list: the parameter names of a
// function-like macro definition or the arguments of an invocation.
//
// The stream must be positioned on '('. On success the opening parenthesis,
// the arguments and the matching ')' have all been consumed, `args` holds one
// string per argument and true is returned. Nested parentheses are kept inside
// the argument they belong to; only commas at the outermost level separate
// arguments.
//
// Blanks directly after '(' or ',' and directly before ',' or the closing ')'
// are not part of an argument, so "( a , b )" yields "a" and "b". Blanks
// between other characters of an argument are preserved as written.
// "()" yields zero arguments, while an empty argument next to a comma (as in
// "(a,)") is kept as an empty string.
//
// Returns false when the stream does not start with '(' or ends before the
// closing ')'. `args` is always initialised; on the end-of-stream failure it
// may still hold the arguments that were completed before the input ran out.
static cbool parse_macro_arguments(scc_probe_stream_t *stream,
                                   scc_pp_macro_list_t *args) {
    Assert(stream != null && args != null);
    scc_vec_init(*args);
    scc_probe_stream_reset(stream);

    if (scc_probe_stream_peek(stream) != '(') {
        return false;
    }
    scc_probe_stream_consume(stream); // consume '('

    scc_cstring_t current_arg = scc_cstring_new();
    // Blanks seen after the last non-blank character of the current argument.
    // They are only committed once more argument text follows, which is what
    // strips trailing blanks.
    scc_cstring_t held_blanks = scc_cstring_new();
    scc_pos_t pos = scc_pos_init();
    cbool terminated = true;
    int paren_depth = 1;

    // Same treatment as after a ',' below: blanks before an argument are not
    // part of it.
    scc_lex_parse_skip_whitespace(stream, &pos);

    while (paren_depth > 0) {
        int ch = scc_probe_stream_peek(stream);
        if (ch == scc_stream_eof) {
            terminated = false;
            break;
        }
        scc_probe_stream_consume(stream);

        if (paren_depth == 1 && (ch == ')' || ch == ',')) {
            // End of the current argument: pending blanks were trailing.
            scc_cstring_free(&held_blanks);
            held_blanks = scc_cstring_new();
            if (ch == ')') {
                paren_depth = 0;
                break;
            }
            scc_vec_push(*args, current_arg);
            current_arg = scc_cstring_new();
            // Skip the blanks that follow the separator.
            scc_lex_parse_skip_whitespace(stream, &pos);
            continue;
        }

        if (scc_lex_parse_is_whitespace(ch)) {
            // Leading blanks are dropped; inner ones wait for more text.
            if (!scc_cstring_is_empty(&current_arg)) {
                scc_cstring_append_ch(&held_blanks, (char)ch);
            }
            continue;
        }

        if (ch == '(') {
            paren_depth++;
        } else if (ch == ')') {
            paren_depth--; // closes a nested group, depth stays >= 1 here
        }

        if (!scc_cstring_is_empty(&held_blanks)) {
            scc_cstring_append(&current_arg, &held_blanks);
            scc_cstring_free(&held_blanks);
            held_blanks = scc_cstring_new();
        }
        scc_cstring_append_ch(&current_arg, (char)ch);
    }

    scc_cstring_free(&held_blanks);
    scc_cstring_free(&pos.name);

    if (!terminated) {
        scc_cstring_free(&current_arg);
        return false;
    }

    // Store the last argument. An empty one is only meaningful when a comma
    // preceded it; otherwise the list was just "()".
    if (!scc_cstring_is_empty(&current_arg) || args->size > 0) {
        scc_vec_push(*args, current_arg);
    } else {
        scc_cstring_free(&current_arg);
    }
    return true;
}
// Consumes blanks (' ' and '\t') starting at the current stream position.
// If an end-of-line character is reached it is skipped through
// scc_lex_parse_skip_endline() and true is returned. Otherwise the probe is
// reset at the first other character and false is returned; the blanks
// consumed on the way stay consumed.
static cbool safe_skip_backspace_if_endline(scc_probe_stream_t *stream,
                                            scc_pos_t *pos) {
    scc_probe_stream_reset(stream);

    // FIXME maybe it not correct
    for (;;) {
        const int ch = scc_probe_stream_peek(stream);
        const cbool is_blank_or_newline =
            (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n');
        if (!is_blank_or_newline) {
            break;
        }
        if (scc_lex_parse_is_endline(ch)) {
            scc_lex_parse_skip_endline(stream, pos);
            return true;
        }
        scc_probe_stream_consume(stream);
    }

    scc_probe_stream_reset(stream);
    return false;
}
// Parses one preprocessor directive starting at '#'.
//
// stream: input positioned on the '#'; if it is not, a warning is logged and
//         nothing is consumed.
// pos:    source position, advanced by the lexer helpers that receive it.
// macros: macro table updated by #define / #undef.
//
// Handled directives:
//   #define NAME body          -> scc_pp_add_object_macro()
//   #define NAME(params) body  -> scc_pp_add_function_macro(); the '(' must
//                                 directly follow NAME
//   #undef NAME                -> scc_pp_remove_macro()
// An empty body is valid and registers a macro with an empty replacement list.
// The other known directives are not implemented yet (TODO) and unknown ones
// are reported with a warning; in every case the remainder of the line is
// skipped exactly once.
void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos,
                            scc_macro_table_t *macros) {
    Assert(stream != null);
    scc_probe_stream_reset(stream);

    if (scc_probe_stream_peek(stream) != '#') {
        LOG_WARN("Invalid directive");
        return;
    }
    scc_pos_next(pos);
    scc_probe_stream_consume(stream);

    // A '#' followed only by blanks up to the end of line does nothing.
    if (safe_skip_backspace_if_endline(stream, pos)) {
        return;
    }

    // Everything released at FREE is initialised before the first goto, so the
    // shared cleanup never sees an uninitialised object.
    scc_cstring_t directive = scc_cstring_new();
    scc_cstring_t name = scc_cstring_new();
    int kw_index = -1;

    // Directive name.
    if (!scc_lex_parse_identifier(stream, pos, &directive)) {
        goto SKIP_LINE;
    }
    // The line ended right after the directive name; the end of line has
    // already been consumed, so there is nothing left to skip.
    if (safe_skip_backspace_if_endline(stream, pos)) {
        goto FREE;
    }

    // keyword_cmp() returns an index into `keywords` (or -1), not a token, so
    // map it through the table instead of relying on the two numberings
    // matching.
    kw_index = keyword_cmp(scc_cstring_as_cstr(&directive),
                           scc_cstring_len(&directive));
    if (kw_index < 0) {
        LOG_WARN("Unknown preprocessor directive: %s",
                 scc_cstring_as_cstr(&directive));
        goto SKIP_LINE;
    }

    switch (keywords[kw_index].tok) {
    case SCC_PP_TOK_DEFINE: {
        if (!scc_lex_parse_identifier(stream, pos, &name)) {
            goto SKIP_LINE;
        }

        // Function-like macro: '(' immediately after the name, no blank in
        // between. Anything else starts the body of an object-like macro.
        scc_probe_stream_reset(stream);
        scc_pp_macro_list_t replacement;
        if (scc_probe_stream_peek(stream) == '(') {
            scc_pp_macro_list_t params;
            // Consumes the whole "(...)" including the closing parenthesis.
            if (!parse_macro_arguments(stream, &params)) {
                LOG_WARN("Unterminated macro parameter list");
                goto SKIP_LINE;
            }
            // parse_macro_replace_list() skips leading blanks and stops at the
            // end of line, so an empty body yields an empty list.
            parse_macro_replace_list(stream, &replacement);
            scc_pp_add_function_macro(macros, &name, &params, &replacement);
        } else {
            parse_macro_replace_list(stream, &replacement);
            scc_pp_add_object_macro(macros, &name, &replacement);
        }
        break;
    }
    case SCC_PP_TOK_UNDEF: {
        if (scc_lex_parse_identifier(stream, pos, &name)) {
            // TODO ret value
            scc_pp_remove_macro(macros, &name);
        }
        break;
    }
    case SCC_PP_TOK_INCLUDE:
    case SCC_PP_TOK_IF:
    case SCC_PP_TOK_IFDEF:
    case SCC_PP_TOK_IFNDEF:
    case SCC_PP_TOK_ELSE:
    case SCC_PP_TOK_ELIF:
    case SCC_PP_TOK_ELIFDEF:
    case SCC_PP_TOK_ELIFNDEF:
    case SCC_PP_TOK_ENDIF:
    case SCC_PP_TOK_LINE:
    case SCC_PP_TOK_EMBED:
    case SCC_PP_TOK_ERROR:
    case SCC_PP_TOK_WARNING:
    case SCC_PP_TOK_PRAMA:
        // Not implemented yet: the line is skipped below.
        TODO();
        break;
    default:
        LOG_WARN("Unknown preprocessor directive: %s",
                 scc_cstring_as_cstr(&directive));
        break;
    }

SKIP_LINE:
    // Single place where the rest of the directive line is discarded.
    scc_lex_parse_skip_line(stream, pos);
FREE:
    scc_cstring_free(&directive);
    scc_cstring_free(&name);
}
// Placeholder for building strings from the '#' and '##' operators.
// Not implemented yet: takes no arguments and always returns false.
cbool scc_pp_expand_string(void) { return false; }
// Expands an object-like macro into `out_buff`.
//
// `out_buff` is cleared first, then every piece of the macro's replacement
// list is appended in order, separated by a single space. The result is not
// rescanned here; nested macros are handled by scc_pp_expand_macro().
// Always returns true.
cbool scc_pp_expand_object_macro(scc_pp_macro_t *macro,
                                 scc_cstring_t *out_buff) {
    Assert(macro != null && out_buff != null);
    Assert(macro->type == SCC_PP_MACRO_OBJECT && macro->params.size == 0);

    // FIXME hack cstring to init and clean
    scc_cstring_free(out_buff);

    for (usize i = 0; i < macro->replaces.size; ++i) {
        scc_cstring_append(out_buff, &scc_vec_at(macro->replaces, i));
        // Compare with `i + 1`: the size is unsigned, so `size - 1` would wrap
        // around for an empty list.
        if (i + 1 < macro->replaces.size) {
            scc_cstring_append_ch(out_buff, ' ');
        }
    }
    return true;
}
// Expands a function-like macro into `out_buff`.
//
// macro:    the macro definition (must be SCC_PP_MACRO_FUNCTION).
// params:   the arguments of this invocation, in call order.
// out_buff: cleared first, then receives the expansion.
//
// Each piece of the replacement list that is spelled exactly like one of the
// macro's parameter names is replaced by the argument at the same index; all
// other pieces are copied unchanged. Pieces are separated by a single space.
// A parameter without a corresponding argument (fewer arguments supplied than
// parameters declared) is substituted with nothing; surplus arguments are
// ignored. Always returns true.
cbool scc_pp_expand_function_macro(scc_pp_macro_t *macro,
                                   scc_pp_macro_list_t *params,
                                   scc_cstring_t *out_buff) {
    Assert(macro != null && params != null);
    Assert(macro->type == SCC_PP_MACRO_FUNCTION);
    Assert(out_buff != null);

    // FIXME hack cstring to init and clean
    scc_cstring_free(out_buff);

    for (usize i = 0; i < macro->replaces.size; ++i) {
        // TODO ... __VA_ARGS__
        const char *piece =
            scc_cstring_as_cstr(&scc_vec_at(macro->replaces, i));
        cbool substituted = false;

        for (usize j = 0; j < macro->params.size; ++j) {
            if (scc_strcmp(piece, scc_cstring_as_cstr(
                                      &scc_vec_at(macro->params, j))) != 0) {
                continue;
            }
            // Only read the argument when the call actually supplied it.
            if (j < params->size) {
                scc_cstring_append(out_buff, &scc_vec_at(*params, j));
            }
            substituted = true;
            break;
        }

        if (!substituted) {
            scc_cstring_append(out_buff, &scc_vec_at(macro->replaces, i));
        }

        // Compare with `i + 1`: the size is unsigned.
        if (i + 1 < macro->replaces.size) {
            scc_cstring_append_ch(out_buff, ' ');
        }
    }
    return true;
}
// Expands the identifier at the current position of `stream`.
//
// stream:     input positioned on the first character of an identifier.
// macros:     macro table used for the lookup.
// out_stream: receives a newly created memory stream with the result; the
//             caller drops it. Set to null whenever false is returned.
// depth:      remaining nesting budget; each level of rescanning uses one.
//
// Result:
//   - identifier that is not a macro: the identifier itself;
//   - name of a function-like macro that is not followed by '(': the
//     identifier followed by one space (this is not an invocation; the blanks
//     skipped while looking for '(' cannot be restored, so a single separator
//     is emitted instead);
//   - otherwise the macro's expansion, rescanned so that identifiers inside it
//     are expanded as well while the depth budget lasts. Once the budget is
//     used up, identifiers are copied through unexpanded, which also ends
//     self-referential definitions.
// Returns false when `depth` is not positive or when a macro invocation is not
// terminated before the end of the input.
//
// NOTE(review): the text buffers handed to scc_mem_probe_stream_new() for the
// returned streams are not released here, as in the original code; presumably
// the stream borrows them - confirm the meaning of the third argument.
cbool scc_pp_expand_macro(scc_probe_stream_t *stream, scc_macro_table_t *macros,
                          scc_probe_stream_t **out_stream, int depth) {
    // TODO self position and it maybe is a stack on #include ?
    if (depth <= 0) {
        // Never leave the caller's pointer untouched on failure.
        if (out_stream != null) {
            *out_stream = null;
        }
        return false;
    }
    Assert(stream != null && macros != null && out_stream != null);

    scc_cstring_t identifier = scc_cstring_new();
    scc_pos_t pos = scc_pos_init();
    cbool ret = scc_lex_parse_identifier(stream, &pos, &identifier);
    Assert(ret == true);

    scc_pp_macro_t *macro = scc_pp_find_macro(macros, &identifier);
    if (macro != null && macro->type == SCC_PP_MACRO_FUNCTION) {
        // FIXME should whitespace be ignored here?
        scc_lex_parse_skip_whitespace(stream, &pos);
        if (scc_probe_stream_peek(stream) != '(') {
            // Not an invocation: pass the name through unexpanded.
            scc_cstring_append_ch(&identifier, ' ');
            macro = null;
        }
    }

    if (macro == null) {
        scc_cstring_free(&pos.name);
        *out_stream =
            scc_mem_probe_stream_new(scc_cstring_as_cstr(&identifier),
                                     scc_cstring_len(&identifier), false);
        return true;
    }
    scc_cstring_free(&identifier);

    // First pass: plain substitution according to the macro kind.
    scc_cstring_t tmp_buff = scc_cstring_new();
    if (macro->type == SCC_PP_MACRO_OBJECT) {
        ret = scc_pp_expand_object_macro(macro, &tmp_buff);
        Assert(ret == true);
    } else if (macro->type == SCC_PP_MACRO_FUNCTION) {
        scc_pp_macro_list_t params;
        ret = parse_macro_arguments(stream, &params);
        if (ret) {
            ret = scc_pp_expand_function_macro(macro, &params, &tmp_buff);
        }
        // The argument text has been copied into tmp_buff (or is unusable), so
        // the strings can go. NOTE(review): the vector's own storage is not
        // released; no vector-destroy helper is visible in this file.
        for (usize i = 0; i < params.size; ++i) {
            scc_cstring_free(&scc_vec_at(params, i));
        }
        if (!ret) {
            LOG_ERROR("Unterminated macro invocation");
            scc_cstring_free(&tmp_buff);
            scc_cstring_free(&pos.name);
            *out_stream = null;
            return false;
        }
    }

    // Second pass: rescan the substituted text and expand nested identifiers.
    scc_probe_stream_t *tmp_stream = scc_mem_probe_stream_new(
        scc_cstring_as_cstr(&tmp_buff), scc_cstring_len(&tmp_buff), false);
    scc_cstring_t real_buff = scc_cstring_new();
    cbool ok = true;
    int ch;

    while ((ch = scc_probe_stream_peek(tmp_stream)) != scc_stream_eof) {
        if (!scc_lex_parse_is_identifier_prefix(ch)) {
            scc_cstring_append_ch(&real_buff,
                                  scc_probe_stream_consume(tmp_stream));
            continue;
        }

        if (depth > 1) {
            scc_probe_stream_t *nested = null;
            if (!scc_pp_expand_macro(tmp_stream, macros, &nested, depth - 1) ||
                nested == null) {
                ok = false;
                break;
            }
            while (scc_probe_stream_peek(nested) != scc_stream_eof) {
                scc_cstring_append_ch(&real_buff,
                                      scc_probe_stream_consume(nested));
            }
            Assert(nested->drop != null);
            scc_probe_stream_drop(nested);
        } else {
            // Depth budget used up: copy the identifier without expanding it.
            scc_cstring_t word = scc_cstring_new();
            if (scc_lex_parse_identifier(tmp_stream, &pos, &word)) {
                scc_cstring_append(&real_buff, &word);
            } else {
                // Always make progress, even if the lexer rejects the text.
                scc_cstring_append_ch(&real_buff,
                                      scc_probe_stream_consume(tmp_stream));
            }
            scc_cstring_free(&word);
        }
    }

    scc_probe_stream_drop(tmp_stream);
    scc_cstring_free(&tmp_buff);
    scc_cstring_free(&pos.name);

    if (!ok) {
        scc_cstring_free(&real_buff);
        *out_stream = null;
        return false;
    }

    *out_stream = scc_mem_probe_stream_new(scc_cstring_as_cstr(&real_buff),
                                           scc_cstring_len(&real_buff), false);
    return true;
}