feat(lex_parser, pprocessor): rename identifier header check and add macro system
- Rename `scc_lex_parse_is_identifier_header` to `scc_lex_parse_is_identifier_prefix` for clarity and add a TODO comment
- Update lexer to use the renamed function for consistency
- Fix package and dependency names in `cbuild.toml` (`smcc_pprocesser` → `scc_pprocesser`, `smcc_lex_parser` → `lex_parser`)
- Introduce new macro system with header file `pp_macro.h` defining macro types, structures, and management functions
- Refactor preprocessor initialization and cleanup in `pprocessor.c` to use new macro table and stream handling
- Replace legacy `hashmap` with `scc_pp_macro_table_t` for macro storage
- Improve error handling and resource management in preprocessor lifecycle
This commit is contained in:
411
libs/pprocessor/src/parse.c
Normal file
411
libs/pprocessor/src/parse.c
Normal file
@@ -0,0 +1,411 @@
|
||||
#include <lex_parser.h>
|
||||
#include <pp_macro.h>
|
||||
#include <pp_parse.h>
|
||||
#include <pp_token.h>
|
||||
|
||||
static const struct {
|
||||
const char *name;
|
||||
scc_pp_token_t tok;
|
||||
} keywords[] = {
|
||||
#define X(name, type, tok) {#name, tok},
|
||||
SCC_PP_INST_TOKEN
|
||||
#undef X
|
||||
};
|
||||
|
||||
// 使用二分查找查找关键字
|
||||
static inline int keyword_cmp(const char *name, int len) {
|
||||
int low = 0;
|
||||
int high = sizeof(keywords) / sizeof(keywords[0]) - 1;
|
||||
while (low <= high) {
|
||||
int mid = (low + high) / 2;
|
||||
const char *key = keywords[mid].name;
|
||||
int cmp = 0;
|
||||
|
||||
// 自定义字符串比较逻辑
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (name[i] != key[i]) {
|
||||
cmp = (unsigned char)name[i] - (unsigned char)key[i];
|
||||
break;
|
||||
}
|
||||
if (name[i] == '\0')
|
||||
break; // 遇到终止符提前结束
|
||||
}
|
||||
|
||||
if (cmp == 0) {
|
||||
// 完全匹配检查(长度相同)
|
||||
if (key[len] == '\0')
|
||||
return mid;
|
||||
cmp = -1; // 当前关键词比输入长
|
||||
}
|
||||
|
||||
if (cmp < 0) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
low = mid + 1;
|
||||
}
|
||||
}
|
||||
return -1; // Not a keyword.
|
||||
}
|
||||
|
||||
static inline void try_to_cut_list(scc_pp_macro_list_t *list,
|
||||
scc_cstring_t *buff) {
|
||||
if (scc_cstring_len(buff) != 0) {
|
||||
scc_vec_push(*list, *buff);
|
||||
*buff = scc_cstring_new();
|
||||
}
|
||||
}
|
||||
|
||||
static cbool parse_macro_replace_list(scc_probe_stream_t *stream,
|
||||
scc_pp_macro_list_t *list) {
|
||||
Assert(stream != null && list != null);
|
||||
scc_probe_stream_reset(stream);
|
||||
|
||||
scc_vec_init(*list);
|
||||
scc_cstring_t replacement = scc_cstring_new();
|
||||
int ch;
|
||||
scc_pos_t pos = scc_pos_init();
|
||||
|
||||
while ((ch = scc_probe_stream_peek(stream)) != scc_stream_eof) {
|
||||
if (scc_lex_parse_is_endline(ch)) {
|
||||
break;
|
||||
}
|
||||
if (scc_lex_parse_is_identifier_prefix(ch)) {
|
||||
try_to_cut_list(list, &replacement);
|
||||
cbool ret = scc_lex_parse_identifier(stream, &pos, &replacement);
|
||||
Assert(ret == true);
|
||||
try_to_cut_list(list, &replacement);
|
||||
} else if (ch == '#') {
|
||||
// TODO for # ##
|
||||
scc_probe_stream_consume(stream);
|
||||
try_to_cut_list(list, &replacement);
|
||||
} else if (scc_lex_parse_is_whitespace(ch)) {
|
||||
scc_probe_stream_consume(stream);
|
||||
try_to_cut_list(list, &replacement);
|
||||
} else {
|
||||
scc_probe_stream_consume(stream);
|
||||
scc_cstring_append_ch(&replacement, (char)ch);
|
||||
}
|
||||
}
|
||||
|
||||
if (scc_cstring_len(&replacement) != 0) {
|
||||
scc_vec_push(*list, replacement);
|
||||
replacement = scc_cstring_new();
|
||||
}
|
||||
|
||||
// for (usize i = 0; i < list->size; ++i) {
|
||||
// LOG_DEBUG("list %d: %s", (int)i,
|
||||
// scc_cstring_as_cstr(&scc_vec_at(*list, i)));
|
||||
// }
|
||||
return true;
|
||||
}
|
||||
|
||||
// 解析宏参数列表
|
||||
static cbool parse_macro_arguments(scc_probe_stream_t *stream,
|
||||
scc_pp_macro_list_t *args) {
|
||||
Assert(stream != null && args != null);
|
||||
|
||||
scc_vec_init(*args);
|
||||
int ch;
|
||||
scc_probe_stream_reset(stream);
|
||||
|
||||
// 跳过 '('
|
||||
ch = scc_probe_stream_peek(stream);
|
||||
if (ch != '(') {
|
||||
return false;
|
||||
}
|
||||
scc_probe_stream_consume(stream); // 消费 '('
|
||||
|
||||
int paren_depth = 1;
|
||||
scc_cstring_t current_arg = scc_cstring_new();
|
||||
scc_pos_t pos = scc_pos_init();
|
||||
|
||||
while (paren_depth > 0) {
|
||||
ch = scc_probe_stream_peek(stream);
|
||||
if (ch == scc_stream_eof) {
|
||||
scc_cstring_free(¤t_arg);
|
||||
scc_cstring_free(&pos.name);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ch == '(') {
|
||||
paren_depth++;
|
||||
scc_cstring_append_ch(¤t_arg, (char)ch);
|
||||
scc_probe_stream_consume(stream);
|
||||
} else if (ch == ')') {
|
||||
paren_depth--;
|
||||
if (paren_depth > 0) {
|
||||
scc_cstring_append_ch(¤t_arg, (char)ch);
|
||||
}
|
||||
scc_probe_stream_consume(stream);
|
||||
} else if (ch == ',' && paren_depth == 1) {
|
||||
// 参数分隔符
|
||||
scc_vec_push(*args, current_arg);
|
||||
current_arg = scc_cstring_new();
|
||||
scc_probe_stream_consume(stream);
|
||||
// 跳过参数后的空白
|
||||
scc_lex_parse_skip_whitespace(stream, &pos);
|
||||
} else {
|
||||
scc_cstring_append_ch(¤t_arg, (char)ch);
|
||||
scc_probe_stream_consume(stream);
|
||||
}
|
||||
}
|
||||
|
||||
// 添加最后一个参数
|
||||
if (!scc_cstring_is_empty(¤t_arg)) {
|
||||
scc_vec_push(*args, current_arg);
|
||||
} else {
|
||||
scc_cstring_free(¤t_arg);
|
||||
}
|
||||
|
||||
scc_cstring_free(&pos.name);
|
||||
return true;
|
||||
}
|
||||
|
||||
static cbool safe_skip_backspace_if_endline(scc_probe_stream_t *stream,
|
||||
scc_pos_t *pos) {
|
||||
scc_probe_stream_reset(stream);
|
||||
int ch = scc_probe_stream_peek(stream);
|
||||
// FIXME maybe it not correct
|
||||
while (ch == '\r' || ch == '\n' || ch == ' ' || ch == '\t') {
|
||||
if (scc_lex_parse_is_endline(ch)) {
|
||||
scc_lex_parse_skip_endline(stream, pos);
|
||||
return true;
|
||||
}
|
||||
scc_probe_stream_consume(stream);
|
||||
ch = scc_probe_stream_peek(stream);
|
||||
}
|
||||
scc_probe_stream_reset(stream);
|
||||
return false;
|
||||
}
|
||||
|
||||
void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos,
|
||||
scc_macro_table_t *macros) {
|
||||
Assert(stream != null);
|
||||
|
||||
scc_probe_stream_reset(stream);
|
||||
// 跳过 '#' 和后续空白
|
||||
if (scc_probe_stream_peek(stream) != '#') {
|
||||
LOG_WARN("Invalid directive");
|
||||
return;
|
||||
}
|
||||
scc_pos_next(pos);
|
||||
scc_probe_stream_consume(stream);
|
||||
if (safe_skip_backspace_if_endline(stream, pos))
|
||||
return;
|
||||
|
||||
// 解析指令名称
|
||||
scc_cstring_t directive = scc_cstring_new();
|
||||
if (!scc_lex_parse_identifier(stream, pos, &directive)) {
|
||||
goto ERR;
|
||||
}
|
||||
if (safe_skip_backspace_if_endline(stream, pos))
|
||||
goto FREE;
|
||||
|
||||
scc_pp_token_t token = keyword_cmp(scc_cstring_as_cstr(&directive),
|
||||
scc_cstring_len(&directive));
|
||||
|
||||
scc_cstring_t name = scc_cstring_new();
|
||||
switch (token) {
|
||||
case SCC_PP_TOK_DEFINE: {
|
||||
if (!scc_lex_parse_identifier(stream, pos, &name)) {
|
||||
scc_cstring_free(&name);
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
// 检查是否是函数宏:宏名后是否直接跟着 '('(没有空白字符)
|
||||
scc_probe_stream_reset(stream);
|
||||
int ch = scc_probe_stream_peek(stream);
|
||||
cbool has_whitespace = scc_lex_parse_is_whitespace(ch);
|
||||
if (has_whitespace && safe_skip_backspace_if_endline(stream, pos)) {
|
||||
goto FREE;
|
||||
}
|
||||
|
||||
if (!has_whitespace && ch == '(') {
|
||||
// 函数宏
|
||||
scc_pp_macro_list_t params;
|
||||
if (!parse_macro_arguments(stream, ¶ms)) {
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
ch = scc_probe_stream_peek(stream);
|
||||
if (ch == ')') {
|
||||
scc_probe_stream_consume(stream); // 消费 ')'
|
||||
}
|
||||
if (safe_skip_backspace_if_endline(stream, pos)) {
|
||||
goto FREE;
|
||||
}
|
||||
|
||||
scc_pp_macro_list_t replacement;
|
||||
parse_macro_replace_list(stream, &replacement);
|
||||
scc_pp_add_function_macro(macros, &name, ¶ms, &replacement);
|
||||
} else {
|
||||
// 对象宏
|
||||
scc_pp_macro_list_t replacement;
|
||||
parse_macro_replace_list(stream, &replacement);
|
||||
scc_pp_add_object_macro(macros, &name, &replacement);
|
||||
}
|
||||
scc_cstring_free(&name);
|
||||
break;
|
||||
}
|
||||
case SCC_PP_TOK_UNDEF: {
|
||||
if (scc_lex_parse_identifier(stream, pos, &name)) {
|
||||
// TODO ret value
|
||||
scc_pp_remove_macro(macros, &name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SCC_PP_TOK_INCLUDE:
|
||||
case SCC_PP_TOK_IF:
|
||||
case SCC_PP_TOK_IFDEF:
|
||||
case SCC_PP_TOK_IFNDEF:
|
||||
case SCC_PP_TOK_ELSE:
|
||||
case SCC_PP_TOK_ELIF:
|
||||
case SCC_PP_TOK_ELIFDEF:
|
||||
case SCC_PP_TOK_ELIFNDEF:
|
||||
case SCC_PP_TOK_ENDIF:
|
||||
case SCC_PP_TOK_LINE:
|
||||
case SCC_PP_TOK_EMBED:
|
||||
case SCC_PP_TOK_ERROR:
|
||||
case SCC_PP_TOK_WARNING:
|
||||
case SCC_PP_TOK_PRAMA:
|
||||
// 暂时跳过这一行
|
||||
TODO();
|
||||
scc_lex_parse_skip_line(stream, pos);
|
||||
break;
|
||||
default:
|
||||
LOG_WARN("Unknown preprocessor directive: %s",
|
||||
scc_cstring_as_cstr(&directive));
|
||||
scc_lex_parse_skip_line(stream, pos);
|
||||
}
|
||||
ERR:
|
||||
scc_lex_parse_skip_line(stream, pos);
|
||||
FREE:
|
||||
scc_cstring_free(&directive);
|
||||
scc_cstring_free(&name);
|
||||
}
|
||||
|
||||
// for # ## to generator string
|
||||
cbool scc_pp_expand_string() { return false; }
|
||||
|
||||
// 展开对象宏
|
||||
cbool scc_pp_expand_object_macro(scc_pp_macro_t *macro,
|
||||
scc_cstring_t *out_buff) {
|
||||
Assert(macro->type == SCC_PP_MACRO_OBJECT && macro->params.size == 0);
|
||||
// FIXME hack cstring to init and clean
|
||||
scc_cstring_free(out_buff);
|
||||
// 对象宏输出替换文本并进行递归展开
|
||||
for (usize i = 0; i < macro->replaces.size; ++i) {
|
||||
scc_cstring_append(out_buff, &scc_vec_at(macro->replaces, i));
|
||||
// YOU MUST USE + 1 to cmp because we use unsigned integer
|
||||
if (i + 1 < macro->replaces.size) {
|
||||
scc_cstring_append_ch(out_buff, ' ');
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// 展开函数宏
|
||||
cbool scc_pp_expand_function_macro(scc_pp_macro_t *macro,
|
||||
scc_pp_macro_list_t *params,
|
||||
scc_cstring_t *out_buff) {
|
||||
Assert(macro->type == SCC_PP_MACRO_FUNCTION);
|
||||
Assert(out_buff != null);
|
||||
// FIXME hack cstring to init and clean
|
||||
scc_cstring_free(out_buff);
|
||||
for (usize i = 0; i < macro->replaces.size; ++i) {
|
||||
// TODO ... __VA_ARGS__
|
||||
for (usize j = 0; j < macro->params.size; ++j) {
|
||||
if (scc_strcmp(
|
||||
scc_cstring_as_cstr(&scc_vec_at(macro->replaces, i)),
|
||||
scc_cstring_as_cstr(&scc_vec_at(macro->params, j))) == 0) {
|
||||
scc_cstring_append(out_buff, &scc_vec_at(*params, j));
|
||||
goto MATCH;
|
||||
}
|
||||
}
|
||||
scc_cstring_append(out_buff, &scc_vec_at(macro->replaces, i));
|
||||
MATCH:
|
||||
// YOU MUST USE + 1 to cmp because we use unsigned
|
||||
if (i + 1 < macro->replaces.size) {
|
||||
scc_cstring_append_ch(out_buff, ' ');
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Read one identifier from `stream` and produce its macro expansion.
 *
 * stream:     input positioned on an identifier (asserted by the parse).
 * macros:     macro table used for lookups.
 * out_stream: receives a new in-memory stream holding the result text.
 * depth:      remaining nesting budget; each rescan of expanded text uses
 *             depth - 1.
 *
 * Result text:
 *   - identifier that is not a macro: the identifier itself;
 *   - function-like macro name not followed by '(': the identifier itself
 *     (that is not an invocation), followed by one space if blanks were
 *     skipped while looking for '(';
 *   - otherwise the macro's replacement text, rescanned so that identifiers
 *     inside it are expanded in turn.
 *
 * Returns true on success. Returns false, with *out_stream set to null and
 * nothing consumed from `stream`, when `depth` is exhausted; the rescan loop
 * then copies the text through unexpanded.
 *
 * NOTE(review): buffers handed to scc_mem_probe_stream_new(..., false) are
 * not freed here because the stream presumably keeps reading from them --
 * confirm the meaning of the last argument and who releases them.
 * NOTE(review): the argument list built for a function-like macro is not
 * released here -- TODO once the list's release function is confirmed.
 */
cbool scc_pp_expand_macro(scc_probe_stream_t *stream, scc_macro_table_t *macros,
                          scc_probe_stream_t **out_stream, int depth) {
    // TODO self position and it maybe is a stack on #include ?
    if (depth <= 0) {
        // Give callers a defined value to test on failure.
        if (out_stream != null) {
            *out_stream = null;
        }
        return false;
    }
    Assert(stream != null && macros != null && out_stream != null);

    scc_cstring_t identifier = scc_cstring_new();
    scc_pos_t pos = scc_pos_init();

    cbool ret = scc_lex_parse_identifier(stream, &pos, &identifier);
    Assert(ret == true);

    scc_pp_macro_t *macro = scc_pp_find_macro(macros, &identifier);

    // Decide whether the identifier is passed through unchanged.
    cbool emit_identifier = (macro == null);
    cbool gap_after_name = false;
    if (macro != null && macro->type == SCC_PP_MACRO_FUNCTION) {
        // FIXME should whitespace before '(' be ignored?
        gap_after_name =
            scc_lex_parse_is_whitespace(scc_probe_stream_peek(stream));
        scc_lex_parse_skip_whitespace(stream, &pos);
        emit_identifier = (scc_probe_stream_peek(stream) != '(');
    }

    if (emit_identifier) {
        if (gap_after_name) {
            // The skipped blanks separated the name from the next token.
            scc_cstring_append_ch(&identifier, ' ');
        }
        scc_cstring_free(&pos.name);
        *out_stream =
            scc_mem_probe_stream_new(scc_cstring_as_cstr(&identifier),
                                     scc_cstring_len(&identifier), false);
        return true;
    }
    scc_cstring_free(&identifier);

    // Expand according to the macro type.
    scc_cstring_t tmp_buff = scc_cstring_new();
    if (macro->type == SCC_PP_MACRO_OBJECT) {
        ret = scc_pp_expand_object_macro(macro, &tmp_buff);
        Assert(ret == true);
    } else if (macro->type == SCC_PP_MACRO_FUNCTION) {
        scc_pp_macro_list_t params;
        ret = parse_macro_arguments(stream, &params);
        Assert(ret == true);
        ret = scc_pp_expand_function_macro(macro, &params, &tmp_buff);
        Assert(ret == true);
    }
    scc_cstring_free(&pos.name);

    // Rescan the expansion so macros named inside it are expanded as well.
    scc_probe_stream_t *tmp_stream = scc_mem_probe_stream_new(
        scc_cstring_as_cstr(&tmp_buff), scc_cstring_len(&tmp_buff), false);

    int ch;
    scc_cstring_t real_buff = scc_cstring_new();
    while ((ch = scc_probe_stream_peek(tmp_stream)) != scc_stream_eof) {
        if (scc_lex_parse_is_identifier_prefix(ch)) {
            scc_probe_stream_t *tmp_out_stream = null;
            if (scc_pp_expand_macro(tmp_stream, macros, &tmp_out_stream,
                                    depth - 1) &&
                tmp_out_stream != null) {
                while (scc_probe_stream_peek(tmp_out_stream) !=
                       scc_stream_eof) {
                    scc_cstring_append_ch(
                        &real_buff, scc_probe_stream_consume(tmp_out_stream));
                }
                Assert(tmp_out_stream->drop != null);
                scc_probe_stream_drop(tmp_out_stream);
                continue;
            }
            // Depth exhausted: nothing was consumed, so fall through and
            // copy the character as plain text.
        }
        scc_cstring_append_ch(&real_buff, scc_probe_stream_consume(tmp_stream));
    }

    // Drop the stream before the buffer it reads from.
    scc_probe_stream_drop(tmp_stream);
    scc_cstring_free(&tmp_buff);

    *out_stream = scc_mem_probe_stream_new(scc_cstring_as_cstr(&real_buff),
                                           scc_cstring_len(&real_buff), false);
    return true;
}
|
||||
Reference in New Issue
Block a user