Files
scc/libs/pprocessor/src/parse.c
zzy 73d74f5e13 refactor(pprocessor): rename macro table type and update function names
- Change `scc_macro_table_t` to `scc_pp_macro_table_t` for consistency
- Rename `scc_pp_macro_create` to `scc_pp_macro_new` for naming convention
- Remove unused `scc_pp_compress_whitespace` function
- Update macro table function names: `scc_pp_find_macro` → `scc_pp_macro_table_get`, `scc_pp_remove_macro` → `scc_pp_macro_table_remove`
- Add new `scc_pp_macro_table_set` function for setting macros
- Update all function signatures to use new type name
- Remove commented-out whitespace compression code from implementation
2025-12-14 12:59:03 +08:00

486 lines
17 KiB
C

#include <ctype.h>
#include <lex_parser.h>
#include <pp_macro.h>
#include <pp_parse.h>
#include <pp_token.h>
#include <string.h>
// Directive-name lookup table generated from the SCC_PP_INST_TOKEN X-macro
// list: each entry pairs the stringized first X argument with the third (tok).
// keyword_cmp() binary-searches this table by `name`, so the list is assumed
// to be in ascending byte order -- TODO confirm against the header that
// defines SCC_PP_INST_TOKEN.
static const struct {
const char *name;
scc_pp_token_t tok;
} keywords[] = {
#define X(name, type, tok) {#name, tok},
SCC_PP_INST_TOKEN
#undef X
};
// Look up a directive name in `keywords` with a binary search.
//
// name: candidate text; only the first `len` bytes are compared.
// len:  number of bytes of `name` that make up the candidate.
//
// Returns the token stored for the matching entry, or -1 when `name` is not
// a known directive. The previous version returned the table *index*, which
// only equals the token when the enum happens to mirror the table order; the
// caller switches on SCC_PP_TOK_* values, so the stored token is what it needs.
static inline int keyword_cmp(const char *name, int len) {
    int low = 0;
    int high = sizeof(keywords) / sizeof(keywords[0]) - 1;
    while (low <= high) {
        int mid = (low + high) / 2;
        const char *key = keywords[mid].name;
        int cmp = 0;
        // Byte-wise comparison limited to `len` characters of `name`.
        for (int i = 0; i < len; i++) {
            if (name[i] != key[i]) {
                cmp = (unsigned char)name[i] - (unsigned char)key[i];
                break;
            }
            if (name[i] == '\0') {
                break; // both strings ended early
            }
        }
        if (cmp == 0) {
            // The first `len` bytes agree; it is a match only if the keyword
            // also ends here.
            if (key[len] == '\0') {
                return keywords[mid].tok;
            }
            cmp = -1; // keyword is longer than the input, so input sorts first
        }
        if (cmp < 0) {
            high = mid - 1;
        } else {
            low = mid + 1;
        }
    }
    return -1; // Not a keyword.
}
// Flush the pending text in `buff` into `list` as one element.
// When `buff` holds at least one character its value is pushed onto `list`
// (the list takes over that string) and `buff` is re-armed with a fresh empty
// string; an empty `buff` is left untouched.
static inline void try_to_cut_list(scc_pp_macro_list_t *list,
                                   scc_cstring_t *buff) {
    if (scc_cstring_len(buff) == 0) {
        return;
    }
    scc_vec_push(*list, *buff);
    *buff = scc_cstring_new();
}
// Split the remainder of a #define line into replacement-list elements.
//
// stream: positioned at the first character of the replacement text.
// list:   initialised here and filled with one string per element.
//
// Element kinds produced:
//   - each identifier is its own element;
//   - "#" and "##" are their own elements (whitespace after them is dropped);
//   - every run of whitespace becomes a single " " element;
//   - a string or character literal is copied verbatim as ONE element, so
//     blanks, '#' and identifiers inside it are never reinterpreted;
//   - any other characters accumulate into a shared element until one of the
//     cases above cuts it.
// Parsing stops at end of line or end of stream; the line terminator itself
// is left in the stream. Always returns true.
static cbool parse_macro_replace_list(scc_probe_stream_t *stream,
                                      scc_pp_macro_list_t *list) {
    Assert(stream != null && list != null);
    scc_probe_stream_reset(stream);
    scc_vec_init(*list);

    scc_cstring_t replacement = scc_cstring_new();
    scc_pos_t pos = scc_pos_init();
    int ch;

    while ((ch = scc_probe_stream_peek(stream)) != scc_stream_eof) {
        if (scc_lex_parse_is_endline(ch)) {
            break;
        }
        if (scc_lex_parse_is_identifier_prefix(ch)) {
            try_to_cut_list(list, &replacement);
            cbool ret = scc_lex_parse_identifier(stream, &pos, &replacement);
            Assert(ret == true);
            try_to_cut_list(list, &replacement);
        } else if (ch == '"' || ch == '\'') {
            // Literal: copy up to the matching quote without tokenising.
            const int quote = ch;
            try_to_cut_list(list, &replacement);
            scc_probe_stream_consume(stream);
            scc_cstring_append_ch(&replacement, (char)quote);
            while ((ch = scc_probe_stream_peek(stream)) != scc_stream_eof &&
                   !scc_lex_parse_is_endline(ch)) {
                scc_probe_stream_consume(stream);
                scc_cstring_append_ch(&replacement, (char)ch);
                if (ch == quote) {
                    break;
                }
                if (ch == '\\') {
                    // Keep the escaped character attached to its backslash so
                    // an escaped quote does not close the literal.
                    ch = scc_probe_stream_peek(stream);
                    if (ch == scc_stream_eof || scc_lex_parse_is_endline(ch)) {
                        break;
                    }
                    scc_probe_stream_consume(stream);
                    scc_cstring_append_ch(&replacement, (char)ch);
                }
            }
            try_to_cut_list(list, &replacement);
        } else if (ch == '#') {
            // '#' (stringify) or '##' (paste) operator.
            scc_probe_stream_consume(stream);
            try_to_cut_list(list, &replacement);
            scc_cstring_append_ch(&replacement, '#');
            if (scc_probe_stream_peek(stream) == '#') {
                scc_probe_stream_consume(stream);
                scc_cstring_append_ch(&replacement, '#');
            }
            // Drop whitespace after the operator so that it is directly
            // followed by its operand in the list.
            scc_lex_parse_skip_whitespace(stream, &pos);
            try_to_cut_list(list, &replacement);
        } else if (scc_lex_parse_is_whitespace(ch)) {
            // Collapse the whole run into one " " element.
            try_to_cut_list(list, &replacement);
            scc_lex_parse_skip_whitespace(stream, &pos);
            scc_cstring_append_ch(&replacement, ' ');
            try_to_cut_list(list, &replacement);
        } else {
            scc_probe_stream_consume(stream);
            scc_cstring_append_ch(&replacement, (char)ch);
        }
    }

    // Flush whatever is still pending.
    try_to_cut_list(list, &replacement);
    // Release the scratch position's name, as parse_macro_arguments does.
    scc_cstring_free(&pos.name);
    return true;
}
// Parse a parenthesised, comma-separated list: either the formal parameters
// of a function-like #define or the actual arguments of a macro call.
//
// stream: must be positioned on '('; otherwise nothing is consumed.
// args:   initialised here; receives one string per argument.
//
// Rules:
//   - commas split arguments only at the outermost parenthesis level;
//   - nested parentheses are kept as part of the argument text;
//   - string and character literals are copied verbatim, so commas and
//     parentheses inside them neither split nor unbalance the list;
//   - whitespace around an argument is dropped and each inner run of
//     whitespace collapses to one ' ' (so "( a , b )" yields "a" and "b");
//   - an empty final argument is not stored.
//
// Returns true after consuming the closing ')'. Returns false if the stream
// does not start with '(' or ends before the list is closed; in the latter
// case arguments pushed so far stay in `args`.
// NOTE(review): the caller currently has no way to release those.
static cbool parse_macro_arguments(scc_probe_stream_t *stream,
                                   scc_pp_macro_list_t *args) {
    Assert(stream != null && args != null);
    scc_vec_init(*args);
    scc_probe_stream_reset(stream);
    if (scc_probe_stream_peek(stream) != '(') {
        return false;
    }
    scc_probe_stream_consume(stream); // consume '('

    int paren_depth = 1;
    // True when whitespace was seen after some argument text; the blank is
    // only emitted if more text follows, which trims trailing whitespace.
    cbool pending_space = false;
    scc_cstring_t current_arg = scc_cstring_new();

    while (paren_depth > 0) {
        int ch = scc_probe_stream_peek(stream);
        if (ch == scc_stream_eof) {
            scc_cstring_free(&current_arg);
            return false;
        }
        scc_probe_stream_consume(stream);

        if (scc_lex_parse_is_whitespace(ch)) {
            pending_space = !scc_cstring_is_empty(&current_arg);
            continue;
        }
        if (ch == ',' && paren_depth == 1) {
            // Argument separator.
            scc_vec_push(*args, current_arg);
            current_arg = scc_cstring_new();
            pending_space = false;
            continue;
        }
        if (ch == ')') {
            paren_depth--;
            if (paren_depth == 0) {
                break; // closing parenthesis of the list itself
            }
        } else if (ch == '(') {
            paren_depth++;
        }

        if (pending_space) {
            scc_cstring_append_ch(&current_arg, ' ');
            pending_space = false;
        }
        scc_cstring_append_ch(&current_arg, (char)ch);

        if (ch == '"' || ch == '\'') {
            // Copy the rest of the literal untouched. An unterminated literal
            // stops at the end of the line so it cannot swallow the file.
            const int quote = ch;
            for (;;) {
                ch = scc_probe_stream_peek(stream);
                if (ch == scc_stream_eof || scc_lex_parse_is_endline(ch)) {
                    break;
                }
                scc_probe_stream_consume(stream);
                scc_cstring_append_ch(&current_arg, (char)ch);
                if (ch == quote) {
                    break;
                }
                if (ch == '\\') {
                    ch = scc_probe_stream_peek(stream);
                    if (ch == scc_stream_eof || scc_lex_parse_is_endline(ch)) {
                        break;
                    }
                    scc_probe_stream_consume(stream);
                    scc_cstring_append_ch(&current_arg, (char)ch);
                }
            }
        }
    }

    // Store the last argument unless it is empty.
    if (!scc_cstring_is_empty(&current_arg)) {
        scc_vec_push(*args, current_arg);
    } else {
        scc_cstring_free(&current_arg);
    }
    return true;
}
// Consume blanks and tabs in front of the cursor and report whether the line
// ends right after them.
//
// Returns true when a line terminator was reached; it is consumed through
// scc_lex_parse_skip_endline(). Returns false when some other character (or
// end of stream) follows; the blanks consumed so far stay consumed and the
// probe is reset.
static cbool safe_skip_backspace_if_endline(scc_probe_stream_t *stream,
                                            scc_pos_t *pos) {
    scc_probe_stream_reset(stream);
    // FIXME maybe it not correct
    for (;;) {
        const int next = scc_probe_stream_peek(stream);
        const cbool is_blank_or_eol =
            next == '\r' || next == '\n' || next == ' ' || next == '\t';
        if (!is_blank_or_eol) {
            break;
        }
        if (scc_lex_parse_is_endline(next)) {
            scc_lex_parse_skip_endline(stream, pos);
            return true;
        }
        scc_probe_stream_consume(stream);
    }
    scc_probe_stream_reset(stream);
    return false;
}
// Parse one preprocessor directive line; the stream must be positioned on '#'.
//
// stream: input; on return the directive line has been consumed.
// pos:    source position, advanced by the lexer helpers.
// macros: macro table updated by #define and #undef.
//
// Handled directives:
//   #define  registers an object-like or function-like macro. A macro is
//            function-like only when '(' follows the name with no whitespace
//            in between. An empty body registers an empty replacement list.
//   #undef   removes the named macro.
//   Every other known directive is skipped for now; unknown ones are skipped
//   with a warning. A lone '#' (null directive) is accepted silently.
//
// The rest of the line is skipped exactly once, at SKIP_LINE. Paths that have
// already consumed the line terminator jump straight to FREE.
// NOTE(review): this assumes scc_lex_parse_skip_line() consumes through the
// line terminator -- confirm against the lexer.
void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos,
                            scc_pp_macro_table_t *macros) {
    Assert(stream != null);
    scc_probe_stream_reset(stream);
    if (scc_probe_stream_peek(stream) != '#') {
        LOG_WARN("Invalid directive");
        return;
    }
    scc_pos_next(pos);
    scc_probe_stream_consume(stream);
    // '#' followed only by blanks: null directive, nothing to do.
    if (safe_skip_backspace_if_endline(stream, pos)) {
        return;
    }

    // Both strings are initialised before the first goto so the shared
    // cleanup at FREE never touches an indeterminate value.
    scc_cstring_t directive = scc_cstring_new();
    scc_cstring_t name = scc_cstring_new();
    scc_pp_token_t token;

    // Directive name.
    if (!scc_lex_parse_identifier(stream, pos, &directive)) {
        goto SKIP_LINE;
    }
    // Directive with nothing after it; the line terminator is already gone.
    if (safe_skip_backspace_if_endline(stream, pos)) {
        goto FREE;
    }

    token = keyword_cmp(scc_cstring_as_cstr(&directive),
                        scc_cstring_len(&directive));
    switch (token) {
    case SCC_PP_TOK_DEFINE: {
        if (!scc_lex_parse_identifier(stream, pos, &name)) {
            goto SKIP_LINE;
        }
        scc_probe_stream_reset(stream);
        int ch = scc_probe_stream_peek(stream);
        cbool has_whitespace = scc_lex_parse_is_whitespace(ch);
        cbool is_function = !has_whitespace && ch == '(';

        scc_pp_macro_list_t params;
        if (is_function) {
            // parse_macro_arguments() consumes the closing ')' itself.
            // NOTE(review): on failure, parameters collected so far are not
            // released.
            if (!parse_macro_arguments(stream, &params)) {
                goto SKIP_LINE;
            }
        }

        // Skip blanks before the body. If the line ends here the body is
        // empty and the terminator has already been consumed.
        cbool line_done = (is_function || has_whitespace) &&
                          safe_skip_backspace_if_endline(stream, pos);

        scc_pp_macro_list_t replacement;
        if (line_done) {
            scc_vec_init(replacement);
        } else {
            parse_macro_replace_list(stream, &replacement);
        }

        if (is_function) {
            scc_pp_add_function_macro(macros, &name, &params, &replacement);
        } else {
            scc_pp_add_object_macro(macros, &name, &replacement);
        }
        if (line_done) {
            goto FREE;
        }
        break;
    }
    case SCC_PP_TOK_UNDEF: {
        if (scc_lex_parse_identifier(stream, pos, &name)) {
            // TODO ret value
            scc_pp_macro_table_remove(macros, &name);
        }
        break;
    }
    case SCC_PP_TOK_INCLUDE:
    case SCC_PP_TOK_IF:
    case SCC_PP_TOK_IFDEF:
    case SCC_PP_TOK_IFNDEF:
    case SCC_PP_TOK_ELSE:
    case SCC_PP_TOK_ELIF:
    case SCC_PP_TOK_ELIFDEF:
    case SCC_PP_TOK_ELIFNDEF:
    case SCC_PP_TOK_ENDIF:
    case SCC_PP_TOK_LINE:
    case SCC_PP_TOK_EMBED:
    case SCC_PP_TOK_ERROR:
    case SCC_PP_TOK_WARNING:
    case SCC_PP_TOK_PRAMA:
        // Not implemented yet: the line is dropped by SKIP_LINE below.
        break;
    default:
        LOG_WARN("Unknown preprocessor directive: %s",
                 scc_cstring_as_cstr(&directive));
        break;
    }

SKIP_LINE:
    scc_lex_parse_skip_line(stream, pos);
FREE:
    scc_cstring_free(&directive);
    scc_cstring_free(&name);
}
// Turn the text in `buff` into the spelling of a C string literal, in place
// (the result of the '#' operator).
//
// The text is wrapped in double quotes. While the scan is inside a string or
// character literal of the original text, every '"' and '\' gets a backslash
// in front of it; the delimiting '"' of a string literal is escaped as well.
// A backslash outside any literal is copied unchanged.
// Example: the text  "a\n"  (with its quotes) becomes  "\"a\\n\""  .
static inline void scc_generate_cstr(scc_cstring_t *buff) {
    const char *src = scc_cstring_as_cstr(buff);
    usize len = scc_cstring_len(buff);
    scc_cstring_t out_buff = scc_cstring_new();
    char open_quote = '\0'; // delimiter of the literal being scanned, if any

    scc_cstring_append_ch(&out_buff, '\"');
    for (usize i = 0; i < len; ++i) {
        char ch = src[i];
        if (open_quote != '\0' && ch == '\\') {
            // Escape sequence inside a literal: double the backslash and copy
            // the escaped character right away so it cannot be mistaken for
            // the closing delimiter.
            scc_cstring_append_ch(&out_buff, '\\');
            scc_cstring_append_ch(&out_buff, '\\');
            if (i + 1 < len) {
                ch = src[++i];
                if (ch == '\"' || ch == '\\') {
                    scc_cstring_append_ch(&out_buff, '\\');
                }
                scc_cstring_append_ch(&out_buff, ch);
            }
            continue;
        }
        if (ch == '\"' || ch == '\'') {
            if (open_quote == '\0') {
                open_quote = ch; // literal starts
            } else if (open_quote == ch) {
                open_quote = '\0'; // literal ends
            }
        }
        if (ch == '\"') {
            scc_cstring_append_ch(&out_buff, '\\');
        }
        scc_cstring_append_ch(&out_buff, ch);
    }
    scc_cstring_append_ch(&out_buff, '\"');

    // FIXME there may be a better way than copying the result back
    scc_cstring_clear(buff);
    scc_cstring_append(buff, &out_buff);
    scc_cstring_free(&out_buff);
}
// Helpers for walking a replacement list. They are unhygienic on purpose:
// each one reads a `scc_pp_macro_list_t *list` from the scope where it is
// expanded, and SCC_PP_USE_CONNECT also writes to a `scc_cstring_t *out_buff`
// from that scope. The index arguments are evaluated more than once, so pass
// expressions without side effects.
// NOTE(review): the predicates read `.data[0..2]` directly, which presumes
// every element is a non-empty, NUL-terminated string -- confirm.

// True when element i exists and is exactly " ".
#define SCC_PP_IS_LIST_BLANK(i)                                                \
    ((i) < list->size && scc_vec_at(*list, (i)).data[0] == ' ' &&              \
     scc_vec_at(*list, (i)).data[1] == '\0')
// True when element i exists and is exactly "#" (stringify operator).
#define SCC_PP_IS_LIST_TO_STRING(i)                                            \
    ((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' &&              \
     scc_vec_at(*list, (i)).data[1] == '\0')
// True when element i exists and is exactly "##" (paste operator).
#define SCC_PP_IS_LIST_CONNECT(i)                                              \
    ((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' &&              \
     scc_vec_at(*list, (i)).data[1] == '#' &&                                  \
     scc_vec_at(*list, (i)).data[2] == '\0')
// Append element `front` to out_buff, followed by element `rear` when that
// index is inside the list. Wrapped in do/while(0) so it behaves as a single
// statement after an unbraced `if`.
#define SCC_PP_USE_CONNECT(front, rear)                                        \
    do {                                                                       \
        scc_cstring_append(out_buff, &scc_vec_at(*list, (front)));             \
        if ((rear) < list->size) {                                             \
            scc_cstring_append(out_buff, &scc_vec_at(*list, (rear)));          \
        }                                                                      \
    } while (0)
// Concatenate a replacement list into `out_buff`, applying the '#' and '##'
// operators.
//
//   X ## Y   (with or without one blank element before "##") appends X and Y
//            back to back and drops the operator;
//   # X      appends X converted to a string literal;
//   anything else is appended unchanged.
//
// `list` is only read: the stringified operand is built in a temporary, so
// the stored macro definition is the same after every expansion.
// A trailing '#' with no operand logs a warning and ends the expansion.
// NOTE(review): a chain such as a##b##c is not handled; the second "##" is
// emitted literally.
// Always returns true.
static inline cbool scc_pp_expand_string_unsafe(scc_pp_macro_list_t *list,
                                                scc_cstring_t *out_buff) {
    for (usize i = 0; i < list->size; ++i) {
        if (SCC_PP_IS_LIST_BLANK(i + 1)) {
            if (SCC_PP_IS_LIST_CONNECT(i + 2)) {
                SCC_PP_USE_CONNECT(i, i + 3);
                i += 3;
                continue;
            }
        } else if (SCC_PP_IS_LIST_CONNECT(i + 1)) {
            SCC_PP_USE_CONNECT(i, i + 2);
            i += 2;
            continue;
        } else if (SCC_PP_IS_LIST_TO_STRING(i)) {
            i += 1;
            if (i >= list->size) {
                LOG_WARN("# need a valid literator");
                break;
            }
            // Quote a copy so the list element keeps its original spelling.
            scc_cstring_t quoted = scc_cstring_new();
            scc_cstring_append(&quoted, &scc_vec_at(*list, i));
            scc_generate_cstr(&quoted);
            scc_cstring_append(out_buff, &quoted);
            scc_cstring_free(&quoted);
            continue;
        }
        scc_cstring_append(out_buff, &scc_vec_at(*list, i));
    }
    return true;
}
// Expand an object-like macro.
//
// macro:    must be of type SCC_PP_MACRO_OBJECT with no parameters.
// out_buff: must be empty on entry; receives the macro's replacement text
//           with the list operators applied by scc_pp_expand_string_unsafe().
//
// Nested macros in the result are not expanded here. Always returns true.
cbool scc_pp_expand_object_macro(scc_pp_macro_t *macro,
                                 scc_cstring_t *out_buff) {
    Assert(macro->type == SCC_PP_MACRO_OBJECT && macro->params.size == 0);
    Assert(scc_cstring_is_empty(out_buff) == true);

    scc_pp_macro_list_t *body = &macro->replaces;
    scc_pp_expand_string_unsafe(body, out_buff);
    return true;
}
// Map one replacement-list element to the text that should be emitted for it:
// the call argument bound to the formal parameter it names, or the element
// itself when it is not a parameter (or no argument was supplied for it).
static inline scc_cstring_t *
scc_pp_bound_argument(scc_pp_macro_t *macro, scc_pp_macro_list_t *params,
                      scc_cstring_t *element) {
    for (usize j = 0;
         params != null && j < macro->params.size && j < params->size; ++j) {
        if (scc_strcmp(scc_cstring_as_cstr(element),
                       scc_cstring_as_cstr(&scc_vec_at(macro->params, j))) ==
            0) {
            return &scc_vec_at(*params, j);
        }
    }
    return element;
}

// Expand a function-like macro.
//
// macro:    must be of type SCC_PP_MACRO_FUNCTION.
// params:   actual arguments of this invocation, in declaration order.
// out_buff: must be empty on entry; receives the expansion.
//
// The stored replacement list is walked read-only and parameters are resolved
// while emitting, so the macro definition is unchanged and can be invoked
// again with different arguments. Each element is substituted at most once;
// an argument whose text equals another parameter's name is not re-substituted.
// The '#' and '##' handling mirrors scc_pp_expand_string_unsafe(), with the
// operators recognised on the stored list rather than on argument text.
// Always returns true.
cbool scc_pp_expand_function_macro(scc_pp_macro_t *macro,
                                   scc_pp_macro_list_t *params,
                                   scc_cstring_t *out_buff) {
    Assert(macro->type == SCC_PP_MACRO_FUNCTION);
    Assert(out_buff != null);
    Assert(scc_cstring_is_empty(out_buff) == true);

    // The SCC_PP_IS_LIST_* helpers expect the list under this exact name.
    scc_pp_macro_list_t *list = &macro->replaces;

    // TODO ... __VA_ARGS__
    for (usize i = 0; i < list->size; ++i) {
        cbool is_paste = false;
        usize rear = 0; // index of the right-hand operand of "##"

        if (SCC_PP_IS_LIST_BLANK(i + 1)) {
            // "X", " ", "##", "Y"
            is_paste = SCC_PP_IS_LIST_CONNECT(i + 2);
            rear = i + 3;
        } else if (SCC_PP_IS_LIST_CONNECT(i + 1)) {
            // "X", "##", "Y"
            is_paste = true;
            rear = i + 2;
        } else if (SCC_PP_IS_LIST_TO_STRING(i)) {
            if (i + 1 >= list->size) {
                LOG_WARN("# need a valid literator");
                break;
            }
            i += 1;
            // Stringify a copy of the operand, never the stored element or
            // the caller's argument.
            scc_cstring_t quoted = scc_cstring_new();
            scc_cstring_append(
                &quoted,
                scc_pp_bound_argument(macro, params, &scc_vec_at(*list, i)));
            scc_generate_cstr(&quoted);
            scc_cstring_append(out_buff, &quoted);
            scc_cstring_free(&quoted);
            continue;
        }

        scc_cstring_append(
            out_buff,
            scc_pp_bound_argument(macro, params, &scc_vec_at(*list, i)));
        if (is_paste) {
            if (rear < list->size) {
                scc_cstring_append(out_buff,
                                   scc_pp_bound_argument(
                                       macro, params, &scc_vec_at(*list, rear)));
            }
            i = rear; // skip the operator and its right-hand operand
        }
    }
    return true;
}
// Read one identifier from `stream` and produce its fully expanded text.
//
// stream:       positioned on the first character of an identifier.
// macros:       table of defined macros.
// expand_stack: names of the macros currently being expanded; a macro found
//               here is emitted verbatim instead of being expanded again.
// out_stream:   receives a new memory stream holding the result, or null on
//               failure. The caller drops it.
// depth:        remaining recursion budget; 0 or less fails immediately.
//
// Returns true on success. Returns false when the depth budget is exhausted,
// when a function-like macro name is not followed by '(', or when a nested
// expansion fails.
//
// NOTE(review): result streams are created over buffers with the third
// argument `false` and those buffers are not freed here; whether the stream
// copies or adopts the memory is not visible in this file -- confirm, since
// one of the two leaks or dangles otherwise.
// NOTE(review): a function-like macro name without '(' is reported as an
// error and dropped; emitting the bare identifier instead would need the
// skipped whitespace to be restored.
cbool scc_pp_expand_macro(scc_probe_stream_t *stream,
                          scc_pp_macro_table_t *macros,
                          scc_pp_macro_table_t *expand_stack,
                          scc_probe_stream_t **out_stream, int depth) {
    // TODO self position and it maybe is a stack on #include ?
    Assert(out_stream != null); // checked before the first write through it
    if (depth <= 0) {
        *out_stream = null;
        return false;
    }
    Assert(stream != null && macros != null && out_stream != null);

    scc_cstring_t identifier = scc_cstring_new();
    scc_pos_t pos = scc_pos_init();
    cbool ret = scc_lex_parse_identifier(stream, &pos, &identifier);
    Assert(ret == true);

    scc_pp_macro_t *macro = scc_pp_macro_table_get(macros, &identifier);
    // Emit the identifier unchanged when
    //   1. it is not a macro, or
    //   2. the macro is already being expanded further up the stack.
    if (macro == null ||
        scc_pp_macro_table_get(expand_stack, &macro->name) != null) {
        *out_stream =
            scc_mem_probe_stream_new(scc_cstring_as_cstr(&identifier),
                                     scc_cstring_len(&identifier), false);
        scc_cstring_free(&pos.name);
        return true;
    }
    scc_cstring_free(&identifier);

    // First-level expansion according to the macro kind.
    scc_cstring_t tmp_buff = scc_cstring_new();
    if (macro->type == SCC_PP_MACRO_OBJECT) {
        ret = scc_pp_expand_object_macro(macro, &tmp_buff);
        Assert(ret == true);
    } else if (macro->type == SCC_PP_MACRO_FUNCTION) {
        // FIXME should whitespace before '(' really be ignored?
        scc_lex_parse_skip_whitespace(stream, &pos);
        if (scc_probe_stream_peek(stream) != '(') {
            LOG_ERROR("Not a function and skip it");
            scc_cstring_free(&tmp_buff);
            scc_cstring_free(&pos.name);
            *out_stream = null;
            return false;
        }
        scc_pp_macro_list_t params;
        ret = parse_macro_arguments(stream, &params);
        Assert(ret == true);
        ret = scc_pp_expand_function_macro(macro, &params, &tmp_buff);
        Assert(ret == true);
        // The expansion copied what it needed; release the argument strings.
        // NOTE(review): the vector's own storage is not released here.
        for (usize i = 0; i < params.size; ++i) {
            scc_cstring_free(&scc_vec_at(params, i));
        }
    }
    scc_cstring_free(&pos.name);

    // Mark this macro as "being expanded" for the duration of the rescan.
    scc_pp_macro_table_set(expand_stack,
                           scc_pp_macro_new(&macro->name, macro->type));

    // Rescan the expansion and expand every identifier found in it.
    scc_probe_stream_t *tmp_stream = scc_mem_probe_stream_new(
        scc_cstring_as_cstr(&tmp_buff), scc_cstring_len(&tmp_buff), false);
    scc_cstring_t real_buff = scc_cstring_new();
    cbool ok = true;
    int ch;
    while ((ch = scc_probe_stream_peek(tmp_stream)) != scc_stream_eof) {
        if (!scc_lex_parse_is_identifier_prefix(ch)) {
            scc_cstring_append_ch(&real_buff,
                                  scc_probe_stream_consume(tmp_stream));
            continue;
        }
        scc_probe_stream_t *tmp_out_stream = null;
        if (scc_pp_expand_macro(tmp_stream, macros, expand_stack,
                                &tmp_out_stream, depth - 1) == false) {
            ok = false;
            break;
        }
        Assert(tmp_out_stream != null);
        while (scc_probe_stream_peek(tmp_out_stream) != scc_stream_eof) {
            scc_cstring_append_ch(&real_buff,
                                  scc_probe_stream_consume(tmp_out_stream));
        }
        Assert(tmp_out_stream != null && tmp_out_stream->drop != null);
        scc_probe_stream_drop(tmp_out_stream);
    }

    // Shared cleanup for success and failure: drop the stream before the
    // buffer it reads from, then unmark the macro.
    scc_probe_stream_drop(tmp_stream);
    scc_cstring_free(&tmp_buff);
    scc_pp_macro_table_remove(expand_stack, &macro->name);

    if (!ok) {
        scc_cstring_free(&real_buff);
        *out_stream = null;
        return false;
    }
    *out_stream = scc_mem_probe_stream_new(scc_cstring_as_cstr(&real_buff),
                                           scc_cstring_len(&real_buff), false);
    return true;
}