feat(lexer, preprocessor): replace cstring conversion with copy and refactor macro expansion

- Replace `scc_cstring_from_cstr(scc_cstring_as_cstr(...))` with `scc_cstring_copy()` in lexer to fix memory leaks
- Extract macro expansion logic into separate `expand_macro.c` file
- Remove `expand_stack` parameter from `scc_pp_expand_macro()` function
- Add new parsing functions for macro replacement lists and arguments
- Add string utility functions for whitespace trimming and string joining
- Update memory stream documentation for clarity
This commit is contained in:
zzy
2025-12-15 20:24:39 +08:00
parent 73d74f5e13
commit 07f5d9331b
15 changed files with 574 additions and 346 deletions

View File

@@ -5,9 +5,14 @@
#include <pp_macro.h>
void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos,
scc_pp_macro_table_t *macros);
cbool scc_pp_parse_macro_replace_list(scc_probe_stream_t *stream,
scc_pp_macro_list_t *list);
cbool scc_pp_parse_macro_arguments(scc_probe_stream_t *stream,
scc_pp_macro_list_t *args);
// expand
cbool scc_pp_expand_macro(scc_probe_stream_t *stream,
scc_pp_macro_table_t *macros,
scc_pp_macro_table_t *expand_stack,
scc_probe_stream_t **out_stream, int depth);
#endif /* __SCC_PP_PARSE_H__ */

View File

@@ -0,0 +1,296 @@
#include <lex_parser.h>
#include <libutils.h>
#include <pp_macro.h>
#include <pp_parse.h>
/**
 * Rewrites `buff` in place as a C string literal spelling its old content
 * (the `#` stringification operator).
 *
 * The text is wrapped in double quotes. A backslash is inserted before every
 * `"` and before every `\` that occurs inside a string literal or character
 * constant of the original text, so that the produced literal is valid C.
 *
 * @param buff text to stringify; overwritten with the quoted result
 */
static inline void scc_generate_cstr(scc_cstring_t *buff) {
    const char *src = scc_cstring_as_cstr(buff);
    const usize len = scc_cstring_len(buff);
    scc_cstring_t out_buff = scc_cstring_new();
    char quote = '\0';     // delimiter of the literal being copied, '\0' outside
    cbool escaped = false; // previous char was an unescaped '\' inside a literal

    scc_cstring_append_ch(&out_buff, '\"');
    for (usize i = 0; i < len; ++i) {
        const char ch = src[i];
        // A '"' always belongs to (or delimits) a string literal; a '\' only
        // needs protection while inside a literal.
        if (ch == '\"' || (ch == '\\' && quote != '\0')) {
            scc_cstring_append_ch(&out_buff, '\\');
        }
        scc_cstring_append_ch(&out_buff, ch);

        if (quote == '\0') {
            if (ch == '\"' || ch == '\'') {
                quote = ch;
            }
        } else if (escaped) {
            escaped = false;
        } else if (ch == '\\') {
            escaped = true;
        } else if (ch == quote) {
            quote = '\0';
        }
    }
    scc_cstring_append_ch(&out_buff, '\"');

    // `src` points into `buff`, so it must not be used after this point.
    scc_cstring_clear(buff);
    scc_cstring_append(buff, &out_buff);
    scc_cstring_free(&out_buff);
}
// Token classification helpers. All of them expect a pointer named `list`
// (scc_pp_macro_list_t *) in scope and are false when the index is out of
// range.

// True when token `i` is exactly " ".
#define SCC_PP_IS_LIST_BLANK(i)                                               \
    ((i) < list->size && scc_vec_at(*list, (i)).data[0] == ' ' &&             \
     scc_vec_at(*list, (i)).data[1] == '\0')
// True when token `i` is exactly "#" (stringification operator).
#define SCC_PP_IS_LIST_TO_STRING(i)                                           \
    ((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' &&             \
     scc_vec_at(*list, (i)).data[1] == '\0')
// True when token `i` is exactly "##" (token-pasting operator).
#define SCC_PP_IS_LIST_CONNECT(i)                                             \
    ((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' &&             \
     scc_vec_at(*list, (i)).data[1] == '#' &&                                 \
     scc_vec_at(*list, (i)).data[2] == '\0')
// Appends token `font` to `out_buff`, immediately followed by token `rear`
// when `rear` is inside the list. Also expects `out_buff` in scope.
// Wrapped in do/while(0) so it behaves as one statement at every call site.
#define SCC_PP_USE_CONNECT(font, rear)                                        \
    do {                                                                      \
        scc_cstring_append(out_buff, &scc_vec_at(*list, (font)));             \
        if ((rear) < list->size) {                                            \
            scc_cstring_append(out_buff, &scc_vec_at(*list, (rear)));         \
        }                                                                     \
    } while (0)
/**
 * Concatenates the tokens of `list` into `out_buff`, applying the `#`
 * (stringify) and `##` (paste) operators on the way.
 *
 * - `a ## b` and `a<blank>## b` append `a` directly followed by `b`.
 * - `# x` appends `x` turned into a string literal; blanks between `#` and
 *   its operand are skipped.
 * - every other token is appended unchanged.
 *
 * `list` itself is never modified: stringification works on a private copy,
 * so a macro's stored replacement list stays intact across expansions.
 *
 * @param list     tokens to emit
 * @param out_buff destination the text is appended to
 * @return always true
 */
static inline cbool scc_pp_expand_string_unsafe(scc_pp_macro_list_t *list,
                                                scc_cstring_t *out_buff) {
    for (usize i = 0; i < list->size; ++i) {
        if (SCC_PP_IS_LIST_BLANK(i + 1) && SCC_PP_IS_LIST_CONNECT(i + 2)) {
            SCC_PP_USE_CONNECT(i, i + 3);
            i += 3;
            continue;
        }
        if (SCC_PP_IS_LIST_CONNECT(i + 1)) {
            SCC_PP_USE_CONNECT(i, i + 2);
            i += 2;
            continue;
        }
        if (SCC_PP_IS_LIST_TO_STRING(i)) {
            usize operand = i + 1;
            while (SCC_PP_IS_LIST_BLANK(operand)) {
                ++operand;
            }
            if (operand >= list->size) {
                LOG_WARN("# need a valid literator");
                break;
            }
            // Stringify a copy so the caller's list element is left as-is.
            scc_cstring_t literal =
                scc_cstring_copy(&scc_vec_at(*list, operand));
            scc_generate_cstr(&literal);
            scc_cstring_append(out_buff, &literal);
            scc_cstring_free(&literal);
            i = operand;
            continue;
        }
        scc_cstring_append(out_buff, &scc_vec_at(*list, i));
    }
    return true;
}
/**
 * Expands an object-like macro: writes its replacement list, with `#`/`##`
 * applied by scc_pp_expand_string_unsafe, into `out_buff`.
 *
 * @param macro    macro of type SCC_PP_MACRO_OBJECT without parameters
 * @param out_buff empty destination buffer
 * @return always true
 */
cbool scc_pp_expand_object_macro(scc_pp_macro_t *macro,
                                 scc_cstring_t *out_buff) {
    Assert(macro->type == SCC_PP_MACRO_OBJECT && macro->params.size == 0);
    Assert(scc_cstring_is_empty(out_buff) == true);

    // Only the replacement text is produced here; rescanning for nested
    // macros is left to the caller.
    scc_pp_macro_list_t *replacement = &macro->replaces;
    scc_pp_expand_string_unsafe(replacement, out_buff);
    return true;
}
// 展开函数宏
cbool scc_pp_expand_function_macro(scc_pp_macro_t *macro,
scc_pp_macro_list_t *origin_params,
scc_pp_macro_list_t *params,
scc_cstring_t *out_buff) {
Assert(macro->type == SCC_PP_MACRO_FUNCTION);
Assert(out_buff != null);
Assert(scc_cstring_is_empty(out_buff) == true);
Assert(params->size == macro->params.size);
scc_pp_macro_list_t list;
scc_vec_init(list);
for (usize i = 0; i < macro->replaces.size; ++i) {
// TODO ... __VA_ARGS__
for (usize j = 0; j < macro->params.size; ++j) {
if (scc_strcmp(
scc_cstring_as_cstr(&scc_vec_at(macro->replaces, i)),
scc_cstring_as_cstr(&scc_vec_at(macro->params, j))) == 0) {
Assert(&scc_vec_at(*params, j) != null);
scc_vec_push(list, scc_cstring_copy(&scc_vec_at(*params, j)));
goto END;
}
}
scc_vec_push(list, scc_cstring_copy(&scc_vec_at(macro->replaces, i)));
END:;
}
scc_pp_expand_string_unsafe(&list, out_buff);
for (usize i = 0; i < list.size; ++i) {
scc_cstring_free(&scc_vec_at(list, i));
}
scc_vec_free(list);
return true;
}
// State shared by all steps of one top-level scc_pp_expand_macro call.
typedef struct {
// Macro definitions that identifiers are looked up in.
scc_pp_macro_table_t *macros;
scc_pp_macro_table_t painted_blue; // macros currently being expanded
// Remaining expansion budget; checked and decremented on entry to
// _scc_pp_expand_macro, which fails once it reaches zero.
int depth;
} macro_expansion_state_t;
/**
 * Marks `macro` as being expanded by storing a fresh entry with the same
 * name and type in the painted-blue set of `state`.
 */
static void enter_macro_expansion(macro_expansion_state_t *state,
                                  scc_pp_macro_t *macro) {
    scc_pp_macro_t *marker = scc_pp_macro_new(&macro->name, macro->type);
    scc_pp_macro_table_set(&state->painted_blue, marker);
}
/**
 * Ends the expansion of `macro`: its name is removed from the painted-blue
 * set of `state`, so later occurrences may be expanded again.
 */
static void leave_macro_expansion(macro_expansion_state_t *state,
                                  scc_pp_macro_t *macro) {
    scc_pp_macro_table_t *active = &state->painted_blue;
    scc_pp_macro_table_remove(active, &macro->name);
}
/**
 * Reports whether `macro` may be expanded now, i.e. whether its name is
 * absent from the painted-blue set of `state`.
 */
static cbool can_expand_macro(macro_expansion_state_t *state,
                              scc_pp_macro_t *macro) {
    scc_pp_macro_t *active =
        scc_pp_macro_table_get(&state->painted_blue, &macro->name);
    return active == null;
}
static cbool _scc_pp_expand_macro(scc_probe_stream_t *stream,
macro_expansion_state_t *state,
scc_probe_stream_t **out_stream);
/**
 * Rescans the text of `in` and appends it to `out` with every macro
 * invocation found in it expanded.
 *
 * Identifiers are handed to _scc_pp_expand_macro; string literals and
 * character constants are copied verbatim so that names inside them are
 * never treated as macros; every other character is copied as-is.
 *
 * @param in    text to rescan (not modified)
 * @param out   destination buffer; freed when the function fails
 * @param state expansion state shared with _scc_pp_expand_macro
 * @return true on success, false when a nested expansion failed
 */
static cbool expanded_buffer(const scc_cstring_t *in, scc_cstring_t *out,
                             macro_expansion_state_t *state) {
    scc_probe_stream_t *in_stream = scc_mem_probe_stream_new(
        scc_cstring_as_cstr(in), scc_cstring_len(in), false);
    cbool ok = true;
    int ch;

    while ((ch = scc_probe_stream_peek(in_stream)) != scc_stream_eof) {
        if (scc_lex_parse_is_identifier_prefix(ch)) {
            // Possible macro name: expand it recursively.
            scc_probe_stream_t *out_stream = null;
            if (_scc_pp_expand_macro(in_stream, state, &out_stream) == false) {
                scc_cstring_free(out);
                ok = false;
                break;
            }
            Assert(out_stream != null);
            while (scc_probe_stream_peek(out_stream) != scc_stream_eof) {
                scc_cstring_append_ch(out,
                                      scc_probe_stream_consume(out_stream));
            }
            Assert(out_stream != null && out_stream->drop != null);
            scc_probe_stream_drop(out_stream);
        } else if (ch == '\"' || ch == '\'') {
            // Copy the whole literal, honouring backslash escapes, up to and
            // including its closing delimiter (or the end of the input).
            const int quote = ch;
            scc_cstring_append_ch(out, scc_probe_stream_consume(in_stream));
            while ((ch = scc_probe_stream_peek(in_stream)) != scc_stream_eof) {
                scc_cstring_append_ch(out,
                                      scc_probe_stream_consume(in_stream));
                if (ch == '\\') {
                    if (scc_probe_stream_peek(in_stream) != scc_stream_eof) {
                        scc_cstring_append_ch(
                            out, scc_probe_stream_consume(in_stream));
                    }
                } else if (ch == quote) {
                    break;
                }
            }
        } else {
            scc_cstring_append_ch(out, scc_probe_stream_consume(in_stream));
        }
    }

    // Released on every path, including the failure one.
    scc_probe_stream_drop(in_stream);
    return ok;
}
/**
* 1. 参数先展开
* 2. 替换后重扫描
* 3. 蓝色集合中不展开
* 4. #, ## 不展开
* 5. 最后的括号要检查
*/
static cbool _scc_pp_expand_macro(scc_probe_stream_t *stream,
macro_expansion_state_t *state,
scc_probe_stream_t **out_stream) {
// TODO self position and it maybe is a stack on #include ?
// 递归扫描
if (state->depth-- <= 0) {
*out_stream = null;
return false;
}
scc_pp_macro_table_t *macros = state->macros;
scc_cstring_t identifier = scc_cstring_new();
scc_pos_t pos = scc_pos_init();
cbool ret;
ret = scc_lex_parse_identifier(stream, &pos, &identifier);
Assert(ret == true);
scc_pp_macro_t *macro = scc_pp_macro_table_get(macros, &identifier);
// 1. 不是宏,直接输出标识符
if (macro == null) {
// 不是宏,直接输出
usize length = scc_cstring_len(&identifier);
*out_stream = scc_mem_probe_stream_new(
scc_cstring_move_cstr(&identifier), length, true);
return true;
} else {
scc_cstring_free(&identifier);
}
// 收集参数(如果是函数宏)
scc_pp_macro_list_t params;
scc_vec_init(params);
if (macro->type == SCC_PP_MACRO_FUNCTION) {
// TODO when expand need check another func with () at the end
scc_lex_parse_skip_whitespace(stream, &pos);
if (scc_probe_stream_peek(stream) != '(') {
goto ORIGIN;
}
ret = scc_pp_parse_macro_arguments(stream, &params);
Assert(ret == true);
}
// 2. 检查到重复展开跳过
// 检查是否可以展开
if (!can_expand_macro(state, macro)) {
ORIGIN:
// 输出原始调用
scc_cstring_t original = scc_cstring_new();
scc_cstring_append(&original, &macro->name);
if (macro->type == SCC_PP_MACRO_FUNCTION && params.size != 0) {
scc_cstring_append_ch(&original, '(');
for (usize i = 0; i < params.size; ++i) {
scc_cstring_append(&original, &scc_vec_at(params, i));
if (i != params.size - 1) {
scc_cstring_append_ch(&original, ',');
scc_cstring_append_ch(&original, ' ');
}
}
scc_cstring_append_ch(&original, ')');
}
*out_stream = scc_mem_probe_stream_new(
scc_cstring_as_cstr(&original), scc_cstring_len(&original), true);
scc_vec_free(params);
return true;
}
// 开始展开
scc_cstring_t expanded = scc_cstring_new();
if (macro->type == SCC_PP_MACRO_OBJECT) {
ret = scc_pp_expand_object_macro(macro, &expanded);
Assert(ret == true);
goto RESCANNING;
} else if (macro->type != SCC_PP_MACRO_FUNCTION) {
TODO();
}
Assert(macro->type == SCC_PP_MACRO_FUNCTION);
scc_pp_macro_list_t expanded_params;
scc_vec_init(expanded_params);
// expand params fisrt and recursive
for (usize i = 0; i < params.size; ++i) {
scc_cstring_t param = scc_vec_at(params, i);
scc_cstring_t out = scc_cstring_new();
expanded_buffer(&param, &out, state);
scc_vec_push(expanded_params, out);
}
ret = scc_pp_expand_function_macro(macro, &params, &expanded_params,
&expanded);
Assert(ret == true);
RESCANNING:
// 重新扫描展开结果
// 将展开内容变换成stream并递归展开
scc_cstring_t rescanned = scc_cstring_new();
enter_macro_expansion(state, macro);
expanded_buffer(&expanded, &rescanned, state);
leave_macro_expansion(state, macro);
scc_cstring_free(&expanded);
*out_stream = scc_mem_probe_stream_new(scc_cstring_as_cstr(&rescanned),
scc_cstring_len(&rescanned), true);
return true;
}
/**
 * Public entry point of macro expansion.
 *
 * Builds a fresh expansion state (empty painted-blue set, the given macro
 * table and depth), expands the identifier at the head of `stream` through
 * _scc_pp_expand_macro and drops the painted-blue set again.
 *
 * @param stream     input positioned on an identifier; must not be null
 * @param macros     macro definitions; must not be null
 * @param out_stream receives the stream produced by the expansion
 * @param depth      expansion limit, must be greater than zero
 * @return the result of _scc_pp_expand_macro
 */
cbool scc_pp_expand_macro(scc_probe_stream_t *stream,
                          scc_pp_macro_table_t *macros,
                          scc_probe_stream_t **out_stream, int depth) {
    Assert(depth > 0 && stream != null && macros != null && out_stream != null);

    macro_expansion_state_t state;
    state.macros = macros;
    state.depth = depth;
    scc_pp_marco_table_init(&state.painted_blue);

    const cbool expanded_ok = _scc_pp_expand_macro(stream, &state, out_stream);

    scc_pp_macro_table_drop(&state.painted_blue);
    return expanded_ok;
}

View File

@@ -9,7 +9,7 @@ scc_pp_macro_t *scc_pp_macro_new(const scc_cstring_t *name,
return null;
}
macro->name = scc_cstring_from_cstr(scc_cstring_as_cstr(name));
macro->name = scc_cstring_copy(name);
macro->type = type;
scc_vec_init(macro->params);
scc_vec_init(macro->replaces);

View File

@@ -1,9 +1,7 @@
#include <ctype.h>
#include <lex_parser.h>
#include <pp_macro.h>
#include <pp_parse.h>
#include <pp_token.h>
#include <string.h>
static const struct {
const char *name;
@@ -57,7 +55,7 @@ static inline void try_to_cut_list(scc_pp_macro_list_t *list,
}
}
static cbool parse_macro_replace_list(scc_probe_stream_t *stream,
cbool scc_pp_parse_macro_replace_list(scc_probe_stream_t *stream,
scc_pp_macro_list_t *list) {
Assert(stream != null && list != null);
scc_probe_stream_reset(stream);
@@ -114,7 +112,7 @@ static cbool parse_macro_replace_list(scc_probe_stream_t *stream,
}
// 解析宏参数列表
static cbool parse_macro_arguments(scc_probe_stream_t *stream,
cbool scc_pp_parse_macro_arguments(scc_probe_stream_t *stream,
scc_pp_macro_list_t *args) {
Assert(stream != null && args != null);
@@ -237,7 +235,7 @@ void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos,
if (!has_whitespace && ch == '(') {
// 函数宏
scc_pp_macro_list_t params;
if (!parse_macro_arguments(stream, &params)) {
if (!scc_pp_parse_macro_arguments(stream, &params)) {
goto ERR;
}
@@ -250,12 +248,12 @@ void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos,
}
scc_pp_macro_list_t replacement;
parse_macro_replace_list(stream, &replacement);
scc_pp_parse_macro_replace_list(stream, &replacement);
scc_pp_add_function_macro(macros, &name, &params, &replacement);
} else {
// 对象宏
scc_pp_macro_list_t replacement;
parse_macro_replace_list(stream, &replacement);
scc_pp_parse_macro_replace_list(stream, &replacement);
scc_pp_add_object_macro(macros, &name, &replacement);
}
scc_cstring_free(&name);
@@ -296,190 +294,3 @@ FREE:
scc_cstring_free(&directive);
scc_cstring_free(&name);
}
// Rewrites `buff` in place as its old content wrapped in double quotes.
// The content is copied verbatim: no character is escaped.
static inline void scc_generate_cstr(scc_cstring_t *buff) {
scc_cstring_t out_buff = scc_cstring_new();
scc_cstring_append_ch(&out_buff, '\"');
// TODO it is too simple
scc_cstring_append(&out_buff, buff);
scc_cstring_append_ch(&out_buff, '\"');
// FIXME there may be a better solution
// Replace the content of `buff` with the quoted text built above.
scc_cstring_clear(buff);
scc_cstring_append(buff, &out_buff);
scc_cstring_free(&out_buff);
}
// The macros below expect a pointer named `list` in scope; the IS_* tests
// are false when the index is outside the list.
// True when token `i` is exactly " ".
#define SCC_PP_IS_LIST_BLANK(i) \
((i) < list->size && scc_vec_at(*list, (i)).data[0] == ' ' && \
scc_vec_at(*list, (i)).data[1] == '\0')
// True when token `i` is exactly "#".
#define SCC_PP_IS_LIST_TO_STRING(i) \
((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' && \
scc_vec_at(*list, (i)).data[1] == '\0')
// True when token `i` is exactly "##".
#define SCC_PP_IS_LIST_CONNECT(i) \
((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' && \
scc_vec_at(*list, (i)).data[1] == '#' && \
scc_vec_at(*list, (i)).data[2] == '\0')
// Appends token `font` to `out_buff` (which must also be in scope), followed
// by token `rear` when `rear` is inside the list.
// NOTE(review): expands to a bare if/else statement, not do { } while (0).
#define SCC_PP_USE_CONNECT(font, rear) \
if (rear < list->size) { \
scc_cstring_append(out_buff, &scc_vec_at(*list, font)); \
scc_cstring_append(out_buff, &scc_vec_at(*list, rear)); \
} else { \
scc_cstring_append(out_buff, &scc_vec_at(*list, font)); \
}
// Builds the output string from a token list, handling # and ##.
// Appends the tokens of `list` to `out_buff`; always returns true.
static inline cbool scc_pp_expand_string_unsafe(scc_pp_macro_list_t *list,
scc_cstring_t *out_buff) {
for (usize i = 0; i < list->size; ++i) {
if (SCC_PP_IS_LIST_BLANK(i + 1)) {
// "tok<blank>##": append tok and the token after "##", then skip them.
// When no "##" follows the blank, the "#" test below is not reached and
// the token is appended unchanged.
if (SCC_PP_IS_LIST_CONNECT(i + 2)) {
SCC_PP_USE_CONNECT(i, i + 3);
i += 3;
continue;
}
} else if (SCC_PP_IS_LIST_CONNECT(i + 1)) {
// "tok##": append tok and the token after "##", then skip them.
SCC_PP_USE_CONNECT(i, i + 2);
i += 2;
continue;
} else if (SCC_PP_IS_LIST_TO_STRING(i)) {
// "#": the next token is quoted in place, i.e. the element stored in
// `list` itself is modified, and appended below.
i += 1;
if (i < list->size) {
scc_generate_cstr(&scc_vec_at(*list, i));
} else {
LOG_WARN("# need a valid literator");
break;
}
}
scc_cstring_append(out_buff, &scc_vec_at(*list, i));
}
return true;
}
// Expand an object-like macro.
// Requires an object macro without parameters and an empty `out_buff`;
// writes the replacement list into `out_buff` and always returns true.
cbool scc_pp_expand_object_macro(scc_pp_macro_t *macro,
scc_cstring_t *out_buff) {
Assert(macro->type == SCC_PP_MACRO_OBJECT && macro->params.size == 0);
Assert(scc_cstring_is_empty(out_buff) == true);
// An object-like macro outputs its replacement text, which is then
// expanded recursively.
scc_pp_expand_string_unsafe(&macro->replaces, out_buff);
return true;
}
// Expand a function-like macro.
// Substitutes the arguments in `params` for the parameter names found in the
// replacement list and writes the result into the empty `out_buff`.
// Always returns true.
cbool scc_pp_expand_function_macro(scc_pp_macro_t *macro,
scc_pp_macro_list_t *params,
scc_cstring_t *out_buff) {
Assert(macro->type == SCC_PP_MACRO_FUNCTION);
Assert(out_buff != null);
Assert(scc_cstring_is_empty(out_buff) == true);
for (usize i = 0; i < macro->replaces.size; ++i) {
// TODO ... __VA_ARGS__
for (usize j = 0; j < macro->params.size; ++j) {
if (scc_strcmp(
scc_cstring_as_cstr(&scc_vec_at(macro->replaces, i)),
scc_cstring_as_cstr(&scc_vec_at(macro->params, j))) == 0) {
// NOTE(review): the substitution overwrites the token stored in
// macro->replaces, so the macro definition itself is changed by this call.
scc_cstring_free(&scc_vec_at(macro->replaces, i));
scc_cstring_append(&scc_vec_at(macro->replaces, i),
&scc_vec_at(*params, j));
// Continues the inner loop over the remaining parameter names.
continue;
}
}
}
scc_pp_expand_string_unsafe(&macro->replaces, out_buff);
return true;
}
// Expands the identifier at the head of `stream`.
//   stream       input positioned on an identifier
//   macros       macro definitions used for lookup
//   expand_stack macros currently being expanded; they are not expanded again
//   out_stream   receives a new stream with the result, null on failure
//   depth        remaining recursion depth; the call fails when it is <= 0
// Returns true on success, false on failure.
cbool scc_pp_expand_macro(scc_probe_stream_t *stream,
scc_pp_macro_table_t *macros,
scc_pp_macro_table_t *expand_stack,
scc_probe_stream_t **out_stream, int depth) {
// TODO self position and it maybe is a stack on #include ?
// Recursive scan
if (depth <= 0) {
*out_stream = null;
return false;
}
Assert(stream != null && macros != null && out_stream != null);
scc_cstring_t identifier = scc_cstring_new();
scc_pos_t pos = scc_pos_init();
cbool ret;
ret = scc_lex_parse_identifier(stream, &pos, &identifier);
Assert(ret == true);
scc_pp_macro_t *macro = scc_pp_macro_table_get(macros, &identifier);
// 1. Not a macro: output the identifier as-is
// 2. Already being expanded: skip it
if (macro == null ||
scc_pp_macro_table_get(expand_stack, &macro->name) != null) {
// `identifier` is not freed on this path; the stream is built over its
// buffer.
*out_stream =
scc_mem_probe_stream_new(scc_cstring_as_cstr(&identifier),
scc_cstring_len(&identifier), false);
return true;
} else {
scc_cstring_free(&identifier);
}
// Expand according to the macro type
scc_cstring_t tmp_buff = scc_cstring_new();
if (macro->type == SCC_PP_MACRO_OBJECT) {
// NOTE(review): this `ret` shadows the outer one.
cbool ret = scc_pp_expand_object_macro(macro, &tmp_buff);
Assert(ret == true);
} else if (macro->type == SCC_PP_MACRO_FUNCTION) {
// FIXME should whitespace be skipped here?
scc_lex_parse_skip_whitespace(stream, &pos);
if (scc_probe_stream_peek(stream) != '(') {
LOG_ERROR("Not a function and skip it");
goto ERR;
}
scc_pp_macro_list_t params;
ret = parse_macro_arguments(stream, &params);
Assert(ret == true);
// NOTE(review): the result of the call below is not stored, so the
// following Assert re-checks the value from parse_macro_arguments.
scc_pp_expand_function_macro(macro, &params, &tmp_buff);
Assert(ret == true);
}
// Mark the macro as being expanded by pushing it on the stack
scc_pp_macro_table_set(expand_stack,
scc_pp_macro_new(&macro->name, macro->type));
// Turn the expanded text into a stream
scc_probe_stream_t *tmp_stream = scc_mem_probe_stream_new(
scc_cstring_as_cstr(&tmp_buff), scc_cstring_len(&tmp_buff), false);
int ch;
scc_cstring_t real_buff = scc_cstring_new();
while ((ch = scc_probe_stream_peek(tmp_stream)) != scc_stream_eof) {
if (scc_lex_parse_is_identifier_prefix(ch)) {
// Recursive check
scc_probe_stream_t *tmp_out_stream;
if (scc_pp_expand_macro(tmp_stream, macros, expand_stack,
&tmp_out_stream, depth - 1) == false) {
// NOTE(review): returns without releasing tmp_buff, tmp_stream and
// real_buff, and without removing the macro from expand_stack.
return false;
}
// scc_cstring_append_cstr();
Assert(tmp_out_stream != null);
// Copy the nested result into real_buff, then drop its stream.
while (scc_probe_stream_peek(tmp_out_stream) != scc_stream_eof) {
scc_cstring_append_ch(&real_buff,
scc_probe_stream_consume(tmp_out_stream));
}
Assert(tmp_out_stream != null && tmp_out_stream->drop != null);
scc_probe_stream_drop(tmp_out_stream);
} else {
scc_cstring_append_ch(&real_buff,
scc_probe_stream_consume(tmp_stream));
}
}
scc_cstring_free(&tmp_buff);
scc_probe_stream_drop(tmp_stream);
*out_stream = scc_mem_probe_stream_new(scc_cstring_as_cstr(&real_buff),
scc_cstring_len(&real_buff), false);
// Expansion finished: pop the macro from the stack
scc_pp_macro_table_remove(expand_stack, &macro->name);
return true;
ERR:
*out_stream = null;
return false;
}

View File

@@ -9,7 +9,11 @@
#include <pp_token.h>
#include <pprocessor.h>
#ifdef TEST_MODE
#define MAX_MACRO_EXPANSION_DEPTH 16
#else
#define MAX_MACRO_EXPANSION_DEPTH 64 // 防止无限递归的最大展开深度
#endif
static int pp_stream_read_char(scc_probe_stream_t *_stream) {
scc_pp_stream_t *stream = (scc_pp_stream_t *)_stream;
@@ -30,12 +34,9 @@ RETRY:
&stream->self->macro_table);
goto RETRY;
} else if (scc_lex_parse_is_identifier_prefix(ch)) {
scc_pp_macro_table_t tmp_table;
scc_pp_marco_table_init(&tmp_table);
cbool ret = scc_pp_expand_macro(
stream->input, &stream->self->macro_table, &tmp_table,
&stream->tmp_stream, MAX_MACRO_EXPANSION_DEPTH);
scc_pp_macro_table_drop(&tmp_table);
cbool ret =
scc_pp_expand_macro(stream->input, &stream->self->macro_table,
&stream->tmp_stream, MAX_MACRO_EXPANSION_DEPTH);
if (ret == false) {
LOG_ERROR("macro_expand_error");
}