- Change `scc_macro_table_t` to `scc_pp_macro_table_t` for consistency
- Rename `scc_pp_macro_create` to `scc_pp_macro_new` to follow the naming convention
- Remove the unused `scc_pp_compress_whitespace` function
- Rename the macro table functions: `scc_pp_find_macro` → `scc_pp_macro_table_get`, `scc_pp_remove_macro` → `scc_pp_macro_table_remove`
- Add a new `scc_pp_macro_table_set` function for setting macros
- Update all function signatures to use the new type name
- Remove the commented-out whitespace compression code from the implementation
486 lines
17 KiB
C
486 lines
17 KiB
C
#include <ctype.h>
|
|
#include <lex_parser.h>
|
|
#include <pp_macro.h>
|
|
#include <pp_parse.h>
|
|
#include <pp_token.h>
|
|
#include <string.h>
|
|
|
|
static const struct {
|
|
const char *name;
|
|
scc_pp_token_t tok;
|
|
} keywords[] = {
|
|
#define X(name, type, tok) {#name, tok},
|
|
SCC_PP_INST_TOKEN
|
|
#undef X
|
|
};
|
|
|
|
// 使用二分查找查找关键字
|
|
static inline int keyword_cmp(const char *name, int len) {
|
|
int low = 0;
|
|
int high = sizeof(keywords) / sizeof(keywords[0]) - 1;
|
|
while (low <= high) {
|
|
int mid = (low + high) / 2;
|
|
const char *key = keywords[mid].name;
|
|
int cmp = 0;
|
|
|
|
// 自定义字符串比较逻辑
|
|
for (int i = 0; i < len; i++) {
|
|
if (name[i] != key[i]) {
|
|
cmp = (unsigned char)name[i] - (unsigned char)key[i];
|
|
break;
|
|
}
|
|
if (name[i] == '\0')
|
|
break; // 遇到终止符提前结束
|
|
}
|
|
|
|
if (cmp == 0) {
|
|
// 完全匹配检查(长度相同)
|
|
if (key[len] == '\0')
|
|
return mid;
|
|
cmp = -1; // 当前关键词比输入长
|
|
}
|
|
|
|
if (cmp < 0) {
|
|
high = mid - 1;
|
|
} else {
|
|
low = mid + 1;
|
|
}
|
|
}
|
|
return -1; // Not a keyword.
|
|
}
|
|
|
|
static inline void try_to_cut_list(scc_pp_macro_list_t *list,
|
|
scc_cstring_t *buff) {
|
|
if (scc_cstring_len(buff) != 0) {
|
|
scc_vec_push(*list, *buff);
|
|
*buff = scc_cstring_new();
|
|
}
|
|
}
|
|
|
|
static cbool parse_macro_replace_list(scc_probe_stream_t *stream,
|
|
scc_pp_macro_list_t *list) {
|
|
Assert(stream != null && list != null);
|
|
scc_probe_stream_reset(stream);
|
|
|
|
scc_vec_init(*list);
|
|
scc_cstring_t replacement = scc_cstring_new();
|
|
int ch;
|
|
scc_pos_t pos = scc_pos_init();
|
|
|
|
while ((ch = scc_probe_stream_peek(stream)) != scc_stream_eof) {
|
|
if (scc_lex_parse_is_endline(ch)) {
|
|
break;
|
|
}
|
|
if (scc_lex_parse_is_identifier_prefix(ch)) {
|
|
try_to_cut_list(list, &replacement);
|
|
cbool ret = scc_lex_parse_identifier(stream, &pos, &replacement);
|
|
Assert(ret == true);
|
|
try_to_cut_list(list, &replacement);
|
|
} else if (ch == '#') {
|
|
// 处理 # 和 ## 操作符
|
|
scc_probe_stream_consume(stream);
|
|
try_to_cut_list(list, &replacement);
|
|
|
|
scc_cstring_append_ch(&replacement, '#');
|
|
if (scc_probe_stream_peek(stream) == '#') {
|
|
// ## 连接操作符
|
|
scc_probe_stream_consume(stream);
|
|
scc_cstring_append_ch(&replacement, '#');
|
|
}
|
|
// 我需要尽可能防止空白字符干扰解析
|
|
scc_lex_parse_skip_whitespace(stream, &pos);
|
|
try_to_cut_list(list, &replacement);
|
|
} else if (scc_lex_parse_is_whitespace(ch)) {
|
|
try_to_cut_list(list, &replacement);
|
|
scc_lex_parse_skip_whitespace(stream, &pos);
|
|
scc_cstring_append_ch(&replacement, ' ');
|
|
try_to_cut_list(list, &replacement);
|
|
} else {
|
|
scc_probe_stream_consume(stream);
|
|
scc_cstring_append_ch(&replacement, (char)ch);
|
|
}
|
|
}
|
|
|
|
if (scc_cstring_len(&replacement) != 0) {
|
|
scc_vec_push(*list, replacement);
|
|
replacement = scc_cstring_new();
|
|
}
|
|
|
|
// for (usize i = 0; i < list->size; ++i) {
|
|
// LOG_DEBUG("list %d: %s", (int)i,
|
|
// scc_cstring_as_cstr(&scc_vec_at(*list, i)));
|
|
// }
|
|
return true;
|
|
}
|
|
|
|
// 解析宏参数列表
|
|
static cbool parse_macro_arguments(scc_probe_stream_t *stream,
|
|
scc_pp_macro_list_t *args) {
|
|
Assert(stream != null && args != null);
|
|
|
|
scc_vec_init(*args);
|
|
int ch;
|
|
scc_probe_stream_reset(stream);
|
|
|
|
// 跳过 '('
|
|
ch = scc_probe_stream_peek(stream);
|
|
if (ch != '(') {
|
|
return false;
|
|
}
|
|
scc_probe_stream_consume(stream); // 消费 '('
|
|
|
|
int paren_depth = 1;
|
|
scc_cstring_t current_arg = scc_cstring_new();
|
|
scc_pos_t pos = scc_pos_init();
|
|
|
|
while (paren_depth > 0) {
|
|
ch = scc_probe_stream_peek(stream);
|
|
if (ch == scc_stream_eof) {
|
|
scc_cstring_free(¤t_arg);
|
|
scc_cstring_free(&pos.name);
|
|
return false;
|
|
}
|
|
|
|
if (ch == '(') {
|
|
paren_depth++;
|
|
scc_cstring_append_ch(¤t_arg, (char)ch);
|
|
scc_probe_stream_consume(stream);
|
|
} else if (ch == ')') {
|
|
paren_depth--;
|
|
if (paren_depth > 0) {
|
|
scc_cstring_append_ch(¤t_arg, (char)ch);
|
|
}
|
|
scc_probe_stream_consume(stream);
|
|
} else if (ch == ',' && paren_depth == 1) {
|
|
// 参数分隔符
|
|
scc_vec_push(*args, current_arg);
|
|
current_arg = scc_cstring_new();
|
|
scc_probe_stream_consume(stream);
|
|
// 跳过参数后的空白
|
|
scc_lex_parse_skip_whitespace(stream, &pos);
|
|
} else {
|
|
scc_cstring_append_ch(¤t_arg, (char)ch);
|
|
scc_probe_stream_consume(stream);
|
|
}
|
|
}
|
|
|
|
// 添加最后一个参数
|
|
if (!scc_cstring_is_empty(¤t_arg)) {
|
|
scc_vec_push(*args, current_arg);
|
|
} else {
|
|
scc_cstring_free(¤t_arg);
|
|
}
|
|
|
|
scc_cstring_free(&pos.name);
|
|
return true;
|
|
}
|
|
|
|
static cbool safe_skip_backspace_if_endline(scc_probe_stream_t *stream,
|
|
scc_pos_t *pos) {
|
|
scc_probe_stream_reset(stream);
|
|
int ch = scc_probe_stream_peek(stream);
|
|
// FIXME maybe it not correct
|
|
while (ch == '\r' || ch == '\n' || ch == ' ' || ch == '\t') {
|
|
if (scc_lex_parse_is_endline(ch)) {
|
|
scc_lex_parse_skip_endline(stream, pos);
|
|
return true;
|
|
}
|
|
scc_probe_stream_consume(stream);
|
|
ch = scc_probe_stream_peek(stream);
|
|
}
|
|
scc_probe_stream_reset(stream);
|
|
return false;
|
|
}
|
|
|
|
void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos,
|
|
scc_pp_macro_table_t *macros) {
|
|
Assert(stream != null);
|
|
|
|
scc_probe_stream_reset(stream);
|
|
// 跳过 '#' 和后续空白
|
|
if (scc_probe_stream_peek(stream) != '#') {
|
|
LOG_WARN("Invalid directive");
|
|
return;
|
|
}
|
|
scc_pos_next(pos);
|
|
scc_probe_stream_consume(stream);
|
|
if (safe_skip_backspace_if_endline(stream, pos))
|
|
return;
|
|
|
|
// 解析指令名称
|
|
scc_cstring_t directive = scc_cstring_new();
|
|
if (!scc_lex_parse_identifier(stream, pos, &directive)) {
|
|
goto ERR;
|
|
}
|
|
if (safe_skip_backspace_if_endline(stream, pos))
|
|
goto FREE;
|
|
|
|
scc_pp_token_t token = keyword_cmp(scc_cstring_as_cstr(&directive),
|
|
scc_cstring_len(&directive));
|
|
|
|
scc_cstring_t name = scc_cstring_new();
|
|
switch (token) {
|
|
case SCC_PP_TOK_DEFINE: {
|
|
if (!scc_lex_parse_identifier(stream, pos, &name)) {
|
|
scc_cstring_free(&name);
|
|
goto ERR;
|
|
}
|
|
|
|
// 检查是否是函数宏:宏名后是否直接跟着 '('(没有空白字符)
|
|
scc_probe_stream_reset(stream);
|
|
int ch = scc_probe_stream_peek(stream);
|
|
cbool has_whitespace = scc_lex_parse_is_whitespace(ch);
|
|
if (has_whitespace && safe_skip_backspace_if_endline(stream, pos)) {
|
|
goto FREE;
|
|
}
|
|
|
|
if (!has_whitespace && ch == '(') {
|
|
// 函数宏
|
|
scc_pp_macro_list_t params;
|
|
if (!parse_macro_arguments(stream, ¶ms)) {
|
|
goto ERR;
|
|
}
|
|
|
|
ch = scc_probe_stream_peek(stream);
|
|
if (ch == ')') {
|
|
scc_probe_stream_consume(stream); // 消费 ')'
|
|
}
|
|
if (safe_skip_backspace_if_endline(stream, pos)) {
|
|
goto FREE;
|
|
}
|
|
|
|
scc_pp_macro_list_t replacement;
|
|
parse_macro_replace_list(stream, &replacement);
|
|
scc_pp_add_function_macro(macros, &name, ¶ms, &replacement);
|
|
} else {
|
|
// 对象宏
|
|
scc_pp_macro_list_t replacement;
|
|
parse_macro_replace_list(stream, &replacement);
|
|
scc_pp_add_object_macro(macros, &name, &replacement);
|
|
}
|
|
scc_cstring_free(&name);
|
|
break;
|
|
}
|
|
case SCC_PP_TOK_UNDEF: {
|
|
if (scc_lex_parse_identifier(stream, pos, &name)) {
|
|
// TODO ret value
|
|
scc_pp_macro_table_remove(macros, &name);
|
|
}
|
|
break;
|
|
}
|
|
case SCC_PP_TOK_INCLUDE:
|
|
case SCC_PP_TOK_IF:
|
|
case SCC_PP_TOK_IFDEF:
|
|
case SCC_PP_TOK_IFNDEF:
|
|
case SCC_PP_TOK_ELSE:
|
|
case SCC_PP_TOK_ELIF:
|
|
case SCC_PP_TOK_ELIFDEF:
|
|
case SCC_PP_TOK_ELIFNDEF:
|
|
case SCC_PP_TOK_ENDIF:
|
|
case SCC_PP_TOK_LINE:
|
|
case SCC_PP_TOK_EMBED:
|
|
case SCC_PP_TOK_ERROR:
|
|
case SCC_PP_TOK_WARNING:
|
|
case SCC_PP_TOK_PRAMA:
|
|
// 暂时跳过这一行
|
|
scc_lex_parse_skip_line(stream, pos);
|
|
break;
|
|
default:
|
|
LOG_WARN("Unknown preprocessor directive: %s",
|
|
scc_cstring_as_cstr(&directive));
|
|
scc_lex_parse_skip_line(stream, pos);
|
|
}
|
|
ERR:
|
|
scc_lex_parse_skip_line(stream, pos);
|
|
FREE:
|
|
scc_cstring_free(&directive);
|
|
scc_cstring_free(&name);
|
|
}
|
|
|
|
// Turn the text in `buff` into a string literal by wrapping it in double
// quotes, in place: on return `buff` holds `"` + old text + `"`.
static inline void scc_generate_cstr(scc_cstring_t *buff) {
    scc_cstring_t quoted = scc_cstring_new();

    // TODO it is too simple
    scc_cstring_append_ch(&quoted, '"');
    scc_cstring_append(&quoted, buff);
    scc_cstring_append_ch(&quoted, '"');

    // FIXME there may be a better way than rebuilding through a temporary
    scc_cstring_clear(buff);
    scc_cstring_append(buff, &quoted);

    scc_cstring_free(&quoted);
}
|
|
|
|
#define SCC_PP_IS_LIST_BLANK(i) \
|
|
((i) < list->size && scc_vec_at(*list, (i)).data[0] == ' ' && \
|
|
scc_vec_at(*list, (i)).data[1] == '\0')
|
|
#define SCC_PP_IS_LIST_TO_STRING(i) \
|
|
((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' && \
|
|
scc_vec_at(*list, (i)).data[1] == '\0')
|
|
#define SCC_PP_IS_LIST_CONNECT(i) \
|
|
((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' && \
|
|
scc_vec_at(*list, (i)).data[1] == '#' && \
|
|
scc_vec_at(*list, (i)).data[2] == '\0')
|
|
#define SCC_PP_USE_CONNECT(font, rear) \
|
|
if (rear < list->size) { \
|
|
scc_cstring_append(out_buff, &scc_vec_at(*list, font)); \
|
|
scc_cstring_append(out_buff, &scc_vec_at(*list, rear)); \
|
|
} else { \
|
|
scc_cstring_append(out_buff, &scc_vec_at(*list, font)); \
|
|
}
|
|
// for # ## to generator string
|
|
static inline cbool scc_pp_expand_string_unsafe(scc_pp_macro_list_t *list,
|
|
scc_cstring_t *out_buff) {
|
|
for (usize i = 0; i < list->size; ++i) {
|
|
if (SCC_PP_IS_LIST_BLANK(i + 1)) {
|
|
if (SCC_PP_IS_LIST_CONNECT(i + 2)) {
|
|
SCC_PP_USE_CONNECT(i, i + 3);
|
|
i += 3;
|
|
continue;
|
|
}
|
|
} else if (SCC_PP_IS_LIST_CONNECT(i + 1)) {
|
|
SCC_PP_USE_CONNECT(i, i + 2);
|
|
i += 2;
|
|
continue;
|
|
} else if (SCC_PP_IS_LIST_TO_STRING(i)) {
|
|
i += 1;
|
|
if (i < list->size) {
|
|
scc_generate_cstr(&scc_vec_at(*list, i));
|
|
} else {
|
|
LOG_WARN("# need a valid literator");
|
|
break;
|
|
}
|
|
}
|
|
scc_cstring_append(out_buff, &scc_vec_at(*list, i));
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// 展开对象宏
|
|
cbool scc_pp_expand_object_macro(scc_pp_macro_t *macro,
|
|
scc_cstring_t *out_buff) {
|
|
Assert(macro->type == SCC_PP_MACRO_OBJECT && macro->params.size == 0);
|
|
Assert(scc_cstring_is_empty(out_buff) == true);
|
|
// 对象宏输出替换文本并进行递归展开
|
|
scc_pp_expand_string_unsafe(¯o->replaces, out_buff);
|
|
return true;
|
|
}
|
|
|
|
// 展开函数宏
|
|
cbool scc_pp_expand_function_macro(scc_pp_macro_t *macro,
|
|
scc_pp_macro_list_t *params,
|
|
scc_cstring_t *out_buff) {
|
|
Assert(macro->type == SCC_PP_MACRO_FUNCTION);
|
|
Assert(out_buff != null);
|
|
Assert(scc_cstring_is_empty(out_buff) == true);
|
|
for (usize i = 0; i < macro->replaces.size; ++i) {
|
|
// TODO ... __VA_ARGS__
|
|
for (usize j = 0; j < macro->params.size; ++j) {
|
|
if (scc_strcmp(
|
|
scc_cstring_as_cstr(&scc_vec_at(macro->replaces, i)),
|
|
scc_cstring_as_cstr(&scc_vec_at(macro->params, j))) == 0) {
|
|
scc_cstring_free(&scc_vec_at(macro->replaces, i));
|
|
scc_cstring_append(&scc_vec_at(macro->replaces, i),
|
|
&scc_vec_at(*params, j));
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
scc_pp_expand_string_unsafe(¯o->replaces, out_buff);
|
|
return true;
|
|
}
|
|
|
|
cbool scc_pp_expand_macro(scc_probe_stream_t *stream,
|
|
scc_pp_macro_table_t *macros,
|
|
scc_pp_macro_table_t *expand_stack,
|
|
scc_probe_stream_t **out_stream, int depth) {
|
|
// TODO self position and it maybe is a stack on #include ?
|
|
// 递归扫描
|
|
if (depth <= 0) {
|
|
*out_stream = null;
|
|
return false;
|
|
}
|
|
Assert(stream != null && macros != null && out_stream != null);
|
|
|
|
scc_cstring_t identifier = scc_cstring_new();
|
|
scc_pos_t pos = scc_pos_init();
|
|
|
|
cbool ret;
|
|
ret = scc_lex_parse_identifier(stream, &pos, &identifier);
|
|
Assert(ret == true);
|
|
|
|
scc_pp_macro_t *macro = scc_pp_macro_table_get(macros, &identifier);
|
|
// 1. 不是宏,直接输出标识符
|
|
// 2. 检查到重复展开跳过
|
|
if (macro == null ||
|
|
scc_pp_macro_table_get(expand_stack, ¯o->name) != null) {
|
|
*out_stream =
|
|
scc_mem_probe_stream_new(scc_cstring_as_cstr(&identifier),
|
|
scc_cstring_len(&identifier), false);
|
|
return true;
|
|
} else {
|
|
scc_cstring_free(&identifier);
|
|
}
|
|
|
|
// 根据宏类型展开
|
|
scc_cstring_t tmp_buff = scc_cstring_new();
|
|
if (macro->type == SCC_PP_MACRO_OBJECT) {
|
|
cbool ret = scc_pp_expand_object_macro(macro, &tmp_buff);
|
|
Assert(ret == true);
|
|
} else if (macro->type == SCC_PP_MACRO_FUNCTION) {
|
|
// FIXME 是否需要忽略空白字符?
|
|
scc_lex_parse_skip_whitespace(stream, &pos);
|
|
if (scc_probe_stream_peek(stream) != '(') {
|
|
LOG_ERROR("Not a function and skip it");
|
|
goto ERR;
|
|
}
|
|
scc_pp_macro_list_t params;
|
|
ret = parse_macro_arguments(stream, ¶ms);
|
|
Assert(ret == true);
|
|
scc_pp_expand_function_macro(macro, ¶ms, &tmp_buff);
|
|
Assert(ret == true);
|
|
}
|
|
|
|
// 已经展开的将被标记并入栈
|
|
scc_pp_macro_table_set(expand_stack,
|
|
scc_pp_macro_new(¯o->name, macro->type));
|
|
|
|
// 将展开内容变换成stream
|
|
scc_probe_stream_t *tmp_stream = scc_mem_probe_stream_new(
|
|
scc_cstring_as_cstr(&tmp_buff), scc_cstring_len(&tmp_buff), false);
|
|
int ch;
|
|
scc_cstring_t real_buff = scc_cstring_new();
|
|
|
|
while ((ch = scc_probe_stream_peek(tmp_stream)) != scc_stream_eof) {
|
|
if (scc_lex_parse_is_identifier_prefix(ch)) {
|
|
// 递归检查
|
|
scc_probe_stream_t *tmp_out_stream;
|
|
if (scc_pp_expand_macro(tmp_stream, macros, expand_stack,
|
|
&tmp_out_stream, depth - 1) == false) {
|
|
return false;
|
|
}
|
|
// scc_cstring_append_cstr();
|
|
Assert(tmp_out_stream != null);
|
|
while (scc_probe_stream_peek(tmp_out_stream) != scc_stream_eof) {
|
|
scc_cstring_append_ch(&real_buff,
|
|
scc_probe_stream_consume(tmp_out_stream));
|
|
}
|
|
Assert(tmp_out_stream != null && tmp_out_stream->drop != null);
|
|
scc_probe_stream_drop(tmp_out_stream);
|
|
} else {
|
|
scc_cstring_append_ch(&real_buff,
|
|
scc_probe_stream_consume(tmp_stream));
|
|
}
|
|
}
|
|
scc_cstring_free(&tmp_buff);
|
|
scc_probe_stream_drop(tmp_stream);
|
|
*out_stream = scc_mem_probe_stream_new(scc_cstring_as_cstr(&real_buff),
|
|
scc_cstring_len(&real_buff), false);
|
|
|
|
// 已经展开的将被标记并出栈
|
|
scc_pp_macro_table_remove(expand_stack, ¯o->name);
|
|
return true;
|
|
ERR:
|
|
*out_stream = null;
|
|
return false;
|
|
}
|