#include #include #include #include #include #include static const struct { const char *name; scc_pp_token_t tok; } keywords[] = { #define X(name, type, tok) {#name, tok}, SCC_PP_INST_TOKEN #undef X }; // 使用二分查找查找关键字 static inline int keyword_cmp(const char *name, int len) { int low = 0; int high = sizeof(keywords) / sizeof(keywords[0]) - 1; while (low <= high) { int mid = (low + high) / 2; const char *key = keywords[mid].name; int cmp = 0; // 自定义字符串比较逻辑 for (int i = 0; i < len; i++) { if (name[i] != key[i]) { cmp = (unsigned char)name[i] - (unsigned char)key[i]; break; } if (name[i] == '\0') break; // 遇到终止符提前结束 } if (cmp == 0) { // 完全匹配检查(长度相同) if (key[len] == '\0') return mid; cmp = -1; // 当前关键词比输入长 } if (cmp < 0) { high = mid - 1; } else { low = mid + 1; } } return -1; // Not a keyword. } static inline void try_to_cut_list(scc_pp_macro_list_t *list, scc_cstring_t *buff) { if (scc_cstring_len(buff) != 0) { scc_vec_push(*list, *buff); *buff = scc_cstring_new(); } } static cbool parse_macro_replace_list(scc_probe_stream_t *stream, scc_pp_macro_list_t *list) { Assert(stream != null && list != null); scc_probe_stream_reset(stream); scc_vec_init(*list); scc_cstring_t replacement = scc_cstring_new(); int ch; scc_pos_t pos = scc_pos_init(); while ((ch = scc_probe_stream_peek(stream)) != scc_stream_eof) { if (scc_lex_parse_is_endline(ch)) { break; } if (scc_lex_parse_is_identifier_prefix(ch)) { try_to_cut_list(list, &replacement); cbool ret = scc_lex_parse_identifier(stream, &pos, &replacement); Assert(ret == true); try_to_cut_list(list, &replacement); } else if (ch == '#') { // 处理 # 和 ## 操作符 scc_probe_stream_consume(stream); try_to_cut_list(list, &replacement); scc_cstring_append_ch(&replacement, '#'); if (scc_probe_stream_peek(stream) == '#') { // ## 连接操作符 scc_probe_stream_consume(stream); scc_cstring_append_ch(&replacement, '#'); } // 我需要尽可能防止空白字符干扰解析 scc_lex_parse_skip_whitespace(stream, &pos); try_to_cut_list(list, &replacement); } else if (scc_lex_parse_is_whitespace(ch)) { try_to_cut_list(list, &replacement); scc_lex_parse_skip_whitespace(stream, &pos); scc_cstring_append_ch(&replacement, ' '); try_to_cut_list(list, &replacement); } else { scc_probe_stream_consume(stream); scc_cstring_append_ch(&replacement, (char)ch); } } if (scc_cstring_len(&replacement) != 0) { scc_vec_push(*list, replacement); replacement = scc_cstring_new(); } // for (usize i = 0; i < list->size; ++i) { // LOG_DEBUG("list %d: %s", (int)i, // scc_cstring_as_cstr(&scc_vec_at(*list, i))); // } return true; } // 解析宏参数列表 static cbool parse_macro_arguments(scc_probe_stream_t *stream, scc_pp_macro_list_t *args) { Assert(stream != null && args != null); scc_vec_init(*args); int ch; scc_probe_stream_reset(stream); // 跳过 '(' ch = scc_probe_stream_peek(stream); if (ch != '(') { return false; } scc_probe_stream_consume(stream); // 消费 '(' int paren_depth = 1; scc_cstring_t current_arg = scc_cstring_new(); scc_pos_t pos = scc_pos_init(); while (paren_depth > 0) { ch = scc_probe_stream_peek(stream); if (ch == scc_stream_eof) { scc_cstring_free(¤t_arg); scc_cstring_free(&pos.name); return false; } if (ch == '(') { paren_depth++; scc_cstring_append_ch(¤t_arg, (char)ch); scc_probe_stream_consume(stream); } else if (ch == ')') { paren_depth--; if (paren_depth > 0) { scc_cstring_append_ch(¤t_arg, (char)ch); } scc_probe_stream_consume(stream); } else if (ch == ',' && paren_depth == 1) { // 参数分隔符 scc_vec_push(*args, current_arg); current_arg = scc_cstring_new(); scc_probe_stream_consume(stream); // 跳过参数后的空白 scc_lex_parse_skip_whitespace(stream, &pos); } else { scc_cstring_append_ch(¤t_arg, (char)ch); scc_probe_stream_consume(stream); } } // 添加最后一个参数 if (!scc_cstring_is_empty(¤t_arg)) { scc_vec_push(*args, current_arg); } else { scc_cstring_free(¤t_arg); } scc_cstring_free(&pos.name); return true; } static cbool safe_skip_backspace_if_endline(scc_probe_stream_t *stream, scc_pos_t *pos) { scc_probe_stream_reset(stream); int ch = scc_probe_stream_peek(stream); // FIXME maybe it not correct while (ch == '\r' || ch == '\n' || ch == ' ' || ch == '\t') { if (scc_lex_parse_is_endline(ch)) { scc_lex_parse_skip_endline(stream, pos); return true; } scc_probe_stream_consume(stream); ch = scc_probe_stream_peek(stream); } scc_probe_stream_reset(stream); return false; } void scc_pp_parse_directive(scc_probe_stream_t *stream, scc_pos_t *pos, scc_pp_macro_table_t *macros) { Assert(stream != null); scc_probe_stream_reset(stream); // 跳过 '#' 和后续空白 if (scc_probe_stream_peek(stream) != '#') { LOG_WARN("Invalid directive"); return; } scc_pos_next(pos); scc_probe_stream_consume(stream); if (safe_skip_backspace_if_endline(stream, pos)) return; // 解析指令名称 scc_cstring_t directive = scc_cstring_new(); if (!scc_lex_parse_identifier(stream, pos, &directive)) { goto ERR; } if (safe_skip_backspace_if_endline(stream, pos)) goto FREE; scc_pp_token_t token = keyword_cmp(scc_cstring_as_cstr(&directive), scc_cstring_len(&directive)); scc_cstring_t name = scc_cstring_new(); switch (token) { case SCC_PP_TOK_DEFINE: { if (!scc_lex_parse_identifier(stream, pos, &name)) { scc_cstring_free(&name); goto ERR; } // 检查是否是函数宏:宏名后是否直接跟着 '('(没有空白字符) scc_probe_stream_reset(stream); int ch = scc_probe_stream_peek(stream); cbool has_whitespace = scc_lex_parse_is_whitespace(ch); if (has_whitespace && safe_skip_backspace_if_endline(stream, pos)) { goto FREE; } if (!has_whitespace && ch == '(') { // 函数宏 scc_pp_macro_list_t params; if (!parse_macro_arguments(stream, ¶ms)) { goto ERR; } ch = scc_probe_stream_peek(stream); if (ch == ')') { scc_probe_stream_consume(stream); // 消费 ')' } if (safe_skip_backspace_if_endline(stream, pos)) { goto FREE; } scc_pp_macro_list_t replacement; parse_macro_replace_list(stream, &replacement); scc_pp_add_function_macro(macros, &name, ¶ms, &replacement); } else { // 对象宏 scc_pp_macro_list_t replacement; parse_macro_replace_list(stream, &replacement); scc_pp_add_object_macro(macros, &name, &replacement); } scc_cstring_free(&name); break; } case SCC_PP_TOK_UNDEF: { if (scc_lex_parse_identifier(stream, pos, &name)) { // TODO ret value scc_pp_macro_table_remove(macros, &name); } break; } case SCC_PP_TOK_INCLUDE: case SCC_PP_TOK_IF: case SCC_PP_TOK_IFDEF: case SCC_PP_TOK_IFNDEF: case SCC_PP_TOK_ELSE: case SCC_PP_TOK_ELIF: case SCC_PP_TOK_ELIFDEF: case SCC_PP_TOK_ELIFNDEF: case SCC_PP_TOK_ENDIF: case SCC_PP_TOK_LINE: case SCC_PP_TOK_EMBED: case SCC_PP_TOK_ERROR: case SCC_PP_TOK_WARNING: case SCC_PP_TOK_PRAMA: // 暂时跳过这一行 scc_lex_parse_skip_line(stream, pos); break; default: LOG_WARN("Unknown preprocessor directive: %s", scc_cstring_as_cstr(&directive)); scc_lex_parse_skip_line(stream, pos); } ERR: scc_lex_parse_skip_line(stream, pos); FREE: scc_cstring_free(&directive); scc_cstring_free(&name); } static inline void scc_generate_cstr(scc_cstring_t *buff) { scc_cstring_t out_buff = scc_cstring_new(); scc_cstring_append_ch(&out_buff, '\"'); // TODO it is too simple scc_cstring_append(&out_buff, buff); scc_cstring_append_ch(&out_buff, '\"'); // FIXME 可能有着更好的解决方案 scc_cstring_clear(buff); scc_cstring_append(buff, &out_buff); scc_cstring_free(&out_buff); } #define SCC_PP_IS_LIST_BLANK(i) \ ((i) < list->size && scc_vec_at(*list, (i)).data[0] == ' ' && \ scc_vec_at(*list, (i)).data[1] == '\0') #define SCC_PP_IS_LIST_TO_STRING(i) \ ((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' && \ scc_vec_at(*list, (i)).data[1] == '\0') #define SCC_PP_IS_LIST_CONNECT(i) \ ((i) < list->size && scc_vec_at(*list, (i)).data[0] == '#' && \ scc_vec_at(*list, (i)).data[1] == '#' && \ scc_vec_at(*list, (i)).data[2] == '\0') #define SCC_PP_USE_CONNECT(font, rear) \ if (rear < list->size) { \ scc_cstring_append(out_buff, &scc_vec_at(*list, font)); \ scc_cstring_append(out_buff, &scc_vec_at(*list, rear)); \ } else { \ scc_cstring_append(out_buff, &scc_vec_at(*list, font)); \ } // for # ## to generator string static inline cbool scc_pp_expand_string_unsafe(scc_pp_macro_list_t *list, scc_cstring_t *out_buff) { for (usize i = 0; i < list->size; ++i) { if (SCC_PP_IS_LIST_BLANK(i + 1)) { if (SCC_PP_IS_LIST_CONNECT(i + 2)) { SCC_PP_USE_CONNECT(i, i + 3); i += 3; continue; } } else if (SCC_PP_IS_LIST_CONNECT(i + 1)) { SCC_PP_USE_CONNECT(i, i + 2); i += 2; continue; } else if (SCC_PP_IS_LIST_TO_STRING(i)) { i += 1; if (i < list->size) { scc_generate_cstr(&scc_vec_at(*list, i)); } else { LOG_WARN("# need a valid literator"); break; } } scc_cstring_append(out_buff, &scc_vec_at(*list, i)); } return true; } // 展开对象宏 cbool scc_pp_expand_object_macro(scc_pp_macro_t *macro, scc_cstring_t *out_buff) { Assert(macro->type == SCC_PP_MACRO_OBJECT && macro->params.size == 0); Assert(scc_cstring_is_empty(out_buff) == true); // 对象宏输出替换文本并进行递归展开 scc_pp_expand_string_unsafe(¯o->replaces, out_buff); return true; } // 展开函数宏 cbool scc_pp_expand_function_macro(scc_pp_macro_t *macro, scc_pp_macro_list_t *params, scc_cstring_t *out_buff) { Assert(macro->type == SCC_PP_MACRO_FUNCTION); Assert(out_buff != null); Assert(scc_cstring_is_empty(out_buff) == true); for (usize i = 0; i < macro->replaces.size; ++i) { // TODO ... __VA_ARGS__ for (usize j = 0; j < macro->params.size; ++j) { if (scc_strcmp( scc_cstring_as_cstr(&scc_vec_at(macro->replaces, i)), scc_cstring_as_cstr(&scc_vec_at(macro->params, j))) == 0) { scc_cstring_free(&scc_vec_at(macro->replaces, i)); scc_cstring_append(&scc_vec_at(macro->replaces, i), &scc_vec_at(*params, j)); continue; } } } scc_pp_expand_string_unsafe(¯o->replaces, out_buff); return true; } cbool scc_pp_expand_macro(scc_probe_stream_t *stream, scc_pp_macro_table_t *macros, scc_pp_macro_table_t *expand_stack, scc_probe_stream_t **out_stream, int depth) { // TODO self position and it maybe is a stack on #include ? // 递归扫描 if (depth <= 0) { *out_stream = null; return false; } Assert(stream != null && macros != null && out_stream != null); scc_cstring_t identifier = scc_cstring_new(); scc_pos_t pos = scc_pos_init(); cbool ret; ret = scc_lex_parse_identifier(stream, &pos, &identifier); Assert(ret == true); scc_pp_macro_t *macro = scc_pp_macro_table_get(macros, &identifier); // 1. 不是宏,直接输出标识符 // 2. 检查到重复展开跳过 if (macro == null || scc_pp_macro_table_get(expand_stack, ¯o->name) != null) { *out_stream = scc_mem_probe_stream_new(scc_cstring_as_cstr(&identifier), scc_cstring_len(&identifier), false); return true; } else { scc_cstring_free(&identifier); } // 根据宏类型展开 scc_cstring_t tmp_buff = scc_cstring_new(); if (macro->type == SCC_PP_MACRO_OBJECT) { cbool ret = scc_pp_expand_object_macro(macro, &tmp_buff); Assert(ret == true); } else if (macro->type == SCC_PP_MACRO_FUNCTION) { // FIXME 是否需要忽略空白字符? scc_lex_parse_skip_whitespace(stream, &pos); if (scc_probe_stream_peek(stream) != '(') { LOG_ERROR("Not a function and skip it"); goto ERR; } scc_pp_macro_list_t params; ret = parse_macro_arguments(stream, ¶ms); Assert(ret == true); scc_pp_expand_function_macro(macro, ¶ms, &tmp_buff); Assert(ret == true); } // 已经展开的将被标记并入栈 scc_pp_macro_table_set(expand_stack, scc_pp_macro_new(¯o->name, macro->type)); // 将展开内容变换成stream scc_probe_stream_t *tmp_stream = scc_mem_probe_stream_new( scc_cstring_as_cstr(&tmp_buff), scc_cstring_len(&tmp_buff), false); int ch; scc_cstring_t real_buff = scc_cstring_new(); while ((ch = scc_probe_stream_peek(tmp_stream)) != scc_stream_eof) { if (scc_lex_parse_is_identifier_prefix(ch)) { // 递归检查 scc_probe_stream_t *tmp_out_stream; if (scc_pp_expand_macro(tmp_stream, macros, expand_stack, &tmp_out_stream, depth - 1) == false) { return false; } // scc_cstring_append_cstr(); Assert(tmp_out_stream != null); while (scc_probe_stream_peek(tmp_out_stream) != scc_stream_eof) { scc_cstring_append_ch(&real_buff, scc_probe_stream_consume(tmp_out_stream)); } Assert(tmp_out_stream != null && tmp_out_stream->drop != null); scc_probe_stream_drop(tmp_out_stream); } else { scc_cstring_append_ch(&real_buff, scc_probe_stream_consume(tmp_stream)); } } scc_cstring_free(&tmp_buff); scc_probe_stream_drop(tmp_stream); *out_stream = scc_mem_probe_stream_new(scc_cstring_as_cstr(&real_buff), scc_cstring_len(&real_buff), false); // 已经展开的将被标记并出栈 scc_pp_macro_table_remove(expand_stack, ¯o->name); return true; ERR: *out_stream = null; return false; }