#include #include #include static void rescan(scc_pproc_expand_t *expand_ctx, const scc_pproc_macro_t *macro, scc_lexer_tok_vec_t *tok_buffer); static inline scc_lexer_tok_t scc_pproc_tok_copy(scc_pproc_expand_t *ctx, const scc_lexer_tok_t *src) { scc_lexer_tok_t dst = scc_lexer_tok_copy(src); if (ctx->need_keep_org_pos) { dst.loc = ctx->call_pos; } return dst; } static scc_lexer_tok_t stringify_argument(scc_pproc_expand_t *ctx, scc_lexer_tok_vec_t *arg_tokens) { // WRITE BY AI scc_str_t str = scc_str_empty(); scc_str_append_ch(&str, '\"'); // 左引号 int need_space = 0; // 是否需要插入空格 scc_lexer_tok_t *tok = null; scc_vec_foreach(*arg_tokens, i) { tok = &scc_vec_at(*arg_tokens, i); if (tok->type == SCC_TOK_BLANK) { need_space = 1; // 标记遇到空白 continue; } // 需要空格且当前不是第一个有效token,插入一个空格 if (need_space && i > 0) { scc_str_append_ch(&str, ' '); } // 对字符串/字符常量内的 " 和 \ 进行转义 if (tok->type == SCC_TOK_STRING_LITERAL || tok->type == SCC_TOK_CHAR_LITERAL) { // 注意:lex包含两端的引号,需要跳过首尾,转义内部字符 // 简化:暂不处理内部转义,直接追加 } scc_str_append(&str, &tok->lexeme); need_space = 0; } scc_str_append_ch(&str, '\"'); // 右引号 scc_lexer_tok_t result; result.type = SCC_TOK_STRING_LITERAL; result.lexeme = str; if (ctx->need_keep_org_pos) { result.loc = ctx->call_pos; } else { result.loc = tok ? tok->loc : scc_pos_create(); } return result; } static scc_lexer_tok_t concatenate_tokens(scc_pproc_expand_t *ctx, const scc_lexer_tok_t *left, const scc_lexer_tok_t *right) { scc_str_t new_lex = scc_str_from_cstr(""); if (left != null) { scc_str_append(&new_lex, &left->lexeme); } if (right != null) { scc_str_append(&new_lex, &right->lexeme); } scc_lexer_t lexer; scc_sstream_t sstream; // new_lex 所有权转移 scc_sstream_init_by_buffer(&sstream, scc_str_as_cstr(&new_lex), scc_str_len(&new_lex), true, 8); scc_lexer_init(&lexer, scc_sstream_to_ring(&sstream)); scc_lexer_tok_ring_t *ring = scc_lexer_to_ring(&lexer, 8, true); scc_lexer_tok_t result = {0}; int ok; scc_ring_next_consume(*ring, result, ok); if (!ok) { result.type = SCC_TOK_EOF; goto RETURN; // FIXME maybe memleak } scc_ring_next_consume(*ring, result, ok); if (ok) { scc_lexer_tok_drop(&result); goto RETURN; // FIXME maybe memleak } scc_lexer_tok_t dummy; RETURN: while (1) { scc_ring_next_consume(*ring, dummy, ok); if (ok == false) { break; } scc_lexer_tok_drop(&dummy); } scc_lexer_drop_ring(ring); scc_lexer_drop(&lexer); scc_sstream_drop(&sstream); if (ctx->need_keep_org_pos) { result.loc = ctx->call_pos; } return result; } static inline void scc_copy_expand(scc_pproc_expand_t *expand_ctx, scc_pproc_expand_t *copyed_ctx, scc_lexer_tok_ring_t *ring) { copyed_ctx->input = ring; copyed_ctx->expanded_set = expand_ctx->expanded_set; copyed_ctx->macro_table = expand_ctx->macro_table; copyed_ctx->need_rescan = false; copyed_ctx->need_parse_defined = expand_ctx->need_parse_defined; copyed_ctx->need_keep_org_pos = expand_ctx->need_keep_org_pos; scc_vec_init(copyed_ctx->output); } void scc_pproc_expand_by_src(scc_pproc_macro_table_t *macro_table, scc_lexer_tok_ring_t *input, scc_lexer_tok_ring_t *output, const scc_pproc_macro_t *macro, cbool need_keep_org_pos) { scc_lexer_tok_vec_t expaned_buffer; scc_vec_init(expaned_buffer); int ok; scc_lexer_tok_t tok; scc_ring_next_consume(*input, tok, ok); if (macro->type == SCC_PP_MACRO_NONE || ok == false) { UNREACHABLE(); } else if (macro->type == SCC_PP_MACRO_OBJECT) { scc_vec_push(expaned_buffer, tok); } else if (macro->type == SCC_PP_MACRO_FUNCTION) { scc_vec_push(expaned_buffer, tok); scc_pproc_parse_macro_arguments(input, &expaned_buffer, true); } while (1) { ok = scc_lexer_peek_non_blank(input, &tok); if (ok == false) { break; } if (tok.type == SCC_TOK_L_PAREN) { scc_pproc_parse_macro_arguments(input, &expaned_buffer, true); } else { break; } } scc_lexer_tok_vec_t output_vec; scc_pproc_expand_by_vec(macro_table, &expaned_buffer, &output_vec, false, need_keep_org_pos); Assert(output->cap == 0 && output->data == null); // FIXME hack it *output = scc_lexer_array_to_ring(&output_vec); } void scc_pproc_expand_by_vec(scc_pproc_macro_table_t *macro_table, scc_lexer_tok_vec_t *input, scc_lexer_tok_vec_t *output, cbool need_parse_defined, cbool need_keep_org_pos) { scc_pproc_expand_t ctx; scc_lexer_tok_ring_t ring = scc_lexer_array_to_ring(input); ctx.input = ˚ ctx.macro_table = macro_table; ctx.need_rescan = false; ctx.need_parse_defined = need_parse_defined; ctx.need_keep_org_pos = need_keep_org_pos; ctx.call_pos = scc_pos_create(); scc_vec_init(ctx.output); scc_pproc_macro_table_t expanded_set; ctx.expanded_set = &expanded_set; scc_pproc_marco_table_init(ctx.expanded_set); scc_pproc_expand_macro(&ctx); *output = ctx.output; scc_ring_free(*ctx.input); scc_pproc_macro_table_drop(ctx.expanded_set); } static void disable(scc_pproc_expand_t *expand_ctx, const scc_pproc_macro_t *macro) { scc_pproc_macro_t *expanded_macro = scc_pproc_macro_new(¯o->name, macro->type); if (expanded_macro == null) { LOG_FATAL("Out of memory"); } scc_pproc_macro_table_set(expand_ctx->expanded_set, expanded_macro); } static void enable(scc_pproc_expand_t *expand_ctx, const scc_pproc_macro_t *macro) { scc_pproc_macro_table_remove(expand_ctx->expanded_set, ¯o->name); } static cbool need_skip(scc_pproc_expand_t *expand_ctx, const scc_pproc_macro_t *macro) { return scc_pproc_macro_table_get(expand_ctx->expanded_set, ¯o->name) != null; } static inline void split_arguments(scc_pproc_macro_extened_params_t *splited_params, scc_lexer_tok_vec_t *raw_args, const scc_pproc_macro_t *macro) { scc_lexer_tok_vec_t args; scc_vec_init(args); int named_count = (int)scc_vec_size(macro->params); cbool is_variadic = (named_count > 0 && scc_vec_at(macro->params, named_count - 1).type == SCC_TOK_ELLIPSIS); int depth = 0; scc_vec_foreach(*raw_args, i) { scc_lexer_tok_t *raw_arg = &scc_vec_at(*raw_args, i); if (raw_arg->type == SCC_TOK_L_PAREN) { depth++; } else if (raw_arg->type == SCC_TOK_R_PAREN) { depth--; } if (depth != 0 || raw_arg->type != SCC_TOK_COMMA || (is_variadic && (int)scc_vec_size(*splited_params) == named_count - 1)) { if (scc_vec_size(args) == 0 && raw_arg->type == SCC_TOK_BLANK) { scc_lexer_tok_drop(raw_arg); } else { scc_lexer_tok_t arg; scc_lexer_tok_move(&arg, raw_arg); scc_vec_push(args, arg); } } else { scc_lexer_tok_drop(raw_arg); if (scc_vec_size(args) && scc_vec_at(args, scc_vec_size(args) - 1).type == SCC_TOK_BLANK) { scc_lexer_tok_drop(&scc_vec_pop(args)); } scc_vec_push(*splited_params, args); scc_vec_init(args); } } scc_vec_free(*raw_args); if (scc_vec_size(args) && scc_vec_at(args, scc_vec_size(args) - 1).type == SCC_TOK_BLANK) { scc_lexer_tok_drop(&scc_vec_pop(args)); } scc_vec_push(*splited_params, args); if (is_variadic && (int)scc_vec_size(*splited_params) == named_count - 1) { scc_vec_init(args); scc_vec_push(*splited_params, args); } } static inline void expand_arguments(scc_pproc_macro_extened_params_t *expanded_params, scc_pproc_macro_extened_params_t *splited_params, scc_pproc_expand_t *expand_ctx) { scc_vec_foreach(*splited_params, i) { scc_pproc_expand_t ctx; scc_lexer_tok_vec_t splite_param = scc_vec_at(*splited_params, i); scc_lexer_tok_vec_t expanded_param; scc_vec_init(expanded_param); scc_vec_foreach(splite_param, j) { scc_lexer_tok_t tok = scc_vec_at(splite_param, j); tok.lexeme = scc_str_copy(&tok.lexeme); scc_vec_push(expanded_param, tok); } scc_lexer_tok_ring_t ring = scc_lexer_array_to_ring(&expanded_param); scc_copy_expand(expand_ctx, &ctx, &ring); scc_pproc_expand_macro(&ctx); scc_ring_free(ring); scc_vec_push(*expanded_params, ctx.output); } } static inline void expanded_params_free(scc_pproc_macro_extened_params_t *expanded_params) { scc_vec_foreach(*expanded_params, i) { scc_lexer_tok_vec_t expanded_param = scc_vec_at(*expanded_params, i); scc_vec_foreach(expanded_param, j) { scc_lexer_tok_t tok = scc_vec_at(expanded_param, j); scc_lexer_tok_drop(&tok); } scc_vec_free(expanded_param); } scc_vec_free(*expanded_params); } static void rescan(scc_pproc_expand_t *expand_ctx, const scc_pproc_macro_t *macro, scc_lexer_tok_vec_t *tok_buffer) { scc_pproc_expand_t rescan_ctx; scc_lexer_tok_ring_t ring = scc_lexer_array_to_ring(tok_buffer); scc_copy_expand(expand_ctx, &rescan_ctx, &ring); disable(expand_ctx, macro); scc_pproc_expand_macro(&rescan_ctx); enable(expand_ctx, macro); scc_vec_foreach(rescan_ctx.output, i) { scc_vec_push(expand_ctx->output, scc_vec_at(rescan_ctx.output, i)); } if (scc_vec_size(expand_ctx->output) == 0) { goto RETURN; } scc_lexer_tok_t *end_tok = &scc_vec_at(expand_ctx->output, scc_vec_size(expand_ctx->output) - 1); if (scc_get_tok_subtype(end_tok->type) != SCC_TOK_SUBTYPE_IDENTIFIER) { goto RETURN; } scc_pproc_macro_t *end_macro = scc_pproc_macro_table_get(expand_ctx->macro_table, &end_tok->lexeme); if (end_macro == null || end_macro->type != SCC_PP_MACRO_FUNCTION) { goto RETURN; } int ok = false; scc_lexer_tok_t tok; ok = scc_lexer_peek_non_blank(expand_ctx->input, &tok); if (ok && tok.type == SCC_TOK_L_PAREN) { scc_lexer_tok_vec_t expaned_buffer; scc_vec_init(expaned_buffer); scc_vec_push(expaned_buffer, scc_vec_pop(expand_ctx->output)); scc_pproc_parse_macro_arguments(expand_ctx->input, &expaned_buffer, true); scc_lexer_tok_vec_t output = {0}; scc_pproc_expand_by_vec(expand_ctx->macro_table, &expaned_buffer, &output, expand_ctx->need_parse_defined, expand_ctx->need_keep_org_pos); scc_vec_foreach(output, i) { scc_vec_push(expand_ctx->output, scc_vec_at(output, i)); } scc_vec_free(output); } RETURN: scc_ring_free(*rescan_ctx.input); scc_vec_free(rescan_ctx.output); } static int find_params(const scc_lexer_tok_t *tok, const scc_pproc_macro_t *macro) { scc_vec_foreach(macro->params, j) { if (scc_str_equal(&(tok->lexeme), &(scc_vec_at(macro->params, j).lexeme)) == 0) { return j; } } return -1; } static inline int got_left_non_blank(int i, const scc_lexer_tok_vec_t *replaces) { int left_idx = i - 1; while (left_idx >= 0 && scc_vec_at(*replaces, left_idx).type == SCC_TOK_BLANK) { left_idx--; } return left_idx; } static inline int got_right_non_blank(int i, const scc_lexer_tok_vec_t *replaces) { int right_idx = i + 1; while (right_idx < (int)scc_vec_size(*replaces) && scc_vec_at(*replaces, right_idx).type == SCC_TOK_BLANK) { right_idx++; } return right_idx; } static void concact(scc_pproc_expand_t *ctx, scc_lexer_tok_vec_t *tok_buffer, scc_lexer_tok_t *right, cbool gnu_va_arg_extend) { // ## contact int tok_buf_size = (int)scc_vec_size(*tok_buffer); int left_idx = got_left_non_blank(tok_buf_size, tok_buffer); scc_lexer_tok_t *left; if (left_idx < 0) { left = null; left_idx = 0; // FIXME for free tok_buffer } else { left = &scc_vec_at(*tok_buffer, left_idx); if (gnu_va_arg_extend && left->type == SCC_TOK_COMMA) { left = null; } } scc_lexer_tok_t concate_tok = concatenate_tokens(ctx, left, right); while (left_idx++ < tok_buf_size) { scc_lexer_tok_drop(&scc_vec_pop(*tok_buffer)); } if (concate_tok.type == SCC_TOK_UNKNOWN) { LOG_FATAL("Invalid ## token"); } if (concate_tok.type != SCC_TOK_EOF) { scc_vec_push(*tok_buffer, concate_tok); } } static inline void expand_function_macro(scc_pproc_expand_t *ctx, const scc_pproc_macro_t *macro) { scc_lexer_tok_vec_t tok_buffer; scc_vec_init(tok_buffer); Assert(macro->type == SCC_PP_MACRO_FUNCTION); scc_lexer_tok_vec_t raw_args; scc_vec_init(raw_args); scc_pproc_parse_macro_arguments(ctx->input, &raw_args, false); // collect, fill and expand arg scc_pproc_macro_extened_params_t splited_params; scc_vec_init(splited_params); split_arguments(&splited_params, &raw_args, macro); Assert(raw_args.cap == 0); // FIXME Assert(scc_vec_size(splited_params) >= scc_vec_size(macro->params)); scc_pproc_macro_extened_params_t expanded_params; scc_vec_init(expanded_params); expand_arguments(&expanded_params, &splited_params, ctx); Assert(scc_vec_size(expanded_params) >= scc_vec_size(macro->params)); // replace scc_vec_foreach(macro->replaces, i) { scc_lexer_tok_t tok = scc_pproc_tok_copy(ctx, &scc_vec_at(macro->replaces, i)); if (tok.type == SCC_TOK_BLANK) { scc_str_drop(&tok.lexeme); tok.lexeme = scc_str_from_cstr(" "); scc_vec_push(tok_buffer, tok); continue; } if (tok.type == SCC_TOK_SHARP) { // # stringify scc_lexer_tok_drop(&tok); int right_idx = got_right_non_blank(i, ¯o->replaces); if (right_idx >= (int)macro->replaces.size) { LOG_WARN("generate empty stringify"); scc_str_drop(&tok.lexeme); tok.lexeme = scc_str_from_cstr(""); scc_vec_push(tok_buffer, tok); break; } int j = find_params(&scc_vec_at(macro->replaces, right_idx), macro); Assert(j != -1 && j < (int)scc_vec_size(splited_params)); tok = stringify_argument(ctx, &scc_vec_at(splited_params, j)); scc_vec_push(tok_buffer, tok); i = right_idx; continue; } else if (tok.type == SCC_TOK_SHARP_SHARP) { // ## contact scc_lexer_tok_drop(&tok); int right_idx = got_right_non_blank(i, ¯o->replaces); scc_lexer_tok_t *right_tok; if (right_idx >= (int)scc_vec_size(macro->replaces)) { right_tok = null; } else { right_tok = &scc_vec_at(macro->replaces, right_idx); } int idx = find_params(right_tok, macro); scc_lexer_tok_vec_t right_vec; scc_vec_init(right_vec); if (idx != -1) { Assert(idx < (int)scc_vec_size(splited_params)); scc_lexer_tok_vec_t *tok_vec = &scc_vec_at(splited_params, idx); scc_vec_foreach(*tok_vec, j) { scc_vec_push(right_vec, scc_pproc_tok_copy( ctx, &scc_vec_at(*tok_vec, j))); } } else { scc_vec_push(right_vec, scc_pproc_tok_copy(ctx, right_tok)); } scc_lexer_tok_t *right = scc_vec_size(right_vec) ? &scc_vec_at(right_vec, 0) : null; // GNU ## extention if (scc_strcmp(scc_str_as_cstr(&(right_tok->lexeme)), "__VA_ARGS__") == 0) { if (scc_vec_size(right_vec) == 0) { concact(ctx, &tok_buffer, right, true); } else { continue; } } else { concact(ctx, &tok_buffer, right, false); } scc_vec_foreach(right_vec, j) { if (j == 0) { scc_lexer_tok_drop(&scc_vec_at(right_vec, j)); continue; } scc_lexer_tok_t tok = {0}; scc_lexer_tok_move(&tok, &scc_vec_at(right_vec, j)); scc_lexer_tok_drop(&scc_vec_at(right_vec, j)); scc_vec_push(tok_buffer, tok); } scc_vec_free(right_vec); i = right_idx; continue; } else { int j = find_params(&tok, macro); if (j != -1) { Assert(j < (int)scc_vec_size(expanded_params)); scc_lexer_tok_vec_t expanded_param = scc_vec_at(expanded_params, j); scc_lexer_tok_drop(&tok); scc_vec_foreach(expanded_param, k) { tok = scc_pproc_tok_copy(ctx, &scc_vec_at(expanded_param, k)); scc_vec_push(tok_buffer, tok); } continue; } } scc_vec_push(tok_buffer, tok); } expanded_params_free(&splited_params); expanded_params_free(&expanded_params); rescan(ctx, macro, &tok_buffer); Assert(tok_buffer.cap == 0); // FIXME } static inline void expand_object_macro(scc_pproc_expand_t *ctx, const scc_pproc_macro_t *macro) { scc_lexer_tok_vec_t tok_buffer; scc_vec_init(tok_buffer); scc_vec_foreach(macro->replaces, i) { scc_lexer_tok_t tok = scc_pproc_tok_copy(ctx, &scc_vec_at(macro->replaces, i)); if (tok.type == SCC_TOK_BLANK) { // FIXME using function to warpper it scc_str_drop(&tok.lexeme); tok.lexeme = scc_str_from_cstr(" "); } else if (tok.type == SCC_TOK_SHARP_SHARP) { // ## contact scc_lexer_tok_drop(&tok); int right_idx = got_right_non_blank(i, ¯o->replaces); scc_lexer_tok_t *right; if (right_idx >= (int)scc_vec_size(macro->replaces)) { right = null; } else { right = &scc_vec_at(macro->replaces, right_idx); } concact(ctx, &tok_buffer, right, false); i = right_idx; continue; } scc_vec_push(tok_buffer, tok); } rescan(ctx, macro, &tok_buffer); } static cbool parse_defined(scc_pproc_expand_t *expand_ctx, scc_pos_t *tok_pos) { scc_lexer_tok_t next_tok = {0}; if (scc_lexer_next_non_blank(expand_ctx->input, &next_tok) == false) { SCC_ERROR(*tok_pos, "Unexpected before defined EOF"); } if (next_tok.type == SCC_TOK_L_PAREN) { scc_lexer_tok_drop(&next_tok); scc_lexer_next_non_blank(expand_ctx->input, &next_tok); if (scc_get_tok_subtype(next_tok.type) != SCC_TOK_SUBTYPE_IDENTIFIER) { SCC_ERROR(next_tok.loc, "Expected identifier before defined"); scc_lexer_tok_drop(&next_tok); } if (scc_pproc_macro_table_get(expand_ctx->macro_table, &next_tok.lexeme) == null) { scc_lexer_tok_drop(&next_tok); scc_lexer_gen_number_false(&next_tok); } else { scc_lexer_tok_drop(&next_tok); scc_lexer_gen_number_true(&next_tok); } scc_vec_push(expand_ctx->output, next_tok); if (scc_lexer_next_non_blank(expand_ctx->input, &next_tok)) { if (next_tok.type == SCC_TOK_R_PAREN) { scc_lexer_tok_drop(&next_tok); return false; } else { SCC_ERROR(next_tok.loc, "Expected ')'"); scc_lexer_tok_drop(&next_tok); } } } else if (scc_get_tok_subtype(next_tok.type) == SCC_TOK_SUBTYPE_IDENTIFIER) { if (scc_pproc_macro_table_get(expand_ctx->macro_table, &next_tok.lexeme) == null) { scc_lexer_tok_drop(&next_tok); scc_lexer_gen_number_false(&next_tok); } else { scc_lexer_tok_drop(&next_tok); scc_lexer_gen_number_true(&next_tok); } scc_vec_push(expand_ctx->output, next_tok); } return true; } void scc_pproc_expand_macro(scc_pproc_expand_t *expand_ctx) { int ok; scc_lexer_tok_t tok; while (1) { scc_ring_next_consume(*expand_ctx->input, tok, ok); if (!ok) { return; } if (scc_get_tok_subtype(tok.type) != SCC_TOK_SUBTYPE_IDENTIFIER) { scc_vec_push(expand_ctx->output, tok); continue; } if (expand_ctx->need_parse_defined && scc_strcmp(scc_str_as_cstr(&tok.lexeme), "defined") == 0) { scc_pos_t pos = tok.loc; scc_lexer_tok_drop(&tok); if (parse_defined(expand_ctx, &pos)) { continue; } else { break; } } // maybe expanded scc_pproc_macro_t *macro = scc_pproc_macro_table_get(expand_ctx->macro_table, &tok.lexeme); if (macro == null || need_skip(expand_ctx, macro)) { // FIXME maybe keyword is error or don't parse c keyword or number tok.type += SCC_TOK_DISABLED; scc_vec_push(expand_ctx->output, tok); continue; } expand_ctx->need_rescan = true; expand_ctx->call_pos = tok.loc; switch (macro->type) { case SCC_PP_MACRO_OBJECT: { scc_lexer_tok_drop(&tok); expand_object_macro(expand_ctx, macro); break; } case SCC_PP_MACRO_FUNCTION: { scc_lexer_tok_t expect_tok; scc_ring_peek(*expand_ctx->input, expect_tok, ok); if (ok == false || expect_tok.type != SCC_TOK_L_PAREN) { scc_vec_push(expand_ctx->output, tok); continue; } scc_lexer_tok_drop(&tok); expand_function_macro(expand_ctx, macro); break; } // FIXME 这可能不符合c语义 case SCC_PP_MACRO_BUILTIN__FILE__: scc_str_drop(&tok.lexeme); scc_str_append_ch(&tok.lexeme, '"'); scc_str_append_cstr(&tok.lexeme, tok.loc.name, scc_strlen(tok.loc.name)); scc_str_append_ch(&tok.lexeme, '"'); tok.type = SCC_TOK_STRING_LITERAL; scc_vec_push(expand_ctx->output, tok); break; case SCC_PP_MACRO_BUILTIN__LINE__: scc_str_drop(&tok.lexeme); char *buff = scc_malloc(32); scc_snprintf(buff, 32, "%zu", tok.loc.line); tok.lexeme = scc_str_from_cstr(buff); scc_free(buff); tok.type = SCC_TOK_INT_LITERAL; scc_vec_push(expand_ctx->output, tok); break; default: UNREACHABLE(); break; } } }