feat(ir2mcode): 添加IR到机器码转换模块并更新依赖配置

- 新增ir2mcode库用于将IR转换为机器码
- 添加sccf2target依赖以支持目标平台转换
- 在ast库中添加scc_pos依赖支持位置信息
- 更新cbuild.toml配置文件添加新库依赖
- 实现AMD64架构代码生成功能
- 添加寄存器分配器实现栈和寄存器位置管理
- 支持基本的算术运算和内存访问操作
- 添加PE格式目标文件生成支持
This commit is contained in:
zzy
2026-03-20 14:12:25 +08:00
parent 02a6c684f1
commit de6f5d510a
19 changed files with 4046 additions and 290 deletions

21
libs/ir2mcode/cbuild.toml Normal file
View File

@@ -0,0 +1,21 @@
# Build manifest for the ir2mcode library (IR -> machine code lowering).
[package]
name = "ir2mcode"
version = "0.1.0"
authors = []
description = ""
# Path dependencies on sibling packages.
# NOTE(review): lexer / pproc / parser / ast / ast2ir look like they are only
# needed by the end-to-end test, not by the library sources - confirm whether
# they belong in the library's own dependency list.
dependencies = [
{ name = "scc_utils", path = "../../runtime/scc_utils" },
{ name = "scc_ir", path = "../ir" },
{ name = "scc_mcode", path = "../mcode" },
{ name = "sccf", path = "../sccf" },
{ name = "lexer", path = "../lexer" },
{ name = "pproc", path = "../pproc" },
{ name = "parser", path = "../parser" },
{ name = "ast", path = "../ast" },
{ name = "ast2ir", path = "../ast2ir" },
{ name = "sccf2target", path = "../target/sccf2target" },
]
# features = {}
# default_features = []

View File

@@ -0,0 +1,47 @@
#ifndef __SCC_REG_ALLOC_H__
#define __SCC_REG_ALLOC_H__
#include <scc_core.h>
#include <scc_ir.h>
#include <scc_utils.h>
/** Kind of place a value can live in after allocation. */
typedef enum {
SCC_REG_KIND_UNDEF, ///< no location assigned
SCC_REG_KIND_GPR, ///< general-purpose (integer) register
SCC_REG_KIND_FPR, ///< floating-point register
SCC_REG_KIND_STACK, ///< stack slot
SCC_REG_KIND_IMM, ///< integer immediate
SCC_REG_KIND_IMM_FP, ///< floating-point constant
} scc_reg_kind_t;
/**
 * Concrete location of one value.
 *
 * The meaning of idx depends on kind. As used by the AMD64 backend it is the
 * register number for GPR, an offset subtracted from RBP for STACK, and the
 * constant's value for IMM.
 */
typedef struct {
scc_reg_kind_t kind;
usize idx;
} scc_reg_loc_t;
/** Growable array of locations. */
typedef SCC_VEC(scc_reg_loc_t) scc_reg_loc_vec_t;
struct scc_reg_alloc;
typedef struct scc_reg_alloc scc_reg_alloc_t;
/**
 * Allocation strategy callback.
 *
 * Assigns locations to the values of one IR function and returns the table
 * that maps a node reference to its location (see scc_reg_alloc_with_stack
 * for the encoding used by the bundled strategy).
 */
typedef scc_hashtable_t *(*scc_reg_alloc_func_t)(
scc_reg_alloc_t *ctx, ///< @param [in] allocator context
scc_ir_func_t *func ///< @param [in] IR function to process
);
/** State shared by the allocation strategies. */
typedef struct scc_reg_alloc {
scc_ir_cprog_ctx_t *ir_ctx; ///< IR context used to resolve references
scc_hashtable_t node_ref2reg_loc; ///< result table: node ref -> 1-based index into reg_loc_vec
scc_reg_loc_vec_t reg_loc_vec; ///< storage for every location handed out
int gpr_caller_saved; ///< registers a function may clobber freely; a caller that cares must save them itself.
int gpr_callee_saved; ///< registers whose values a function must preserve.
scc_reg_alloc_func_t reg_alloc_func; ///< strategy invoked by scc_reg_alloc()
int alloc_stack_size; ///< bytes of stack handed out for the current function
} scc_reg_alloc_t;
/**
 * Run the strategy installed in ctx on func and yield its result table.
 * Note: ctx is evaluated twice, so do not pass an expression with side effects.
 */
#define scc_reg_alloc(ctx, func) ((ctx)->reg_alloc_func(ctx, func))
/**
 * Initialise an allocator context.
 * @param ctx    context to initialise
 * @param func   allocation strategy to install
 * @param ir_ctx IR context the strategy will resolve references against
 */
void scc_reg_alloc_init(scc_reg_alloc_t *ctx, scc_reg_alloc_func_t func,
scc_ir_cprog_ctx_t *ir_ctx);
/**
 * Strategy that places every LOAD, OP and ALLOC result in its own 8-byte
 * stack slot; no registers are assigned.
 * @return pointer to ctx->node_ref2reg_loc, refilled for func
 */
scc_hashtable_t *scc_reg_alloc_with_stack(scc_reg_alloc_t *ctx,
scc_ir_func_t *func);
#endif /* __SCC_REG_ALLOC_H__ */

View File

@@ -0,0 +1,25 @@
#ifndef __SCC_IR2MCODE_H__
#define __SCC_IR2MCODE_H__
#include "reg_alloc.h"
#include <scc_core.h>
#include <scc_ir.h>
#include <scc_mcode.h>
#include <sccf_builder.h>
/** State for lowering one IR program to machine code and packaging it as SCCF. */
typedef struct {
scc_ir_cprog_t *cprog; ///< IR program whose function definitions are lowered
scc_ir_cprog_ctx_t *ir_ctx; ///< IR context used to resolve node/type/block/function refs
scc_mcode_t mcode; ///< machine-code buffer being emitted
sccf_builder_t builder; ///< SCCF builder receiving symbols and sections
scc_reg_alloc_t reg_alloc; ///< allocator state; set up by the backend, not by scc_ir2mcode_init
scc_hashtable_t *noderef2regloc; ///< allocation result of the function currently being lowered
} scc_ir2mcode_ctx_t;
/**
 * Bind ctx to an IR program and initialise its machine-code buffer (for arch)
 * and its SCCF builder.
 */
void scc_ir2mcode_init(scc_ir2mcode_ctx_t *ctx, scc_ir_cprog_t *cprog,
scc_ir_cprog_ctx_t *ir_ctx, scc_mcode_arch_t arch);
/**
 * Lower the bound IR program into ctx->mcode / ctx->builder.
 * NOTE(review): the visible implementation always dispatches to the AMD64
 * backend regardless of the arch given at init time.
 */
void scc_ir2mcode(scc_ir2mcode_ctx_t *ctx);
#endif /* __SCC_IR2MCODE_H__ */

View File

@@ -0,0 +1,232 @@
#include <amd64/scc_amd64.h>
#include <amd64/scc_amd64_abi.h>
#include <reg_alloc.h>
#include <scc_ir2mcode.h>
static scc_reg_loc_t *parse_location(scc_ir2mcode_ctx_t *ctx,
scc_ir_bblock_ref_t node_ref) {
scc_ir_node_t *node = scc_ir_ctx_get_node(ctx->ir_ctx, node_ref);
if (node == null) {
LOG_FATAL("invalid node ref");
return null;
}
usize idx = 0;
switch (node->tag) {
case SCC_IR_NODE_CONST_INT:
scc_ir_type_t *type = scc_ir_ctx_get_type(ctx->ir_ctx, node->type);
Assert(type != 0);
Assert(type->tag == SCC_IR_TYPE_U32 || type->tag == SCC_IR_TYPE_I32);
scc_reg_loc_t loc = {.kind = SCC_REG_KIND_IMM,
.idx = (usize)node->data.const_int.int32};
scc_vec_push(ctx->reg_alloc.reg_loc_vec, loc);
idx = scc_vec_size(ctx->reg_alloc.reg_loc_vec);
break;
case SCC_IR_NODE_CONST_UINT:
case SCC_IR_NODE_CONST_FLOAT:
TODO();
break;
default:
idx = (usize)scc_hashtable_get(ctx->noderef2regloc,
(void *)(usize)node_ref);
break;
}
Assert(idx > 0 && idx <= scc_vec_size(ctx->reg_alloc.reg_loc_vec));
return &scc_vec_at(ctx->reg_alloc.reg_loc_vec, idx - 1);
}
/**
 * Emit code that brings the value stored at loc into register reg.
 *
 * @param mcode machine-code buffer to append to
 * @param loc   source location (GPR, STACK or IMM)
 * @param reg   destination register number
 */
static void load_value_to_reg(scc_mcode_t *mcode, scc_reg_loc_t *loc, int reg) {
    switch (loc->kind) {
    case SCC_REG_KIND_GPR:
        /* If the value already lives in the destination register there is
         * nothing to emit, matching store_value_from_reg. */
        if (loc->idx != (usize)reg) {
            scc_mcode_amd64_mov_r64_r64(mcode, reg, loc->idx);
        }
        break;
    case SCC_REG_KIND_STACK:
        /* Stack slots are addressed below the frame pointer: [rbp - idx]. */
        scc_mcode_amd64_mov_r64_m64_disp32(mcode, reg, SCC_AMD64_RBP,
                                           -loc->idx);
        break;
    case SCC_REG_KIND_IMM:
        /* TODO: a 32-bit immediate form would be shorter when the value fits. */
        scc_mcode_amd64_mov_r64_imm64(mcode, reg, loc->idx);
        break;
    default:
        LOG_FATAL("unsupported location");
        break;
    }
}
/**
 * Emit code that writes register reg to the location loc.
 *
 * @param mcode machine-code buffer to append to
 * @param loc   destination location (GPR or STACK; IMM is rejected)
 * @param reg   source register number
 */
static void store_value_from_reg(scc_mcode_t *mcode, scc_reg_loc_t *loc,
                                 int reg) {
    switch (loc->kind) {
    case SCC_REG_KIND_GPR:
        if (loc->idx != (usize)reg) {
            scc_mcode_amd64_mov_r64_r64(mcode, loc->idx, reg);
        }
        /* This break is required: without it a register destination would
         * also emit the stack store below, using the register number as a
         * frame offset. */
        break;
    case SCC_REG_KIND_STACK:
        /* Stack slots are addressed below the frame pointer: [rbp - idx]. */
        scc_mcode_amd64_mov_m64_disp32_r64(mcode, SCC_AMD64_RBP, -loc->idx,
                                           reg);
        break;
    case SCC_REG_KIND_IMM:
        LOG_FATAL("cannot store to immediate");
        break;
    default:
        LOG_FATAL("unsupported location");
        break;
    }
}
static void parse_node(scc_ir2mcode_ctx_t *ctx, scc_ir_bblock_ref_t node_ref,
int idx) {
scc_ir_node_t *node = scc_ir_ctx_get_node(ctx->ir_ctx, node_ref);
if (node == null) {
LOG_ERROR("invalid node ref");
return;
}
switch (node->tag) {
case SCC_IR_NODE_CONV: ///< 类型转换
case SCC_IR_NODE_FUNC_ARG_REF: ///< 函数参数引用
case SCC_IR_NODE_BLOCK_ARG_REF: ///< 基本块参数引用
LOG_ERROR("Unsupported node type: %d", node->tag);
break;
case SCC_IR_NODE_ALLOC: ///< 分配内存(stack)
case SCC_IR_NODE_GLOBAL_ALLOC: ///< 全局分配(bss)
break;
case SCC_IR_NODE_LOAD: ///< 加载数据
{
// node->data.load.target
scc_reg_loc_t *from = parse_location(ctx, node->data.load.target);
scc_reg_loc_t *to = parse_location(ctx, node_ref);
load_value_to_reg(&ctx->mcode, from, SCC_AMD64_RAX);
store_value_from_reg(&ctx->mcode, to, SCC_AMD64_RAX);
break;
}
case SCC_IR_NODE_STORE: ///< 存储数据
{
scc_reg_loc_t *from = parse_location(ctx, node->data.store.value);
scc_reg_loc_t *to = parse_location(ctx, node->data.store.target);
load_value_to_reg(&ctx->mcode, from, SCC_AMD64_RAX);
store_value_from_reg(&ctx->mcode, to, SCC_AMD64_RAX);
break;
}
case SCC_IR_NODE_GET_PTR: ///< 获取指针
case SCC_IR_NODE_GET_ELEM_PTR: ///< 获取元素指针(used by array)
TODO();
case SCC_IR_NODE_OP: ///< 二元运算
scc_reg_loc_t *loc_lhs = parse_location(ctx, node->data.op.lhs);
scc_reg_loc_t *loc_rhs = parse_location(ctx, node->data.op.rhs);
scc_reg_loc_t *loc_res = parse_location(ctx, node_ref);
// 将左操作数加载到 RAX临时结果寄存器
load_value_to_reg(&ctx->mcode, loc_lhs, SCC_AMD64_RAX);
// 将右操作数加载到 RCX
load_value_to_reg(&ctx->mcode, loc_rhs, SCC_AMD64_RCX);
switch (node->data.op.op) {
case SCC_IR_OP_ADD:
scc_mcode_amd64_add_r64_r64(&ctx->mcode, SCC_AMD64_RAX,
SCC_AMD64_RCX);
break;
case SCC_IR_OP_SUB:
scc_mcode_amd64_sub_r64_r64(&ctx->mcode, SCC_AMD64_RAX,
SCC_AMD64_RCX);
break;
case SCC_IR_OP_MUL:
scc_mcode_amd64_mul_r64(&ctx->mcode, SCC_AMD64_RCX);
break;
default:
LOG_FATAL("unknown op: %d", node->data.op.op);
break;
}
// 将 RAX 中的结果存储到 res 位置
store_value_from_reg(&ctx->mcode, loc_res, SCC_AMD64_RAX);
// [SCC_IR_OP_EMPTY] = "empty", [SCC_IR_OP_NEQ] = "!=",
// [SCC_IR_OP_EQ] = "==", [SCC_IR_OP_GT] = ">",
// [SCC_IR_OP_LT] = "<", [SCC_IR_OP_GE] = ">=",
// [SCC_IR_OP_LE] = "<=", [SCC_IR_OP_ADD] = "+",
// [SCC_IR_OP_SUB] = "-", [SCC_IR_OP_MUL] = "*",
// [SCC_IR_OP_DIV] = "/", [SCC_IR_OP_MOD] = "%",
// [SCC_IR_OP_AND] = "&", [SCC_IR_OP_OR] = "|",
// [SCC_IR_OP_XOR] = "^", [SCC_IR_OP_NOT] = "~",
// [SCC_IR_OP_SHL] = "<<", [SCC_IR_OP_SHR] = ">>",
// [SCC_IR_OP_SAR] = ">>a", // Arithmetic shift right
break;
case SCC_IR_NODE_BRANCH: ///< 有条件分支
case SCC_IR_NODE_JUMP: ///< 无条件跳转
case SCC_IR_NODE_CALL: ///< 调用函数
LOG_ERROR("Unsupported node type: %d", node->tag);
break;
case SCC_IR_NODE_RET: ///< 函数返回
scc_mcode_amd64_mov_r64_r64(&ctx->mcode, SCC_AMD64_RSP, SCC_AMD64_RBP);
scc_mcode_amd64_pop_r64(&ctx->mcode, SCC_AMD64_RBP);
scc_mcode_amd64_ret(&ctx->mcode);
break;
default:
UNREACHABLE();
break;
}
}
/**
 * Lower every instruction of a basic block, in order.
 *
 * @param ctx    lowering context
 * @param bblock basic block whose instrs vector is walked front to back
 */
static void parse_bblock(scc_ir2mcode_ctx_t *ctx, scc_ir_bblock_t *bblock) {
    usize pos = 0;
    while (pos < scc_vec_size(bblock->instrs)) {
        scc_ir_node_ref_t instr = scc_vec_at(bblock->instrs, pos);
        /* The position is forwarded as the instruction's index. */
        parse_node(ctx, instr, pos);
        pos++;
    }
}
/**
 * Lower one IR function: run the allocator, emit the prologue, then lower
 * each basic block in order.
 *
 * The epilogue is not emitted here; it is produced when a RET instruction
 * is lowered.
 *
 * @param ctx  lowering context; ctx->noderef2regloc is replaced by the
 *             allocation result for func
 * @param func IR function to lower
 */
static void parse_function(scc_ir2mcode_ctx_t *ctx, scc_ir_func_t *func) {
    ctx->noderef2regloc = scc_reg_alloc(&ctx->reg_alloc, func);
    /* Round the frame size up to a multiple of 16 bytes. */
    usize stack_size = (ctx->reg_alloc.alloc_stack_size + 15) & ~15;
    /* Prologue: push rbp; mov rbp, rsp; sub rsp, stack_size. */
    scc_mcode_amd64_push_r64(&ctx->mcode, SCC_AMD64_RBP);
    scc_mcode_amd64_mov_r64_r64(&ctx->mcode, SCC_AMD64_RBP, SCC_AMD64_RSP);
    scc_mcode_amd64_sub_rsp_imm32(&ctx->mcode, stack_size);
    for (usize i = 0; i < scc_vec_size(func->bblocks); i++) {
        scc_ir_bblock_ref_t bblock_ref = scc_vec_at(func->bblocks, i);
        scc_ir_bblock_t *bblock =
            scc_ir_ctx_get_bblock(ctx->ir_ctx, bblock_ref);
        if (bblock == null) {
            LOG_FATAL("<invalid block>\n");
            return;
        }
        parse_bblock(ctx, bblock);
    }
}
/**
 * AMD64 backend entry point.
 *
 * For every function definition of the program, registers a global function
 * symbol at the current end of the code buffer and lowers the function.
 * Afterwards the code buffer is handed to the SCCF builder as the text
 * section, followed by an empty data section.
 *
 * Returns early, before any section is added, if a function reference cannot
 * be resolved.
 */
void scc_ir2amd64(scc_ir2mcode_ctx_t *ctx) {
    scc_reg_alloc_init(&ctx->reg_alloc, scc_reg_alloc_with_stack, ctx->ir_ctx);

    scc_vec_foreach(ctx->cprog->func_defs, fn_idx) {
        scc_ir_node_ref_t fn_ref = scc_vec_at(ctx->cprog->func_defs, fn_idx);
        scc_ir_func_t *fn = scc_ir_ctx_get_func(ctx->ir_ctx, fn_ref);
        if (fn == null) {
            LOG_ERROR("invalid function reference");
            return;
        }

        /* The symbol points at the first byte this function will emit. */
        sccf_sym_t fn_sym = {
            .sccf_sym_type = SCCF_SYM_TYPE_FUNC,
            .sccf_sym_bind = SCCF_SYM_BIND_GLOBAL,
            .sccf_sym_vis = SCCF_SYM_VIS_DEFAULT,
            .sccf_sym_size = 0,
            .sccf_sect_type = SCCF_SECT_CODE,
            .sccf_sect_offset = scc_vec_size(ctx->mcode.mcode),
        };
        sccf_builder_add_symbol(&ctx->builder, fn->name, &fn_sym);

        parse_function(ctx, fn);
    }

    /* Text section: a view over the emitted code buffer. */
    sccf_sect_data_t code_sect;
    scc_vec_unsafe_from_buffer(code_sect,
                               scc_vec_unsafe_get_data(ctx->mcode.mcode),
                               scc_vec_size(ctx->mcode.mcode));
    sccf_builder_add_text_section(&ctx->builder, &code_sect);

    /* Data section: currently always empty. */
    sccf_sect_data_t empty_data_sect;
    scc_vec_init(empty_data_sect);
    sccf_builder_add_data_section(&ctx->builder, &empty_data_sect);
}

View File

@@ -0,0 +1,59 @@
#include "reg_alloc.h"
u32 hash_func(const void *key) { return (usize)key; }
int equal_func(const void *key1, const void *key2) {
return (usize)key1 - (usize)key2;
}
/**
 * Put an allocator context into its initial state.
 *
 * @param ctx    context to initialise
 * @param func   allocation strategy later invoked through scc_reg_alloc()
 * @param ir_ctx IR context used to resolve block and node references
 */
void scc_reg_alloc_init(scc_reg_alloc_t *ctx, scc_reg_alloc_func_t func,
                        scc_ir_cprog_ctx_t *ir_ctx) {
    /* Inputs supplied by the caller. */
    ctx->ir_ctx = ir_ctx;
    ctx->reg_alloc_func = func;

    /* No registers are described yet and no stack has been handed out. */
    ctx->gpr_callee_saved = 0;
    ctx->gpr_caller_saved = 0;
    ctx->alloc_stack_size = 0;

    /* Empty result containers. */
    scc_vec_init(ctx->reg_loc_vec);
    scc_hashtable_init(&ctx->node_ref2reg_loc, hash_func, equal_func);
}
/**
 * Trivial allocation strategy: every LOAD, OP and ALLOC node of func gets its
 * own 8-byte stack slot; no registers are assigned.
 *
 * Results from a previous function are discarded first. For each slot a
 * STACK location is appended to ctx->reg_loc_vec and the node reference is
 * mapped to the 1-based index of that entry (so 0 can mean "not found").
 *
 * Slot offsets start at 8, not 0: the backend addresses a slot as
 * [rbp - idx], and [rbp - 0] is where the prologue saved the caller's RBP.
 * After the call ctx->alloc_stack_size holds the total bytes required.
 *
 * @param ctx  allocator context
 * @param func IR function to process
 * @return pointer to ctx->node_ref2reg_loc
 */
scc_hashtable_t *scc_reg_alloc_with_stack(scc_reg_alloc_t *ctx,
                                          scc_ir_func_t *func) {
    enum { STACK_SLOT_SIZE = 8 };

    ctx->alloc_stack_size = 0;
    /* Reset both result containers. The table is re-initialised after being
     * dropped, exactly as the vector is, so it is never used in a
     * torn-down state. */
    scc_hashtable_drop(&ctx->node_ref2reg_loc);
    scc_hashtable_init(&ctx->node_ref2reg_loc, hash_func, equal_func);
    scc_vec_free(ctx->reg_loc_vec);
    scc_vec_init(ctx->reg_loc_vec);

    scc_vec_foreach(func->bblocks, i) {
        scc_ir_bblock_ref_t bblock_ref = scc_vec_at(func->bblocks, i);
        scc_ir_bblock_t *bblock =
            scc_ir_ctx_get_bblock(ctx->ir_ctx, bblock_ref);
        Assert(bblock != null);
        scc_vec_foreach(bblock->instrs, j) {
            scc_ir_node_ref_t node_ref = scc_vec_at(bblock->instrs, j);
            scc_ir_node_t *node = scc_ir_ctx_get_node(ctx->ir_ctx, node_ref);
            Assert(node != null);
            switch (node->tag) {
            case SCC_IR_NODE_LOAD:
            case SCC_IR_NODE_OP:
            case SCC_IR_NODE_ALLOC: {
                /* Grow the frame first so the slot lies entirely below RBP:
                 * the first slot occupies [rbp-8, rbp-1]. */
                ctx->alloc_stack_size += STACK_SLOT_SIZE;
                scc_reg_loc_t loc = {.kind = SCC_REG_KIND_STACK,
                                     .idx = (usize)ctx->alloc_stack_size};
                scc_vec_push(ctx->reg_loc_vec, loc);
                /* Size after the push == 1-based index of the new entry. */
                scc_hashtable_set(&ctx->node_ref2reg_loc,
                                  (void *)(usize)node_ref,
                                  (void *)scc_vec_size(ctx->reg_loc_vec));
                break;
            }
            default:
                /* Other nodes get no location from this strategy. */
                break;
            }
        }
    }
    return &ctx->node_ref2reg_loc;
}

View File

@@ -0,0 +1,12 @@
#include <scc_ir2mcode.h>
/**
 * Bind a lowering context to an IR program and prepare its outputs.
 *
 * @param ctx    context to initialise
 * @param cprog  IR program to lower
 * @param ir_ctx IR context used to resolve references
 * @param arch   target architecture for the machine-code buffer
 *
 * ctx->reg_alloc and ctx->noderef2regloc are not touched here; they are
 * set up when lowering runs.
 */
void scc_ir2mcode_init(scc_ir2mcode_ctx_t *ctx, scc_ir_cprog_t *cprog,
                       scc_ir_cprog_ctx_t *ir_ctx, scc_mcode_arch_t arch) {
    /* Inputs. */
    ctx->ir_ctx = ir_ctx;
    ctx->cprog = cprog;

    /* Outputs: code buffer first, then the object builder. */
    scc_mcode_init(&ctx->mcode, arch);
    sccf_builder_init(&ctx->builder);
}
/* AMD64 backend entry point, defined in the backend's translation unit. */
void scc_ir2amd64(scc_ir2mcode_ctx_t *ctx);

/**
 * Lower the program bound to ctx.
 * Currently forwards unconditionally to the AMD64 backend.
 */
void scc_ir2mcode(scc_ir2mcode_ctx_t *ctx) {
    scc_ir2amd64(ctx);
}

View File

@@ -0,0 +1,59 @@
#include <scc_ast2ir.h>
#include <scc_ir2mcode.h>
#include <scc_lexer.h>
#include <scc_parser.h>
#include <sccf2pe.h>
#include <stdio.h>
/**
 * End-to-end smoke test: lex, parse, lower to IR, lower to AMD64 machine
 * code, wrap in SCCF, convert to PE and write the result next to the sources.
 *
 * @param input     C source text to compile
 * @param need_sema whether the parser is given semantic-analysis callbacks
 */
void test_example(const char *input, cbool need_sema) {
    int res = 0;
    scc_sstream_t mem_stream;
    res = scc_sstream_init_by_buffer(&mem_stream, input, scc_strlen(input),
                                     false, 16);
    Assert(res == 0);
    scc_lexer_t lexer;
    scc_lexer_init(&lexer, scc_sstream_to_ring(&mem_stream));
    scc_lexer_tok_ring_t *tok_ring = scc_lexer_to_ring(&lexer, 64, false);
    scc_parser_t parser;
    /* Declared at function scope: the parser receives a pointer to these
     * callbacks and is used after the if-block below has ended, so a
     * block-local object could be dead by the time parsing runs. */
    scc_sema_callbacks_t sema_callbacks;
    if (need_sema) {
        scc_sema_init(&sema_callbacks);
        scc_parser_init(&parser, tok_ring, &sema_callbacks);
    } else {
        scc_parser_init(&parser, tok_ring, null);
    }
    scc_ast_translation_unit_t *tu = scc_parse_translation_unit(&parser);
    /* Stop here on a parse failure instead of lowering a null AST. */
    Assert(tu != null);
    scc_ast2ir_ctx_t ast2ir_ctx;
    /* NOTE(review): this header is included inside the function body;
     * confirm whether it can be moved to the top of the file. */
#include <abi/win_x64_type_abi.h>
    scc_ast2ir_ctx_init(&ast2ir_ctx, scc_win_x64_type_abi);
    scc_ast2ir_translation_unit(&ast2ir_ctx, tu);
    scc_ir2mcode_ctx_t mcode_ctx;
    scc_ir2mcode_init(&mcode_ctx, &ast2ir_ctx.builder.cprog,
                      &ast2ir_ctx.builder.ctx, SCC_MCODE_ARCH_AMD64);
    scc_ir2mcode(&mcode_ctx);
    const sccf_t *sccf = sccf_builder_to_sccf(&mcode_ctx.builder);
    scc_pe_builder_t pe_builder;
    sccf2pe(&pe_builder, sccf);
    /* The output path is built from this source file's own path. */
    scc_pe_dump_to_file(&pe_builder, __FILE__ "/../../test.exe");
}
/**
 * Compile a small program that uses locals, constants and + - * arithmetic,
 * with semantic analysis enabled.
 */
int main() {
    const char *program = "int main() {\n"
                          "    int a;\n"
                          "    int b;\n"
                          "    a = 1 + 2 * 3;\n"
                          "    b = 7;\n"
                          "    a = a - b + 1;\n"
                          "    return a;\n"
                          "}\n";
    test_example(program, true);
    return 0;
}