From aa8a1ff8cef444a1b2064954bfed4f57d3e6234c Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Tue, 5 May 2026 15:59:31 +0800 Subject: [PATCH] =?UTF-8?q?feat(compiler):=20=E5=90=AF=E7=94=A8=20ir2mcode?= =?UTF-8?q?=20=E5=92=8C=20sccf2target=20=E5=BA=93=E5=B9=B6=E5=AE=9E?= =?UTF-8?q?=E7=8E=B0=20x86=5F64=20=E4=BB=A3=E7=A0=81=E7=94=9F=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 cbuild.toml 中启用 ir2mcode 和 sccf2target 依赖库 - 修改 justfile 中的构建命令,使用 release 模式并更新 tokei 统计排除 mcode 目录 - 重构 LIR 中的地址操作数类型,将 SCC_LIR_INSTR_KIND_ADDR 重命名为 SCC_LIR_INSTR_KIND_MEM - 实现完整的 MIR 到 x86_64 机器码转换,包括: - 添加 move、compare、binary operation 等指令发射函数 - 实现条件分支和跳转指令生成 - 支持算术、逻辑、移位等基本操作 - 添加调用和返回指令处理 - 实现栈分配和寄存器分配功能 - 完善 ir2mcode 模块,将 MIR 指令转换为机器码 - 更新 ir2sccf 模块,集成机器码生成功能 - 添加 mcode 库的架构支持和内存管理功能 - 修复 PE 文件生成中的空指针检查问题 --- cbuild.toml | 5 +- justfile | 8 +- libs/ir/lir/include/scc_lir.h | 5 +- libs/ir/lir/src/scc_lir_dump.c | 2 +- libs/ir/mir/src/mir_x86.c | 607 +++++++++++++++----------- libs/ir2mcode/include/scc_ir2mcode.h | 7 + libs/ir2mcode/include/scc_ir2sccf.h | 2 + libs/ir2mcode/src/scc_ir2mcode.c | 37 +- libs/ir2mcode/src/scc_ir2sccf.c | 52 ++- libs/mcode/include/scc_mcode.h | 17 +- libs/target/sccf2target/src/sccf2pe.c | 3 +- src/config.h | 35 ++ src/main.c | 69 +-- tests/simple/test.py | 392 +++++++++++++---- 14 files changed, 842 insertions(+), 399 deletions(-) diff --git a/cbuild.toml b/cbuild.toml index 24edc60..021183f 100644 --- a/cbuild.toml +++ b/cbuild.toml @@ -12,7 +12,6 @@ dependencies = [ { name = "hir", path = "./libs/ir/hir" }, { name = "lir", path = "./libs/ir/lir" }, { name = "mir", path = "./libs/ir/mir" }, - - # { name = "ir2mcode", path = "./libs/ir2mcode" }, - # { name = "sccf2target", path = "./libs/target/sccf2target" }, + { name = "ir2mcode", path = "./libs/ir2mcode" }, + { name = "sccf2target", path = "./libs/target/sccf2target" }, ] diff --git a/justfile b/justfile index e8fa9db..927f99c 100644 --- a/justfile +++ b/justfile @@ -9,20 +9,20 @@ docs: build_docs count: # you need download `tokei` it can download by cargo - tokei libs runtime src -e tests + tokei libs runtime src -e tests -e libs/mcode count-file: # you need download `tokei` it can download by cargo - tokei libs runtime src -e tests --files + tokei libs runtime src -e tests -e libs/mcode --files clean: cbuild clean build: - cbuild build -cclang --dev + cbuild build -cclang --release build-install: build - cp ./build/dev/scc ./scc + cp ./build/release/scc ./scc test-scc: # windows: (Get-Content a.txt -Raw) -replace '\x1b\[[0-9;]*[a-zA-Z]', '' | Set-Content clean.txt diff --git a/libs/ir/lir/include/scc_lir.h b/libs/ir/lir/include/scc_lir.h index 06cf589..fd6c74d 100644 --- a/libs/ir/lir/include/scc_lir.h +++ b/libs/ir/lir/include/scc_lir.h @@ -37,7 +37,7 @@ typedef enum { SCC_LIR_INSTR_KIND_IMM, // 整数立即数 SCC_LIR_INSTR_KIND_FIMM, // 浮点立即数 SCC_LIR_INSTR_KIND_SYMBOL, // 全局符号 (函数名、全局变量、字符串常量) - SCC_LIR_INSTR_KIND_ADDR // 复杂地址表达式 (base + index*scale + offset) + SCC_LIR_INSTR_KIND_MEM, // 复杂地址表达式 (base + index*scale + offset) } scc_lir_instr_kind_t; /** @@ -73,8 +73,7 @@ typedef struct scc_lir_instr { #define SCC_LIR_SYMBOL(s) \ ((scc_lir_val_t){.kind = SCC_LIR_INSTR_KIND_SYMBOL, .data.symbol = (s)}) #define SCC_LIR_ADDR(b, i, s, o) \ - ((scc_lir_val_t){.kind = SCC_LIR_INSTR_KIND_ADDR, \ - .data.addr = {b, i, s, o}}) + ((scc_lir_val_t){.kind = SCC_LIR_INSTR_KIND_MEM, .data.addr = {b, i, s, o}}) #define SCC_LIR_SIZE_8 1 #define SCC_LIR_SIZE_16 2 diff --git a/libs/ir/lir/src/scc_lir_dump.c b/libs/ir/lir/src/scc_lir_dump.c index b40e72a..6ccb6f7 100644 --- a/libs/ir/lir/src/scc_lir_dump.c +++ b/libs/ir/lir/src/scc_lir_dump.c @@ -159,7 +159,7 @@ static void dump_operand(scc_lir_dump_ctx_t *ctx, const scc_lir_val_t *op) { scc_tree_dump_append_fmt(td, "@%s", op->data.symbol ? op->data.symbol : ""); break; - case SCC_LIR_INSTR_KIND_ADDR: { + case SCC_LIR_INSTR_KIND_MEM: { const scc_lir_addr_t *addr = &op->data.addr; scc_tree_dump_append(td, "["); if (addr->base != -1) { diff --git a/libs/ir/mir/src/mir_x86.c b/libs/ir/mir/src/mir_x86.c index 5050505..a00d053 100644 --- a/libs/ir/mir/src/mir_x86.c +++ b/libs/ir/mir/src/mir_x86.c @@ -101,268 +101,379 @@ static scc_mir_operand_t lir_val_to_mir_op(const scc_lir_val_t *val) { return op; } +// 虚拟临时寄存器分配(简单递增) +static scc_mir_operand_t new_vreg_temp(x86_isel_t *isel) { + // FIXME + static int next_temp = 10000; // 避免与常规 vreg 冲突 + return (scc_mir_operand_t){.kind = SCC_MIR_OP_VREG, .vreg = next_temp++}; +} + +static void emit_move(x86_isel_t *isel, scc_mir_operand_t dst, + scc_mir_operand_t src, u8 size) { + if (dst.kind == SCC_MIR_OP_VREG || dst.kind == SCC_MIR_OP_PREG) { + if (src.kind == SCC_MIR_OP_VREG) { + add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_GPRV_89, dst, src); + } else if (src.kind == SCC_MIR_OP_IMM) { + add_instr_2(isel, + (size == 8) ? SCC_X86_IFORM_MOV_GPRV_IMMZ + : SCC_X86_IFORM_MOV_GPRV_IMMV, + dst, src); + } else if (src.kind == SCC_MIR_OP_SYMBOL) { + add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_IMMZ, dst, src); + } else if (src.kind == SCC_MIR_OP_MEM) { + add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_MEMV, dst, src); + } else { + UNREACHABLE(); + } + } else if (dst.kind == SCC_MIR_OP_MEM) { + if (src.kind == SCC_MIR_OP_VREG) { + add_instr_2(isel, SCC_X86_IFORM_MOV_MEMV_GPRV, dst, src); + } else if (src.kind == SCC_MIR_OP_IMM) { + add_instr_2(isel, SCC_X86_IFORM_MOV_MEMV_IMMZ, dst, src); + } else if (src.kind == SCC_MIR_OP_SYMBOL) { + scc_mir_operand_t temp = new_vreg_temp(isel); + add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_IMMZ, temp, src); + add_instr_2(isel, SCC_X86_IFORM_MOV_MEMV_GPRV, dst, temp); + } else if (src.kind == SCC_MIR_OP_MEM) { + scc_mir_operand_t temp = new_vreg_temp(isel); + emit_move(isel, temp, src, size); + emit_move(isel, dst, temp, size); + } else { + UNREACHABLE(); + } + } else { + UNREACHABLE(); + } +} + +static void emit_compare(x86_isel_t *isel, scc_mir_operand_t op0, + scc_mir_operand_t op1, u8 size) { + // cmp op0, op1 (注意 x86 是 cmp a, b 即 a - b) + if (op0.kind == SCC_MIR_OP_VREG && op1.kind == SCC_MIR_OP_IMM) { + add_instr_2(isel, SCC_X86_IFORM_CMP_GPRV_IMMZ, op0, op1); + } else if (op0.kind == SCC_MIR_OP_VREG && op1.kind == SCC_MIR_OP_VREG) { + add_instr_2(isel, SCC_X86_IFORM_CMP_GPRV_GPRV_39, op0, op1); + } else { + UNREACHABLE(); + } +} + +static scc_x86_iform_t cond_to_jcc(scc_lir_cond_t cond) { + switch (cond) { + case SCC_LIR_COND_EQ: + return SCC_X86_IFORM_JZ_RELBRZ; + case SCC_LIR_COND_NE: + return SCC_X86_IFORM_JNZ_RELBRZ; + case SCC_LIR_COND_SLT: + return SCC_X86_IFORM_JL_RELBRZ; + case SCC_LIR_COND_SLE: + return SCC_X86_IFORM_JLE_RELBRZ; + case SCC_LIR_COND_SGT: + return SCC_X86_IFORM_JNLE_RELBRZ; // JNLE = JG + case SCC_LIR_COND_SGE: + return SCC_X86_IFORM_JNL_RELBRZ; // JNL = JGE + case SCC_LIR_COND_ULT: + return SCC_X86_IFORM_JB_RELBRZ; + case SCC_LIR_COND_ULE: + return SCC_X86_IFORM_JBE_RELBRZ; + case SCC_LIR_COND_UGT: + return SCC_X86_IFORM_JNBE_RELBRZ; // JNBE = JA + case SCC_LIR_COND_UGE: + return SCC_X86_IFORM_JNB_RELBRZ; // JNB = JAE + // 浮点比较暂不处理(需要 fcomi + jcc) + default: + UNREACHABLE(); + } +} + +static void emit_compare_and_branch(x86_isel_t *isel, scc_lir_cond_t cond, + scc_mir_operand_t lhs, + scc_mir_operand_t rhs, + scc_mir_operand_t true_bb, + scc_mir_operand_t false_bb, u8 size) { + if (lhs.kind == SCC_MIR_OP_VREG && rhs.kind == SCC_MIR_OP_IMM) + add_instr_2(isel, SCC_X86_IFORM_CMP_GPRV_IMMZ, lhs, rhs); + else if (lhs.kind == SCC_MIR_OP_VREG && rhs.kind == SCC_MIR_OP_VREG) + add_instr_2(isel, SCC_X86_IFORM_CMP_GPRV_GPRV_39, lhs, rhs); + else + UNREACHABLE(); + + scc_x86_iform_t jcc = cond_to_jcc(cond); + add_instr_1(isel, jcc, true_bb); + add_instr_1(isel, SCC_X86_IFORM_JMP_RELBRZ, false_bb); +} + +static void emit_ret(x86_isel_t *isel, scc_lir_val_t ret_val) { + if (ret_val.kind != SCC_LIR_INSTR_KIND_NONE) { + scc_mir_operand_t rax = {.kind = SCC_MIR_OP_PREG, + .preg = SCC_X86_REG_RAX}; + emit_move(isel, rax, lir_val_to_mir_op(&ret_val), 8); + } + add_instr_0(isel, SCC_X86_IFORM_RET_NEAR); +} + +static void emit_binary_op(x86_isel_t *isel, scc_lir_op_t op, + scc_mir_operand_t dst, scc_mir_operand_t src0, + scc_mir_operand_t src1, u8 size) { + if (dst.kind == SCC_MIR_OP_VREG && src0.kind == SCC_MIR_OP_VREG && + dst.vreg != src0.vreg) + emit_move(isel, dst, src0, size); + + bool is_imm = (src1.kind == SCC_MIR_OP_IMM); + scc_x86_iform_t iform; + switch (op) { + case SCC_LIR_ADD: + iform = is_imm ? SCC_X86_IFORM_ADD_GPRV_IMMZ + : SCC_X86_IFORM_ADD_GPRV_GPRV_01; + break; + case SCC_LIR_SUB: + iform = is_imm ? SCC_X86_IFORM_SUB_GPRV_IMMZ + : SCC_X86_IFORM_SUB_GPRV_GPRV_29; + break; + case SCC_LIR_AND: + iform = is_imm ? SCC_X86_IFORM_AND_GPRV_IMMZ + : SCC_X86_IFORM_AND_GPRV_GPRV_21; + break; + case SCC_LIR_OR: + iform = + is_imm ? SCC_X86_IFORM_OR_GPRV_IMMZ : SCC_X86_IFORM_OR_GPRV_GPRV_09; + break; + case SCC_LIR_XOR: + iform = is_imm ? SCC_X86_IFORM_XOR_GPRV_IMMZ + : SCC_X86_IFORM_XOR_GPRV_GPRV_31; + break; + default: + UNREACHABLE(); + } + add_instr_2(isel, iform, dst, src1); +} + +static scc_mir_operand_t stack_slot_op(int offset) { + return (scc_mir_operand_t){.kind = SCC_MIR_OP_MEM, .stack_slot = offset}; +} + +static void emit_spill_load(x86_isel_t *isel, int vreg, int offset) { + scc_mir_operand_t dst = {.kind = SCC_MIR_OP_VREG, .vreg = vreg}; + add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_MEMV, dst, stack_slot_op(offset)); +} + +static void emit_spill_store(x86_isel_t *isel, int vreg, int offset) { + scc_mir_operand_t src = {.kind = SCC_MIR_OP_VREG, .vreg = vreg}; + add_instr_2(isel, SCC_X86_IFORM_MOV_MEMV_GPRV, stack_slot_op(offset), src); +} + +static void emit_call(x86_isel_t *isel, const char *callee, + scc_mir_operand_t ret_reg) { + scc_mir_operand_t sym = {.kind = SCC_MIR_OP_SYMBOL, .symbol = callee}; + add_instr_1(isel, SCC_X86_IFORM_CALL_NEAR_GPRV, sym); + if (ret_reg.kind == SCC_MIR_OP_VREG) { + scc_mir_operand_t rax = {.kind = SCC_MIR_OP_PREG, + .preg = SCC_X86_REG_RAX}; + emit_move(isel, ret_reg, rax, 8); + } +} + +static void emit_alloca(x86_isel_t *isel, scc_mir_operand_t dst, i64 size) { + scc_mir_operand_t imm = {.kind = SCC_MIR_OP_IMM, .imm = size}; + scc_mir_operand_t rsp = {.kind = SCC_MIR_OP_PREG, .preg = SCC_X86_REG_RSP}; + add_instr_2(isel, SCC_X86_IFORM_SUB_GPRV_IMMZ, rsp, imm); + emit_move(isel, dst, rsp, 8); +} + static void sel_mir(x86_isel_t *isel, const scc_lir_instr_t *instr) { + scc_mir_operand_t dst = lir_val_to_mir_op(&instr->to); + scc_mir_operand_t src0 = lir_val_to_mir_op(&instr->arg0); + scc_mir_operand_t src1 = lir_val_to_mir_op(&instr->arg1); + u8 size = instr->size; + switch (instr->op) { - case SCC_LIR_MOV: { - add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_GPRV_89, - lir_val_to_mir_op(&instr->to), - lir_val_to_mir_op(&instr->arg0)); - } break; - case SCC_LIR_LOAD: { - add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_MEMV, - lir_val_to_mir_op(&instr->to), - lir_val_to_mir_op(&instr->arg0)); - } break; - case SCC_LIR_LOAD_ADDR: { - add_instr_2(isel, SCC_X86_IFORM_LEA_GPRV_AGEN, - lir_val_to_mir_op(&instr->to), - lir_val_to_mir_op(&instr->arg0)); - } break; + /* ---- 数据移动 ---- */ + case SCC_LIR_MOV: + emit_move(isel, dst, src0, size); + break; + + case SCC_LIR_LOAD: + // 从 [addr] 加载到 vreg(addr 通常为 vreg) + add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_MEMV, dst, src0); + break; + case SCC_LIR_STORE: - case SCC_LIR_STORE_ADDR: { - add_instr_2(isel, SCC_X86_IFORM_MOV_MEMV_GPRV, - lir_val_to_mir_op(&instr->arg1), - lir_val_to_mir_op(&instr->arg0)); - } break; - // case SCC_LIR_LEA: - // case SCC_LIR_NEG: - // case SCC_LIR_NOT: - // case SCC_LIR_FNEG: - // case SCC_LIR_FCVT: - case SCC_LIR_ALLOCA: { + // 将 src0 存入 [src1] + add_instr_2(isel, SCC_X86_IFORM_MOV_MEMV_GPRV, src1, src0); + break; + + case SCC_LIR_LEA: + case SCC_LIR_LOAD_ADDR: + // 地址计算,src0 是复杂地址(LIR 的 MEM 类型) + add_instr_2(isel, SCC_X86_IFORM_LEA_GPRV_AGEN, dst, src0); + break; + + /* ---- 一元运算 ---- */ + case SCC_LIR_NEG: + add_instr_1(isel, SCC_X86_IFORM_NEG_GPRV, dst); + break; + case SCC_LIR_NOT: + add_instr_1(isel, SCC_X86_IFORM_NOT_GPRV, dst); + break; + + /* ---- 算术/逻辑二元运算 ---- */ + case SCC_LIR_ADD: + case SCC_LIR_SUB: + case SCC_LIR_AND: + case SCC_LIR_OR: + case SCC_LIR_XOR: + emit_binary_op(isel, instr->op, dst, src0, src1, size); + break; + + case SCC_LIR_MUL: + // imul dst, src0, src1 → 需要 mov + imul + if (src0.kind == SCC_MIR_OP_VREG && dst.kind == SCC_MIR_OP_VREG && + src0.vreg != dst.vreg) + emit_move(isel, dst, src0, size); + add_instr_2(isel, SCC_X86_IFORM_IMUL_GPRV_GPRV, dst, src1); + break; + + case SCC_LIR_SHL: + case SCC_LIR_SHR: + case SCC_LIR_SAR: + // 双地址:dst = dst op count + if (src0.kind == SCC_MIR_OP_VREG && dst.kind == SCC_MIR_OP_VREG && + src0.vreg != dst.vreg) + emit_move(isel, dst, src0, size); + + if (src1.kind == SCC_MIR_OP_IMM) { + scc_x86_iform_t iform; + switch (instr->op) { + case SCC_LIR_SHL: + iform = SCC_X86_IFORM_SHL_GPRV_IMMB_C1R4; + break; + case SCC_LIR_SHR: + iform = SCC_X86_IFORM_SHR_GPRV_IMMB; + break; + case SCC_LIR_SAR: + iform = SCC_X86_IFORM_SAR_GPRV_IMMB; + break; + default: + UNREACHABLE(); + } + add_instr_2(isel, iform, dst, src1); + } else { + // 移位量在 CL(需要先 mov cl, src1) + scc_mir_operand_t cl = {.kind = SCC_MIR_OP_PREG, + .preg = SCC_X86_REG_CL}; + emit_move(isel, cl, src1, 1); // CL 是 8 位 + scc_x86_iform_t iform; + switch (instr->op) { + case SCC_LIR_SHL: + iform = SCC_X86_IFORM_SHL_GPRV_CL_D3R4; + break; + case SCC_LIR_SHR: + iform = SCC_X86_IFORM_SHR_GPRV_CL; + break; + case SCC_LIR_SAR: + iform = SCC_X86_IFORM_SAR_GPRV_CL; + break; + default: + UNREACHABLE(); + } + add_instr_2(isel, iform, dst, cl); + } + break; + + /* ---- 除法与取模 ---- */ + case SCC_LIR_DIV_S: + case SCC_LIR_DIV_U: + case SCC_LIR_REM_S: + case SCC_LIR_REM_U: { + scc_mir_operand_t rax = {.kind = SCC_MIR_OP_PREG, + .preg = SCC_X86_REG_RAX}; + scc_mir_operand_t rdx = {.kind = SCC_MIR_OP_PREG, + .preg = SCC_X86_REG_RDX}; + + emit_move(isel, rax, src0, size); + + if (instr->op == SCC_LIR_DIV_S || instr->op == SCC_LIR_REM_S) { + // 有符号扩展:cqo / cdq(根据 size 选择,这里简化为 64 位 cqo) + add_instr_0(isel, SCC_X86_IFORM_CQO); + } else { + // 无符号:xor edx, edx + scc_mir_operand_t zero = {.kind = SCC_MIR_OP_IMM, .imm = 0}; + emit_move(isel, rdx, zero, size); + } + + scc_x86_iform_t div_if = + (instr->op == SCC_LIR_DIV_S || instr->op == SCC_LIR_REM_S) + ? SCC_X86_IFORM_IDIV_GPRV + : SCC_X86_IFORM_DIV_GPRV; + add_instr_1(isel, div_if, src1); + + // 结果:商在 RAX,余数在 RDX + if (instr->op == SCC_LIR_REM_S || instr->op == SCC_LIR_REM_U) + emit_move(isel, dst, rdx, size); + else + emit_move(isel, dst, rax, size); + break; + } + + /* ---- 比较与分支 ---- */ + case SCC_LIR_CMP: + // 比较并设置标志位,结果通过后续 BR 使用。 + // 当前 LIR 中 CMP 不直接生成 setcc,需要配合分支。 + emit_compare(isel, src0, src1, size); + // 如果有需要将比较结果写入 to(即 bool 值),可后续添加 SETcc + break; + + case SCC_LIR_BR: { + // 条件分支:依赖前一条 CMP 设置的标志位 + // 问题:LIR 的 BR 未携带条件码,实际需要依据前一条 CMP 的条件。 + // 这里暂时无法精确生成 jcc,故保留原始构造假的直接跳转,待上层 IR 合并 + // CMP+BR 后再完善。 以下代码仅为占位,实际不可用。 scc_mir_operand_t + // true_bb = { .kind = SCC_MIR_OP_BLOCK, .block_id = + // instr->metadata.br.true_target }; scc_mir_operand_t false_bb = { + // .kind = SCC_MIR_OP_BLOCK, .block_id = instr->metadata.br.false_target + // }; add_instr_1(isel, SCC_X86_IFORM_JMP_RELBRZ, true_bb); // + // 不合理占位 + UNREACHABLE(); // 当前不可达,要求上层保证 CMP+BR 合并 + break; + } + + case SCC_LIR_JMP: + add_instr_1( + isel, SCC_X86_IFORM_JMP_RELBRZ, + (scc_mir_operand_t){.kind = SCC_MIR_OP_BLOCK, + .block_id = instr->metadata.jmp_target}); + break; + + /* ---- 调用与返回 ---- */ + case SCC_LIR_CALL: + emit_call(isel, instr->metadata.call.callee, dst); + break; + + case SCC_LIR_RET: + emit_ret(isel, instr->metadata.ret_val); + break; + + /* ---- 栈分配 ---- */ + case SCC_LIR_ALLOCA: + // emit_alloca(isel, dst, instr->metadata.alloca.size_bytes); add_instr_2(isel, (scc_x86_iform_t)SCC_MIR_PSUEDO_ALLOCA, lir_val_to_mir_op(&instr->to), (scc_mir_operand_t){ .kind = SCC_MIR_OP_IMM, .imm = instr->size, }); - } break; - - case SCC_LIR_ADD: - case SCC_LIR_SUB: - case SCC_LIR_MUL: - case SCC_LIR_AND: - case SCC_LIR_OR: - case SCC_LIR_XOR: - case SCC_LIR_SHL: - case SCC_LIR_SHR: - case SCC_LIR_SAR: { - int base; - bool is_commutative = - (instr->op != SCC_LIR_SUB && instr->op != SCC_LIR_SHL && - instr->op != SCC_LIR_SHR && instr->op != SCC_LIR_SAR); - switch (instr->op) { - case SCC_LIR_ADD: - base = SCC_X86_IFORM_ADD_GPRV_GPRV_01; - break; - case SCC_LIR_SUB: - base = SCC_X86_IFORM_SUB_GPRV_GPRV_29; - break; - case SCC_LIR_AND: - base = SCC_X86_IFORM_AND_GPRV_GPRV_21; - break; - case SCC_LIR_OR: - base = SCC_X86_IFORM_OR_GPRV_GPRV_09; - break; - case SCC_LIR_XOR: - base = SCC_X86_IFORM_XOR_GPRV_GPRV_31; - break; - case SCC_LIR_MUL: - base = SCC_X86_IFORM_IMUL_GPRV_GPRV; - break; - case SCC_LIR_SHL: - base = SCC_X86_IFORM_SHR_GPRV_IMMB; - break; // 简化,实际应处理 CL 移位 - case SCC_LIR_SHR: - base = SCC_X86_IFORM_SHR_GPRV_IMMB; - break; - case SCC_LIR_SAR: - base = SCC_X86_IFORM_SAR_GPRV_IMMB; - break; - default: - return; - } - - if (instr->arg0.kind == SCC_LIR_INSTR_KIND_IMM) { - add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_IMMV, - lir_val_to_mir_op(&instr->to), - lir_val_to_mir_op(&instr->arg0)); - } - - // 三地址转两地址:若 to != arg0,需先 mov to, arg0 - if (instr->to.kind == SCC_LIR_INSTR_KIND_VREG && - instr->arg0.kind == SCC_LIR_INSTR_KIND_VREG && - instr->to.data.reg != instr->arg0.data.reg) { - add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_GPRV_89, - lir_val_to_mir_op(&instr->to), - lir_val_to_mir_op(&instr->arg0)); - } - - // 如果是立即数,使用 RI 变体 - if (instr->arg1.kind == SCC_LIR_INSTR_KIND_IMM) { - // 需要根据立即数大小选择 RI8/RI32 - add_instr_2(isel, base, lir_val_to_mir_op(&instr->to), - lir_val_to_mir_op(&instr->arg1)); - break; - } - - // 生成两地址运算指令 - add_instr_2(isel, base, lir_val_to_mir_op(&instr->to), - lir_val_to_mir_op(&instr->arg1)); - } break; - // case SCC_LIR_DIV_S: - // case SCC_LIR_DIV_U: - // case SCC_LIR_REM_S: - // case SCC_LIR_REM_U: - // case SCC_LIR_FADD: - // case SCC_LIR_FSUB: - // case SCC_LIR_FMUL: - // case SCC_LIR_FDIV: - // dump_operand(ctx, &instr->to); - // scc_tree_dump_append(td, ", "); - // dump_operand(ctx, &instr->arg0); - // scc_tree_dump_append(td, ", "); - // dump_operand(ctx, &instr->arg1); - // break; - - // case SCC_LIR_CMP: - // dump_operand(ctx, &instr->to); - // scc_tree_dump_append_fmt(td, ", %s, ", - // cond_to_string(instr->metadata.cond)); - // dump_operand(ctx, &instr->arg0); - // scc_tree_dump_append(td, ", "); - // dump_operand(ctx, &instr->arg1); - // break; - - // case SCC_LIR_BR: - // dump_operand(ctx, &instr->arg0); - // scc_tree_dump_append_fmt(td, ", BB#%zu, BB#%zu", - // instr->metadata.br.true_target, - // instr->metadata.br.false_target); - // break; - - case SCC_LIR_JMP: { - add_instr_1(isel, SCC_X86_IFORM_JMP_GPRV, - (scc_mir_operand_t){ - .kind = SCC_MIR_OP_BLOCK, - .block_id = instr->metadata.jmp_target, - }); - } break; - - // case SCC_LIR_JMP_INDIRECT: - // dump_operand(ctx, &instr->arg0); - // break; - - case SCC_LIR_CALL: { - const struct scc_lir_call *c = &instr->metadata.call; - Assert(c->callee != nullptr); - add_instr_1(isel, SCC_X86_IFORM_CALL_NEAR_GPRV, - (scc_mir_operand_t){ - .kind = SCC_MIR_OP_SYMBOL, - .symbol = c->callee, - }); - } break; - // case SCC_LIR_CALL: { - // const struct scc_lir_call *c = &instr->metadata.call; - // if (c->ret_vreg.kind != SCC_LIR_INSTR_KIND_NONE) { - // dump_operand(ctx, &c->ret_vreg); - // scc_tree_dump_append(td, " = "); - // } - // scc_tree_dump_append_fmt(td, "call @%s(", - // c->callee ? c->callee : ""); - // for (u8 i = 0; i < c->arg_count; i++) { - // if (i > 0) - // scc_tree_dump_append(td, ", "); - // dump_operand(ctx, &c->args[i]); - // } - // scc_tree_dump_append_fmt(td, ") clobber=0x%llx", - // (unsigned long long)c->clobber_mask); - // break; - // } - - // case SCC_LIR_CALL_INDIRECT: { - // const struct scc_lir_call_indirect *c = - // &instr->metadata.call_indirect; if (c->ret_vreg.kind != - // SCC_LIR_INSTR_KIND_NONE) { - // dump_operand(ctx, &c->ret_vreg); - // scc_tree_dump_append(td, " = "); - // } - // scc_tree_dump_append(td, "call "); - // dump_operand(ctx, &c->target); - // scc_tree_dump_append(td, "("); - // for (u8 i = 0; i < c->arg_count; i++) { - // if (i > 0) - // scc_tree_dump_append(td, ", "); - // dump_operand(ctx, &c->args[i]); - // } - // scc_tree_dump_append_fmt(td, ") clobber=0x%llx", - // (unsigned long long)c->clobber_mask); - // break; - // } - - case SCC_LIR_RET: { - if (instr->metadata.ret_val.kind != SCC_LIR_INSTR_KIND_NONE) { - // FIXME target ABI - add_instr_2(isel, SCC_X86_IFORM_MOV_GPRV_GPRV_89, - (scc_mir_operand_t){ - .kind = SCC_MIR_OP_PREG, - .preg = SCC_X86_REG_RAX, - }, - lir_val_to_mir_op(&instr->metadata.ret_val)); - } - - add_instr_0(isel, SCC_X86_IFORM_RET_NEAR); - } break; - - // case SCC_LIR_PARALLEL_COPY: { - // const struct scc_lir_parallel_copy *pc = - // &instr->metadata.parallel_copy; scc_tree_dump_append(td, "["); - // for (u8 i = 0; i < pc->num_copies; i++) { - // if (i > 0) - // scc_tree_dump_append(td, ", "); - // dump_operand(ctx, &pc->dests[i]); - // scc_tree_dump_append(td, " <- "); - // dump_operand(ctx, &pc->srcs[i]); - // } - // scc_tree_dump_append(td, "]"); - // break; - // } - - // case SCC_LIR_VA_START: - // dump_operand(ctx, &instr->metadata.va_start.ap); - // scc_tree_dump_append(td, ", "); - // dump_operand(ctx, &instr->metadata.va_start.last); - // break; - - // case SCC_LIR_VA_ARG: - // dump_operand(ctx, &instr->metadata.va_arg.to); - // scc_tree_dump_append(td, " = va_arg "); - // dump_operand(ctx, &instr->metadata.va_arg.ap); - // scc_tree_dump_append_fmt(td, ", size=%u, align=%u, float=%d", - // instr->metadata.va_arg.type_size, - // instr->metadata.va_arg.type_align, - // instr->metadata.va_arg.is_float); - // break; - - // case SCC_LIR_VA_END: - // dump_operand(ctx, &instr->metadata.va_end.ap); - // break; - - // case SCC_LIR_VA_COPY: - // dump_operand(ctx, &instr->metadata.va_copy.dest); - // scc_tree_dump_append(td, ", "); - // dump_operand(ctx, &instr->metadata.va_copy.src); - // break; - - // case SCC_LIR_NOP: - // break; - // - default: break; + + /* ---- 其他(占位) ---- */ + case SCC_LIR_NOP: + break; + + default: UNREACHABLE(); break; } } + static void sel_func(const scc_lir_module_t *lir_module, const scc_lir_func_t *func) { x86_isel_t isel; diff --git a/libs/ir2mcode/include/scc_ir2mcode.h b/libs/ir2mcode/include/scc_ir2mcode.h index 94dbaca..5622b11 100644 --- a/libs/ir2mcode/include/scc_ir2mcode.h +++ b/libs/ir2mcode/include/scc_ir2mcode.h @@ -1,4 +1,11 @@ #ifndef __SCC_IR2MCODE_H__ #define __SCC_IR2MCODE_H__ +#include +#include +// FIXME target choice +void scc_ir2mcode_emit_instr(scc_mcode_t *mcode, + const scc_mir_instr_t *mir_instr); +void scc_ir2mcode(scc_mcode_t *mcode, const scc_mir_module_t *mir_module); + #endif /* __SCC_IR2MCODE_H__ */ diff --git a/libs/ir2mcode/include/scc_ir2sccf.h b/libs/ir2mcode/include/scc_ir2sccf.h index 1fe26dd..1c7ba2c 100644 --- a/libs/ir2mcode/include/scc_ir2sccf.h +++ b/libs/ir2mcode/include/scc_ir2sccf.h @@ -1,9 +1,11 @@ #ifndef __SCC_IR2SCCF_H__ #define __SCC_IR2SCCF_H__ +#include "scc_ir2mcode.h" #include #include +// FIXME target choice void scc_ir2sccf(sccf_builder_t *builder, scc_mir_module_t *mir_module); #endif /* __SCC_IR2SCCF_H__ */ diff --git a/libs/ir2mcode/src/scc_ir2mcode.c b/libs/ir2mcode/src/scc_ir2mcode.c index 50fbc9b..373653c 100644 --- a/libs/ir2mcode/src/scc_ir2mcode.c +++ b/libs/ir2mcode/src/scc_ir2mcode.c @@ -7,14 +7,37 @@ #include void mir_x86_to_mcode(scc_mcode_t *mcode, const scc_mir_instr_t *ins) { - // scc_x86_operand_value_t ops[8] = {0}; - // for (int i = 0; i < ins->num_operands; i += 1) { - // } - scc_x86_encode_inst(mcode, ins->opcode, (void *)&ins->operands); + scc_x86_operand_value_t ops[8] = {0}; + for (int i = 0; i < ins->num_operands; i += 1) { + switch (ins->operands[i].kind) { + case SCC_MIR_OP_VREG: + Panic("can't convert vreg to mcode"); + break; + case SCC_MIR_OP_PREG: + ops[i].kind = SCC_X86_OPR_REG; + ops[i].reg = ins->operands[i].preg; + break; + case SCC_MIR_OP_MEM: + Panic("can't convert mem to mcode"); + break; + case SCC_MIR_OP_IMM: + ops[i].kind = SCC_X86_OPR_IMM; + ops[i].imm = ins->operands[i].imm; + break; + case SCC_MIR_OP_SYMBOL: + case SCC_MIR_OP_BLOCK: + ops[i].kind = SCC_X86_OPR_RELBR; + ops[i].imm = 0; + break; + default: + Panic("unsupported operand kind"); + }; + } + scc_x86_encode_inst(mcode, ins->opcode, ops); } -static void scc_emit_mcode(scc_mcode_t *mcode, - const scc_mir_instr_t *mir_instr) { +void scc_ir2mcode_emit_instr(scc_mcode_t *mcode, + const scc_mir_instr_t *mir_instr) { // TODO mir_x86_to_mcode(mcode, mir_instr); } @@ -31,7 +54,7 @@ void scc_ir2mcode(scc_mcode_t *mcode, const scc_mir_module_t *mir_module) { scc_mir_instr_vec_t *instrs = SCC_MIR_BBLOCK_VALUES(bb); scc_vec_foreach(*instrs, i) { const scc_mir_instr_t *ins = &scc_vec_at(*instrs, i); - scc_emit_mcode(mcode, ins); + scc_ir2mcode_emit_instr(mcode, ins); } } } diff --git a/libs/ir2mcode/src/scc_ir2sccf.c b/libs/ir2mcode/src/scc_ir2sccf.c index fb5957b..1c91430 100644 --- a/libs/ir2mcode/src/scc_ir2sccf.c +++ b/libs/ir2mcode/src/scc_ir2sccf.c @@ -45,6 +45,11 @@ void scc_ir2sccf(sccf_builder_t *builder, scc_mir_module_t *mir_module) { scc_cfg_symbol_t *symbol = &scc_vec_at(mir_module->cfg_module.symbols, i); + if (symbol->name == nullptr) { + LOG_ERROR("Symbol name is null"); + continue; + } + sccf_sym_t sym = (sccf_sym_t){ .sccf_sect_offset = scc_vec_size(sect_data), .sccf_sym_size = @@ -58,19 +63,33 @@ void scc_ir2sccf(sccf_builder_t *builder, scc_mir_module_t *mir_module) { Assert(sym_idx != 0); } - // scc_vec_foreach(ctx->cprog->func_defs, i) { - // scc_ir_value_ref_t func_ref = scc_vec_at(ctx->cprog->func_defs, i); - // scc_ir_func_t *func = scc_ir_module_get_func(GET_MODULE(ctx), - // func_ref); if (!func) { - // LOG_ERROR("invalid function reference"); - // return; - // } - // sccf_sym_t *sym = - // sccf_builder_get_symbol_unsafe(ctx->builder, func->name); - // Assert(sym != nullptr); - // sym->sccf_sect_offset = scc_vec_size(ctx->sect_mcode.mcode); - // parse_function(ctx, func); - // } + sccf_sect_data_t sect_code = {0}; + scc_vec_init(sect_code); + scc_mcode_t mcode = {0}; + scc_mcode_init(&mcode, SCC_MCODE_ARCH_X86_64); + scc_vec_foreach(mir_module->cfg_module.funcs, i) { + if (i == 0) + continue; + scc_mir_func_t *func = &scc_vec_at(mir_module->cfg_module.funcs, i); + + sccf_sym_t *sym = sccf_builder_get_symbol_unsafe(builder, func->name); + Assert(sym != nullptr); + sym->sccf_sect_offset = scc_vec_size(mcode.mcode); + + scc_vec_foreach(func->bblocks, i) { + scc_cfg_bblock_id_t id = scc_vec_at(func->bblocks, i); + const scc_cfg_bblock_t *bb = + scc_cfg_module_unsafe_get_bblock(&mir_module->cfg_module, id); + scc_mir_instr_vec_t *instrs = SCC_MIR_BBLOCK_VALUES(bb); + scc_vec_foreach(*instrs, i) { + const scc_mir_instr_t *ins = &scc_vec_at(*instrs, i); + // FIXME reloc symbol needed + scc_ir2mcode_emit_instr(&mcode, ins); + } + } + } + scc_vec_unsafe_from_buffer(sect_code, scc_vec_unsafe_get_data(mcode.mcode), + scc_vec_size(mcode.mcode)); // u8 *buf = scc_vec_unsafe_get_data(ctx->sect_mcode.mcode); // scc_vec_foreach(ctx->builder->relocs, i) { @@ -95,11 +114,6 @@ void scc_ir2sccf(sccf_builder_t *builder, scc_mir_module_t *mir_module) { // } // } - // sccf_sect_data_t text_section; - // scc_vec_unsafe_from_buffer(text_section, - // scc_vec_unsafe_get_data(ctx->sect_mcode.mcode), - // scc_vec_size(ctx->sect_mcode.mcode)); - // sccf_builder_add_text_section(ctx->builder, &text_section); - + sccf_builder_add_text_section(builder, §_code); sccf_builder_add_data_section(builder, §_data); } diff --git a/libs/mcode/include/scc_mcode.h b/libs/mcode/include/scc_mcode.h index b9d16f8..7c2242f 100644 --- a/libs/mcode/include/scc_mcode.h +++ b/libs/mcode/include/scc_mcode.h @@ -4,7 +4,8 @@ #include typedef enum { - SCC_MCODE_ARCH_AMD64, + SCC_MCODE_ARCH_NONE, + SCC_MCODE_ARCH_X86_64, } scc_mcode_arch_t; typedef SCC_VEC(u8) scc_mcode_buff_t; @@ -20,6 +21,20 @@ static inline void scc_mcode_init(scc_mcode_t *mcode, scc_mcode_arch_t arch) { mcode->is_littel_endian = true; } +static inline char *scc_mcode_unsafe_data(scc_mcode_t *mcode) { + return (char *)scc_vec_unsafe_get_data(mcode->mcode); +} + +static inline usize scc_mcode_size(scc_mcode_t *mcode) { + return scc_vec_size(mcode->mcode); +} + +static inline void scc_mcode_drop(scc_mcode_t *mcode) { + scc_vec_free(mcode->mcode); + mcode->arch = SCC_MCODE_ARCH_NONE; + mcode->is_littel_endian = true; +} + static inline void scc_mcode_add_u8(scc_mcode_t *mcode, u8 data) { scc_vec_push(mcode->mcode, data); } diff --git a/libs/target/sccf2target/src/sccf2pe.c b/libs/target/sccf2target/src/sccf2pe.c index 6422a90..ba82982 100644 --- a/libs/target/sccf2target/src/sccf2pe.c +++ b/libs/target/sccf2target/src/sccf2pe.c @@ -170,7 +170,8 @@ void sccf2pe(scc_pe_builder_t *builder, const sccf_t *sccf) { scc_pe_construct_idata(&idata_builder, &idata_range); u32 entry_point_offset = sccf->header.entry_point; - Assert(entry_point_offset < scc_vec_size(*code_data)); + Assert(code_data != nullptr && + entry_point_offset < scc_vec_size(*code_data)); u64 base_address = 0x140000000; u32 entry_point = code_range.virual_address + entry_point_offset; scc_pe_config_t config = (scc_pe_config_t){ diff --git a/src/config.h b/src/config.h index 7aa56f4..a517040 100644 --- a/src/config.h +++ b/src/config.h @@ -18,6 +18,9 @@ typedef struct { cbool emit_lir; cbool emit_mir; + cbool emit_flatbin; + cbool emit_sccf; + cbool emit_target; cbool emit_mir_pass_reg_alloc; cbool emit_mir_pass_stack_layout; cbool emit_mir_pass_prolog_epilog; @@ -43,6 +46,10 @@ static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config, SCC_HINT_EMIT_HIR, SCC_HINT_EMIT_LIR, SCC_HINT_EMIT_MIR, + + SCC_HINT_EMIT_FLATBIN, + SCC_HINT_EMIT_SCCF, + SCC_HINT_EMIT_TARGET, }; static const char *scc_hints_en[] = { [SCC_HINT_PROG_NAME] = "scc", @@ -65,6 +72,10 @@ static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config, [SCC_HINT_EMIT_LIR] = "Generate Low-level IR and exit", [SCC_HINT_EMIT_MIR] = "Generate Machine IR and exit", + [SCC_HINT_EMIT_FLATBIN] = "Generate flat binary and exit", + [SCC_HINT_EMIT_SCCF] = "Generate SCCF and exit", + [SCC_HINT_EMIT_TARGET] = "Generate target description and exit", + }; static const char *scc_hints_zh[] = { [SCC_HINT_PROG_NAME] = "scc", @@ -84,6 +95,10 @@ static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config, [SCC_HINT_EMIT_HIR] = "生成`高级中间代码`并退出", [SCC_HINT_EMIT_LIR] = "生成`低级中间代码`并退出", [SCC_HINT_EMIT_MIR] = "生成`机器中间代码`并退出", + + [SCC_HINT_EMIT_FLATBIN] = "生成`flat binary`并退出", + [SCC_HINT_EMIT_SCCF] = "生成`SCCF`并退出", + [SCC_HINT_EMIT_TARGET] = "生成`目标代码`并退出", }; const char **scc_hints; @@ -195,6 +210,26 @@ static void setup_argparse(scc_argparse_t *argparse, scc_config_t *config, scc_hints[SCC_HINT_EMIT_MIR]); scc_argparse_spec_setup_bool(&opt_mir.spec, &(config->emit_mir)); scc_argparse_cmd_add_opt(root, &opt_mir); + + // --emit-flatbin + scc_argparse_opt_t opt_flatbin; + scc_argparse_opt_init(&opt_flatbin, 0, "emit-flatbin", + scc_hints[SCC_HINT_EMIT_FLATBIN]); + scc_argparse_spec_setup_bool(&opt_flatbin.spec, &(config->emit_flatbin)); + scc_argparse_cmd_add_opt(root, &opt_flatbin); + // --emit-sccf + scc_argparse_opt_t opt_sccf; + scc_argparse_opt_init(&opt_sccf, 0, "emit-sccf", + scc_hints[SCC_HINT_EMIT_SCCF]); + scc_argparse_spec_setup_bool(&opt_sccf.spec, &(config->emit_sccf)); + scc_argparse_cmd_add_opt(root, &opt_sccf); + // --emit-target + scc_argparse_opt_t opt_emit_target; + scc_argparse_opt_init(&opt_emit_target, 0, "emit-target", + scc_hints[SCC_HINT_EMIT_TARGET]); + scc_argparse_spec_setup_string(&opt_emit_target.spec, + &(config->emit_target)); + scc_argparse_cmd_add_opt(root, &opt_emit_target); } #endif /* __SCC_CONFIG_H___ */ diff --git a/src/main.c b/src/main.c index 8a3345a..8256fd4 100644 --- a/src/main.c +++ b/src/main.c @@ -10,8 +10,10 @@ #include #include #include -// #include -// #include + +#include +#include +#include #include "config.h" @@ -65,6 +67,8 @@ static void tree_dump_output(const char *str, usize len, void *user) { void init_platform(void); +#define GET_VALID_FP(fp) (fp == nullptr ? scc_stdout : fp) + int main(int argc, const char **argv, const char **envp) { init_platform(); @@ -215,8 +219,7 @@ sstream_drop: scc_tree_dump_init(&tree_dump, false); } scc_ast_dump_node(&tree_dump, (scc_ast_node_t *)translation_unit); - scc_tree_dump_flush(&tree_dump, tree_dump_output, - fp == nullptr ? scc_stdout : fp); + scc_tree_dump_flush(&tree_dump, tree_dump_output, GET_VALID_FP(fp)); scc_tree_dump_drop(&tree_dump); return 0; } @@ -241,8 +244,7 @@ sstream_drop: // scc_ir_dump_cprog(&ir_dump_ctx); scc_hir_dump_cprog_linear(&ir_dump_ctx); - scc_tree_dump_flush(&tree_dump, tree_dump_output, - fp == nullptr ? scc_stdout : fp); + scc_tree_dump_flush(&tree_dump, tree_dump_output, GET_VALID_FP(fp)); scc_tree_dump_drop(&tree_dump); return 0; } @@ -262,8 +264,7 @@ sstream_drop: // scc_ir_dump_cprog(&ir_dump_ctx); scc_lir_dump_module(&lir_dump_ctx); - scc_tree_dump_flush(&tree_dump, tree_dump_output, - fp == nullptr ? scc_stdout : fp); + scc_tree_dump_flush(&tree_dump, tree_dump_output, GET_VALID_FP(fp)); scc_tree_dump_drop(&tree_dump); return 0; } @@ -282,30 +283,46 @@ sstream_drop: scc_mir_dump_init(&mir_dump_ctx, &tree_dump, &mir_module); scc_mir_dump_module(&mir_dump_ctx); - scc_tree_dump_flush(&tree_dump, tree_dump_output, - fp == nullptr ? scc_stdout : fp); + scc_tree_dump_flush(&tree_dump, tree_dump_output, GET_VALID_FP(fp)); scc_tree_dump_drop(&tree_dump); return 0; } - // scc_ir2mcode_ctx_t ir2mcode_ctx; - // sccf_builder_t sccf_builder; - // scc_ir2mcode_init(&ir2mcode_ctx, &cprog, &sccf_builder, - // SCC_MCODE_ARCH_AMD64); - // scc_ir2mcode(&ir2mcode_ctx); - // scc_ir2mcode_drop(&ir2mcode_ctx); + if (config.emit_flatbin) { + scc_mcode_t mcode = {0}; + scc_mcode_init(&mcode, SCC_MCODE_ARCH_X86_64); + scc_ir2mcode(&mcode, &mir_module); + if (fp == nullptr) { + LOG_WARN("emit flatbin can't write to stdout"); + return 0; + } + usize ret = scc_fwrite(fp, scc_vec_unsafe_get_data(mcode.mcode), + scc_vec_size(mcode.mcode)); + if (ret != scc_vec_size(mcode.mcode)) { + LOG_ERROR("write flatbin failed, write %zu but need write %zu\n", + ret, scc_vec_size(mcode.mcode)); + return 1; + } + return 0; + } - // sccf_builder_set_entry_symbol_name(&sccf_builder, - // config.entry_point_symbol); - // const sccf_t *sccf = sccf_builder_to_sccf(&sccf_builder); - // scc_pe_builder_t pe_builder; - // sccf2pe(&pe_builder, sccf); + sccf_builder_t sccf_builder = {0}; + sccf_builder_init(&sccf_builder); + scc_ir2sccf(&sccf_builder, &mir_module); + sccf_builder_set_entry_symbol_name(&sccf_builder, + config.entry_point_symbol); + if (config.emit_sccf) { + return 0; + } - // if (fp == nullptr) { - // scc_printf("output exe at %s\n", config.output_file); - // } else { - // scc_pe_dump_to_file(&pe_builder, config.output_file); - // } + const sccf_t *sccf = sccf_builder_to_sccf(&sccf_builder); + scc_pe_builder_t pe_builder; + sccf2pe(&pe_builder, sccf); + if (fp == nullptr) { + scc_printf("output exe at %s\n", config.output_file); + } else { + scc_pe_dump_to_file(&pe_builder, config.output_file); + } return 0; } diff --git a/tests/simple/test.py b/tests/simple/test.py index 0be4fbc..7de88bf 100644 --- a/tests/simple/test.py +++ b/tests/simple/test.py @@ -1,107 +1,327 @@ -from pprint import PrettyPrinter +#!/usr/bin/env python3 +"""Integration test runner for scc compiler. + +Reads test expectations from `expect.toml`, compiles each C source file with scc, +executes the resulting binary, and validates the process return code or stdout. +""" +from __future__ import annotations + +import argparse +import logging +import os import subprocess -from pathlib import Path +import sys +import tempfile import tomllib import uuid +from dataclasses import dataclass +from pathlib import Path +from typing import Sequence -# 配置参数 -WORKSPACE = Path(__file__).resolve().parent # 测试工作目录 -TEST_DIR = Path(WORKSPACE) -CC_PATH = Path(WORKSPACE / "../../build/dev/scc") +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- +WORKSPACE = Path(__file__).resolve().parent +CC_PATH = WORKSPACE / "../../build/dev/scc" +CONFIG_PATH = WORKSPACE / "expect.toml" +DEFAULT_TIMEOUT = 10 # seconds -def run_command(cmd, capture_output=True): - """执行命令并捕获 stdout""" - try: - result = subprocess.run( - cmd, - cwd=WORKSPACE, - stdout=subprocess.PIPE if capture_output else None, - stderr=subprocess.PIPE, - text=True, - timeout=5, # 增加超时时间以防虚拟机启动慢 - ) - # 返回 stdout 用于获取返回值,同时检查是否有运行时错误 - return result.stdout.strip(), result.stderr.strip(), result.returncode - except subprocess.TimeoutExpired: - return None, "Timeout expired", -1 - except Exception as e: - return None, str(e), -1 +logger = logging.getLogger("scc-test") -def run_test(test_file, expected): - print(f"\nTesting {test_file}...") - # 使用唯一文件名避免并发冲突 - unique_id = str(uuid.uuid4())[:8] # 简短的唯一标识符 - exe_filename = f"test_{unique_id}.exe" - exe_path = WORKSPACE / exe_filename +# --------------------------------------------------------------------------- +# Data models +# --------------------------------------------------------------------------- +@dataclass +class TestCase: + """A single test case defined in expect.toml.""" + source: Path + expected: int | str # return code (int) or stdout (str) + test_type: str # "return" or "stdout" + origin_key: str # original TOML key for user-friendly display - # 1. 编译 - compile_cmd = [str(CC_PATH), str(test_file), "-o", exe_filename, "--entry-point-symbol", "main"] - # 编译时关注 stderr 和返回码 - _, compile_err, compile_ret = run_command(compile_cmd) + @property + def description(self) -> str: + return f"{self.test_type.upper():6s} {self.source}" - if not exe_path.exists() or compile_ret != 0: - print(f" Compilation failed: {compile_err}") - # 确保清理失败的输出文件 - if exe_path.exists(): - try: - exe_path.unlink() - except: - pass # 忽略清理失败 - return False - # 2. 执行虚拟机并获取输出 - vm_cmd = [str(exe_path)] - actual_output, vm_err, vm_ret = run_command(vm_cmd) +# --------------------------------------------------------------------------- +# Test runner +# --------------------------------------------------------------------------- +class Runner: + """Compiles and executes test cases.""" - # 如果存在 stderr 且返回码异常(例如负数表示信号终止),则视为运行时错误 - if vm_err and vm_ret < 0: - print(f" Runtime error: {vm_err}") - # 清理文件后返回 + def __init__( + self, + cc: Path, + workspace: Path, + timeout: float = DEFAULT_TIMEOUT, + keep_temps: bool = False, + ) -> None: + self.cc = cc.resolve() + self.workspace = workspace.resolve() + self.timeout = timeout + self.keep_temps = keep_temps + self._temp_dir: tempfile.TemporaryDirectory | None = None + + @property + def temp_dir(self) -> Path: + if self._temp_dir is None: + self._temp_dir = tempfile.TemporaryDirectory( + prefix="scc-test-", dir=self.workspace + ) + return Path(self._temp_dir.name) + + def cleanup(self) -> None: + if self._temp_dir is not None: + self._temp_dir.cleanup() + self._temp_dir = None + + def _unique_exe_path(self) -> Path: + """Generate a unique executable path inside the temporary directory.""" + return self.temp_dir / f"test_{uuid.uuid4().hex[:8]}.exe" + + def compile(self, source: Path, output: Path) -> tuple[bool, str]: + """Compile *source* into *output*. Returns (success, stderr).""" + cmd = [ + str(self.cc), + str(source), + "-o", + str(output), + "--entry-point-symbol", "main", + ] try: - exe_path.unlink() - except: - pass # 忽略清理失败 - return False + proc = subprocess.run( + cmd, + cwd=self.workspace, + capture_output=True, + text=True, + timeout=self.timeout, + ) + except subprocess.TimeoutExpired: + return False, "Compilation timed out" + except OSError as exc: + return False, f"Failed to invoke compiler: {exc}" - # 3. 获取返回值 (修改进程返回值而非 stdout) - actual = vm_ret - - # 4. 清理输出文件 - try: - exe_path.unlink() - except: - pass # 忽略清理失败 + if proc.returncode != 0 or not output.exists(): + return False, proc.stderr.strip() or "(no error message)" + return True, proc.stderr.strip() - # 5. 验证结果 - # 注意:toml 中读取的 expected 可能是整数,actual 也是整数,直接比较 - if actual == expected: - print(f" PASSED {test_file}") - return True - else: - print(f" FAILED: Expected '{expected}', got '{actual}'") - return False + def run_exe(self, exe: Path) -> tuple[int, str, str]: + """Execute a binary. Returns (returncode, stdout, stderr).""" + # Make sure the file is executable (Unix) + exe.chmod(exe.stat().st_mode | 0o111) + try: + proc = subprocess.run( + [str(exe)], + cwd=self.workspace, + capture_output=True, + text=True, + timeout=self.timeout, + ) + except subprocess.TimeoutExpired: + return -1, "", "Execution timed out" + except OSError as exc: + return -1, "", f"Failed to run executable: {exc}" -def main(): - passed = 0 - total = 0 - config = {} - config_path = WORKSPACE / "expect.toml" - - if not config_path.exists(): - print(f"Config file not found: {config_path}") + return proc.returncode, proc.stdout, proc.stderr.strip() + + def run_one(self, test: TestCase) -> bool: + """Run a single test case. Returns True if passed.""" + logger.info("Testing %s", test.source) + + # 1. Compile + exe_path = self._unique_exe_path() + ok, stderr = self.compile(test.source, exe_path) + if not ok: + logger.error(" Compilation FAILED: %s", stderr) + return False + + # 2. Execute + returncode, stdout, run_err = self.run_exe(exe_path) + + # Runtime error detection: negative return code (signal) or timeout + if run_err and returncode < 0: + logger.error(" Runtime error: %s", run_err) + self._remove(exe_path) + return False + + # 3. Validate + if test.test_type == "return": + actual = returncode + else: + actual = stdout + + passed = actual == test.expected + if passed: + logger.info(" PASSED") + else: + logger.error( + " FAILED: expected %r, got %r", test.expected, actual + ) + + # 4. Cleanup + self._remove(exe_path) + return passed + + def _remove(self, path: Path) -> None: + """Remove *path* unless keep_temps is set.""" + if self.keep_temps: + return + try: + path.unlink(missing_ok=True) + except OSError: + pass + + def run_all(self, tests: Sequence[TestCase]) -> tuple[int, int]: + """Run a sequence of tests. Returns (passed, total).""" + passed = 0 + for idx, test in enumerate(tests, start=1): + logger.info("(%d/%d) %s", idx, len(tests), test.description) + if self.run_one(test): + passed += 1 + return passed, len(tests) + + +# --------------------------------------------------------------------------- +# Configuration loading +# --------------------------------------------------------------------------- +def load_config(config_path: Path) -> list[TestCase]: + """Parse `expect.toml` and build list of TestCase objects.""" + if not config_path.is_file(): + logger.error("Config file not found: %s", config_path) + sys.exit(1) + + with config_path.open("rb") as fh: + config = tomllib.load(fh) + + cases: list[TestCase] = [] + for test_file, expected in config.get("return_val_cases", {}).items(): + cases.append( + TestCase( + source=WORKSPACE / test_file, + expected=expected, + test_type="return", + origin_key=test_file, + ) + ) + for test_file, expected in config.get("stdout_val_cases", {}).items(): + cases.append( + TestCase( + source=WORKSPACE / test_file, + expected=expected, + test_type="stdout", + origin_key=test_file, + ) + ) + return cases + + +# --------------------------------------------------------------------------- +# CLI helpers +# --------------------------------------------------------------------------- +def build_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Integration test runner for scc", + ) + parser.add_argument( + "tests", + nargs="*", + metavar="SOURCE", + help="Specific test source files to run (as listed in expect.toml). " + "If none given, all tests are executed.", + ) + parser.add_argument( + "--list", + action="store_true", + help="List all available test cases from expect.toml and exit.", + ) + parser.add_argument( + "--verbose", "-v", + action="store_true", + help="Enable debug-level logging.", + ) + parser.add_argument( + "--keep-temps", + action="store_true", + help="Keep temporary executables (useful for debugging).", + ) + parser.add_argument( + "--timeout", + type=float, + default=DEFAULT_TIMEOUT, + help=f"Timeout in seconds for compilation/execution (default: {DEFAULT_TIMEOUT}).", + ) + parser.add_argument( + "--cc", + type=Path, + default=CC_PATH, + help=f"Path to scc compiler (default: {CC_PATH}).", + ) + return parser + + +def select_tests(all_tests: list[TestCase], requested: list[str]) -> list[TestCase]: + """Filter *all_tests* by user-provided source paths.""" + if not requested: + return all_tests + + selected: list[TestCase] = [] + for name in requested: + matched = [ + t for t in all_tests + if t.origin_key == name or str(t.source) == name + or t.source.name == name + ] + if not matched: + logger.warning("No test case matches '%s'", name) + continue + selected.extend(matched) + return selected + + +# --------------------------------------------------------------------------- +# Main entry point +# --------------------------------------------------------------------------- +def main() -> None: + parser = build_arg_parser() + args = parser.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(message)s", + ) + + # Load configuration + all_tests = load_config(CONFIG_PATH) + + if args.list: + print("Available test cases:") + for i, test in enumerate(all_tests, start=1): + print(f" {i:3d}: {test.description}") return - with open(config_path, "rb") as f: - config = tomllib.load(f) - PrettyPrinter().pprint(config) + # Filter tests + tests = select_tests(all_tests, args.tests) + if not tests: + logger.warning("No tests to run.") + return - for test_file, expected in config.get("return_val_cases", {}).items(): - total += 1 - if run_test(TEST_DIR / test_file, expected): - passed += 1 + runner = Runner( + cc=args.cc, + workspace=WORKSPACE, + timeout=args.timeout, + keep_temps=args.keep_temps, + ) + + passed, total = runner.run_all(tests) + logger.info("=" * 40) + logger.info("Summary: %d/%d passed", passed, total) + if passed != total: + sys.exit(1) + + runner.cleanup() - print(f"\nTest Summary: {passed}/{total} passed") if __name__ == "__main__": - main() + main() \ No newline at end of file