feat(lir): 修改HIR到LIR转换以支持可变参数函数

- 移除SCC_LIR_LEA指令类型,改用SCC_LIR_LOAD_ADDR
- 在scc_lir_func_meta_t中添加is_va_arg字段用于标识可变参数函数
- 修改scc_hir2lir函数参数类型,移除const限定符
- 更新比较操作的指令映射逻辑,将条件信息存储在metadata中
- 调整代码结构,在各个switch case分支中统一使用"} break"格式

fix(x86-isel): 修复x86指令选择中的立即数和重定位处理

- 修改emit_direct_call函数以正确处理全局符号重定位
- 更新立即数字段访问从imm到imm0
- 添加新的重定位操作数类型SCC_X86_OPR_RELOC
- 实现重定位目标类型的完整处理逻辑,包括基本块和符号

refactor(x86-mir): 重构x86操作数结构以支持重定位机制

- 将内存操作数的disp字段改为结构体形式包含displacement信息
- 移除不再使用的常用操作数构造器函数
- 保留并完善slot操作数构造器
- 更新内存操作数的调试输出格式

feat(ir2mcode): 添加重定位表支持以处理符号引用

- 定义新的重定位结构体scc_reloc_t用于记录重定位信息
- 修改scc_ir2mcode_emit_instr函数签名以传递重定位表
- 实现重定位补丁应用功能scc_ir2mcode_patch
- 更新机器码生成流程以收集和处理重定位信息

refactor(ir2sccf): 重构SCCF文件生成以支持重定位处理

- 提取独立的emit_mir_module函数处理MIR模块的机器码生成
- 实现基本块间重定位的地址解析和补丁应用
- 改进符号重定位的处理机制
- 简化机器码段数据的最终处理流程
This commit is contained in:
zzy
2026-05-21 16:19:49 +08:00
parent aa4292a30e
commit 41d060d7e7
19 changed files with 608 additions and 227 deletions

2
libs/mcode/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
*.json

View File

@@ -6,21 +6,116 @@
#include "scc_x86_reg.h"
/* A displacement value together with the width, in bits, of the field that
 * holds it in the encoded instruction. */
typedef struct {
i64 displacement;
u32 displacement_bits;
} scc_x86_disp_t;
/* Memory operand: segment, base and index registers, a scale factor and a
 * displacement.
 * NOTE(review): `scale` and `disp` are each declared twice below with
 * different types, which cannot compile as written. This looks like the old
 * and new side of a change kept together - confirm which pair is intended
 * (the encoder reads `disp.displacement`, i.e. the scc_x86_disp_t form). */
typedef struct {
scc_x86_reg_t seg;
scc_x86_reg_t base;
scc_x86_reg_t index;
u8 scale; /* 1,2,4,8 */
i32 disp;
u32 scale;
scc_x86_disp_t disp;
} scc_x86_mem_t;
/* What a relocation operand refers to. */
typedef enum {
SCC_X86_RELOC_TARGET_SYMBOL, /* a named global symbol (global_name) */
SCC_X86_RELOC_TARGET_BBLOCK, /* a basic block, by id (bblock_id) */
} scc_x86_reloc_target_t;
/* Relocation placeholder operand: records which kind of operand it stands
 * in for and what it refers to, so the value can be filled in later. */
typedef struct scc_x86_reloc_op {
scc_x86_operand_kind_t kind; // kind of the operand being replaced
scc_x86_reloc_target_t target;
union {
i64 imm; // numeric value; the patch helpers read this member
int bblock_id; // set when target == SCC_X86_RELOC_TARGET_BBLOCK
const char *global_name; // set when target == SCC_X86_RELOC_TARGET_SYMBOL
};
i64 addend; // extra constant offset, for sym+addend
} scc_x86_reloc_op_t;
/* Tagged operand value handed to the encoder: `kind` tells which member of
 * the anonymous union is meaningful. */
typedef struct {
scc_x86_operand_kind_t kind;
union {
scc_x86_reg_t reg; /* SCC_X86_OPR_REG */
i64 imm; /* NOTE(review): overlaps simm0 and is not set by any constructor here - confirm it is still needed */
i32 brdisp; /* SCC_X86_OPR_RELBR: branch displacement */
i64 simm0; /* SCC_X86_OPR_IMM: immediate, signed view */
u64 imm0; /* SCC_X86_OPR_IMM: immediate, unsigned view */
u8 imm1; /* NOTE(review): presumably a second, 8-bit immediate - confirm */
scc_x86_mem_t mem; /* SCC_X86_OPR_MEM */
scc_x86_reloc_op_t reloc; /* SCC_X86_OPR_RELOC */
};
} scc_x86_operand_value_t;
/* Build a register operand that carries `reg` unchanged. */
static inline scc_x86_operand_value_t scc_x86_op_preg(scc_x86_reg_t reg) {
    scc_x86_operand_value_t operand = {0};
    operand.kind = SCC_X86_OPR_REG;
    operand.reg = reg;
    return operand;
}
static inline scc_x86_operand_value_t scc_x86_op_vreg(int vreg) {
scc_x86_operand_value_t o = {
.kind = SCC_X86_OPR_REG,
.reg = (scc_x86_reg_t)((int)SCC_X86_REG_COUNT + vreg)};
return o;
}
/* Build a relative-branch operand whose displacement is `rel`. */
static inline scc_x86_operand_value_t scc_x86_op_relbr(i32 rel) {
    scc_x86_operand_value_t operand = {0};
    operand.kind = SCC_X86_OPR_RELBR;
    operand.brdisp = rel;
    return operand;
}
/* Build an immediate operand; the value is stored through the signed
 * member `simm0`. */
static inline scc_x86_operand_value_t scc_x86_op_imm(i64 imm) {
    scc_x86_operand_value_t operand = {0};
    operand.kind = SCC_X86_OPR_IMM;
    operand.simm0 = imm;
    return operand;
}
/* Build a memory operand from a fully described memory reference. */
static inline scc_x86_operand_value_t scc_x86_op_mem(scc_x86_mem_t mem) {
    scc_x86_operand_value_t operand = {0};
    operand.kind = SCC_X86_OPR_MEM;
    operand.mem = mem;
    return operand;
}
// 1. Global-symbol relocation used as an absolute-address load
//    (MOV reg, imm32/64): the placeholder stands in for an immediate.
static inline scc_x86_operand_value_t
scc_x86_op_reloc_global_imm(const char *sym, i64 addend) {
    scc_x86_operand_value_t operand = {
        .kind = SCC_X86_OPR_RELOC,
        .reloc =
            {
                .kind = SCC_X86_OPR_IMM,
                .target = SCC_X86_RELOC_TARGET_SYMBOL,
                .global_name = sym,
                .addend = addend,
            },
    };
    return operand;
}
// 2. Global-symbol relocation used as a relative jump/call target
//    (CALL/JMP rel32): the placeholder stands in for a branch displacement.
static inline scc_x86_operand_value_t
scc_x86_op_reloc_global_relbr(const char *sym, i64 addend) {
    scc_x86_operand_value_t operand = {
        .kind = SCC_X86_OPR_RELOC,
        .reloc =
            {
                .kind = SCC_X86_OPR_RELBR,
                .target = SCC_X86_RELOC_TARGET_SYMBOL,
                .global_name = sym,
                .addend = addend,
            },
    };
    return operand;
}
// 3. Basic-block relocation (relative jump): the target is identified by
//    the basic block id `bid`.
static inline scc_x86_operand_value_t scc_x86_op_reloc_block(int bid,
                                                             i64 addend) {
    scc_x86_operand_value_t operand = {
        .kind = SCC_X86_OPR_RELOC,
        .reloc =
            {
                .kind = SCC_X86_OPR_RELBR,
                .target = SCC_X86_RELOC_TARGET_BBLOCK,
                .bblock_id = bid,
                .addend = addend,
            },
    };
    return operand;
}
// 4. Global-symbol relocation standing in for a memory operand, for cases
//    where symbol-based addressing such as [RIP + sym] is needed.
//    The encoder has to produce a RIP-relative ModRM/SIB for this form.
static inline scc_x86_operand_value_t
scc_x86_op_reloc_global_mem(const char *sym, i64 addend) {
    scc_x86_operand_value_t operand = {
        .kind = SCC_X86_OPR_RELOC,
        .reloc =
            {
                .kind = SCC_X86_OPR_MEM,
                .target = SCC_X86_RELOC_TARGET_SYMBOL,
                .global_name = sym,
                .addend = addend,
            },
    };
    return operand;
}
/* Emit one instruction according to `iform`. The length of the `ops` array
 * must match the number of operands that `iform` defines. */
int scc_x86_encode_inst(scc_mcode_t *mcode, scc_x86_iform_t iform,
const scc_x86_operand_value_t *ops);

View File

@@ -0,0 +1,137 @@
#ifndef __SCC_X86_PATCH_H__
#define __SCC_X86_PATCH_H__
#include "scc_x86_encode.h"
// x86-64 relocation types (handled by the low-level patch functions)
typedef enum {
SCC_X86_PATCH_NONE,
SCC_X86_PATCH_PC32, // 32-bit PC-relative offset (CALL/JMP/Jcc)
SCC_X86_PATCH_ABS64, // 64-bit absolute address (MOV reg, imm64)
SCC_X86_PATCH_ABS32, // 32-bit absolute address (MOV reg, imm32)
SCC_X86_PATCH_DISP8, // 8-bit memory displacement ([RIP+disp8] or [base+disp8])
SCC_X86_PATCH_DISP32, // 32-bit memory displacement ([RIP+disp32])
} scc_x86_patch_type_t;
/*
 * Overwrite `size` bytes of already emitted machine code, starting at byte
 * `offset`, with the bytes found at `data`.
 *
 * Panics when the range [offset, offset + size) is not fully inside the
 * buffer. The check is written without computing `offset + size`, because
 * that sum wraps around for very large offsets (for example one produced by
 * an unsigned underflow in a caller) and would let an out-of-bounds write
 * through.
 */
static inline void patch_bytes(scc_mcode_t *mcode, usize offset,
                               const void *data, usize size) {
    u8 *buf = (u8 *)scc_mcode_unsafe_data(mcode);
    usize total = scc_mcode_size(mcode);
    if (size > total || offset > total - size) {
        Panic("patch offset out of range: offset=%zu size=%zu total=%zu",
              offset, size, total);
    }
    scc_memcpy(buf + offset, data, size);
}
/* ---------- Patch interface implementation ---------- */

/*
 * Write a displacement into the machine code at byte `offset`.
 *
 * The field width is disp.displacement_bits / 8 bytes; a width below 8 bits
 * falls back to 4 bytes (RELBR 32). The value is truncated to that width and
 * stored least-significant byte first, independent of the byte order of the
 * machine running the compiler.
 *
 * Returns 0 on success, -1 when `mcode` is NULL or displacement_bits is 0.
 * Panics on a width other than 1, 2, 4 or 8 bytes.
 */
static inline int scc_x86_patch_disp(scc_mcode_t *mcode, usize offset,
                                     scc_x86_disp_t disp) {
    if (!mcode || disp.displacement_bits == 0)
        return -1;

    usize bytes = disp.displacement_bits / 8;
    if (bytes == 0) {
        bytes = 4; // RELBR 32
    }
    if (bytes != 1 && bytes != 2 && bytes != 4 && bytes != 8) {
        Panic("invalid displacement bits: %u", disp.displacement_bits);
        return -1; // keeps the loop below in bounds should Panic return
    }

    // Truncate to the field width and serialise in little-endian order.
    u64 raw = (u64)disp.displacement;
    u8 le[8];
    for (usize i = 0; i < bytes; i++) {
        le[i] = (u8)(raw >> (8 * i));
    }
    patch_bytes(mcode, offset, le, bytes);
    return 0;
}
/*
 * Write a 32-bit branch displacement into the machine code at byte `offset`.
 *
 * `disp` must be a SCC_X86_OPR_RELBR operand (value taken from `brdisp`) or
 * a SCC_X86_OPR_RELOC operand (value taken from `reloc.imm`, which the
 * caller is expected to have filled in beforehand); any other kind panics.
 * The four bytes are stored least-significant byte first, independent of
 * the byte order of the machine running the compiler.
 *
 * Always returns 0.
 */
static inline int scc_x86_patch_brdisp(scc_mcode_t *mcode, usize offset,
                                       scc_x86_operand_value_t disp) {
    if (disp.kind != SCC_X86_OPR_RELBR && disp.kind != SCC_X86_OPR_RELOC) {
        Panic("patch_brdisp called with non-branch operand kind %d", disp.kind);
    }

    i32 brdisp;
    if (disp.kind == SCC_X86_OPR_RELBR)
        brdisp = disp.brdisp;
    else
        brdisp = (i32)disp.reloc.imm; // value stored in the relocation placeholder

    u32 raw = (u32)brdisp;
    u8 le[4] = {(u8)raw, (u8)(raw >> 8), (u8)(raw >> 16), (u8)(raw >> 24)};
    patch_bytes(mcode, offset, le, sizeof le);
    return 0;
}
/*
 * Write an immediate into the machine code at byte `offset`.
 *
 * This interface has no size parameter, so it always writes 8 bytes and
 * relies on the caller for that width being correct; use
 * scc_x86_patch_imm0_ex when another width is needed.
 *
 * `imm0` must be a SCC_X86_OPR_IMM operand (value taken from `imm0`) or a
 * SCC_X86_OPR_RELOC operand (value taken from `reloc.imm`); any other kind
 * panics. The bytes are stored least-significant byte first, independent of
 * the byte order of the machine running the compiler.
 *
 * Always returns 0.
 */
static inline int scc_x86_patch_imm0(scc_mcode_t *mcode, usize offset,
                                     scc_x86_operand_value_t imm0) {
    if (imm0.kind != SCC_X86_OPR_IMM && imm0.kind != SCC_X86_OPR_RELOC) {
        Panic("patch_imm0 called with non-immediate operand kind %d",
              imm0.kind);
    }

    u64 val;
    if (imm0.kind == SCC_X86_OPR_IMM)
        val = imm0.imm0;
    else
        val = (u64)imm0.reloc.imm; // value stored in the relocation placeholder

    u8 le[8];
    for (usize i = 0; i < sizeof le; i++) {
        le[i] = (u8)(val >> (8 * i));
    }
    patch_bytes(mcode, offset, le, sizeof le);
    return 0;
}
/* ---------- Extension: immediate patch with an explicit size
 * (recommended for higher layers) ---------- */

/*
 * Write the low `size` bytes of `value` into the machine code at byte
 * `offset`, least-significant byte first, independent of the byte order of
 * the machine running the compiler.
 *
 * Returns 0 on success, -1 when `size` is not 1, 2, 4 or 8.
 */
static inline int scc_x86_patch_imm0_ex(scc_mcode_t *mcode, usize offset,
                                        u64 value, usize size) {
    if (size != 1 && size != 2 && size != 4 && size != 8)
        return -1;

    u8 le[8];
    for (usize i = 0; i < size; i++) {
        le[i] = (u8)(value >> (8 * i));
    }
    patch_bytes(mcode, offset, le, size);
    return 0;
}
/*
 * Apply one relocation of type `patch_type` to the machine code.
 *
 * SCC_X86_PATCH_PC32: the 4-byte field occupies [offset - 4, offset) and
 * receives `value - offset`, i.e. `offset` is the position immediately
 * after the field and `value` is the target position.
 * All other patch types are not implemented yet (TODO).
 *
 * Panics when `offset` is smaller than the field itself (the subtraction
 * would otherwise wrap to a huge unsigned offset) and when the displacement
 * cannot be represented in 32 bits (it would otherwise be silently
 * truncated into a wrong branch target).
 */
static inline void scc_x86_patch(scc_mcode_t *mcode,
                                 scc_x86_patch_type_t patch_type, u64 offset,
                                 i64 value) {
    switch (patch_type) {
    case SCC_X86_PATCH_PC32: {
        if (offset < 4) {
            Panic("PC32 patch offset too small for a rel32 field: offset=%zu",
                  (usize)offset);
            break; // never touch the buffer should Panic return
        }
        // Same modular arithmetic as `value - offset`, made explicit.
        i64 rel = (i64)((u64)value - offset);
        if (rel < -2147483648LL || rel > 2147483647LL) {
            Panic("PC32 displacement does not fit in 32 bits: offset=%zu",
                  (usize)offset);
            break;
        }
        scc_x86_patch_brdisp(mcode, (usize)(offset - 4),
                             scc_x86_op_relbr((i32)rel));
    } break;
    default:
        TODO();
        break;
    }
}
#endif /* __SCC_X86_PATCH_H__ */

View File

@@ -1,9 +1,14 @@
#include <assert.h>
#include <string.h>
#include <x86/scc_x86_encode.h>
#include <x86/scc_x86_iform.h>
#include <x86/scc_x86_reg.h>
#if 1
#ifdef LOG_INFO
#undef LOG_INFO
#endif
#define LOG_INFO(...)
#endif
/* ---------- 内部辅助 ---------- */
static inline void emit_u8(scc_mcode_t *m, uint8_t v) {
scc_mcode_add_u8(m, v);
@@ -54,13 +59,13 @@ static int infer_operand_width(const scc_x86_iform_info_t *info,
}
const char *eosz = info->encode.eosz;
int default_64 = info->encode.default_64b;
if (!strcmp(eosz, "o16"))
if (!scc_strcmp(eosz, "o16"))
return 16;
if (!strcmp(eosz, "o32"))
if (!scc_strcmp(eosz, "o32"))
return 32;
if (!strcmp(eosz, "o64"))
if (!scc_strcmp(eosz, "o64"))
return 64;
if (!strcmp(eosz, "oszall") || !strcmp(eosz, "osznot16"))
if (!scc_strcmp(eosz, "oszall") || !scc_strcmp(eosz, "osznot16"))
return default_64 ? 64 : 32;
return 32;
}
@@ -69,16 +74,11 @@ static int infer_operand_width(const scc_x86_iform_info_t *info,
static int need_rexw(const scc_x86_encoding_t *enc, scc_x86_reg_t reg_op,
scc_x86_reg_t rm_op, scc_x86_reg_t base,
scc_x86_reg_t idx) {
/* 强制要求 / 明确禁止 优先级最高 */
if (enc->rex_w == 1)
return 1;
if (enc->rex_w == 0)
return 0;
/* 指令已隐含 64 位语义(如 PUSH/POP但仍可能被 64 位操作数需要 W例如
* PUSH r64 不需要 W但 PUSH r/m64 也不需要。总之 default_64b
* 时一般无需额外 W但若操作数明确是 64 位且模板没有禁止,我们仍加上以兼容
* movabs 等 */
int has_64bit_op = 0;
if (reg_op != SCC_X86_REG_INVALID && scc_reg_width(reg_op) == 64)
has_64bit_op = 1;
@@ -90,9 +90,6 @@ static int need_rexw(const scc_x86_encoding_t *enc, scc_x86_reg_t reg_op,
has_64bit_op = 1;
if (enc->default_64b) {
/* 对于默认 64 位的指令(如 CALL, JMP有些仍需要 REX.W
* 访问扩展寄存器,但若 reg/rm 本身是 64 位且需要 REX.B/R 则一并设置 W
* 来保证编码正确?这里简单处理:只要访问扩展寄存器就加 W */
return (reg_op != SCC_X86_REG_INVALID && reg_rex_bit(reg_op)) ||
(rm_op != SCC_X86_REG_INVALID && reg_rex_bit(rm_op)) ||
(base != SCC_X86_REG_INVALID && reg_rex_bit(base)) ||
@@ -101,7 +98,6 @@ static int need_rexw(const scc_x86_encoding_t *enc, scc_x86_reg_t reg_op,
: 0;
}
/* 普通指令:有 64 位操作数即加 W */
return has_64bit_op;
}
@@ -109,8 +105,7 @@ static int need_66_prefix(const scc_x86_encoding_t *enc, scc_x86_reg_t reg_op,
scc_x86_reg_t rm_op) {
if (enc->osz_required)
return 1;
// 如果强制指定了操作大小不自动加66
if (!strcmp(enc->eosz, "o32") || !strcmp(enc->eosz, "o64"))
if (!scc_strcmp(enc->eosz, "o32") || !scc_strcmp(enc->eosz, "o64"))
return 0;
if (reg_op != SCC_X86_REG_INVALID && scc_reg_width(reg_op) == 16)
return 1;
@@ -215,17 +210,17 @@ static void emit_immediate(scc_mcode_t *m, const scc_x86_encoding_t *enc,
int64_t imm_val, int op_width) {
const char *oc2 = tmpl[imm_idx].oc2;
int imm_size = enc->imm_size;
if (!strcmp(oc2, "b"))
if (!scc_strcmp(oc2, "b"))
imm_size = 1;
else if (!strcmp(oc2, "w"))
else if (!scc_strcmp(oc2, "w"))
imm_size = 2;
else if (!strcmp(oc2, "z"))
else if (!scc_strcmp(oc2, "z"))
imm_size = (op_width <= 16) ? op_width / 8 : (op_width <= 32 ? 4 : 4);
else if (!strcmp(oc2, "v"))
else if (!scc_strcmp(oc2, "v"))
imm_size = op_width / 8;
else if (!strcmp(oc2, "d") || !strcmp(oc2, "ss"))
else if (!scc_strcmp(oc2, "d") || !scc_strcmp(oc2, "ss"))
imm_size = 4;
else if (!strcmp(oc2, "q"))
else if (!scc_strcmp(oc2, "q"))
imm_size = 8;
LOG_INFO("[IMM] val=%lld size=%d", imm_val, imm_size);
@@ -268,9 +263,9 @@ static void emit_modrm_sib_disp(scc_mcode_t *m,
if (ops[i].kind == SCC_X86_OPR_REG) {
LOG_INFO("[OPD] %d: REG kind, name=\"%s\" reg=%d", i, tname,
ops[i].reg);
if (strncmp(tname, "REG0", 4) == 0)
if (scc_strcmp(tname, "REG0") == 0)
reg_r = ops[i].reg;
else if (strncmp(tname, "REG1", 4) == 0)
else if (scc_strcmp(tname, "REG1") == 0)
reg_b = ops[i].reg;
else {
if (reg_r == SCC_X86_REG_INVALID)
@@ -282,7 +277,7 @@ static void emit_modrm_sib_disp(scc_mcode_t *m,
has_mem = 1;
memdesc = ops[i].mem;
} else if (ops[i].kind == SCC_X86_OPR_IMM) {
imm_val = ops[i].imm;
imm_val = ops[i].imm0;
imm_idx = i;
}
}
@@ -296,7 +291,7 @@ static void emit_modrm_sib_disp(scc_mcode_t *m,
else if (enc->modrm_reg_fix >= 0)
modrm |= (enc->modrm_reg_fix & 7) << 3;
int32_t disp = memdesc.disp;
int32_t disp = memdesc.disp.displacement;
int dsize = disp_size(disp, memdesc.base);
modrm |= (dsize == 0) ? 0 : (dsize == 8) ? 0x40 : 0x80;
@@ -341,22 +336,38 @@ static void emit_modrm_sib_disp(scc_mcode_t *m,
}
}
} else {
modrm = 0xC0;
if (enc->modrm_reg_fix >= 0)
modrm |= (enc->modrm_reg_fix & 7) << 3;
else if (reg_r != SCC_X86_REG_INVALID)
modrm |= (reg_low3(reg_r) & 7) << 3;
// 特殊情况:只有一个显式寄存器操作数,且 reg_fix/rm_fix 均未指定,且
// mod_fix == 3 此时操作数应放在 rm 字段reg 字段固定为 0例如 SETZ
// 指令)
int is_single_reg_op =
(info->num_explicit_ops == 1 && enc->modrm_reg_fix == -1 &&
enc->modrm_rm_fix == -1 && enc->mod_fix == 3);
if (is_single_reg_op && reg_r != SCC_X86_REG_INVALID &&
reg_b == SCC_X86_REG_INVALID) {
// 操作数作为 rmreg 部分为 0
modrm = 0xC0 | (reg_low3(reg_r) & 7);
emit_u8(m, modrm);
LOG_INFO("[MODRM] single reg operand treated as rm, emit 0x%02x",
modrm);
} else {
modrm = 0xC0;
if (enc->modrm_reg_fix >= 0)
modrm |= (enc->modrm_reg_fix & 7) << 3;
else if (reg_r != SCC_X86_REG_INVALID)
modrm |= (reg_low3(reg_r) & 7) << 3;
if (enc->modrm_rm_fix >= 0) {
modrm |= enc->modrm_rm_fix & 7;
} else if (reg_b != SCC_X86_REG_INVALID) {
modrm |= reg_low3(reg_b) & 7;
} else if (enc->modrm_reg_fix >= 0 && reg_r != SCC_X86_REG_INVALID) {
modrm |= reg_low3(reg_r) & 7;
} else if (reg_r != SCC_X86_REG_INVALID) {
modrm |= reg_low3(reg_r) & 7;
if (enc->modrm_rm_fix >= 0) {
modrm |= enc->modrm_rm_fix & 7;
} else if (reg_b != SCC_X86_REG_INVALID) {
modrm |= reg_low3(reg_b) & 7;
} else if (enc->modrm_reg_fix >= 0 &&
reg_r != SCC_X86_REG_INVALID) {
modrm |= reg_low3(reg_r) & 7;
} else if (reg_r != SCC_X86_REG_INVALID) {
modrm |= reg_low3(reg_r) & 7;
}
emit_u8(m, modrm);
}
emit_u8(m, modrm);
}
LOG_INFO("[MODRM] emit byte 0x%02x", modrm);
@@ -394,12 +405,12 @@ int scc_x86_encode_inst(scc_mcode_t *mcode, scc_x86_iform_t iform,
for (int i = 0; i < num_ops; i++) {
const char *tname = tmpl[i].name;
if (ops[i].kind == SCC_X86_OPR_REG) {
if (strncmp(tname, "REG0", 4) == 0) {
if (scc_strcmp(tname, "REG0") == 0) {
if (enc->modrm_reg_fix >= 0)
rm_field = ops[i].reg; // reg固定 → REG0为rm
else
reg_field = ops[i].reg; // 否则为reg
} else if (strncmp(tname, "REG1", 4) == 0) {
} else if (scc_strcmp(tname, "REG1") == 0) {
if (enc->modrm_rm_fix >= 0)
reg_field = ops[i].reg; // rm固定 → REG1为reg
else
@@ -416,11 +427,25 @@ int scc_x86_encode_inst(scc_mcode_t *mcode, scc_x86_iform_t iform,
idx_reg = ops[i].mem.index;
} else if (ops[i].kind == SCC_X86_OPR_IMM ||
ops[i].kind == SCC_X86_OPR_RELBR) {
imm_val = ops[i].imm;
imm_val = ops[i].imm0;
imm_idx = i;
}
}
// ===== 新增:特殊处理 SETZ 类指令只有一个寄存器操作数mod_fix=3 且
// reg_fix/rm_fix 未指定)=====
if (enc->has_modrm && info->num_explicit_ops == 1 &&
enc->modrm_reg_fix == -1 && enc->modrm_rm_fix == -1 &&
enc->mod_fix == 3) {
if (reg_field != SCC_X86_REG_INVALID &&
rm_field == SCC_X86_REG_INVALID) {
// 将操作数从 reg_field 移到 rm_field
rm_field = reg_field;
reg_field = SCC_X86_REG_INVALID;
}
}
// ========================================================================
int op_width = infer_operand_width(info, ops);
LOG_INFO("[OPWIDTH] %d bits", op_width);
@@ -446,4 +471,4 @@ int scc_x86_encode_inst(scc_mcode_t *mcode, scc_x86_iform_t iform,
}
return 0;
}
}