feat(backend/riscv32): 实现基础的编译器功能

- 完成 RV32IMA 指令集的代码生成
- 添加整数运算、分支、调用等基本指令支持
- 实现从 IR 到机器码的转换
- 添加简单的测试用例和测试框架
This commit is contained in:
ZZY
2025-03-08 16:50:21 +08:00
parent 95bf44eb3f
commit 172d72b0a0
32 changed files with 980 additions and 469 deletions

View File

@ -3,11 +3,16 @@ all: ccompiler
run: ccompiler
./ccompiler test.c flat.bin
ccompiler: frontend
gcc -g rv32ima_codegen.c ../../middleend/ir.c -L../../frontend -lfrontend -o ccompiler
ccompiler: frontend ir
gcc -g rv32ima_codegen.c -L../../frontend -lfrontend -L../../middleend -lir -o ccompiler
frontend:
make -C ../../frontend
ir:
make -C ../../middleend
clean:
rm -f ccompiler flat.bin
rm -f ccompiler flat.bin
make -C ../../frontend clean
make -C ../../middleend clean

View File

@ -211,7 +211,7 @@ typedef enum {
#define SLTZ(rd, rs) SLT(rd, rs, REG_X0) // 小于0则置位
// sgtz rd, rs (Set if > zero)
#define SG TZ(rd, rs) SLT(rd, REG_X0, rs) // 大于0则置位
#define SGTZ(rd, rs) SLT(rd, REG_X0, rs) // 大于0则置位
// beqz rs, offset (Branch if = zero)
#define BEQZ(rs, offset) BEQ(rs, REG_X0, offset) // 为0则转移
@ -291,13 +291,16 @@ typedef enum {
#define LI(rd, num) \
LUI(rd, num), \
ADDI(rd, rd, num)
#define MV(rd, rs) ADDI(rd, rs, 0)
#define NOT(rd, rs) XORI(rd, rs, -1)
#define CALL(offset) \
AUIPC(REG_X1, offset), \
JALR(REG_X1, REG_X1, offset)
#define SEQZ(rd, rs) SLTIU(rd, rs, 1)
#define SGT(rd, rs1, rs2) SLT(rd, rs2, rs1)
// TODO: CALL generates a wrong target when the offset is outside the range JALR can encode
#define CALL(offset) \
AUIPC(REG_X1, REG_X0), \
JALR(REG_X1, REG_X1, offset)
#define CALL_ABS(addr) \
AUIPC(REG_X0, addr), \
JALR(REG_X1, REG_X0, addr)

View File

@ -9,22 +9,6 @@ typedef union rv32code {
uint8_t bytes[4];
} rv32code_t;
#define CRT_CODE_SIZE 16
// 使用示例
rv32code_t gcodes[] = {
LI(REG_SP, 0x1000),
LI(REG_RA, 0x0),
CALL_ABS(CRT_CODE_SIZE << 2),
// Exit
ECALL_EXIT2(),
};
void test_raw_gen(FILE* out) {
fwrite(gcodes, sizeof(rv32code_t), sizeof(gcodes)/sizeof(gcodes[0]), out);
}
#include "../../frontend/frontend.h"
#include "../../middleend/ir.h"
typedef struct {
@ -59,9 +43,9 @@ int write_inst(union rv32code ins, FILE* fp) {
}
#define GENCODE(code) vector_push(ctx.codes, (rv32code_t)(code)); len += 4
#define GENCODES(code) do { \
#define GENCODES(...) do { \
rv32code_t codes[] = { \
code \
__VA_ARGS__ \
}; \
for (int i = 0; i < sizeof(codes) / sizeof(codes[0]); i ++) { \
GENCODE(codes[i]); \
@ -105,13 +89,18 @@ static int func_idx(ir_func_t* tofunc) {
}
static int system_func(const char* name) {
static const char defined_func[][16] = {
"ecall_pnt_int",
static struct {
const char* name;
int ecall_num;
} defined_func[] = {
{"ecall_pnt_int", 1},
{"ecall_pnt_char", 11},
{"ecall_scan_int", 1025 + 4},
};
for (int j = 0; j < sizeof(defined_func)/sizeof(defined_func[0]); j++) {
if (strcmp(name, defined_func[j]) == 0) {
return j;
for (int i = 0; i < sizeof(defined_func)/sizeof(defined_func[0]); i++) {
if (strcmp(name, defined_func[i].name) == 0) {
return defined_func[i].ecall_num;
}
}
return -1;
@ -119,11 +108,22 @@ static int system_func(const char* name) {
static int get_node_val(ir_node_t* ptr, int reg) {
int len = 0;
if (ptr->tag == IR_NODE_CONST_INT) {
GENCODES(LI(reg, ptr->data.const_int.val));
} else {
int offset = stack_offset(ptr);
GENCODE(LW(reg, REG_SP, offset));
switch (ptr->tag) {
case IR_NODE_CONST_INT: {
GENCODES(LI(reg, ptr->data.const_int.val));
break;
}
// case IR_NODE_CALL: {
// // GENCODE(SW(REG_A0, REG_SP, ctx.stack_offset));
// // GENCODE()
// // break;
// }
default: {
int offset = stack_offset(ptr);
GENCODE(LW(reg, REG_SP, offset));
break;
}
}
return len;
}
@ -139,8 +139,6 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
// S1 = *(S0 + imm)
offset = stack_offset(instr->data.load.target);
GENCODE(LW(REG_T0, REG_SP, offset));
// offset = STACK_OFFSET(instr);
// GENCODE(SW(REG_T0, REG_SP, offset));
break;
}
case IR_NODE_STORE: {
@ -180,6 +178,27 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
case IR_OP_MOD:
GENCODE(REM(REG_T0, REG_T1, REG_T2));
break;
case IR_OP_EQ:
GENCODE(XOR(REG_T0, REG_T1, REG_T2));
GENCODE(SEQZ(REG_T0, REG_T0));
break;
case IR_OP_GE:
GENCODE(SLT(REG_T0, REG_T1, REG_T2));
GENCODE(SEQZ(REG_T0, REG_T0));
break;
case IR_OP_GT:
GENCODE(SGT(REG_T0, REG_T1, REG_T2));
break;
case IR_OP_LE:
GENCODE(SGT(REG_T0, REG_T1, REG_T2));
GENCODE(SEQZ(REG_T0, REG_T0));
break;
case IR_OP_LT:
GENCODE(SLT(REG_T0, REG_T1, REG_T2));
break;
case IR_OP_NEQ:
GENCODE(XOR(REG_T0, REG_T1, REG_T2));
break;
default:
error("ERROR gen_instr op in riscv");
break;
@ -244,13 +263,15 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
}
int system_func_idx = system_func(instr->data.call.callee->name);
if (system_func_idx == 0) {
// ecall_pnt_int
GENCODE(ADDI(REG_A7, REG_X0, 0x1));
GENCODE(ECALL());
break;
if (system_func_idx != -1) {
// ecall
GENCODES(
ADDI(REG_A7, REG_X0, system_func_idx),
ECALL()
);
goto CALL_END;
}
jmp_t* jmp = xmalloc(sizeof(jmp_t));
*jmp = (jmp_t) {
.base_offset = ctx.cur_func_offset + ctx.cur_block_offset + len,
@ -260,10 +281,11 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
.cur_idx = func_idx(ctx.cur_func),
};
vector_push(ctx.call, jmp);
GENCODES((
CALL(0)
));
GENCODES(CALL(0));
CALL_END:
offset = stack_offset(instr);
GENCODE(SW(REG_A0, REG_SP, offset));
break;
}
default:
@ -335,10 +357,9 @@ static int gen_func(ir_func_t* func) {
return len;
}
static void gen_code(ir_prog_t* prog) {
static int gen_code(ir_prog_t* prog) {
ctx.prog = prog;
for (int i = 0; i < prog->extern_funcs.size; i++) {
if (system_func(prog->extern_funcs.data[i]->name) == -1) {
error("func %s not defined and not a system func", prog->extern_funcs.data[i]->name);
@ -354,12 +375,12 @@ static void gen_code(ir_prog_t* prog) {
len += ret;
}
for (int i = 0; i < ctx.call.size; i++) {
jmp_t* jmp = vector_at(ctx.call, i);
int32_t code = 0;
// FIXME ERROR
int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx] + jmp->base_offset);
assert(offset > -0xfff && offset < 0xfff);
int32_t codes[2] = {
CALL(offset)
};
@ -369,6 +390,14 @@ static void gen_code(ir_prog_t* prog) {
};
}
}
// Got Main pos;
for (int i = 0; i < prog->funcs.size; i++) {
if (strcmp(vector_at(prog->funcs, i)->name, "main") == 0) {
return jmp_cache[i];
}
}
error("main not found");
}
int main(int argc, char** argv) {
@ -390,7 +419,23 @@ int main(int argc, char** argv) {
struct ASTNode* root = frontend(infilename, in, (sread_fn)fread_s);
gen_ir_from_ast(root);
gen_code(&prog);
int main_pos = gen_code(&prog);
#define CRT_CODE_SIZE 16
rv32code_t gcodes[] = {
LI(REG_SP, 0x1000),
LI(REG_RA, 0x0),
CALL(0),
// Exit
ECALL_EXIT2(),
};
main_pos += (CRT_CODE_SIZE - 4) * 4;
assert(main_pos > -0xfff && main_pos < 0xfff);
rv32code_t call_main[2] = {
CALL(main_pos)
};
gcodes[4] = call_main[0];
gcodes[5] = call_main[1];
for (int i = 0; i < CRT_CODE_SIZE; i++) {
write_inst((union rv32code) {

View File

@ -0,0 +1,3 @@
// Minimal program: main returns the integer constant 65536 (1 << 16).
int main() {
return 65536;
}

View File

@ -0,0 +1,8 @@
// Uninitialized declarations followed by assignments and mixed-precedence
// arithmetic: a = 1 + 2 * 3 (7), b = 7, then a = a - b + 1, so main returns 1.
int main() {
int a;
int b;
a = 1 + 2 * 3;
b = 7;
a = a - b + 1;
return a;
}

View File

@ -0,0 +1,6 @@
// Declaration with an initializer: x starts at 10, is incremented once,
// and main returns 11.
int main() {
int x = 10;
x = x + 1;
return x;
}

View File

@ -0,0 +1,10 @@
// if/else with a non-zero condition: a is 1, so the then-branch runs
// and main returns 1.
int main(void) {
int a;
a = 1;
if (a) {
a = 1;
} else {
a = 2;
}
return a;
}

View File

@ -0,0 +1,10 @@
// if/else with a zero condition: a is 0, so the else-branch runs
// and main returns 2.
int main(void) {
int a;
a = 0;
if (a) {
a = 1;
} else {
a = 2;
}
return a;
}

View File

@ -0,0 +1,9 @@
// add is only declared here; its definition comes after main, so the call
// below targets a function that is defined later in the file.
int add(int, int);
// Returns add(1, 2), i.e. 3.
int main(void) {
return add(1, 2);
}
// Returns the sum of its two int arguments.
int add(int a, int b) {
return a + b;
}

View File

@ -0,0 +1,5 @@
// while loop with a single-statement body: i counts from 0 up to 10,
// and main returns 10.
int main() {
int i = 0;
while (i < 10) i = i + 1;
return i;
}

View File

@ -0,0 +1,12 @@
// #include <stdio.h>
// do-while loop: the body runs 7 times (i = 0..6), doubling pow each time,
// so main returns 2^7 = 128.
int main() {
int i = 0;
int pow = 1;
do {
pow = pow * 2;
i = i + 1;
} while(i < 7);
// printf("%d", pow);
return pow;
}

View File

@ -0,0 +1,7 @@
// for loop with a loop-scoped counter and a compound assignment (i += 1):
// the body runs 10 times, so main returns 10.
int main() {
int num = 0;
for (int i = 0; i < 10; i += 1) {
num = num + 1;
}
return num;
}

View File

@ -0,0 +1,7 @@
// Returns the sum of its two int arguments. Defined before main in this file.
int add(int a, int b) {
return a + b;
}
// main is not the first function in the file; it returns add(1, 2), i.e. 3.
int main(void) {
return add(1, 2);
}

View File

@ -0,0 +1,18 @@
// #include <stdio.h>
// factorial is declared here and defined after main.
int factorial(int num);
// Stores the result of factorial(5) in a local and returns it (120).
int main() {
int num = 5;
int result = factorial(num);
// printf("%d", result);
return result;
}
// Recursive factorial: returns 1 for num == 0, otherwise
// num * factorial(num - 1). There is no guard for negative num.
int factorial(int num) {
if (num == 0) {
return 1;
} else {
return num * factorial(num - 1);
}
}

View File

@ -0,0 +1,28 @@
# Differential test driver: every *.c file in this directory is built with the
# host compiler and with the compiler under test, and the exit statuses of the
# native binary and of the VM running flat.bin are compared.
VM := ../../rv32-vm
CC := ../../ccompiler
STD_CC := gcc
TESTS := $(wildcard *.c)
# One "<name>_test" target per test source.
TEST_TARGETS := $(patsubst %.c, %_test, $(TESTS))
all: $(TEST_TARGETS)
# make runs every recipe line in its own shell, so the two exit statuses must
# be captured and compared inside ONE logical line (joined with backslashes);
# otherwise ret_gcc / ret_vm are empty by the time the comparison runs.
%_test: %.c
	@$(STD_CC) -g -o $@ $<
	@$(CC) $< flat.bin
	@./$@ ; ret_gcc=$$? ; \
	$(VM) flat.bin ; ret_vm=$$? ; \
	echo "Testing $@" ; \
	if [ $$ret_gcc -eq $$ret_vm ]; then \
		echo "$@ passed"; \
	else \
		echo "$@ failed: GCC returned $$ret_gcc, VM returned $$ret_vm"; \
		exit 1; \
	fi
clean:
	rm -f $(TEST_TARGETS) flat.bin
.PHONY: all clean

View File

@ -0,0 +1,6 @@
// Two locals declared in a single declaration (int a, b), assigned 1 and 2;
// main returns their sum, 3.
int main() {
int a, b;
a = 1;
b = 2;
return a + b;
}

View File

@ -0,0 +1,86 @@
import subprocess
import os
from pathlib import Path
# Configuration parameters
TEST_DIR = Path(".")
CC_PATH = Path("../../ccompiler.exe")
VM_PATH = Path("../../rv32-vm.exe")
WORKSPACE = Path(".") # working directory for test runs
# Test case map (example): test source path -> integer the VM is expected to report
TEST_CASE_MAP = {
"./01_return.c": 65536,
"./02_decl_expr.c": 1,
"./03_decl_init.c": 11,
"./04_if.c": 1,
"./05_else.c": 2,
"./06_fcall.c": 3,
"./07_while.c": 10,
"./08_do_while.c": 128,
"./09_for.c": 10,
"./10_main.c": 3,
"./11_recursive.c": 120,
}
def run_command(cmd, capture_stderr=True):
    """Run ``cmd`` inside WORKSPACE with a one-second timeout.

    Args:
        cmd: argument list handed to ``subprocess.run``.
        capture_stderr: when true, the child's stderr is piped and returned.

    Returns:
        The child's stderr as text with surrounding whitespace stripped, or
        ``None`` when ``capture_stderr`` is false.

    Raises:
        subprocess.TimeoutExpired: if the command runs longer than one second.
    """
    stderr_target = subprocess.PIPE if capture_stderr else None
    completed = subprocess.run(
        cmd,
        cwd=WORKSPACE,
        stderr=stderr_target,
        text=True,
        timeout=1,
    )
    if not capture_stderr:
        return None
    return completed.stderr.strip()
def run_test(test_file, expected):
    """Compile one test source, run the image in the VM and check the result.

    Args:
        test_file: path of the C source handed to the compiler under test.
        expected: integer that the VM is expected to report.

    Returns:
        True when the integer parsed from the last whitespace-separated token
        of the VM's stderr equals ``expected``; False on a compile failure,
        a compile or VM timeout, unparsable VM output, or a mismatch.
    """
    print(f"\nTesting {test_file}...")
    flat_bin = WORKSPACE / "flat.bin"

    # Drop any stale image first so a failed compile cannot be mistaken for a
    # success because an older flat.bin is still lying around.
    if flat_bin.exists():
        flat_bin.unlink()

    # 1. Compile to flat.bin.
    # NOTE(review): no output path is passed here, while the Makefiles invoke
    # the compiler as "<cc> <src> flat.bin" - confirm the default output name.
    compile_cmd = [str(CC_PATH), str(test_file)]
    try:
        compile_err = run_command(compile_cmd)
    except subprocess.TimeoutExpired:
        # A hung compiler fails this test only; the rest of the suite still runs.
        print(" Compilation timed out")
        return False
    if not flat_bin.exists():
        print(f" Compilation failed: {compile_err}")
        return False

    # 2. Run the virtual machine.
    vm_cmd = [str(VM_PATH), "flat.bin"]
    try:
        vm_err = run_command(vm_cmd)
    except subprocess.TimeoutExpired:
        print(" Timeout expired")
        return False

    # 3. Parse the result (the last token of stderr is taken as the value).
    try:
        actual = int(vm_err.split()[-1])
    except (ValueError, IndexError):
        print(f" Invalid VM output: {vm_err}")
        return False

    # 4. Verify the result.
    if actual == expected:
        print(f" PASSED {test_file}")
        return True
    print(f" FAILED: Expected {expected}, got {actual}")
    return False
def main():
    """Run every case in TEST_CASE_MAP and print a pass/total summary.

    Returns:
        0 when every test passed, 1 otherwise, so the value can be used
        directly as the process exit status.
    """
    flat_bin = WORKSPACE / "flat.bin"
    passed = 0
    total = 0
    for test_file, expected in TEST_CASE_MAP.items():
        total += 1
        if run_test(TEST_DIR / test_file, expected):
            passed += 1
        # Remove the intermediate image so it cannot leak into the next test.
        if flat_bin.exists():
            os.remove(flat_bin)
    print(f"\nTest Summary: {passed}/{total} passed")
    return 0 if passed == total else 1


if __name__ == "__main__":
    # Propagate failures through the exit status so make/CI can detect them.
    raise SystemExit(main())