- 添加 .gitignore 文件,忽略编译器生成的二进制文件 - 重构 lexer.c 文件,改进了关键字处理和字符串处理 - 更新前端的前端、解析器和 AST 相关文件,以适应新的词法分析器 - 优化了 token 相关的定义和函数,引入了新的 token 类型
465 lines
13 KiB
C
465 lines
13 KiB
C
#define RISCV_VM_BUILDIN_ECALL
|
|
#include "rv32gen.h"
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
|
|
/*
 * One 32-bit RV32 instruction word.
 *
 * `code` is the encoded instruction; `bytes` overlays the same storage so the
 * word can also be viewed byte by byte (in the host's byte order).
 * `code` must stay the first member: brace initialisers such as
 * `rv32code_t x[] = { ... }` initialise the first member of each element.
 */
union rv32code {
    uint32_t code;
    uint8_t bytes[4];
};

typedef union rv32code rv32code_t;
|
|
|
|
#include "../../frontend/frontend.h"
|
|
#include "../../middleend/middleend.h"
|
|
|
|
/*
 * Fix-up record for a jump/branch/call whose target offset is not yet known
 * when the instruction is emitted. A placeholder instruction is written to
 * ctx.codes and patched once all block / function sizes are known.
 */
typedef struct {
    int code_pos;    /* index into ctx.codes of the placeholder instruction */
    int to_idx;      /* index of the destination basic block or function */
    int cur_idx;     /* index of the basic block or function containing the jump */
    int base_offset; /* byte offset used to locate the instruction during patching */
    enum {
        JMP_BRANCH = 0, /* conditional branch (patched as BNEZ) */
        JMP_JUMP = 1,   /* unconditional jump (patched as J) */
        JMP_CALL = 2,   /* function call (patched as CALL) */
    } type;
} jmp_t;
|
|
|
|
static struct {
|
|
vector_header(codes, rv32code_t);
|
|
int stack_offset;
|
|
int stack_base;
|
|
int tmp_reg;
|
|
ir_bblock_t* cur_block;
|
|
ir_func_t* cur_func;
|
|
ir_prog_t* prog;
|
|
vector_header(jmp, jmp_t*);
|
|
vector_header(call, jmp_t*);
|
|
|
|
int cur_func_offset;
|
|
int cur_block_offset;
|
|
} ctx;
|
|
|
|
int write_inst(union rv32code ins, FILE* fp) {
|
|
return fwrite(&ins, sizeof(union rv32code), 1, fp);
|
|
}
|
|
|
|
/*
 * GENCODE(code): append one instruction word to ctx.codes and add its size
 * (4 bytes) to the caller's local `len` counter, which must be in scope.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * including under an unbraced if/else. The cast to the union type relies on
 * the compiler's cast-to-union extension, as the original did.
 */
#define GENCODE(code) do { \
        vector_push(ctx.codes, (rv32code_t)(code)); \
        len += 4; \
    } while (0)

/*
 * GENCODES(a, b, ...): append several instruction words in order. Also
 * accepts macros that expand to a comma-separated list of words.
 *
 * The temporaries use reserved-looking names so they cannot capture
 * identifiers (such as `i` or `codes`) that appear in the arguments.
 */
#define GENCODES(...) do { \
        rv32code_t gencodes_buf_[] = { \
            __VA_ARGS__ \
        }; \
        for (size_t gencodes_i_ = 0; \
             gencodes_i_ < sizeof(gencodes_buf_) / sizeof(gencodes_buf_[0]); \
             gencodes_i_++) { \
            GENCODE(gencodes_buf_[gencodes_i_]); \
        } \
    } while (0)
|
|
|
|
/*
 * Returns the byte offset (relative to SP) of the stack slot owned by the IR
 * node `ptr` inside the current function's frame.
 *
 * Every instruction of every basic block owns one 4-byte slot; slots are laid
 * out in block order, then instruction order, starting at ctx.stack_base.
 *
 * `ptr` must be an instruction of ctx.cur_func. If it is not, this asserts in
 * debug builds and returns -1 when assertions are compiled out (the original
 * fell off the end of the function in that case, which is undefined).
 */
static int stack_offset(ir_node_t* ptr) {
    int offset = ctx.stack_base;
    for (int b = 0; b < ctx.cur_func->bblocks.size; b++) {
        ir_bblock_t* block = vector_at(ctx.cur_func->bblocks, b);
        for (int k = 0; k < block->instrs.size; k++) {
            if (vector_at(block->instrs, k) == ptr) {
                offset += k * 4;
                assert(offset >= 0 && offset < ctx.stack_offset);
                return offset;
            }
        }
        /* Not in this block: skip over all of its slots. */
        offset += block->instrs.size * 4;
    }
    assert(0);
    return -1;
}
|
|
|
|
/*
 * Returns the index of `toblock` within ctx.cur_func->bblocks.
 *
 * The block must belong to the current function. If it does not, this asserts
 * in debug builds and returns -1 when assertions are compiled out (the
 * original had no return on that path).
 */
static int block_idx(ir_bblock_t* toblock) {
    for (int i = 0; i < ctx.cur_func->bblocks.size; i++) {
        if (vector_at(ctx.cur_func->bblocks, i) == toblock) {
            return i;
        }
    }
    assert(0);
    return -1;
}
|
|
|
|
/*
 * Returns the index of `tofunc` within ctx.prog->funcs.
 *
 * The function must be defined in the current program. If it is not, this
 * asserts in debug builds and returns -1 when assertions are compiled out
 * (the original had no return on that path).
 */
static int func_idx(ir_func_t* tofunc) {
    for (int i = 0; i < ctx.prog->funcs.size; i++) {
        if (vector_at(ctx.prog->funcs, i) == tofunc) {
            return i;
        }
    }
    assert(0);
    return -1;
}
|
|
|
|
/*
 * Maps the name of a built-in (VM-provided) function to its ecall number.
 * Returns -1 when `name` is not one of the built-ins listed below.
 */
static int system_func(const char* name) {
    static const struct {
        const char* name;
        int ecall_num;
    } builtins[] = {
        {"ecall_pnt_int", 1},
        {"ecall_pnt_char", 11},
        {"ecall_scan_int", 1025 + 4},
    };
    const size_t count = sizeof(builtins) / sizeof(builtins[0]);

    size_t idx = 0;
    while (idx < count) {
        if (strcmp(name, builtins[idx].name) == 0) {
            return builtins[idx].ecall_num;
        }
        idx++;
    }
    return -1;
}
|
|
|
|
/*
 * Emits code that places the value of IR node `ptr` into register `reg`.
 *
 * Integer constants are materialised with LI; every other node is read back
 * from the stack slot that belongs to it (see stack_offset()).
 *
 * Returns the number of bytes of code emitted.
 */
static int get_node_val(ir_node_t* ptr, int reg) {
    int len = 0;

    if (ptr->tag == IR_NODE_CONST_INT) {
        GENCODES(LI(reg, ptr->data.const_int.val));
    } else {
        int slot = stack_offset(ptr);
        GENCODE(LW(reg, REG_SP, slot));
    }

    return len;
}
|
|
|
|
static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
|
|
int len = 0;
|
|
int offset;
|
|
switch (instr->tag) {
|
|
case IR_NODE_ALLOC: {
|
|
break;
|
|
}
|
|
case IR_NODE_LOAD: {
|
|
// S1 = *(S0 + imm)
|
|
offset = stack_offset(instr->data.load.target);
|
|
GENCODE(LW(REG_T0, REG_SP, offset));
|
|
break;
|
|
}
|
|
case IR_NODE_STORE: {
|
|
// *(S0 + imm) = S1
|
|
len += get_node_val(instr->data.store.value, REG_T0);
|
|
offset = stack_offset(instr->data.store.target);
|
|
GENCODE(SW(REG_T0, REG_SP, offset));
|
|
break;
|
|
}
|
|
case IR_NODE_RET: {
|
|
// A0 = S0
|
|
if (instr->data.ret.ret_val != NULL) {
|
|
len += get_node_val(instr->data.ret.ret_val, REG_A0);
|
|
}
|
|
GENCODE(LW(REG_RA, REG_SP, 0));
|
|
GENCODE(ADDI(REG_SP, REG_SP, ctx.stack_offset));
|
|
GENCODE(RET());
|
|
break;
|
|
}
|
|
case IR_NODE_OP: {
|
|
len += get_node_val(instr->data.op.lhs, REG_T1);
|
|
len += get_node_val(instr->data.op.rhs, REG_T2);
|
|
|
|
switch (instr->data.op.op) {
|
|
case IR_OP_ADD:
|
|
GENCODE(ADD(REG_T0, REG_T1, REG_T2));
|
|
break;
|
|
case IR_OP_SUB:
|
|
GENCODE(SUB(REG_T0, REG_T1, REG_T2));
|
|
break;
|
|
case IR_OP_MUL:
|
|
GENCODE(MUL(REG_T0, REG_T1, REG_T2));
|
|
break;
|
|
case IR_OP_DIV:
|
|
GENCODE(DIV(REG_T0, REG_T1, REG_T2));
|
|
break;
|
|
case IR_OP_MOD:
|
|
GENCODE(REM(REG_T0, REG_T1, REG_T2));
|
|
break;
|
|
case IR_OP_EQ:
|
|
GENCODE(XOR(REG_T0, REG_T1, REG_T2));
|
|
GENCODE(SEQZ(REG_T0, REG_T0));
|
|
break;
|
|
case IR_OP_GE:
|
|
GENCODE(SLT(REG_T0, REG_T1, REG_T2));
|
|
GENCODE(SEQZ(REG_T0, REG_T0));
|
|
break;
|
|
case IR_OP_GT:
|
|
GENCODE(SGT(REG_T0, REG_T1, REG_T2));
|
|
break;
|
|
case IR_OP_LE:
|
|
GENCODE(SGT(REG_T0, REG_T1, REG_T2));
|
|
GENCODE(SEQZ(REG_T0, REG_T0));
|
|
break;
|
|
case IR_OP_LT:
|
|
GENCODE(SLT(REG_T0, REG_T1, REG_T2));
|
|
break;
|
|
case IR_OP_NEQ:
|
|
GENCODE(XOR(REG_T0, REG_T1, REG_T2));
|
|
break;
|
|
default:
|
|
LOG_ERROR("ERROR gen_instr op in riscv");
|
|
break;
|
|
}
|
|
offset = stack_offset(instr);
|
|
GENCODE(SW(REG_T0, REG_SP, offset));
|
|
break;
|
|
}
|
|
case IR_NODE_BRANCH: {
|
|
len += get_node_val(instr->data.branch.cond, REG_T0);
|
|
int tidx = block_idx(instr->data.branch.true_bblock);
|
|
int fidx = block_idx(instr->data.branch.false_bblock);
|
|
int cidx = block_idx(ctx.cur_block);
|
|
jmp_t* jmp;
|
|
jmp = rt._malloc(sizeof(jmp_t));
|
|
*jmp = (jmp_t) {
|
|
.base_offset = 8,
|
|
.code_pos = ctx.codes.size,
|
|
.type = JMP_BRANCH,
|
|
.to_idx = tidx,
|
|
.cur_idx=cidx,
|
|
};
|
|
vector_push(ctx.jmp, jmp);
|
|
GENCODE(BNEZ(REG_T0, 0));
|
|
jmp = rt._malloc(sizeof(jmp_t));
|
|
*jmp = (jmp_t) {
|
|
.base_offset = 4,
|
|
.code_pos = ctx.codes.size,
|
|
.type = JMP_JUMP,
|
|
.to_idx = fidx,
|
|
.cur_idx=cidx,
|
|
};
|
|
vector_push(ctx.jmp, jmp);
|
|
GENCODE(J(0));
|
|
break;
|
|
}
|
|
case IR_NODE_JUMP: {
|
|
int idx = block_idx(instr->data.jump.target_bblock);
|
|
jmp_t* jmp = rt._malloc(sizeof(jmp_t));
|
|
*jmp = (jmp_t) {
|
|
.base_offset = 4,
|
|
.code_pos = ctx.codes.size,
|
|
.type = JMP_JUMP,
|
|
.to_idx = idx,
|
|
.cur_idx=block_idx(ctx.cur_block),
|
|
};
|
|
vector_push(ctx.jmp, jmp);
|
|
GENCODE(J(0));
|
|
break;
|
|
}
|
|
case IR_NODE_CALL: {
|
|
if (instr->data.call.args.size > 8) {
|
|
LOG_ERROR("can't add so much params");
|
|
}
|
|
int param_regs[8] = {
|
|
REG_A0, REG_A1, REG_A2, REG_A3,
|
|
REG_A4, REG_A5, REG_A6, REG_A7
|
|
};
|
|
for (int i = 0; i < instr->data.call.args.size; i++) {
|
|
ir_node_t* param = vector_at(instr->data.call.args, i);
|
|
len += get_node_val(param, param_regs[i]);
|
|
}
|
|
|
|
int system_func_idx = system_func(instr->data.call.callee->name);
|
|
if (system_func_idx != -1) {
|
|
// ecall
|
|
GENCODES(
|
|
ADDI(REG_A7, REG_X0, system_func_idx),
|
|
ECALL()
|
|
);
|
|
goto CALL_END;
|
|
}
|
|
|
|
jmp_t* jmp = rt._malloc(sizeof(jmp_t));
|
|
*jmp = (jmp_t) {
|
|
.base_offset = ctx.cur_func_offset + ctx.cur_block_offset + len,
|
|
.code_pos = ctx.codes.size,
|
|
.type = JMP_CALL,
|
|
.to_idx = func_idx(instr->data.call.callee),
|
|
.cur_idx = func_idx(ctx.cur_func),
|
|
};
|
|
vector_push(ctx.call, jmp);
|
|
|
|
GENCODES(CALL(0));
|
|
CALL_END:
|
|
offset = stack_offset(instr);
|
|
GENCODE(SW(REG_A0, REG_SP, offset));
|
|
break;
|
|
}
|
|
default:
|
|
LOG_ERROR("ERROR gen_instr in riscv");
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
 * Generates code for every instruction of `block`, in order.
 *
 * Sets ctx.cur_block, and before each instruction records in
 * ctx.cur_block_offset how many bytes the block has produced so far.
 *
 * Returns the total number of bytes emitted for the block.
 */
static int gen_block(ir_bblock_t* block) {
    int emitted = 0;
    int idx = 0;

    ctx.cur_block = block;
    while (idx < block->instrs.size) {
        ctx.cur_block_offset = emitted;
        emitted += gen_instr(block, vector_at(block->instrs, idx));
        idx++;
    }
    return emitted;
}
|
|
|
|
/*
 * Generates code for one function: prologue, parameter spills, all basic
 * blocks, and finally the patching of intra-function jumps and branches.
 *
 * Frame layout (relative to SP after the prologue):
 *   [0]                saved RA
 *   [stack_base ...]   one 4-byte slot per IR instruction (see stack_offset())
 *
 * Returns the number of bytes emitted for the function.
 */
static int gen_func(ir_func_t* func) {
    int len = 0;
    ctx.cur_func = func;
    ctx.stack_base = 16;
    ctx.stack_offset = ctx.stack_base;
    for (int i = 0; i < func->bblocks.size; i++) {
        ctx.stack_offset += 4 * vector_at(func->bblocks, i)->instrs.size;
    }

    /* Prologue: allocate the frame and save the return address. */
    GENCODE(ADDI(REG_SP, REG_SP, -ctx.stack_offset));
    GENCODE(SW(REG_RA, REG_SP, 0));

    int param_regs[8] = {
        REG_A0, REG_A1, REG_A2, REG_A3,
        REG_A4, REG_A5, REG_A6, REG_A7
    };
    const int max_params = (int)(sizeof(param_regs) / sizeof(param_regs[0]));

    int nparams = func->params.size;
    if (nparams > max_params) {
        LOG_ERROR("can't add so much params");
        /* Never index past param_regs; the surplus parameters are not spilled. */
        nparams = max_params;
    }
    /* Spill the incoming argument registers into the parameters' slots. */
    for (int i = 0; i < nparams; i++) {
        int offset = stack_offset(vector_at(func->params, i));
        GENCODE(SW(param_regs[i], REG_SP, offset));
    }

    /*
     * jmp_cache[i] is the byte offset of block i measured from the first
     * block (the prologue is not counted); jmp_cache[n] is the end of the
     * last block. Only differences between entries are used below.
     */
    int jmp_cache[func->bblocks.size + 1];

    if (ctx.jmp.data != NULL) vector_free(ctx.jmp);
    vector_init(ctx.jmp);
    jmp_cache[0] = 0;
    for (int i = 0; i < func->bblocks.size; i++) {
        ctx.cur_func_offset = len;
        int ret = gen_block(vector_at(func->bblocks, i));
        jmp_cache[i + 1] = jmp_cache[i] + ret;
        len += ret;
    }

    /* Patch every placeholder jump/branch now that block positions are known. */
    for (int i = 0; i < ctx.jmp.size; i++) {
        jmp_t* jmp = vector_at(ctx.jmp, i);
        int32_t code = 0;
        /* The instruction sits base_offset bytes before the end of its block. */
        int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx + 1] - jmp->base_offset);
        if (jmp->type == JMP_JUMP) {
            code = J(offset);
        } else {
            code = BNEZ(REG_T0, offset);
        }
        ctx.codes.data[jmp->code_pos] = (rv32code_t) {
            .code = code,
        };
    }

    return len;
}
|
|
|
|
/*
 * Generates code for the whole program into ctx.codes and patches all
 * inter-function calls.
 *
 * Returns the byte offset of `main` from the start of the generated code, or
 * -1 (after logging an error) when the program defines no `main`.
 */
static int gen_code(ir_prog_t* prog) {
    ctx.prog = prog;

    /* Every external function must be one of the VM built-ins. */
    for (int i = 0; i < prog->extern_funcs.size; i++) {
        if (system_func(prog->extern_funcs.data[i]->name) == -1) {
            LOG_ERROR("func %s not defined and not a system func", prog->extern_funcs.data[i]->name);
        }
    }

    /*
     * jmp_cache[i] is the byte offset of function i from the start of the
     * generated code. The first entry must be seeded explicitly: every later
     * entry is derived from it, and it was previously read uninitialised.
     */
    int jmp_cache[prog->funcs.size + 1];
    jmp_cache[0] = 0;
    for (int i = 0; i < prog->funcs.size; i++) {
        int ret = gen_func(vector_at(prog->funcs, i));
        jmp_cache[i + 1] = jmp_cache[i] + ret;
    }

    /* Patch the CALL placeholders now that every function's position is known. */
    for (int i = 0; i < ctx.call.size; i++) {
        jmp_t* jmp = vector_at(ctx.call, i);
        /*
         * NOTE(review): the original carried a "FIXME ERROR" marker on this
         * computation; re-verify it against generated output.
         * base_offset is the byte position of the CALL inside its function.
         */
        int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx] + jmp->base_offset);
        assert(offset > -0xfff && offset < 0xfff);
        int32_t codes[2] = {
            CALL(offset)
        };
        for (int k = 0; k < 2; k++) {
            ctx.codes.data[jmp->code_pos + k] = (rv32code_t) {
                .code = codes[k],
            };
        }
    }

    /* Locate the entry point. */
    for (int i = 0; i < prog->funcs.size; i++) {
        if (strcmp(vector_at(prog->funcs, i)->name, "main") == 0) {
            return jmp_cache[i];
        }
    }
    LOG_ERROR("main not found");
    return -1;
}
|
|
|
|
int main(int argc, char** argv) {
|
|
// gcc rv32ima_codegen.c -o rv32gen.exe
|
|
|
|
init_lib_core();
|
|
log_set_level(NULL, LOG_LEVEL_NOTSET);
|
|
|
|
const char* infilename = "test.c";
|
|
const char* outfilename = "flat.bin";
|
|
if (argc >= 2) {
|
|
infilename = argv[1];
|
|
}
|
|
if (argc >= 3) {
|
|
outfilename = argv[2];
|
|
}
|
|
FILE* in = fopen(infilename, "r");
|
|
FILE* out = fopen(outfilename, "wb");
|
|
if (in == NULL || out == NULL) {
|
|
printf("Failed to open file\n");
|
|
return 1;
|
|
}
|
|
|
|
ast_node_t* root = frontend(infilename, in, (sread_fn)fread_s);
|
|
ir_prog_t* prog = gen_ir_from_ast(root);
|
|
int main_pos = gen_code(prog);
|
|
|
|
#define CRT_CODE_SIZE 16
|
|
rv32code_t gcodes[] = {
|
|
LI(REG_SP, 0x1000),
|
|
LI(REG_RA, 0x0),
|
|
CALL(0),
|
|
// Exit
|
|
ECALL_EXIT2(),
|
|
};
|
|
main_pos += (CRT_CODE_SIZE - 4) * 4;
|
|
assert(main_pos > -0xfff && main_pos < 0xfff);
|
|
rv32code_t call_main[2] = {
|
|
CALL(main_pos)
|
|
};
|
|
gcodes[4] = call_main[0];
|
|
gcodes[5] = call_main[1];
|
|
|
|
for (int i = 0; i < CRT_CODE_SIZE; i++) {
|
|
write_inst((union rv32code) {
|
|
.code = NOP(),
|
|
}, out);
|
|
}
|
|
fflush(out);
|
|
assert(CRT_CODE_SIZE >= sizeof(gcodes) / sizeof(gcodes[0]));
|
|
fseek(out, 0, SEEK_SET);
|
|
fwrite(gcodes, sizeof(gcodes), 1, out);
|
|
fflush(out);
|
|
fseek(out, CRT_CODE_SIZE * 4, SEEK_SET);
|
|
|
|
fwrite(ctx.codes.data, sizeof(ctx.codes.data[0]), ctx.codes.size, out);
|
|
fflush(out);
|
|
fclose(in);
|
|
fclose(out);
|
|
// printf("comiler end out: %s\n", outfilename);
|
|
return 0;
|
|
}
|