This commit is contained in:
ZZY
2025-04-01 00:13:21 +08:00
parent 2b4857001c
commit 74f43a1ab7
79 changed files with 2271 additions and 2861 deletions

57
ccompiler/Makefile Normal file
View File

@ -0,0 +1,57 @@
# all: cc
# # run: ccompiler
# # ./ccompiler test.c flat.bin
# # simple_test:
# # make -C tests/simple
# cc: frontend middleend backend ccompiler.c test_main.c
# gcc -g ccompiler.c test_main.c -I../ -L./frontend -lfrontend -L./middleend -lmiddleend -L./backend -lbackend -L../lib -lcore -o cc
# frontend:
# make -C ./frontend
# middleend:
# make -C ./middleend
# backend:
# make -C ./backend
# clean:
# rm -f cc
# make -C ./frontend clean
# make -C ./middleend clean
# make -C ./backend clean
# Top-level Makefile: compiles every compiler stage into one static library.
CC = gcc
AR = ar
CFLAGS = -g -Wall -I..

# Directories whose *.c files are compiled into the library.
MODULES = frontend middleend backend
FRONTEND_SUBDIRS = lexer parser parser/ast
MODULES += $(addprefix frontend/, $(FRONTEND_SUBDIRS))
MIDDLEEND_MODULES = ir
MODULES += $(addprefix middleend/, $(MIDDLEEND_MODULES))
BACKEND_MODULES = riscv32
MODULES += $(addprefix backend/, $(BACKEND_MODULES))

# Collect all submodule sources automatically, leaving out the test drivers.
# filter-out only understands '%' patterns (not shell globs) and compares whole
# words including the directory part, so the test files are enumerated with
# wildcard instead of being described by a bare "test*.c" pattern.
EXCLUDE = $(wildcard $(addsuffix /test*.c,$(MODULES)))
SRCS = $(filter-out $(EXCLUDE), $(wildcard $(addsuffix /*.c,$(MODULES))))
SRCS += ccompiler.c
OBJS = $(SRCS:.c=.o)

libcc.a: $(OBJS)
	$(AR) rcs $@ $^

%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<

clean:
	rm -f libcc.a $(OBJS)

.PHONY: clean

View File

@ -0,0 +1,30 @@
# Builds the backend stage into the static library libbackend.a.
# Compiler settings
CC = gcc
AR = ar
CFLAGS = -g -Wall -I../..
RISCV32_DIR = ./riscv32
# Source file list
SRCS = \
backend.c \
$(RISCV32_DIR)/riscv32.c
# Object file list derived from the sources
OBJS = $(SRCS:.c=.o)
# Final target
TARGET = libbackend.a
all: $(TARGET)
$(TARGET): $(OBJS)
$(AR) rcs $@ $^
%.o: %.c
$(CC) $(CFLAGS) -c -o $@ $<
clean:
rm -f $(OBJS) $(TARGET)
.PHONY: all clean

View File

@ -0,0 +1,23 @@
#include "backend.h"
/**
 * Lower an IR program to assembly for the requested target architecture.
 *
 * @param ir       IR program to translate.
 * @param arch     Target architecture selector.
 * @param out_asm  Caller-provided output; the member matching `arch` is filled.
 * @return 0. An unsupported architecture is reported through Panic().
 */
int gen_asm_from_ir(ir_prog_t* ir, cc_arch_t arch, asm_prog_t* out_asm) {
    switch (arch) {
    case CC_ARCH_RISCV32:
        // TODO: select the compiled-in targets with macros.
        // gen_rv32_from_ir() initializes the output program itself, so it is
        // not initialized a second time here (a second init would overwrite,
        // and presumably leak, whatever the first one set up).
        gen_rv32_from_ir(ir, &(out_asm->rv32));
        break;
    case CC_ARCH_X86_32:
    default:
        Panic("Unsupported arch");
        break;
    }
    return 0;
}
/**
 * Backend entry point: allocate an assembly program and generate it from the
 * IR for the architecture named in `conf`.
 *
 * @param ir    IR program to translate.
 * @param conf  Backend configuration; only `arch` is read here.
 * @return The newly allocated assembly program (obtained from salloc_alloc).
 */
asm_prog_t* cc_backend(ir_prog_t* ir, cc_backend_conf_t* conf) {
    // TODO
    asm_prog_t* out_prog = (asm_prog_t*)salloc_alloc(sizeof(*out_prog));
    cc_arch_t target = conf->arch;
    gen_asm_from_ir(ir, target, out_prog);
    return out_prog;
}

View File

@ -0,0 +1,30 @@
// Public interface of the compiler backend: IR -> target assembly.
#ifndef __SMCC_CC_BACKEND_H__
#define __SMCC_CC_BACKEND_H__
// TODO: use macros to choose the architecture
#ifndef __SMCC_CC_NO_RISCV32__
#include "riscv32/riscv32.h"
#endif
// #ifndef __SMCC_CC_NO_X86_32__
// #include "x86_32/x86_32.h"
// #endif
// TODO: unify the assembler interface
#include <assembler/assembler.h>
#include "../middleend/ir/ir.h"
// Target architectures the backend can be asked to generate code for.
typedef enum cc_arch {
CC_ARCH_RISCV32,
CC_ARCH_X86_32
} cc_arch_t;
// Output assembly program; the union itself is defined outside this header.
typedef union asm_prog asm_prog_t;
// Translate `ir` into `asm_prog` for architecture `arch`.
int gen_asm_from_ir(ir_prog_t* ir, cc_arch_t arch, asm_prog_t* asm_prog);
// Backend configuration passed to cc_backend().
typedef struct cc_backend_conf {
cc_arch_t arch;
} cc_backend_conf_t;
// Allocate an assembly program and generate it from `ir` according to `conf`.
asm_prog_t* cc_backend(ir_prog_t* ir, cc_backend_conf_t* conf);
#endif

View File

@ -1,21 +0,0 @@
# Builds the standalone rv32 code generator and the libraries it links against.
# None of these targets names a file it produces except `ccompiler`, so they
# are declared phony to keep a stray file of the same name from masking them.
.PHONY: all run simple_test frontend ir clean

all: ccompiler

run: ccompiler
	./ccompiler test.c flat.bin

simple_test:
	$(MAKE) -C tests/simple

ccompiler: frontend ir
	gcc -g rv32.c -I../../.. -L../../frontend -lfrontend -L../../middleend -lmiddleend -L../../../lib -lcore -o ccompiler

# $(MAKE) (instead of a literal `make`) passes flags and the jobserver down
# to the sub-makes.
frontend:
	$(MAKE) -C ../../frontend

ir:
	$(MAKE) -C ../../middleend

clean:
	rm -f ccompiler flat.bin
	$(MAKE) -C ../../frontend clean
	$(MAKE) -C ../../middleend clean

View File

@ -0,0 +1,41 @@
# 后端代码生成
## riscv32i
> 仿照ripes的syscall实现了rv32-vm
### syscall ecall 系统调用
```c
// ecall 系统调用函数实现
#define ECALL_PNT_INT(num) \
ADDI(REG_A0, REG_X0, num), \
ADDI(REG_A7, REG_X0, 0x1), \
ECALL(),
#define ECALL_PNT_STR(str) \
ADDI(REG_A0, REG_X0, str), \
ADDI(REG_A7, REG_X0, 0x4), \
ECALL(),
#define ECALL_EXIT(errno) \
ADDI(REG_A0, REG_X0, errno), \
ADDI(REG_A7, REG_X0, 10), \
ECALL(),
#define ECALL_SCAN_INT(int) \
ADDI(REG_A7, REG_X0, (1025 + 4)), \
ECALL(),
#define ECALL_SCAN_STR(str) \
ADDI(REG_A0, REG_X0, str), \
ADDI(REG_A7, REG_X0, (1025 + 5)), \
ECALL(),
// 函数声明
void ecall_pnt_int(int num);
void ecall_pnt_str(char *str);
void ecall_exit(int errno);
int ecall_scani();
void ecall_scans(char *str);
```

View File

@ -0,0 +1,301 @@
#include "riscv32.h"
#include <assembler/riscv32/riscv32_instr.h>
// Per-function code generation state shared by the gen_* helpers.
typedef struct {
// Function currently being generated; stack_pos() walks its basic blocks.
ir_func_t* func;
// Total frame size in bytes: subtracted from sp in the prologue and added
// back before returning.
int stack_offset;
// Byte offset from sp of the first per-instruction slot (the bytes below it
// hold the saved ra).
int stack_base;
// Set to 0 by gen_func(); not read anywhere in the code visible here.
int func_idx;
// Set to 0 by gen_func(); not read anywhere in the code visible here.
int block_idx;
} gen_ctx_t;
/**
 * Return the sp-relative byte offset of the stack slot that belongs to `ptr`.
 *
 * Every instruction of every basic block owns one 4-byte slot, laid out in
 * block order starting at ctx->stack_base. Panics if `ptr` is not an
 * instruction of the current function.
 */
static inline int stack_pos(ir_node_t* ptr, gen_ctx_t *ctx) {
    int slot_base = ctx->stack_base;
    for (int b = 0; b < ctx->func->bblocks.size; b++) {
        ir_bblock_t* bb = vector_at(ctx->func->bblocks, b);
        for (int k = 0; k < bb->instrs.size; k++) {
            if (vector_at(bb->instrs, k) != ptr) {
                continue;
            }
            int offset = slot_base + k * 4;
            Assert(offset >= 0 && offset < ctx->stack_offset);
            return offset;
        }
        // Not in this block: skip over all of its slots.
        slot_base += bb->instrs.size * 4;
    }
    Panic("stack pos got error");
    return 0;
}
/**
 * Map a callee name to the ecall number of a built-in system function.
 *
 * @return The ecall number, or -1 when `name` is not a built-in.
 */
static int system_func(const char* name) {
    static struct {
        const char* name;
        int ecall_num;
    } defined_func[] = {
        {"ecall_pnt_int", 1},
        {"ecall_pnt_char", 11},
        {"ecall_scan_int", 1025 + 4},
    };
    const int count = (int)(sizeof(defined_func) / sizeof(defined_func[0]));
    int k = 0;
    while (k < count) {
        if (rt_strcmp(name, defined_func[k].name) == 0) {
            return defined_func[k].ecall_num;
        }
        k++;
    }
    return -1;
}
/**
 * Emit code that places the value of IR node `ptr` into register `reg`.
 *
 * Integer constants are materialized with `li`; every other node is read
 * back from its own stack slot.
 *
 * @return Always 0.
 */
static int get_node_val(rv32_prog_t* out_asm, gen_ctx_t* ctx, ir_node_t* ptr, int reg) {
    if (ptr->tag == IR_NODE_CONST_INT) {
        // TODO
        rv32_li(out_asm, reg, ptr->data.const_int.val);
    } else {
        int slot = stack_pos(ptr, ctx);
        rv32_lw(out_asm, reg, REG_SP, slot);
    }
    return 0;
}
static int gen_instr(rv32_prog_t* out_asm, gen_ctx_t* ctx, ir_node_t* instr) {
int idx = 0;
int offset;
char buf[1024];
symasm_entry_t label;
switch (instr->tag) {
case IR_NODE_ALLOC: {
// TODO
break;
}
case IR_NODE_LOAD: {
offset = stack_pos(instr->data.load.target, ctx);
// t0 = M[sp + offset]
rv32_lw(out_asm, REG_T0, REG_SP, offset);
break;
}
case IR_NODE_STORE: {
idx += get_node_val(out_asm, ctx, instr->data.store.value, REG_T0);
offset = stack_pos(instr->data.store.target, ctx);
// M[sp + offset] = t0
rv32_sw(out_asm, REG_T0, REG_SP, offset);
break;
}
case IR_NODE_RET: {
// A0 = S0
if (instr->data.ret.ret_val != NULL) {
idx += get_node_val(out_asm, ctx, instr->data.ret.ret_val, REG_A0);
}
// ra = M[sp + 0]
rv32_lw(out_asm, REG_RA, REG_SP, 0);
// sp = sp + stack_offset
rv32_addi(out_asm, REG_SP, REG_SP, ctx->stack_offset);
// ret == JALR(REG_X0, REG_RA, 0)
rv32_ret(out_asm);
break;
}
case IR_NODE_OP: {
idx += get_node_val(out_asm, ctx, instr->data.op.lhs, REG_T1);
idx += get_node_val(out_asm, ctx, instr->data.op.rhs, REG_T2);
rv32_instr_t _instr = {
.rd = REG_T0,
.rs1 = REG_T1,
.rs2 = REG_T2,
.imm = 0
};
#define GEN_BIN_OP(type) _instr.instr_type = type, \
emit_rv32_instr(out_asm, &_instr, EMIT_PUSH_BACK, NULL)
switch (instr->data.op.op) {
case IR_OP_ADD:
GEN_BIN_OP(RV_ADD);
break;
case IR_OP_SUB:
GEN_BIN_OP(RV_SUB);
break;
case IR_OP_MUL:
GEN_BIN_OP(RV_MUL);
break;
case IR_OP_DIV:
GEN_BIN_OP(RV_DIV);
break;
case IR_OP_MOD:
GEN_BIN_OP(RV_REM);
break;
case IR_OP_EQ:
GEN_BIN_OP(RV_XOR);
rv32_seqz(out_asm, REG_T0, REG_T0);
break;
case IR_OP_GE:
GEN_BIN_OP(RV_SLT);
rv32_seqz(out_asm, REG_T0, REG_T0);
break;
case IR_OP_GT:
// SGT(rd, rs1, rs2) SLT(rd, rs2, rs1)
// GENCODE(SGT(REG_T0, REG_T1, REG_T2));
rv32_slt(out_asm, REG_T0, REG_T2, REG_T1);
break;
case IR_OP_LE:
// GENCODE(SGT(REG_T0, REG_T1, REG_T2));
rv32_slt(out_asm, REG_T0, REG_T2, REG_T1);
rv32_seqz(out_asm, REG_T0, REG_T0);
break;
case IR_OP_LT:
rv32_slt(out_asm, REG_T0, REG_T1, REG_T2);
break;
case IR_OP_NEQ:
GEN_BIN_OP(RV_XOR);
break;
default:
LOG_ERROR("ERROR gen_instr op in riscv");
break;
}
offset = stack_pos(instr, ctx);
rv32_sw(out_asm, REG_T0, REG_SP, offset);
break;
}
case IR_NODE_BRANCH: {
get_node_val(out_asm, ctx, instr->data.branch.cond, REG_T0);
rt.snprintf(buf, sizeof(buf), "L%s%p", instr->data.branch.true_bblock->label, instr->data.branch.true_bblock);
label.name = strpool_intern(out_asm->strpool, buf);
label.attr = LOCAL;
rv32_bne_l(out_asm, REG_T0, REG_X0, &label);
rt.snprintf(buf, sizeof(buf), "L%s%p", instr->data.branch.false_bblock->label, instr->data.branch.false_bblock);
label.name = strpool_intern(out_asm->strpool, buf);
label.attr = LOCAL;
rv32_jal_l(out_asm, REG_X0, &label);
break;
}
case IR_NODE_JUMP: {
// TODO
rt.snprintf(buf, sizeof(buf), "L%s%p", instr->data.jump.target_bblock->label, instr->data.jump.target_bblock);
label.name = strpool_intern(out_asm->strpool, buf);
label.attr = LOCAL;
rv32_jal_l(out_asm, REG_X0, &label);
break;
}
case IR_NODE_CALL: {
if (instr->data.call.args.size > 8) {
LOG_ERROR("can't add so much params");
}
int param_regs[8] = {
REG_A0, REG_A1, REG_A2, REG_A3,
REG_A4, REG_A5, REG_A6, REG_A7
};
for (int i = 0; i < instr->data.call.args.size; i++) {
ir_node_t* param = vector_at(instr->data.call.args, i);
idx += get_node_val(out_asm, ctx, param, param_regs[i]);
}
int system_func_idx = system_func(instr->data.call.callee->name);
if (system_func_idx != -1) {
rv32_li(out_asm, REG_A7, system_func_idx);
rv32_ecall(out_asm);
goto CALL_END;
}
/*
// GENCODES(CALL(0));
// AUIPC(REG_X1, REG_X0), \
// JALR(REG_X1, REG_X1, offset)
*/
// TODO CALL
label.name = strpool_intern(out_asm->strpool, instr->data.call.callee->name);
label.attr = GLOBAL;
rv32_call_l(out_asm, &label);
CALL_END:
offset = stack_pos(instr, ctx);
rv32_sw(out_asm, REG_A0, REG_SP, offset);
break;
}
default:
LOG_ERROR("ERROR gen_instr in riscv");
}
return idx;
}
/**
 * Emit one basic block: a local label at the current end of the text
 * section, followed by the code for each of the block's instructions.
 *
 * @return Always 0.
 */
static int gen_block(rv32_prog_t* out_asm, gen_ctx_t* ctx, ir_bblock_t* block) {
    char name_buf[1024];
    rt.snprintf(name_buf, sizeof(name_buf), "L%s%p", block->label, block);

    symasm_entry_t block_label;
    block_label.name = strpool_intern(out_asm->strpool, name_buf);
    block_label.attr = LOCAL;
    rv32_append_label(out_asm, &block_label, out_asm->text.size);

    int k = 0;
    while (k < block->instrs.size) {
        gen_instr(out_asm, ctx, vector_at(block->instrs, k));
        k++;
    }
    return 0;
}
/**
 * Emit one function: global label, prologue, parameter spills, then every
 * basic block.
 *
 * Frame layout (sp-relative): [0] saved ra, then one 4-byte slot per IR
 * instruction in block order, starting at offset 4.
 *
 * @return Always 0.
 */
static int gen_func(rv32_prog_t* out_asm, ir_func_t* func) {
    symasm_entry_t label = {
        .name = strpool_intern(out_asm->strpool, func->name),
        .attr = GLOBAL,
    };
    rv32_append_label(out_asm, &label, out_asm->text.size);

    int stack_base = 4;
    int stack_offset = stack_base;
    for (int i = 0; i < func->bblocks.size; i++) {
        // TODO every instr push ret val to stack
        stack_offset += 4 * (*vector_at(func->bblocks, i)).instrs.size;
    }

    gen_ctx_t ctx = {
        .func = func,
        .stack_offset = stack_offset,
        .stack_base = stack_base,
        .func_idx = 0,
        .block_idx = 0,
    };

    // TODO Alignment by 16
    // sp = sp - stack_offset;
    rv32_addi(out_asm, REG_SP, REG_SP, -stack_offset);
    // M[sp] = ra;
    rv32_sw(out_asm, REG_RA, REG_SP, 0);

    int param_regs[8] = {
        REG_A0, REG_A1, REG_A2, REG_A3,
        REG_A4, REG_A5, REG_A6, REG_A7
    };
    int param_count = func->params.size;
    if (param_count > 8) {
        LOG_ERROR("can't add so much params");
        // Only a0-a7 carry parameters; never index past param_regs.
        param_count = 8;
    }
    for (int i = 0; i < param_count; i++) {
        int offset = stack_pos(vector_at(func->params, i), &ctx);
        // M[sp + offset] = param[i];
        rv32_sw(out_asm, param_regs[i], REG_SP, offset);
    }

    for (int i = 0; i < func->bblocks.size; i++) {
        gen_block(out_asm, &ctx, vector_at(func->bblocks, i));
    }
    return 0;
}
/**
 * Generate RV32 assembly for every function of an IR program.
 *
 * Initializes `out_asm` and then appends each function in program order.
 * Unlike the earlier generator, the position of `main` is not looked up here.
 *
 * @return Always 0.
 */
int gen_rv32_from_ir(ir_prog_t* ir, rv32_prog_t* out_asm) {
    init_rv32_prog(out_asm, NULL);
    int fn = 0;
    while (fn < ir->funcs.size) {
        gen_func(out_asm, vector_at(ir->funcs, fn));
        fn++;
    }
    return 0;
}

View File

@ -0,0 +1,10 @@
// RISC-V 32 code generator interface of the compiler backend.
#ifndef __SMCC_CC_RISCV32_H__
#define __SMCC_CC_RISCV32_H__
#include <assembler/assembler.h>
#include <assembler/riscv32/riscv32.h>
#include "../../middleend/ir/ir.h"
// Translate every function of `ir` into `out_asm`; `out_asm` is initialized
// by the call itself. Returns 0.
int gen_rv32_from_ir(ir_prog_t* ir, rv32_prog_t* out_asm);
#endif

View File

@ -1,464 +0,0 @@
#define RISCV_VM_BUILDIN_ECALL
#include "rv32gen.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
// One encoded 32-bit instruction, viewable either as a word or as raw bytes.
// NOTE(review): the original comment says little-endian order is handled
// automatically; the byte view simply follows the host byte order — confirm
// the generator is only run on little-endian hosts.
typedef union rv32code {
uint32_t code;
uint8_t bytes[4];
} rv32code_t;
#include "../../frontend/frontend.h"
#include "../../middleend/middleend.h"
// A jump/branch/call whose target offset is patched after code emission.
typedef struct {
// Index into ctx.codes of the placeholder instruction to overwrite.
int code_pos;
// Index of the destination (basic block for jumps/branches, function for calls).
int to_idx;
// Index of the basic block / function that contains the placeholder.
int cur_idx;
// Byte adjustment used when the final offset is computed (see gen_func and
// gen_code for the two formulas).
int base_offset;
// Which kind of instruction the placeholder is.
enum {
JMP_BRANCH,
JMP_JUMP,
JMP_CALL,
} type;
} jmp_t;
// File-wide code generation state.
static struct {
// Every instruction word emitted so far (written to the output file by main).
vector_header(codes, rv32code_t);
// Frame size in bytes of the function being generated.
int stack_offset;
// Offset from sp of the first per-instruction stack slot.
int stack_base;
// Not used in the code visible here.
int tmp_reg;
// Block / function / program currently being generated.
ir_bblock_t* cur_block;
ir_func_t* cur_func;
ir_prog_t* prog;
// Pending intra-function jumps and branches; reset and patched per function.
vector_header(jmp, jmp_t*);
// Pending calls; patched once all functions have been generated.
vector_header(call, jmp_t*);
// Byte counts recorded before each block / instruction is generated; used
// to compute a call's base_offset.
int cur_func_offset;
int cur_block_offset;
} ctx;
/* Write one instruction word to `fp`; returns fwrite's item count (1 on success). */
int write_inst(union rv32code ins, FILE* fp) {
    const union rv32code *word = &ins;
    return fwrite(word, sizeof(union rv32code), 1, fp);
}
/* Append one instruction word to ctx.codes and add its 4 bytes to the
 * caller's local `len`. Wrapped in do/while(0) so the two statements always
 * behave as one (e.g. under an unbraced `if`). */
#define GENCODE(code) do { \
        vector_push(ctx.codes, (rv32code_t)(code)); \
        len += 4; \
    } while (0)
/* Append a comma-separated sequence of instruction words (used for macros
 * such as LI or CALL that expand to more than one instruction). The local
 * names carry a suffix so they cannot capture identifiers used in the
 * arguments. */
#define GENCODES(...) do { \
        rv32code_t gencodes_buf_[] = { \
            __VA_ARGS__ \
        }; \
        for (int gencodes_i_ = 0; \
             gencodes_i_ < sizeof(gencodes_buf_) / sizeof(gencodes_buf_[0]); \
             gencodes_i_ ++) { \
            GENCODE(gencodes_buf_[gencodes_i_]); \
        } \
    } while (0)
/**
 * Return the sp-relative byte offset of the stack slot that belongs to `ptr`.
 *
 * Each instruction of the current function owns one 4-byte slot, laid out in
 * block order starting at ctx.stack_base. Asserts if `ptr` is not found.
 */
static int stack_offset(ir_node_t* ptr) {
    int offset = ctx.stack_base;
    for (int b = 0; b < ctx.cur_func->bblocks.size; b++) {
        ir_bblock_t* block = vector_at(ctx.cur_func->bblocks, b);
        for (int k = 0; k < block->instrs.size; k++) {
            if (vector_at(block->instrs, k) == ptr) {
                offset += k * 4;
                assert(offset >= 0 && offset < ctx.stack_offset);
                return offset;
            }
        }
        offset += block->instrs.size * 4;
    }
    assert(0);
    // Only reached when assertions are compiled out; avoids falling off the
    // end of a non-void function.
    return 0;
}
/* Index of `toblock` within the current function's basic blocks.
 * Asserts when the block does not belong to the function. */
static int block_idx(ir_bblock_t* toblock) {
    for (int i = 0; i < ctx.cur_func->bblocks.size; i++) {
        if (vector_at(ctx.cur_func->bblocks, i) == toblock) {
            return i;
        }
    }
    assert(0);
    // Only reached when assertions are compiled out.
    return -1;
}
/* Index of `tofunc` within the program's function list.
 * Asserts when the function does not belong to the program. */
static int func_idx(ir_func_t* tofunc) {
    for (int i = 0; i < ctx.prog->funcs.size; i++) {
        if (vector_at(ctx.prog->funcs, i) == tofunc) {
            return i;
        }
    }
    assert(0);
    // Only reached when assertions are compiled out.
    return -1;
}
/**
 * Map a callee name to the ecall number of a built-in system function.
 *
 * @return The ecall number, or -1 when `name` is not a built-in.
 */
static int system_func(const char* name) {
    static struct {
        const char* name;
        int ecall_num;
    } defined_func[] = {
        {"ecall_pnt_int", 1},
        {"ecall_pnt_char", 11},
        {"ecall_scan_int", 1025 + 4},
    };
    const int count = (int)(sizeof(defined_func) / sizeof(defined_func[0]));
    int k = 0;
    while (k < count) {
        if (strcmp(name, defined_func[k].name) == 0) {
            return defined_func[k].ecall_num;
        }
        k++;
    }
    return -1;
}
/**
 * Emit code that places the value of IR node `ptr` into register `reg`.
 *
 * Integer constants are materialized with LI; every other node is read back
 * from its own stack slot.
 *
 * @return Number of code bytes emitted.
 */
static int get_node_val(ir_node_t* ptr, int reg) {
    int len = 0;
    if (ptr->tag == IR_NODE_CONST_INT) {
        GENCODES(LI(reg, ptr->data.const_int.val));
    } else {
        int offset = stack_offset(ptr);
        GENCODE(LW(reg, REG_SP, offset));
    }
    return len;
}
// Emit machine code for one IR instruction and return the number of bytes
// emitted. Operands are loaded into t1/t2, results are computed in t0 (a0 for
// calls) and written to the instruction's own stack slot. Jumps, branches and
// calls are emitted as placeholders and recorded in ctx.jmp / ctx.call so that
// gen_func / gen_code can patch in the real offsets later.
// The `block` parameter is not used in this function.
static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
int len = 0;
int offset;
switch (instr->tag) {
// Nothing is emitted for an allocation.
case IR_NODE_ALLOC: {
break;
}
case IR_NODE_LOAD: {
// S1 = *(S0 + imm)
// NOTE(review): the loaded value stays in t0 and is not written to this
// instruction's own slot, which is where get_node_val() reads it from —
// confirm this is intended.
offset = stack_offset(instr->data.load.target);
GENCODE(LW(REG_T0, REG_SP, offset));
break;
}
case IR_NODE_STORE: {
// *(S0 + imm) = S1
len += get_node_val(instr->data.store.value, REG_T0);
offset = stack_offset(instr->data.store.target);
GENCODE(SW(REG_T0, REG_SP, offset));
break;
}
case IR_NODE_RET: {
// A0 = S0
if (instr->data.ret.ret_val != NULL) {
len += get_node_val(instr->data.ret.ret_val, REG_A0);
}
// Epilogue: restore ra, release the frame, return.
GENCODE(LW(REG_RA, REG_SP, 0));
GENCODE(ADDI(REG_SP, REG_SP, ctx.stack_offset));
GENCODE(RET());
break;
}
case IR_NODE_OP: {
// t1 = lhs, t2 = rhs, t0 = t1 <op> t2
len += get_node_val(instr->data.op.lhs, REG_T1);
len += get_node_val(instr->data.op.rhs, REG_T2);
switch (instr->data.op.op) {
case IR_OP_ADD:
GENCODE(ADD(REG_T0, REG_T1, REG_T2));
break;
case IR_OP_SUB:
GENCODE(SUB(REG_T0, REG_T1, REG_T2));
break;
case IR_OP_MUL:
GENCODE(MUL(REG_T0, REG_T1, REG_T2));
break;
case IR_OP_DIV:
GENCODE(DIV(REG_T0, REG_T1, REG_T2));
break;
case IR_OP_MOD:
GENCODE(REM(REG_T0, REG_T1, REG_T2));
break;
case IR_OP_EQ:
GENCODE(XOR(REG_T0, REG_T1, REG_T2));
GENCODE(SEQZ(REG_T0, REG_T0));
break;
case IR_OP_GE:
GENCODE(SLT(REG_T0, REG_T1, REG_T2));
GENCODE(SEQZ(REG_T0, REG_T0));
break;
case IR_OP_GT:
GENCODE(SGT(REG_T0, REG_T1, REG_T2));
break;
case IR_OP_LE:
GENCODE(SGT(REG_T0, REG_T1, REG_T2));
GENCODE(SEQZ(REG_T0, REG_T0));
break;
case IR_OP_LT:
GENCODE(SLT(REG_T0, REG_T1, REG_T2));
break;
// The XOR result is non-zero when the operands differ; it is not reduced
// to exactly 1.
case IR_OP_NEQ:
GENCODE(XOR(REG_T0, REG_T1, REG_T2));
break;
default:
LOG_ERROR("ERROR gen_instr op in riscv");
break;
}
// Write the result to this instruction's own slot.
offset = stack_offset(instr);
GENCODE(SW(REG_T0, REG_SP, offset));
break;
}
case IR_NODE_BRANCH: {
// Emits "bnez t0, <true>" followed by "j <false>", both with offset 0 for
// now. Each placeholder's position (ctx.codes.size before it is pushed) is
// recorded so gen_func can overwrite it.
len += get_node_val(instr->data.branch.cond, REG_T0);
int tidx = block_idx(instr->data.branch.true_bblock);
int fidx = block_idx(instr->data.branch.false_bblock);
int cidx = block_idx(ctx.cur_block);
jmp_t* jmp;
jmp = rt._malloc(sizeof(jmp_t));
*jmp = (jmp_t) {
.base_offset = 8,
.code_pos = ctx.codes.size,
.type = JMP_BRANCH,
.to_idx = tidx,
.cur_idx=cidx,
};
vector_push(ctx.jmp, jmp);
GENCODE(BNEZ(REG_T0, 0));
jmp = rt._malloc(sizeof(jmp_t));
*jmp = (jmp_t) {
.base_offset = 4,
.code_pos = ctx.codes.size,
.type = JMP_JUMP,
.to_idx = fidx,
.cur_idx=cidx,
};
vector_push(ctx.jmp, jmp);
GENCODE(J(0));
break;
}
case IR_NODE_JUMP: {
// Unconditional jump placeholder, patched by gen_func.
int idx = block_idx(instr->data.jump.target_bblock);
jmp_t* jmp = rt._malloc(sizeof(jmp_t));
*jmp = (jmp_t) {
.base_offset = 4,
.code_pos = ctx.codes.size,
.type = JMP_JUMP,
.to_idx = idx,
.cur_idx=block_idx(ctx.cur_block),
};
vector_push(ctx.jmp, jmp);
GENCODE(J(0));
break;
}
case IR_NODE_CALL: {
// NOTE(review): with more than 8 arguments the error is logged but the loop
// below still indexes param_regs with i >= 8 unless LOG_ERROR does not
// return — confirm.
if (instr->data.call.args.size > 8) {
LOG_ERROR("can't add so much params");
}
int param_regs[8] = {
REG_A0, REG_A1, REG_A2, REG_A3,
REG_A4, REG_A5, REG_A6, REG_A7
};
// Load the arguments into a0..a7 in order.
for (int i = 0; i < instr->data.call.args.size; i++) {
ir_node_t* param = vector_at(instr->data.call.args, i);
len += get_node_val(param, param_regs[i]);
}
int system_func_idx = system_func(instr->data.call.callee->name);
if (system_func_idx != -1) {
// ecall
GENCODES(
ADDI(REG_A7, REG_X0, system_func_idx),
ECALL()
);
goto CALL_END;
}
// Ordinary call: emit a two-instruction placeholder and record it in
// ctx.call; gen_code patches the offset once every function is generated.
jmp_t* jmp = rt._malloc(sizeof(jmp_t));
*jmp = (jmp_t) {
.base_offset = ctx.cur_func_offset + ctx.cur_block_offset + len,
.code_pos = ctx.codes.size,
.type = JMP_CALL,
.to_idx = func_idx(instr->data.call.callee),
.cur_idx = func_idx(ctx.cur_func),
};
vector_push(ctx.call, jmp);
GENCODES(CALL(0));
CALL_END:
// Store the value left in a0 into this instruction's own slot.
offset = stack_offset(instr);
GENCODE(SW(REG_A0, REG_SP, offset));
break;
}
default:
LOG_ERROR("ERROR gen_instr in riscv");
}
return len;
}
/**
 * Emit every instruction of a basic block.
 *
 * Records the block in ctx.cur_block and, before each instruction, the number
 * of bytes already emitted for this block in ctx.cur_block_offset.
 *
 * @return Number of code bytes emitted for the block.
 */
static int gen_block(ir_bblock_t* block) {
    int emitted = 0;
    ctx.cur_block = block;
    int k = 0;
    while (k < block->instrs.size) {
        ctx.cur_block_offset = emitted;
        emitted += gen_instr(block, vector_at(block->instrs, k));
        k++;
    }
    return emitted;
}
// Emit one function (prologue, parameter spills, all basic blocks), then patch
// the jump/branch placeholders recorded while its blocks were generated.
// Returns the number of code bytes emitted for the function.
static int gen_func(ir_func_t* func) {
int len = 0;
ctx.cur_func = func;
// Frame: the first 16 bytes are reserved (ra is saved at sp+0), followed by
// one 4-byte slot per IR instruction.
ctx.stack_base = 16;
ctx.stack_offset = ctx.stack_base;
for (int i = 0; i < func->bblocks.size; i++) {
ctx.stack_offset += 4 * (*vector_at(func->bblocks, i)).instrs.size;
}
// Prologue: allocate the frame and save ra.
GENCODE(ADDI(REG_SP, REG_SP, -ctx.stack_offset));
GENCODE(SW(REG_RA, REG_SP, 0));
int param_regs[8] = {
REG_A0, REG_A1, REG_A2, REG_A3,
REG_A4, REG_A5, REG_A6, REG_A7
};
// NOTE(review): with more than 8 parameters the error is logged but the loop
// below still indexes param_regs with i >= 8 unless LOG_ERROR does not
// return — confirm.
if (func->params.size > 8) {
LOG_ERROR("can't add so much params");
}
// Spill the incoming argument registers to the parameters' stack slots.
for (int i = 0; i < func->params.size; i++) {
int offset = stack_offset(vector_at(func->params, i));
GENCODE(SW(param_regs[i], REG_SP, offset));
}
// jmp_cache[k] = total bytes emitted for blocks 0..k-1 (prologue excluded).
int jmp_cache[func->bblocks.size + 1];
// Start a fresh pending-jump list for this function.
// NOTE(review): the jmp_t entries allocated in gen_instr are not released
// in the code visible here.
if (ctx.jmp.data != NULL) vector_free(ctx.jmp);
vector_init(ctx.jmp);
jmp_cache[0] = 0;
for(int i = 0; i < func->bblocks.size; i ++) {
ctx.cur_func_offset = len;
jmp_cache[i + 1] = jmp_cache[i];
int ret = gen_block(vector_at(func->bblocks, i));
jmp_cache[i + 1] += ret;
len += ret;
}
// Patch each placeholder: target block start minus the placeholder's own
// position, the latter taken as (end of its block - base_offset).
// NOTE(review): this presumes the jump/branch are the last instructions of
// their block — confirm against the IR builder.
for (int i = 0; i < ctx.jmp.size; i++) {
jmp_t* jmp = vector_at(ctx.jmp, i);
int32_t code = 0;
int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx + 1] - jmp->base_offset);
if (jmp->type == JMP_JUMP) {
code = J(offset);
} else {
code = BNEZ(REG_T0, offset);
}
ctx.codes.data[jmp->code_pos] = (rv32code_t) {
.code = code,
};
}
return len;
}
/**
 * Generate code for the whole program and patch all recorded calls.
 *
 * @param prog  IR program to translate.
 * @return Byte offset of `main` from the start of the generated code, or -1
 *         when the program has no `main` (after logging the error).
 */
static int gen_code(ir_prog_t* prog) {
    ctx.prog = prog;
    // Every external function must be one of the built-in ecall wrappers.
    for (int i = 0; i < prog->extern_funcs.size; i++) {
        if (system_func(prog->extern_funcs.data[i]->name) == -1) {
            LOG_ERROR("func %s not defined and not a system func", prog->extern_funcs.data[i]->name);
        }
    }

    // jmp_cache[k] = byte offset at which function k starts.
    int jmp_cache[prog->funcs.size + 1];
    jmp_cache[0] = 0; // was left uninitialized, making every offset garbage
    for (int i = 0; i < prog->funcs.size; i++) {
        jmp_cache[i + 1] = jmp_cache[i] + gen_func(vector_at(prog->funcs, i));
    }

    // Overwrite each two-instruction call placeholder with the real offset.
    for (int i = 0; i < ctx.call.size; i++) {
        jmp_t* jmp = vector_at(ctx.call, i);
        // FIXME ERROR
        int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx] + jmp->base_offset);
        assert(offset > -0xfff && offset < 0xfff);
        int32_t codes[2] = {
            CALL(offset)
        };
        for (int k = 0; k < 2; k++) {
            ctx.codes.data[jmp->code_pos + k] = (rv32code_t) {
                .code = codes[k],
            };
        }
    }

    // Got Main pos;
    for (int i = 0; i < prog->funcs.size; i++) {
        if (strcmp(vector_at(prog->funcs, i)->name, "main") == 0) {
            return jmp_cache[i];
        }
    }
    LOG_ERROR("main not found");
    // Do not fall off the end of a non-void function.
    return -1;
}
/**
 * Compile a C source file to a flat RV32 binary.
 *
 * Usage: ccompiler [input.c [output.bin]]
 * (defaults: "test.c" and "flat.bin").
 *
 * Output layout: CRT_CODE_SIZE instruction words of start-up code (padded
 * with NOPs) that set up sp/ra, call main and exit, followed by the
 * generated program code.
 */
int main(int argc, char** argv) {
    // gcc rv32ima_codegen.c -o rv32gen.exe
    init_lib_core();
    log_set_level(NULL, LOG_LEVEL_NOTSET);

    const char* infilename = "test.c";
    const char* outfilename = "flat.bin";
    if (argc >= 2) {
        infilename = argv[1];
    }
    if (argc >= 3) {
        outfilename = argv[2];
    }

    FILE* in = fopen(infilename, "r");
    FILE* out = fopen(outfilename, "wb");
    if (in == NULL || out == NULL) {
        printf("Failed to open file\n");
        // Release whichever stream did open.
        if (in != NULL) {
            fclose(in);
        }
        if (out != NULL) {
            fclose(out);
        }
        return 1;
    }

    ast_node_t* root = frontend(infilename, in, (sread_fn)fread_s);
    ir_prog_t* prog = gen_ir_from_ast(root);
    int main_pos = gen_code(prog);

#define CRT_CODE_SIZE 16
    // Start-up code; words 4 and 5 (the CALL) are replaced below.
    rv32code_t gcodes[] = {
        LI(REG_SP, 0x1000),
        LI(REG_RA, 0x0),
        CALL(0),
        // Exit
        ECALL_EXIT2(),
    };
    // The call sits at word 4 of the start-up area, so main is
    // (CRT_CODE_SIZE - 4) words plus its offset within the program away.
    main_pos += (CRT_CODE_SIZE - 4) * 4;
    assert(main_pos > -0xfff && main_pos < 0xfff);
    rv32code_t call_main[2] = {
        CALL(main_pos)
    };
    gcodes[4] = call_main[0];
    gcodes[5] = call_main[1];

    // Fill the start-up area with NOPs, then overwrite its head with gcodes.
    for (int i = 0; i < CRT_CODE_SIZE; i++) {
        write_inst((union rv32code) {
            .code = NOP(),
        }, out);
    }
    fflush(out);
    assert(CRT_CODE_SIZE >= sizeof(gcodes) / sizeof(gcodes[0]));
    fseek(out, 0, SEEK_SET);
    fwrite(gcodes, sizeof(gcodes), 1, out);
    fflush(out);

    // The program code follows the start-up area.
    fseek(out, CRT_CODE_SIZE * 4, SEEK_SET);
    fwrite(ctx.codes.data, sizeof(ctx.codes.data[0]), ctx.codes.size, out);
    fflush(out);

    fclose(in);
    fclose(out);
    // printf("comiler end out: %s\n", outfilename);
    return 0;
}

View File

@ -1,341 +0,0 @@
#ifndef __RV32I_GEN_H__
#define __RV32I_GEN_H__
/**
31 25 24 20 19 15 14 12 11 7 6 0
imm[31:12] rd 0110111 U lui
imm[31:12] rd 0010111 U auipc
imm[20|10:1|11|19:12] rd 1101111 J jal
imm[11:0] rs1 000 rd 1100111 I jalr
imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 B beq
imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 B bne
imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 B blt
imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 B bge
imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 B bltu
imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 B bgeu
imm[11:0] rs1 000 rd 0000011 I lb
imm[11:0] rs1 001 rd 0000011 I lh
imm[11:0] rs1 010 rd 0000011 I lw
imm[11:0] rs1 100 rd 0000011 I lbu
imm[11:0] rs1 101 rd 0000011 I lhu
imm[11:5] rs2 rs1 000 imm[4:0] 0100011 S sb
imm[11:5] rs2 rs1 001 imm[4:0] 0100011 S sh
imm[11:5] rs2 rs1 010 imm[4:0] 0100011 S sw
imm[11:0] rs1 000 rd 0010011 I addi
imm[11:0] rs1 010 rd 0010011 I slti
imm[11:0] rs1 011 rd 0010011 I sltiu
imm[11:0] rs1 100 rd 0010011 I xori
imm[11:0] rs1 110 rd 0010011 I ori
imm[11:0] rs1 111 rd 0010011 I andi
0000000 shamt rs1 001 rd 0010011 I slli
0000000 shamt rs1 101 rd 0010011 I srli
0100000 shamt rs1 101 rd 0010011 I srai
0000000 rs2 rs1 000 rd 0110011 R add
0100000 rs2 rs1 000 rd 0110011 R sub
0000000 rs2 rs1 001 rd 0110011 R sll
0000000 rs2 rs1 010 rd 0110011 R slt
0000000 rs2 rs1 011 rd 0110011 R sltu
0000000 rs2 rs1 100 rd 0110011 R xor
0000000 rs2 rs1 101 rd 0110011 R srl
0100000 rs2 rs1 101 rd 0110011 R sra
0000000 rs2 rs1 110 rd 0110011 R or
0000000 rs2 rs1 111 rd 0110011 R and
0000 pred succ 00000 000 00000 0001111 I fence
0000 0000 0000 00000 001 00000 0001111 I fence.i
000000000000 00000 000 00000 1110011 I ecall
000000000001 00000 000 00000 1110011 I ebreak
csr rs1 001 rd 1110011 I csrrw
csr rs1 010 rd 1110011 I csrrs
csr rs1 011 rd 1110011 I csrrc
csr zimm 101 rd 1110011 I csrrwi
csr zimm 110 rd 1110011 I csrrsi
csr zimm 111 rd 1110011 I csrrci
*/
#include <stdint.h>
// Register enumeration: the 32 integer registers x0..x31, followed by their
// ABI aliases (zero, ra, sp, gp, tp, t0-t6, s0-s11, a0-a7).
typedef enum {
REG_X0, REG_X1, REG_X2, REG_X3, REG_X4, REG_X5, REG_X6, REG_X7,
REG_X8, REG_X9, REG_X10, REG_X11, REG_X12, REG_X13, REG_X14, REG_X15,
REG_X16, REG_X17, REG_X18, REG_X19, REG_X20, REG_X21, REG_X22, REG_X23,
REG_X24, REG_X25, REG_X26, REG_X27, REG_X28, REG_X29, REG_X30, REG_X31,
REG_ZERO = REG_X0, REG_RA = REG_X1, REG_SP = REG_X2, REG_GP = REG_X3,
REG_TP = REG_X4, REG_T0 = REG_X5, REG_T1 = REG_X6, REG_T2 = REG_X7,
REG_S0 = REG_X8, REG_S1 = REG_X9, REG_A0 = REG_X10, REG_A1 = REG_X11,
REG_A2 = REG_X12, REG_A3 = REG_X13, REG_A4 = REG_X14, REG_A5 = REG_X15,
REG_A6 = REG_X16, REG_A7 = REG_X17, REG_S2 = REG_X18, REG_S3 = REG_X19,
REG_S4 = REG_X20, REG_S5 = REG_X21, REG_S6 = REG_X22, REG_S7 = REG_X23,
REG_S8 = REG_X24, REG_S9 = REG_X25, REG_S10 = REG_X26, REG_S11 = REG_X27,
REG_T3 = REG_X28, REG_T4 = REG_X29, REG_T5 = REG_X30, REG_T6 = REG_X31,
} RV32Reg;
/******************** Immediate helpers ********************/
/* All helpers work on uint32_t so that negative immediates and shifts into
 * bit 31 are well defined. */
#define IMM_12BITS(imm) ((uint32_t)(imm) & 0xFFFu)
#define IMM_20BITS(imm) ((uint32_t)(imm) & 0xFFFFFu)
#define SHAMT_VAL(imm) ((uint32_t)(imm) & 0x1Fu)
#define CSR_VAL(csr) ((uint32_t)(csr) & 0xFFFu)
// B-type immediate layout [12|10:5|4:1|11]
#define ENCODE_B_IMM(imm) ( \
    ((((uint32_t)(imm) >> 12) & 0x1u) << 31) |  /* imm[12]   -> instr[31]    */ \
    ((((uint32_t)(imm) >> 5) & 0x3Fu) << 25) |  /* imm[10:5] -> instr[30:25] */ \
    ((((uint32_t)(imm) >> 1) & 0xFu) << 8) |    /* imm[4:1]  -> instr[11:8]  */ \
    ((((uint32_t)(imm) >> 11) & 0x1u) << 7))    /* imm[11]   -> instr[7]     */
// J-type immediate layout [20|10:1|11|19:12]
#define ENCODE_J_IMM(imm) ( \
    ((((uint32_t)(imm) >> 20) & 0x1u) << 31) |   /* imm[20]    -> instr[31]    */ \
    ((((uint32_t)(imm) >> 1) & 0x3FFu) << 21) |  /* imm[10:1]  -> instr[30:21] */ \
    ((((uint32_t)(imm) >> 11) & 0x1u) << 20) |   /* imm[11]    -> instr[20]    */ \
    ((((uint32_t)(imm) >> 12) & 0xFFu) << 12))   /* imm[19:12] -> instr[19:12] */
/******************** Instruction format builders ********************/
// R-type (the opcode argument is now honoured instead of a hard-coded 0x33)
#define RV32_RTYPE(op, f3, f7, rd, rs1, rs2) (uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ((uint32_t)(f3) << 12) | \
    ((uint32_t)(rs1) << 15) | ((uint32_t)(rs2) << 20) | ((uint32_t)(f7) << 25))
// I-type
#define RV32_ITYPE(op, f3, rd, rs1, imm) (uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ((uint32_t)(f3) << 12) | \
    ((uint32_t)(rs1) << 15) | (IMM_12BITS(imm) << 20))
// S-type
#define RV32_STYPE(op, f3, rs1, rs2, imm) (uint32_t)( \
    (uint32_t)(op) | ((IMM_12BITS(imm) & 0xFE0u) << 20) | ((uint32_t)(rs1) << 15) | \
    ((uint32_t)(rs2) << 20) | ((uint32_t)(f3) << 12) | ((IMM_12BITS(imm) & 0x1Fu) << 7))
// B-type
#define RV32_BTYPE(op, f3, rs1, rs2, imm) (uint32_t)( \
    (uint32_t)(op) | ENCODE_B_IMM(imm) | ((uint32_t)(rs1) << 15) | \
    ((uint32_t)(rs2) << 20) | ((uint32_t)(f3) << 12))
// U-type (takes the full 32-bit value; its upper 20 bits are encoded)
#define RV32_UTYPE(op, rd, imm) (uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | (IMM_20BITS((uint32_t)(imm) >> 12) << 12))
// J-type
#define RV32_JTYPE(op, rd, imm) (uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ENCODE_J_IMM(imm))
/******************** U-type ********************/
#define LUI(rd, imm) RV32_UTYPE(0x37, rd, imm)
#define AUIPC(rd, imm) RV32_UTYPE(0x17, rd, imm)
/******************** J-type ********************/
#define JAL(rd, imm) RV32_JTYPE(0x6F, rd, imm)
/******************** I-type ********************/
#define JALR(rd, rs1, imm) RV32_ITYPE(0x67, 0x0, rd, rs1, imm)
// Load instructions
#define LB(rd, rs1, imm) RV32_ITYPE(0x03, 0x0, rd, rs1, imm)
#define LH(rd, rs1, imm) RV32_ITYPE(0x03, 0x1, rd, rs1, imm)
#define LW(rd, rs1, imm) RV32_ITYPE(0x03, 0x2, rd, rs1, imm)
#define LBU(rd, rs1, imm) RV32_ITYPE(0x03, 0x4, rd, rs1, imm)
#define LHU(rd, rs1, imm) RV32_ITYPE(0x03, 0x5, rd, rs1, imm)
// Immediate arithmetic
#define ADDI(rd, rs1, imm) RV32_ITYPE(0x13, 0x0, rd, rs1, imm)
#define SLTI(rd, rs1, imm) RV32_ITYPE(0x13, 0x2, rd, rs1, imm)
#define SLTIU(rd, rs1, imm) RV32_ITYPE(0x13, 0x3, rd, rs1, imm)
#define XORI(rd, rs1, imm) RV32_ITYPE(0x13, 0x4, rd, rs1, imm)
#define ORI(rd, rs1, imm) RV32_ITYPE(0x13, 0x6, rd, rs1, imm)
#define ANDI(rd, rs1, imm) RV32_ITYPE(0x13, 0x7, rd, rs1, imm)
// Shift instructions: the shift amount occupies imm[4:0]; srai additionally
// sets imm[10] (funct7 = 0100000). The previous form pre-shifted shamt by 20,
// which the 12-bit immediate mask then discarded.
#define SLLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x1, rd, rs1, SHAMT_VAL(shamt))
#define SRLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, SHAMT_VAL(shamt))
#define SRAI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, (0x400u | SHAMT_VAL(shamt)))
/******************** B-type ********************/
#define BEQ(rs1, rs2, imm) RV32_BTYPE(0x63, 0x0, rs1, rs2, imm)
#define BNE(rs1, rs2, imm) RV32_BTYPE(0x63, 0x1, rs1, rs2, imm)
#define BLT(rs1, rs2, imm) RV32_BTYPE(0x63, 0x4, rs1, rs2, imm)
#define BGE(rs1, rs2, imm) RV32_BTYPE(0x63, 0x5, rs1, rs2, imm)
#define BLTU(rs1, rs2, imm) RV32_BTYPE(0x63, 0x6, rs1, rs2, imm)
#define BGEU(rs1, rs2, imm) RV32_BTYPE(0x63, 0x7, rs1, rs2, imm)
/******************** S-type ********************/
#define SB(rs2, rs1, imm) RV32_STYPE(0x23, 0x0, rs1, rs2, imm)
#define SH(rs2, rs1, imm) RV32_STYPE(0x23, 0x1, rs1, rs2, imm)
#define SW(rs2, rs1, imm) RV32_STYPE(0x23, 0x2, rs1, rs2, imm)
/******************** R-type ********************/
#define ADD(rd, rs1, rs2) RV32_RTYPE(0x33, 0x0, 0x00, rd, rs1, rs2)
#define SUB(rd, rs1, rs2) RV32_RTYPE(0x33, 0x0, 0x20, rd, rs1, rs2)
#define SLL(rd, rs1, rs2) RV32_RTYPE(0x33, 0x1, 0x00, rd, rs1, rs2)
#define SLT(rd, rs1, rs2) RV32_RTYPE(0x33, 0x2, 0x00, rd, rs1, rs2)
#define SLTU(rd, rs1, rs2) RV32_RTYPE(0x33, 0x3, 0x00, rd, rs1, rs2)
#define XOR(rd, rs1, rs2) RV32_RTYPE(0x33, 0x4, 0x00, rd, rs1, rs2)
#define SRL(rd, rs1, rs2) RV32_RTYPE(0x33, 0x5, 0x00, rd, rs1, rs2)
#define SRA(rd, rs1, rs2) RV32_RTYPE(0x33, 0x5, 0x20, rd, rs1, rs2)
#define OR(rd, rs1, rs2) RV32_RTYPE(0x33, 0x6, 0x00, rd, rs1, rs2)
#define AND(rd, rs1, rs2) RV32_RTYPE(0x33, 0x7, 0x00, rd, rs1, rs2)
/******************** I-type (system) ********************/
// fence: pred in instr[27:24], succ in instr[23:20]
#define FENCE(pred, succ) (uint32_t)( 0x0Fu | (((uint32_t)(pred) & 0xFu) << 24) | (((uint32_t)(succ) & 0xFu) << 20) )
#define FENCE_I() (uint32_t)( 0x100F )
#define ECALL() (uint32_t)( 0x73 )
#define EBREAK() (uint32_t)( 0x100073 )
// CSR instructions
#define CSRRW(rd, csr, rs) RV32_ITYPE(0x73, 0x1, rd, rs, CSR_VAL(csr))
#define CSRRS(rd, csr, rs) RV32_ITYPE(0x73, 0x2, rd, rs, CSR_VAL(csr))
#define CSRRC(rd, csr, rs) RV32_ITYPE(0x73, 0x3, rd, rs, CSR_VAL(csr))
// Immediate forms: the 5-bit zimm travels in the rs1 field (instr[19:15]).
#define CSRRWI(rd, csr, zimm) RV32_ITYPE(0x73, 0x5, rd, ((uint32_t)(zimm) & 0x1Fu), CSR_VAL(csr))
#define CSRRSI(rd, csr, zimm) RV32_ITYPE(0x73, 0x6, rd, ((uint32_t)(zimm) & 0x1Fu), CSR_VAL(csr))
#define CSRRCI(rd, csr, zimm) RV32_ITYPE(0x73, 0x7, rd, ((uint32_t)(zimm) & 0x1Fu), CSR_VAL(csr))
/* M extension: funct7 = 0000001; funct3 selects the operation
 * (mul = 000, div = 100, rem = 110). */
#define MUL(rd, rs1, rs2) RV32_RTYPE(0x33, 0x0, 0x01, rd, rs1, rs2)
#define DIV(rd, rs1, rs2) RV32_RTYPE(0x33, 0x4, 0x01, rd, rs1, rs2)
#define REM(rd, rs1, rs2) RV32_RTYPE(0x33, 0x6, 0x01, rd, rs1, rs2)
/******************** Pseudo-instructions ********************/
// 伪指令
// nop (No operation)
#define NOP() ADDI(REG_X0, REG_X0, 0) // 无操作
// neg rd, rs (Two's complement of rs)
#define NEG(rd, rs) SUB(rd, REG_ZERO, rs) // 补码
// negw rd, rs (Two's complement word of rs)
#define NEGW(rd, rs) SUBW(rd, REG_ZERO, rs) // 字的补码
// snez rd, rs (Set if ≠ zero)
#define SNEZ(rd, rs) SLTU(rd, REG_X0, rs) // 非0则置位
// sltz rd, rs (Set if < zero)
#define SLTZ(rd, rs) SLT(rd, rs, REG_X0) // 小于0则置位
// sgtz rd, rs (Set if > zero)
#define SGTZ(rd, rs) SLT(rd, REG_X0, rs) // 大于0则置位
// beqz rs, offset (Branch if = zero)
#define BEQZ(rs, offset) BEQ(rs, REG_X0, offset) // 为0则转移
// bnez rs, offset (Branch if ≠ zero)
#define BNEZ(rs, offset) BNE(rs, REG_X0, offset) // 非0则转移
// blez rs, offset (Branch if ≤ zero)
#define BLEZ(rs, offset) BGE(REG_X0, rs, offset) // 小于等于0则转移
// bgez rs, offset (Branch if ≥ zero)
#define BGEZ(rs, offset) BGE(rs, REG_X0, offset) // 大于等于0则转移
// bltz rs, offset (Branch if < zero)
#define BLTZ(rs, offset) BLT(rs, REG_X0, offset) // 小于0则转移
// bgtz rs, offset (Branch if > zero)
#define BGTZ(rs, offset) BLT(REG_X0, rs, offset) // 大于0则转移
// j offset (Jump)
#define J(offset) JAL(REG_X0, offset) // 跳转
// jr rs (Jump register)
#define JR(rs) JALR(REG_X0, rs, 0) // 寄存器跳转
// ret (Return from subroutine)
#define RET() JALR(REG_X0, REG_RA, 0) // 从子过程返回
// tail offset (Tail call far-away subroutine)
#define TAIL_2(offset) AUIPC(REG_X6, offset), JAL(REG_X0, REG_X6, offset) // 尾调用远程子过程, 有2条指令
#define TAIL(offset) TAIL_2(offset) // Warning this have 2 instructions
// CSR pseudo-instructions. The underlying CSRRx macros are used throughout this
// group with the operand order (rd, csr, rs1-or-imm); rd = x0 discards the old value.
// csrr csr, rd (Read CSR)
#define CSRR(csr, rd) CSRRS(rd, csr, REG_X0) // read a CSR into rd
// csrw csr, rs (Write CSR)
#define CSRW(csr, rs) CSRRW(REG_X0, csr, rs) // write rs to a CSR
// csrs csr, rs (Set bits in CSR)
#define CSRS(csr, rs) CSRRS(REG_X0, csr, rs) // set the CSR bits given in rs
// csrc csr, rs (Clear bits in CSR)
#define CSRC(csr, rs) CSRRC(REG_X0, csr, rs) // clear the CSR bits given in rs
// csrci csr, imm (Immediate clear bits in CSR)
#define CSRCI(csr, imm) CSRRCI(REG_X0, csr, imm) // clear CSR bits from an immediate
// csrrwi csr, imm (Write CSR immediate)
#define CSRRWI2(csr, imm) CSRRWI(REG_X0, csr, imm) // write an immediate to a CSR
// csrrsi csr, imm (Immediate set bits in CSR)
#define CSRRSI2(csr, imm) CSRRSI(REG_X0, csr, imm) // set CSR bits from an immediate
// csrrci csr, imm (Immediate clear bits in CSR)
#define CSRRCI2(csr, imm) CSRRCI(REG_X0, csr, imm) // clear CSR bits from an immediate
// // frcsr rd (Read FP control/status register)
// #define FRCSR(rd) CSRRS(rd, FCSR, REG_X0) // read the FP control/status register
// // fscsr rs (Write FP control/status register)
// #define FSCSR(rs) CSRRW(REG_X0, FCSR, rs) // write the FP control/status register
// // frrm rd (Read FP rounding mode)
// #define FRRM(rd) CSRRS(rd, FRM, REG_X0) // read the FP rounding mode
// // fsrm rs (Write FP rounding mode)
// #define FSRM(rs) CSRRW(REG_X0, FRM, rs) // write the FP rounding mode
// // frflags rd (Read FP exception flags)
// #define FRFLAGS(rd) CSRRS(rd, FFLAGS, REG_X0) // read the FP exception flags
// // fsflags rs (Write FP exception flags)
// #define FSFLAGS(rs) CSRRW(REG_X0, FFLAGS, rs) // write the FP exception flags
// Myriad sequences. Multi-instruction expansions are comma-separated, so they
// are meant to be used where a list of instructions is expected.
// li rd, num: LUI followed by ADDI on the same register (2 instructions).
// NOTE(review): `num` is passed unsplit to both instructions. ADDI's 12-bit
// immediate is sign-extended by the hardware, so unless LUI/ADDI split and
// compensate internally, values with bit 11 set would load incorrectly -- confirm.
#define LI(rd, num) \
LUI(rd, num), \
ADDI(rd, rd, num)
// mv rd, rs: copy a register.
#define MV(rd, rs) ADDI(rd, rs, 0)
// not rd, rs: bitwise complement.
#define NOT(rd, rs) XORI(rd, rs, -1)
// seqz rd, rs: set rd if rs is zero.
#define SEQZ(rd, rs) SLTIU(rd, rs, 1)
// sgt rd, rs1, rs2: set rd if rs1 > rs2 (SLT with swapped operands).
#define SGT(rd, rs1, rs2) SLT(rd, rs2, rs1)
// TODO: CALL breaks when the offset is out of JALR's immediate range.
// call offset: AUIPC into x1, then JALR through x1 linking into x1 (2 instructions).
// NOTE(review): AUIPC receives REG_X0 as its immediate; presumably this relies on REG_X0 being 0 -- confirm.
#define CALL(offset) \
AUIPC(REG_X1, REG_X0), \
JALR(REG_X1, REG_X1, offset)
// Jump to x0 + addr, linking into x1 (2 instructions).
// NOTE(review): the AUIPC targets x0, so its result is discarded, and only
// addresses that fit JALR's immediate appear reachable -- confirm the intent.
#define CALL_ABS(addr) \
AUIPC(REG_X0, addr), \
JALR(REG_X1, REG_X0, addr)
#ifdef RISCV_VM_BUILDIN_ECALL
// Environment-call helper sequences, available only when
// RISCV_VM_BUILDIN_ECALL is defined. Each macro loads its argument (if any)
// into a0 and a call number into a7 via ADDI from x0, then issues ECALL.
// All expansions are comma-separated instruction lists.

// a7 = 1, a0 = num
#define ECALL_PNT_INT(num) \
ADDI(REG_A0, REG_X0, num), \
ADDI(REG_A7, REG_X0, 0x1), \
ECALL()
// a7 = 4, a0 = str
#define ECALL_PNT_STR(str) \
ADDI(REG_A0, REG_X0, str), \
ADDI(REG_A7, REG_X0, 0x4), \
ECALL()
// a7 = 93, a0 left untouched
#define ECALL_EXIT2() \
ADDI(REG_A7, REG_X0, 93), \
ECALL()
// a7 = 93, a0 = errno
#define ECALL_EXIT_ARG(errno) \
ADDI(REG_A0, REG_X0, errno), \
ECALL_EXIT2()
// Same expansion as ECALL_EXIT2().
#define ECALL_EXIT() \
ADDI(REG_A7, REG_X0, 93), \
ECALL()
// a7 = 1029. The argument is accepted for call-site compatibility but is not
// used by the expansion. ADDI needs all three operands (rd, rs1, imm), as in
// every other macro of this group.
#define ECALL_SCAN_INT(dst) \
ADDI(REG_A7, REG_X0, (1025 + 4)), \
ECALL()
// a7 = 1030, a0 = str
#define ECALL_SCAN_STR(str) \
ADDI(REG_A0, REG_X0, str), \
ADDI(REG_A7, REG_X0, (1025 + 5)), \
ECALL()
#endif
#endif

View File

@ -1,8 +0,0 @@
# Builds the rv32-vm emulator from ripes-vm.c.
CC = gcc
CFLAGS = -g -Wall
# NOTE(review): this quoting of DEFAULT_FILE is kept as-is; confirm it yields a
# valid C string literal under the shell actually used to run the recipe.
CFLAGS += -DDEFAULT_FILE='\"flat.bin\"'

# `all` must be a rule (not a variable assignment) to act as the default goal.
all: rv32-vm

# The source is listed as a prerequisite so the binary is rebuilt when it changes.
rv32-vm: ripes-vm.c
	$(CC) $(CFLAGS) -o $@ $<

.PHONY: all

View File

@ -1,520 +0,0 @@
// Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "default64mbdtc.h"
// Just default RAM amount is 64MB.
uint32_t ram_amt = 64*1024*1024;
int fail_on_all_faults = 0;
static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber );
static uint64_t GetTimeMicroseconds();
static void ResetKeyboardInput();
static void CaptureKeyboardInput();
static uint32_t HandleException( uint32_t ir, uint32_t retval );
static uint32_t HandleControlStore( uint32_t addy, uint32_t val );
static uint32_t HandleControlLoad( uint32_t addy );
static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value );
static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno );
static void MiniSleep();
static int IsKBHit();
static int ReadKBByte();
// This is the functionality we want to override in the emulator.
// think of this as the way the emulator's processor is connected to the outside world.
#define MINIRV32WARN( x... ) printf( x );
#define MINIRV32_DECORATE static
#define MINI_RV32_RAM_SIZE ram_amt
#define MINIRV32_IMPLEMENTATION
#define MINIRV32_POSTEXEC( pc, ir, retval ) { if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } }
#define MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val ) if( HandleControlStore( addy, val ) ) return val;
#define MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval ) rval = HandleControlLoad( addy );
#define MINIRV32_OTHERCSR_WRITE( csrno, value ) HandleOtherCSRWrite( image, csrno, value );
#define MINIRV32_OTHERCSR_READ( csrno, value ) value = HandleOtherCSRRead( image, csrno );
#include "mini-rv32ima.h"
uint8_t * ram_image = 0;
struct MiniRV32IMAState * core;
const char * kernel_command_line = 0;
static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image );
/*
 * Entry point of the emulator wrapper.
 *
 * Parses the command line, loads the flat image (and a device tree) into the
 * emulated RAM, places the processor state at the very end of that RAM and
 * runs the core until it powers off, a fault stops it (-d), or the
 * instruction budget given with -c is used up.
 *
 * Returns 0 on power-off or when the run loop ends, 1 on usage errors, and a
 * negative code when RAM cannot be allocated or an input file cannot be loaded.
 */
int main( int argc, char ** argv )
{
	int i;
	long long instct = -1;
	int show_help = 0;
	int time_divisor = 1;
	int fixed_update = 0;
	int do_sleep = 1;
	int single_step = 0;
	int dtb_ptr = 0;
	const char * image_file_name = 0;
	const char * dtb_file_name = 0;
	for( i = 1; i < argc; i++ )
	{
		const char * param = argv[i];
		int param_continue = 0; // Can combine parameters, like -lpt x
		do
		{
			if( param[0] == '-' || param_continue )
			{
				switch( param[1] )
				{
				case 'm': if( ++i < argc ) ram_amt = SimpleReadNumberInt( argv[i], ram_amt ); break;
				case 'c': if( ++i < argc ) instct = SimpleReadNumberInt( argv[i], -1 ); break;
				case 'k': if( ++i < argc ) kernel_command_line = argv[i]; break;
				case 'f': image_file_name = (++i<argc)?argv[i]:0; break;
				case 'b': dtb_file_name = (++i<argc)?argv[i]:0; break;
				case 'l': param_continue = 1; fixed_update = 1; break;
				case 'p': param_continue = 1; do_sleep = 0; break;
				case 's': param_continue = 1; single_step = 1; break;
				case 'd': param_continue = 1; fail_on_all_faults = 1; break;
				case 't': if( ++i < argc ) time_divisor = SimpleReadNumberInt( argv[i], 1 ); break;
				default:
					if( param_continue )
						param_continue = 0;
					else
						show_help = 1;
					break;
				}
			}
			else
			{
				show_help = 1;
				break;
			}
			param++;
		} while( param_continue );
	}
	if( show_help || image_file_name == 0 || time_divisor <= 0 )
	{
		fprintf( stderr, "./mini-rv32imaf [parameters]\n\t-m [ram amount]\n\t-f [running image]\n\t-k [kernel command line]\n\t-b [dtb file, or 'disable']\n\t-c instruction count\n\t-s single step with full processor state\n\t-t time divion base\n\t-l lock time base to instruction count\n\t-p disable sleep when wfi\n\t-d fail out immediately on all faults\n" );
		return 1;
	}

	// The processor state is stored inside the RAM image itself, so RAM must
	// at least be able to hold it; otherwise the pointer math below underflows.
	if( ram_amt < sizeof( struct MiniRV32IMAState ) )
	{
		fprintf( stderr, "Error: RAM amount is too small to hold the processor state.\n" );
		return 1;
	}

	ram_image = malloc( ram_amt );
	if( !ram_image )
	{
		fprintf( stderr, "Error: could not allocate system image.\n" );
		return -4;
	}

restart:
	{
		FILE * f = fopen( image_file_name, "rb" );
		if( !f || ferror( f ) )
		{
			fprintf( stderr, "Error: \"%s\" not found\n", image_file_name );
			if( f ) fclose( f );
			return -5;
		}
		fseek( f, 0, SEEK_END );
		long flen = ftell( f );
		fseek( f, 0, SEEK_SET );
		// ftell() reports failure as -1; treat that like an image that does not fit.
		if( flen < 0 || (unsigned long)flen > ram_amt )
		{
			fprintf( stderr, "Error: Could not fit RAM image (%ld bytes) into %u\n", flen, (unsigned)ram_amt );
			fclose( f );
			return -6;
		}

		memset( ram_image, 0, ram_amt );
		if( fread( ram_image, flen, 1, f ) != 1)
		{
			fprintf( stderr, "Error: Could not load image.\n" );
			fclose( f );
			return -7;
		}
		fclose( f );

		if( dtb_file_name )
		{
			if( strcmp( dtb_file_name, "disable" ) == 0 )
			{
				// No DTB reading.
			}
			else
			{
				f = fopen( dtb_file_name, "rb" );
				if( !f || ferror( f ) )
				{
					fprintf( stderr, "Error: \"%s\" not found\n", dtb_file_name );
					if( f ) fclose( f );
					return -5;
				}
				fseek( f, 0, SEEK_END );
				long dtblen = ftell( f );
				fseek( f, 0, SEEK_SET );
				// The DTB sits directly below the processor state at the end of
				// RAM; reject files that would make that offset negative.
				if( dtblen < 0 || (unsigned long)dtblen > ram_amt - sizeof( struct MiniRV32IMAState ) )
				{
					fprintf( stderr, "Error: dtb \"%s\" does not fit into RAM\n", dtb_file_name );
					fclose( f );
					return -9;
				}
				dtb_ptr = ram_amt - dtblen - sizeof( struct MiniRV32IMAState );
				if( fread( ram_image + dtb_ptr, dtblen, 1, f ) != 1 )
				{
					fprintf( stderr, "Error: Could not open dtb \"%s\"\n", dtb_file_name );
					fclose( f );
					return -9;
				}
				fclose( f );
			}
		}
		else
		{
			// Load a default dtb.
			if( ram_amt < sizeof( default64mbdtb ) + sizeof( struct MiniRV32IMAState ) )
			{
				fprintf( stderr, "Error: RAM amount is too small for the default dtb.\n" );
				return -9;
			}
			dtb_ptr = ram_amt - sizeof(default64mbdtb) - sizeof( struct MiniRV32IMAState );
			memcpy( ram_image + dtb_ptr, default64mbdtb, sizeof( default64mbdtb ) );
			if( kernel_command_line )
			{
				// NOTE(review): copies at most 54 bytes at offset 0xc0 of the default
				// DTB; strncpy does not NUL-terminate on truncation -- confirm the
				// field layout of the skeleton DTB guarantees a terminator.
				strncpy( (char*)( ram_image + dtb_ptr + 0xc0 ), kernel_command_line, 54 );
			}
		}
	}

	CaptureKeyboardInput();

	// The core lives at the end of RAM.
	core = (struct MiniRV32IMAState *)(ram_image + ram_amt - sizeof( struct MiniRV32IMAState ));
	core->pc = MINIRV32_RAM_IMAGE_OFFSET;
	core->regs[10] = 0x00; //hart ID
	core->regs[11] = dtb_ptr?(dtb_ptr+MINIRV32_RAM_IMAGE_OFFSET):0; //dtb_pa (Must be valid pointer) (Should be pointer to dtb)
	core->extraflags |= 3; // Machine-mode.

	if( dtb_file_name == 0 )
	{
		// Update system ram size in DTB (but if and only if we're using the default DTB)
		// Warning - this will need to be updated if the skeleton DTB is ever modified.
		uint32_t * dtb = (uint32_t*)(ram_image + dtb_ptr);
		if( dtb[0x13c/4] == 0x00c0ff03 )
		{
			// Store the usable RAM size byte-swapped into the DTB cell.
			uint32_t validram = dtb_ptr;
			dtb[0x13c/4] = (validram>>24) | ((( validram >> 16 ) & 0xff) << 8 ) | (((validram>>8) & 0xff ) << 16 ) | ( ( validram & 0xff) << 24 );
		}
	}

	// Image is loaded.
	uint64_t rt;
	uint64_t lastTime = (fixed_update)?0:(GetTimeMicroseconds()/time_divisor);
	int instrs_per_flip = single_step?1:1024;
	for( rt = 0; rt < instct+1 || instct < 0; rt += instrs_per_flip )
	{
		uint64_t * this_ccount = ((uint64_t*)&core->cyclel);
		uint32_t elapsedUs = 0;
		if( fixed_update )
			elapsedUs = *this_ccount / time_divisor - lastTime;
		else
			elapsedUs = GetTimeMicroseconds()/time_divisor - lastTime;
		lastTime += elapsedUs;

		if( single_step )
			DumpState( core, ram_image);

		int ret = MiniRV32IMAStep( core, ram_image, 0, elapsedUs, instrs_per_flip ); // Execute upto 1024 cycles before breaking out.
		switch( ret )
		{
			case 0: break;
			case 1: if( do_sleep ) MiniSleep(); *this_ccount += instrs_per_flip; break;
			case 3: instct = 0; break;
			case 0x7777: goto restart;	//syscon code for restart
			case 0x5555: printf( "POWEROFF@0x%08x%08x\n", core->cycleh, core->cyclel ); return 0; //syscon code for power-off
			default: printf( "Unknown failure\n" ); break;
		}
	}

	DumpState( core, ram_image);
	return 0;
}
//////////////////////////////////////////////////////////////////////////
// Platform-specific functionality
//////////////////////////////////////////////////////////////////////////
#if defined(WINDOWS) || defined(WIN32) || defined(_WIN32)
#include <windows.h>
#include <conio.h>
#define strtoll _strtoi64
// Windows variant: prepares the console for the VM. The only action taken is
// an empty system() call, used for its side effect on the console.
static void CaptureKeyboardInput()
{
system(""); // Poorly documented trick: Enable VT100 Windows mode.
}
// Windows variant: intentionally empty, since CaptureKeyboardInput() changes
// no terminal settings that would need to be restored here.
static void ResetKeyboardInput()
{
}
// Windows variant: yields the host CPU briefly by calling Sleep(1).
static void MiniSleep()
{
Sleep(1);
}
// Windows variant: returns the performance counter converted to microseconds.
// The counter frequency is queried once and cached in a static.
static uint64_t GetTimeMicroseconds()
{
	static LARGE_INTEGER lpf;
	LARGE_INTEGER li;

	if( !lpf.QuadPart )
		QueryPerformanceFrequency( &lpf );

	QueryPerformanceCounter( &li );

	// Convert whole seconds and the sub-second remainder separately:
	// multiplying the raw tick count by 1000000 first overflows 64 bits once
	// the counter grows large enough (long uptime on a high-frequency counter).
	uint64_t ticks = (uint64_t)li.QuadPart;
	uint64_t freq = (uint64_t)lpf.QuadPart;
	return ( ticks / freq ) * 1000000ULL + ( ( ticks % freq ) * 1000000ULL ) / freq;
}
// Windows variant: returns the result of _kbhit(), i.e. non-zero when a key
// press is waiting to be read.
static int IsKBHit()
{
return _kbhit();
}
// Windows variant: reads one key and translates Windows extended-key codes
// into the three-byte VT100 sequence ESC '[' <letter>, one byte per call.
static int ReadKBByte()
{
	// 0 = idle, 1 = ESC already returned, 2 = '[' already returned.
	static int esc_state = 0;

	if( esc_state == 1 )
	{
		// Second byte of the sequence; no key is consumed for it.
		esc_state = 2;
		return '[';
	}

	int key = _getch();

	if( esc_state == 0 )
	{
		if( key == 13 ) return 10; //cr->lf
		if( key == 224 )
		{
			// Extended-key prefix: start emitting an escape sequence.
			esc_state = 1;
			return 27;
		}
		return key;
	}

	// Third byte: map the Windows scan letter to its VT100 final byte.
	esc_state = 0;
	static const char win_code[6]   = { 'H', 'P', 'K', 'M', 'G', 'O' };
	static const char vt100_code[6] = { 'A', 'B', 'D', 'C', 'H', 'F' }; // Up, Down, Left, Right, Home, End
	for( int k = 0; k < 6; k++ )
	{
		if( key == win_code[k] )
			return vt100_code[k];
	}
	return key; // Unknown code.
}
#else
#include <sys/ioctl.h>
#include <termios.h>
#include <unistd.h>
#include <signal.h>
#include <sys/time.h>
// SIGINT handler: dumps the processor state and exits with status 0.
// A handler installed with signal() is called with the signal number, so the
// function must take an int parameter.
// NOTE(review): DumpState() uses printf, and neither printf nor exit is
// async-signal-safe; kept as-is because this is a best-effort debug dump.
static void CtrlC( int sig )
{
	(void)sig; // the handler is only installed for SIGINT
	DumpState( core, ram_image);
	exit( 0 );
}
// Override keyboard, so we can capture all keyboard input for the VM.
// POSIX variant: registers the exit/SIGINT hooks and then turns off canonical
// mode and echo on file descriptor 0 so key presses reach the VM unbuffered.
static void CaptureKeyboardInput()
{
	// Hook exit, because we want to re-enable keyboard.
	atexit(ResetKeyboardInput);
	signal(SIGINT, CtrlC);

	struct termios term;
	// If the attributes cannot be read (e.g. stdin is not a terminal), do not
	// push an uninitialized termios back to the device.
	if( tcgetattr(0, &term) != 0 )
		return;
	term.c_lflag &= ~(ICANON | ECHO); // Disable echo as well
	tcsetattr(0, TCSANOW, &term);
}
// POSIX variant: turns canonical mode and echo back on for file descriptor 0.
static void ResetKeyboardInput()
{
	// Re-enable echo, etc. on keyboard.
	struct termios term;
	// If the attributes cannot be read (e.g. stdin is not a terminal), do not
	// push an uninitialized termios back to the device.
	if( tcgetattr(0, &term) != 0 )
		return;
	term.c_lflag |= ICANON | ECHO;
	tcsetattr(0, TCSANOW, &term);
}
// POSIX variant: yields the host CPU briefly by sleeping for 500 microseconds.
static void MiniSleep()
{
usleep(500);
}
// POSIX variant: returns the gettimeofday() wall-clock time as a single
// microsecond count.
static uint64_t GetTimeMicroseconds()
{
	struct timeval now;
	gettimeofday( &now, 0 );

	// Scale the whole seconds in 64-bit arithmetic, then add the fraction.
	uint64_t seconds_as_us = ((uint64_t)(now.tv_sec)) * 1000000LL;
	return now.tv_usec + seconds_as_us;
}
// Set once standard input has been detected as exhausted.
static int is_eofd;

// POSIX variant: reads one byte from standard input.
// Returns the byte as a value in 0..255, or -1 when nothing could be read or
// end of input was already detected.
static int ReadKBByte()
{
	if( is_eofd ) return 0xffffffff;

	// The byte is held as unsigned char: with a plain (possibly signed) char,
	// bytes >= 0x80 would be sign-extended and 0xFF would be returned as -1,
	// indistinguishable from the "no data" result.
	unsigned char rxchar = 0;
	int rread = read(STDIN_FILENO, &rxchar, 1);

	if( rread > 0 ) // Tricky: getchar can't be used with arrow keys.
		return rxchar;
	else
		return -1;
}
// POSIX variant: reports whether input is waiting on file descriptor 0.
// Returns 1 when bytes are waiting, 0 when none are, and -1 once end of input
// has been detected (that state is latched in is_eofd).
static int IsKBHit()
{
	if( is_eofd ) return -1;

	// Initialised so that a failing ioctl() leaves a defined "nothing waiting"
	// value instead of an indeterminate one.
	int byteswaiting = 0;
	ioctl(0, FIONREAD, &byteswaiting);

	// With nothing waiting, a zero-length write to stdin is used as a probe;
	// a non-zero result is treated as end of input.
	if( !byteswaiting && write( fileno(stdin), 0, 0 ) != 0 ) { is_eofd = 1; return -1; }
	return !!byteswaiting;
}
#endif
//////////////////////////////////////////////////////////////////////////
// Rest of functions functionality
//////////////////////////////////////////////////////////////////////////
// Hook invoked after a trap. Currently a pass-through: the trap code is
// returned unchanged and `ir` is not inspected.
static uint32_t HandleException( uint32_t ir, uint32_t code )
{
	(void)ir;
	switch( code )
	{
	case 3: // Weird opcode emitted by duktape on exit.
		// Could handle other opcodes here.
		break;
	default:
		break;
	}
	return code;
}
// Handles a store to the MMIO range.
// Returns 0 for ordinary device writes; for a SYSCON write the stored value
// itself is returned after the PC has been advanced past the store.
static uint32_t HandleControlStore( uint32_t addy, uint32_t val )
{
	switch( addy )
	{
	case 0x10000000: //UART 8250 / 16550 Data Buffer
		printf( "%c", val );
		fflush( stdout );
		break;
	case 0x11004004: //CLNT
		core->timermatchh = val;
		break;
	case 0x11004000: //CLNT
		core->timermatchl = val;
		break;
	case 0x11100000: //SYSCON (reboot, poweroff, etc.)
		core->pc = core->pc + 4;
		return val; // NOTE: PC will be PC of Syscon.
	default:
		break;
	}
	return 0;
}
// Handles a load from the MMIO range and returns the value read.
// Addresses that are not recognised read as 0.
static uint32_t HandleControlLoad( uint32_t addy )
{
	switch( addy )
	{
	// Emulating a 8250 / 16550 UART
	case 0x10000005:
		return 0x60 | IsKBHit();
	case 0x10000000:
		if( IsKBHit() )
			return ReadKBByte();
		break;
	case 0x1100bffc: // https://chromitem-soc.readthedocs.io/en/latest/clint.html
		return core->timerh;
	case 0x1100bff8:
		return core->timerl;
	default:
		break;
	}
	return 0;
}
// Handles writes to CSRs the core does not implement itself; the ones handled
// here are debug-output channels:
//   0x136 prints the value as a decimal number, 0x137 as 8 hex digits,
//   0x138 prints the NUL-terminated string at guest address `value`,
//   0x139 prints the value as a single character.
// Any other CSR number is ignored.
static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value )
{
	switch( csrno )
	{
	case 0x136:
		printf( "%d", value ); fflush( stdout );
		break;
	case 0x137:
		printf( "%08x", value ); fflush( stdout );
		break;
	case 0x138:
	{
		//Print "string"
		uint32_t first = value - MINIRV32_RAM_IMAGE_OFFSET;
		if( first >= ram_amt )
			printf( "DEBUG PASSED INVALID PTR (%08x)\n", value );

		// Scan forward to the terminator without leaving RAM.
		uint32_t last = first;
		while( last < ram_amt && image[last] != 0 )
			last++;

		if( last != first )
			fwrite( image + first, last - first, 1, stdout );
		break;
	}
	case 0x139:
		putchar( value ); fflush( stdout );
		break;
	default:
		break;
	}
}
// Handles reads of CSRs the core does not implement itself.
// CSR 0x140 yields the next keyboard byte, or -1 when none is waiting;
// every other CSR reads as 0. `image` is not used.
static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno )
{
	(void)image;
	if( csrno != 0x140 )
		return 0;
	if( IsKBHit() )
		return ReadKBByte();
	return -1;
}
// Parses an integer written in decimal, or with a "0x" (hex), "0b" (binary)
// or leading-"0" (octal) prefix.
// Returns defaultNumber when the text is missing, empty, or contains no
// digits valid for the selected radix; trailing characters after the digits
// are ignored.
static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber )
{
	if( !number || number[0] == '\0' )
		return defaultNumber;

	int base = 10;
	const char * digits = number;
	if( digits[0] == '0' )
	{
		switch( digits[1] )
		{
		case '\0': return 0;                          // a lone "0"
		case 'x':  base = 16; digits += 2; break;
		case 'b':  base = 2;  digits += 2; break;
		default:   base = 8;  digits += 1; break;     // skip only the leading zero
		}
	}

	char * stop;
	uint64_t parsed = strtoll( digits, &stop, base );
	if( stop == digits )
		return defaultNumber; // nothing was converted
	return parsed;
}
// Prints the program counter, the instruction word at that address (when the
// PC points inside RAM) and all 32 integer registers to stdout.
static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image )
{
	uint32_t pc = core->pc;
	uint32_t pc_offset = pc - MINIRV32_RAM_IMAGE_OFFSET;
	uint32_t ir = 0;

	printf( "PC: %08x ", pc );
	// A full 4-byte word must fit inside RAM. Checking ram_amt first keeps
	// the subtraction from wrapping around when RAM is smaller than a word.
	if( ram_amt >= 4 && pc_offset <= ram_amt - 4 )
	{
		// memcpy instead of a uint32_t* dereference: the PC may be misaligned
		// when this is called to report a fault.
		memcpy( &ir, ram_image + pc_offset, sizeof( ir ) );
		printf( "[0x%08x] ", ir );
	}
	else
		printf( "[xxxxxxxxxx] " );

	uint32_t * regs = core->regs;
	printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ",
		regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7],
		regs[8], regs[9], regs[10], regs[11], regs[12], regs[13], regs[14], regs[15] );
	printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n",
		regs[16], regs[17], regs[18], regs[19], regs[20], regs[21], regs[22], regs[23],
		regs[24], regs[25], regs[26], regs[27], regs[28], regs[29], regs[30], regs[31] );
}

View File

@ -1,547 +0,0 @@
// Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses.
#ifndef _MINI_RV32IMAH_H
#define _MINI_RV32IMAH_H
/**
To use mini-rv32ima.h for the bare minimum, the following:
#define MINI_RV32_RAM_SIZE ram_amt
#define MINIRV32_IMPLEMENTATION
#include "mini-rv32ima.h"
Though, that's not _that_ interesting. You probably want I/O!
Notes:
* There is a dedicated CLNT at 0x10000000.
* There is free MMIO from there to 0x12000000.
* You can put things like a UART, or whatever there.
* Feel free to override any of the functionality with macros.
*/
#ifndef MINIRV32WARN
#define MINIRV32WARN( x... );
#endif
#ifndef MINIRV32_DECORATE
#define MINIRV32_DECORATE static
#endif
#ifndef MINIRV32_RAM_IMAGE_OFFSET
#define MINIRV32_RAM_IMAGE_OFFSET 0x80000000
#endif
#ifndef MINIRV32_MMIO_RANGE
#define MINIRV32_MMIO_RANGE(n) (0x10000000 <= (n) && (n) < 0x12000000)
#endif
#ifndef MINIRV32_POSTEXEC
#define MINIRV32_POSTEXEC(...);
#endif
#ifndef MINIRV32_HANDLE_MEM_STORE_CONTROL
#define MINIRV32_HANDLE_MEM_STORE_CONTROL(...);
#endif
#ifndef MINIRV32_HANDLE_MEM_LOAD_CONTROL
#define MINIRV32_HANDLE_MEM_LOAD_CONTROL(...);
#endif
#ifndef MINIRV32_OTHERCSR_WRITE
#define MINIRV32_OTHERCSR_WRITE(...);
#endif
#ifndef MINIRV32_OTHERCSR_READ
#define MINIRV32_OTHERCSR_READ(...);
#endif
#ifndef MINIRV32_CUSTOM_MEMORY_BUS
// Default memory bus: direct host-memory accesses relative to a byte pointer
// named `image`, which must be in scope wherever these macros are used.
// `ofs` is a byte offset into that image. Arguments and whole expansions are
// parenthesized so compound expressions can be passed and the results can be
// used inside larger expressions.
#define MINIRV32_STORE4( ofs, val ) (*(uint32_t*)(image + (ofs)) = (val))
#define MINIRV32_STORE2( ofs, val ) (*(uint16_t*)(image + (ofs)) = (val))
#define MINIRV32_STORE1( ofs, val ) (*(uint8_t*)(image + (ofs)) = (val))
#define MINIRV32_LOAD4( ofs ) (*(uint32_t*)(image + (ofs)))
#define MINIRV32_LOAD2( ofs ) (*(uint16_t*)(image + (ofs)))
#define MINIRV32_LOAD1( ofs ) (*(uint8_t*)(image + (ofs)))
// Signed variants: the loaded value is sign-extended when it is widened.
#define MINIRV32_LOAD2_SIGNED( ofs ) (*(int16_t*)(image + (ofs)))
#define MINIRV32_LOAD1_SIGNED( ofs ) (*(int8_t*)(image + (ofs)))
#endif
// As a note: We quouple-ify these, because in HLSL, we will be operating with
// uint4's. We are going to uint4 data to/from system RAM.
//
// We're going to try to keep the full processor state to 12 x uint4.
// Complete state of one emulated hart. Every field is a uint32_t.
struct MiniRV32IMAState
{
// Integer registers x0..x31.
uint32_t regs[32];
// Address of the next instruction to execute.
uint32_t pc;
uint32_t mstatus;
// Low and high words of the executed-instruction counter.
uint32_t cyclel;
uint32_t cycleh;
// Low and high words of the timer, advanced by the elapsed time passed to the step function.
uint32_t timerl;
uint32_t timerh;
// Low and high words of the timer compare value; a timer interrupt is
// raised once the timer exceeds it (only while the compare value is non-zero).
uint32_t timermatchl;
uint32_t timermatchh;
// Machine-mode CSRs, read and written through the Zicsr instructions.
uint32_t mscratch;
uint32_t mtvec;
uint32_t mie;
uint32_t mip;
uint32_t mepc;
uint32_t mtval;
uint32_t mcause;
// Note: only a few bits are used. (Machine = 3, User = 0)
// Bits 0..1 = privilege.
// Bit 2 = WFI (Wait for interrupt)
// Bit 3+ = Load/Store reservation LSBs.
uint32_t extraflags;
};
#ifndef MINIRV32_STEPPROTO
MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count );
#endif
#ifdef MINIRV32_IMPLEMENTATION
#ifndef MINIRV32_CUSTOM_INTERNALS
#define CSR( x ) state->x
#define SETCSR( x, val ) { state->x = val; }
#define REG( x ) state->regs[x]
#define REGSET( x, val ) { state->regs[x] = val; }
#endif
// Advances the emulated hart by up to `count` instructions.
//
// state        - processor state, updated in place.
// image        - base of the emulated RAM; guest addresses are translated by
//                subtracting MINIRV32_RAM_IMAGE_OFFSET.
// vProcAddress - not referenced in this function body.
// elapsedUs    - amount added to the timer before any instruction runs.
// count        - maximum number of instructions to execute in this call.
//
// Returns 0 after running (including when a trap was taken and redirected to
// mtvec) and 1 when the hart is in, or has just entered, WFI. The hook macros
// (MINIRV32_POSTEXEC, MINIRV32_HANDLE_MEM_STORE_CONTROL, ...) are expanded
// inline and may contain their own return statements.
//
// Conventions used throughout the body:
//  * `trap` holds the exception cause plus one (0 = no trap); values with the
//    top bit set denote interrupts.
//  * Control transfers set `pc` to the target minus 4, because `pc += 4` runs
//    at the end of every loop iteration.
#ifndef MINIRV32_STEPPROTO
MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count )
#else
MINIRV32_STEPPROTO
#endif
{
// Advance the 64-bit timer held in timerl/timerh, carrying on wrap-around.
uint32_t new_timer = CSR( timerl ) + elapsedUs;
if( new_timer < CSR( timerl ) ) CSR( timerh )++;
CSR( timerl ) = new_timer;
// Handle Timer interrupt.
if( ( CSR( timerh ) > CSR( timermatchh ) || ( CSR( timerh ) == CSR( timermatchh ) && CSR( timerl ) > CSR( timermatchl ) ) ) && ( CSR( timermatchh ) || CSR( timermatchl ) ) )
{
CSR( extraflags ) &= ~4; // Clear WFI
CSR( mip ) |= 1<<7; //MTIP of MIP // https://stackoverflow.com/a/61916199/2926815 Fire interrupt.
}
else
CSR( mip ) &= ~(1<<7);
// If WFI, don't run processor.
if( CSR( extraflags ) & 4 )
return 1;
uint32_t trap = 0;
uint32_t rval = 0;
// Work on local copies of pc and the cycle counter; both are written back at the end.
uint32_t pc = CSR( pc );
uint32_t cycle = CSR( cyclel );
if( ( CSR( mip ) & (1<<7) ) && ( CSR( mie ) & (1<<7) /*mtie*/ ) && ( CSR( mstatus ) & 0x8 /*mie*/) )
{
// Timer interrupt.
trap = 0x80000007;
pc -= 4;
}
else // No timer interrupt? Execute a bunch of instructions.
for( int icount = 0; icount < count; icount++ )
{
uint32_t ir = 0;
rval = 0;
cycle++;
uint32_t ofs_pc = pc - MINIRV32_RAM_IMAGE_OFFSET;
if( ofs_pc >= MINI_RV32_RAM_SIZE )
{
trap = 1 + 1; // Handle access violation on instruction read.
break;
}
else if( ofs_pc & 3 )
{
trap = 1 + 0; //Handle PC-misaligned access
break;
}
else
{
ir = MINIRV32_LOAD4( ofs_pc );
// Destination register index; cleared by instructions that write no register.
uint32_t rdid = (ir >> 7) & 0x1f;
// Dispatch on the 7-bit major opcode.
switch( ir & 0x7f )
{
case 0x37: // LUI (0b0110111)
rval = ( ir & 0xfffff000 );
break;
case 0x17: // AUIPC (0b0010111)
rval = pc + ( ir & 0xfffff000 );
break;
case 0x6F: // JAL (0b1101111)
{
// Reassemble the scattered J-type immediate.
int32_t reladdy = ((ir & 0x80000000)>>11) | ((ir & 0x7fe00000)>>20) | ((ir & 0x00100000)>>9) | ((ir&0x000ff000));
if( reladdy & 0x00100000 ) reladdy |= 0xffe00000; // Sign extension.
rval = pc + 4;
pc = pc + reladdy - 4;
break;
}
case 0x67: // JALR (0b1100111)
{
uint32_t imm = ir >> 20;
int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0);
rval = pc + 4;
pc = ( (REG( (ir >> 15) & 0x1f ) + imm_se) & ~1) - 4;
break;
}
case 0x63: // Branch (0b1100011)
{
// Reassemble the B-type immediate; immm4 becomes the branch target minus 4.
uint32_t immm4 = ((ir & 0xf00)>>7) | ((ir & 0x7e000000)>>20) | ((ir & 0x80) << 4) | ((ir >> 31)<<12);
if( immm4 & 0x1000 ) immm4 |= 0xffffe000;
int32_t rs1 = REG((ir >> 15) & 0x1f);
int32_t rs2 = REG((ir >> 20) & 0x1f);
immm4 = pc + immm4 - 4;
rdid = 0;
switch( ( ir >> 12 ) & 0x7 )
{
// BEQ, BNE, BLT, BGE, BLTU, BGEU
case 0: if( rs1 == rs2 ) pc = immm4; break;
case 1: if( rs1 != rs2 ) pc = immm4; break;
case 4: if( rs1 < rs2 ) pc = immm4; break;
case 5: if( rs1 >= rs2 ) pc = immm4; break; //BGE
case 6: if( (uint32_t)rs1 < (uint32_t)rs2 ) pc = immm4; break; //BLTU
case 7: if( (uint32_t)rs1 >= (uint32_t)rs2 ) pc = immm4; break; //BGEU
default: trap = (2+1);
}
break;
}
case 0x03: // Load (0b0000011)
{
uint32_t rs1 = REG((ir >> 15) & 0x1f);
uint32_t imm = ir >> 20;
int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0);
uint32_t rsval = rs1 + imm_se;
rsval -= MINIRV32_RAM_IMAGE_OFFSET;
// Addresses outside RAM are either MMIO or a load access fault.
if( rsval >= MINI_RV32_RAM_SIZE-3 )
{
rsval += MINIRV32_RAM_IMAGE_OFFSET;
if( MINIRV32_MMIO_RANGE( rsval ) ) // UART, CLNT
{
MINIRV32_HANDLE_MEM_LOAD_CONTROL( rsval, rval );
}
else
{
trap = (5+1);
rval = rsval;
}
}
else
{
switch( ( ir >> 12 ) & 0x7 )
{
//LB, LH, LW, LBU, LHU
case 0: rval = MINIRV32_LOAD1_SIGNED( rsval ); break;
case 1: rval = MINIRV32_LOAD2_SIGNED( rsval ); break;
case 2: rval = MINIRV32_LOAD4( rsval ); break;
case 4: rval = MINIRV32_LOAD1( rsval ); break;
case 5: rval = MINIRV32_LOAD2( rsval ); break;
default: trap = (2+1);
}
}
break;
}
case 0x23: // Store 0b0100011
{
uint32_t rs1 = REG((ir >> 15) & 0x1f);
uint32_t rs2 = REG((ir >> 20) & 0x1f);
// Reassemble and sign-extend the S-type immediate.
uint32_t addy = ( ( ir >> 7 ) & 0x1f ) | ( ( ir & 0xfe000000 ) >> 20 );
if( addy & 0x800 ) addy |= 0xfffff000;
addy += rs1 - MINIRV32_RAM_IMAGE_OFFSET;
rdid = 0;
// Addresses outside RAM are either MMIO or a store access fault.
if( addy >= MINI_RV32_RAM_SIZE-3 )
{
addy += MINIRV32_RAM_IMAGE_OFFSET;
if( MINIRV32_MMIO_RANGE( addy ) )
{
MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, rs2 );
}
else
{
trap = (7+1); // Store access fault.
rval = addy;
}
}
else
{
switch( ( ir >> 12 ) & 0x7 )
{
//SB, SH, SW
case 0: MINIRV32_STORE1( addy, rs2 ); break;
case 1: MINIRV32_STORE2( addy, rs2 ); break;
case 2: MINIRV32_STORE4( addy, rs2 ); break;
default: trap = (2+1);
}
}
break;
}
case 0x13: // Op-immediate 0b0010011
case 0x33: // Op 0b0110011
{
uint32_t imm = ir >> 20;
imm = imm | (( imm & 0x800 )?0xfffff000:0);
uint32_t rs1 = REG((ir >> 15) & 0x1f);
// Bit 5 of the opcode separates register-register (0x33) from immediate (0x13) forms.
uint32_t is_reg = !!( ir & 0x20 );
uint32_t rs2 = is_reg ? REG(imm & 0x1f) : imm;
if( is_reg && ( ir & 0x02000000 ) )
{
switch( (ir>>12)&7 ) //0x02000000 = RV32M
{
case 0: rval = rs1 * rs2; break; // MUL
#ifndef CUSTOM_MULH // If compiling on a system that doesn't natively, or via libgcc support 64-bit math.
case 1: rval = ((int64_t)((int32_t)rs1) * (int64_t)((int32_t)rs2)) >> 32; break; // MULH
case 2: rval = ((int64_t)((int32_t)rs1) * (uint64_t)rs2) >> 32; break; // MULHSU
case 3: rval = ((uint64_t)rs1 * (uint64_t)rs2) >> 32; break; // MULHU
#else
CUSTOM_MULH
#endif
// Division by zero and INT32_MIN / -1 are special-cased instead of being evaluated on the host.
case 4: if( rs2 == 0 ) rval = -1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? rs1 : ((int32_t)rs1 / (int32_t)rs2); break; // DIV
case 5: if( rs2 == 0 ) rval = 0xffffffff; else rval = rs1 / rs2; break; // DIVU
case 6: if( rs2 == 0 ) rval = rs1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? 0 : ((uint32_t)((int32_t)rs1 % (int32_t)rs2)); break; // REM
case 7: if( rs2 == 0 ) rval = rs1; else rval = rs1 % rs2; break; // REMU
}
}
else
{
switch( (ir>>12)&7 ) // These could be either op-immediate or op commands. Be careful.
{
case 0: rval = (is_reg && (ir & 0x40000000) ) ? ( rs1 - rs2 ) : ( rs1 + rs2 ); break;
case 1: rval = rs1 << (rs2 & 0x1F); break;
case 2: rval = (int32_t)rs1 < (int32_t)rs2; break;
case 3: rval = rs1 < rs2; break;
case 4: rval = rs1 ^ rs2; break;
case 5: rval = (ir & 0x40000000 ) ? ( ((int32_t)rs1) >> (rs2 & 0x1F) ) : ( rs1 >> (rs2 & 0x1F) ); break;
case 6: rval = rs1 | rs2; break;
case 7: rval = rs1 & rs2; break;
}
}
break;
}
case 0x0f: // 0b0001111
rdid = 0; // fencetype = (ir >> 12) & 0b111; We ignore fences in this impl.
break;
case 0x73: // Zifencei+Zicsr (0b1110011)
{
uint32_t csrno = ir >> 20;
uint32_t microop = ( ir >> 12 ) & 0x7;
if( (microop & 3) ) // It's a Zicsr function.
{
int rs1imm = (ir >> 15) & 0x1f;
uint32_t rs1 = REG(rs1imm);
uint32_t writeval = rs1;
// https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf
// Generally, support for Zicsr
// Step 1: read the current CSR value into rval.
switch( csrno )
{
case 0x340: rval = CSR( mscratch ); break;
case 0x305: rval = CSR( mtvec ); break;
case 0x304: rval = CSR( mie ); break;
case 0xC00: rval = cycle; break;
case 0x344: rval = CSR( mip ); break;
case 0x341: rval = CSR( mepc ); break;
case 0x300: rval = CSR( mstatus ); break; //mstatus
case 0x342: rval = CSR( mcause ); break;
case 0x343: rval = CSR( mtval ); break;
case 0xf11: rval = 0xff0ff0ff; break; //mvendorid
case 0x301: rval = 0x40401101; break; //misa (XLEN=32, IMA+X)
//case 0x3B0: rval = 0; break; //pmpaddr0
//case 0x3a0: rval = 0; break; //pmpcfg0
//case 0xf12: rval = 0x00000000; break; //marchid
//case 0xf13: rval = 0x00000000; break; //mimpid
//case 0xf14: rval = 0x00000000; break; //mhartid
default:
MINIRV32_OTHERCSR_READ( csrno, rval );
break;
}
// Step 2: compute the value to write back from the operation type.
switch( microop )
{
case 1: writeval = rs1; break; //CSRRW
case 2: writeval = rval | rs1; break; //CSRRS
case 3: writeval = rval & ~rs1; break; //CSRRC
case 5: writeval = rs1imm; break; //CSRRWI
case 6: writeval = rval | rs1imm; break; //CSRRSI
case 7: writeval = rval & ~rs1imm; break; //CSRRCI
}
// Step 3: store the new value into the CSR.
switch( csrno )
{
case 0x340: SETCSR( mscratch, writeval ); break;
case 0x305: SETCSR( mtvec, writeval ); break;
case 0x304: SETCSR( mie, writeval ); break;
case 0x344: SETCSR( mip, writeval ); break;
case 0x341: SETCSR( mepc, writeval ); break;
case 0x300: SETCSR( mstatus, writeval ); break; //mstatus
case 0x342: SETCSR( mcause, writeval ); break;
case 0x343: SETCSR( mtval, writeval ); break;
//case 0x3a0: break; //pmpcfg0
//case 0x3B0: break; //pmpaddr0
//case 0xf11: break; //mvendorid
//case 0xf12: break; //marchid
//case 0xf13: break; //mimpid
//case 0xf14: break; //mhartid
//case 0x301: break; //misa
default:
MINIRV32_OTHERCSR_WRITE( csrno, writeval );
break;
}
}
else if( microop == 0x0 ) // "SYSTEM" 0b000
{
rdid = 0;
if( ( ( csrno & 0xff ) == 0x02 ) ) // MRET
{
//https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf
//Table 7.6. MRET then in mstatus/mstatush sets MPV=0, MPP=0, MIE=MPIE, and MPIE=1. La
// Should also update mstatus to reflect correct mode.
uint32_t startmstatus = CSR( mstatus );
uint32_t startextraflags = CSR( extraflags );
SETCSR( mstatus , (( startmstatus & 0x80) >> 4) | ((startextraflags&3) << 11) | 0x80 );
SETCSR( extraflags, (startextraflags & ~3) | ((startmstatus >> 11) & 3) );
pc = CSR( mepc ) -4;
} else {
switch (csrno) {
case 0:
#ifndef ECALL_HANDLER
trap = ( CSR( extraflags ) & 3) ? (11+1) : (8+1); // ECALL; 8 = "Environment call from U-mode"; 11 = "Environment call from M-mode"
#else
ECALL_HANDLER(state);
trap = 0;
#endif
break;
case 1:
trap = (3+1); break; // EBREAK 3 = "Breakpoint"
case 0x105: //WFI (Wait for interrupts)
CSR( mstatus ) |= 8; //Enable interrupts
CSR( extraflags ) |= 4; //Infor environment we want to go to sleep.
SETCSR( pc, pc + 4 );
return 1;
default:
trap = (2+1); break; // Illegal opcode.
}
}
}
else
trap = (2+1); // Note micrrop 0b100 == undefined.
break;
}
case 0x2f: // RV32A (0b00101111)
{
uint32_t rs1 = REG((ir >> 15) & 0x1f);
uint32_t rs2 = REG((ir >> 20) & 0x1f);
uint32_t irmid = ( ir>>27 ) & 0x1f;
rs1 -= MINIRV32_RAM_IMAGE_OFFSET;
// We don't implement load/store from UART or CLNT with RV32A here.
if( rs1 >= MINI_RV32_RAM_SIZE-3 )
{
trap = (7+1); //Store/AMO access fault
rval = rs1 + MINIRV32_RAM_IMAGE_OFFSET;
}
else
{
// rval receives the old memory value; rs2 is turned into the value to store.
rval = MINIRV32_LOAD4( rs1 );
// Referenced a little bit of https://github.com/franzflasch/riscv_em/blob/master/src/core/core.c
uint32_t dowrite = 1;
switch( irmid )
{
case 2: //LR.W (0b00010)
dowrite = 0;
CSR( extraflags ) = (CSR( extraflags ) & 0x07) | (rs1<<3);
break;
case 3: //SC.W (0b00011) (Make sure we have a slot, and, it's valid)
rval = ( CSR( extraflags ) >> 3 != ( rs1 & 0x1fffffff ) ); // Validate that our reservation slot is OK.
dowrite = !rval; // Only write if slot is valid.
break;
case 1: break; //AMOSWAP.W (0b00001)
case 0: rs2 += rval; break; //AMOADD.W (0b00000)
case 4: rs2 ^= rval; break; //AMOXOR.W (0b00100)
case 12: rs2 &= rval; break; //AMOAND.W (0b01100)
case 8: rs2 |= rval; break; //AMOOR.W (0b01000)
case 16: rs2 = ((int32_t)rs2<(int32_t)rval)?rs2:rval; break; //AMOMIN.W (0b10000)
case 20: rs2 = ((int32_t)rs2>(int32_t)rval)?rs2:rval; break; //AMOMAX.W (0b10100)
case 24: rs2 = (rs2<rval)?rs2:rval; break; //AMOMINU.W (0b11000)
case 28: rs2 = (rs2>rval)?rs2:rval; break; //AMOMAXU.W (0b11100)
default: trap = (2+1); dowrite = 0; break; //Not supported.
}
if( dowrite ) MINIRV32_STORE4( rs1, rs2 );
}
break;
}
default: trap = (2+1); // Fault: Invalid opcode.
}
// If there was a trap, do NOT allow register writeback.
if( trap ) {
SETCSR( pc, pc );
MINIRV32_POSTEXEC( pc, ir, trap );
break;
}
// x0 is never written: rdid == 0 skips the writeback.
if( rdid )
{
REGSET( rdid, rval ); // Write back register.
}
}
MINIRV32_POSTEXEC( pc, ir, trap );
pc += 4;
}
// Handle traps and interrupts.
if( trap )
{
if( trap & 0x80000000 ) // If prefixed with 1 in MSB, it's an interrupt, not a trap.
{
SETCSR( mcause, trap );
SETCSR( mtval, 0 );
pc += 4; // PC needs to point to where the PC will return to.
}
else
{
SETCSR( mcause, trap - 1 );
SETCSR( mtval, (trap > 5 && trap <= 8)? rval : pc );
}
SETCSR( mepc, pc ); //TRICKY: The kernel advances mepc automatically.
//CSR( mstatus ) & 8 = MIE, & 0x80 = MPIE
// On an interrupt, the system moves current MIE into MPIE
SETCSR( mstatus, (( CSR( mstatus ) & 0x08) << 4) | (( CSR( extraflags ) & 3 ) << 11) );
pc = (CSR( mtvec ) - 4);
// If trapping, always enter machine mode.
CSR( extraflags ) |= 3;
trap = 0;
pc += 4;
}
// Carry into the high word when the low cycle counter wrapped around.
if( CSR( cyclel ) > cycle ) CSR( cycleh )++;
SETCSR( cyclel, cycle );
SETCSR( pc, pc );
return 0;
}
#endif
#endif

View File

@ -1,192 +0,0 @@
# riscv_emufun (mini-rv32ima)
Click below for the YouTube video introducing this project:
[![Writing a Really Tiny RISC-V Emulator](https://img.youtube.com/vi/YT5vB3UqU_E/0.jpg)](https://www.youtube.com/watch?v=YT5vB3UqU_E) [![But Will It Run Doom?](https://img.youtube.com/vi/uZMNK17VCMU/0.jpg)](https://www.youtube.com/watch?v=uZMNK17VCMU)
## What
mini-rv32ima is a single-file-header, [mini-rv32ima.h](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.h), in the [STB Style library](https://github.com/nothings/stb) that:
* Implements a RISC-V **rv32ima/Zifencei†+Zicsr** (and partial su), with CLINT and MMIO.
* Is about **400 lines** of actual code.
* Has **no dependencies**, not even libc.
* Is **easily extensible**. So you can easily add CSRs, instructions, MMIO, etc!
* Is pretty **performant**. (~450 coremark on my laptop, about 1/2 the speed of QEMU)
* Is human-readable and in **basic C** code.
* Is "**incomplete**" in that it doesn't implement the large parts of the spec that Linux doesn't (and you shouldn't) use.
* Is trivially **embeddable** in applications.
It has a [demo wrapper](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.c) that:
* Implements a CLI, SYSCON, UART, DTB and Kernel image loading.
* And it is only around **250 lines** of code, itself.
* Compiles down to a **~18kB executable** and only relies on libc.
†: Zifencei+RV32A are stubbed. So, tweaks will need to be made if you want to emulate a multiprocessor system with this emulator.
Just see the `mini-rv32ima` folder.
It's "fully functional" now in that I can run Linux, apps, etc. Compile flat binaries and drop them in an image.
## Why
I'm working on a really really simple C Risc-V emulator. So simple it doesn't even have an MMU (Memory Management Unit). I have a few goals, they include:
* Furthering RV32-NOMMU work to improve Linux support for RV32-NOMMU. (Imagine if we could run Linux on the $1 ESP32-C3)
* Learning more about RV32 and writing emulators.
* Being further inspired by @pimaker's amazing work on [Running Linux in a Pixel Shader](https://blog.pimaker.at/texts/rvc1/) and having the sneaking suspicion performance could be even better!
* Hoping to port it to some weird places.
* Understanding the *most simplistic* system you can run Linux on and trying to push that boundary.
* Continue to include my [education of people about assembly language](https://www.youtube.com/watch?v=Gelf0AyVGy4).
## How
Windows instructions (Just playing with the image)
* Clone this repo.
* Install or have TinyCC. [Powershell Installer](https://github.com/cntools/Install-TCC) or [Regular Windows Installer](https://github.com/cnlohr/tinycc-win64-installer/releases/tag/v0_0.9.27)
* Run `winrun.ps` in the `windows` folder.
WSL (for full toolchain and image build):
* You will need to remove all spaces from your path i.e. `export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/mnt/c/Windows/system32:/snap/bin` and continue the instructions. P.S. What in the world was Windows thinking, putting a space between "Program" and "Files"??!?
Linux instructions (both):
* Clone this repo.
* Install `git build-essential` and/or whatever other requirements are in place for [buildroot](https://buildroot.org/).
* `make testdlimage`
* It automatically downloads the image (~1MB) and runs the emulator.
* Should be up and running in about 2.5s depending on internet speed.
You can do in-depth work on Linux by:
* `make everything`
If you want to play with the bare metal system, see below, or if you have the toolchain installed, just:
* `make testbare`
If you just want to play emdoom, and use the prebuilt image:
* On Windows, run `windows\winrundoom.ps1`
* On Linux, `cd mini-rv32ima`, and type `make testdoom`
## Questions?
* Why not rv64?
* Because then I can't run it as easily in a pixel shader if I ever hope to.
* Can I add an MMU?
* Yes. It actually probably wouldn't be too difficult.
* Should I add an MMU?
* No. It is important to further support for nommu systems to empower minimal Risc-V designs!
Everything else: Contact me on my Discord: https://discord.com/invite/CCeyWyZ
## How do I use this in my own project?
You should not need to modify `mini-rv32ima.h`, but instead, use `mini-rv32ima.c` as a template for what you are trying to do in your own project.
You can override all functionality by defining the following macros. Here are examples of what `mini-rv32ima.c` does with them. You can see the definition of the functions, or augment their definitions, by altering `mini-rv32ima.c`.
| Macro | Definition / Comment |
| --- | --- |
| `MINIRV32WARN( x... )` | `printf( x );` <br> Warnings emitted from mini-rv32ima.h |
| `MINIRV32_DECORATE` | `static` <br> How to decorate the functions. |
| `MINI_RV32_RAM_SIZE` | `ram_amt` <br> A variable, how big is system RAM? |
| `MINIRV32_IMPLEMENTATION` | If using mini-rv32ima.h, need to define this. |
| `MINIRV32_POSTEXEC( pc, ir, retval )` | `{ if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } }` <br> If you want to execute something every time slice. |
| `MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val )` | `if( HandleControlStore( addy, val ) ) return val;` <br> Called on non-RAM memory access. |
| `MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval )` | `rval = HandleControlLoad( addy );` <br> Called on non-RAM memory access return a value. |
| `MINIRV32_OTHERCSR_WRITE( csrno, value )` | `HandleOtherCSRWrite( image, csrno, value );` <br> You can use CSRs for control requests. |
| `MINIRV32_OTHERCSR_READ( csrno, value )` | `value = HandleOtherCSRRead( image, csrno );` <br> You can use CSRs for control requests. |
## Hopeful goals?
* Further drive down needed features to run Linux.
* Remove need for RV32A extension on systems with only one CPU.
* Support for relocatable ELF executables.
* Add support for an unreal UART. One that's **much** simpler than the current 8250 driver.
* Maybe run this in a pixelshader too!
* Get opensbi working with this.
* Be able to "embed" rv32 emulators in random projects.
* Can I use early console to be a full system console?
* Can I increase the maximum contiguous memory allocatable?
## Special Thanks
* For @regymm and their [patches to buildroot](https://github.com/regymm/buildroot) and help!
* callout: Regymm's [quasiSoC project](https://github.com/regymm/quasiSoC/).
* Buildroot (For being so helpful).
* @vowstar and their team working on [k210-linux-nommu](https://github.com/vowstar/k210-linux-nommu).
* This [guide](https://jborza.com/emulation/2020/04/09/riscv-environment.html)
* [rvcodecjs](https://luplab.gitlab.io/rvcodecjs/) I probably went through over 1,000 codes here.
* @splinedrive from the [KianV RISC-V noMMU SoC](https://github.com/splinedrive/kianRiscV/tree/master/linux_socs/kianv_harris_mcycle_edition?s=09) project.
## More details
If you want to build the kernel yourself:
* `make everything`
* About 20 minutes. (Or 4+ hours if you're on [Windows Subsystem for Linux 2](https://github.com/microsoft/WSL/issues/4197))
* And you should be dropped into a Linux busybox shell with some little tools that were compiled here.
## Emdoom notes
* Emdoom building is in the `experiments/emdoom` folder
* You *MUST* build your kernel with `MAX_ORDER` set to >12 in `buildroot/output/build/linux-5.19/include/linux/mmzone.h` if you are building your own image.
* You CAN use the pre-existing image that is described above.
* On Windows, it will be very slow. Not sure why.
If you want to use bare metal to build your binaries so you don't need buildroot, you can use the rv64 gcc in 32-bit mode built into Ubuntu 20.04 and up.
```
sudo apt-get install gcc-multilib gcc-riscv64-unknown-elf make
```
## Links
* "Hackaday Supercon 2022: Charles Lohr - Assembly in 2022: Yes! We Still Use it and Here's Why" : https://www.youtube.com/watch?v=Gelf0AyVGy4
## Attic
## General notes:
* https://github.com/cnlohr/riscv_emufun/commit/2f09cdeb378dc0215c07eb63f5a6fb43dbbf1871#diff-b48ccd795ae9aced07d022bf010bf9376232c4d78210c3113d90a8d349c59b3dL440
(These things don't currently work)
### Building Tests
(This does not work, now)
```
cd riscv-tests
export CROSS_COMPILE=riscv64-linux-gnu-
export PLATFORM_RISCV_XLEN=32
CC=riscv64-linux-gnu-gcc ./configure
make XLEN=32 RISCV_PREFIX=riscv64-unknown-elf- RISCV_GCC_OPTS="-g -O1 -march=rv32imaf -mabi=ilp32f -I/usr/include"
```
### Building OpenSBI
(This does not currently work!)
```
cd opensbi
export CROSS_COMPILE=riscv64-unknown-elf-
export PLATFORM_RISCV_XLEN=32
make
```
### Extra links
* Clear outline of CSRs: https://five-embeddev.com/riscv-isa-manual/latest/priv-csrs.html
* Fonts used in videos: https://audiolink.dev/
### Using custom build
Where yminpatch is the patch from the mailing list.
```
rm -rf buildroot
git clone git://git.buildroot.net/buildroot
cd buildroot
git am < ../yminpatch.txt
make qemu_riscv32_nommu_virt_defconfig
make
# Or use our configs.
```
Note: For emdoom you will need to modify include/linux/mmzone.h and change MAX_ORDER to 13.
### Buildroot Notes
Add this:
https://github.com/cnlohr/buildroot/pull/1/commits/bc890f74354e7e2f2b1cf7715f6ef334ff6ed1b2
Use this:
https://github.com/cnlohr/buildroot/commit/e97714621bfae535d947817e98956b112eb80a75

View File

@ -1,143 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
struct MiniRV32IMAState;
void ecall_handler(struct MiniRV32IMAState *state);
#define ECALL_HANDLER(state) ecall_handler(state)
#define MINIRV32WARN( x... ) printf( x );
#define MINIRV32_DECORATE static
#define MINI_RV32_RAM_SIZE (32 * 1024 * 1024)
#define MINIRV32_IMPLEMENTATION
#define MINIRV32_RAM_IMAGE_OFFSET 0x0
#include "mini-rv32ima.h"
#define SYSCALL(num) (1025 + num)
/**
 * Service an ECALL issued by the guest program.
 *
 * The call number is read from x17 (a7) and the first argument from x10 (a0).
 * Calls that produce a value write it back to x10. Calls 10 and 93 terminate
 * the emulator process with a0 as the exit status; an unknown call number
 * terminates it with status 1.
 *
 * @param state  CPU state whose registers are read and updated.
 */
void ecall_handler(struct MiniRV32IMAState *state) {
    uint32_t a0 = REG(10);

    switch (state->regs[17]) // x17 | a7
    {
    case 1:
        // PrintInt
        printf("%d", (int)a0);
        break;
    case 4:
        // PrintString
        // NOTE(review): a0 is a guest address, but it is handed to printf as
        // if it were a host pointer. Translating it needs the RAM image,
        // which this handler does not receive -- confirm and fix at the
        // call site.
        printf("%s", a0);
        break;
    case 10:
        fprintf(stderr, "\nexit: %d\n", (int)a0);
        exit((int)a0);
    case 11:
        // PrintChar
        printf("%c", (int)a0);
        break;
    case 93:
        fprintf(stderr, "\nmain return code: %d\n", (int)a0);
        exit((int)a0);
    case SYSCALL(0):
        // getchar();
        REGSET(10, getchar());
        // Previously fell through into the putchar case and echoed the
        // stale a0 value.
        break;
    case SYSCALL(1):
        // putchar
        putchar((int)a0);
        break;
    case SYSCALL(4): {
        // input int: read through a real int so the pointer type matches
        // "%d"; a0 is left untouched when no integer could be parsed.
        int value = 0;
        if (scanf("%d", &value) == 1) {
            a0 = (uint32_t)value;
        }
        REGSET(10, a0);
        break;
    }
    case SYSCALL(5):
        // input string
        // NOTE(review): same guest-address-as-host-pointer problem as
        // PrintString above, and "%s" is unbounded -- confirm and fix once
        // the handler can reach guest RAM.
        scanf("%s", a0);
        REGSET(10, a0);
        break;
    default:
        MINIRV32WARN("Unhandled ECALL: %d\n", state->regs[17]);
        exit(1);
        break;
    }
}
/**
 * Load a flat binary into emulated RAM and step the CPU until
 * MiniRV32IMAStep() reports a non-zero result.
 *
 * Usage: <prog> [flatbin_file]   (defaults to DEFAULT_FILE)
 *
 * @return 1 on setup failure, otherwise the non-zero value returned by
 *         MiniRV32IMAStep().
 */
int main(int argc, char *argv[]) {
    // gcc -DDEFAULT_FILE='\"flat.bin\"' .\ripes-vm.c -o rv32-vm.exe
    struct MiniRV32IMAState state;
    uint8_t *image = malloc(MINI_RV32_RAM_SIZE);
    if (image == NULL) {
        fprintf(stderr, "Failed to allocate guest RAM\n");
        return 1;
    }

    // Initialise the CPU state.
    memset(&state, 0, sizeof(state));
    state.pc = 0; // program counter starts at 0
    // Original comment: "set machine mode".
    // NOTE(review): the trap path in mini-rv32ima.h tracks privilege in
    // extraflags (`CSR( extraflags ) |= 3`), which stays 0 here -- confirm
    // this value does what was intended.
    state.mstatus = 0x80000000;
    state.mtvec = 0x1000;
    state.mie = 0x7; // original comment: "enable all interrupts"

    // Initialise guest memory.
    memset(image, 0, MINI_RV32_RAM_SIZE);

#ifndef DEFAULT_FILE
#define DEFAULT_FILE "../ccompiler/backend/test_rv.bin"
#endif
    const char *filename = DEFAULT_FILE;
    // Load the flat binary, optionally named on the command line.
    if (argc == 2) {
        filename = argv[1];
    }

    FILE *file = fopen(filename, "rb");
    if (!file) {
        fprintf(stderr, "Usage: %s <flatbin_file>\n", argv[0]);
        printf("Failed to open file %s\n", filename);
        free(image);
        return 1;
    }

    long flen = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        flen = ftell(file);
    }
    if (flen < 0 || fseek(file, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to determine size of %s\n", filename);
        fclose(file);
        free(image);
        return 1;
    }
    if (flen > MINI_RV32_RAM_SIZE) {
        fprintf(stderr, "Flatbin file is too large\n");
        fclose(file);
        free(image);
        return 1;
    }
    // An empty file is still accepted (the zeroed image is executed), as before.
    if (flen > 0 && fread(image, (size_t)flen, 1, file) != 1) {
        fprintf(stderr, "Failed to read %s\n", filename);
        fclose(file);
        free(image);
        return 1;
    }
    fclose(file);

    // Run the emulator one instruction at a time until it reports a
    // non-zero status.
    int32_t ret;
    do {
        ret = MiniRV32IMAStep(&state, image, MINIRV32_RAM_IMAGE_OFFSET, 0, 1);
    } while (ret == 0);

    printf("Exception or interrupt occurred at PC: %d\n", (int)state.pc);
    free(image);
    return ret;
}
// static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image )
// {
// uint32_t pc = core->pc;
// uint32_t pc_offset = pc - MINIRV32_RAM_IMAGE_OFFSET;
// uint32_t ir = 0;
// printf( "PC: %08x ", pc );
// if( pc_offset >= 0 && pc_offset < ram_amt - 3 )
// {
// ir = *((uint32_t*)(&((uint8_t*)ram_image)[pc_offset]));
// printf( "[0x%08x] ", ir );
// }
// else
// printf( "[xxxxxxxxxx] " );
// uint32_t * regs = core->regs;
// printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ",
// regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7],
// regs[8], regs[9], regs[10], regs[11], regs[12], regs[13], regs[14], regs[15] );
// printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n",
// regs[16], regs[17], regs[18], regs[19], regs[20], regs[21], regs[22], regs[23],
// regs[24], regs[25], regs[26], regs[27], regs[28], regs[29], regs[30], regs[31] );
// }

View File

@ -1,3 +0,0 @@
// Returns the constant 65536 (2^16) straight from main.
int main() {
return 65536;
}

View File

@ -1,8 +0,0 @@
// Separate declarations followed by assignments with mixed-precedence
// arithmetic: a = 1 + 2 * 3 = 7, then a = 7 - 7 + 1, so main returns 1.
int main() {
int a;
int b;
a = 1 + 2 * 3;
b = 7;
a = a - b + 1;
return a;
}

View File

@ -1,6 +0,0 @@
// Declaration with an initializer, then a self-referencing update; returns 11.
int main() {
int x = 10;
x = x + 1;
return x;
}
}

View File

@ -1,10 +0,0 @@
// if/else with a non-zero condition: the `if` branch runs, so main returns 1.
int main(void) {
int a;
a = 1;
if (a) {
a = 1;
} else {
a = 2;
}
return a;
}

View File

@ -1,10 +0,0 @@
// if/else with a zero condition: the `else` branch runs, so main returns 2.
int main(void) {
int a;
a = 0;
if (a) {
a = 1;
} else {
a = 2;
}
return a;
}

View File

@ -1,9 +0,0 @@
// Calls add() through a prototype declared before main and defined after it;
// main returns 1 + 2 = 3.
int add(int, int);
int main(void) {
return add(1, 2);
}
int add(int a, int b) {
return a + b;
}
}

View File

@ -1,5 +0,0 @@
// while loop with an unbraced body; counts i from 0 up to 10 and returns 10.
int main() {
int i = 0;
while (i < 10) i = i + 1;
return i;
}

View File

@ -1,12 +0,0 @@
// #include <stdio.h>
// do-while loop: doubles pow seven times starting from 1, so main returns 128.
int main() {
int i = 0;
int pow = 1;
do {
pow = pow * 2;
i = i + 1;
} while(i < 7);
// printf("%d", pow);
return pow;
}

View File

@ -1,7 +0,0 @@
// for loop with a loop-scoped counter and `+=`; increments num ten times and
// returns 10.
int main() {
int num = 0;
for (int i = 0; i < 10; i += 1) {
num = num + 1;
}
return num;
}

View File

@ -1,7 +0,0 @@
// add() is defined before its use in main; main returns 1 + 2 = 3.
int add(int a, int b) {
return a + b;
}
int main(void) {
return add(1, 2);
}

View File

@ -1,18 +0,0 @@
// #include <stdio.h>
// Recursive call test: main returns factorial(5) = 120.
int factorial(int num);
int main() {
int num = 5;
int result = factorial(num);
// printf("%d", result);
return result;
}
// Recursive factorial; the recursion stops at num == 0.
int factorial(int num) {
if (num == 0) {
return 1;
} else {
return num * factorial(num - 1);
}
}

View File

@ -1,30 +0,0 @@
# The commented-out rules below are an earlier shell-based harness that
# compared GCC's exit status with the VM's; the active `all` target at the
# bottom runs the Python driver instead.
# VM := ../../rv32-vm
# CC := ../../ccompiler
# STD_CC := gcc
# TESTS := $(wildcard *.c)
# # Define all test targets
# TEST_TARGETS := $(patsubst %.c, %_test, $(TESTS))
# all: $(TEST_TARGETS)
# %_test: %.c
# @$(STD_CC) -g -o $@ $<
# @$(CC) $< flat.bin
# @./$@ ; ret_gcc=$$?
# @$(VM) flat.bin ; ret_vm=$$?
# @echo "Testing $@"
# @if [ $$ret_gcc -eq $$ret_vm ]; then \
# echo "$@ passed"; \
# else \
# echo "$@ failed: GCC returned $$ret_gcc, VM returned $$ret_vm"; \
# exit 1; \
# fi
# clean:
# rm -f $(TEST_TARGETS) flat.bin
# .PHONY: all clean
all:
python test.py

View File

@ -1,6 +0,0 @@
// Two variables declared in one declaration; main returns 1 + 2 = 3.
int main() {
int a, b;
a = 1;
b = 2;
return a + b;
}

View File

@ -1,86 +0,0 @@
import subprocess
import os
from pathlib import Path
# Configuration
TEST_DIR = Path(".")
CC_PATH = Path("../../ccompiler.exe")
VM_PATH = Path("../../rv32-vm.exe")
WORKSPACE = Path(".") # Working directory for the tests
# Test-case table (sample): source file -> integer the VM is expected to report
TEST_CASE_MAP = {
"./01_return.c": 65536,
"./02_decl_expr.c": 1,
"./03_decl_init.c": 11,
"./04_if.c": 1,
"./05_else.c": 2,
"./06_fcall.c": 3,
"./07_while.c": 10,
"./08_do_while.c": 128,
"./09_for.c": 10,
"./10_main.c": 3,
"./11_recursive.c": 120,
}
def run_command(cmd, capture_stderr=True):
    """Run ``cmd`` from WORKSPACE with a one-second timeout.

    Returns the child's stderr with surrounding whitespace stripped when
    ``capture_stderr`` is true, otherwise ``None``. ``subprocess.run`` raises
    ``subprocess.TimeoutExpired`` if the child outlives the timeout.
    """
    stderr_target = subprocess.PIPE if capture_stderr else None
    completed = subprocess.run(
        cmd,
        cwd=WORKSPACE,
        stderr=stderr_target,
        text=True,
        timeout=1,
    )
    if not capture_stderr:
        return None
    return completed.stderr.strip()
def run_test(test_file, expected):
    """Compile ``test_file``, run the result in the VM and compare the outcome.

    The VM's result is taken to be the last whitespace-separated token on its
    stderr, parsed as an integer.

    Returns True when that integer equals ``expected``; False on a compile
    failure, a timeout in either step, unparsable VM output or a mismatch.
    """
    print(f"\nTesting {test_file}...")

    # 1. Compile; success is detected by flat.bin appearing in WORKSPACE.
    compile_cmd = [str(CC_PATH), str(test_file)]
    try:
        compile_err = run_command(compile_cmd)
    except subprocess.TimeoutExpired:
        # Previously unhandled: a hanging compiler aborted the whole test run.
        print(" Compilation timed out")
        return False
    if not (WORKSPACE / "flat.bin").exists():
        print(f" Compilation failed: {compile_err}")
        return False

    # 2. Run the virtual machine on the generated image.
    vm_cmd = [str(VM_PATH), "flat.bin"]
    try:
        vm_err = run_command(vm_cmd)
    except subprocess.TimeoutExpired:
        print(" Timeout expired")
        return False

    # 3. Parse the reported value (assumed to be the last token on stderr).
    try:
        actual = int(vm_err.split()[-1])
    except (ValueError, IndexError):
        print(f" Invalid VM output: {vm_err}")
        return False

    # 4. Check the result.
    if actual == expected:
        print(f" PASSED {test_file}")
        return True
    print(f" FAILED: Expected {expected}, got {actual}")
    return False
# Runs every TEST_CASE_MAP entry through run_test(), removes a leftover
# flat.bin from WORKSPACE and prints how many tests passed.
def main():
passed = 0
total = 0
for test_file, expected in TEST_CASE_MAP.items():
total += 1
if run_test(TEST_DIR / test_file, expected):
passed += 1
# Clean up intermediate files
if (WORKSPACE / "flat.bin").exists():
os.remove(WORKSPACE / "flat.bin")
print(f"\nTest Summary: {passed}/{total} passed")
if __name__ == "__main__":
main()

11
ccompiler/ccompiler.c Normal file
View File

@ -0,0 +1,11 @@
#include "ccompiler.h"
/**
 * Run the whole compilation pipeline described by `cc`: the front end
 * produces an AST, the middle end turns it into IR and the back end turns
 * the IR into an assembly program.
 *
 * @param cc  Input source (file, stream, reader) plus per-stage configs.
 * @return    The value returned by cc_backend().
 */
asm_prog_t* smcc_cc(smcc_cc_t* cc) {
    ast_node_t* root = cc_frontend(cc->file, cc->stream, cc->sread);
    // TODO add config
    ir_prog_t* prog = cc_middleend(root, &cc->midend_conf);
    // TODO add config
    // The result used to be computed and then dropped: the function fell off
    // its end without returning a value.
    return cc_backend(prog, &cc->backend_conf);
}

21
ccompiler/ccompiler.h Normal file
View File

@ -0,0 +1,21 @@
#ifndef __SMCC_CC_H__
#define __SMCC_CC_H__
// TODO
#include "frontend/frontend.h"
#include "middleend/middleend.h"
#include "backend/backend.h"
// Inputs and per-stage configuration for one smcc_cc() run.
typedef struct smcc_cc {
const char *file; // passed to cc_frontend() as its `file` argument
void *stream; // passed to cc_frontend() as its `stream` argument
sread_fn sread; // passed to cc_frontend() as its `sread` argument
cc_midend_conf_t midend_conf; // passed by address to cc_middleend()
cc_backend_conf_t backend_conf; // passed by address to cc_backend()
} smcc_cc_t;
typedef union asm_prog asm_prog_t;
// Runs cc_frontend(), cc_middleend() and cc_backend() in sequence on `cc`.
asm_prog_t* smcc_cc(smcc_cc_t* cc);
#endif

View File

@ -7,7 +7,6 @@ CFLAGS = -g -Wall -I../..
LEXER_DIR = ./lexer
PARSER_DIR = ./parser
AST_DIR = ./parser/ast
SYMTAB_DIR = ./parser/symtab
# 源文件列表
SRCS = \
@ -23,9 +22,6 @@ SRCS = \
$(AST_DIR)/program.c \
$(AST_DIR)/stmt.c \
$(AST_DIR)/term.c \
$(SYMTAB_DIR)/hashmap.c \
$(SYMTAB_DIR)/scope.c \
$(SYMTAB_DIR)/symtab.c \
# 生成目标文件列表
OBJS = $(SRCS:.c=.o)

View File

@ -1,17 +1,18 @@
#include <lib/core.h>
#include "frontend.h"
#include "parser/symtab/symtab.h"
ast_node_t* frontend(const char* file, void* stream, sread_fn sread) {
ast_node_t* cc_frontend(const char* file, void* stream, sread_fn sread) {
init_lib_core();
strpool_t strpool;
init_strpool(&strpool);
lexer_t lexer;
cc_lexer_t lexer;
init_lexer(&lexer, file, stream, sread, &strpool);
symtab_t symtab;
init_symtab(&symtab);
// TODO global scope
symtab_enter_scope(&symtab);
parser_t parser;
init_parser(&parser, &lexer, &symtab);

View File

@ -1,9 +1,9 @@
#ifndef __SMCC_FRONTEND_H__
#define __SMCC_FRONTEND_H__
#ifndef __SMCC_CC_FRONTEND_H__
#define __SMCC_CC_FRONTEND_H__
#include "lexer/lexer.h"
#include "parser/parser.h"
typedef int (*sread_fn)(void *dst_buf, int dst_size, int elem_size, int count, void *stream);
ast_node_t* frontend(const char* file, void* stream, sread_fn sread);
typedef int (*sread_fn)(void *dst_buf, int elem_size, int count, void *stream);
ast_node_t* cc_frontend(const char* file, void* stream, sread_fn sread);
#endif

View File

@ -74,7 +74,7 @@ static inline int keyword_cmp(const char* name, int len) {
return -1; // Not a keyword.
}
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread, strpool_t* strpool) {
void init_lexer(cc_lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread, strpool_t* strpool) {
lexer->strpool = strpool;
lexer->cur_ptr = lexer->end_ptr = (char*)&(lexer->buffer);
lexer->loc.fname = strpool_intern(lexer->strpool, file_name);
@ -87,7 +87,7 @@ void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread
rt_memset(lexer->buffer, 0, sizeof(lexer->buffer));
}
static void flush_buffer(lexer_t* lexer) {
static void flush_buffer(cc_lexer_t* lexer) {
int num = lexer->end_ptr - lexer->cur_ptr;
for (int i = 0; i < num; i++) {
lexer->buffer[i] = lexer->cur_ptr[i];
@ -96,7 +96,7 @@ static void flush_buffer(lexer_t* lexer) {
int read_size = LEXER_BUFFER_SIZE - num;
// TODO rt_size_t to int maybe lose precision
int got_size = lexer->sread(lexer->buffer + num, read_size, 1, read_size, lexer->stream);
int got_size = lexer->sread(lexer->buffer + num, 1, read_size, lexer->stream);
if (got_size < 0) {
LEX_ERROR("lexer read error");
} else if (got_size < read_size) {
@ -110,7 +110,7 @@ static void flush_buffer(lexer_t* lexer) {
}
}
static void goto_newline(lexer_t* lexer) {
static void goto_newline(cc_lexer_t* lexer) {
do {
if (lexer->cur_ptr == lexer->end_ptr) {
flush_buffer(lexer);
@ -120,7 +120,7 @@ static void goto_newline(lexer_t* lexer) {
} while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0');
}
static void goto_block_comment(lexer_t* lexer) {
static void goto_block_comment(cc_lexer_t* lexer) {
while (1) {
if (lexer->end_ptr - lexer->cur_ptr < 2) {
flush_buffer(lexer);
@ -159,7 +159,7 @@ static char got_slash(char* peek) {
return -1;
}
static void parse_char_literal(lexer_t* lexer, tok_t* token) {
static void parse_char_literal(cc_lexer_t* lexer, tok_t* token) {
char val = 0;
char* peek = lexer->cur_ptr + 1;
if (*peek == '\\') {
@ -175,7 +175,7 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) {
token->val.ch = val;
}
static void parse_string_literal(lexer_t* lexer, tok_t* token) {
static void parse_string_literal(cc_lexer_t* lexer, tok_t* token) {
char* peek = lexer->cur_ptr + 1;
// TODO string literal size check
static char dest[LEXER_MAX_TOKEN_SIZE + 1];
@ -200,7 +200,7 @@ static void parse_string_literal(lexer_t* lexer, tok_t* token) {
}
// FIXME it write by AI maybe error
static void parse_number(lexer_t* lexer, tok_t* token) {
static void parse_number(cc_lexer_t* lexer, tok_t* token) {
char* peek = lexer->cur_ptr;
int base = 10;
int is_float = 0;
@ -290,7 +290,7 @@ static void parse_number(lexer_t* lexer, tok_t* token) {
#define GOT_ONE_TOKEN_BUF_SIZE 64
// /zh/c/language/operator_arithmetic.html
void get_token(lexer_t* lexer, tok_t* token) {
void get_token(cc_lexer_t* lexer, tok_t* token) {
// 需要保证缓冲区始终可读
if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) {
flush_buffer(lexer);
@ -515,7 +515,7 @@ static const tok_basic_type_t tok_type_map[] = {
}
// get_token maybe got invalid (with parser)
void get_valid_token(lexer_t* lexer, tok_t* token) {
void get_valid_token(cc_lexer_t* lexer, tok_t* token) {
tok_basic_type_t type;
do {
get_token(lexer, token);

View File

@ -10,10 +10,9 @@
#define LEXER_BUFFER_SIZE 4095
#endif
typedef int (*lexer_sread_fn)(void *dst_buf, int dst_size,
int elem_size, int count, void *stream);
typedef int (*lexer_sread_fn)(void *dst_buf, int elem_size, int count, void *stream);
typedef struct lexer {
typedef struct cc_lexer {
loc_t loc;
char* cur_ptr; // 当前扫描的字符,但是还没有开始扫描
@ -24,15 +23,15 @@ typedef struct lexer {
void* stream;
strpool_t* strpool;
} lexer_t;
} cc_lexer_t;
void init_lexer(lexer_t* lexer, const char* file_name, void* stream,
void init_lexer(cc_lexer_t* lexer, const char* file_name, void* stream,
lexer_sread_fn sread, strpool_t* strpool);
// pure token getter it will included empty token like TOKEN_BLANK
void get_token(lexer_t* lexer, tok_t* token);
void get_token(cc_lexer_t* lexer, tok_t* token);
// get_token maybe got invalid (with parser as TOKEN_BLANK)
void get_valid_token(lexer_t* lexer, tok_t* token);
void get_valid_token(cc_lexer_t* lexer, tok_t* token);
#endif

View File

@ -1,18 +0,0 @@
# Builds and runs the lexer unit tests (`test`) and the token-dump tool (`run`).
CC = gcc
CFLAGS = -g -Wall -I../../../.. -DLEX_LOG_LEVEL=4
SRC = ../lexer.c ../token.c
LIB = -L../../../../lib -lcore
# NOTE(review): this assigns a variable named `all` rather than declaring an
# `all:` target; presumably `all: test_all` was intended -- confirm.
all = test_all
test_all: test
./test
run:
$(CC) $(CFLAGS) $(SRC) run.c $(LIB) -o run
test:
$(CC) $(CFLAGS) $(SRC) $(LIB) -o test test.c
clean:
rm -f test run

View File

@ -1,56 +0,0 @@
#include "../lexer.h"
#include <stdio.h>
#include <string.h>
// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
/*
tok_tConstant {
int have;
union {
char ch;
int i;
float f;
double d;
long long ll;
char* str;
};
};
*/
int g_num;
int g_num_arr[3];
/**
 * Tokenise a source file and log every valid token until TOKEN_EOF.
 *
 * Usage: run [file [-nodebug]]   (file defaults to "run.c")
 *
 * @return 0 on success, 1 if the file cannot be opened.
 */
int main(int argc, char* argv[]) {
    // init_lib_core() must be called before anything else.
    init_lib_core();
    if (argc == 3 && strcmp(argv[2], "-nodebug") == 0) {
        log_set_level(NULL, LOG_LEVEL_ALL & ~LOG_LEVEL_DEBUG);
    }

    // Honour argv[1] whenever it is present; it used to be ignored when
    // "-nodebug" was also supplied (argc == 3).
    const char* file_name = "run.c";
    if (argc >= 2) {
        file_name = argv[1];
    }

    FILE* fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror("open file failed");
        return 1;
    }
    printf("open file success\n");

    cc_lexer_placeholder_unused: ;
    lexer_t lexer;
    strpool_t strpool;
    init_strpool(&strpool);
    init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s, &strpool);

    tok_t tok;
    while (1) {
        get_valid_token(&lexer, &tok);
        if (tok.sub_type == TOKEN_EOF) {
            break;
        }
        LOG_DEBUG("tk type `%s` in %s:%d:%d", get_tok_name(tok.sub_type), tok.loc.fname, tok.loc.line, tok.loc.col);
        // LOG_DEBUG("%s", tok.val.str);
    }

    fclose(fp);
    return 0;
}

View File

@ -1,172 +0,0 @@
// test_lexer.c
#include <lib/acutest.h>
#include "../lexer.h"
#include <string.h>
/**
 * Reader callback for the lexer tests: `stream` is a NUL-terminated string
 * and the function copies its beginning into `dst_buf`.
 *
 * At most min(dst_size, elem_size * count) bytes are copied, and never more
 * than the string's length, so a short input is no longer read past its end.
 * The terminating NUL is not copied. There is no read cursor: every call
 * starts at the beginning of the string.
 *
 * @return number of bytes copied; 0 when `stream` is NULL or the limit is
 *         not positive.
 */
int test_read(void *dst_buf, int dst_size, int elem_size, int count, void *stream) {
    if (stream == NULL) {
        return 0;
    }
    int limit = dst_size > elem_size * count ? elem_size * count : dst_size;
    if (limit <= 0) {
        return 0;
    }
    size_t avail = strlen((const char *)stream);
    size_t n = avail < (size_t)limit ? avail : (size_t)limit;
    memcpy(dst_buf, stream, n);
    return (int)n;
}
// Test helper: lexes `input` with a fresh lexer and checks that the first
// valid token has type `expected_type`; on failure both token names are
// reported through TEST_MSG.
static inline void test_lexer_string(const char* input, cc_tktype_t expected_type) {
lexer_t lexer;
tok_t token;
// NOTE(review): init_lexer is called with four arguments here, while the
// declaration in lexer.h also takes a strpool_t* -- confirm this file
// still builds against the current header.
init_lexer(&lexer, "test.c", (void*)input, test_read);
get_valid_token(&lexer, &token);
TEST_CHECK(token.type == expected_type);
TEST_MSG("Expected: %s", get_tok_name(expected_type));
TEST_MSG("Got: %s", get_tok_name(token.type));
}
// 基础运算符测试
void test_operators() {
TEST_CASE("Arithmetic operators"); {
test_lexer_string("+", TOKEN_ADD);
test_lexer_string("++", TOKEN_ADD_ADD);
test_lexer_string("+=", TOKEN_ASSIGN_ADD);
test_lexer_string("-", TOKEN_SUB);
test_lexer_string("--", TOKEN_SUB_SUB);
test_lexer_string("-=", TOKEN_ASSIGN_SUB);
test_lexer_string("*", TOKEN_MUL);
test_lexer_string("*=", TOKEN_ASSIGN_MUL);
test_lexer_string("/", TOKEN_DIV);
test_lexer_string("/=", TOKEN_ASSIGN_DIV);
test_lexer_string("%", TOKEN_MOD);
test_lexer_string("%=", TOKEN_ASSIGN_MOD);
}
TEST_CASE("Bitwise operators"); {
test_lexer_string("&", TOKEN_AND);
test_lexer_string("&&", TOKEN_AND_AND);
test_lexer_string("&=", TOKEN_ASSIGN_AND);
test_lexer_string("|", TOKEN_OR);
test_lexer_string("||", TOKEN_OR_OR);
test_lexer_string("|=", TOKEN_ASSIGN_OR);
test_lexer_string("^", TOKEN_XOR);
test_lexer_string("^=", TOKEN_ASSIGN_XOR);
test_lexer_string("~", TOKEN_BIT_NOT);
test_lexer_string("<<", TOKEN_L_SH);
test_lexer_string("<<=", TOKEN_ASSIGN_L_SH);
test_lexer_string(">>", TOKEN_R_SH);
test_lexer_string(">>=", TOKEN_ASSIGN_R_SH);
}
TEST_CASE("Comparison operators"); {
test_lexer_string("==", TOKEN_EQ);
test_lexer_string("!=", TOKEN_NEQ);
test_lexer_string("<", TOKEN_LT);
test_lexer_string("<=", TOKEN_LE);
test_lexer_string(">", TOKEN_GT);
test_lexer_string(">=", TOKEN_GE);
}
TEST_CASE("Special symbols"); {
test_lexer_string("(", TOKEN_L_PAREN);
test_lexer_string(")", TOKEN_R_PAREN);
test_lexer_string("[", TOKEN_L_BRACKET);
test_lexer_string("]", TOKEN_R_BRACKET);
test_lexer_string("{", TOKEN_L_BRACE);
test_lexer_string("}", TOKEN_R_BRACE);
test_lexer_string(";", TOKEN_SEMICOLON);
test_lexer_string(",", TOKEN_COMMA);
test_lexer_string(":", TOKEN_COLON);
test_lexer_string(".", TOKEN_DOT);
test_lexer_string("...", TOKEN_ELLIPSIS);
test_lexer_string("->", TOKEN_DEREF);
test_lexer_string("?", TOKEN_COND);
}
}
// 关键字测试
void test_keywords() {
TEST_CASE("C89 keywords");
test_lexer_string("while", TOKEN_WHILE);
test_lexer_string("sizeof", TOKEN_SIZEOF);
// TEST_CASE("C99 keywords");
// test_lexer_string("restrict", TOKEN_RESTRICT);
// test_lexer_string("_Bool", TOKEN_INT); // 需确认你的类型定义
}
// 字面量测试
void test_literals() {
TEST_CASE("Integer literals"); {
// 十进制
test_lexer_string("0", TOKEN_INT_LITERAL);
test_lexer_string("123", TOKEN_INT_LITERAL);
// test_lexer_string("2147483647", TOKEN_INT_LITERAL);
// // 十六进制
// test_lexer_string("0x0", TOKEN_INT_LITERAL);
// test_lexer_string("0x1A3F", TOKEN_INT_LITERAL);
// test_lexer_string("0XABCDEF", TOKEN_INT_LITERAL);
// // 八进制
// test_lexer_string("0123", TOKEN_INT_LITERAL);
// test_lexer_string("0777", TOKEN_INT_LITERAL);
// // 边界值测试
// test_lexer_string("2147483647", TOKEN_INT_LITERAL); // INT_MAX
// test_lexer_string("4294967295", TOKEN_INT_LITERAL); // UINT_MAX
}
TEST_CASE("Character literals"); {
test_lexer_string("'a'", TOKEN_CHAR_LITERAL);
test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL);
test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL);
test_lexer_string("'\\\\'", TOKEN_CHAR_LITERAL);
test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL);
}
TEST_CASE("String literals"); {
test_lexer_string("\"hello\"", TOKEN_STRING_LITERAL);
test_lexer_string("\"multi-line\\nstring\"", TOKEN_STRING_LITERAL);
test_lexer_string("\"escape\\\"quote\"", TOKEN_STRING_LITERAL);
}
// TEST_CASE("Floating literals");
// test_lexer_string("3.14e-5", TOKEN_FLOAT_LITERAL);
}
// Edge-case tests. Currently a placeholder: both cases below (an over-long
// identifier and input longer than the lexer buffer) are disabled.
void test_edge_cases() {
// TEST_CASE("Long identifiers");
// char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0};
// memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1);
// test_lexer_string(long_id, TOKEN_IDENT);
// TEST_CASE("Buffer boundary");
// char boundary[LEXER_BUFFER_SIZE*2] = {0};
// memset(boundary, '+', LEXER_BUFFER_SIZE*2-1);
// test_lexer_string(boundary, TOKEN_ADD);
}
// Error-handling test: with a NULL stream test_read() returns 0 bytes, and
// the first valid token is expected to be TOKEN_EOF.
void test_error_handling() {
TEST_CASE("Invalid characters");
lexer_t lexer;
tok_t token;
init_lexer(&lexer, "test.c", NULL, test_read);
get_valid_token(&lexer, &token);
TEST_CHECK(token.type == TOKEN_EOF); // should trigger the error-handling path
}
// Test list: name/function pairs, terminated by a {NULL, NULL} entry.
TEST_LIST = {
{"operators", test_operators},
{"keywords", test_keywords},
{"literals", test_literals},
{"edge_cases", test_edge_cases},
{"error_handling", test_error_handling},
{NULL, NULL}
};

View File

@ -89,6 +89,7 @@ typedef struct ast_node {
vector_header(children, struct ast_node *);
} block;
struct {
symtab_key_t key;
struct ast_node * decl_node;
tok_t tok;
} syms;

View File

@ -1,8 +1,6 @@
#include "../ast.h"
#include "../parser.h"
#include "../symtab/symtab.h"
#ifndef BLOCK_MAX_NODE
#define BLOCK_MAX_NODE (1024)

View File

@ -1,6 +1,5 @@
#include "../ast.h"
#include "../parser.h"
#include "../symtab/symtab.h"
/**
* 0 false
@ -49,7 +48,10 @@ ast_node_t* parse_decl_val(parser_t* parser) {
node->decl_val.type = type_node;
node->decl_val.name = name_node;
node->type = NT_DECL_VAR;
symtab_add_symbol(parser->symtab, name_node->syms.tok.val.str, node, 0);
type_node->syms.key.uid = parser->symtab->cur_scope->uid;
type_node->syms.key.strp_name = name_node->syms.tok.val.str;
symtab_add(parser->symtab, &type_node->syms.key, node);
ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_ASSIGN) {

View File

@ -1,6 +1,5 @@
#include "../ast.h"
#include "../parser.h"
#include "../symtab/symtab.h"
// Copy from `CParse`
/**
@ -253,11 +252,10 @@ static ast_node_t* parse_call(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t
}
pop_tok(tokbuf); // 跳过 ')'
const char* name = ident->syms.tok.val.str;
ast_node_t* sym = symtab_lookup_symbol(symtab, name);
ast_node_t* sym = symtab_get(symtab, &ident->syms.key);
// TODO check func is match
if (sym == NULL || sym->type != NT_DECL_FUNC) {
LOG_ERROR("function not decl %s", name);
LOG_FATAL("function not decl %s", ident->syms.key.strp_name);
}
node->call.name = ident;
node->call.func_decl = sym;
@ -345,10 +343,12 @@ static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symt
case TOKEN_IDENT:
node = expect_pop_ident(tokbuf);
cc_tktype_t ttype = peek_tok_type(tokbuf);
node->syms.key.uid = 0;
node->syms.key.strp_name = tok->val.str;
if (ttype == TOKEN_L_PAREN) {
node = parse_call(tokbuf, symtab, node);
} else {
void *sym = symtab_lookup_symbol(symtab, tok->val.str);
void *sym = symtab_get(symtab, &node->syms.key);
if (sym == NULL) {
LOG_ERROR("undefined symbol but use %s", tok->val.str);
}

View File

@ -1,6 +1,5 @@
#include "../ast.h"
#include "../parser.h"
#include "../symtab/symtab.h"
#ifndef FUNC_PARAM_CACHE_SIZE
#define FUNC_PARAM_CACHE_SIZE 32 // 合理初始值可覆盖99%常见情况
@ -40,7 +39,10 @@ static void parse_params(parser_t* parser, tok_stream_t* cache, ast_node_t* node
node->decl_val.expr_stmt = NULL;
node->decl_val.data = NULL;
vector_push(params->params.params, node);
symtab_add_symbol(parser->symtab, id_node->syms.tok.val.str, node, 0);
id_node->syms.key.uid = parser->symtab->cur_scope->uid;
id_node->syms.key.strp_name = id_node->syms.tok.val.str;
symtab_add(parser->symtab, &id_node->syms.key, node);
break;
case TOKEN_L_PAREN: {
depth++;
@ -129,7 +131,11 @@ void parse_func(parser_t* parser) {
ast_type_t type = check_is_func_decl(&(parser->tokbuf), &cache);
ast_node_t* prev = symtab_add_symbol(parser->symtab, func_name, decl, 1);
name_node->syms.key.uid = parser->symtab->cur_scope->uid;
name_node->syms.key.strp_name = func_name;
ast_node_t* prev = symtab_get(parser->symtab, &name_node->syms.key);
// TODO Change something
if (prev != NULL) {
if (prev->type != NT_DECL_FUNC) {
LOG_ERROR("the symbol duplicate old is %d, new is func", prev->type);
@ -143,6 +149,7 @@ void parse_func(parser_t* parser) {
}
return;
}
symtab_add(parser->symtab, &name_node->syms.key, decl);
vector_push(parser->root->root.children, decl);
if (type == NT_DECL_FUNC) {
return;

View File

@ -2,7 +2,7 @@
#include "parser.h"
#include "type.h"
void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab) {
void init_parser(parser_t* parser, cc_lexer_t* lexer, symtab_t* symtab) {
init_lib_core();
parser->cur_node = NULL;

View File

@ -2,23 +2,22 @@
#define __PARSER_H__
#include "../lexer/lexer.h"
#include <lib/utils/symtab/symtab.h>
#include "ast.h"
typedef struct lexer lexer_t;
typedef struct symtab symtab_t;
#define PARSER_MAX_TOKEN_QUEUE 16
typedef struct parser {
ast_node_t* root;
ast_node_t* cur_node;
lexer_t* lexer;
cc_lexer_t* lexer;
symtab_t* symtab;
tok_stream_t tokbuf;
tok_t TokenBuffer[PARSER_MAX_TOKEN_QUEUE];
int err_level;
} parser_t;
void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab);
void init_parser(parser_t* parser, cc_lexer_t* lexer, symtab_t* symtab);
void run_parser(parser_t* parser);
#endif

View File

@ -1,4 +0,0 @@
/* Output routine supplied by the runtime/linker; not defined in this file. */
extern int _print_str(const char* str);

/* Entry point: hands a fixed greeting to _print_str.
 * As in the original, there is no explicit return statement. */
int main(void) {
    const char* greeting = "Hello, world!\n";
    _print_str(greeting);
}

View File

@ -1,14 +0,0 @@
// int __print_str(char* str);
// Forward declaration only; f is not defined in this file.
int f(void);
// NOTE(review): this looks like a parser test input (see the "Test case" note
// below). The single live statement mixes -, *, % and parentheses; the other
// lines are disabled experiments.
int main(void) {
int a;
// f();
// a = 1 + 2 * 3 + 4;
// __print_str("Hello, world!\n");
a = 3 - f() * (3 + 2) % 6;
// Test case:
// if (a) if (2) 3; else b;
// Is this parsed correctly as if (a) { if (b) c else d }?
}

View File

@ -1,37 +0,0 @@
#include "../parser.h"
#include "../ast/ast.h"
#include "../symtab/symtab.h"
#include <stdio.h>
// gcc -g ../parser.c ../../lexer/lexer.c ../ast/ast.c ../ast/block.c ../ast/decl.c ../ast/expr.c ../ast/func.c ../ast/program.c ../ast/stmt.c ../ast/term.c ../symtab/hashmap.c ../symtab/scope.c ../symtab/symtab.c test_parser.c -o test_parser
// gcc -g test_parser.c -L../.. -lfrontend -o test_parser
/**
 * Parser test driver.
 *
 * Opens a source file, sets up the string pool, lexer, symbol table and
 * parser, runs parse_prog, then passes the resulting root node to pnt_ast.
 *
 * @param argc  argument count; when it is exactly 2, argv[1] is the input file
 * @param argv  argument vector
 * @return 0 on completion, 1 if the input file cannot be opened
 */
int main(int argc, char** argv) {
    init_lib_core();

    // Default input, overridden by a single command-line argument
    const char* file_name = (argc == 2) ? argv[1] : "test_file.c";

    FILE* fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror("open file failed");
        return 1;
    }
    printf("open file success\n");

    lexer_t lexer;
    strpool_t strpool;
    init_strpool(&strpool);
    init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s, &strpool);

    struct SymbolTable symtab;
    init_symtab(&symtab);

    struct parser parser;
    init_parser(&parser, &lexer, &symtab);
    parse_prog(&parser);
    printf("parse_end\n");
    pnt_ast(parser.root, 0);

    // All reading happens before this point, so release the input stream
    // instead of leaking it until process exit.
    fclose(fp);
    return 0;
}

View File

@ -58,6 +58,8 @@ static ir_node_t* gen_ir_term(ast_node_t* node) {
Panic("gen_ir_expr: unknown node type");
}
}
TODO();
return NULL;
}
static ir_node_t* gen_ir_expr(ast_node_t* node) {
@ -239,8 +241,11 @@ void gen_ir_jmp(ast_node_t* node) {
switch (node->type) {
case NT_STMT_IF: {
ir_bblock_t* trueb = bblocks[0];
trueb->label = "if_true";
ir_bblock_t* falseb = bblocks[1];
falseb->label = "if_false";
ir_bblock_t* endb = bblocks[2];
endb->label = "if_end";
ir_node_t* jmp;
// cond
@ -248,19 +253,16 @@ void gen_ir_jmp(ast_node_t* node) {
emit_br(cond, trueb, falseb);
// true block
vector_push(ctx.cur_func->bblocks, trueb);
ctx.cur_block = trueb;
_gen_ir_from_ast(node->if_stmt.if_stmt);
// else block
if (node->if_stmt.else_stmt != NULL) {
vector_push(ctx.cur_func->bblocks, falseb);
ctx.cur_block = falseb;
_gen_ir_from_ast(node->if_stmt.else_stmt);
ir_node_t* jmp;
ctx.cur_block = endb;
vector_push(ctx.cur_func->bblocks, ctx.cur_block);
NEW_IR_JMP(jmp, ctx.cur_block);
emit_instr(falseb, jmp);
} else {

View File

@ -2,6 +2,7 @@
#define __IR_AST_H__
#include "ir.h"
typedef struct ast_node ast_node_t;
ir_prog_t* gen_ir_from_ast(ast_node_t* node);
#endif //

View File

@ -0,0 +1,5 @@
#include "middleend.h"
/**
 * Middle-end entry point: lowers an AST into an IR program.
 *
 * @param root  root node of the AST to translate
 * @param conf  middle-end configuration; not consulted by this implementation
 * @return whatever gen_ir_from_ast produces for root
 */
ir_prog_t* cc_middleend(ast_node_t* root, cc_midend_conf_t* conf) {
    (void)conf;  // currently unused
    ir_prog_t* prog = gen_ir_from_ast(root);
    return prog;
}

View File

@ -1,7 +1,13 @@
#ifndef __SMCC_MIDDLEEND_H__
#define __SMCC_MIDDLEEND_H__
#ifndef __SMCC_CC_MIDDLEEND_H__
#define __SMCC_CC_MIDDLEEND_H__
#include "ir/ir.h"
#include "ir/ir_ast.h"
// Configuration passed to cc_middleend. It declares no members at present;
// the only candidate field (the target arch) is commented out.
// NOTE(review): a struct without members is accepted by GCC as an extension
// but is not valid ISO C; confirm the supported toolchains before relying on it.
typedef struct cc_midend_conf {
// cc_arch_t arch;
} cc_midend_conf_t;
// TODO add some feature to cc_middleend like optimization
ir_prog_t* cc_middleend(ast_node_t* root, cc_midend_conf_t* conf);
#endif // __SMCC_MIDDLEEND_H__

View File

@ -1,8 +0,0 @@
# Default goal: build the IR test driver.
all: test_ir
# Compile ir.c and test_ir.c and link them against -lfrontend from
# ../../frontend; the frontend target is brought up to date first.
test_ir: frontend
gcc -g ../ir.c test_ir.c -L../../frontend -lfrontend -o test_ir
# Delegate to the Makefile in ../../frontend.
frontend:
make -C ../../frontend

View File

@ -1,7 +0,0 @@
/* Returns the sum of the two integer arguments. */
int add(int a, int b) {
    int sum = a + b;
    return sum;
}
/* Entry point: the process exit status is the result of add(1, 2). */
int main(void) {
    int status = add(1, 2);
    return status;
}

View File

@ -1,18 +0,0 @@
#include "../ir.h"
#include "../../frontend/frontend.h"
/**
 * IR generation test driver.
 *
 * Opens a source file, passes it to frontend() to obtain an AST root, and
 * feeds that root to gen_ir_from_ast.
 *
 * @param argc  argument count; when it is exactly 2, argv[1] is the input file
 * @param argv  argument vector
 * @return 0 on completion, 1 if the input file cannot be opened
 */
int main(int argc, const char** argv) {
    const char* file_name = "test_file.c";
    if (argc == 2) {
        file_name = argv[1];
    }

    FILE* fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror("open file failed");
        return 1;
    }
    printf("open file success\n");

    // Pass the name of the file that was actually opened; the previous
    // hard-coded "test.c" did not match the stream being read.
    struct ASTNode* root = frontend(file_name, fp, (sread_fn)fread_s);
    gen_ir_from_ast(root);

    // The stream is no longer needed once IR generation has finished.
    fclose(fp);
    return 0;
}