feat add func call and rewrite codes

This commit is contained in:
ZZY 2025-03-07 12:29:53 +08:00
parent 09299e339c
commit 95bf44eb3f
37 changed files with 3369 additions and 1063 deletions

View File

@ -0,0 +1,13 @@
# Builds the RV32 code generator (ccompiler) on top of the frontend library.
# None of these names are files that the rules below produce.
.PHONY: all run frontend clean

all: ccompiler

# Compile test.c into the flat binary image flat.bin.
run: ccompiler
	./ccompiler test.c flat.bin

# `frontend` is phony, so the library is refreshed before every link.
ccompiler: frontend
	gcc -g rv32ima_codegen.c ../../middleend/ir.c -L../../frontend -lfrontend -o ccompiler

# $(MAKE) forwards flags such as -j and -n to the sub-make.
frontend:
	$(MAKE) -C ../../frontend

clean:
	rm -f ccompiler flat.bin

View File

@ -0,0 +1,338 @@
#ifndef __RV32I_GEN_H__
#define __RV32I_GEN_H__
/**
31 25 24 20 19 15 14 12 11 7 6 0
imm[31:12] rd 0110111 U lui
imm[31:12] rd 0010111 U auipc
imm[20|10:1|11|19:12] rd 1101111 J jal
imm[11:0] rs1 000 rd 1100111 I jalr
imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 B beq
imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 B bne
imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 B blt
imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 B bge
imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 B bltu
imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 B bgeu
imm[11:0] rs1 000 rd 0000011 I lb
imm[11:0] rs1 001 rd 0000011 I lh
imm[11:0] rs1 010 rd 0000011 I lw
imm[11:0] rs1 100 rd 0000011 I lbu
imm[11:0] rs1 101 rd 0000011 I lhu
imm[11:5] rs2 rs1 000 imm[4:0] 0100011 S sb
imm[11:5] rs2 rs1 001 imm[4:0] 0100011 S sh
imm[11:5] rs2 rs1 010 imm[4:0] 0100011 S sw
imm[11:0] rs1 000 rd 0010011 I addi
imm[11:0] rs1 010 rd 0010011 I slti
imm[11:0] rs1 011 rd 0010011 I sltiu
imm[11:0] rs1 100 rd 0010011 I xori
imm[11:0] rs1 110 rd 0010011 I ori
imm[11:0] rs1 111 rd 0010011 I andi
0000000 shamt rs1 001 rd 0010011 I slli
0000000 shamt rs1 101 rd 0010011 I srli
0100000 shamt rs1 101 rd 0010011 I srai
0000000 rs2 rs1 000 rd 0110011 R add
0100000 rs2 rs1 000 rd 0110011 R sub
0000000 rs2 rs1 001 rd 0110011 R sll
0000000 rs2 rs1 010 rd 0110011 R slt
0000000 rs2 rs1 011 rd 0110011 R sltu
0000000 rs2 rs1 100 rd 0110011 R xor
0000000 rs2 rs1 101 rd 0110011 R srl
0100000 rs2 rs1 101 rd 0110011 R sra
0000000 rs2 rs1 110 rd 0110011 R or
0000000 rs2 rs1 111 rd 0110011 R and
0000 pred succ 00000 000 00000 0001111 I fence
0000 0000 0000 00000 001 00000 0001111 I fence.i
000000000000 00000 000 00000 1110011 I ecall
000000000001 00000 000 00000 1110011 I ebreak
csr rs1 001 rd 1110011 I csrrw
csr rs1 010 rd 1110011 I csrrs
csr rs1 011 rd 1110011 I csrrc
csr zimm 101 rd 1110011 I csrrwi
csr zimm 110 rd 1110011 I csrrsi
csr zimm 111 rd 1110011 I csrrci
*/
#include <stdint.h>
/* Integer register numbers: architectural names x0..x31 followed by their ABI aliases. */
typedef enum {
    /* Architectural names. */
    REG_X0 = 0,   REG_X1 = 1,   REG_X2 = 2,   REG_X3 = 3,
    REG_X4 = 4,   REG_X5 = 5,   REG_X6 = 6,   REG_X7 = 7,
    REG_X8 = 8,   REG_X9 = 9,   REG_X10 = 10, REG_X11 = 11,
    REG_X12 = 12, REG_X13 = 13, REG_X14 = 14, REG_X15 = 15,
    REG_X16 = 16, REG_X17 = 17, REG_X18 = 18, REG_X19 = 19,
    REG_X20 = 20, REG_X21 = 21, REG_X22 = 22, REG_X23 = 23,
    REG_X24 = 24, REG_X25 = 25, REG_X26 = 26, REG_X27 = 27,
    REG_X28 = 28, REG_X29 = 29, REG_X30 = 30, REG_X31 = 31,

    /* Special-purpose aliases. */
    REG_ZERO = REG_X0,
    REG_RA = REG_X1,
    REG_SP = REG_X2,
    REG_GP = REG_X3,
    REG_TP = REG_X4,

    /* Temporaries t0..t6. */
    REG_T0 = REG_X5,  REG_T1 = REG_X6,  REG_T2 = REG_X7,
    REG_T3 = REG_X28, REG_T4 = REG_X29, REG_T5 = REG_X30, REG_T6 = REG_X31,

    /* Saved registers s0..s11. */
    REG_S0 = REG_X8,  REG_S1 = REG_X9,
    REG_S2 = REG_X18, REG_S3 = REG_X19, REG_S4 = REG_X20, REG_S5 = REG_X21,
    REG_S6 = REG_X22, REG_S7 = REG_X23, REG_S8 = REG_X24, REG_S9 = REG_X25,
    REG_S10 = REG_X26, REG_S11 = REG_X27,

    /* Argument registers a0..a7. */
    REG_A0 = REG_X10, REG_A1 = REG_X11, REG_A2 = REG_X12, REG_A3 = REG_X13,
    REG_A4 = REG_X14, REG_A5 = REG_X15, REG_A6 = REG_X16, REG_A7 = REG_X17,
} RV32Reg;
/******************** Immediate helpers ********************/
/*
 * Every helper converts its operand to uint32_t before masking or shifting,
 * so negative offsets and fields that reach bit 31 never go through a signed
 * left shift (which would be undefined behavior).
 */
#define IMM_12BITS(imm) ((uint32_t)(imm) & 0xFFFu)
#define IMM_20BITS(imm) ((uint32_t)(imm) & 0xFFFFFu)
#define SHAMT_VAL(imm)  ((uint32_t)(imm) & 0x1Fu)
#define CSR_VAL(csr)    ((uint32_t)(csr) & 0xFFFu)
// B-type immediate, scattered as [12|10:5] ... [4:1|11]
#define ENCODE_B_IMM(imm) ( \
    ((((uint32_t)(imm) >> 12) & 0x1u)  << 31) | /* imm[12]   -> instr[31]    */ \
    ((((uint32_t)(imm) >> 5)  & 0x3Fu) << 25) | /* imm[10:5] -> instr[30:25] */ \
    ((((uint32_t)(imm) >> 1)  & 0xFu)  << 8)  | /* imm[4:1]  -> instr[11:8]  */ \
    ((((uint32_t)(imm) >> 11) & 0x1u)  << 7))   /* imm[11]   -> instr[7]     */
// J-type immediate, scattered as [20|10:1|11|19:12]
#define ENCODE_J_IMM(imm) ( \
    ((((uint32_t)(imm) >> 20) & 0x1u)   << 31) | /* imm[20]    -> instr[31]    */ \
    ((((uint32_t)(imm) >> 1)  & 0x3FFu) << 21) | /* imm[10:1]  -> instr[30:21] */ \
    ((((uint32_t)(imm) >> 11) & 0x1u)   << 20) | /* imm[11]    -> instr[20]    */ \
    ((((uint32_t)(imm) >> 12) & 0xFFu)  << 12))  /* imm[19:12] -> instr[19:12] */
/******************** Instruction format encoders ********************/
// R-type: funct7 | rs2 | rs1 | funct3 | rd | opcode
// (the opcode argument is now honored instead of being fixed to 0x33)
#define RV32_RTYPE(op, f3, f7, rd, rs1, rs2) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ((uint32_t)(f3) << 12) | \
    ((uint32_t)(rs1) << 15) | ((uint32_t)(rs2) << 20) | ((uint32_t)(f7) << 25)))
// I-type: imm[11:0] | rs1 | funct3 | rd | opcode
#define RV32_ITYPE(op, f3, rd, rs1, imm) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ((uint32_t)(f3) << 12) | \
    ((uint32_t)(rs1) << 15) | (IMM_12BITS(imm) << 20)))
// S-type: imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode
#define RV32_STYPE(op, f3, rs1, rs2, imm) ((uint32_t)( \
    (uint32_t)(op) | ((IMM_12BITS(imm) & 0xFE0u) << 20) | ((uint32_t)(rs1) << 15) | \
    ((uint32_t)(rs2) << 20) | ((uint32_t)(f3) << 12) | ((IMM_12BITS(imm) & 0x1Fu) << 7)))
// B-type: byte offset relative to the branch instruction itself
#define RV32_BTYPE(op, f3, rs1, rs2, imm) ((uint32_t)( \
    (uint32_t)(op) | ENCODE_B_IMM(imm) | ((uint32_t)(rs1) << 15) | \
    ((uint32_t)(rs2) << 20) | ((uint32_t)(f3) << 12)))
// U-type: `imm` is a full 32-bit value; only bits [31:12] are encoded
#define RV32_UTYPE(op, rd, imm) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | (IMM_20BITS((uint32_t)(imm) >> 12) << 12)))
// J-type: byte offset relative to the jump instruction itself
#define RV32_JTYPE(op, rd, imm) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ENCODE_J_IMM(imm)))
/******************** U-type ********************/
/* opcode 0x37 = LUI, 0x17 = AUIPC */
#define LUI(rd, imm)          RV32_UTYPE(0x37, rd, imm)
#define AUIPC(rd, imm)        RV32_UTYPE(0x17, rd, imm)

/******************** J-type ********************/
/* Jump and link: rd receives the return address. */
#define JAL(rd, imm)          RV32_JTYPE(0x6F, rd, imm)

/******************** I-type ********************/
/* Indirect jump and link through rs1 + imm. */
#define JALR(rd, rs1, imm)    RV32_ITYPE(0x67, 0x0, rd, rs1, imm)

/* Loads (opcode 0x03); funct3 selects width and signedness. */
#define LB(rd, rs1, imm)      RV32_ITYPE(0x03, 0x0, rd, rs1, imm)
#define LH(rd, rs1, imm)      RV32_ITYPE(0x03, 0x1, rd, rs1, imm)
#define LW(rd, rs1, imm)      RV32_ITYPE(0x03, 0x2, rd, rs1, imm)
#define LBU(rd, rs1, imm)     RV32_ITYPE(0x03, 0x4, rd, rs1, imm)
#define LHU(rd, rs1, imm)     RV32_ITYPE(0x03, 0x5, rd, rs1, imm)

/* Register-immediate arithmetic and logic (opcode 0x13). */
#define ADDI(rd, rs1, imm)    RV32_ITYPE(0x13, 0x0, rd, rs1, imm)
#define SLTI(rd, rs1, imm)    RV32_ITYPE(0x13, 0x2, rd, rs1, imm)
#define SLTIU(rd, rs1, imm)   RV32_ITYPE(0x13, 0x3, rd, rs1, imm)
#define XORI(rd, rs1, imm)    RV32_ITYPE(0x13, 0x4, rd, rs1, imm)
#define ORI(rd, rs1, imm)     RV32_ITYPE(0x13, 0x6, rd, rs1, imm)
#define ANDI(rd, rs1, imm)    RV32_ITYPE(0x13, 0x7, rd, rs1, imm)
// Immediate shifts. The 12-bit immediate field holds the 5-bit shift amount in
// imm[4:0]; SRAI additionally sets imm[10] (instruction bit 30). The values are
// passed unshifted because RV32_ITYPE places the immediate at bit 20 itself;
// the previous pre-shifted form was masked to zero and lost both fields.
#define SLLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x1, rd, rs1, SHAMT_VAL(shamt))
#define SRLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, SHAMT_VAL(shamt))
#define SRAI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, (0x400 | SHAMT_VAL(shamt)))
/******************** B-type ********************/
/* Conditional branches (opcode 0x63); funct3 selects the comparison. */
#define BEQ(rs1, rs2, imm)    RV32_BTYPE(0x63, 0x0, rs1, rs2, imm)
#define BNE(rs1, rs2, imm)    RV32_BTYPE(0x63, 0x1, rs1, rs2, imm)
#define BLT(rs1, rs2, imm)    RV32_BTYPE(0x63, 0x4, rs1, rs2, imm)
#define BGE(rs1, rs2, imm)    RV32_BTYPE(0x63, 0x5, rs1, rs2, imm)
#define BLTU(rs1, rs2, imm)   RV32_BTYPE(0x63, 0x6, rs1, rs2, imm)
#define BGEU(rs1, rs2, imm)   RV32_BTYPE(0x63, 0x7, rs1, rs2, imm)

/******************** S-type ********************/
/* Stores (opcode 0x23). Operands are given as (source, base, offset) and
 * swapped into the encoder's (rs1 = base, rs2 = source) order. */
#define SB(rs2, rs1, imm)     RV32_STYPE(0x23, 0x0, rs1, rs2, imm)
#define SH(rs2, rs1, imm)     RV32_STYPE(0x23, 0x1, rs1, rs2, imm)
#define SW(rs2, rs1, imm)     RV32_STYPE(0x23, 0x2, rs1, rs2, imm)

/******************** R-type ********************/
/* Register-register ALU operations (opcode 0x33); funct7 0x20 selects SUB/SRA. */
#define ADD(rd, rs1, rs2)     RV32_RTYPE(0x33, 0x0, 0x00, rd, rs1, rs2)
#define SUB(rd, rs1, rs2)     RV32_RTYPE(0x33, 0x0, 0x20, rd, rs1, rs2)
#define SLL(rd, rs1, rs2)     RV32_RTYPE(0x33, 0x1, 0x00, rd, rs1, rs2)
#define SLT(rd, rs1, rs2)     RV32_RTYPE(0x33, 0x2, 0x00, rd, rs1, rs2)
#define SLTU(rd, rs1, rs2)    RV32_RTYPE(0x33, 0x3, 0x00, rd, rs1, rs2)
#define XOR(rd, rs1, rs2)     RV32_RTYPE(0x33, 0x4, 0x00, rd, rs1, rs2)
#define SRL(rd, rs1, rs2)     RV32_RTYPE(0x33, 0x5, 0x00, rd, rs1, rs2)
#define SRA(rd, rs1, rs2)     RV32_RTYPE(0x33, 0x5, 0x20, rd, rs1, rs2)
#define OR(rd, rs1, rs2)      RV32_RTYPE(0x33, 0x6, 0x00, rd, rs1, rs2)
#define AND(rd, rs1, rs2)     RV32_RTYPE(0x33, 0x7, 0x00, rd, rs1, rs2)
/******************** I-type (system) ********************/
// fence: pred occupies instr[27:24] and succ instr[23:20], as in the encoding
// table at the top of this header.
#define FENCE(pred, succ) (uint32_t)( 0x0Fu | (((uint32_t)(pred) & 0xFu) << 24) | (((uint32_t)(succ) & 0xFu) << 20) )
#define FENCE_I() (uint32_t)( 0x100F )
#define ECALL() (uint32_t)( 0x73 )
#define EBREAK() (uint32_t)( 0x100073 )
// CSR instructions: the CSR number is the 12-bit immediate, rs is the source register.
#define CSRRW(rd, csr, rs) RV32_ITYPE(0x73, 0x1, rd, rs, CSR_VAL(csr))
#define CSRRS(rd, csr, rs) RV32_ITYPE(0x73, 0x2, rd, rs, CSR_VAL(csr))
#define CSRRC(rd, csr, rs) RV32_ITYPE(0x73, 0x3, rd, rs, CSR_VAL(csr))
// Immediate forms: the 5-bit zimm goes in the rs1 field (instr[19:15]). It used
// to be OR-ed into the immediate, where the 12-bit mask discarded it.
#define CSRRWI(rd, csr, zimm) RV32_ITYPE(0x73, 0x5, rd, ((zimm) & 0x1F), CSR_VAL(csr))
#define CSRRSI(rd, csr, zimm) RV32_ITYPE(0x73, 0x6, rd, ((zimm) & 0x1F), CSR_VAL(csr))
#define CSRRCI(rd, csr, zimm) RV32_ITYPE(0x73, 0x7, rd, ((zimm) & 0x1F), CSR_VAL(csr))
/* M extension: every instruction uses funct7 = 0x01; funct3 selects the
 * operation (0 = mul, 4 = div, 6 = rem). */
#define MUL(rd, rs1, rs2) RV32_RTYPE(0x33, 0x0, 0x01, rd, rs1, rs2)
#define DIV(rd, rs1, rs2) RV32_RTYPE(0x33, 0x4, 0x01, rd, rs1, rs2)
#define REM(rd, rs1, rs2) RV32_RTYPE(0x33, 0x6, 0x01, rd, rs1, rs2)
/******************** Pseudo-instructions ********************/
// Each macro expands to a single instruction word unless noted otherwise.
// nop (No operation)
#define NOP() ADDI(REG_X0, REG_X0, 0)
// neg rd, rs (Two's complement of rs)
#define NEG(rd, rs) SUB(rd, REG_ZERO, rs)
// negw rd, rs (Two's complement word of rs)
// NOTE(review): SUBW is an RV64 instruction and no SUBW encoder is visible in
// this header, so NEGW cannot be expanded as written - confirm before using.
#define NEGW(rd, rs) SUBW(rd, REG_ZERO, rs)
// snez rd, rs (Set if != zero)
#define SNEZ(rd, rs) SLTU(rd, REG_X0, rs)
// sltz rd, rs (Set if < zero)
#define SLTZ(rd, rs) SLT(rd, rs, REG_X0)
// sgtz rd, rs (Set if > zero)
#define SGTZ(rd, rs) SLT(rd, REG_X0, rs)
// beqz rs, offset (Branch if = zero)
#define BEQZ(rs, offset) BEQ(rs, REG_X0, offset)
// bnez rs, offset (Branch if != zero)
#define BNEZ(rs, offset) BNE(rs, REG_X0, offset)
// blez rs, offset (Branch if <= zero)
#define BLEZ(rs, offset) BGE(REG_X0, rs, offset)
// bgez rs, offset (Branch if >= zero)
#define BGEZ(rs, offset) BGE(rs, REG_X0, offset)
// bltz rs, offset (Branch if < zero)
#define BLTZ(rs, offset) BLT(rs, REG_X0, offset)
// bgtz rs, offset (Branch if > zero)
#define BGTZ(rs, offset) BLT(REG_X0, rs, offset)
// j offset (Jump)
#define J(offset) JAL(REG_X0, offset)
// jr rs (Jump register)
#define JR(rs) JALR(REG_X0, rs, 0)
// ret (Return from subroutine)
#define RET() JALR(REG_X0, REG_RA, 0)
// tail offset (Tail call far-away subroutine) - expands to TWO instructions.
// JALR sign-extends its 12-bit immediate, so 0x800 is added before the upper
// 20 bits are taken for AUIPC; the two parts then sum to `offset`.
#define TAIL_2(offset) \
    AUIPC(REG_X6, (uint32_t)(offset) + 0x800u), \
    JALR(REG_X0, REG_X6, offset)
#define TAIL(offset) TAIL_2(offset) // Warning: this expands to 2 instructions
// csrr csr, rd (Read CSR into rd)
#define CSRR(csr, rd) CSRRS(rd, csr, REG_X0)
// csrw csr, rs (Write CSR from rs)
#define CSRW(csr, rs) CSRRW(REG_X0, csr, rs)
// csrs csr, rs (Set bits in CSR)
#define CSRS(csr, rs) CSRRS(REG_X0, csr, rs)
// csrc csr, rs (Clear bits in CSR)
#define CSRC(csr, rs) CSRRC(REG_X0, csr, rs)
// csrci csr, imm (Immediate clear bits in CSR)
#define CSRCI(csr, imm) CSRRCI(REG_X0, csr, imm)
// csrwi csr, imm (Write CSR immediate)
#define CSRRWI2(csr, imm) CSRRWI(REG_X0, csr, imm)
// csrsi csr, imm (Immediate set bits in CSR)
#define CSRRSI2(csr, imm) CSRRSI(REG_X0, csr, imm)
// csrci csr, imm (Immediate clear bits in CSR)
#define CSRRCI2(csr, imm) CSRRCI(REG_X0, csr, imm)
// Floating-point CSR helpers, disabled (FCSR/FRM/FFLAGS are not defined here):
// // frcsr rd (Read FP control/status register)
// #define FRCSR(rd) CSRRS(rd, FCSR, REG_X0)
// // fscsr rs (Write FP control/status register)
// #define FSCSR(rs) CSRRW(REG_X0, FCSR, rs)
// // frrm rd (Read FP rounding mode)
// #define FRRM(rd) CSRRS(rd, FRM, REG_X0)
// // fsrm rs (Write FP rounding mode)
// #define FSRM(rs) CSRRW(REG_X0, FRM, rs)
// // frflags rd (Read FP exception flags)
// #define FRFLAGS(rd) CSRRS(rd, FFLAGS, REG_X0)
// // fsflags rs (Write FP exception flags)
// #define FSFLAGS(rs) CSRRW(REG_X0, FFLAGS, rs)
// Multi-instruction sequences
// li rd, num: load a 32-bit constant; expands to TWO instruction words.
// ADDI sign-extends its 12-bit immediate, so 0x800 is added before the upper
// 20 bits are taken for LUI; the two parts then sum to `num` even when bit 11
// of `num` is set.
#define LI(rd, num) \
    LUI(rd, (uint32_t)(num) + 0x800u), \
    ADDI(rd, rd, num)
// mv rd, rs: copy register
#define MV(rd, rs) ADDI(rd, rs, 0)
// not rd, rs: one's complement
#define NOT(rd, rs) XORI(rd, rs, -1)
// call offset: pc-relative call through ra; expands to TWO instruction words.
// Uses the same +0x800 compensation as LI, which makes negative offsets
// (calls to code at lower addresses) resolve correctly.
#define CALL(offset) \
    AUIPC(REG_X1, (uint32_t)(offset) + 0x800u), \
    JALR(REG_X1, REG_X1, offset)
// call to an absolute address; expands to TWO instruction words.
// The AUIPC targets x0 and so has no architectural effect; the JALR jumps to
// the sign-extended low 12 bits of `addr`.
// NOTE(review): this only reaches addresses in [-2048, 2047] - confirm that
// is sufficient for every caller.
#define CALL_ABS(addr) \
    AUIPC(REG_X0, addr), \
    JALR(REG_X1, REG_X0, addr)
#ifdef RISCV_VM_BUILDIN_ECALL
// Environment-call sequences. Each one loads its arguments and the service
// number in a7 with ADDI from x0 and then issues ECALL, so every operand must
// fit in a signed 12-bit immediate.
// Print integer `num` (service 0x1).
#define ECALL_PNT_INT(num) \
    ADDI(REG_A0, REG_X0, num), \
    ADDI(REG_A7, REG_X0, 0x1), \
    ECALL()
// Print the string at address `str` (service 0x4).
#define ECALL_PNT_STR(str) \
    ADDI(REG_A0, REG_X0, str), \
    ADDI(REG_A7, REG_X0, 0x4), \
    ECALL()
// Exit (service 93) with whatever a0 already holds.
#define ECALL_EXIT2() \
    ADDI(REG_A7, REG_X0, 93), \
    ECALL()
// Exit with status `code`.
#define ECALL_EXIT_ARG(code) \
    ADDI(REG_A0, REG_X0, code), \
    ECALL_EXIT2()
#define ECALL_EXIT() \
    ADDI(REG_A7, REG_X0, 93), \
    ECALL()
// Read an integer (service 1025 + 4). The argument is unused, as before; the
// ADDI was missing its source register and could not be expanded.
#define ECALL_SCAN_INT(dst) \
    ADDI(REG_A7, REG_X0, (1025 + 4)), \
    ECALL()
// Read a string into the buffer at address `str` (service 1025 + 5).
#define ECALL_SCAN_STR(str) \
    ADDI(REG_A0, REG_X0, str), \
    ADDI(REG_A7, REG_X0, (1025 + 5)), \
    ECALL()
#endif
#endif

View File

@ -0,0 +1,413 @@
#define RISCV_VM_BUILDIN_ECALL
#include "rv32gen.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// One instruction word, viewable as a 32-bit value or as its four raw bytes.
// `code` must stay the first member: brace-initialized tables such as gcodes
// initialize the first member of each element.
// NOTE(review): bytes[] follows the host byte order, so the emitted image is
// little-endian only when the compiler runs on a little-endian host - confirm.
typedef union rv32code {
uint32_t code;
uint8_t bytes[4];
} rv32code_t;
// Number of instruction slots reserved at the start of the output image for
// the startup stub below; main() pads the unused slots with NOPs and places
// the generated program at byte offset CRT_CODE_SIZE * 4.
#define CRT_CODE_SIZE 16
// Startup stub: set sp to 0x1000, clear ra, call the code that follows the
// reserved area, then issue the exit ecall (a7 = 93) when that call returns.
rv32code_t gcodes[] = {
LI(REG_SP, 0x1000),
LI(REG_RA, 0x0),
CALL_ABS(CRT_CODE_SIZE << 2),
// Exit
ECALL_EXIT2(),
};
/* Write the startup stub (gcodes) to `out`, one instruction word per element. */
void test_raw_gen(FILE* out) {
    const size_t word_count = sizeof(gcodes) / sizeof(gcodes[0]);
    fwrite(gcodes, sizeof(rv32code_t), word_count, out);
}
#include "../../frontend/frontend.h"
#include "../../middleend/ir.h"
// One pending control-transfer fix-up: a placeholder instruction has been
// emitted and is re-encoded once the distance to its target is known.
typedef struct {
// Index into ctx.codes of the placeholder instruction to overwrite.
int code_pos;
// Destination: basic-block index for branches/jumps, function index for calls.
int to_idx;
// Origin: index of the block (for calls, the function) holding the instruction.
int cur_idx;
// Branch/jump: distance in bytes from the end of the origin block back to the
// instruction (gen_instr uses 8 for the BNEZ and 4 for the J ending a block).
// Call: byte offset of the call sequence from the start of the origin function.
int base_offset;
enum {
JMP_BRANCH, // conditional branch, re-encoded as BNEZ on T0
JMP_JUMP, // unconditional jump, re-encoded as J
JMP_CALL, // function call, re-encoded as the two-word CALL sequence
} type;
} jmp_t;
// Code-generator state shared by all gen_* functions in this file.
static struct {
// Every instruction word emitted so far, in output order.
vector_header(codes, rv32code_t);
// Frame size in bytes of the function being generated (set in gen_func).
int stack_offset;
// Byte offset from sp of the first value slot; gen_func sets it to 16.
int stack_base;
// Not referenced by the code visible in this file.
int tmp_reg;
// Block, function and program currently being translated.
ir_bblock_t* cur_block;
ir_func_t* cur_func;
ir_prog_t* prog;
// Branch/jump fix-ups of the current function (resolved in gen_func).
vector_header(jmp, jmp_t*);
// Call fix-ups of the whole program (resolved in gen_code).
vector_header(call, jmp_t*);
// Byte offset, within the current function, of the block being generated.
int cur_func_offset;
// Byte offset, within the current block, of the instruction being generated.
int cur_block_offset;
} ctx;
/* Write one instruction word to `fp`; returns fwrite's item count (1 on success). */
int write_inst(union rv32code ins, FILE* fp) {
    size_t written = fwrite(&ins, sizeof ins, 1, fp);
    return written;
}
// Append one instruction word to ctx.codes and add its 4 bytes to the `len`
// variable of the enclosing function. Wrapped in do/while so the push and the
// length update stay together when used as the body of an unbraced if/else.
#define GENCODE(code) do { \
    vector_push(ctx.codes, (rv32code_t)(code)); \
    len += 4; \
} while (0)
// Append a comma-separated sequence of instruction words (e.g. LI or CALL).
// Pass the sequence unparenthesized: wrapping it in extra parentheses turns
// the commas into the comma operator and only the last word is emitted.
// Locals carry a gencodes_ prefix so they cannot capture names used in `code`.
#define GENCODES(code) do { \
    rv32code_t gencodes_buf_[] = { \
        code \
    }; \
    for (size_t gencodes_i_ = 0; \
         gencodes_i_ < sizeof(gencodes_buf_) / sizeof(gencodes_buf_[0]); \
         gencodes_i_++) { \
        GENCODE(gencodes_buf_[gencodes_i_]); \
    } \
} while (0)
/*
 * Return the byte offset from sp of the stack slot that belongs to `ptr`.
 *
 * Every instruction of the current function owns one 4-byte slot; slots are
 * numbered in block order, then instruction order, starting at ctx.stack_base.
 * Asserts when `ptr` is not an instruction of the current function; returns -1
 * in that case when assertions are compiled out, so control never falls off
 * the end of the function.
 */
static int stack_offset(ir_node_t* ptr) {
    int offset = ctx.stack_base;
    for (int bi = 0; bi < ctx.cur_func->bblocks.size; bi++) {
        ir_bblock_t* block = vector_at(ctx.cur_func->bblocks, bi);
        /* Distinct index name: the original reused `i` and shadowed the outer loop. */
        for (int ii = 0; ii < block->instrs.size; ii++) {
            if (vector_at(block->instrs, ii) == ptr) {
                offset += ii * 4;
                assert(offset >= 0 && offset < ctx.stack_offset);
                return offset;
            }
        }
        /* Not in this block: skip all of its slots. */
        offset += block->instrs.size * 4;
    }
    assert(0);
    return -1;
}
/*
 * Return the position of `toblock` in the current function's block list.
 * Asserts when the block is not part of the current function; returns -1 in
 * that case when assertions are compiled out.
 */
static int block_idx(ir_bblock_t* toblock) {
    for (int i = 0; i < ctx.cur_func->bblocks.size; i++) {
        if (vector_at(ctx.cur_func->bblocks, i) == toblock) {
            return i;
        }
    }
    assert(0);
    return -1;
}
/*
 * Return the position of `tofunc` in the program's function list.
 * Asserts when the function is not defined in the program; returns -1 in that
 * case when assertions are compiled out.
 */
static int func_idx(ir_func_t* tofunc) {
    for (int i = 0; i < ctx.prog->funcs.size; i++) {
        if (vector_at(ctx.prog->funcs, i) == tofunc) {
            return i;
        }
    }
    assert(0);
    return -1;
}
/*
 * Look up `name` in the table of built-in (ecall-backed) functions.
 * Returns the table index of the match, or -1 when `name` is not built in.
 */
static int system_func(const char* name) {
    static const char defined_func[][16] = {
        "ecall_pnt_int",
    };
    const int count = (int)(sizeof(defined_func) / sizeof(defined_func[0]));
    int idx = 0;
    while (idx < count) {
        if (strcmp(name, defined_func[idx]) == 0) {
            return idx;
        }
        idx++;
    }
    return -1;
}
/*
 * Emit code that places the value of IR node `ptr` in register `reg`.
 * Integer constants are materialized with LI; every other node is read back
 * from its stack slot. Returns the number of bytes emitted.
 */
static int get_node_val(ir_node_t* ptr, int reg) {
    int len = 0; /* advanced by GENCODE / GENCODES */
    if (ptr->tag != IR_NODE_CONST_INT) {
        int offset = stack_offset(ptr);
        GENCODE(LW(reg, REG_SP, offset));
        return len;
    }
    GENCODES(LI(reg, ptr->data.const_int.val));
    return len;
}
/*
 * Emit RV32 code for one IR instruction and return the number of bytes emitted.
 *
 * Values live in per-instruction stack slots (see stack_offset); operands are
 * loaded into temporaries with get_node_val and results are written back to
 * the slot of the producing instruction.
 *
 * Branches, jumps and calls are emitted as placeholders and recorded in
 * ctx.jmp / ctx.call; gen_func and gen_code re-encode them later. A branch
 * must remain the last code of its block: base_offset 8 (BNEZ) and 4 (J) are
 * distances measured back from the end of the block.
 *
 * `block` is not used; the current block is taken from ctx.cur_block.
 */
static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
    (void)block;
    int len = 0; /* advanced by GENCODE / GENCODES */
    int offset;
    switch (instr->tag) {
    case IR_NODE_ALLOC: {
        /* The slot is reserved by gen_func's frame layout; nothing to emit. */
        break;
    }
    case IR_NODE_LOAD: {
        // T0 = *(sp + target slot)
        offset = stack_offset(instr->data.load.target);
        GENCODE(LW(REG_T0, REG_SP, offset));
        // Write the loaded value to this instruction's own slot. Consumers
        // read a LOAD through get_node_val, which loads from that slot; the
        // store had been commented out, leaving the slot unwritten.
        offset = stack_offset(instr);
        GENCODE(SW(REG_T0, REG_SP, offset));
        break;
    }
    case IR_NODE_STORE: {
        // *(sp + target slot) = value
        len += get_node_val(instr->data.store.value, REG_T0);
        offset = stack_offset(instr->data.store.target);
        GENCODE(SW(REG_T0, REG_SP, offset));
        break;
    }
    case IR_NODE_RET: {
        // a0 = return value (if any), then restore ra, pop the frame, return
        if (instr->data.ret.ret_val != NULL) {
            len += get_node_val(instr->data.ret.ret_val, REG_A0);
        }
        GENCODE(LW(REG_RA, REG_SP, 0));
        GENCODE(ADDI(REG_SP, REG_SP, ctx.stack_offset));
        GENCODE(RET());
        break;
    }
    case IR_NODE_OP: {
        len += get_node_val(instr->data.op.lhs, REG_T1);
        len += get_node_val(instr->data.op.rhs, REG_T2);
        switch (instr->data.op.op) {
        case IR_OP_ADD:
            GENCODE(ADD(REG_T0, REG_T1, REG_T2));
            break;
        case IR_OP_SUB:
            GENCODE(SUB(REG_T0, REG_T1, REG_T2));
            break;
        case IR_OP_MUL:
            GENCODE(MUL(REG_T0, REG_T1, REG_T2));
            break;
        case IR_OP_DIV:
            GENCODE(DIV(REG_T0, REG_T1, REG_T2));
            break;
        case IR_OP_MOD:
            GENCODE(REM(REG_T0, REG_T1, REG_T2));
            break;
        default:
            error("ERROR gen_instr op in riscv");
            break;
        }
        offset = stack_offset(instr);
        GENCODE(SW(REG_T0, REG_SP, offset));
        break;
    }
    case IR_NODE_BRANCH: {
        len += get_node_val(instr->data.branch.cond, REG_T0);
        int tidx = block_idx(instr->data.branch.true_bblock);
        int fidx = block_idx(instr->data.branch.false_bblock);
        int cidx = block_idx(ctx.cur_block);
        /* Taken path: BNEZ placeholder, 8 bytes before the end of the block. */
        jmp_t* jmp = xmalloc(sizeof *jmp);
        *jmp = (jmp_t) {
            .base_offset = 8,
            .code_pos = ctx.codes.size,
            .type = JMP_BRANCH,
            .to_idx = tidx,
            .cur_idx = cidx,
        };
        vector_push(ctx.jmp, jmp);
        GENCODE(BNEZ(REG_T0, 0));
        /* Fall-through path: J placeholder, 4 bytes before the end of the block. */
        jmp = xmalloc(sizeof *jmp);
        *jmp = (jmp_t) {
            .base_offset = 4,
            .code_pos = ctx.codes.size,
            .type = JMP_JUMP,
            .to_idx = fidx,
            .cur_idx = cidx,
        };
        vector_push(ctx.jmp, jmp);
        GENCODE(J(0));
        break;
    }
    case IR_NODE_JUMP: {
        int idx = block_idx(instr->data.jump.target_bblock);
        jmp_t* jmp = xmalloc(sizeof *jmp);
        *jmp = (jmp_t) {
            .base_offset = 4,
            .code_pos = ctx.codes.size,
            .type = JMP_JUMP,
            .to_idx = idx,
            .cur_idx = block_idx(ctx.cur_block),
        };
        vector_push(ctx.jmp, jmp);
        GENCODE(J(0));
        break;
    }
    case IR_NODE_CALL: {
        if (instr->data.call.args.size > 8) {
            error("can't add so much params");
        }
        int param_regs[8] = {
            REG_A0, REG_A1, REG_A2, REG_A3,
            REG_A4, REG_A5, REG_A6, REG_A7
        };
        for (int i = 0; i < instr->data.call.args.size; i++) {
            ir_node_t* param = vector_at(instr->data.call.args, i);
            len += get_node_val(param, param_regs[i]);
        }
        int system_func_idx = system_func(instr->data.call.callee->name);
        if (system_func_idx == 0) {
            // ecall_pnt_int
            GENCODE(ADDI(REG_A7, REG_X0, 0x1));
            GENCODE(ECALL());
            break;
        }
        jmp_t* jmp = xmalloc(sizeof *jmp);
        *jmp = (jmp_t) {
            .base_offset = ctx.cur_func_offset + ctx.cur_block_offset + len,
            .code_pos = ctx.codes.size,
            .type = JMP_CALL,
            .to_idx = func_idx(instr->data.call.callee),
            .cur_idx = func_idx(ctx.cur_func),
        };
        vector_push(ctx.call, jmp);
        // Reserve BOTH words of the call sequence. The argument must not be
        // parenthesized: "(CALL(0))" collapses to its last word through the
        // comma operator, while gen_code later patches two words at code_pos
        // and would overwrite the instruction that follows.
        GENCODES(CALL(0));
        // Keep the return value (a0) in this instruction's slot so that later
        // uses of the call result read it through get_node_val.
        offset = stack_offset(instr);
        GENCODE(SW(REG_A0, REG_SP, offset));
        break;
    }
    default:
        error("ERROR gen_instr in riscv");
    }
    return len;
}
/*
 * Emit code for every instruction of `block`, in order.
 * Before each instruction, ctx.cur_block_offset is set to the number of bytes
 * already emitted for this block. Returns the block's total size in bytes.
 */
static int gen_block(ir_bblock_t* block) {
    int total = 0;
    int pos = 0;
    ctx.cur_block = block;
    while (pos < block->instrs.size) {
        ctx.cur_block_offset = total;
        total += gen_instr(block, vector_at(block->instrs, pos));
        pos++;
    }
    return total;
}
/*
 * Emit code for one function and return its size in bytes.
 *
 * Frame layout: the saved ra at 0(sp), then from ctx.stack_base (16) one
 * 4-byte slot per IR instruction of the function. After all blocks are
 * emitted, the branch/jump placeholders recorded in ctx.jmp are re-encoded
 * with their final offsets and the fix-up records are released.
 *
 * NOTE(review): the frame size and slot offsets are encoded as 12-bit
 * immediates; nothing here rejects frames larger than 2047 bytes.
 */
static int gen_func(ir_func_t* func) {
    int len = 0; /* advanced by GENCODE */
    ctx.cur_func = func;
    ctx.stack_base = 16;
    ctx.stack_offset = ctx.stack_base;
    for (int i = 0; i < func->bblocks.size; i++) {
        ir_bblock_t* bb = vector_at(func->bblocks, i);
        ctx.stack_offset += 4 * bb->instrs.size;
    }

    /* Prologue: allocate the frame and save the return address. */
    GENCODE(ADDI(REG_SP, REG_SP, -ctx.stack_offset));
    GENCODE(SW(REG_RA, REG_SP, 0));

    /* Spill the incoming argument registers to the parameters' slots. */
    int param_regs[8] = {
        REG_A0, REG_A1, REG_A2, REG_A3,
        REG_A4, REG_A5, REG_A6, REG_A7
    };
    if (func->params.size > 8) {
        error("can't add so much params");
    }
    for (int i = 0; i < func->params.size; i++) {
        int offset = stack_offset(vector_at(func->params, i));
        GENCODE(SW(param_regs[i], REG_SP, offset));
    }

    /* jmp_cache[i] is the byte offset of block i from the first block;
     * jmp_cache[size] is the end of the last block. */
    int jmp_cache[func->bblocks.size + 1];
    if (ctx.jmp.data != NULL) vector_free(ctx.jmp);
    vector_init(ctx.jmp);
    jmp_cache[0] = 0;
    for (int i = 0; i < func->bblocks.size; i++) {
        ctx.cur_func_offset = len;
        int ret = gen_block(vector_at(func->bblocks, i));
        jmp_cache[i + 1] = jmp_cache[i] + ret;
        len += ret;
    }

    /* Resolve branch and jump placeholders. */
    for (int i = 0; i < ctx.jmp.size; i++) {
        jmp_t* jmp = vector_at(ctx.jmp, i);
        /* The instruction sits base_offset bytes before the end of its block. */
        int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx + 1] - jmp->base_offset);
        /* Unsigned: encodings of negative offsets have bit 31 set. */
        uint32_t code;
        if (jmp->type == JMP_JUMP) {
            code = J(offset);
        } else {
            code = BNEZ(REG_T0, offset);
        }
        ctx.codes.data[jmp->code_pos] = (rv32code_t) {
            .code = code,
        };
        /* The record is not read again; the vector itself is released at the
         * start of the next gen_func call. */
        free(jmp);
    }
    return len;
}
/*
 * Generate code for the whole program into ctx.codes.
 *
 * Every external function must be a known built-in (see system_func). After
 * all functions are emitted, each call placeholder recorded in ctx.call is
 * re-encoded as a pc-relative CALL to the start of its callee.
 */
static void gen_code(ir_prog_t* prog) {
    ctx.prog = prog;
    for (int i = 0; i < prog->extern_funcs.size; i++) {
        if (system_func(prog->extern_funcs.data[i]->name) == -1) {
            error("func %s not defined and not a system func", prog->extern_funcs.data[i]->name);
        }
    }

    /* jmp_cache[i] is the byte offset of function i from the first function.
     * Entry 0 must be set explicitly: every later entry is derived from it. */
    int jmp_cache[prog->funcs.size + 1];
    jmp_cache[0] = 0;
    for (int i = 0; i < prog->funcs.size; i++) {
        jmp_cache[i + 1] = jmp_cache[i] + gen_func(vector_at(prog->funcs, i));
    }

    for (int i = 0; i < ctx.call.size; i++) {
        jmp_t* jmp = vector_at(ctx.call, i);
        /* Distance from the first word of the call sequence to the callee:
         * base_offset is the call's byte offset inside the calling function. */
        int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx] + jmp->base_offset);
        uint32_t codes[2] = {
            CALL(offset)
        };
        for (int k = 0; k < 2; k++) {
            ctx.codes.data[jmp->code_pos + k] = (rv32code_t) {
                .code = codes[k],
            };
        }
        /* The record is not read again after patching. */
        free(jmp);
    }
}
/*
 * Usage: ccompiler [input.c [output.bin]]   (defaults: test.c, flat.bin)
 *
 * Compiles the input into a flat image: CRT_CODE_SIZE instruction slots hold
 * the startup stub (gcodes) padded with NOPs, followed by the generated code.
 * Returns 0 on success and 1 when a file cannot be opened or written.
 */
int main(int argc, char** argv) {
    // gcc rv32ima_codegen.c -o rv32gen.exe
    const char* infilename = argc >= 2 ? argv[1] : "test.c";
    const char* outfilename = argc >= 3 ? argv[2] : "flat.bin";

    /* Open the input first so a missing source does not truncate the output. */
    FILE* in = fopen(infilename, "r");
    if (in == NULL) {
        fprintf(stderr, "Failed to open file: %s\n", infilename);
        return 1;
    }
    FILE* out = fopen(outfilename, "wb");
    if (out == NULL) {
        fprintf(stderr, "Failed to open file: %s\n", outfilename);
        fclose(in);
        return 1;
    }

    struct ASTNode* root = frontend(infilename, in, (sread_fn)fread_s);
    gen_ir_from_ast(root);
    gen_code(&prog);

    /* Image layout: stub, NOP padding up to CRT_CODE_SIZE words, program. */
    const size_t crt_words = sizeof(gcodes) / sizeof(gcodes[0]);
    assert(CRT_CODE_SIZE >= crt_words);
    int ok = fwrite(gcodes, sizeof(gcodes[0]), crt_words, out) == crt_words;
    for (size_t i = crt_words; ok && i < CRT_CODE_SIZE; i++) {
        ok = write_inst((union rv32code) {
            .code = NOP(),
        }, out) == 1;
    }
    if (ok && ctx.codes.size > 0) {
        size_t count = (size_t)ctx.codes.size;
        ok = fwrite(ctx.codes.data, sizeof(ctx.codes.data[0]), count, out) == count;
    }

    fclose(in);
    /* fclose flushes buffered data, so its result is part of the write check. */
    if (fclose(out) != 0) {
        ok = 0;
    }
    if (!ok) {
        fprintf(stderr, "Failed to write file: %s\n", outfilename);
        return 1;
    }
    return 0;
}

View File

@ -1,7 +1,7 @@
# 编译器设置
CC = gcc
AR = ar
CFLAGS = -g
CFLAGS = -g -Wall
# 源文件路径
LEXER_DIR = ./lexer
@ -13,6 +13,7 @@ SYMTAB_DIR = ./parser/symtab
SRCS = \
frontend.c \
$(LEXER_DIR)/lexer.c \
$(LEXER_DIR)/token.c \
$(PARSER_DIR)/parser.c \
$(AST_DIR)/ast.c \
$(AST_DIR)/block.c \

View File

@ -3,13 +3,13 @@
#include "frontend.h"
struct ASTNode* frontend(const char* file, void* stream, sread_fn sread) {
struct Lexer lexer;
lexer_t lexer;
init_lexer(&lexer, file, stream, sread);
struct SymbolTable symtab;
symtab_t symtab;
init_symtab(&symtab);
struct Parser parser;
parser_t parser;
init_parser(&parser, &lexer, &symtab);
parse_prog(&parser);

View File

@ -4,8 +4,9 @@
#ifndef error
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define STD_LIBRARY
#define error(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while (0)
#define error(...) do { fprintf(stderr, __VA_ARGS__); assert(0); } while (0)
#endif
#ifndef warn
#include <stdio.h>
@ -15,10 +16,12 @@
#define xmalloc(size) malloc(size)
#ifndef FRONTEND_IMPLEMENTATION
#include "parser/parser.h"
#include "parser/ast/ast.h"
typedef int (*sread_fn)(void *dst_buf, int dst_size, int elem_size, int count, void *stream);
struct ASTNode* frontend(const char* file, void* stream, sread_fn sread);
#endif
#endif

View File

@ -26,13 +26,15 @@ the distribution and installation instructions.
Chris Fraser / cwf@aya.yale.edu
David Hanson / drh@drhanson.net
*/
#define FRONTEND_IMPLEMENTATION
#include "../frontend.h"
#include "token.h"
#include "lexer.h"
static const struct {
const char* name;
enum CSTD_KEYWORD std_type;
enum TokenType tok;
tok_type_t tok;
} keywords[] = {
#define X(name, std_type, tok, ...) { #name, std_type, tok },
KEYWORD_TABLE
@ -72,7 +74,7 @@ static inline int keyword_cmp(const char* name, int len) {
return -1; // Not a keyword.
}
void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
{
lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer);
lexer->index = 1;
@ -86,12 +88,12 @@ void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_
}
}
static void flush_buffer(struct Lexer* lexer) {
static void flush_buffer(lexer_t* lexer) {
int num = lexer->end_ptr - lexer->cur_ptr;
for (int i = 0; i < num; i++) {
lexer->buffer[i] = lexer->cur_ptr[i];
}
lexer->cur_ptr = lexer->buffer;
lexer->cur_ptr = (unsigned char*)lexer->buffer;
int read_size = LEXER_BUFFER_SIZE - num;
// TODO size_t to int maybe lose precision
@ -109,7 +111,7 @@ static void flush_buffer(struct Lexer* lexer) {
}
}
static void goto_newline(struct Lexer* lexer) {
static void goto_newline(lexer_t* lexer) {
do {
if (lexer->cur_ptr == lexer->end_ptr) {
flush_buffer(lexer);
@ -119,7 +121,7 @@ static void goto_newline(struct Lexer* lexer) {
} while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0');
}
static void goto_block_comment(struct Lexer* lexer) {
static void goto_block_comment(lexer_t* lexer) {
while (1) {
if (lexer->end_ptr - lexer->cur_ptr < 2) {
flush_buffer(lexer);
@ -155,7 +157,7 @@ static char got_slash(unsigned char* peek) {
}
}
static void parse_char_literal(struct Lexer* lexer, struct Token* token) {
static void parse_char_literal(lexer_t* lexer, tok_t* token) {
char val = 0;
unsigned char* peek = lexer->cur_ptr + 1;
if (*peek == '\\') {
@ -166,16 +168,16 @@ static void parse_char_literal(struct Lexer* lexer, struct Token* token) {
}
if (*peek != '\'') error("Unclosed character literal");
token->constant.ch = val;
token->val.ch = val;
lexer->cur_ptr = peek + 1;
token->constant.have = 1;
token->val.have = 1;
token->type = TOKEN_CHAR_LITERAL;
}
static void parse_string_literal(struct Lexer* lexer, struct Token* token) {
static void parse_string_literal(lexer_t* lexer, tok_t* token) {
unsigned char* peek = lexer->cur_ptr + 1;
// TODO string literal size check
char* dest = token->constant.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
char* dest = token->val.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
int len = 0;
while (*peek != '"') {
@ -191,12 +193,12 @@ static void parse_string_literal(struct Lexer* lexer, struct Token* token) {
}
dest[len] = '\0';
lexer->cur_ptr = peek + 1;
token->constant.have = 1;
token->val.have = 1;
token->type = TOKEN_STRING_LITERAL;
}
// FIXME it write by AI maybe error
static void parse_number(struct Lexer* lexer, struct Token* token) {
static void parse_number(lexer_t* lexer, tok_t* token) {
unsigned char* peek = lexer->cur_ptr;
int base = 10;
int is_float = 0;
@ -255,12 +257,12 @@ static void parse_number(struct Lexer* lexer, struct Token* token) {
if ((*peek == 'e' || *peek == 'E') && base == 10) {
is_float = 1;
peek++;
int exp_sign = 1;
// int exp_sign = 1;
int exponent = 0;
if (*peek == '+') peek++;
else if (*peek == '-') {
exp_sign = -1;
// exp_sign = -1;
peek++;
}
@ -273,19 +275,19 @@ static void parse_number(struct Lexer* lexer, struct Token* token) {
// 存储结果
lexer->cur_ptr = peek;
token->constant.have = 1;
token->val.have = 1;
if (is_float) {
token->constant.d = float_val;
token->val.d = float_val;
token->type = TOKEN_FLOAT_LITERAL;
} else {
token->constant.ll = int_val;
token->val.ll = int_val;
token->type = TOKEN_INT_LITERAL;
}
}
#define GOT_ONE_TOKEN_BUF_SIZE 64
// /zh/c/language/operator_arithmetic.html
void get_token(struct Lexer* lexer, struct Token* token) {
void get_token(lexer_t* lexer, tok_t* token) {
// 需要保证缓冲区始终可读
if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) {
flush_buffer(lexer);
@ -305,8 +307,8 @@ void get_token(struct Lexer* lexer, struct Token* token) {
token->type = TOKEN_FLUSH;
}
enum TokenType tok = TOKEN_INIT;
struct TokenConstant constant;
tok_type_t tok = TOKEN_INIT;
tok_val_t constant;
constant.have = 0;
// once step
@ -392,7 +394,7 @@ void get_token(struct Lexer* lexer, struct Token* token) {
switch (*peek++) {
case '=': tok = TOKEN_NEQ; break;
default: peek--, tok = TOKEN_NOT; break;
}
} break;
case '[':
tok = TOKEN_L_BRACKET; break;
case ']':
@ -454,7 +456,7 @@ void get_token(struct Lexer* lexer, struct Token* token) {
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z':
case '_':
// TOKEN_IDENT
if (*peek == 'L' && *peek == '\'' || *peek == 'L' && *peek == '"') {
if ((*peek == 'L' && *peek == '\'') || (*peek == 'L' && *peek == '"')) {
error("unsupport wide-character char literal by `L` format");
}
while (1) {
@ -469,18 +471,18 @@ void get_token(struct Lexer* lexer, struct Token* token) {
break;
}
int res = keyword_cmp(lexer->cur_ptr, peek - (lexer->cur_ptr));
int res = keyword_cmp((const char*)lexer->cur_ptr, peek - (lexer->cur_ptr));
if (res == -1) {
int strlen = peek - lexer->cur_ptr;
unsigned char* str = xmalloc(strlen + 1);
constant.have = 1;
constant.str = str;
constant.str = (char*)str;
for (int i = 0; i < strlen; i++) {
str[i] = lexer->cur_ptr[i];
}
str[strlen] = '\0';
constant.have = 1;
constant.str = str;
constant.str = (char*)str;
tok = TOKEN_IDENT; break;
} else {
tok = keywords[res].tok; break;
@ -492,32 +494,16 @@ void get_token(struct Lexer* lexer, struct Token* token) {
lexer->cur_ptr = peek;
END:
token->constant = constant;
token->val = constant;
token->type = tok;
}
// get_token maybe got invalid (with parser)
void get_valid_token(struct Lexer* lexer, struct Token* token) {
enum TokenType type;
void get_valid_token(lexer_t* lexer, tok_t* token) {
tok_type_t type;
do {
get_token(lexer, token);
type = token->type;
} while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT);
}
// 生成字符串映射(根据需求选择#str或#name
static const char* token_strings[] = {
// 普通token使用#str
#define X(str, tok) [tok] = #str,
TOKEN_TABLE
#undef X
// 关键字使用#name
#define X(name, std, tok) [tok] = #name,
KEYWORD_TABLE
#undef X
};
const char* get_token_name(enum TokenType type) {
return token_strings[type];
}

View File

@ -2,13 +2,17 @@
#define __LEXER_H__
#include "token.h"
#ifndef LEXER_MAX_TOKEN_SIZE
#define LEXER_MAX_TOKEN_SIZE 63
#endif
#ifndef LEXER_BUFFER_SIZE
#define LEXER_BUFFER_SIZE 4095
#endif
typedef int (*lexer_sread_fn)(void *dst_buf, int dst_size,
int elem_size, int count, void *stream);
struct Lexer {
typedef struct lexer {
int line;
int index;
// const char current_file_name[LEXER_BUFFER_SIZE+1];
@ -19,22 +23,15 @@ struct Lexer {
lexer_sread_fn sread;
void* stream;
};
} lexer_t;
struct Token {
enum TokenType type;
struct TokenConstant constant;
};
void init_lexer(struct Lexer* lexer, const char* file_name, void* stream,
void init_lexer(lexer_t* lexer, const char* file_name, void* stream,
lexer_sread_fn sread);
//
void get_token(struct Lexer* lexer, struct Token* token);
// pure token getter it will included empty token like TOKEN_FLUSH
void get_token(lexer_t* lexer, tok_t* token);
// get_token maybe got invalid (with parser as TOKEN_FLUSH)
void get_valid_token(struct Lexer* lexer, struct Token* token);
const char* get_token_name(enum TokenType token);
void get_valid_token(lexer_t* lexer, tok_t* token);
#endif

View File

@ -0,0 +1,17 @@
# Lexer tests: `make` builds and runs the unit tests, `make run` builds the
# interactive runner.
CC = gcc
CFLAGS = -g -Wall
SRC = ../lexer.c ../token.c

.PHONY: all test_all clean

# Was written as the variable assignment "all = test_all", which defined no target.
all: test_all

test_all: test
	./test

# Real prerequisites, so the binaries are rebuilt when their sources change.
run: $(SRC) run.c
	$(CC) $(CFLAGS) $(SRC) run.c -o run

test: $(SRC) test.c
	$(CC) $(CFLAGS) $(SRC) -o test test.c

clean:
	rm -f test run

View File

@ -1,8 +1,8 @@
#include "../lexer.h"
#include <stdio.h>
// gcc -g ../lexer.c test_lexer.c -o test_lexer
// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
/*
struct TokenConstant {
tok_tConstant {
int have;
union {
char ch;
@ -31,9 +31,9 @@ int main(int argc, char* argv[]) {
}
printf("open file success\n");
struct Lexer lexer;
lexer_t lexer;
init_lexer(&lexer, "test_lexter.c", fp, (lexer_sread_fn)fread_s);
struct Token tok;
tok_t tok;
while (1) {
get_valid_token(&lexer, &tok);
@ -41,6 +41,6 @@ int main(int argc, char* argv[]) {
break;
}
printf("line: %d, column: %d, type: %3d, typename: %s\n",
lexer.line, lexer.index, tok.type, get_token_name(tok.type));
lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
}
}

View File

@ -0,0 +1,178 @@
// test_lexer.c
#include "../../../../libcore/acutest.h"
#include "../lexer.h"
#include <string.h>
/*
 * Reader callback for the lexer tests: `stream` is a NUL-terminated string.
 *
 * Copies the string, including its terminator, into dst_buf, limited by both
 * dst_size and elem_size * count, and returns the number of bytes copied.
 * The copy never goes past the terminator; the previous version copied the
 * full requested size and read beyond the end of short inputs.
 * Returns 0 for a NULL stream or a non-positive size.
 */
int test_read(void *dst_buf, int dst_size, int elem_size, int count, void *stream) {
    if (stream == NULL) {
        return 0;
    }
    int size = dst_size > elem_size * count ? elem_size * count : dst_size;
    int avail = (int)strlen((const char *)stream) + 1; /* text plus terminator */
    if (size > avail) {
        size = avail;
    }
    if (size <= 0) {
        return 0;
    }
    memcpy(dst_buf, stream, (size_t)size);
    return size;
}
// Test helper: lex `input` through test_read and check that the first token
// returned by get_valid_token has type `expected_type`. The expected and
// actual token names are passed to TEST_MSG right after the check.
static inline void test_lexer_string(const char* input, tok_type_t expected_type) {
    tok_t token;
    lexer_t lx;

    init_lexer(&lx, "test.c", (void*)input, test_read);
    get_valid_token(&lx, &token);

    TEST_CHECK(token.type == expected_type);
    TEST_MSG("Expected: %s", get_tok_name(expected_type));
    TEST_MSG("Got: %s", get_tok_name(token.type));
}
// 基础运算符测试
void test_operators() {
TEST_CASE("Arithmetic operators"); {
test_lexer_string("+", TOKEN_ADD);
test_lexer_string("++", TOKEN_ADD_ADD);
test_lexer_string("+=", TOKEN_ASSIGN_ADD);
test_lexer_string("-", TOKEN_SUB);
test_lexer_string("--", TOKEN_SUB_SUB);
test_lexer_string("-=", TOKEN_ASSIGN_SUB);
test_lexer_string("*", TOKEN_MUL);
test_lexer_string("*=", TOKEN_ASSIGN_MUL);
test_lexer_string("/", TOKEN_DIV);
test_lexer_string("/=", TOKEN_ASSIGN_DIV);
test_lexer_string("%", TOKEN_MOD);
test_lexer_string("%=", TOKEN_ASSIGN_MOD);
}
TEST_CASE("Bitwise operators"); {
test_lexer_string("&", TOKEN_AND);
test_lexer_string("&&", TOKEN_AND_AND);
test_lexer_string("&=", TOKEN_ASSIGN_AND);
test_lexer_string("|", TOKEN_OR);
test_lexer_string("||", TOKEN_OR_OR);
test_lexer_string("|=", TOKEN_ASSIGN_OR);
test_lexer_string("^", TOKEN_XOR);
test_lexer_string("^=", TOKEN_ASSIGN_XOR);
test_lexer_string("~", TOKEN_BIT_NOT);
test_lexer_string("<<", TOKEN_L_SH);
test_lexer_string("<<=", TOKEN_ASSIGN_L_SH);
test_lexer_string(">>", TOKEN_R_SH);
test_lexer_string(">>=", TOKEN_ASSIGN_R_SH);
}
TEST_CASE("Comparison operators"); {
test_lexer_string("==", TOKEN_EQ);
test_lexer_string("!=", TOKEN_NEQ);
test_lexer_string("<", TOKEN_LT);
test_lexer_string("<=", TOKEN_LE);
test_lexer_string(">", TOKEN_GT);
test_lexer_string(">=", TOKEN_GE);
}
TEST_CASE("Special symbols"); {
test_lexer_string("(", TOKEN_L_PAREN);
test_lexer_string(")", TOKEN_R_PAREN);
test_lexer_string("[", TOKEN_L_BRACKET);
test_lexer_string("]", TOKEN_R_BRACKET);
test_lexer_string("{", TOKEN_L_BRACE);
test_lexer_string("}", TOKEN_R_BRACE);
test_lexer_string(";", TOKEN_SEMICOLON);
test_lexer_string(",", TOKEN_COMMA);
test_lexer_string(":", TOKEN_COLON);
test_lexer_string(".", TOKEN_DOT);
test_lexer_string("...", TOKEN_ELLIPSIS);
test_lexer_string("->", TOKEN_DEREF);
test_lexer_string("?", TOKEN_COND);
}
}
// 关键字测试
void test_keywords() {
TEST_CASE("C89 keywords");
test_lexer_string("while", TOKEN_WHILE);
test_lexer_string("sizeof", TOKEN_SIZEOF);
// TEST_CASE("C99 keywords");
// test_lexer_string("restrict", TOKEN_RESTRICT);
// test_lexer_string("_Bool", TOKEN_INT); // 需确认你的类型定义
}
// Literal tests: integer and string literals must lex to the matching
// literal token type. Several groups are kept below as disabled cases.
void test_literals() {
    static const char* const int_literals[] = {
        // decimal
        "0",
        "123",
    };
    static const char* const string_literals[] = {
        "\"hello\"",
        "\"multi-line\\nstring\"",
        "\"escape\\\"quote\"",
    };

    TEST_CASE("Integer literals");
    for (unsigned i = 0; i < sizeof int_literals / sizeof int_literals[0]; i++) {
        test_lexer_string(int_literals[i], TOKEN_INT_LITERAL);
    }
    // Currently disabled integer cases:
    // test_lexer_string("2147483647", TOKEN_INT_LITERAL);
    // // hexadecimal
    // test_lexer_string("0x0", TOKEN_INT_LITERAL);
    // test_lexer_string("0x1A3F", TOKEN_INT_LITERAL);
    // test_lexer_string("0XABCDEF", TOKEN_INT_LITERAL);
    // // octal
    // test_lexer_string("0123", TOKEN_INT_LITERAL);
    // test_lexer_string("0777", TOKEN_INT_LITERAL);
    // // boundary values
    // test_lexer_string("2147483647", TOKEN_INT_LITERAL); // INT_MAX
    // test_lexer_string("4294967295", TOKEN_INT_LITERAL); // UINT_MAX

    // Currently disabled:
    // TEST_CASE("Character literals"); {
    //     test_lexer_string("'a'", TOKEN_CHAR_LITERAL);
    //     test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL);
    //     test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL);
    //     test_lexer_string("'\\\\'", TOKEN_CHAR_LITERAL);
    //     test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL);
    // }

    TEST_CASE("String literals");
    for (unsigned i = 0; i < sizeof string_literals / sizeof string_literals[0]; i++) {
        test_lexer_string(string_literals[i], TOKEN_STRING_LITERAL);
    }

    // Currently disabled:
    // TEST_CASE("Integer literals");
    // test_lexer_string("123", TOKEN_INT_LITERAL);
    // test_lexer_string("0x1F", TOKEN_INT_LITERAL);
    // TEST_CASE("Floating literals");
    // test_lexer_string("3.14e-5", TOKEN_FLOAT_LITERAL);
    // TEST_CASE("Character literals");
    // test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL);
}
// Edge-case tests.
// Every case below is commented out, so this function currently does nothing.
void test_edge_cases() {
// Disabled: identifier one character longer than LEXER_MAX_TOKEN_SIZE.
// TEST_CASE("Long identifiers");
// char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0};
// memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1);
// test_lexer_string(long_id, TOKEN_IDENT);
// Disabled: input of '+' characters nearly twice LEXER_BUFFER_SIZE long.
// TEST_CASE("Buffer boundary");
// char boundary[LEXER_BUFFER_SIZE*2] = {0};
// memset(boundary, '+', LEXER_BUFFER_SIZE*2-1);
// test_lexer_string(boundary, TOKEN_ADD);
}
// 错误处理测试
void test_error_handling() {
TEST_CASE("Invalid characters");
lexer_t lexer;
tok_t token;
init_lexer(&lexer, "test.c", NULL, test_read);
get_valid_token(&lexer, &token);
TEST_CHECK(token.type == TOKEN_EOF); // 应触发错误处理
}
// Test list: {name, function} pairs, terminated by a {NULL, NULL} entry.
TEST_LIST = {
{"operators", test_operators},
{"keywords", test_keywords},
{"literals", test_literals},
{"edge_cases", test_edge_cases},
{"error_handling", test_error_handling},
{NULL, NULL}
};

View File

@ -0,0 +1,86 @@
#define FRONTEND_IMPLEMENTATION
#include "../frontend.h"
#include "token.h"
// Wrap a ring-buffer index by the buffer capacity.
// Relies on a variable named `tokbuf` being in scope at the expansion site.
// NOTE(review): tokbuf->cap must be non-zero here, yet init_tokbuf sets cap
// to 0 — presumably the owner assigns buf/cap afterwards; confirm.
#define ROUND_IDX(idx) ((idx) % tokbuf->cap)
/*
 * Consume the oldest buffered token and return a pointer to it.
 *
 * peek_tok() stores each fetched token at slot `peek + 1`, so the buffered
 * tokens occupy the slots after `cur`, up to and including `end`. The token
 * being consumed is therefore the one at the advanced `cur`, and that is the
 * slot returned. Returning the slot at the old `cur` would hand back the
 * previously consumed token (or an unwritten slot on the first pop).
 *
 * Reports an error and returns NULL when no token is buffered.
 * The peek cursor is not touched; callers re-sync it with flush_peek_tok().
 */
tok_t* pop_tok(tok_buf_t* tokbuf) {
    if (tokbuf->size == 0) {
        error("no token to pop");
        return NULL;
    }
    tokbuf->cur = ROUND_IDX(tokbuf->cur + 1);
    tokbuf->size -= 1;
    return tokbuf->buf + tokbuf->cur;
}
// Reset the peek cursor to the consume cursor, so the next peek_tok() call
// starts again from the slot after `cur`.
void flush_peek_tok(tok_buf_t* tokbuf) {
tokbuf->peek = tokbuf->cur;
}
// Reset a token buffer: all cursors and counters become 0, no backing array
// is attached (buf == NULL, cap == 0), and the token source is set to
// `gettok` reading from `stream`.
void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) {
    *tokbuf = (tok_buf_t){
        .cur    = 0,
        .end    = 0,
        .peek   = 0,
        .size   = 0,
        .cap    = 0,
        .buf    = NULL,
        .stream = stream,
        .gettok = gettok,
    };
}
// Advance the peek cursor by one slot and return a pointer to the token in
// that slot. If the cursor was at `end` (nothing buffered beyond it), a new
// token is first fetched through tokbuf->gettok into that slot, and `size`
// and `end` are updated.
tok_t *peek_tok(tok_buf_t *tokbuf)
{
int idx = tokbuf->peek;
// Pre-increment: the token returned lives at peek + 1 (wrapped).
idx = ROUND_IDX(idx + 1);
// NOTE(review): this capacity check runs before it is known whether a fetch
// is needed at all — confirm that is intended.
if (tokbuf->size >= tokbuf->cap) {
error("peek too deep, outof array size");
}
if (tokbuf->peek == tokbuf->end) {
// NOTE(review): this repeats the size/cap condition already tested above.
if (tokbuf->size == tokbuf->cap) {
error("peek_tok buffer overflow");
}
// NOTE(review): if error() returns, the NULL gettok is still called below
// — confirm error() does not return.
if (tokbuf->gettok == NULL) {
error("peek_tok can not got tok");
}
tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx]));
tokbuf->size++;
tokbuf->end = idx;
}
tokbuf->peek = idx;
return &(tokbuf->buf[idx]);
}
// Convenience wrapper: advance the peek cursor via peek_tok() and return the
// type of the token it yields.
tok_type_t peek_tok_type(tok_buf_t* tokbuf) {
return peek_tok(tokbuf)->type;
}
// Check that the next unconsumed token has type `type`.
// On a match the token is consumed with pop_tok(); otherwise an error naming
// the expected and actual token is reported and nothing is consumed.
// Always returns 0.
int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type) {
    flush_peek_tok(tokbuf);

    tok_t* next = peek_tok(tokbuf);
    if (next->type == type) {
        pop_tok(tokbuf);
        return 0;
    }

    error("expected tok: %s, got %s", get_tok_name(type), get_tok_name(next->type));
    return 0;
}
// Token-type -> display-string table, generated from the X-macro tables.
// Each entry is placed at the index of its token enumerator via a designated
// initializer; the string is the stringized first X() argument (#str or #name).
static const char* token_strings[] = {
// Ordinary tokens use #str
#define X(str, tok) [tok] = #str,
TOKEN_TABLE
#undef X
// Keywords use #name
#define X(name, std, tok) [tok] = #name,
KEYWORD_TABLE
#undef X
};
/*
 * Return the display string for a token type.
 *
 * The lookup is bounds-checked. A value outside the token_strings table, or
 * an index the table has no entry for, yields "<unknown token>", so the
 * result is always safe to pass to a "%s" format.
 */
const char* get_tok_name(tok_type_t type) {
    unsigned count = (unsigned)(sizeof(token_strings) / sizeof(token_strings[0]));
    unsigned idx = (unsigned)type; /* a negative value becomes large and is rejected */

    if (idx >= count || token_strings[idx] == NULL) {
        return "<unknown token>";
    }
    return token_strings[idx];
}

View File

@ -105,7 +105,7 @@ enum CSTD_KEYWORD {
// END
// 定义TokenType枚举
enum TokenType {
typedef enum tok_type {
// 处理普通token
#define X(str, tok) tok,
TOKEN_TABLE
@ -115,9 +115,9 @@ enum TokenType {
#define X(name, std, tok) tok,
KEYWORD_TABLE
#undef X
};
} tok_type_t;
struct TokenConstant {
typedef struct tok_val {
int have;
union {
char ch;
@ -127,124 +127,31 @@ struct TokenConstant {
long long ll;
char* str;
};
};
} tok_val_t;
// "break"
// "case"
// "char"
// "const"
// "continue"
// "default"
// "do"
// "double"
// "else"
// "enum"
// "extern"
// "float"
// "for"
// "goto"
// "if"
// "inline (C99)"
// "int"
// "long"
// "register"
// "restrict (C99)"
// "return"
// "short"
// "signed"
// "sizeof"
// "static"
// "struct"
// "switch"
// "typedef"
// "union"
// "unsigned"
// "void"
// "volatile"
// "while"
typedef struct tok {
tok_type_t type;
tok_val_t val;
} tok_t;
// alignas (C23)
// alignof (C23)
// auto
// bool (C23)
// constexpr (C23)
// false (C23)
// nullptr (C23)
// static_assert (C23)
// thread_local (C23)
// true (C23)
// typeof (C23)
// typeof_unqual (C23)
// _Alignas (C11)
// _Alignof (C11)
// _Atomic (C11)
// _BitInt (C23)
// _Bool (C99)
// _Complex (C99)
// _Decimal128 (C23)
// _Decimal32 (C23)
// _Decimal64 (C23)
// _Generic (C11)
// _Imaginary (C99)
// _Noreturn (C11)
// _Static_assert (C11)
// _Thread_local (C11)
typedef struct tok_buf {
int cur;
int end;
int peek;
int size;
int cap;
tok_t* buf;
void* stream;
void (*gettok)(void* stream, tok_t* token);
} tok_buf_t;
// a = b
// a += b
// a -= b
// a *= b
// a /= b
// a %= b
// a &= b
// a |= b
// a ^= b
// a <<= b
// a >>= b
// ++a
// --a
// a++
// a--
// +a
// -a
// a + b
// a - b
// a * b
// a / b
// a % b
// ~a
// a & b
// a | b
// a ^ b
// a << b
// a >> b
// !a
// a && b
// a || b
// a == b
// a != b
// a < b
// a > b
// a <= b
// a >= b
// a[b]
// *a
// &a
// a->b
// a.b
// a(...)
// a, b
// (type) a
// a ? b : c
// sizeof
// _Alignof
// (C11)
typedef void(*get_tokbuf_func)(void* stream, tok_t* token);
void init_tokbuf(tok_buf_t* tokbuf, void* stream, get_tokbuf_func gettok);
tok_t* peek_tok(tok_buf_t* tokbuf);
tok_t* pop_tok(tok_buf_t* tokbuf);
void flush_peek_tok(tok_buf_t* tokbuf);
tok_type_t peek_tok_type(tok_buf_t* tokbuf);
int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type);
const char* get_tok_name(tok_type_t type);
#endif

View File

@ -14,9 +14,9 @@ void init_ast_node(struct ASTNode* node) {
}
}
struct ASTNode* find_ast_node(struct ASTNode* node, enum ASTType type) {
// struct ASTNode* find_ast_node(struct ASTNode* node, ast_type_t type) {
}
// }
#include <stdio.h>
static void pnt_depth(int depth) {
@ -25,149 +25,149 @@ static void pnt_depth(int depth) {
}
}
void pnt_ast(struct ASTNode* node, int depth) {
if (!node) return;
pnt_depth(depth);
switch (node->type) {
case NT_ROOT:
for (int i = 0; i < node->root.child_size; i++) {
pnt_ast(node->root.children[i], depth);
}
return;
// void pnt_ast(struct ASTNode* node, int depth) {
// if (!node) return;
// pnt_depth(depth);
// switch (node->type) {
// case NT_ROOT:
// for (int i = 0; i < node->root.child_size; i++) {
// pnt_ast(node->root.children[i], depth);
// }
// return;
case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
case NT_SUB : printf("- \n"); break; // (expr) - (expr)
case NT_MUL : printf("* \n"); break; // (expr) * (expr)
case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
case NT_AND : printf("& \n"); break; // (expr) & (expr)
case NT_OR : printf("| \n"); break; // (expr) | (expr)
case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
case NT_EQ : printf("==\n"); break; // (expr) == (expr)
case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
case NT_LT : printf("< \n"); break; // (expr) < (expr)
case NT_GT : printf("> \n"); break; // (expr) > (expr)
case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
case NT_NOT : printf("! \n"); break; // ! (expr)
case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
// case NT_COND : // (expr) ? (expr) : (expr)
// case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
// case NT_SUB : printf("- \n"); break; // (expr) - (expr)
// case NT_MUL : printf("* \n"); break; // (expr) * (expr)
// case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
// case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
// case NT_AND : printf("& \n"); break; // (expr) & (expr)
// case NT_OR : printf("| \n"); break; // (expr) | (expr)
// case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
// case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
// case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
// case NT_EQ : printf("==\n"); break; // (expr) == (expr)
// case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
// case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
// case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
// case NT_LT : printf("< \n"); break; // (expr) < (expr)
// case NT_GT : printf("> \n"); break; // (expr) > (expr)
// case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
// case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
// case NT_NOT : printf("! \n"); break; // ! (expr)
// case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
// case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
// case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
// // case NT_COND : // (expr) ? (expr) : (expr)
case NT_STMT_EMPTY : // ;
printf(";\n");
break;
case NT_STMT_IF : // if (cond) { ... } [else {...}]
printf("if");
pnt_ast(node->if_stmt.cond, depth+1);
pnt_ast(node->if_stmt.if_stmt, depth+1);
if (node->if_stmt.else_stmt) {
pnt_depth(depth);
printf("else");
pnt_ast(node->if_stmt.else_stmt, depth+1);
}
break;
case NT_STMT_WHILE : // while (cond) { ... }
printf("while\n");
pnt_ast(node->while_stmt.cond, depth+1);
pnt_ast(node->while_stmt.body, depth+1);
break;
case NT_STMT_DOWHILE : // do {...} while (cond)
printf("do-while\n");
pnt_ast(node->do_while_stmt.body, depth+1);
pnt_ast(node->do_while_stmt.cond, depth+1);
break;
case NT_STMT_FOR : // for (init; cond; iter) {...}
printf("for\n");
if (node->for_stmt.init)
pnt_ast(node->for_stmt.init, depth+1);
if (node->for_stmt.cond)
pnt_ast(node->for_stmt.cond, depth+1);
if (node->for_stmt.iter)
pnt_ast(node->for_stmt.iter, depth+1);
pnt_ast(node->for_stmt.body, depth+1);
break;
case NT_STMT_SWITCH : // switch (expr) { case ... }
case NT_STMT_BREAK : // break;
case NT_STMT_CONTINUE : // continue;
case NT_STMT_GOTO : // goto label;
case NT_STMT_CASE : // case const_expr:
case NT_STMT_DEFAULT : // default:
case NT_STMT_LABEL : // label:
break;
case NT_STMT_BLOCK : // { ... }
printf("{\n");
for (int i = 0; i < node->block.child_size; i++) {
pnt_ast(node->block.children[i], depth+1);
}
pnt_depth(depth);
printf("}\n");
break;
case NT_STMT_RETURN : // return expr;
printf("return");
if (node->return_stmt.expr_stmt) {
printf(" ");
pnt_ast(node->return_stmt.expr_stmt, depth+1);
} else {
printf("\n");
}
break;
case NT_STMT_EXPR : // expr;
printf("stmt\n");
pnt_ast(node->expr_stmt.expr_stmt, depth);
pnt_depth(depth);
printf(";\n");
break;
case NT_DECL_VAR : // type name; or type name = expr;
printf("decl_val\n");
break;
case NT_DECL_FUNC: // type func_name(param_list);
printf("decl func %s\n", node->func.name->syms.tok.constant.str);
break;
case NT_FUNC : // type func_name(param_list) {...}
printf("def func %s\n", node->func.name->syms.tok.constant.str);
// pnt_ast(node->child.func.params, depth);
pnt_ast(node->func.body, depth);
// pnt_ast(node->child.func.ret, depth);
break;
case NT_PARAM : // 函数形参
printf("param\n");
case NT_ARG_LIST : // 实参列表需要与NT_CALL配合
printf("arg_list\n");
case NT_TERM_CALL : // func (expr)
printf("call\n");
break;
case NT_TERM_IDENT:
printf("%s\n", node->syms.tok.constant.str);
break;
case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword
struct Token * tok = &node->syms.tok;
switch (tok->type) {
case TOKEN_CHAR_LITERAL:
printf("%c\n", tok->constant.ch);
break;
case TOKEN_INT_LITERAL:
printf("%d\n", tok->constant.i);
break;
case TOKEN_STRING_LITERAL:
printf("%s\n", tok->constant.str);
break;
default:
printf("unknown term val\n");
break;
}
default:
break;
}
// case NT_STMT_EMPTY : // ;
// printf(";\n");
// break;
// case NT_STMT_IF : // if (cond) { ... } [else {...}]
// printf("if");
// pnt_ast(node->if_stmt.cond, depth+1);
// pnt_ast(node->if_stmt.if_stmt, depth+1);
// if (node->if_stmt.else_stmt) {
// pnt_depth(depth);
// printf("else");
// pnt_ast(node->if_stmt.else_stmt, depth+1);
// }
// break;
// case NT_STMT_WHILE : // while (cond) { ... }
// printf("while\n");
// pnt_ast(node->while_stmt.cond, depth+1);
// pnt_ast(node->while_stmt.body, depth+1);
// break;
// case NT_STMT_DOWHILE : // do {...} while (cond)
// printf("do-while\n");
// pnt_ast(node->do_while_stmt.body, depth+1);
// pnt_ast(node->do_while_stmt.cond, depth+1);
// break;
// case NT_STMT_FOR : // for (init; cond; iter) {...}
// printf("for\n");
// if (node->for_stmt.init)
// pnt_ast(node->for_stmt.init, depth+1);
// if (node->for_stmt.cond)
// pnt_ast(node->for_stmt.cond, depth+1);
// if (node->for_stmt.iter)
// pnt_ast(node->for_stmt.iter, depth+1);
// pnt_ast(node->for_stmt.body, depth+1);
// break;
// case NT_STMT_SWITCH : // switch (expr) { case ... }
// case NT_STMT_BREAK : // break;
// case NT_STMT_CONTINUE : // continue;
// case NT_STMT_GOTO : // goto label;
// case NT_STMT_CASE : // case const_expr:
// case NT_STMT_DEFAULT : // default:
// case NT_STMT_LABEL : // label:
// break;
// case NT_STMT_BLOCK : // { ... }
// printf("{\n");
// for (int i = 0; i < node->block.child_size; i++) {
// pnt_ast(node->block.children[i], depth+1);
// }
// pnt_depth(depth);
// printf("}\n");
// break;
// case NT_STMT_RETURN : // return expr;
// printf("return");
// if (node->return_stmt.expr_stmt) {
// printf(" ");
// pnt_ast(node->return_stmt.expr_stmt, depth+1);
// } else {
// printf("\n");
// }
// break;
// case NT_STMT_EXPR : // expr;
// printf("stmt\n");
// pnt_ast(node->expr_stmt.expr_stmt, depth);
// pnt_depth(depth);
// printf(";\n");
// break;
// case NT_DECL_VAR : // type name; or type name = expr;
// printf("decl_val\n");
// break;
// case NT_DECL_FUNC: // type func_name(param_list);
// printf("decl func %s\n", node->func.name->syms.tok.val.str);
// break;
// case NT_FUNC : // type func_name(param_list) {...}
// printf("def func %s\n", node->func.name->syms.tok.val.str);
// // pnt_ast(node->child.func.params, depth);
// pnt_ast(node->func.body, depth);
// // pnt_ast(node->child.func.ret, depth);
// break;
// case NT_PARAM : // 函数形参
// printf("param\n");
// case NT_ARG_LIST : // 实参列表需要与NT_CALL配合
// printf("arg_list\n");
// case NT_TERM_CALL : // func (expr)
// printf("call\n");
// break;
// case NT_TERM_IDENT:
// printf("%s\n", node->syms.tok.val.str);
// break;
// case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword
// tok_t * tok = &node->syms.tok;
// switch (tok->type) {
// case TOKEN_CHAR_LITERAL:
// printf("%c\n", tok->val.ch);
// break;
// case TOKEN_INT_LITERAL:
// printf("%d\n", tok->val.i);
// break;
// case TOKEN_STRING_LITERAL:
// printf("%s\n", tok->val.str);
// break;
// default:
// printf("unknown term val\n");
// break;
// }
// default:
// break;
// }
// 通用子节点递归处理
if (node->type <= NT_ASSIGN) { // 表达式类统一处理子节点
if (node->expr.left) pnt_ast(node->expr.left, depth+1);
if (node->expr.right) pnt_ast(node->expr.right, depth + 1);
}
}
// // 通用子节点递归处理
// if (node->type <= NT_ASSIGN) { // 表达式类统一处理子节点
// if (node->expr.left) pnt_ast(node->expr.left, depth+1);
// if (node->expr.right) pnt_ast(node->expr.right, depth + 1);
// }
// }

View File

@ -3,9 +3,10 @@
#include "../../frontend.h"
#include "../../lexer/lexer.h"
#include "../../../../libcore/vector.h"
#include "../type.h"
enum ASTType {
typedef enum {
NT_INIT,
NT_ROOT, // global scope in root node
NT_ADD, // (expr) + (expr)
@ -75,31 +76,28 @@ enum ASTType {
NT_TERM_VAL,
NT_TERM_IDENT,
NT_TERM_TYPE,
};
} ast_type_t;
struct ASTNode {
enum ASTType type;
typedef struct ASTNode {
ast_type_t type;
union {
void *children[6];
struct {
struct ASTNode** children;
int child_size;
vector_header(children, struct ASTNode*);
} root;
struct {
struct ASTNode** children; // array of children
int child_size;
vector_header(children, struct ASTNode*);
} block;
struct {
struct ASTNode* decl_node;
struct Token tok;
tok_t tok;
} syms;
struct {
struct ASTNode *arr;
int size;
vector_header(params, struct ASTNode*);
} params;
struct {
const char* name;
struct ASTNode* name;
struct ASTNode* params;
struct ASTNode* func_decl;
} call;
@ -113,13 +111,12 @@ struct ASTNode {
struct ASTNode *ret;
struct ASTNode *name;
struct ASTNode *params; // array of params
void* data;
} func_decl;
struct ASTNode *def;
} decl_func;
struct {
struct ASTNode *ret;
struct ASTNode *name;
struct ASTNode *params; // array of params
struct ASTNode *decl;
struct ASTNode *body; // optional
void* data;
} func;
struct {
struct ASTNode *left;
@ -165,27 +162,26 @@ struct ASTNode {
struct ASTNode *expr_stmt;
} expr_stmt;
};
};
} ast_node_t;
struct ASTNode* new_ast_node(void);
void init_ast_node(struct ASTNode* node);
void pnt_ast(struct ASTNode* node, int depth);
struct Parser;
typedef struct ASTNode* (*parse_func_t) (struct Parser*);
typedef struct parser parser_t;
typedef struct ASTNode* (*parse_func_t) (parser_t*);
void parse_prog(struct Parser* parser);
struct ASTNode* parse_block(struct Parser* parser);
struct ASTNode* parse_stmt(struct Parser* parser);
struct ASTNode* parse_expr(struct Parser* parser);
struct ASTNode* parse_func(struct Parser* parser);
struct ASTNode* parse_decl(struct Parser* parser);
void parse_prog(parser_t* parser);
ast_node_t* parse_decl(parser_t* parser);
ast_node_t* parse_block(parser_t* parser);
ast_node_t* parse_stmt(parser_t* parser);
ast_node_t* parse_expr(parser_t* parser);
struct ASTNode* parse_ident(struct Parser* parser);
struct ASTNode* parse_type(struct Parser* parser);
ast_node_t* parse_type(parser_t* parser);
int peek_decl(struct Parser* parser);
ast_node_t* new_ast_ident_node(tok_t* tok);
ast_node_t* expect_pop_ident(tok_buf_t* tokbuf);
struct ASTNode* parser_ident_without_pop(struct Parser* parser);
int peek_decl(tok_buf_t* tokbuf);
#endif

View File

@ -1,48 +1,49 @@
#include "../parser.h"
#include "ast.h"
#include "../parser.h"
#include "../symtab/symtab.h"
#ifndef BLOCK_MAX_NODE
#define BLOCK_MAX_NODE (1024)
#endif
struct ASTNode* parse_block(struct Parser* parser) {
symtab_enter_scope(parser->symtab);
// parse_decl(parser); // decl_var
enum TokenType ttype;
struct ASTNode* node = new_ast_node();
ast_node_t* new_ast_node_block() {
ast_node_t* node = new_ast_node();
node->type = NT_BLOCK;
flushpeektok(parser);
ttype = peektoktype(parser);
if (ttype != TOKEN_L_BRACE) {
error("block need '{' start");
}
poptok(parser);
vector_init(node->block.children);
return node;
}
node->block.children = malloc(sizeof(struct ASTNode*) * BLOCK_MAX_NODE);
struct ASTNode* child = NULL;
ast_node_t* parse_block(parser_t* parser) {
symtab_enter_scope(parser->symtab);
tok_buf_t *tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype;
ast_node_t* node = new_ast_node_block();
expect_pop_tok(tokbuf, TOKEN_L_BRACE);
ast_node_t* child = NULL;
while (1) {
if (peek_decl(parser) == 1) {
if (peek_decl(tokbuf)) {
child = parse_decl(parser);
goto ADD_CHILD;
vector_push(node->block.children, child);
continue;
}
flushpeektok(parser);
ttype = peektoktype(parser);
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
switch (ttype) {
case TOKEN_R_BRACE:
poptok(parser);
goto END;
default:
child = parse_stmt(parser);
goto ADD_CHILD;
break;
case TOKEN_R_BRACE: {
pop_tok(tokbuf);
goto END;
}
default: {
child = parse_stmt(parser);
vector_push(node->block.children, child);
break;
}
}
continue;
ADD_CHILD:
node->block.children[node->block.child_size++] = child;
}
END:
symtab_leave_scope(parser->symtab);

View File

@ -6,9 +6,9 @@
* 0 false
* 1 true
*/
int peek_decl(struct Parser* parser) {
flushpeektok(parser);
switch (peektoktype(parser)) {
int peek_decl(tok_buf_t* tokbuf) {
flush_peek_tok(tokbuf);
switch (peek_tok_type(tokbuf)) {
case TOKEN_STATIC:
case TOKEN_EXTERN:
case TOKEN_REGISTER:
@ -16,10 +16,10 @@ int peek_decl(struct Parser* parser) {
error("not impliment");
break;
default:
flushpeektok(parser);
flush_peek_tok(tokbuf);
}
switch (peektoktype(parser)) {
switch (peek_tok_type(tokbuf)) {
case TOKEN_VOID:
case TOKEN_CHAR:
case TOKEN_SHORT:
@ -27,60 +27,62 @@ int peek_decl(struct Parser* parser) {
case TOKEN_LONG:
case TOKEN_FLOAT:
case TOKEN_DOUBLE:
// FIXME Ptr
return 1;
default:
flushpeektok(parser);
flush_peek_tok(tokbuf);
}
return 0;
}
struct ASTNode* parse_decl_val(struct Parser* parser) {
flushpeektok(parser);
// parse_type
enum TokenType ttype;
struct ASTNode* node;
ast_node_t* parse_decl_val(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
tok_type_t ttype;
flush_peek_tok(tokbuf);
struct ASTNode* type_node = parse_type(parser);
struct ASTNode* name_node = parser_ident_without_pop(parser);
ast_node_t* node;
ast_node_t* type_node = parse_type(parser);
flush_peek_tok(tokbuf);
ast_node_t* name_node = new_ast_ident_node(peek_tok(tokbuf));
node = new_ast_node();
node->decl_val.type = type_node;
node->decl_val.name = name_node;
node->type = NT_DECL_VAR;
symtab_add_symbol(parser->symtab, name_node->syms.tok.constant.str, node);
symtab_add_symbol(parser->symtab, name_node->syms.tok.val.str, node, 0);
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_ASSIGN) {
node->decl_val.expr_stmt = parse_stmt(parser);
if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) {
error("parser_decl_val want stmt_expr");
}
} else if (ttype == TOKEN_SEMICOLON) {
poptok(parser);
expecttok(parser, TOKEN_SEMICOLON);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
} else {
error("parser_decl_val syntax error");
}
return node;
}
// 类型解析入口改进
struct ASTNode* parse_decl(struct Parser* parser) {
flushpeektok(parser);
int idx;
enum TokenType ttype;
struct ASTNode* node;
ast_node_t* parse_decl(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype;
ast_node_t* node;
if (peek_decl(parser) == 0) {
if (peek_decl(tokbuf) == 0) {
error("syntax error expect decl_val TYPE");
}
if (peektoktype(parser) != TOKEN_IDENT) {
if (peek_tok_type(tokbuf) != TOKEN_IDENT) {
error("syntax error expect decl_val IDENT");
}
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
switch (ttype) {
case TOKEN_L_PAREN: // (
node = parse_func(parser);
return NULL;
break;
case TOKEN_ASSIGN:
case TOKEN_SEMICOLON:

View File

@ -33,14 +33,18 @@ enum ParseType {
PREFIX_PARSER,
};
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec);
static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec);
#define NEXT(prec) parse_subexpression(tokbuf, symtab, prec)
static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right,
enum ASTType type) {
struct ASTNode* node = new_ast_node();
static ast_node_t* gen_node2(ast_node_t* left, ast_node_t* right,
ast_type_t type) {
ast_node_t* node = new_ast_node();
node->type = type;
node->expr.left = left;
node->expr.right = right;
return node;
// FIXME
// switch (type) {
// case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
// case NT_SUB : printf("- \n"); break; // (expr) - (expr)
@ -68,154 +72,157 @@ static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right,
// }
}
static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) {
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_comma(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
ast_node_t* node = new_ast_node();
node->type = NT_COMMA;
node->expr.left = left;
node->expr.right = parse_subexpression(parser, PREC_EXPRESSION);
node->expr.right = NEXT(PREC_EXPRESSION);
return node;
}
static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_assign(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
ast_node_t* node = new_ast_node();
node->type = NT_ASSIGN;
// saved left
node->expr.left = left;
enum Precedence next = PREC_ASSIGNMENT + 1;
switch (ttype) {
case TOKEN_ASSIGN :
left = parse_subexpression(parser, next);
left = NEXT(next);
break;
case TOKEN_ASSIGN_ADD :
left = gen_node2(left, parse_subexpression(parser, next), NT_ADD);
left = gen_node2(left, NEXT(next), NT_ADD);
break;
case TOKEN_ASSIGN_SUB :
left = gen_node2(left, parse_subexpression(parser, next), NT_SUB);
left = gen_node2(left, NEXT(next), NT_SUB);
break;
case TOKEN_ASSIGN_MUL :
left = gen_node2(left, parse_subexpression(parser, next), NT_MUL);
left = gen_node2(left, NEXT(next), NT_MUL);
break;
case TOKEN_ASSIGN_DIV :
left = gen_node2(left, parse_subexpression(parser, next), NT_DIV);
left = gen_node2(left, NEXT(next), NT_DIV);
break;
case TOKEN_ASSIGN_MOD :
left = gen_node2(left, parse_subexpression(parser, next), NT_MOD);
left = gen_node2(left, NEXT(next), NT_MOD);
break;
case TOKEN_ASSIGN_L_SH :
left = gen_node2(left, parse_subexpression(parser, next), NT_L_SH);
left = gen_node2(left, NEXT(next), NT_L_SH);
break;
case TOKEN_ASSIGN_R_SH :
left = gen_node2(left, parse_subexpression(parser, next), NT_R_SH);
left = gen_node2(left, NEXT(next), NT_R_SH);
break;
case TOKEN_ASSIGN_AND :
left = gen_node2(left, parse_subexpression(parser, next), NT_AND);
left = gen_node2(left, NEXT(next), NT_AND);
break;
case TOKEN_ASSIGN_OR :
left = gen_node2(left, parse_subexpression(parser, next), NT_OR);
left = gen_node2(left, NEXT(next), NT_OR);
break;
case TOKEN_ASSIGN_XOR :
left = gen_node2(left, parse_subexpression(parser, next), NT_XOR);
left = gen_node2(left, NEXT(next), NT_XOR);
break;
default:
error("unsupported operator");
break;
}
node->expr.right = left;
return node;
}
static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_cmp(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
ast_node_t* node = new_ast_node();
// saved left
node->expr.left = left;
switch (ttype) {
case TOKEN_EQ:
node->type = NT_EQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
node->expr.right = NEXT(PREC_EQUALITY);
break;
case TOKEN_NEQ:
node->type = NT_NEQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
node->expr.right = NEXT(PREC_EQUALITY);
break;
case TOKEN_LT:
node->type = NT_LT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
node->expr.right = NEXT(PREC_RELATIONAL);
break;
case TOKEN_GT:
node->type = NT_GT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
node->expr.right = NEXT(PREC_RELATIONAL);
break;
case TOKEN_LE:
node->type = NT_LE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
node->expr.right = NEXT(PREC_RELATIONAL);
break;
case TOKEN_GE:
node->type = NT_GE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
node->expr.right = NEXT(PREC_RELATIONAL);
break;
default:
error("invalid operator");
}
return node;
}
static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_cal(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
ast_node_t* node = new_ast_node();
node->expr.left = left;
switch (ttype) {
case TOKEN_OR_OR:
node->type = NT_OR_OR;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_OR);
node->expr.right = NEXT(PREC_LOGICAL_OR);
break;
case TOKEN_AND_AND:
node->type = NT_AND_AND;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_AND);
node->expr.right = NEXT(PREC_LOGICAL_AND);
break;
case TOKEN_OR:
node->type = NT_OR;
node->expr.right = parse_subexpression(parser, PREC_OR);
node->expr.right = NEXT(PREC_OR);
break;
case TOKEN_XOR:
node->type = NT_XOR;
node->expr.right = parse_subexpression(parser, PREC_XOR);
node->expr.right = NEXT(PREC_XOR);
break;
case TOKEN_AND:
node->type = NT_AND;
node->expr.right = parse_subexpression(parser, PREC_AND);
node->expr.right = NEXT(PREC_AND);
break;
case TOKEN_L_SH:
node->type = NT_L_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
node->expr.right = NEXT(PREC_SHIFT);
break;
case TOKEN_R_SH:
node->type = NT_R_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
node->expr.right = NEXT(PREC_SHIFT);
break;
case TOKEN_ADD:
node->type = NT_ADD;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
node->expr.right = NEXT(PREC_ADDITIVE);
break;
case TOKEN_SUB:
node->type = NT_SUB;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
node->expr.right = NEXT(PREC_ADDITIVE);
break;
case TOKEN_MUL:
node->type = NT_MUL;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
node->expr.right = NEXT(PREC_MULTIPLICATIVE);
break;
case TOKEN_DIV:
node->type = NT_DIV;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
node->expr.right = NEXT(PREC_MULTIPLICATIVE);
break;
case TOKEN_MOD:
node->type = NT_MOD;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
node->expr.right = NEXT(PREC_MULTIPLICATIVE);
break;
default:
break;
@ -223,44 +230,50 @@ static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) {
return node;
}
// Parse a function-call expression (newly added).
// NOTE(review): this span interleaves the earlier (struct Parser*) and the
// later (tok_buf_t*/symtab_t*) versions of parse_call line by line.
// The tok_buf_t version builds an NT_TERM_CALL node, pushes every argument
// parsed with NEXT(PREC_EXPRESSION) onto node->call.params->params.params,
// then looks the callee name up in the symbol table and reports an error when
// the symbol is missing or is not an NT_DECL_FUNC node.
static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) {
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_call(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* ident) {
ast_node_t* node = new_ast_node();
node->type = NT_TERM_CALL;
poptok(parser); // skip '('
node->call.name = ident;
node->call.params = new_ast_node();
vector_init(node->call.params->params.params);
pop_tok(tokbuf); // skip '('
enum TokenType ttype;
// parse the argument list
while ((ttype = peektoktype(parser)) != TOKEN_R_PAREN) {
// add_arg(node, parse_expr(parser));
if (ttype == TOKEN_COMMA) poptok(parser);
else poptok(parser);
tok_type_t ttype;
// NOTE(review): the loop below only exits on TOKEN_R_PAREN; there is no
// explicit check for end of input or for a missing ',' between arguments.
while (1) {
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_R_PAREN) {
break;
}
ast_node_t* param = NEXT(PREC_EXPRESSION);
vector_push(node->call.params->params.params, param);
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_COMMA) pop_tok(tokbuf);
}
poptok(parser); // skip ')'
pop_tok(tokbuf); // skip ')'
char* name = ident->syms.tok.constant.str;
void* sym = symtab_lookup_symbol(parser->symtab, name);
if (sym == NULL) {
const char* name = ident->syms.tok.val.str;
ast_node_t* sym = symtab_lookup_symbol(symtab, name);
// TODO check func is match
if (sym == NULL || sym->type != NT_DECL_FUNC) {
error("function not decl %s", name);
}
node->call.name = name;
node->call.params = NULL;
// the call node keeps the identifier node and the resolved declaration
node->call.name = ident;
node->call.func_decl = sym;
return node;
}
// Parse a parenthesised sub-expression: expects '(', parses an expression at
// PREC_EXPRESSION, expects ')', and returns the inner expression node.
// The incoming `left` argument is overwritten and never read.
// NOTE(review): this span interleaves the earlier (struct Parser*) and the
// later (tok_buf_t*/symtab_t*) versions of the function.
static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype;
expecttok(parser, TOKEN_L_PAREN);
left = parse_subexpression(parser, PREC_EXPRESSION);
flushpeektok(parser);
expecttok(parser, TOKEN_R_PAREN);
static ast_node_t* parse_paren(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
left = NEXT(PREC_EXPRESSION);
flush_peek_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
return left;
}
// Signature shared by the expression parse handlers stored in the precedence
// table: the last argument is the already-parsed left operand.
// The first typedef is the earlier (struct Parser*) form, the second the
// later (tok_buf_t*, symtab_t*) form.
typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*);
typedef ast_node_t* (*parse_expr_fun_t)(tok_buf_t*, symtab_t* , ast_node_t*);
static struct expr_prec_table_t {
parse_expr_fun_t parser;
enum Precedence prec;
@ -309,11 +322,11 @@ static struct expr_prec_table_t {
[TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
};
static struct ASTNode *parse_primary_expression(struct Parser* parser) {
flushpeektok(parser);
static ast_node_t *parse_primary_expression(tok_buf_t* tokbuf, symtab_t *symtab) {
flush_peek_tok(tokbuf);
struct Token* tok = peektok(parser);
struct ASTNode *node = new_ast_node();
tok_t* tok = peek_tok(tokbuf);
ast_node_t *node = new_ast_node();
node->type = NT_TERM_VAL;
node->syms.tok = *tok;
@ -330,34 +343,35 @@ static struct ASTNode *parse_primary_expression(struct Parser* parser) {
case TOKEN_STRING_LITERAL:
// node->data.data_type = TYPE_POINTER;
case TOKEN_IDENT:
node = parse_ident(parser);
if (peektoktype(parser) == TOKEN_L_PAREN) {
node = parse_call(parser, node);
node = expect_pop_ident(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_L_PAREN) {
node = parse_call(tokbuf, symtab, node);
} else {
void *sym = symtab_lookup_symbol(parser->symtab, tok->constant.str);
void *sym = symtab_lookup_symbol(symtab, tok->val.str);
if (sym == NULL) {
error("undefined symbol but use %s", tok->constant.str);
error("undefined symbol but use %s", tok->val.str);
}
node->type = NT_TERM_IDENT;
node->syms.decl_node = sym;
goto END;
}
goto END;
default:
return NULL;
}
poptok(parser);
pop_tok(tokbuf);
END:
return node;
}
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) {
enum TokenType ttype;
struct expr_prec_table_t* work;
struct ASTNode* left;
static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec) {
tok_type_t ttype;
struct expr_prec_table_t* work;
ast_node_t* left;
while (1) {
flushpeektok(parser);
ttype = peektoktype(parser);
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
work = &expr_table[ttype];
// FIXME
if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
@ -365,16 +379,16 @@ static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedenc
}
if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) {
if (work->parser != NULL) {
left = work->parser(parser, NULL);
left = work->parser(tokbuf, symtab, NULL);
} else {
left = parse_primary_expression(parser);
left = parse_primary_expression(tokbuf, symtab);
}
} else if (work->ptype == INFIX_PARSER) {
if (work->parser == NULL)
break;
if (work->prec <= prec)
break;
left = work->parser(parser, left);
left = work->parser(tokbuf, symtab, left);
}
// assert(left != NULL);
}
@ -382,9 +396,11 @@ static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedenc
return left;
}
struct ASTNode* parse_expr(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
ast_node_t* parse_expr(parser_t* parser) {
tok_buf_t* tokbuf = &(parser->tokbuf);
symtab_t *symtab = parser->symtab;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
switch (ttype) {
case TOKEN_NOT:
case TOKEN_AND:
@ -401,9 +417,9 @@ struct ASTNode* parse_expr(struct Parser* parser) {
case TOKEN_SUB_SUB:
case TOKEN_SIZEOF:
case TOKEN_IDENT:
return parse_subexpression(parser, PREC_EXPRESSION);
return NEXT(PREC_EXPRESSION);
default:
error("Want expr but not got %s", get_token_name(ttype));
error("Want expr but not got %s", get_tok_name(ttype));
break;
}
}

View File

@ -6,34 +6,21 @@
#define FUNC_PARAM_CACHE_SIZE 32 // 合理初始值可覆盖99%常见情况
#endif
// Fixed-size token cache holding at most FUNC_PARAM_CACHE_SIZE tokens of a
// function parameter list.
struct FuncParamCache {
struct Token tokens[FUNC_PARAM_CACHE_SIZE];
int read_pos; // current read position
int write_pos; // write position
int depth; // current cache depth
};
// Return the type of the token at the cache's read position and advance that
// position by one. No bounds check is performed.
static enum TokenType peekcachetype(struct FuncParamCache* cache) {
    int slot = cache->read_pos++;
    return cache->tokens[slot].type;
}
// TODO 语义分析压入符号表
static void parse_params(struct Parser* parser, struct FuncParamCache* cache, struct ASTNode* node) {
// = peekcachetype(cache);
enum TokenType ttype;
// if (ttype != TOKEN_L_PAREN) {
// error("function expected '('\n");
// }
struct ASTNode *params = new_ast_node();
node->func.params = params;
int params_size = 0;
static void parse_params(parser_t* parser, tok_buf_t* cache, ast_node_t* node) {
tok_type_t ttype;
ast_node_t *params = new_ast_node();
node->decl_func.params = params;
vector_init(params->params.params);
while ((ttype = peekcachetype(cache)) != TOKEN_R_PAREN) {
int depth = 1;
while (depth) {
ttype = peek_tok_type(cache);
switch (ttype) {
case TOKEN_COMMA:
break;
case TOKEN_ELLIPSIS:
ttype = peekcachetype(cache);
ttype = peek_tok_type(cache);
if (ttype != TOKEN_R_PAREN) {
error("... must be a last parameter list (expect ')')");
}
@ -41,9 +28,29 @@ static void parse_params(struct Parser* parser, struct FuncParamCache* cache, st
error("not implement");
break;
case TOKEN_IDENT:
params->children[params_size++] = NULL;
// TODO 静态数组
flush_peek_tok(cache);
ast_node_t* id_node = new_ast_ident_node(peek_tok(cache));
ast_node_t* node = new_ast_node();
node->type = NT_DECL_VAR;
node->decl_val.name = id_node;
// TODO typing sys
node->decl_val.type = NULL;
node->decl_val.expr_stmt = NULL;
node->decl_val.data = NULL;
vector_push(params->params.params, node);
symtab_add_symbol(parser->symtab, id_node->syms.tok.val.str, node, 0);
break;
case TOKEN_L_PAREN: {
depth++;
break;
}
case TOKEN_R_PAREN: {
depth--;
break;
}
default:
break;
// TODO 使用cache的类型解析
// parse_type(parser);
// TODO type parse
@ -51,39 +58,42 @@ static void parse_params(struct Parser* parser, struct FuncParamCache* cache, st
// ttype = peekcachetype(cache);
// if (ttype != TOKEN_IDENT) {
// node->node_type = NT_DECL_FUNC;
// flushpeektok(parser);
// flush_peek_tok(tokbuf);
// continue;
// }
// error("function expected ')' or ','\n");
}
pop_tok(cache);
}
}
enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* cache) {
cache->depth = 1;
cache->read_pos = 0;
cache->write_pos = 0;
ast_type_t check_is_func_decl(tok_buf_t* tokbuf, tok_buf_t* cache) {
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
int depth = 1;
while (cache->depth) {
struct Token* tok = peektok(parser);
poptok(parser);
if (cache->write_pos >= FUNC_PARAM_CACHE_SIZE - 1) {
while (depth) {
tok_t* tok = peek_tok(tokbuf);
pop_tok(tokbuf);
if (cache->size >= cache->cap - 1) {
error("function parameter list too long");
}
cache->tokens[cache->write_pos++] = *tok;
cache->buf[cache->size++] = *tok;
switch (tok->type) {
case TOKEN_L_PAREN:
cache->depth++;
depth++;
break;
case TOKEN_R_PAREN:
cache->depth--;
depth--;
break;
default:
break;
}
}
cache->end = cache->size;
switch (peektoktype(parser)) {
switch (peek_tok_type(tokbuf)) {
case TOKEN_SEMICOLON:
poptok(parser);
pop_tok(tokbuf);
return NT_DECL_FUNC;
case TOKEN_L_BRACE:
return NT_FUNC;
@ -93,28 +103,66 @@ enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* ca
}
}
// Parse a function declaration or definition.
// NOTE(review): this span interleaves the earlier parse_func (returning a
// node) with the later helper new_ast_node_funcdecl and the later void
// parse_func, which appends its results to parser->root->root.children.
struct ASTNode* parse_func(struct Parser* parser) {
struct ASTNode* ret_type = parse_type(parser);
struct ASTNode* func_name = parse_ident(parser);
// Allocate an NT_DECL_FUNC node with the given return-type and name nodes and
// no definition attached yet (decl_func.def == NULL).
static ast_node_t* new_ast_node_funcdecl(ast_node_t* ret, ast_node_t* name) {
ast_node_t* node = new_ast_node();
node->type = NT_DECL_FUNC;
node->decl_func.ret = ret;
node->decl_func.name = name;
node->decl_func.def = NULL;
return node;
}
struct ASTNode* node = new_ast_node();
node->func.ret = ret_type;
node->func.name = func_name;
// Later version: parses "<type> <ident> ( ... )" followed by ';' or a body,
// registers the declaration in the symbol table, and for a definition also
// parses the parameters and the block inside a fresh scope.
void parse_func(parser_t* parser) {
tok_buf_t* tokbuf = &(parser->tokbuf);
flush_peek_tok(tokbuf);
ast_node_t* ret_node = parse_type(parser);
ast_node_t* name_node = expect_pop_ident(tokbuf);
const char* func_name = name_node->syms.tok.val.str;
ast_node_t* decl = new_ast_node_funcdecl(ret_node, name_node);
flushpeektok(parser);
expecttok(parser, TOKEN_L_PAREN);
struct FuncParamCache cache;
node->type = check_is_func_decl(parser, &cache);
// Stack-backed token buffer that receives the parameter-list tokens copied
// by check_is_func_decl; it has no lexer or refill callback attached.
tok_buf_t cache;
init_tokbuf(&cache, NULL, NULL);
cache.cap = FUNC_PARAM_CACHE_SIZE;
tok_t buf[FUNC_PARAM_CACHE_SIZE];
cache.buf = buf;
ast_type_t type = check_is_func_decl(&(parser->tokbuf), &cache);
symtab_add_symbol(parser->symtab, func_name->syms.tok.constant.str, node);
if (node->type == NT_DECL_FUNC) {
return node;
// A non-NULL result means a symbol with this name was already present.
ast_node_t* prev = symtab_add_symbol(parser->symtab, func_name, decl, 1);
if (prev != NULL) {
if (prev->type != NT_DECL_FUNC) {
error("the symbol duplicate old is %d, new is func", prev->type);
}
// TODO check redeclare func is match
if (type == NT_FUNC) {
// TODO Free decl;
// Reuse the earlier declaration node for this definition.
free(decl);
decl = prev;
goto FUNC;
}
// NOTE(review): on this repeated-declaration path the freshly created
// decl is neither freed nor stored anywhere visible here.
return;
}
vector_push(parser->root->root.children, decl);
if (type == NT_DECL_FUNC) {
return;
}
FUNC:
// this field is temporarily used to detect a duplicate definition
if (decl->decl_func.def != NULL) {
error("redefinition of function %s", func_name);
}
ast_node_t* node = new_ast_node();
node->type = NT_FUNC;
node->func.decl = decl;
node->func.data = NULL;
decl->decl_func.def = node;
// Parameters and body share one scope, left again after the block is parsed.
symtab_enter_scope(parser->symtab);
parse_params(parser, &cache, node);
parse_params(parser, &cache, decl);
node->func.body = parse_block(parser);
symtab_leave_scope(parser->symtab);
return node;
vector_push(parser->root->root.children, node);
}

View File

@ -5,25 +5,30 @@
#define PROG_MAX_NODE_SIZE (1024 * 4)
#endif
// Parse a whole translation unit into parser->root (type NT_ROOT).
// NOTE(review): this span interleaves the earlier version (fixed-size
// children array indexed by child_size) with the later version (vector of
// children, with a fallback to parse_func).
void parse_prog(struct Parser* parser) {
void parse_func(parser_t* parser);
void parse_prog(parser_t* parser) {
/**
* Program := (Declaration | Definition)*
* same as
* Program := Declaration* Definition*
*/
int child_size = 0;
tok_buf_t *tokbuf = &(parser->tokbuf);
parser->root = new_ast_node();
struct ASTNode* node;
parser->root->root.children = xmalloc(sizeof(struct ASTNode*) * PROG_MAX_NODE_SIZE);
ast_node_t* node;
parser->root->type = NT_ROOT;
vector_init(parser->root->root.children);
// Loop until the next token is TOKEN_EOF.
while (1) {
flushpeektok(parser);
if (peektoktype(parser) == TOKEN_EOF) {
flush_peek_tok(tokbuf);
if (peek_tok_type(tokbuf) == TOKEN_EOF) {
break;
}
node = parse_decl(parser);
parser->root->root.children[child_size++] = node;
// A NULL result from parse_decl hands the input to parse_func, which
// appends its own nodes to the root; otherwise the declaration node is
// appended here.
if (node == NULL) {
parse_func(parser);
} else {
vector_push(parser->root->root.children, node);
}
}
parser->root->type = NT_ROOT;
parser->root->root.child_size = child_size;
return;
}

View File

@ -1,27 +1,28 @@
#include "../parser.h"
#include "ast.h"
struct ASTNode* parse_stmt(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
struct ASTNode* node = new_ast_node();
ast_node_t* parse_stmt(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
ast_node_t* node = new_ast_node();
switch (ttype) {
case TOKEN_IF: {
/**
* if (exp) stmt
* if (exp) stmt else stmt
*/
poptok(parser);
pop_tok(tokbuf);
expecttok(parser, TOKEN_L_PAREN);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
node->if_stmt.cond = parse_expr(parser);
flushpeektok(parser);
expecttok(parser, TOKEN_R_PAREN);
flush_peek_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
node->if_stmt.if_stmt = parse_stmt(parser);
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_ELSE) {
poptok(parser);
pop_tok(tokbuf);
node->if_stmt.else_stmt = parse_stmt(parser);
} else {
node->if_stmt.else_stmt = NULL;
@ -33,11 +34,11 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
/**
* switch (exp) stmt
*/
poptok(parser);
pop_tok(tokbuf);
expecttok(parser, TOKEN_L_PAREN);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
node->switch_stmt.cond = parse_expr(parser);
expecttok(parser, TOKEN_R_PAREN);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
node->switch_stmt.body = parse_stmt(parser);
node->type = NT_STMT_SWITCH;
@ -47,11 +48,11 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
/**
* while (exp) stmt
*/
poptok(parser);
pop_tok(tokbuf);
expecttok(parser, TOKEN_L_PAREN);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
node->while_stmt.cond = parse_expr(parser);
expecttok(parser, TOKEN_R_PAREN);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
node->while_stmt.body = parse_stmt(parser);
node->type = NT_STMT_WHILE;
@ -61,16 +62,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
/**
* do stmt while (exp)
*/
poptok(parser);
pop_tok(tokbuf);
node->do_while_stmt.body = parse_stmt(parser);
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_WHILE) {
error("expected while after do");
}
poptok(parser);
expecttok(parser, TOKEN_L_PAREN);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
node->do_while_stmt.cond = parse_expr(parser);
expecttok(parser, TOKEN_R_PAREN);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
node->type = NT_STMT_DOWHILE;
break;
}
@ -79,36 +80,36 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* for (init; [cond]; [iter]) stmt
*/
// node->children.stmt.for_stmt.init
poptok(parser);
ttype = peektoktype(parser);
pop_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_L_PAREN) {
error("expected ( after for");
}
poptok(parser);
pop_tok(tokbuf);
// init expr or init decl_var
// TODO need add this feature
node->for_stmt.init = parse_expr(parser);
expecttok(parser, TOKEN_SEMICOLON);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
// cond expr or null
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_SEMICOLON) {
node->for_stmt.cond = parse_expr(parser);
expecttok(parser, TOKEN_SEMICOLON);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
} else {
node->for_stmt.cond = NULL;
poptok(parser);
pop_tok(tokbuf);
}
// iter expr or null
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_R_PAREN) {
node->for_stmt.iter = parse_expr(parser);
expecttok(parser, TOKEN_R_PAREN);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
} else {
node->for_stmt.iter = NULL;
poptok(parser);
pop_tok(tokbuf);
}
node->for_stmt.body = parse_stmt(parser);
@ -120,8 +121,8 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* break ;
*/
// TODO check 导致外围 for、while 或 do-while 循环或 switch 语句终止。
poptok(parser);
expecttok(parser, TOKEN_SEMICOLON);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
node->type = NT_STMT_BREAK;
break;
@ -131,8 +132,8 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* continue ;
*/
// TODO check 导致跳过整个 for、 while 或 do-while 循环体的剩余部分。
poptok(parser);
expecttok(parser, TOKEN_SEMICOLON);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
node->type = NT_STMT_CONTINUE;
break;
@ -142,16 +143,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* return [exp] ;
*/
// TODO 终止当前函数并返回指定值给调用方函数。
poptok(parser);
ttype = peektoktype(parser);
pop_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_SEMICOLON) {
node->return_stmt.expr_stmt = parse_expr(parser);
flushpeektok(parser);
expecttok(parser, TOKEN_SEMICOLON);
flush_peek_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
} else {
node->return_stmt.expr_stmt = NULL;
pop_tok(tokbuf);
}
poptok(parser);
node->type = NT_STMT_RETURN;
break;
}
@ -161,15 +162,15 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
*/
// TODO check label 将控制无条件转移到所欲位置。
//在无法用约定的构造将控制转移到所欲位置时使用。
poptok(parser);
pop_tok(tokbuf);
// find symbol table
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_IDENT) {
error("expect identifier after goto");
}
expecttok(parser, TOKEN_SEMICOLON);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
// TODO filling label
node->goto_stmt.label = parse_ident(parser);
node->goto_stmt.label = expect_pop_ident(tokbuf);
node->type = NT_STMT_GOTO;
break;
}
@ -181,7 +182,7 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* if () ;
* for () ;
*/
poptok(parser);
pop_tok(tokbuf);
node->type = NT_STMT_EMPTY;
break;
}
@ -193,30 +194,30 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
node->type = NT_STMT_BLOCK;
break;
}
case TOKEN_IDENT: {
case TOKEN_IDENT: {
// TODO label goto
if (peektoktype(parser) != TOKEN_COLON) {
if (peek_tok_type(tokbuf) != TOKEN_COLON) {
goto EXP;
}
node->label_stmt.label = parse_ident(parser);
expecttok(parser, TOKEN_COLON);
node->label_stmt.label = expect_pop_ident(tokbuf);
expect_pop_tok(tokbuf, TOKEN_COLON);
node->type = NT_STMT_LABEL;
break;
}
case TOKEN_CASE: {
// TODO label switch
poptok(parser);
pop_tok(tokbuf);
error("unimplemented switch label");
node->label_stmt.label = parse_expr(parser);
// TODO 该表达式为const int
expecttok(parser, TOKEN_COLON);
expect_pop_tok(tokbuf, TOKEN_COLON);
node->type = NT_STMT_CASE;
break;
}
case TOKEN_DEFAULT: {
// TODO label switch default
poptok(parser);
expecttok(parser, TOKEN_COLON);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_COLON);
node->type = NT_STMT_DEFAULT;
break;
}
@ -226,15 +227,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
*/
EXP:
node->expr_stmt.expr_stmt = parse_expr(parser);
flushpeektok(parser);
ttype = peektoktype(parser);
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_SEMICOLON) {
error("exp must end with \";\"");
}
poptok(parser);
pop_tok(tokbuf);
node->type = NT_STMT_EXPR;
break;
}
}
return node;
}

View File

@ -2,162 +2,30 @@
#include "../type.h"
#include "ast.h"
// /* 状态跳转表定义 */
// typedef void (*StateHandler)(struct Parser*, struct ASTNode**);
// enum TypeParseState {
// TPS_BASE_TYPE, // 解析基础类型 (int/char等)
// TPS_QUALIFIER, // 解析限定符 (const/volatile)
// TPS_POINTER, // 解析指针 (*)
// TPS_ARRAY, // 解析数组维度 ([n])
// TPS_FUNC_PARAMS, // 解析函数参数列表
// TPS_END,
// };
// ;
// /* 状态处理函数前置声明 */
// static void handle_base_type(struct Parser*, struct ASTNode**);
// static void handle_qualifier(struct Parser*, struct ASTNode**);
// static void handle_pointer(struct Parser*, struct ASTNode**);
// static void handle_array(struct Parser*, struct ASTNode**);
// static void handle_func_params(struct Parser*, struct ASTNode**);
// static void handle_error(struct Parser*, struct ASTNode**);
// /* 状态跳转表(核心优化部分) */
// static const struct StateTransition {
// enum TokenType tok; // 触发token
// StateHandler handler; // 处理函数
// enum TypeParseState next_state; // 下一个状态
// } state_table[][8] = {
// [TPS_QUALIFIER] = {
// {TOKEN_CONST, handle_qualifier, TPS_QUALIFIER},
// {TOKEN_VOLATILE, handle_qualifier, TPS_QUALIFIER},
// {TOKEN_VOID, handle_base_type, TPS_POINTER},
// {TOKEN_CHAR, handle_base_type, TPS_POINTER},
// {TOKEN_INT, handle_base_type, TPS_POINTER},
// {TOKEN_EOF, handle_error, TPS_QUALIFIER},
// /* 其他token默认处理 */
// {0, NULL, TPS_BASE_TYPE}
// },
// [TPS_BASE_TYPE] = {
// {TOKEN_MUL, handle_pointer, TPS_POINTER},
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {TOKEN_EOF, NULL, TPS_END},
// {0, NULL, TPS_POINTER}
// },
// [TPS_POINTER] = {
// {TOKEN_MUL, handle_pointer, TPS_POINTER},
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {0, NULL, TPS_END}
// },
// [TPS_ARRAY] = {
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {0, NULL, TPS_END}
// },
// [TPS_FUNC_PARAMS] = {
// {0, NULL, TPS_END}
// }
// };
// /* 新的类型解析函数 */
// struct ASTNode* parse_type(struct Parser* p) {
// struct ASTNode* type_root = NULL;
// struct ASTNode** current = &type_root;
// enum TypeParseState state = TPS_QUALIFIER;
// while (state != TPS_END) {
// enum TokenType t = peektoktype(p);
// const struct StateTransition* trans = state_table[state];
// // 查找匹配的转换规则
// while (trans->tok != 0 && trans->tok != t) {
// trans++;
// }
// if (trans->handler) {
// trans->handler(p, current);
// } else if (trans->tok == 0) { // 默认规则
// state = trans->next_state;
// continue;
// } else {
// error("syntax error type parse error");
// }
// state = trans->next_state;
// }
// return type_root;
// }
// /* 具体状态处理函数实现 */
// static void handle_qualifier(struct Parser* p, struct ASTNode** current) {
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_QUAL;
// node->data.data_type = poptok(p).type;
// if (*current) {
// (*current)->child.decl.type = node;
// } else {
// *current = node;
// }
// }
// static void handle_base_type(struct Parser* p, struct ASTNode** current) {
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_BASE;
// node->data.data_type = poptok(p).type;
// // 链接到当前节点链的末端
// while (*current && (*current)->child.decl.type) {
// current = &(*current)->child.decl.type;
// }
// if (*current) {
// (*current)->child.decl.type = node;
// } else {
// *current = node;
// }
// }
// static void handle_pointer(struct Parser* p, struct ASTNode** current) {
// poptok(p); // 吃掉*
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_PTR;
// // 插入到当前节点之前
// node->child.decl.type = *current;
// *current = node;
// }
// /* 其他处理函数类似实现... */
// Build an NT_TERM_IDENT node from an identifier token.
// The earlier form (parser_ident_without_pop) peeks the token from the parser
// itself; the later form (new_ast_ident_node) receives the token as an
// argument. Both lines are interleaved in this span.
// Reports an error when the token is not TOKEN_IDENT. The node stores a copy
// of the token and starts with decl_node == NULL.
struct ASTNode* parser_ident_without_pop(struct Parser* parser) {
flushpeektok(parser);
struct Token* tok = peektok(parser);
ast_node_t* new_ast_ident_node(tok_t* tok) {
if (tok->type != TOKEN_IDENT) {
error("syntax error: want identifier but got %d", tok->type);
}
struct ASTNode* node = new_ast_node();
ast_node_t* node = new_ast_node();
node->type = NT_TERM_IDENT;
node->syms.tok = *tok;
node->syms.decl_node = NULL;
return node;
}
// Read the next token as an identifier, wrap it in an identifier node, then
// pop the token from the buffer.
// The earlier form is parse_ident(struct Parser*); the later form is
// expect_pop_ident(tok_buf_t*). Both are interleaved in this span.
struct ASTNode* parse_ident(struct Parser* parser) {
struct ASTNode* node = parser_ident_without_pop(parser);
poptok(parser);
ast_node_t* expect_pop_ident(tok_buf_t* tokbuf) {
flush_peek_tok(tokbuf);
tok_t* tok = peek_tok(tokbuf);
ast_node_t* node = new_ast_ident_node(tok);
pop_tok(tokbuf);
return node;
}
struct ASTNode* parse_type(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
enum DataType dtype;
ast_node_t* parse_type(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
data_type_t dtype;
switch(ttype) {
case TOKEN_VOID: dtype = TYPE_VOID; break;
case TOKEN_CHAR: dtype = TYPE_CHAR; break;
@ -170,13 +38,14 @@ struct ASTNode* parse_type(struct Parser* parser) {
error("无效的类型说明符");
}
struct ASTNode* node = new_ast_node();
ast_node_t* node = new_ast_node();
node->type = NT_TERM_TYPE;
// node->data.data_type = dtype;
poptok(parser);
// TODO added by disable warning, will add typing system
dtype += 1;
pop_tok(tokbuf);
if (peektoktype(parser) == TOKEN_MUL) {
poptok(parser);
if (peek_tok_type(tokbuf) == TOKEN_MUL) {
pop_tok(tokbuf);
}
return node;
}

View File

@ -1,136 +1,136 @@
#include "../parser.h"
#include "../type.h"
// #include "../parser.h"
// #include "../type.h"
// States of the type-parsing state machine.
enum TypeParseState {
TPS_BASE_TYPE, // parse the base type (int/char, etc.)
TPS_QUALIFIER, // parse qualifiers (const/volatile)
TPS_POINTER, // parse pointers (*)
TPS_ARRAY, // parse array dimensions ([n])
TPS_FUNC_PARAMS // parse the function parameter list
};
// enum TypeParseState {
// TPS_BASE_TYPE, // 解析基础类型 (int/char等)
// TPS_QUALIFIER, // 解析限定符 (const/volatile)
// TPS_POINTER, // 解析指针 (*)
// TPS_ARRAY, // 解析数组维度 ([n])
// TPS_FUNC_PARAMS // 解析函数参数列表
// };
// State-machine type parser. Starts in TPS_QUALIFIER and moves through
// TPS_BASE_TYPE, TPS_POINTER, TPS_ARRAY and TPS_FUNC_PARAMS, linking one node
// per recognised construct through child.decl.type. Returns type_root when
// TPS_FUNC_PARAMS sees a token other than '('.
// The `//`-prefixed code lines are a commented-out copy of the same function
// using the renamed types; they are interleaved with the active lines.
// NOTE(review): peektoktype is called once per loop iteration with no
// flushpeektok in between; if each call advances the peek cursor, a state
// change that consumes nothing will inspect the following token -- confirm.
struct ASTNode* parse_type(struct Parser* p) {
struct ASTNode* type_root = new_ast_node();
struct ASTNode* current = type_root;
current->type = NT_TYPE_BASE;
// ast_node_t* parse_type(parser_t* p) {
// ast_node_t* type_root = new_ast_node();
// ast_node_t* current = type_root;
// current->type = NT_TYPE_BASE;
enum TypeParseState state = TPS_QUALIFIER;
// pointer_level is incremented below but never read in this function
int pointer_level = 0;
// enum TypeParseState state = TPS_QUALIFIER;
// int pointer_level = 0;
while (1) {
enum TokenType t = peektoktype(p);
// while (1) {
// tok_type_t t = peektoktype(p);
switch (state) {
// base type parsing (int, char, etc.)
case TPS_BASE_TYPE:
if (is_base_type(t)) {
// current->data.data_type = token_to_datatype(t);
poptok(p);
state = TPS_POINTER;
} else {
error("Expected type specifier");
}
break;
// switch (state) {
// // base type parsing (int, char, etc.)
// case TPS_BASE_TYPE:
// if (is_base_type(t)) {
// // current->data.data_type = token_to_datatype(t);
// pop_tok(p);
// state = TPS_POINTER;
// } else {
// error("Expected type specifier");
// }
// break;
// type qualifiers (const/volatile)
case TPS_QUALIFIER:
if (t == TOKEN_CONST || t == TOKEN_VOLATILE) {
struct ASTNode* qual_node = new_ast_node();
qual_node->type = NT_TYPE_QUAL;
qual_node->data.data_type = t; // reuse the data_type field to store the qualifier
current->child.decl.type = qual_node;
current = qual_node;
poptok(p);
} else {
state = TPS_BASE_TYPE;
}
break;
// // type qualifiers (const/volatile)
// case TPS_QUALIFIER:
// if (t == TOKEN_CONST || t == TOKEN_VOLATILE) {
// ast_node_t* qual_node = new_ast_node();
// qual_node->type = NT_TYPE_QUAL;
// qual_node->data.data_type = t; // reuse the data_type field to store the qualifier
// current->child.decl.type = qual_node;
// current = qual_node;
// pop_tok(p);
// } else {
// state = TPS_BASE_TYPE;
// }
// break;
// pointer parsing (*)
case TPS_POINTER:
if (t == TOKEN_MUL) {
struct ASTNode* ptr_node = new_ast_node();
ptr_node->type = NT_TYPE_PTR;
current->child.decl.type = ptr_node;
current = ptr_node;
pointer_level++;
poptok(p);
} else {
state = TPS_ARRAY;
}
break;
// // pointer parsing (*)
// case TPS_POINTER:
// if (t == TOKEN_MUL) {
// ast_node_t* ptr_node = new_ast_node();
// ptr_node->type = NT_TYPE_PTR;
// current->child.decl.type = ptr_node;
// current = ptr_node;
// pointer_level++;
// pop_tok(p);
// } else {
// state = TPS_ARRAY;
// }
// break;
// array dimensions ([n])
case TPS_ARRAY:
if (t == TOKEN_L_BRACKET) {
poptok(p); // consume '['
struct ASTNode* arr_node = new_ast_node();
arr_node->type = NT_TYPE_ARRAY;
// // array dimensions ([n])
// case TPS_ARRAY:
// if (t == TOKEN_L_BRACKET) {
// pop_tok(p); // consume '['
// ast_node_t* arr_node = new_ast_node();
// arr_node->type = NT_TYPE_ARRAY;
// parse the array size (syntax check only)
if (peektoktype(p) != TOKEN_R_BRACKET) {
parse_expr(p); // the actual value is not computed
}
// // parse the array size (syntax check only)
// if (peektoktype(p) != TOKEN_R_BRACKET) {
// parse_expr(p); // the actual value is not computed
// }
expecttok(p, TOKEN_R_BRACKET);
current->child.decl.type = arr_node;
current = arr_node;
} else {
state = TPS_FUNC_PARAMS;
}
break;
// expecttok(p, TOKEN_R_BRACKET);
// current->child.decl.type = arr_node;
// current = arr_node;
// } else {
// state = TPS_FUNC_PARAMS;
// }
// break;
// function parameter list
case TPS_FUNC_PARAMS:
if (t == TOKEN_L_PAREN) {
struct ASTNode* func_node = new_ast_node();
func_node->type = NT_TYPE_FUNC;
current->child.decl.type = func_node;
// // function parameter list
// case TPS_FUNC_PARAMS:
// if (t == TOKEN_L_PAREN) {
// ast_node_t* func_node = new_ast_node();
// func_node->type = NT_TYPE_FUNC;
// current->child.decl.type = func_node;
// parse the parameter list (structure only, types are not validated)
parse_param_list(p, func_node);
current = func_node;
} else {
return type_root; // type parsing finished
}
break;
}
}
}
// Report whether a token type falls in the inclusive range
// TOKEN_VOID..TOKEN_DOUBLE, i.e. names a base type. Returns 1 or 0.
static int is_base_type(enum TokenType t) {
    if (t < TOKEN_VOID) {
        return 0;
    }
    return t <= TOKEN_DOUBLE;
}
// // 转换token到数据类型简化版
// static enum DataType token_to_datatype(enum TokenType t) {
// static enum DataType map[] = {
// [TOKEN_VOID] = DT_VOID,
// [TOKEN_CHAR] = DT_CHAR,
// [TOKEN_INT] = DT_INT,
// // ...其他类型映射
// };
// return map[t];
// // 解析参数列表(仅结构,不验证类型)
// parse_param_list(p, func_node);
// current = func_node;
// } else {
// return type_root; // 类型解析结束
// }
// break;
// }
// }
// }
// Parse a parameter list (lightweight): '(' then, until ')', a type, an
// optional identifier and an optional ','.
// NOTE(review): neither `func` nor `param` is used after being set, so the
// parsed parameter types are not attached to anything here.
// NOTE(review): the loop has no explicit end-of-input check -- confirm that
// parse_type reports an error on unexpected tokens.
static void parse_param_list(struct Parser* p, struct ASTNode* func) {
expecttok(p, TOKEN_L_PAREN);
while (peektoktype(p) != TOKEN_R_PAREN) {
struct ASTNode* param = parse_type(p); // parse the type recursively
// allow an optional parameter name (syntax check only)
if (peektoktype(p) == TOKEN_IDENT) {
poptok(p); // consume the parameter name
}
if (peektoktype(p) == TOKEN_COMMA) {
poptok(p);
}
}
expecttok(p, TOKEN_R_PAREN);
}
// // 判断是否是基础类型
// static int is_base_type(tok_type_t t) {
// return t >= TOKEN_VOID && t <= TOKEN_DOUBLE;
// }
// // // 转换token到数据类型简化版
// // static enum DataType token_to_datatype(tok_type_t t) {
// // static enum DataType map[] = {
// // [TOKEN_VOID] = DT_VOID,
// // [TOKEN_CHAR] = DT_CHAR,
// // [TOKEN_INT] = DT_INT,
// // // ...其他类型映射
// // };
// // return map[t];
// // }
// // 解析参数列表(轻量级)
// static void parse_param_list(parser_t* p, ast_node_t* func) {
// expecttok(p, TOKEN_L_PAREN);
// while (peektoktype(p) != TOKEN_R_PAREN) {
// ast_node_t* param = parse_type(p); // 递归解析类型
// // 允许可选参数名(仅语法检查)
// if (peektoktype(p) == TOKEN_IDENT) {
// pop_tok(p); // 吃掉参数名
// }
// if (peektoktype(p) == TOKEN_COMMA) {
// pop_tok(p);
// }
// }
// expecttok(p, TOKEN_R_PAREN);
// }

View File

@ -1,67 +1,17 @@
#include "parser.h"
#include "type.h"
#include "ast/ast.h"
// Drop one token from the parser's ring buffer: advance cur_idx by one slot
// (modulo PARSER_MAX_TOKEN_QUEUE) and decrement size.
// Returns -1 when the buffer holds no tokens, 0 otherwise.
int poptok(struct Parser* parser) {
    if (parser->size != 0) {
        parser->cur_idx = (parser->cur_idx + 1) % PARSER_MAX_TOKEN_QUEUE;
        parser->size -= 1;
        return 0;
    }
    return -1;
}
// Reset the peek cursor to the current (consume) position, so the next
// peektok call looks at the slot right after cur_idx.
void flushpeektok(struct Parser* parser) {
parser->peek_idx = parser->cur_idx;
}
// Advance the peek cursor by one slot and return a pointer to the token in
// that slot. When the cursor was already at end_idx, a new token is first
// read from the lexer into the slot and size/end_idx are updated.
// Warns when the buffer is full, and reports "buffer overflow" when a refill
// would be needed while the buffer is full.
struct Token* peektok(struct Parser* parser) {
    const int next = (parser->peek_idx + 1) % PARSER_MAX_TOKEN_QUEUE;

    if (parser->size >= PARSER_MAX_TOKEN_QUEUE) {
        warn("peek maybe too deep");
    }

    /* Only pull from the lexer when peeking past the last buffered token. */
    if (parser->peek_idx == parser->end_idx) {
        if (parser->size == PARSER_MAX_TOKEN_QUEUE) {
            // FIXME
            error("buffer overflow");
        }
        get_valid_token(parser->lexer, &(parser->TokenBuffer[next]));
        parser->size += 1;
        parser->end_idx = next;
    }

    parser->peek_idx = next;
    return &(parser->TokenBuffer[next]);
}
// Peek the next token via peektok and return only its type.
// Like peektok, every call advances the peek cursor.
enum TokenType peektoktype(struct Parser* parser) {
return peektok(parser)->type;
}
void expecttok(struct Parser* parser, enum TokenType type) {
struct Token* tok = peektok(parser);
if (tok->type != type) {
error("expected tok: %s, got %s", get_token_name(type), get_token_name(tok->type));
} else {
poptok(parser);
}
}
// Initialise a parser: clear the AST pointers and attach lexer and symbol table.
// NOTE(review): this span interleaves the earlier version (ring-buffer indices
// stored on the parser itself) with the later version (a tok_buf_t backed by
// parser->TokenBuffer).
void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab) {
void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab) {
parser->cur_node = NULL;
parser->root = NULL;
parser->cur_idx = 0;
parser->peek_idx = 0;
parser->end_idx = 0;
parser->size = 0;
parser->lexer = lexer;
parser->symtab = symtab;
// TODO
// The token buffer refills through get_valid_token and uses the parser's own
// TokenBuffer array as storage; cap is that array's element count.
init_tokbuf(&parser->tokbuf, lexer, (get_tokbuf_func)get_valid_token);
parser->tokbuf.cap = sizeof(parser->TokenBuffer) / sizeof(parser->TokenBuffer[0]);
parser->tokbuf.buf = parser->TokenBuffer;
}
// Entry point of the parser: parses the whole program via parse_prog.
// The two signature lines are the earlier and the later form of the function.
void run_parser(struct Parser* parser) {
void run_parser(parser_t* parser) {
parse_prog(parser);
}

View File

@ -2,32 +2,24 @@
#define __PARSER_H__
#include "../frontend.h"
#include "../lexer/lexer.h"
// #include "symbol_table/symtab.h"
// #include "ast/ast.h"
#include "../lexer/lexer.h"
typedef struct lexer lexer_t;
typedef struct symtab symtab_t;
#define PARSER_MAX_TOKEN_QUEUE 16
// Parser state.
// NOTE(review): this span interleaves the earlier `struct Parser` (ring-buffer
// indices kept directly on the parser) with the later `parser_t` (indices
// replaced by a tok_buf_t member).
struct Parser {
typedef struct parser {
struct ASTNode* root;
struct ASTNode* cur_node;
struct Lexer* lexer;
struct SymbolTable* symtab;
int cur_idx;
int peek_idx;
int end_idx;
int size;
struct Token TokenBuffer[PARSER_MAX_TOKEN_QUEUE];
lexer_t* lexer;
symtab_t* symtab;
tok_buf_t tokbuf;
// backing storage of PARSER_MAX_TOKEN_QUEUE tokens
tok_t TokenBuffer[PARSER_MAX_TOKEN_QUEUE];
int err_level;
};
} parser_t;
void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab);
void run_parser(struct Parser* parser);
void flushpeektok(struct Parser* parser);
int poptok(struct Parser* parser);
struct Token* peektok(struct Parser* parser);
enum TokenType peektoktype(struct Parser* parser);
void expecttok(struct Parser* parser, enum TokenType type);
void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab);
void run_parser(parser_t* parser);
#endif

View File

@ -3,25 +3,25 @@
#include "scope.h"
#include "symtab.h"
typedef struct SymbolTable SymbolTable;
typedef symtab_t symtab_t;
typedef struct Scope Scope;
void init_symtab(SymbolTable* symtab) {
void init_symtab(symtab_t* symtab) {
symtab->global_scope = scope_create(NULL);
symtab->cur_scope = symtab->global_scope;
}
void del_symtab(SymbolTable* symtab) {
void del_symtab(symtab_t* symtab) {
scope_destroy(symtab->global_scope);
}
void symtab_enter_scope(SymbolTable* symtab) {
void symtab_enter_scope(symtab_t* symtab) {
struct Scope* scope = scope_create(symtab->cur_scope);
scope->base_offset = symtab->cur_scope->base_offset + symtab->cur_scope->cur_offset;
symtab->cur_scope = scope;
}
void symtab_leave_scope(SymbolTable* symtab) {
void symtab_leave_scope(symtab_t* symtab) {
Scope * scope = symtab->cur_scope;
if (scope == NULL) {
error("cannot leave NULL scope or global scope");
@ -30,16 +30,20 @@ void symtab_leave_scope(SymbolTable* symtab) {
scope_destroy(scope);
}
void symtab_add_symbol(SymbolTable* symtab, const char* name, void* ast_node) {
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate) {
struct Scope* scope = symtab->cur_scope;
if (scope_lookup_current(scope, name) != NULL) {
// TODO WARNING
// return NULL;
void* node = scope_lookup_current(scope, name);
if (node != NULL) {
if (!can_duplicate) {
error("duplicate symbol %s", name);
}
return node;
}
scope_insert(scope, name, ast_node);
return node;
}
void* symtab_lookup_symbol(SymbolTable* symtab, const char* name) {
void* symtab_lookup_symbol(symtab_t* symtab, const char* name) {
return scope_lookup(symtab->cur_scope, name);
}

View File

@ -2,17 +2,17 @@
#ifndef __SYMTAB_H__
#define __SYMTAB_H__
struct SymbolTable {
typedef struct symtab {
struct Scope* cur_scope;
struct Scope* global_scope;
};
} symtab_t;
void init_symtab(struct SymbolTable* symtab);
void del_symtab(struct SymbolTable* symtab);
void init_symtab(symtab_t* symtab);
void del_symtab(symtab_t* symtab);
void symtab_enter_scope(struct SymbolTable* symtab);
void symtab_leave_scope(struct SymbolTable* symtab);
void symtab_add_symbol(struct SymbolTable* symtab, const char* name, void* ast_node);
void* symtab_lookup_symbol(struct SymbolTable* symtab, const char* name);
void symtab_enter_scope(symtab_t* symtab);
void symtab_leave_scope(symtab_t* symtab);
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate);
void* symtab_lookup_symbol(symtab_t* symtab, const char* name);
#endif

View File

@ -23,7 +23,7 @@ int main(int argc, char** argv) {
struct SymbolTable symtab;
init_symtab(&symtab);
struct Parser parser;
struct parser parser;
init_parser(&parser, &lexer, &symtab);
parse_prog(&parser);

View File

@ -3,7 +3,7 @@
#include "../lexer/token.h"
enum DataType {
typedef enum {
TYPE_VOID,
TYPE_CHAR,
TYPE_SHORT,
@ -30,6 +30,6 @@ enum DataType {
TYPE_ATOMIC,
TYPE_TYPEDEF,
};
} data_type_t;
#endif

View File

View File

@ -5,48 +5,61 @@ typedef struct ASTNode ASTNode;
// 上下文结构,记录生成过程中的状态
typedef struct {
ir_func_t* current_func; // 当前处理的函数
ir_bblock_t* current_block; // 当前基本块
uint32_t vreg_counter; // 虚拟寄存器计数器
ir_func_t* cur_func; // 当前处理的函数
ir_bblock_t* cur_block; // 当前基本块
} IRGenContext;
IRGenContext ctx;
ir_prog_t prog;
ir_type_t type_i32 = {
.tag = IR_TYPE_INT32,
};
// Reset the bookkeeping fields of an IR node: name and type are cleared and
// the used_by vector is initialised. No other field (e.g. tag, data) is
// touched here.
static inline void init_ir_node_t(ir_node_t* node) {
node->name = NULL;
node->type = NULL;
vector_init(node->used_by);
}
static inline ir_node_t* new_ir_node_t() {
// Allocate an IR node with xmalloc() and reset its name/type/used_by fields
// through init_ir_node_t(). Returns the freshly allocated node.
static inline ir_node_t* new_irnode() {
    ir_node_t* node = xmalloc(sizeof(ir_node_t));
    init_ir_node_t(node);
    // The previous version fell off the end without returning, so callers
    // such as emit_instr() received an indeterminate pointer.
    return node;
}
// Allocate a basic block labelled `name` (the pointer is stored, not copied)
// with an initialised, empty instruction vector.
static inline ir_bblock_t* new_irbblock(const char* name) {
    ir_bblock_t* bb = xmalloc(sizeof(ir_bblock_t));

    vector_init(bb->instrs);
    bb->label = name;

    return bb;
}
ir_node_t* emit_instr(ir_bblock_t* block) {
if (block == NULL) block = ctx.current_block;
ir_node_t *node = new_ir_node_t();
if (block == NULL) block = ctx.cur_block;
ir_node_t *node = new_irnode();
vector_push(block->instrs, node);
return vector_at(block->instrs, block->instrs.size - 1);
}
void emit_br(ir_node_t cond, const char* true_lable, const char* false_lable) {
ir_node_t br = {
.tag = IR_NODE_RET,
.data = {
ir_node_t* emit_br(ir_node_t* cond, ir_bblock_t* trueb, ir_bblock_t* falseb) {
ir_node_t* br = emit_instr(NULL);
*br = (ir_node_t) {
.tag = IR_NODE_BRANCH,
.data.branch = {
.cond = cond,
.true_bblock = trueb,
.false_bblock = falseb,
}
};
// emit_instr(br, NULL);
return br;
}
ir_node_t* gen_ir_expr(ASTNode* node) {
switch (node->type) {
case NT_TERM_VAL: {
ir_node_t* ir = new_ir_node_t();
ir_node_t* ir = new_irnode();
*ir = (ir_node_t) {
.tag = IR_NODE_CONST_INT,
.data.const_int = {
.val = node->syms.tok.constant.i,
.val = node->syms.tok.val.i,
},
};
return ir;
@ -56,15 +69,18 @@ ir_node_t* gen_ir_expr(ASTNode* node) {
return decl;
}
case NT_TERM_CALL: {
// TODO
ir_node_t* ir = new_ir_node_t();
ir_node_t* ir = emit_instr(NULL);
*ir = (ir_node_t) {
.tag = IR_NODE_CALL,
.data.call = {
.callee = NULL,
.callee = node->call.func_decl->decl_func.def->func.data,
},
};
vector_init(ir->data.call.args);
for (int i = 0; i < node->call.params->params.params.size; i++) {
vector_push(ir->data.call.args, \
gen_ir_expr(node->call.params->params.params.data[i]));
}
return ir;
}
default:
@ -191,42 +207,75 @@ NEXT:
}
return ret;
}
// Allocate an IR function named `name` (pointer stored, not copied) whose
// type is currently always type_i32, with empty bblocks and params vectors.
static ir_func_t* new_irfunc(const char* name) {
    ir_func_t *func = xmalloc(sizeof(ir_func_t));
    *func = (ir_func_t) {
        .name = name,
        // TODO typing system
        .type = &type_i32,
    };
    // The vectors must be initialised AFTER the whole-struct assignment
    // above: the compound literal zero-fills every member it does not name,
    // which would otherwise overwrite whatever vector_init() set up.
    vector_init(func->bblocks);
    vector_init(func->params);
    return func;
}
// Generate IR for a function definition (`node` must be NT_FUNC) into `func`.
// Creates the "entry" basic block, registers `func` in prog.funcs, emits one
// IR_NODE_ALLOC per declared parameter, then generates the body. The global
// generation context `ctx` is saved on entry and restored on exit.
static void gen_ir_func(ASTNode* node, ir_func_t* func) {
assert(node->type == NT_FUNC);
ir_bblock_t *entry = new_irbblock("entry");
vector_push(func->bblocks, entry);
vector_push(prog.funcs, func);
// Switch the generation context to this function / its entry block.
IRGenContext prev_ctx = ctx;
ctx.cur_func = func;
ctx.cur_block = entry;
ast_node_t* params = node->func.decl->decl_func.params;
for (int i = 0; i < params->params.params.size; i ++) {
// Each parameter gets an alloc node in the entry block; the node is also
// recorded in func->params and on the parameter's AST declaration.
ir_node_t* decl = emit_instr(entry);
ast_node_t* param = params->params.params.data[i];
vector_push(func->params, decl);
*decl = (ir_node_t) {
.tag = IR_NODE_ALLOC,
.name = param->decl_val.name->syms.tok.val.str,
.type = &type_i32,
};
param->decl_val.data = decl;
}
gen_ir_from_ast(node->func.body);
ctx = prev_ctx;
}
void gen_ir_from_ast(struct ASTNode* node) {
switch (node->type) {
case NT_ROOT: {
for (int i = 0; i < node->root.child_size; i ++) {
gen_ir_from_ast(node->root.children[i]);
for (int i = 0; i < node->root.children.size; i ++) {
gen_ir_from_ast(node->root.children.data[i]);
}
} break;
break;
}
case NT_DECL_FUNC: {
ir_func_t* func = new_irfunc(node->decl_func.name->syms.tok.val.str);
if (node->decl_func.def == NULL) {
ast_node_t* def = new_ast_node();
def->func.body = NULL;
def->func.decl = node;
node->decl_func.def = def;
vector_push(prog.extern_funcs, func);
}
node->decl_func.def->func.data = func;
break;
}
case NT_FUNC: {
ir_func_t *func = xmalloc(sizeof(ir_func_t));
*func = (ir_func_t) {
.name = node->func.name->syms.tok.constant.str,
};
vector_init(func->bblocks);
ir_bblock_t *entry = xmalloc(sizeof(ir_bblock_t));
*entry = (ir_bblock_t) {
.label = "entry",
};
vector_init(entry->instrs);
vector_push(func->bblocks, entry);
IRGenContext prev_ctx = ctx;
ctx = (IRGenContext) {
.current_func = func,
.current_block = vector_at(func->bblocks, 0),
.vreg_counter = 0,
};
gen_ir_from_ast(node->func.body);
ctx = prev_ctx;
vector_push(prog.funcs, func);
} break;
gen_ir_func(node, node->func.data);
break;
}
case NT_STMT_RETURN: {
ir_node_t* ret = gen_ir_expr(node->return_stmt.expr_stmt);
ir_node_t* ret = NULL;
if (node->return_stmt.expr_stmt != NULL) {
ret = gen_ir_expr(node->return_stmt.expr_stmt);
}
ir_node_t* ir = emit_instr(NULL);
*ir = (ir_node_t) {
.tag = IR_NODE_RET,
@ -236,22 +285,54 @@ void gen_ir_from_ast(struct ASTNode* node) {
}
}
};
vector_push(ctx.cur_func->bblocks, new_irbblock(NULL));
break;
}
case NT_STMT_BLOCK: {
gen_ir_from_ast(node->block_stmt.block);
break;
}
case NT_BLOCK: {
for (int i = 0; i < node->block.child_size; i ++) {
gen_ir_from_ast(node->block.children[i]);
for (int i = 0; i < node->block.children.size; i ++) {
gen_ir_from_ast(node->block.children.data[i]);
}
break;
}
case NT_STMT_IF: {
ir_node_t *cond = gen_ir_expr(node->if_stmt.cond);
ir_bblock_t* trueb = new_irbblock("true_block");
ir_bblock_t* falseb = new_irbblock("false_block");
emit_br(cond, trueb, falseb);
// xmalloc();
// ir_bblock_t then_block = {
// };
node->if_stmt.if_stmt;
node->if_stmt.else_stmt;
vector_push(ctx.cur_func->bblocks, trueb);
ctx.cur_block = trueb;
gen_ir_from_ast(node->if_stmt.if_stmt);
ir_node_t* jmp = emit_instr(NULL);
if (node->if_stmt.else_stmt != NULL) {
vector_push(ctx.cur_func->bblocks, falseb);
ctx.cur_block = falseb;
gen_ir_from_ast(node->if_stmt.else_stmt);
ir_node_t* jmp = emit_instr(NULL);
ctx.cur_block = new_irbblock("jmp_block");
vector_push(ctx.cur_func->bblocks, ctx.cur_block);
*jmp = (ir_node_t) {
.tag = IR_NODE_JUMP,
.data.jump = {
.target_bblock = ctx.cur_block,
},
};
} else {
ctx.cur_block = falseb;
}
*jmp = (ir_node_t) {
.tag = IR_NODE_JUMP,
.data.jump = {
.target_bblock = ctx.cur_block,
},
};
break;
}
case NT_STMT_WHILE: {
@ -275,7 +356,7 @@ void gen_ir_from_ast(struct ASTNode* node) {
ir_node_t* ret_node = emit_instr(NULL);
*ret_node = (ir_node_t) {
.tag = IR_NODE_ALLOC,
.name = node->decl_val.name->syms.tok.constant.str,
.name = node->decl_val.name->syms.tok.val.str,
.type = &type_i32,
};
node->decl_val.data = ret_node;

View File

@ -54,6 +54,7 @@ typedef struct {
typedef struct {
vector_header(global, ir_node_t*);
vector_header(funcs, ir_func_t*);
vector_header(extern_funcs, ir_func_t*);
} ir_prog_t;
struct ir_node {
@ -131,15 +132,15 @@ struct ir_node {
} op;
struct {
ir_node_t* cond;
ir_bblock_t true_bblock;
ir_bblock_t false_bblock;
ir_bblock_t* true_bblock;
ir_bblock_t* false_bblock;
} branch;
struct {
ir_bblock_t target_bblock;
ir_bblock_t* target_bblock;
} jump;
struct {
ir_func_t callee;
vector_header(args, ir_node_t);
ir_func_t* callee;
vector_header(args, ir_node_t*);
} call;
struct {
ir_node_t* ret_val;

View File

@ -1,5 +1,7 @@
int main(void) {
int a;
a = 1 + 2 * 3;
return a;
int add(int a, int b) {
return a + b;
}
int main(void) {
return add(1, 2);
}

192
test_rv_vm/README.md Normal file
View File

@ -0,0 +1,192 @@
# riscv_emufun (mini-rv32ima)
Click below for the YouTube video introducing this project:
[![Writing a Really Tiny RISC-V Emulator](https://img.youtube.com/vi/YT5vB3UqU_E/0.jpg)](https://www.youtube.com/watch?v=YT5vB3UqU_E) [![But Will It Run Doom?](https://img.youtube.com/vi/uZMNK17VCMU/0.jpg)](https://www.youtube.com/watch?v=uZMNK17VCMU)
## What
mini-rv32ima is a single-file-header, [mini-rv32ima.h](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.h), in the [STB Style library](https://github.com/nothings/stb) that:
* Implements a RISC-V **rv32ima/Zifencei†+Zicsr** (and partial su), with CLINT and MMIO.
* Is about **400 lines** of actual code.
* Has **no dependencies**, not even libc.
* Is **easily extensible**. So you can easily add CSRs, instructions, MMIO, etc!
* Is pretty **performant**. (~450 coremark on my laptop, about 1/2 the speed of QEMU)
* Is human-readable and in **basic C** code.
* Is "**incomplete**" in that it didn't implement the tons of the spec that Linux doesn't (and you shouldn't) use.
* Is trivially **embeddable** in applications.
It has a [demo wrapper](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.c) that:
* Implements a CLI, SYSCON, UART, DTB and Kernel image loading.
* And is only around **250 lines** of code itself.
* Compiles down to a **~18kB executable** and only relies on libc.
†: Zifencei+RV32A are stubbed, so tweaks will need to be made if you want to emulate a multiprocessor system with this emulator.
Just see the `mini-rv32ima` folder.
It's "fully functional" now in that I can run Linux, apps, etc. Compile flat binaries and drop them in an image.
## Why
I'm working on a really really simple C Risc-V emulator. So simple it doesn't even have an MMU (Memory Management Unit). I have a few goals, they include:
* Furthering RV32-NOMMU work to improve Linux support for RV32-NOMMU. (Imagine if we could run Linux on the $1 ESP32-C3)
* Learning more about RV32 and writing emulators.
* Being further inspired by @pimaker's amazing work on [Running Linux in a Pixel Shader](https://blog.pimaker.at/texts/rvc1/) and having the sneaking suspicion performance could be even better!
* Hoping to port it to some weird places.
* Understand the *most simplistic* system you can run Linux on and trying to push that boundary.
* Continue to include my [education of people about assembly language](https://www.youtube.com/watch?v=Gelf0AyVGy4).
## How
Windows instructions (Just playing with the image)
* Clone this repo.
* Install or have TinyCC. [Powershell Installer](https://github.com/cntools/Install-TCC) or [Regular Windows Installer](https://github.com/cnlohr/tinycc-win64-installer/releases/tag/v0_0.9.27)
* Run `winrun.ps` in the `windows` folder.
WSL (for the full toolchain and image build):
* You will need to remove all spaces from your path i.e. `export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/mnt/c/Windows/system32:/snap/bin` and continue the instructions. P.S. What in the world was Windows thinking, putting a space between "Program" and "Files"??!?
Linux instructions (both):
* Clone this repo.
* Install `git build-essential` and/or whatever other requirements are in place for [buildroot](https://buildroot.org/).
* `make testdlimage`
* It automatically downloads the image (~1MB) and runs the emulator.
* Should be up and running in about 2.5s depending on internet speed.
You can do in-depth work on Linux by:
* `make everything`
If you want to play with the bare metal system, see below, or if you have the toolchain installed, just:
* `make testbare`
If you just want to play emdoom, and use the prebuilt image:
* On Windows, run `windows\winrundoom.ps1`
* On Linux, `cd mini-rv32ima`, and type `make testdoom`
## Questions?
* Why not rv64?
* Because then I can't run it as easily in a pixel shader if I ever hope to.
* Can I add an MMU?
* Yes. It actually probably wouldn't be too difficult.
* Should I add an MMU?
* No. It is important to further support for nommu systems to empower minimal Risc-V designs!
Everything else: Contact us on my Discord: https://discord.com/invite/CCeyWyZ
## How do I use this in my own project?
You should not need to modify `mini-rv32ima.h`, but instead, use `mini-rv32ima.c` as a template for what you are trying to do in your own project.
You can override all functionality by defining the following macros. Here are examples of what `mini-rv32ima.c` does with them. You can see the definition of the functions, or augment their definitions, by altering `mini-rv32ima.c`.
| Macro | Definition / Comment |
| --- | --- |
| `MINIRV32WARN( x... )` | `printf( x );` <br> Warnings emitted from mini-rv32ima.h |
| `MINIRV32_DECORATE` | `static` <br> How to decorate the functions. |
| `MINI_RV32_RAM_SIZE` | `ram_amt` <br> A variable, how big is system RAM? |
| `MINIRV32_IMPLEMENTATION` | If using mini-rv32ima.h, need to define this. |
| `MINIRV32_POSTEXEC( pc, ir, retval )` | `{ if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } }` <br> If you want to execute something every time slice. |
| `MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val )` | `if( HandleControlStore( addy, val ) ) return val;` <br> Called on non-RAM memory access. |
| `MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval )` | `rval = HandleControlLoad( addy );` <br> Called on non-RAM memory access return a value. |
| `MINIRV32_OTHERCSR_WRITE( csrno, value )` | `HandleOtherCSRWrite( image, csrno, value );` <br> You can use CSRs for control requests. |
| `MINIRV32_OTHERCSR_READ( csrno, value )` | `value = HandleOtherCSRRead( image, csrno );` <br> You can use CSRs for control requests. |
## Hopeful goals?
* Further drive down needed features to run Linux.
* Remove need for RV32A extension on systems with only one CPU.
* Support for relocatable ELF executables.
* Add support for an unreal UART. One that's **much** simpler than the current 8250 driver.
* Maybe run this in a pixelshader too!
* Get opensbi working with this.
* Be able to "embed" rv32 emulators in random projects.
* Can I use early console to be a full system console?
* Can I increase the maximum contiguous memory allocatable?
## Special Thanks
* For @regymm and their [patches to buildroot](https://github.com/regymm/buildroot) and help!
* callout: Regymm's [quasiSoC project](https://github.com/regymm/quasiSoC/).
* Buildroot (For being so helpful).
* @vowstar and their team working on [k210-linux-nommu](https://github.com/vowstar/k210-linux-nommu).
* This [guide](https://jborza.com/emulation/2020/04/09/riscv-environment.html)
* [rvcodecjs](https://luplab.gitlab.io/rvcodecjs/) I probably went through over 1,000 codes here.
* @splinedrive from the [KianV RISC-V noMMU SoC](https://github.com/splinedrive/kianRiscV/tree/master/linux_socs/kianv_harris_mcycle_edition?s=09) project.
## More details
If you want to build the kernel yourself:
* `make everything`
* About 20 minutes. (Or 4+ hours if you're on [Windows Subsystem for Linux 2](https://github.com/microsoft/WSL/issues/4197))
* And you should be dropped into a Linux busybox shell with some little tools that were compiled here.
## Emdoom notes
* Emdoom building is in the `experiments/emdoom` folder
* You *MUST* build your kernel with `MAX_ORDER` set to >12 in `buildroot/output/build/linux-5.19/include/linux/mmzone.h` if you are building your own image.
* You CAN use the pre-existing image that is described above.
* On Windows, it will be very slow. Not sure why.
If you want to use bare metal to build your binaries so you don't need buildroot, you can use the rv64 gcc in 32-bit mode built into Ubuntu 20.04 and up.
```
sudo apt-get install gcc-multilib gcc-riscv64-unknown-elf make
```
## Links
* "Hackaday Supercon 2022: Charles Lohr - Assembly in 2022: Yes! We Still Use it and Here's Why" : https://www.youtube.com/watch?v=Gelf0AyVGy4
## Attic
## General notes:
* https://github.com/cnlohr/riscv_emufun/commit/2f09cdeb378dc0215c07eb63f5a6fb43dbbf1871#diff-b48ccd795ae9aced07d022bf010bf9376232c4d78210c3113d90a8d349c59b3dL440
(These things don't currently work)
### Building Tests
(This does not work, now)
```
cd riscv-tests
export CROSS_COMPILE=riscv64-linux-gnu-
export PLATFORM_RISCV_XLEN=32
CC=riscv64-linux-gnu-gcc ./configure
make XLEN=32 RISCV_PREFIX=riscv64-unknown-elf- RISCV_GCC_OPTS="-g -O1 -march=rv32imaf -mabi=ilp32f -I/usr/include"
```
### Building OpenSBI
(This does not currently work!)
```
cd opensbi
export CROSS_COMPILE=riscv64-unknown-elf-
export PLATFORM_RISCV_XLEN=32
make
```
### Extra links
* Clear outline of CSRs: https://five-embeddev.com/riscv-isa-manual/latest/priv-csrs.html
* Fonts used in videos: https://audiolink.dev/
### Using custom build
Where yminpatch is the patch from the mailing list.
```
rm -rf buildroot
git clone git://git.buildroot.net/buildroot
cd buildroot
git am < ../yminpatch.txt
make qemu_riscv32_nommu_virt_defconfig
make
# Or use our configs.
```
Note: For emdoom you will need to modify include/linux/mmzone.h and change MAX_ORDER to 13.
### Buildroot Notes
Add this:
https://github.com/cnlohr/buildroot/pull/1/commits/bc890f74354e7e2f2b1cf7715f6ef334ff6ed1b2
Use this:
https://github.com/cnlohr/buildroot/commit/e97714621bfae535d947817e98956b112eb80a75

520
test_rv_vm/mini-rv32ima.c Normal file
View File

@ -0,0 +1,520 @@
// Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "default64mbdtc.h"
// Just default RAM amount is 64MB.
uint32_t ram_amt = 64*1024*1024;
int fail_on_all_faults = 0;
static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber );
static uint64_t GetTimeMicroseconds();
static void ResetKeyboardInput();
static void CaptureKeyboardInput();
static uint32_t HandleException( uint32_t ir, uint32_t retval );
static uint32_t HandleControlStore( uint32_t addy, uint32_t val );
static uint32_t HandleControlLoad( uint32_t addy );
static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value );
static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno );
static void MiniSleep();
static int IsKBHit();
static int ReadKBByte();
// This is the functionality we want to override in the emulator.
// think of this as the way the emulator's processor is connected to the outside world.
#define MINIRV32WARN( x... ) printf( x );
#define MINIRV32_DECORATE static
#define MINI_RV32_RAM_SIZE ram_amt
#define MINIRV32_IMPLEMENTATION
#define MINIRV32_POSTEXEC( pc, ir, retval ) { if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } }
#define MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val ) if( HandleControlStore( addy, val ) ) return val;
#define MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval ) rval = HandleControlLoad( addy );
#define MINIRV32_OTHERCSR_WRITE( csrno, value ) HandleOtherCSRWrite( image, csrno, value );
#define MINIRV32_OTHERCSR_READ( csrno, value ) value = HandleOtherCSRRead( image, csrno );
#include "mini-rv32ima.h"
uint8_t * ram_image = 0;
struct MiniRV32IMAState * core;
const char * kernel_command_line = 0;
static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image );
// Entry point of the demo wrapper around mini-rv32ima.h.
//
// 1. Parses the command line (-m RAM size, -f image, -b dtb, -k kernel
//    command line, -c instruction count, -t time divisor, -l/-p/-s/-d flags).
// 2. Allocates guest RAM, loads the image at offset 0 and a device tree
//    (from file, the built-in default, or none when -b disable) just below
//    the core state, which lives in the last bytes of RAM.
// 3. Runs MiniRV32IMAStep() in slices until the instruction budget is
//    exhausted or the guest requests power-off / restart through SYSCON.
//
// Returns 0 on power-off or normal completion, 1 on usage error, 3 on a
// fault when -d is given, and a negative value on load errors.
int main( int argc, char ** argv )
{
	int i;
	long long instct = -1;
	int show_help = 0;
	int time_divisor = 1;
	int fixed_update = 0;
	int do_sleep = 1;
	int single_step = 0;
	int dtb_ptr = 0;
	const char * image_file_name = 0;
	const char * dtb_file_name = 0;
	for( i = 1; i < argc; i++ )
	{
		const char * param = argv[i];
		int param_continue = 0; // Can combine parameters, like -lpt x
		do
		{
			if( param[0] == '-' || param_continue )
			{
				switch( param[1] )
				{
				case 'm': if( ++i < argc ) ram_amt = SimpleReadNumberInt( argv[i], ram_amt ); break;
				case 'c': if( ++i < argc ) instct = SimpleReadNumberInt( argv[i], -1 ); break;
				case 'k': if( ++i < argc ) kernel_command_line = argv[i]; break;
				case 'f': image_file_name = (++i<argc)?argv[i]:0; break;
				case 'b': dtb_file_name = (++i<argc)?argv[i]:0; break;
				case 'l': param_continue = 1; fixed_update = 1; break;
				case 'p': param_continue = 1; do_sleep = 0; break;
				case 's': param_continue = 1; single_step = 1; break;
				case 'd': param_continue = 1; fail_on_all_faults = 1; break;
				case 't': if( ++i < argc ) time_divisor = SimpleReadNumberInt( argv[i], 1 ); break;
				default:
					if( param_continue )
						param_continue = 0;
					else
						show_help = 1;
					break;
				}
			}
			else
			{
				show_help = 1;
				break;
			}
			param++;
		} while( param_continue );
	}
	if( show_help || image_file_name == 0 || time_divisor <= 0 )
	{
		fprintf( stderr, "./mini-rv32imaf [parameters]\n\t-m [ram amount]\n\t-f [running image]\n\t-k [kernel command line]\n\t-b [dtb file, or 'disable']\n\t-c instruction count\n\t-s single step with full processor state\n\t-t time divion base\n\t-l lock time base to instruction count\n\t-p disable sleep when wfi\n\t-d fail out immediately on all faults\n" );
		return 1;
	}

	// The core state is stored in the last bytes of RAM, so RAM must at
	// least be able to hold it; otherwise every offset computed below
	// would underflow.
	if( ram_amt < sizeof( struct MiniRV32IMAState ) )
	{
		fprintf( stderr, "Error: RAM amount too small to hold the core state.\n" );
		return -4;
	}

	ram_image = malloc( ram_amt );
	if( !ram_image )
	{
		fprintf( stderr, "Error: could not allocate system image.\n" );
		return -4;
	}

restart:
	{
		FILE * f = fopen( image_file_name, "rb" );
		if( !f || ferror( f ) )
		{
			fprintf( stderr, "Error: \"%s\" not found\n", image_file_name );
			return -5;
		}
		fseek( f, 0, SEEK_END );
		long flen = ftell( f );
		fseek( f, 0, SEEK_SET );
		if( flen < 0 )
		{
			// ftell() failed (e.g. non-seekable input); a negative length
			// must never reach fread() as a size.
			fprintf( stderr, "Error: Could not load image.\n" );
			return -7;
		}
		if( flen > ram_amt )
		{
			fprintf( stderr, "Error: Could not fit RAM image (%ld bytes) into %d\n", flen, ram_amt );
			return -6;
		}

		memset( ram_image, 0, ram_amt );
		if( fread( ram_image, flen, 1, f ) != 1)
		{
			fprintf( stderr, "Error: Could not load image.\n" );
			return -7;
		}
		fclose( f );

		if( dtb_file_name )
		{
			if( strcmp( dtb_file_name, "disable" ) == 0 )
			{
				// No DTB reading.
			}
			else
			{
				f = fopen( dtb_file_name, "rb" );
				if( !f || ferror( f ) )
				{
					fprintf( stderr, "Error: \"%s\" not found\n", dtb_file_name );
					return -5;
				}
				fseek( f, 0, SEEK_END );
				long dtblen = ftell( f );
				fseek( f, 0, SEEK_SET );
				// The DTB sits directly below the core state; reject files
				// that would push dtb_ptr below the start of RAM.
				if( dtblen < 0 || (unsigned long)dtblen > ram_amt - sizeof( struct MiniRV32IMAState ) )
				{
					fprintf( stderr, "Error: Could not fit dtb \"%s\" into RAM\n", dtb_file_name );
					return -9;
				}
				dtb_ptr = ram_amt - dtblen - sizeof( struct MiniRV32IMAState );
				if( fread( ram_image + dtb_ptr, dtblen, 1, f ) != 1 )
				{
					fprintf( stderr, "Error: Could not open dtb \"%s\"\n", dtb_file_name );
					return -9;
				}
				fclose( f );
			}
		}
		else
		{
			// Load a default dtb.
			if( sizeof( default64mbdtb ) > ram_amt - sizeof( struct MiniRV32IMAState ) )
			{
				fprintf( stderr, "Error: Could not fit default dtb into RAM\n" );
				return -9;
			}
			dtb_ptr = ram_amt - sizeof(default64mbdtb) - sizeof( struct MiniRV32IMAState );
			memcpy( ram_image + dtb_ptr, default64mbdtb, sizeof( default64mbdtb ) );
			if( kernel_command_line )
			{
				// Fixed-size field inside the skeleton DTB; copy is bounded to 54 bytes.
				strncpy( (char*)( ram_image + dtb_ptr + 0xc0 ), kernel_command_line, 54 );
			}
		}
	}

	CaptureKeyboardInput();

	// The core lives at the end of RAM.
	core = (struct MiniRV32IMAState *)(ram_image + ram_amt - sizeof( struct MiniRV32IMAState ));
	core->pc = MINIRV32_RAM_IMAGE_OFFSET;
	core->regs[10] = 0x00; //hart ID
	core->regs[11] = dtb_ptr?(dtb_ptr+MINIRV32_RAM_IMAGE_OFFSET):0; //dtb_pa (Must be valid pointer) (Should be pointer to dtb)
	core->extraflags |= 3; // Machine-mode.

	if( dtb_file_name == 0 )
	{
		// Update system ram size in DTB (but if and only if we're using the default DTB)
		// Warning - this will need to be updated if the skeleton DTB is ever modified.
		uint32_t * dtb = (uint32_t*)(ram_image + dtb_ptr);
		if( dtb[0x13c/4] == 0x00c0ff03 )
		{
			uint32_t validram = dtb_ptr;
			// Store validram byte-swapped.
			dtb[0x13c/4] = (validram>>24) | ((( validram >> 16 ) & 0xff) << 8 ) | (((validram>>8) & 0xff ) << 16 ) | ( ( validram & 0xff) << 24 );
		}
	}

	// Image is loaded.
	uint64_t rt;
	uint64_t lastTime = (fixed_update)?0:(GetTimeMicroseconds()/time_divisor);
	int instrs_per_flip = single_step?1:1024;
	for( rt = 0; rt < instct+1 || instct < 0; rt += instrs_per_flip )
	{
		uint64_t * this_ccount = ((uint64_t*)&core->cyclel);
		uint32_t elapsedUs = 0;
		if( fixed_update )
			elapsedUs = *this_ccount / time_divisor - lastTime;
		else
			elapsedUs = GetTimeMicroseconds()/time_divisor - lastTime;
		lastTime += elapsedUs;

		if( single_step )
			DumpState( core, ram_image);

		int ret = MiniRV32IMAStep( core, ram_image, 0, elapsedUs, instrs_per_flip ); // Execute upto 1024 cycles before breaking out.
		switch( ret )
		{
			case 0: break;
			case 1: if( do_sleep ) MiniSleep(); *this_ccount += instrs_per_flip; break;
			case 3: instct = 0; break;
			case 0x7777: goto restart; //syscon code for restart
			case 0x5555: printf( "POWEROFF@0x%08x%08x\n", core->cycleh, core->cyclel ); return 0; //syscon code for power-off
			default: printf( "Unknown failure\n" ); break;
		}
	}

	DumpState( core, ram_image);
	return 0;
}
//////////////////////////////////////////////////////////////////////////
// Platform-specific functionality
//////////////////////////////////////////////////////////////////////////
#if defined(WINDOWS) || defined(WIN32) || defined(_WIN32)
#include <windows.h>
#include <conio.h>
#define strtoll _strtoi64
// Windows variant: prepares the console for the VM. The only action is the
// empty system() call below.
static void CaptureKeyboardInput()
{
system(""); // Poorly documented trick: Enable VT100 Windows mode.
}
// Windows variant: intentionally a no-op, since CaptureKeyboardInput() on this
// platform changes nothing that would have to be restored here.
static void ResetKeyboardInput()
{
}
// Windows variant: short pause used by main() when the step function reports
// return code 1 and sleeping is enabled. Calls Win32 Sleep(1), i.e. 1 ms.
static void MiniSleep()
{
Sleep(1);
}
// Windows variant: current performance-counter reading converted to
// microseconds. The counter frequency is queried once and cached in a
// function-local static.
static uint64_t GetTimeMicroseconds()
{
	static LARGE_INTEGER frequency;
	LARGE_INTEGER now;

	if( frequency.QuadPart == 0 )
		QueryPerformanceFrequency( &frequency );
	QueryPerformanceCounter( &now );

	uint64_t ticks = (uint64_t)now.QuadPart;
	uint64_t ticks_per_second = (uint64_t)frequency.QuadPart;
	return ( ticks * 1000000LL ) / ticks_per_second;
}
// Windows variant: reports whether a key press is pending by forwarding the
// result of _kbhit().
static int IsKBHit()
{
return _kbhit();
}
// Windows variant: read one key with _getch() and translate Windows
// extended-key codes into VT100 escape sequences. An extended key (prefix
// 224) is delivered over three consecutive calls: 27 (ESC), '[', then the
// VT100 letter for the key. Carriage return (13) is mapped to line feed (10).
static int ReadKBByte()
{
	// 0: idle, 1: ESC was returned and '[' is due, 2: final letter is due.
	static int is_escape_sequence = 0;

	if( is_escape_sequence == 1 )
	{
		is_escape_sequence++;
		return '[';
	}

	int key = _getch();

	if( !is_escape_sequence )
	{
		if( key == 13 ) return 10; //cr->lf
		if( key == 224 )
		{
			// Escape arrow keys
			is_escape_sequence = 1;
			return 27;
		}
		return key;
	}

	is_escape_sequence = 0;
	switch( key )
	{
		case 'H': return 'A'; // Up
		case 'P': return 'B'; // Down
		case 'K': return 'D'; // Left
		case 'M': return 'C'; // Right
		case 'G': return 'H'; // Home
		case 'O': return 'F'; // End
		default: return key; // Unknown code.
	}
}
#else
#include <sys/ioctl.h>
#include <termios.h>
#include <unistd.h>
#include <signal.h>
#include <sys/time.h>
// SIGINT handler: dump the processor state and terminate with status 0.
// The handler takes the signal number so that its type matches the
// void (*)(int) expected by signal(); the value itself is not used.
// NOTE(review): DumpState() uses printf and this calls exit(), neither of
// which is async-signal-safe; kept as in the original for a debug tool.
static void CtrlC( int sig )
{
	(void)sig;
	DumpState( core, ram_image);
	exit( 0 );
}
// Override keyboard, so we can capture all keyboard input for the VM.
// Override keyboard, so we can capture all keyboard input for the VM.
// Registers ResetKeyboardInput() to run at exit, installs the SIGINT
// handler, and switches the terminal on fd 0 out of canonical mode with
// echo disabled.
static void CaptureKeyboardInput()
{
	// Hook exit, because we want to re-enable keyboard.
	atexit(ResetKeyboardInput);
	signal(SIGINT, CtrlC);

	struct termios term;
	// If fd 0 is not a terminal (e.g. input is piped), tcgetattr() fails and
	// `term` stays uninitialised; do not feed it to tcsetattr().
	if( tcgetattr(0, &term) != 0 )
		return;
	term.c_lflag &= ~(ICANON | ECHO); // Disable echo as well
	tcsetattr(0, TCSANOW, &term);
}
// Restore canonical mode and echo on the terminal attached to fd 0.
// Registered with atexit() by CaptureKeyboardInput().
static void ResetKeyboardInput()
{
	// Re-enable echo, etc. on keyboard.
	struct termios term;
	// Nothing to restore when fd 0 is not a terminal; also avoids passing an
	// uninitialised termios to tcsetattr().
	if( tcgetattr(0, &term) != 0 )
		return;
	term.c_lflag |= ICANON | ECHO;
	tcsetattr(0, TCSANOW, &term);
}
// POSIX variant: short pause used by main() when the step function reports
// return code 1 and sleeping is enabled. Sleeps for 500 microseconds.
static void MiniSleep()
{
usleep(500);
}
// POSIX variant: wall-clock time from gettimeofday(), expressed as a single
// microsecond count (seconds * 1,000,000 + microseconds).
static uint64_t GetTimeMicroseconds()
{
	struct timeval now;
	gettimeofday( &now, 0 );

	uint64_t whole_seconds_us = ((uint64_t)(now.tv_sec)) * 1000000LL;
	return now.tv_usec + whole_seconds_us;
}
static int is_eofd;
// POSIX variant: read a single byte from stdin.
// Returns the byte as a value in 0..255, or -1 when stdin has already been
// marked as end-of-file (is_eofd) or when read() delivers nothing.
static int ReadKBByte()
{
	if( is_eofd ) return -1;
	// unsigned char so that bytes >= 0x80 are not sign-extended into negative
	// ints (0xFF would otherwise be indistinguishable from the -1 error value).
	unsigned char rxchar = 0;
	int rread = read(fileno(stdin), &rxchar, 1);

	if( rread > 0 ) // Tricky: getchar can't be used with arrow keys.
		return rxchar;
	else
		return -1;
}
// POSIX variant: report whether input is waiting on fd 0.
// Returns 1 when bytes are pending, 0 when none are, and -1 once stdin has
// been detected as end-of-file (the state is remembered in is_eofd).
static int IsKBHit()
{
	if( is_eofd ) return -1;
	// Initialised so that a failing ioctl() (which leaves the output
	// untouched) is treated as "nothing waiting" instead of reading garbage.
	int byteswaiting = 0;
	if( ioctl(0, FIONREAD, &byteswaiting) < 0 )
		byteswaiting = 0;
	// With nothing pending, a zero-length write() to stdin is used as a probe;
	// a non-zero result is taken to mean end-of-file.
	if( !byteswaiting && write( fileno(stdin), 0, 0 ) != 0 ) { is_eofd = 1; return -1; } // Is end-of-file for
	return !!byteswaiting;
}
#endif
//////////////////////////////////////////////////////////////////////////
// Rest of functions functionality
//////////////////////////////////////////////////////////////////////////
// Hook invoked (via MINIRV32_POSTEXEC) when a step reports a non-zero
// exception code. Currently a pass-through: the code is returned unchanged
// and `ir` is not inspected.
static uint32_t HandleException( uint32_t ir, uint32_t code )
{
	(void)ir;
	switch( code )
	{
	case 3:
		// Weird opcode emitted by duktape on exit.
		// Could handle other opcodes here.
		break;
	default:
		break;
	}
	return code;
}
// Handle a store to a non-RAM (memory-mapped control) address.
// Returns non-zero only for a SYSCON write, in which case the written value
// is returned; every other address yields 0.
static uint32_t HandleControlStore( uint32_t addy, uint32_t val )
{
	switch( addy )
	{
	case 0x10000000: //UART 8250 / 16550 Data Buffer
		printf( "%c", val );
		fflush( stdout );
		break;
	case 0x11004004: //CLNT
		core->timermatchh = val;
		break;
	case 0x11004000: //CLNT
		core->timermatchl = val;
		break;
	case 0x11100000: //SYSCON (reboot, poweroff, etc.)
		core->pc = core->pc + 4;
		return val; // NOTE: PC will be PC of Syscon.
	default:
		break;
	}
	return 0;
}
// Handle a load from a non-RAM (memory-mapped control) address and return
// the value read; unknown addresses read as 0.
static uint32_t HandleControlLoad( uint32_t addy )
{
	switch( addy )
	{
	// Emulating a 8250 / 16550 UART
	case 0x10000005:
		return 0x60 | IsKBHit();
	case 0x10000000:
		if( IsKBHit() )
			return ReadKBByte();
		break;
	case 0x1100bffc: // https://chromitem-soc.readthedocs.io/en/latest/clint.html
		return core->timerh;
	case 0x1100bff8:
		return core->timerl;
	default:
		break;
	}
	return 0;
}
// Handle writes to the custom debug CSRs:
//   0x136 prints the value as a decimal number,
//   0x137 prints it as 8 hex digits,
//   0x138 prints the NUL-terminated string at guest address `value`,
//   0x139 prints it as a single character.
// Any other CSR number is ignored.
static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value )
{
	switch( csrno )
	{
	case 0x136:
		printf( "%d", value ); fflush( stdout );
		break;
	case 0x137:
		printf( "%08x", value ); fflush( stdout );
		break;
	case 0x138:
	{
		//Print "string"
		uint32_t first = value - MINIRV32_RAM_IMAGE_OFFSET;
		uint32_t last = first;
		if( first >= ram_amt )
			printf( "DEBUG PASSED INVALID PTR (%08x)\n", value );
		// Scan forward to the terminating NUL, never leaving guest RAM.
		for( ; last < ram_amt && image[last] != 0; last++ )
			;
		if( last != first )
			fwrite( image + first, last - first, 1, stdout );
		break;
	}
	case 0x139:
		putchar( value ); fflush( stdout );
		break;
	default:
		break;
	}
}
// Input CSR handled outside the core: reading 0x140 returns the next keyboard
// byte, or -1 when IsKBHit() reports nothing pending. Every other CSR reads 0.
static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno )
{
	(void)image; // The RAM image is not needed for any CSR handled here.
	if( csrno != 0x140 )
		return 0;
	if( IsKBHit() )
		return ReadKBByte();
	return -1;
}
// Parses an integer literal with an optional C-style radix prefix.
//   "0x.." / "0X.."  hexadecimal
//   "0b.." / "0B.."  binary
//   "0..."           octal
//   anything else    decimal
// number:        text to parse; may be NULL or empty.
// defaultNumber: value returned when number is NULL/empty or no digits could
//                be converted.
// Returns the parsed value ("0" alone yields 0), otherwise defaultNumber.
// Trailing characters after the digits are ignored, as with strtoll.
static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber )
{
	if( !number || !number[0] ) return defaultNumber;
	int radix = 10;
	if( number[0] == '0' )
	{
		char nc = number[1];
		if( nc == 0 ) return 0;
		if( nc == 'x' || nc == 'X' )
		{
			// Accept both prefix cases; previously "0X.." fell into the octal path.
			radix = 16;
			number += 2;
		}
		else if( nc == 'b' || nc == 'B' )
		{
			radix = 2;
			number += 2;
		}
		else
		{
			// Leading zero only: skip it and parse the rest as octal.
			radix = 8;
			number += 1;
		}
	}
	char * endptr;
	// Keep the result in the same signed type that strtoll produces and that we return.
	int64_t ret = strtoll( number, &endptr, radix );
	// endptr == number means strtoll consumed nothing, i.e. no valid digits.
	return ( endptr == number ) ? defaultNumber : ret;
}
// Prints a one-line snapshot of the core: the program counter, the instruction
// word at that address (or a placeholder when pc does not leave room for a
// full 4-byte read inside the RAM image), and all 32 integer registers.
static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image )
{
	uint32_t * regs = core->regs;
	uint32_t pc = core->pc;
	uint32_t pc_offset = pc - MINIRV32_RAM_IMAGE_OFFSET;

	printf( "PC: %08x ", pc );

	// pc_offset is unsigned, so only the upper bound needs checking.
	if( pc_offset < ram_amt - 3 )
	{
		uint32_t ir = *(uint32_t*)( ram_image + pc_offset );
		printf( "[0x%08x] ", ir );
	}
	else
	{
		printf( "[xxxxxxxxxx] " );
	}

	// x0..x15
	printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ",
		regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7],
		regs[8], regs[9], regs[10], regs[11], regs[12], regs[13], regs[14], regs[15] );
	// x16..x31
	printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n",
		regs[16], regs[17], regs[18], regs[19], regs[20], regs[21], regs[22], regs[23],
		regs[24], regs[25], regs[26], regs[27], regs[28], regs[29], regs[30], regs[31] );
}

547
test_rv_vm/mini-rv32ima.h Normal file
View File

@ -0,0 +1,547 @@
// Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses.
#ifndef _MINI_RV32IMAH_H
#define _MINI_RV32IMAH_H
/**
To use mini-rv32ima.h for the bare minimum, do the following:
#define MINI_RV32_RAM_SIZE ram_amt
#define MINIRV32_IMPLEMENTATION
#include "mini-rv32ima.h"
Though, that's not _that_ interesting. You probably want I/O!
Notes:
* There is a dedicated CLNT at 0x10000000.
* There is free MMIO from there to 0x12000000.
* You can put things like a UART, or whatever there.
* Feel free to override any of the functionality with macros.
*/
// ---- Overridable configuration hooks --------------------------------------
// Each macro below is only defined if the includer has not already defined it.

// Diagnostic hook; the default expands to an empty statement.
#ifndef MINIRV32WARN
#define MINIRV32WARN( x... );
#endif
// Storage/linkage specifier placed in front of MiniRV32IMAStep.
#ifndef MINIRV32_DECORATE
#define MINIRV32_DECORATE static
#endif
// Guest address that corresponds to offset 0 of the RAM image.
#ifndef MINIRV32_RAM_IMAGE_OFFSET
#define MINIRV32_RAM_IMAGE_OFFSET 0x80000000
#endif
// Predicate: is guest address n inside the MMIO window? Consulted by the
// load/store paths once an address has been found to lie outside RAM.
// Note: n is evaluated twice.
#ifndef MINIRV32_MMIO_RANGE
#define MINIRV32_MMIO_RANGE(n) (0x10000000 <= (n) && (n) < 0x12000000)
#endif
// Invoked as MINIRV32_POSTEXEC( pc, ir, trap ) after each instruction.
#ifndef MINIRV32_POSTEXEC
#define MINIRV32_POSTEXEC(...);
#endif
// Invoked as MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, rs2 ) for MMIO stores.
#ifndef MINIRV32_HANDLE_MEM_STORE_CONTROL
#define MINIRV32_HANDLE_MEM_STORE_CONTROL(...);
#endif
// Invoked as MINIRV32_HANDLE_MEM_LOAD_CONTROL( rsval, rval ) for MMIO loads.
#ifndef MINIRV32_HANDLE_MEM_LOAD_CONTROL
#define MINIRV32_HANDLE_MEM_LOAD_CONTROL(...);
#endif
// Invoked as MINIRV32_OTHERCSR_WRITE( csrno, writeval ) for CSRs the core
// does not implement itself.
#ifndef MINIRV32_OTHERCSR_WRITE
#define MINIRV32_OTHERCSR_WRITE(...);
#endif
// Invoked as MINIRV32_OTHERCSR_READ( csrno, rval ) for CSRs the core does not
// implement itself.
#ifndef MINIRV32_OTHERCSR_READ
#define MINIRV32_OTHERCSR_READ(...);
#endif
// Default memory bus: direct typed accesses into `image` at byte offset `ofs`.
// `image` must be in scope where these expand (it is a parameter of
// MiniRV32IMAStep). Neither `ofs` nor `val` is parenthesized, so pass simple
// expressions. Define MINIRV32_CUSTOM_MEMORY_BUS to supply your own versions.
#ifndef MINIRV32_CUSTOM_MEMORY_BUS
#define MINIRV32_STORE4( ofs, val ) *(uint32_t*)(image + ofs) = val
#define MINIRV32_STORE2( ofs, val ) *(uint16_t*)(image + ofs) = val
#define MINIRV32_STORE1( ofs, val ) *(uint8_t*)(image + ofs) = val
#define MINIRV32_LOAD4( ofs ) *(uint32_t*)(image + ofs)
#define MINIRV32_LOAD2( ofs ) *(uint16_t*)(image + ofs)
#define MINIRV32_LOAD1( ofs ) *(uint8_t*)(image + ofs)
#define MINIRV32_LOAD2_SIGNED( ofs ) *(int16_t*)(image + ofs)
#define MINIRV32_LOAD1_SIGNED( ofs ) *(int8_t*)(image + ofs)
#endif
// As a note: We group these in fours, because in HLSL, we will be operating
// with uint4's. We are going to transfer uint4 data to/from system RAM.
//
// We're going to try to keep the full processor state to 12 x uint4
// (32 registers + 16 further words = 48 x uint32_t).
struct MiniRV32IMAState
{
// Integer registers x0..x31, indexed by register number.
uint32_t regs[32];
// Program counter (guest address).
uint32_t pc;
// mstatus CSR (0x300).
uint32_t mstatus;
// Low/high words of the instruction counter; the low word is readable as CSR 0xC00.
uint32_t cyclel;
uint32_t cycleh;
// Low/high words of the timer, advanced by elapsedUs on every step call.
uint32_t timerl;
uint32_t timerh;
// Low/high words of the timer compare value; a timer interrupt is raised
// once the timer exceeds it. A compare value of zero disables the check.
uint32_t timermatchl;
uint32_t timermatchh;
// mscratch (0x340), mtvec (0x305), mie (0x304), mip (0x344) CSRs.
uint32_t mscratch;
uint32_t mtvec;
uint32_t mie;
uint32_t mip;
// mepc (0x341), mtval (0x343), mcause (0x342) CSRs, filled in on a trap.
uint32_t mepc;
uint32_t mtval;
uint32_t mcause;
// Note: only a few bits are used. (Machine = 3, User = 0)
// Bits 0..1 = privilege.
// Bit 2 = WFI (Wait for interrupt)
// Bit 3+ = Load/Store reservation LSBs.
uint32_t extraflags;
};
// Declaration of the emulator step function (skipped when the includer
// provides its own signature through MINIRV32_STEPPROTO).
//   state        processor state to advance
//   image        host pointer to the guest RAM image
//   vProcAddress not referenced by the implementation in this header
//   elapsedUs    amount added to the timer before executing
//   count        maximum number of instructions to execute in this call
// Returns 1 when the core is waiting for an interrupt (WFI), 0 otherwise.
#ifndef MINIRV32_STEPPROTO
MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count );
#endif
#ifdef MINIRV32_IMPLEMENTATION
// Accessors used by the step function for CSR fields and integer registers.
// They expect a variable named `state` (pointer to MiniRV32IMAState) to be in
// scope. Define MINIRV32_CUSTOM_INTERNALS to replace them with your own.
#ifndef MINIRV32_CUSTOM_INTERNALS
#define CSR( x ) state->x
#define SETCSR( x, val ) { state->x = val; }
#define REG( x ) state->regs[x]
#define REGSET( x, val ) { state->regs[x] = val; }
#endif
// Advances the emulated core by one call:
//   1. Adds elapsedUs to the timer (timerl/timerh) and sets or clears MTIP
//      (mip bit 7) by comparing the timer against timermatchl/timermatchh.
//   2. Returns 1 right away if the WFI flag (extraflags bit 2) is still set.
//   3. Otherwise either takes a pending and enabled timer interrupt, or
//      executes up to `count` instructions from `image`, stopping at a trap.
//   4. On a trap or interrupt, fills mcause/mtval/mepc/mstatus and redirects
//      pc to mtvec.
// Returns 1 when the core is in (or has just entered) WFI, and 0 otherwise.
//
// Conventions used inside the body:
//   * `trap` holds (mcause + 1) so that 0 means "no trap"; a value with the
//     MSB set denotes an interrupt.
//   * `pc` is advanced by 4 at the bottom of the instruction loop, which is
//     why jumps and branches store (target - 4).
//   * Setting rdid to 0 suppresses the register write-back for instructions
//     that have no destination register.
//   * vProcAddress is not referenced in this body.
#ifndef MINIRV32_STEPPROTO
MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count )
#else
MINIRV32_STEPPROTO
#endif
{
// Advance the timer, carrying into the high word on wrap-around.
uint32_t new_timer = CSR( timerl ) + elapsedUs;
if( new_timer < CSR( timerl ) ) CSR( timerh )++;
CSR( timerl ) = new_timer;
// Handle Timer interrupt.
if( ( CSR( timerh ) > CSR( timermatchh ) || ( CSR( timerh ) == CSR( timermatchh ) && CSR( timerl ) > CSR( timermatchl ) ) ) && ( CSR( timermatchh ) || CSR( timermatchl ) ) )
{
CSR( extraflags ) &= ~4; // Clear WFI
CSR( mip ) |= 1<<7; //MTIP of MIP // https://stackoverflow.com/a/61916199/2926815 Fire interrupt.
}
else
CSR( mip ) &= ~(1<<7);
// If WFI, don't run processor.
if( CSR( extraflags ) & 4 )
return 1;
uint32_t trap = 0;
uint32_t rval = 0;
uint32_t pc = CSR( pc );
uint32_t cycle = CSR( cyclel );
if( ( CSR( mip ) & (1<<7) ) && ( CSR( mie ) & (1<<7) /*mtie*/ ) && ( CSR( mstatus ) & 0x8 /*mie*/) )
{
// Timer interrupt.
trap = 0x80000007;
// Undone by the "pc += 4" in the interrupt branch of the trap handling below.
pc -= 4;
}
else // No timer interrupt? Execute a bunch of instructions.
for( int icount = 0; icount < count; icount++ )
{
uint32_t ir = 0;
rval = 0;
cycle++;
uint32_t ofs_pc = pc - MINIRV32_RAM_IMAGE_OFFSET;
if( ofs_pc >= MINI_RV32_RAM_SIZE )
{
trap = 1 + 1; // Handle access violation on instruction read.
break;
}
else if( ofs_pc & 3 )
{
trap = 1 + 0; //Handle PC-misaligned access
break;
}
else
{
ir = MINIRV32_LOAD4( ofs_pc );
// Destination register number; cleared below by instructions that must not write back.
uint32_t rdid = (ir >> 7) & 0x1f;
switch( ir & 0x7f )
{
case 0x37: // LUI (0b0110111)
rval = ( ir & 0xfffff000 );
break;
case 0x17: // AUIPC (0b0010111)
rval = pc + ( ir & 0xfffff000 );
break;
case 0x6F: // JAL (0b1101111)
{
int32_t reladdy = ((ir & 0x80000000)>>11) | ((ir & 0x7fe00000)>>20) | ((ir & 0x00100000)>>9) | ((ir&0x000ff000));
if( reladdy & 0x00100000 ) reladdy |= 0xffe00000; // Sign extension.
rval = pc + 4;
pc = pc + reladdy - 4;
break;
}
case 0x67: // JALR (0b1100111)
{
uint32_t imm = ir >> 20;
int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0);
rval = pc + 4;
pc = ( (REG( (ir >> 15) & 0x1f ) + imm_se) & ~1) - 4;
break;
}
case 0x63: // Branch (0b1100011)
{
uint32_t immm4 = ((ir & 0xf00)>>7) | ((ir & 0x7e000000)>>20) | ((ir & 0x80) << 4) | ((ir >> 31)<<12);
if( immm4 & 0x1000 ) immm4 |= 0xffffe000;
int32_t rs1 = REG((ir >> 15) & 0x1f);
int32_t rs2 = REG((ir >> 20) & 0x1f);
// immm4 now becomes the branch target minus 4 (the loop adds 4 afterwards).
immm4 = pc + immm4 - 4;
rdid = 0;
switch( ( ir >> 12 ) & 0x7 )
{
// BEQ, BNE, BLT, BGE, BLTU, BGEU
case 0: if( rs1 == rs2 ) pc = immm4; break;
case 1: if( rs1 != rs2 ) pc = immm4; break;
case 4: if( rs1 < rs2 ) pc = immm4; break;
case 5: if( rs1 >= rs2 ) pc = immm4; break; //BGE
case 6: if( (uint32_t)rs1 < (uint32_t)rs2 ) pc = immm4; break; //BLTU
case 7: if( (uint32_t)rs1 >= (uint32_t)rs2 ) pc = immm4; break; //BGEU
default: trap = (2+1);
}
break;
}
case 0x03: // Load (0b0000011)
{
uint32_t rs1 = REG((ir >> 15) & 0x1f);
uint32_t imm = ir >> 20;
int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0);
uint32_t rsval = rs1 + imm_se;
rsval -= MINIRV32_RAM_IMAGE_OFFSET;
// Outside RAM (the last 3 bytes are excluded for every access width):
// try the MMIO window, otherwise raise a load access fault.
if( rsval >= MINI_RV32_RAM_SIZE-3 )
{
rsval += MINIRV32_RAM_IMAGE_OFFSET;
if( MINIRV32_MMIO_RANGE( rsval ) ) // UART, CLNT
{
MINIRV32_HANDLE_MEM_LOAD_CONTROL( rsval, rval );
}
else
{
trap = (5+1);
rval = rsval;
}
}
else
{
switch( ( ir >> 12 ) & 0x7 )
{
//LB, LH, LW, LBU, LHU
case 0: rval = MINIRV32_LOAD1_SIGNED( rsval ); break;
case 1: rval = MINIRV32_LOAD2_SIGNED( rsval ); break;
case 2: rval = MINIRV32_LOAD4( rsval ); break;
case 4: rval = MINIRV32_LOAD1( rsval ); break;
case 5: rval = MINIRV32_LOAD2( rsval ); break;
default: trap = (2+1);
}
}
break;
}
case 0x23: // Store 0b0100011
{
uint32_t rs1 = REG((ir >> 15) & 0x1f);
uint32_t rs2 = REG((ir >> 20) & 0x1f);
uint32_t addy = ( ( ir >> 7 ) & 0x1f ) | ( ( ir & 0xfe000000 ) >> 20 );
if( addy & 0x800 ) addy |= 0xfffff000;
addy += rs1 - MINIRV32_RAM_IMAGE_OFFSET;
rdid = 0;
// Same out-of-RAM handling as for loads: MMIO window first, else fault.
if( addy >= MINI_RV32_RAM_SIZE-3 )
{
addy += MINIRV32_RAM_IMAGE_OFFSET;
if( MINIRV32_MMIO_RANGE( addy ) )
{
MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, rs2 );
}
else
{
trap = (7+1); // Store access fault.
rval = addy;
}
}
else
{
switch( ( ir >> 12 ) & 0x7 )
{
//SB, SH, SW
case 0: MINIRV32_STORE1( addy, rs2 ); break;
case 1: MINIRV32_STORE2( addy, rs2 ); break;
case 2: MINIRV32_STORE4( addy, rs2 ); break;
default: trap = (2+1);
}
}
break;
}
case 0x13: // Op-immediate 0b0010011
case 0x33: // Op 0b0110011
{
uint32_t imm = ir >> 20;
imm = imm | (( imm & 0x800 )?0xfffff000:0);
uint32_t rs1 = REG((ir >> 15) & 0x1f);
// Opcode bit 5 distinguishes register-register (0x33) from immediate (0x13) forms.
uint32_t is_reg = !!( ir & 0x20 );
uint32_t rs2 = is_reg ? REG(imm & 0x1f) : imm;
if( is_reg && ( ir & 0x02000000 ) )
{
switch( (ir>>12)&7 ) //0x02000000 = RV32M
{
case 0: rval = rs1 * rs2; break; // MUL
#ifndef CUSTOM_MULH // If compiling on a system that doesn't natively, or via libgcc support 64-bit math.
case 1: rval = ((int64_t)((int32_t)rs1) * (int64_t)((int32_t)rs2)) >> 32; break; // MULH
case 2: rval = ((int64_t)((int32_t)rs1) * (uint64_t)rs2) >> 32; break; // MULHSU
case 3: rval = ((uint64_t)rs1 * (uint64_t)rs2) >> 32; break; // MULHU
#else
CUSTOM_MULH
#endif
case 4: if( rs2 == 0 ) rval = -1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? rs1 : ((int32_t)rs1 / (int32_t)rs2); break; // DIV
case 5: if( rs2 == 0 ) rval = 0xffffffff; else rval = rs1 / rs2; break; // DIVU
case 6: if( rs2 == 0 ) rval = rs1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? 0 : ((uint32_t)((int32_t)rs1 % (int32_t)rs2)); break; // REM
case 7: if( rs2 == 0 ) rval = rs1; else rval = rs1 % rs2; break; // REMU
}
}
else
{
switch( (ir>>12)&7 ) // These could be either op-immediate or op commands. Be careful.
{
case 0: rval = (is_reg && (ir & 0x40000000) ) ? ( rs1 - rs2 ) : ( rs1 + rs2 ); break;
case 1: rval = rs1 << (rs2 & 0x1F); break;
case 2: rval = (int32_t)rs1 < (int32_t)rs2; break;
case 3: rval = rs1 < rs2; break;
case 4: rval = rs1 ^ rs2; break;
case 5: rval = (ir & 0x40000000 ) ? ( ((int32_t)rs1) >> (rs2 & 0x1F) ) : ( rs1 >> (rs2 & 0x1F) ); break;
case 6: rval = rs1 | rs2; break;
case 7: rval = rs1 & rs2; break;
}
}
break;
}
case 0x0f: // 0b0001111
rdid = 0; // fencetype = (ir >> 12) & 0b111; We ignore fences in this impl.
break;
case 0x73: // Zifencei+Zicsr (0b1110011)
{
uint32_t csrno = ir >> 20;
uint32_t microop = ( ir >> 12 ) & 0x7;
if( (microop & 3) ) // It's a Zicsr function.
{
int rs1imm = (ir >> 15) & 0x1f;
uint32_t rs1 = REG(rs1imm);
uint32_t writeval = rs1;
// https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf
// Generally, support for Zicsr
// Step 1: read the current CSR value into rval (becomes rd).
switch( csrno )
{
case 0x340: rval = CSR( mscratch ); break;
case 0x305: rval = CSR( mtvec ); break;
case 0x304: rval = CSR( mie ); break;
case 0xC00: rval = cycle; break;
case 0x344: rval = CSR( mip ); break;
case 0x341: rval = CSR( mepc ); break;
case 0x300: rval = CSR( mstatus ); break; //mstatus
case 0x342: rval = CSR( mcause ); break;
case 0x343: rval = CSR( mtval ); break;
case 0xf11: rval = 0xff0ff0ff; break; //mvendorid
case 0x301: rval = 0x40401101; break; //misa (XLEN=32, IMA+X)
//case 0x3B0: rval = 0; break; //pmpaddr0
//case 0x3a0: rval = 0; break; //pmpcfg0
//case 0xf12: rval = 0x00000000; break; //marchid
//case 0xf13: rval = 0x00000000; break; //mimpid
//case 0xf14: rval = 0x00000000; break; //mhartid
default:
MINIRV32_OTHERCSR_READ( csrno, rval );
break;
}
// Step 2: derive the value to write from the operation variant.
switch( microop )
{
case 1: writeval = rs1; break; //CSRRW
case 2: writeval = rval | rs1; break; //CSRRS
case 3: writeval = rval & ~rs1; break; //CSRRC
case 5: writeval = rs1imm; break; //CSRRWI
case 6: writeval = rval | rs1imm; break; //CSRRSI
case 7: writeval = rval & ~rs1imm; break; //CSRRCI
}
// Step 3: write the new value back to the CSR.
switch( csrno )
{
case 0x340: SETCSR( mscratch, writeval ); break;
case 0x305: SETCSR( mtvec, writeval ); break;
case 0x304: SETCSR( mie, writeval ); break;
case 0x344: SETCSR( mip, writeval ); break;
case 0x341: SETCSR( mepc, writeval ); break;
case 0x300: SETCSR( mstatus, writeval ); break; //mstatus
case 0x342: SETCSR( mcause, writeval ); break;
case 0x343: SETCSR( mtval, writeval ); break;
//case 0x3a0: break; //pmpcfg0
//case 0x3B0: break; //pmpaddr0
//case 0xf11: break; //mvendorid
//case 0xf12: break; //marchid
//case 0xf13: break; //mimpid
//case 0xf14: break; //mhartid
//case 0x301: break; //misa
default:
MINIRV32_OTHERCSR_WRITE( csrno, writeval );
break;
}
}
else if( microop == 0x0 ) // "SYSTEM" 0b000
{
rdid = 0;
if( ( ( csrno & 0xff ) == 0x02 ) ) // MRET
{
//https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf
//Table 7.6. MRET then in mstatus/mstatush sets MPV=0, MPP=0, MIE=MPIE, and MPIE=1. La
// Should also update mstatus to reflect correct mode.
uint32_t startmstatus = CSR( mstatus );
uint32_t startextraflags = CSR( extraflags );
SETCSR( mstatus , (( startmstatus & 0x80) >> 4) | ((startextraflags&3) << 11) | 0x80 );
SETCSR( extraflags, (startextraflags & ~3) | ((startmstatus >> 11) & 3) );
pc = CSR( mepc ) -4;
} else {
switch (csrno) {
case 0:
// With ECALL_HANDLER defined, the ECALL is serviced by the host and no trap is raised.
#ifndef ECALL_HANDLER
trap = ( CSR( extraflags ) & 3) ? (11+1) : (8+1); // ECALL; 8 = "Environment call from U-mode"; 11 = "Environment call from M-mode"
#else
ECALL_HANDLER(state);
trap = 0;
#endif
break;
case 1:
trap = (3+1); break; // EBREAK 3 = "Breakpoint"
case 0x105: //WFI (Wait for interrupts)
CSR( mstatus ) |= 8; //Enable interrupts
CSR( extraflags ) |= 4; //Inform environment we want to go to sleep.
SETCSR( pc, pc + 4 );
return 1;
default:
trap = (2+1); break; // Illegal opcode.
}
}
}
else
trap = (2+1); // Note microop 0b100 == undefined.
break;
}
case 0x2f: // RV32A (0b00101111)
{
uint32_t rs1 = REG((ir >> 15) & 0x1f);
uint32_t rs2 = REG((ir >> 20) & 0x1f);
uint32_t irmid = ( ir>>27 ) & 0x1f;
rs1 -= MINIRV32_RAM_IMAGE_OFFSET;
// We don't implement load/store from UART or CLNT with RV32A here.
if( rs1 >= MINI_RV32_RAM_SIZE-3 )
{
trap = (7+1); //Store/AMO access fault
rval = rs1 + MINIRV32_RAM_IMAGE_OFFSET;
}
else
{
rval = MINIRV32_LOAD4( rs1 );
// Referenced a little bit of https://github.com/franzflasch/riscv_em/blob/master/src/core/core.c
uint32_t dowrite = 1;
switch( irmid )
{
case 2: //LR.W (0b00010)
dowrite = 0;
CSR( extraflags ) = (CSR( extraflags ) & 0x07) | (rs1<<3);
break;
case 3: //SC.W (0b00011) (Make sure we have a slot, and, it's valid)
rval = ( CSR( extraflags ) >> 3 != ( rs1 & 0x1fffffff ) ); // Validate that our reservation slot is OK.
dowrite = !rval; // Only write if slot is valid.
break;
case 1: break; //AMOSWAP.W (0b00001)
case 0: rs2 += rval; break; //AMOADD.W (0b00000)
case 4: rs2 ^= rval; break; //AMOXOR.W (0b00100)
case 12: rs2 &= rval; break; //AMOAND.W (0b01100)
case 8: rs2 |= rval; break; //AMOOR.W (0b01000)
case 16: rs2 = ((int32_t)rs2<(int32_t)rval)?rs2:rval; break; //AMOMIN.W (0b10000)
case 20: rs2 = ((int32_t)rs2>(int32_t)rval)?rs2:rval; break; //AMOMAX.W (0b10100)
case 24: rs2 = (rs2<rval)?rs2:rval; break; //AMOMINU.W (0b11000)
case 28: rs2 = (rs2>rval)?rs2:rval; break; //AMOMAXU.W (0b11100)
default: trap = (2+1); dowrite = 0; break; //Not supported.
}
if( dowrite ) MINIRV32_STORE4( rs1, rs2 );
}
break;
}
default: trap = (2+1); // Fault: Invalid opcode.
}
// If there was a trap, do NOT allow register writeback.
if( trap ) {
SETCSR( pc, pc );
MINIRV32_POSTEXEC( pc, ir, trap );
break;
}
if( rdid )
{
REGSET( rdid, rval ); // Write back register.
}
}
MINIRV32_POSTEXEC( pc, ir, trap );
pc += 4;
}
// Handle traps and interrupts.
if( trap )
{
if( trap & 0x80000000 ) // If prefixed with 1 in MSB, it's an interrupt, not a trap.
{
SETCSR( mcause, trap );
SETCSR( mtval, 0 );
pc += 4; // PC needs to point to where the PC will return to.
}
else
{
// trap was stored as (cause + 1); the faulting address goes to mtval for codes 6..8, pc otherwise.
SETCSR( mcause, trap - 1 );
SETCSR( mtval, (trap > 5 && trap <= 8)? rval : pc );
}
SETCSR( mepc, pc ); //TRICKY: The kernel advances mepc automatically.
//CSR( mstatus ) & 8 = MIE, & 0x80 = MPIE
// On an interrupt, the system moves current MIE into MPIE
SETCSR( mstatus, (( CSR( mstatus ) & 0x08) << 4) | (( CSR( extraflags ) & 3 ) << 11) );
pc = (CSR( mtvec ) - 4);
// If trapping, always enter machine mode.
CSR( extraflags ) |= 3;
trap = 0;
pc += 4;
}
// The local cycle counter wrapped around: carry into the high word.
if( CSR( cyclel ) > cycle ) CSR( cycleh )++;
SETCSR( cyclel, cycle );
SETCSR( pc, pc );
return 0;
}
#endif
#endif

139
test_rv_vm/ripes-vm.c Normal file
View File

@ -0,0 +1,139 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
struct MiniRV32IMAState;
void ecall_handler(struct MiniRV32IMAState *state);

// Host pointer to the guest RAM image. It is refreshed on every ECALL by the
// ECALL_HANDLER macro below, because ecall_handler() itself only receives the
// processor state but needs the image to turn guest addresses into host
// pointers.
static uint8_t *ecall_ram;

// ECALL_HANDLER is expanded inside MiniRV32IMAStep, where `image` is the RAM
// image parameter; capture it before dispatching to the handler.
#define ECALL_HANDLER(state) ( ecall_ram = image, ecall_handler(state) )
#define MINIRV32WARN( x... ) printf( x );
#define MINIRV32_DECORATE static
#define MINI_RV32_RAM_SIZE (32 * 1024 * 1024)
#define MINIRV32_IMPLEMENTATION
#define MINIRV32_RAM_IMAGE_OFFSET 0x0
#include "mini-rv32ima.h"

// Number of a VM-specific syscall; these start at 1025.
#define SYSCALL(num) (1025 + (num))

// Returns how many bytes lie between guest address `addr` and the end of guest
// RAM, or 0 when the address is outside RAM or no image has been captured yet.
static size_t ecall_guest_room(uint32_t addr) {
    uint32_t ofs = addr - MINIRV32_RAM_IMAGE_OFFSET;
    if (ecall_ram == NULL || ofs >= MINI_RV32_RAM_SIZE) {
        return 0;
    }
    return (size_t)MINI_RV32_RAM_SIZE - ofs;
}

// Same character set that scanf("%s") treats as whitespace in the "C" locale.
static int ecall_is_space(int c) {
    return c == ' ' || (c >= '\t' && c <= '\r');
}

// Services an ECALL on behalf of the guest. The call number is taken from a7
// (x17) and the first argument from a0 (x10):
//   1           print a0 as a signed decimal integer
//   4           print the NUL-terminated string at guest address a0
//   10, 93      terminate the VM with exit status a0
//   SYSCALL(0)  read one character from stdin into a0
//   SYSCALL(1)  write the character in a0 to stdout
//   SYSCALL(4)  read a decimal integer from stdin into a0
//   SYSCALL(5)  read a whitespace-delimited word from stdin into the guest
//               buffer at a0 (a0 is left unchanged)
// An unknown call number, or a string call with an address outside guest RAM,
// prints a warning and exits with status 1.
void ecall_handler(struct MiniRV32IMAState *state) {
    uint32_t a0 = REG(10);
    switch (state->regs[17]) // x17 | a7
    {
    case 1:
        // PrintInt
        printf("%d", (int)(int32_t)a0);
        break;
    case 4: {
        // PrintString: a0 is a guest address and must be translated and
        // bounds-checked before the host touches it.
        size_t room = ecall_guest_room(a0);
        if (room == 0) {
            MINIRV32WARN("PrintString: invalid guest address %08x\n", a0);
            exit(1);
        }
        const char *text = (const char *)(ecall_ram + (a0 - MINIRV32_RAM_IMAGE_OFFSET));
        const char *nul = memchr(text, 0, room);
        // An unterminated string is clipped at the end of guest RAM.
        fwrite(text, 1, nul ? (size_t)(nul - text) : room, stdout);
        break;
    }
    case 10:
        fprintf(stderr, "\nexit: %d\n", (int)(int32_t)a0);
        exit((int)(int32_t)a0);
    case 93:
        fprintf(stderr, "\nmain return code: %d\n", (int)(int32_t)a0);
        exit((int)(int32_t)a0);
    case SYSCALL(0):
        // getchar
        REGSET(10, getchar());
        break;
    case SYSCALL(1):
        // putchar
        putchar(a0);
        break;
    case SYSCALL(4): {
        // input int; a0 keeps its previous value if no number could be read
        int value = 0;
        if (scanf("%d", &value) == 1) {
            a0 = (uint32_t)value;
        }
        REGSET(10, a0);
        break;
    }
    case SYSCALL(5): {
        // input string: skip leading whitespace, then copy one word into the
        // guest buffer without ever writing past the end of guest RAM.
        size_t room = ecall_guest_room(a0);
        if (room == 0) {
            MINIRV32WARN("InputString: invalid guest address %08x\n", a0);
            exit(1);
        }
        char *dst = (char *)(ecall_ram + (a0 - MINIRV32_RAM_IMAGE_OFFSET));
        size_t len = 0;
        int c = getchar();
        while (c != EOF && ecall_is_space(c)) {
            c = getchar();
        }
        while (c != EOF && !ecall_is_space(c) && len + 1 < room) {
            dst[len++] = (char)c;
            c = getchar();
        }
        if (c != EOF) {
            ungetc(c, stdin); // leave the delimiter in the stream, like scanf
        }
        dst[len] = '\0';
        REGSET(10, a0);
        break;
    }
    default:
        MINIRV32WARN("Unhandled ECALL: %d\n", (int)state->regs[17]);
        exit(1);
        break;
    }
}
// Loads a flat binary into guest RAM at offset 0 and single-steps the core
// until MiniRV32IMAStep returns a non-zero value (or the guest terminates the
// process through an exit ECALL).
//   argv[1]  optional path to the flat binary; DEFAULT_FILE is used otherwise.
// Returns 1 on a setup failure, otherwise the non-zero step result.
int main(int argc, char *argv[]) {
    // Build hint: gcc -DDEFAULT_FILE='\"flat.bin\"' .\ripes-vm.c -o rv32-vm.exe
#ifndef DEFAULT_FILE
#define DEFAULT_FILE "../ccompiler/backend/test_rv.bin"
#endif
    const char *filename = (argc == 2) ? argv[1] : DEFAULT_FILE;
    int rc = 1;
    long flen = 0;
    FILE *file = NULL;
    struct MiniRV32IMAState state;

    // Allocate zero-filled guest RAM.
    uint8_t *image = calloc(1, MINI_RV32_RAM_SIZE);
    if (!image) {
        fprintf(stderr, "Failed to allocate %d bytes of guest RAM\n", MINI_RV32_RAM_SIZE);
        return 1;
    }

    // Initialize the processor state.
    memset(&state, 0, sizeof(state));
    state.pc = 0; // execution starts at guest address 0
    // Original note: "set machine mode".
    // NOTE(review): the stepper tracks privilege in extraflags bits 0..1, not
    // in mstatus - confirm this value is intended.
    state.mstatus = 0x80000000;
    state.mtvec = 0x1000;
    // Original note: "enable all interrupts".
    // NOTE(review): the stepper only tests bit 7 (MTIE) of mie, which 0x7 does
    // not set - confirm this value is intended.
    state.mie = 0x7;

    // Load the flat binary.
    file = fopen(filename, "rb");
    if (!file) {
        fprintf(stderr, "Usage: %s <flatbin_file>\n", argv[0]);
        printf("Failed to open file %s\n", filename);
        goto out;
    }
    if (fseek(file, 0, SEEK_END) != 0 || (flen = ftell(file)) < 0 ||
        fseek(file, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to determine the size of %s\n", filename);
        goto out;
    }
    if (flen > MINI_RV32_RAM_SIZE) {
        fprintf(stderr, "Flatbin file is too large\n");
        goto out;
    }
    if (fread(image, 1, (size_t)flen, file) != (size_t)flen) {
        fprintf(stderr, "Failed to read %s\n", filename);
        goto out;
    }
    fclose(file);
    file = NULL;

    // Run the emulator one instruction at a time until the step reports a
    // non-zero result.
    int32_t ret;
    do {
        ret = MiniRV32IMAStep(&state, image, MINIRV32_RAM_IMAGE_OFFSET, 0, 1);
    } while (ret == 0);
    printf("Exception or interrupt occurred at PC: %d\n", (int)state.pc);
    rc = ret;

out:
    if (file) {
        fclose(file);
    }
    free(image);
    return rc;
}
// static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image )
// {
// uint32_t pc = core->pc;
// uint32_t pc_offset = pc - MINIRV32_RAM_IMAGE_OFFSET;
// uint32_t ir = 0;
// printf( "PC: %08x ", pc );
// if( pc_offset >= 0 && pc_offset < ram_amt - 3 )
// {
// ir = *((uint32_t*)(&((uint8_t*)ram_image)[pc_offset]));
// printf( "[0x%08x] ", ir );
// }
// else
// printf( "[xxxxxxxxxx] " );
// uint32_t * regs = core->regs;
// printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ",
// regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7],
// regs[8], regs[9], regs[10], regs[11], regs[12], regs[13], regs[14], regs[15] );
// printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n",
// regs[16], regs[17], regs[18], regs[19], regs[20], regs[21], regs[22], regs[23],
// regs[24], regs[25], regs[26], regs[27], regs[28], regs[29], regs[30], regs[31] );
// }