diff --git a/ccompiler/backend/riscv32/Makefile b/ccompiler/backend/riscv32/Makefile
new file mode 100644
index 0000000..4d3107f
--- /dev/null
+++ b/ccompiler/backend/riscv32/Makefile
@@ -0,0 +1,13 @@
+all: ccompiler
+
+run: ccompiler
+	./ccompiler test.c flat.bin
+
+ccompiler: frontend
+	gcc -g rv32ima_codegen.c ../../middleend/ir.c -L../../frontend -lfrontend -o ccompiler
+
+frontend:
+	$(MAKE) -C ../../frontend
+
+clean:
+	rm -f ccompiler flat.bin
\ No newline at end of file
diff --git a/ccompiler/backend/riscv32/rv32gen.h b/ccompiler/backend/riscv32/rv32gen.h
new file mode 100644
index 0000000..9dd1dee
--- /dev/null
+++ b/ccompiler/backend/riscv32/rv32gen.h
@@ -0,0 +1,338 @@
+#ifndef RV32I_GEN_H
+#define RV32I_GEN_H
+
+/**
+31 25 24 20 19 15 14 12 11 7 6 0
+imm[31:12] rd 0110111 U lui
+imm[31:12] rd 0010111 U auipc
+imm[20|10:1|11|19:12] rd 1101111 J jal
+imm[11:0] rs1 000 rd 1100111 I jalr
+imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 B beq
+imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 B bne
+imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 B blt
+imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 B bge
+imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 B bltu
+imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 B bgeu
+imm[11:0] rs1 000 rd 0000011 I lb
+imm[11:0] rs1 001 rd 0000011 I lh
+imm[11:0] rs1 010 rd 0000011 I lw
+imm[11:0] rs1 100 rd 0000011 I lbu
+imm[11:0] rs1 101 rd 0000011 I lhu
+imm[11:5] rs2 rs1 000 imm[4:0] 0100011 S sb
+imm[11:5] rs2 rs1 001 imm[4:0] 0100011 S sh
+imm[11:5] rs2 rs1 010 imm[4:0] 0100011 S sw
+imm[11:0] rs1 000 rd 0010011 I addi
+imm[11:0] rs1 010 rd 0010011 I slti
+imm[11:0] rs1 011 rd 0010011 I sltiu
+imm[11:0] rs1 100 rd 0010011 I xori
+imm[11:0] rs1 110 rd 0010011 I ori
+imm[11:0] rs1 111 rd 0010011 I andi
+0000000 shamt rs1 001 rd 0010011 I slli
+0000000 shamt rs1 101 rd 0010011 I srli
+0100000 shamt rs1 101 rd 0010011 I srai
+0000000 rs2 rs1 000 rd 0110011 R add
+0100000 rs2 rs1 000 rd 0110011 R sub
+0000000 rs2 rs1 001 rd 0110011 R sll
+0000000 rs2 rs1 010 rd 0110011 R slt
+0000000 rs2 rs1 011 rd 0110011 R sltu
+0000000 rs2 rs1 100 rd 0110011 R xor
+0000000 rs2 rs1 101 rd 0110011 R srl
+0100000 rs2 rs1 101 rd 0110011 R sra
+0000000 rs2 rs1 110 rd 0110011 R or
+0000000 rs2 rs1 111 rd 0110011 R and
+0000 pred succ 00000 000 00000 0001111 I fence
+0000 0000 0000 00000 001 00000 0001111 I fence.i
+000000000000 00000 000 00000 1110011 I ecall
+000000000001 00000 000 00000 1110011 I ebreak
+csr rs1 001 rd 1110011 I csrrw
+csr rs1 010 rd 1110011 I csrrs
+csr rs1 011 rd 1110011 I csrrc
+csr zimm 101 rd 1110011 I csrrwi
+csr zimm 110 rd 1110011 I csrrsi
+csr zimm 111 rd 1110011 I csrrci
+ */
+#include <stdint.h>
+
+// Register enumeration: raw x0..x31 names first, then ABI aliases mapped onto them.
+typedef enum {
+    REG_X0, REG_X1, REG_X2, REG_X3, REG_X4, REG_X5, REG_X6, REG_X7,
+    REG_X8, REG_X9, REG_X10, REG_X11, REG_X12, REG_X13, REG_X14, REG_X15,
+    REG_X16, REG_X17, REG_X18, REG_X19, REG_X20, REG_X21, REG_X22, REG_X23,
+    REG_X24, REG_X25, REG_X26, REG_X27, REG_X28, REG_X29, REG_X30, REG_X31,
+    REG_ZERO = REG_X0, REG_RA = REG_X1, REG_SP = REG_X2, REG_GP = REG_X3,
+    REG_TP = REG_X4, REG_T0 = REG_X5, REG_T1 = REG_X6, REG_T2 = REG_X7,
+    REG_S0 = REG_X8, REG_S1 = REG_X9, REG_A0 = REG_X10, REG_A1 = REG_X11,
+    REG_A2 = REG_X12, REG_A3 = REG_X13, REG_A4 = REG_X14, REG_A5 = REG_X15,
+    REG_A6 = REG_X16, REG_A7 = REG_X17, REG_S2 = REG_X18, REG_S3 = REG_X19,
+    REG_S4 = REG_X20, REG_S5 = REG_X21, REG_S6 = REG_X22, REG_S7 = REG_X23,
+    REG_S8 = REG_X24, REG_S9 = REG_X25, REG_S10 = REG_X26, REG_S11 = REG_X27,
+    REG_T3 = REG_X28, REG_T4 = REG_X29, REG_T5 = REG_X30, REG_T6 = REG_X31,
+} RV32Reg;
+
+/******************** Immediate helper macros (mask a value to its field width) ********************/
+#define IMM_12BITS(imm) ((imm) & 0xFFF)
+#define IMM_20BITS(imm) ((imm) & 0xFFFFF)
+#define SHAMT_VAL(imm) ((imm) & 0x1F)
+#define CSR_VAL(csr) ((csr) & 0xFFF)
+
+// B-type immediate scatter ([12|10:5|4:1|11]); bit 12 is cast to uint32_t so the shift into bit 31 is well defined
+#define ENCODE_B_IMM(imm) ( \
+    (uint32_t)(((imm) >> 12) & 0x1) << 31 | /* imm[12:12] -> instr[31:31] */ \
+    (((imm) >> 5) & 0x3F) << 25 | /* imm[10:5] -> instr[30:25] */ \
+    
(((imm) >> 1) & 0xF) << 8 | /* imm[4:1] -> instr[11:8] */ \
+    (((imm) >> 11) & 0x1) << 7) /* imm[11:11] -> instr[7:7] */
+
+// J-type immediate scatter ([20|10:1|11|19:12]); bit 20 is cast to uint32_t so the shift into bit 31 is well defined
+#define ENCODE_J_IMM(imm) ( \
+    (uint32_t)(((imm) >> 20) & 0x1) << 31 | /* imm[20:20] -> instr[31:31] */ \
+    (((imm) >> 1) & 0x3FF)<< 21 | /* imm[10:1] -> instr[30:21] */ \
+    (((imm) >> 11) & 0x1) << 20 | /* imm[11:11] -> instr[20:20] */ \
+    (((imm) >> 12) & 0xFF) << 12) /* imm[19:12] -> instr[19:12] */
+/******************** Instruction builder macros (each yields one uint32_t word) ********************/
+// R-type builder: op | rd | funct3 | rs1 | rs2 | funct7 (op is now honoured instead of a hard-coded 0x33)
+#define RV32_RTYPE(op, f3, f7, rd, rs1, rs2) (uint32_t)( \
+    ((op) | ((rd) << 7) | ((f3) << 12) | ((rs1) << 15) | \
+    ((rs2) << 20) | ((f7) << 25)) )
+
+// I-type builder: op | rd | funct3 | rs1 | imm[11:0]
+#define RV32_ITYPE(op, f3, rd, rs1, imm) (uint32_t)( \
+    ((op) | ((rd) << 7) | ((f3) << 12) | ((rs1) << 15) | \
+    (IMM_12BITS(imm) << 20)) )
+
+// S-type builder: imm[11:5] goes to bits 31:25, imm[4:0] to bits 11:7
+#define RV32_STYPE(op, f3, rs1, rs2, imm) (uint32_t)( \
+    ((op) | ((IMM_12BITS(imm) & 0xFE0) << 20) | ((rs1) << 15) | \
+    ((rs2) << 20) | ((f3) << 12) | ((IMM_12BITS(imm) & 0x1F) << 7)) )
+
+// B-type builder: branch offset scattered by ENCODE_B_IMM
+#define RV32_BTYPE(op, f3, rs1, rs2, imm) (uint32_t)( \
+    ((op) | (ENCODE_B_IMM(imm)) | ((rs1) << 15) | \
+    ((rs2) << 20) | ((f3) << 12)) )
+
+// U-type builder: takes the full 32-bit value and keeps its upper 20 bits
+#define RV32_UTYPE(op, rd, imm) (uint32_t)( \
+    ((op) | ((rd) << 7) | (IMM_20BITS((imm) >> 12) << 12)) )
+
+// J-type builder: jump offset scattered by ENCODE_J_IMM
+#define RV32_JTYPE(op, rd, imm) (uint32_t)( \
+    ((op) | ((rd) << 7) | ENCODE_J_IMM(imm)) )
+
+/******************** U-type ********************/
+#define LUI(rd, imm) RV32_UTYPE(0x37, rd, imm)
+#define AUIPC(rd, imm) RV32_UTYPE(0x17, rd, imm)
+
+/******************** J-type ********************/
+#define JAL(rd, imm) RV32_JTYPE(0x6F, rd, imm)
+
+/******************** I-type ********************/
+#define JALR(rd, rs1, imm) RV32_ITYPE(0x67, 0x0, rd, rs1, imm)
+
+// Load instructions
+#define LB(rd, rs1, imm) RV32_ITYPE(0x03, 0x0, rd, rs1, imm)
+#define LH(rd, rs1, imm) RV32_ITYPE(0x03, 0x1, rd, rs1, imm)
+#define LW(rd, rs1, imm) RV32_ITYPE(0x03, 0x2, rd, rs1, imm)
+#define LBU(rd, rs1, imm)
RV32_ITYPE(0x03, 0x4, rd, rs1, imm)
+#define LHU(rd, rs1, imm) RV32_ITYPE(0x03, 0x5, rd, rs1, imm)
+
+// Immediate arithmetic
+#define ADDI(rd, rs1, imm) RV32_ITYPE(0x13, 0x0, rd, rs1, imm)
+#define SLTI(rd, rs1, imm) RV32_ITYPE(0x13, 0x2, rd, rs1, imm)
+#define SLTIU(rd, rs1, imm) RV32_ITYPE(0x13, 0x3, rd, rs1, imm)
+#define XORI(rd, rs1, imm) RV32_ITYPE(0x13, 0x4, rd, rs1, imm)
+#define ORI(rd, rs1, imm) RV32_ITYPE(0x13, 0x6, rd, rs1, imm)
+#define ANDI(rd, rs1, imm) RV32_ITYPE(0x13, 0x7, rd, rs1, imm)
+
+// Shift-immediate instructions: shamt sits in imm[4:0]; SRAI additionally sets imm[10] (funct7 = 0100000)
+#define SLLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x1, rd, rs1, SHAMT_VAL(shamt))
+#define SRLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, SHAMT_VAL(shamt))
+#define SRAI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, (0x400 | SHAMT_VAL(shamt)))
+
+/******************** B-type ********************/
+#define BEQ(rs1, rs2, imm) RV32_BTYPE(0x63, 0x0, rs1, rs2, imm)
+#define BNE(rs1, rs2, imm) RV32_BTYPE(0x63, 0x1, rs1, rs2, imm)
+#define BLT(rs1, rs2, imm) RV32_BTYPE(0x63, 0x4, rs1, rs2, imm)
+#define BGE(rs1, rs2, imm) RV32_BTYPE(0x63, 0x5, rs1, rs2, imm)
+#define BLTU(rs1, rs2, imm) RV32_BTYPE(0x63, 0x6, rs1, rs2, imm)
+#define BGEU(rs1, rs2, imm) RV32_BTYPE(0x63, 0x7, rs1, rs2, imm)
+
+/******************** S-type ********************/
+#define SB(rs2, rs1, imm) RV32_STYPE(0x23, 0x0, rs1, rs2, imm)
+#define SH(rs2, rs1, imm) RV32_STYPE(0x23, 0x1, rs1, rs2, imm)
+#define SW(rs2, rs1, imm) RV32_STYPE(0x23, 0x2, rs1, rs2, imm)
+
+/******************** R-type ********************/
+#define ADD(rd, rs1, rs2) RV32_RTYPE(0x33, 0x0, 0x00, rd, rs1, rs2)
+#define SUB(rd, rs1, rs2) RV32_RTYPE(0x33, 0x0, 0x20, rd, rs1, rs2)
+#define SLL(rd, rs1, rs2) RV32_RTYPE(0x33, 0x1, 0x00, rd, rs1, rs2)
+#define SLT(rd, rs1, rs2) RV32_RTYPE(0x33, 0x2, 0x00, rd, rs1, rs2)
+#define SLTU(rd, rs1, rs2) RV32_RTYPE(0x33, 0x3, 0x00, rd, rs1, rs2)
+#define XOR(rd, rs1, rs2) RV32_RTYPE(0x33, 0x4, 0x00, rd, rs1, rs2)
+#define SRL(rd, rs1, rs2)
RV32_RTYPE(0x33, 0x5, 0x00, rd, rs1, rs2)
+#define SRA(rd, rs1, rs2) RV32_RTYPE(0x33, 0x5, 0x20, rd, rs1, rs2)
+#define OR(rd, rs1, rs2) RV32_RTYPE(0x33, 0x6, 0x00, rd, rs1, rs2)
+#define AND(rd, rs1, rs2) RV32_RTYPE(0x33, 0x7, 0x00, rd, rs1, rs2)
+
+/******************** I-type (system): fence has pred in bits 27:24 and succ in bits 23:20 ********************/
+#define FENCE(pred, succ) (uint32_t)( 0x0F | (((pred) & 0xF) << 24) | (((succ) & 0xF) << 20) )
+#define FENCE_I() (uint32_t)( 0x100F )
+#define ECALL() (uint32_t)( 0x73 )
+#define EBREAK() (uint32_t)( 0x100073 )
+
+// CSR instructions: the CSR number is the 12-bit immediate; the *I forms carry the 5-bit zimm in the rs1 field
+#define CSRRW(rd, csr, rs) RV32_ITYPE(0x73, 0x1, rd, rs, CSR_VAL(csr))
+#define CSRRS(rd, csr, rs) RV32_ITYPE(0x73, 0x2, rd, rs, CSR_VAL(csr))
+#define CSRRC(rd, csr, rs) RV32_ITYPE(0x73, 0x3, rd, rs, CSR_VAL(csr))
+#define CSRRWI(rd, csr, zimm) RV32_ITYPE(0x73, 0x5, rd, ((zimm) & 0x1F), CSR_VAL(csr))
+#define CSRRSI(rd, csr, zimm) RV32_ITYPE(0x73, 0x6, rd, ((zimm) & 0x1F), CSR_VAL(csr))
+#define CSRRCI(rd, csr, zimm) RV32_ITYPE(0x73, 0x7, rd, ((zimm) & 0x1F), CSR_VAL(csr))
+
+
+/* M-Extension: every M instruction uses funct7 = 0000001; funct3 selects mul (000), div (100), rem (110) */
+#define MUL(rd, rs1, rs2) RV32_RTYPE(0x33, 0x0, 0x01, rd, rs1, rs2)
+#define DIV(rd, rs1, rs2) RV32_RTYPE(0x33, 0x4, 0x01, rd, rs1, rs2)
+#define REM(rd, rs1, rs2) RV32_RTYPE(0x33, 0x6, 0x01, rd, rs1, rs2)
+
+/******************** Pseudo-instructions ********************/
+// Pseudo-instructions expressed through the base encodings above
+
+// nop (No operation)
+#define NOP() ADDI(REG_X0, REG_X0, 0) // no operation
+
+// neg rd, rs (Two's complement of rs)
+#define NEG(rd, rs) SUB(rd, REG_ZERO, rs) // two's complement
+
+// negw rd, rs (Two's complement word of rs)
+#define NEGW(rd, rs) SUBW(rd, REG_ZERO, rs) // NOTE: SUBW is not defined in this header; expanding NEGW will not compile
+
+// snez rd, rs (Set if ≠ zero)
+#define SNEZ(rd, rs) SLTU(rd, REG_X0, rs) // set if not zero
+
+// sltz rd, rs (Set if < zero)
+#define SLTZ(rd, rs) SLT(rd, rs, REG_X0) // set if less than zero
+
+// sgtz rd, rs (Set if > zero)
+#define SGTZ(rd, rs) SLT(rd, REG_X0, rs) // set if greater than zero
+
+// beqz rs, offset (Branch if = zero)
+#define BEQZ(rs, offset) BEQ(rs, REG_X0, offset) // branch if zero
+
+// bnez rs, offset (Branch if ≠ zero)
+#define BNEZ(rs, offset) BNE(rs, REG_X0, offset) // branch if not zero
+
+// blez rs, offset (Branch if ≤ zero)
+#define BLEZ(rs, offset) BGE(REG_X0, rs, offset) // branch if less than or equal to zero
+
+// bgez rs, offset (Branch if ≥ zero)
+#define BGEZ(rs, offset) BGE(rs, REG_X0, offset) // branch if greater than or equal to zero
+
+// bltz rs, offset (Branch if < zero)
+#define BLTZ(rs, offset) BLT(rs, REG_X0, offset) // branch if less than zero
+
+// bgtz rs, offset (Branch if > zero)
+#define BGTZ(rs, offset) BLT(REG_X0, rs, offset) // branch if greater than zero
+
+// j offset (Jump)
+#define J(offset) JAL(REG_X0, offset) // jump
+
+// jr rs (Jump register)
+#define JR(rs) JALR(REG_X0, rs, 0) // jump to register
+
+// ret (Return from subroutine)
+#define RET() JALR(REG_X0, REG_RA, 0) // return from subroutine
+
+// tail offset (Tail call far-away subroutine)
+#define TAIL_2(offset) AUIPC(REG_X6, (uint32_t)(offset) + 0x800u), JALR(REG_X0, REG_X6, offset) // auipc t1 + jalr x0; +0x800 compensates for jalr sign-extending the low 12 bits
+#define TAIL(offset) TAIL_2(offset) // Warning: expands to 2 instructions
+
+// csrr csr, rd (Read CSR)
+#define CSRR(csr, rd) CSRRS(rd, csr, REG_X0) // read CSR
+
+// csrw csr, rs (Write CSR)
+#define CSRW(csr, rs) CSRRW(REG_X0, csr, rs) // write CSR, old value discarded into x0
+
+// csrs csr, rs (Set bits in CSR)
+#define CSRS(csr, rs) CSRRS(REG_X0, csr, rs) // set bits in CSR
+
+// csrc csr, rs (Clear bits in CSR)
+#define CSRC(csr, rs) CSRRC(REG_X0, csr, rs) // clear bits in CSR
+
+// csrci csr, imm (Immediate clear bits in CSR)
+#define CSRCI(csr, imm) CSRRCI(REG_X0, csr, imm) // clear CSR bits from an immediate
+
+// csrrwi csr, imm (Write CSR immediate)
+#define CSRRWI2(csr, imm) CSRRWI(REG_X0, csr, imm) // write CSR from an immediate
+
+// csrrsi csr, imm (Immediate set bits in CSR)
+#define CSRRSI2(csr, imm) CSRRSI(REG_X0, csr, imm) // set CSR bits from an immediate
+
+// csrrci csr, imm (Immediate clear bits in CSR)
+#define CSRRCI2(csr, imm) CSRRCI(REG_X0, csr, imm) // clear CSR bits from an immediate
+
+// // frcsr rd (Read FP control/status register)
+// #define FRCSR(rd) CSRRS(rd, FCSR, REG_X0) // read FP control/status register
+
+// // fscsr rs (Write FP control/status register)
+// #define FSCSR(rs) CSRRW(REG_X0, FCSR, rs) // write FP control/status register
+
+// // frrm rd (Read FP rounding mode)
+// #define FRRM(rd) CSRRS(rd, FRM, REG_X0) // read FP rounding mode
+
+// // fsrm rs (Write FP rounding mode)
+// #define FSRM(rs) CSRRW(REG_X0, FRM, rs) // write FP rounding mode
+
+// // frflags rd (Read FP exception flags)
+// #define FRFLAGS(rd) CSRRS(rd, FFLAGS, REG_X0) // read FP exception flags
+
+// // fsflags rs (Write FP exception flags)
+// #define FSFLAGS(rs) CSRRW(REG_X0, FFLAGS, rs) // write FP exception flags
+
+
+// Multi-instruction sequences (each expands to a comma-separated list of words). The upper part gets +0x800 because the paired addi/jalr sign-extends its 12-bit immediate.
+#define LI(rd, num) \
+    LUI(rd, (uint32_t)(num) + 0x800u), \
+    ADDI(rd, rd, num)
+
+#define MV(rd, rs) ADDI(rd, rs, 0)
+#define NOT(rd, rs) XORI(rd, rs, -1)
+#define CALL(offset) \
+    AUIPC(REG_X1, (uint32_t)(offset) + 0x800u), \
+    JALR(REG_X1, REG_X1, offset)
+
+#define CALL_ABS(addr) \
+    LUI(REG_X1, (uint32_t)(addr) + 0x800u), \
+    JALR(REG_X1, REG_X1, addr)
+
+#ifdef RISCV_VM_BUILDIN_ECALL
+#define ECALL_PNT_INT(num) \
+    ADDI(REG_A0, REG_X0, num), \
+    ADDI(REG_A7, REG_X0, 0x1), \
+    ECALL()
+
+#define ECALL_PNT_STR(str) \
+    ADDI(REG_A0, REG_X0, str), \
+    ADDI(REG_A7, REG_X0, 0x4), \
+    ECALL()
+
+#define ECALL_EXIT2() \
+    ADDI(REG_A7, REG_X0, 93), \
+    ECALL()
+
+#define ECALL_EXIT_ARG(errno) \
+    ADDI(REG_A0, REG_X0, errno), \
+    ECALL_EXIT2()
+
+#define ECALL_EXIT() \
+    ADDI(REG_A7, REG_X0, 93), \
+    ECALL()
+
+#define ECALL_SCAN_INT(int) \
+    ADDI(REG_A7, REG_X0, (1025 + 4)), \
+    ECALL()
+
+#define ECALL_SCAN_STR(str) \
+    ADDI(REG_A0, REG_X0, str), \
+    ADDI(REG_A7, REG_X0, (1025 + 5)), \
+    ECALL()
+#endif
+
+#endif
diff --git a/ccompiler/backend/riscv32/rv32ima_codegen.c b/ccompiler/backend/riscv32/rv32ima_codegen.c
new file mode 100644
index 0000000..3dda238
--- /dev/null
+++ b/ccompiler/backend/riscv32/rv32ima_codegen.c
@@ -0,0 +1,413 @@
+#define RISCV_VM_BUILDIN_ECALL
+#include "rv32gen.h"
+#include <stdio.h>
+#include <string.h>
+
+// Instruction word with a per-byte view; bytes follow host order (presumably a little-endian host is assumed - confirm)
+typedef union rv32code {
+    uint32_t code;
+    uint8_t bytes[4];
+} rv32code_t;
+
+#define CRT_CODE_SIZE 16
+
+// Startup stub written at the start of the image: set sp and ra, call the code placed after the stub, then exit
+rv32code_t gcodes[] = {
+    LI(REG_SP, 0x1000),
+    LI(REG_RA, 0x0),
+
+    CALL_ABS(CRT_CODE_SIZE << 2),
+    // Exit
+    ECALL_EXIT2(),
+};
+
+void test_raw_gen(FILE* out) {
+    fwrite(gcodes,
sizeof(rv32code_t), sizeof(gcodes)/sizeof(gcodes[0]), out);
+}
+
+#include "../../frontend/frontend.h"
+#include "../../middleend/ir.h"
+typedef struct { // one pending fixup for a branch, jump or call emitted with a placeholder offset
+    int code_pos; // index into ctx.codes of the instruction to patch
+    int to_idx; // destination index: block index for branch/jump, function index for call
+    int cur_idx; // index of the block (or function, for calls) that contains the instruction
+    int base_offset; // byte distance used when the final displacement is computed
+    enum {
+        JMP_BRANCH,
+        JMP_JUMP,
+        JMP_CALL,
+    } type;
+} jmp_t;
+
+static struct { // global code-generation state
+    vector_header(codes, rv32code_t); // every instruction emitted so far
+    int stack_offset; // frame size in bytes of the function being generated
+    int stack_base; // offset of the first per-instruction stack slot
+    int tmp_reg;
+    ir_bblock_t* cur_block;
+    ir_func_t* cur_func;
+    ir_prog_t* prog;
+    vector_header(jmp, jmp_t*); // branch/jump fixups of the current function
+    vector_header(call, jmp_t*); // call fixups, resolved after all functions are generated
+
+    int cur_func_offset;
+    int cur_block_offset;
+} ctx;
+
+int write_inst(union rv32code ins, FILE* fp) { // writes one instruction word to fp and returns fwrite's item count
+    return fwrite(&ins, sizeof(union rv32code), 1, fp);
+}
+
+#define GENCODE(code) vector_push(ctx.codes, (rv32code_t)(code)); len += 4 // appends one word; needs a local `len` in scope
+#define GENCODES(code) do { \
+        rv32code_t codes[] = { \
+            code \
+        }; \
+        for (int i = 0; i < sizeof(codes) / sizeof(codes[0]); i ++) { \
+            GENCODE(codes[i]); \
+        } \
+    } while (0)
+
+static int stack_offset(ir_node_t* ptr) { // stack slot of an IR node: stack_base + 4 * (its position across all blocks of the current function)
+    int offset = ctx.stack_base;
+    for (int i = 0; i < ctx.cur_func->bblocks.size; i ++) {
+        ir_bblock_t* block = vector_at(ctx.cur_func->bblocks, i);
+        for (int i = 0; i < block->instrs.size; i++) { // inner i deliberately shadows the block index
+            if (vector_at(block->instrs, i) == ptr) {
+                offset += i * 4;
+                assert(offset >= 0 && offset < ctx.stack_offset);
+                return offset;
+            }
+        }
+        offset += block->instrs.size * 4;
+    }
+    assert(0); // node does not belong to the current function
+}
+
+static int block_idx(ir_bblock_t* toblock) { // index of toblock within the current function; asserts when absent
+    for (int i = 0; i < ctx.cur_func->bblocks.size; i ++) {
+        ir_bblock_t* block = vector_at(ctx.cur_func->bblocks, i);
+        if (toblock == block) {
+            return i;
+        }
+    }
+    assert(0);
+}
+
+static int func_idx(ir_func_t* tofunc) { // index of tofunc within the program; asserts when absent
+    for (int i = 0; i < ctx.prog->funcs.size; i ++) {
+        ir_func_t* func = vector_at(ctx.prog->funcs, i);
+        if (tofunc == func) {
+            return i;
+        }
+    }
+    assert(0);
+}
+
+static int system_func(const char* name) { // index of name in the built-in function table, or -1
+    static const char defined_func[][16] = {
+        "ecall_pnt_int",
+    };
+
+    for (int j = 0; j <
sizeof(defined_func)/sizeof(defined_func[0]); j++) {
+        if (strcmp(name, defined_func[j]) == 0) {
+            return j;
+        }
+    }
+    return -1;
+}
+
+static int get_node_val(ir_node_t* ptr, int reg) { // loads a node's value into reg (constant or stack slot); returns bytes emitted
+    int len = 0;
+    if (ptr->tag == IR_NODE_CONST_INT) {
+        GENCODES(LI(reg, ptr->data.const_int.val));
+    } else {
+        int offset = stack_offset(ptr);
+        GENCODE(LW(reg, REG_SP, offset));
+    }
+    return len;
+}
+
+static int gen_instr(ir_bblock_t* block, ir_node_t* instr) { // emits code for one IR instruction; returns bytes emitted
+    int len = 0;
+    int offset;
+    switch (instr->tag) {
+        case IR_NODE_ALLOC: {
+            break;
+        }
+        case IR_NODE_LOAD: {
+            // S1 = *(S0 + imm)
+            offset = stack_offset(instr->data.load.target);
+            GENCODE(LW(REG_T0, REG_SP, offset));
+            offset = stack_offset(instr); // spill to this instruction's own slot, which get_node_val reads later
+            GENCODE(SW(REG_T0, REG_SP, offset));
+            break;
+        }
+        case IR_NODE_STORE: {
+            // *(S0 + imm) = S1
+            len += get_node_val(instr->data.store.value, REG_T0);
+            offset = stack_offset(instr->data.store.target);
+            GENCODE(SW(REG_T0, REG_SP, offset));
+            break;
+        }
+        case IR_NODE_RET: {
+            // A0 = S0
+            if (instr->data.ret.ret_val != NULL) {
+                len += get_node_val(instr->data.ret.ret_val, REG_A0);
+            }
+            GENCODE(LW(REG_RA, REG_SP, 0));
+            GENCODE(ADDI(REG_SP, REG_SP, ctx.stack_offset));
+            GENCODE(RET());
+            break;
+        }
+        case IR_NODE_OP: {
+            len += get_node_val(instr->data.op.lhs, REG_T1);
+            len += get_node_val(instr->data.op.rhs, REG_T2);
+
+            switch (instr->data.op.op) {
+                case IR_OP_ADD:
+                    GENCODE(ADD(REG_T0, REG_T1, REG_T2));
+                    break;
+                case IR_OP_SUB:
+                    GENCODE(SUB(REG_T0, REG_T1, REG_T2));
+                    break;
+                case IR_OP_MUL:
+                    GENCODE(MUL(REG_T0, REG_T1, REG_T2));
+                    break;
+                case IR_OP_DIV:
+                    GENCODE(DIV(REG_T0, REG_T1, REG_T2));
+                    break;
+                case IR_OP_MOD:
+                    GENCODE(REM(REG_T0, REG_T1, REG_T2));
+                    break;
+                default:
+                    error("ERROR gen_instr op in riscv");
+                    break;
+            }
+            offset = stack_offset(instr);
+            GENCODE(SW(REG_T0, REG_SP, offset));
+            break;
+        }
+        case IR_NODE_BRANCH: {
+            len += get_node_val(instr->data.branch.cond, REG_T0);
+            int tidx = block_idx(instr->data.branch.true_bblock);
+            int fidx = block_idx(instr->data.branch.false_bblock);
+            int cidx = block_idx(ctx.cur_block);
+            jmp_t* jmp;
+            jmp = xmalloc(sizeof(jmp_t));
+            *jmp = (jmp_t) {
+                .base_offset = 8, // the bnez is the second-to-last word of the block
+                .code_pos = ctx.codes.size,
+                .type = JMP_BRANCH,
+                .to_idx = tidx,
+                .cur_idx=cidx,
+            };
+            vector_push(ctx.jmp, jmp);
+            GENCODE(BNEZ(REG_T0, 0));
+            jmp = xmalloc(sizeof(jmp_t));
+            *jmp = (jmp_t) {
+                .base_offset = 4, // the fall-through jump is the last word of the block
+                .code_pos = ctx.codes.size,
+                .type = JMP_JUMP,
+                .to_idx = fidx,
+                .cur_idx=cidx,
+            };
+            vector_push(ctx.jmp, jmp);
+            GENCODE(J(0));
+            break;
+        }
+        case IR_NODE_JUMP: {
+            int idx = block_idx(instr->data.jump.target_bblock);
+            jmp_t* jmp = xmalloc(sizeof(jmp_t));
+            *jmp = (jmp_t) {
+                .base_offset = 4,
+                .code_pos = ctx.codes.size,
+                .type = JMP_JUMP,
+                .to_idx = idx,
+                .cur_idx=block_idx(ctx.cur_block),
+            };
+            vector_push(ctx.jmp, jmp);
+            GENCODE(J(0));
+            break;
+        }
+        case IR_NODE_CALL: {
+            if (instr->data.call.args.size > 8) {
+                error("can't add so much params");
+            }
+            int param_regs[8] = {
+                REG_A0, REG_A1, REG_A2, REG_A3,
+                REG_A4, REG_A5, REG_A6, REG_A7
+            };
+            for (int i = 0; i < instr->data.call.args.size; i++) {
+                ir_node_t* param = vector_at(instr->data.call.args, i);
+                len += get_node_val(param, param_regs[i]);
+            }
+
+            int system_func_idx = system_func(instr->data.call.callee->name);
+            if (system_func_idx == 0) {
+                // ecall_pnt_int
+                GENCODE(ADDI(REG_A7, REG_X0, 0x1));
+                GENCODE(ECALL());
+                break;
+            }
+
+            jmp_t* jmp = xmalloc(sizeof(jmp_t));
+            *jmp = (jmp_t) {
+                .base_offset = ctx.cur_func_offset + ctx.cur_block_offset + len,
+                .code_pos = ctx.codes.size,
+                .type = JMP_CALL,
+                .to_idx = func_idx(instr->data.call.callee),
+                .cur_idx = func_idx(ctx.cur_func),
+            };
+            vector_push(ctx.call, jmp);
+
+            GENCODES( // no extra parentheses: they would fold the two words into one comma expression
+                CALL(0)
+            );
+            break;
+        }
+        default:
+            error("ERROR gen_instr in riscv");
+    }
+    return len;
+}
+
+static int gen_block(ir_bblock_t* block) { // emits every instruction of a block; returns bytes emitted
+    int len = 0;
+    ctx.cur_block = block;
+    for (int i = 0; i < block->instrs.size; i ++) {
+        ctx.cur_block_offset =
len;
+        len += gen_instr(block, vector_at(block->instrs, i));
+    }
+    return len;
+}
+
+static int gen_func(ir_func_t* func) { // emits prologue and all blocks of func, then patches its branches/jumps; returns bytes emitted
+    int len = 0;
+    ctx.cur_func = func;
+    ctx.stack_base = 16;
+    ctx.stack_offset = ctx.stack_base;
+    for (int i = 0; i < func->bblocks.size; i++) { // one 4-byte slot per IR instruction
+        ctx.stack_offset += 4 * (*vector_at(func->bblocks, i)).instrs.size;
+    }
+    GENCODE(ADDI(REG_SP, REG_SP, -ctx.stack_offset));
+    GENCODE(SW(REG_RA, REG_SP, 0));
+
+    int param_regs[8] = {
+        REG_A0, REG_A1, REG_A2, REG_A3,
+        REG_A4, REG_A5, REG_A6, REG_A7
+    };
+    if (func->params.size > 8) {
+        error("can't add so much params");
+    }
+    for (int i = 0; i < func->params.size; i++) { // spill incoming argument registers to their slots
+        int offset = stack_offset(vector_at(func->params, i));
+        GENCODE(SW(param_regs[i], REG_SP, offset));
+    }
+
+    int jmp_cache[func->bblocks.size + 1]; // jmp_cache[i] = byte offset of block i relative to block 0
+
+    if (ctx.jmp.data != NULL) vector_free(ctx.jmp);
+    vector_init(ctx.jmp);
+    jmp_cache[0] = 0;
+    for(int i = 0; i < func->bblocks.size; i ++) {
+        ctx.cur_func_offset = len;
+        jmp_cache[i + 1] = jmp_cache[i];
+        int ret = gen_block(vector_at(func->bblocks, i));
+        jmp_cache[i + 1] += ret;
+        len += ret;
+    }
+
+    for (int i = 0; i < ctx.jmp.size; i++) { // resolve placeholder offsets now that block sizes are known
+        jmp_t* jmp = vector_at(ctx.jmp, i);
+        int32_t code = 0;
+        int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx + 1] - jmp->base_offset);
+        if (jmp->type == JMP_JUMP) {
+            code = J(offset);
+        } else {
+            code = BNEZ(REG_T0, offset);
+        }
+        ctx.codes.data[jmp->code_pos] = (rv32code_t) {
+            .code = code,
+        };
+    }
+
+    return len;
+}
+
+static void gen_code(ir_prog_t* prog) { // generates every function of prog, then patches call sites
+    ctx.prog = prog;
+
+
+    for (int i = 0; i < prog->extern_funcs.size; i++) {
+        if (system_func(prog->extern_funcs.data[i]->name) == -1) {
+            error("func %s not defined and not a system func", prog->extern_funcs.data[i]->name);
+        }
+    }
+    int len = 0;
+    int jmp_cache[prog->funcs.size + 1]; // jmp_cache[i] = byte offset of function i
+    jmp_cache[0] = 0; // was read uninitialized by the loop below
+    for(int i = 0; i < prog->funcs.size; i ++) {
+        jmp_cache[i + 1] = jmp_cache[i];
+        int ret = gen_func(vector_at(prog->funcs, i));
+        jmp_cache[i + 1] += ret;
+        len += ret;
+    }
+
+
+    for (int i = 0; i < ctx.call.size; i++) {
+        jmp_t* jmp = vector_at(ctx.call, i);
+        // Rewrite both words of the placeholder call sequence with the resolved offset.
+        // FIXME ERROR
+        int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx] + jmp->base_offset);
+        uint32_t codes[2] = {
+            CALL(offset)
+        };
+        for (int i = 0; i < 2; i++) {
+            ctx.codes.data[jmp->code_pos + i] = (rv32code_t) {
+                .code = codes[i],
+            };
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    // gcc rv32ima_codegen.c -o rv32gen.exe
+    const char* infilename = "test.c";
+    const char* outfilename = "flat.bin";
+    if (argc >= 2) {
+        infilename = argv[1];
+    }
+    if (argc >= 3) {
+        outfilename = argv[2];
+    }
+    FILE* in = fopen(infilename, "r");
+    FILE* out = fopen(outfilename, "wb");
+    if (in == NULL || out == NULL) {
+        printf("Failed to open file\n");
+        return 1;
+    }
+
+    struct ASTNode* root = frontend(infilename, in, (sread_fn)fread_s);
+    gen_ir_from_ast(root);
+    gen_code(&prog);
+
+    for (int i = 0; i < CRT_CODE_SIZE; i++) { // reserve the startup area as NOPs
+        write_inst((union rv32code) {
+            .code = NOP(),
+        }, out);
+    }
+    fflush(out);
+    assert(CRT_CODE_SIZE >= sizeof(gcodes) / sizeof(gcodes[0]));
+    fseek(out, 0, SEEK_SET);
+    fwrite(gcodes, sizeof(gcodes), 1, out); // overwrite the head of the NOP area with the startup stub
+    fflush(out);
+    fseek(out, CRT_CODE_SIZE * 4, SEEK_SET);
+
+    fwrite(ctx.codes.data, sizeof(ctx.codes.data[0]), ctx.codes.size, out);
+    fflush(out);
+    fclose(in);
+    fclose(out);
+    // printf("comiler end out: %s\n", outfilename);
+    return 0;
+}
diff --git a/ccompiler/frontend/Makefile b/ccompiler/frontend/Makefile
index f115087..bfc4ace 100644
--- a/ccompiler/frontend/Makefile
+++ b/ccompiler/frontend/Makefile
@@ -1,7 +1,7 @@
 # 编译器设置
 CC = gcc
 AR = ar
-CFLAGS = -g
+CFLAGS = -g -Wall
 
 # 源文件路径
 LEXER_DIR = ./lexer
@@ -13,6 +13,7 @@ SYMTAB_DIR = ./parser/symtab
 SRCS = \
 	frontend.c \
 	$(LEXER_DIR)/lexer.c \
+	$(LEXER_DIR)/token.c \
 	$(PARSER_DIR)/parser.c \
 	$(AST_DIR)/ast.c \
 	$(AST_DIR)/block.c \
diff --git a/ccompiler/frontend/frontend.c b/ccompiler/frontend/frontend.c
index 9d4b190..b23495f 100644
--- a/ccompiler/frontend/frontend.c
+++
b/ccompiler/frontend/frontend.c @@ -3,13 +3,13 @@ #include "frontend.h" struct ASTNode* frontend(const char* file, void* stream, sread_fn sread) { - struct Lexer lexer; + lexer_t lexer; init_lexer(&lexer, file, stream, sread); - struct SymbolTable symtab; + symtab_t symtab; init_symtab(&symtab); - struct Parser parser; + parser_t parser; init_parser(&parser, &lexer, &symtab); parse_prog(&parser); diff --git a/ccompiler/frontend/frontend.h b/ccompiler/frontend/frontend.h index bc4714e..dd565ef 100644 --- a/ccompiler/frontend/frontend.h +++ b/ccompiler/frontend/frontend.h @@ -4,8 +4,9 @@ #ifndef error #include #include +#include #define STD_LIBRARY -#define error(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while (0) +#define error(...) do { fprintf(stderr, __VA_ARGS__); assert(0); } while (0) #endif #ifndef warn #include @@ -15,10 +16,12 @@ #define xmalloc(size) malloc(size) +#ifndef FRONTEND_IMPLEMENTATION #include "parser/parser.h" #include "parser/ast/ast.h" typedef int (*sread_fn)(void *dst_buf, int dst_size, int elem_size, int count, void *stream); struct ASTNode* frontend(const char* file, void* stream, sread_fn sread); +#endif #endif \ No newline at end of file diff --git a/ccompiler/frontend/lexer/lexer.c b/ccompiler/frontend/lexer/lexer.c index c3e735f..d0ca174 100644 --- a/ccompiler/frontend/lexer/lexer.c +++ b/ccompiler/frontend/lexer/lexer.c @@ -26,13 +26,15 @@ the distribution and installation instructions. Chris Fraser / cwf@aya.yale.edu David Hanson / drh@drhanson.net */ +#define FRONTEND_IMPLEMENTATION #include "../frontend.h" +#include "token.h" #include "lexer.h" static const struct { const char* name; enum CSTD_KEYWORD std_type; - enum TokenType tok; + tok_type_t tok; } keywords[] = { #define X(name, std_type, tok, ...) { #name, std_type, tok }, KEYWORD_TABLE @@ -72,7 +74,7 @@ static inline int keyword_cmp(const char* name, int len) { return -1; // Not a keyword. 
} -void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_sread_fn sread) +void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread) { lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer); lexer->index = 1; @@ -86,12 +88,12 @@ void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_ } } -static void flush_buffer(struct Lexer* lexer) { +static void flush_buffer(lexer_t* lexer) { int num = lexer->end_ptr - lexer->cur_ptr; for (int i = 0; i < num; i++) { lexer->buffer[i] = lexer->cur_ptr[i]; } - lexer->cur_ptr = lexer->buffer; + lexer->cur_ptr = (unsigned char*)lexer->buffer; int read_size = LEXER_BUFFER_SIZE - num; // TODO size_t to int maybe lose precision @@ -109,7 +111,7 @@ static void flush_buffer(struct Lexer* lexer) { } } -static void goto_newline(struct Lexer* lexer) { +static void goto_newline(lexer_t* lexer) { do { if (lexer->cur_ptr == lexer->end_ptr) { flush_buffer(lexer); @@ -119,7 +121,7 @@ static void goto_newline(struct Lexer* lexer) { } while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0'); } -static void goto_block_comment(struct Lexer* lexer) { +static void goto_block_comment(lexer_t* lexer) { while (1) { if (lexer->end_ptr - lexer->cur_ptr < 2) { flush_buffer(lexer); @@ -155,7 +157,7 @@ static char got_slash(unsigned char* peek) { } } -static void parse_char_literal(struct Lexer* lexer, struct Token* token) { +static void parse_char_literal(lexer_t* lexer, tok_t* token) { char val = 0; unsigned char* peek = lexer->cur_ptr + 1; if (*peek == '\\') { @@ -166,16 +168,16 @@ static void parse_char_literal(struct Lexer* lexer, struct Token* token) { } if (*peek != '\'') error("Unclosed character literal"); - token->constant.ch = val; + token->val.ch = val; lexer->cur_ptr = peek + 1; - token->constant.have = 1; + token->val.have = 1; token->type = TOKEN_CHAR_LITERAL; } -static void parse_string_literal(struct Lexer* lexer, struct Token* token) { +static 
void parse_string_literal(lexer_t* lexer, tok_t* token) { unsigned char* peek = lexer->cur_ptr + 1; // TODO string literal size check - char* dest = token->constant.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1); + char* dest = token->val.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1); int len = 0; while (*peek != '"') { @@ -191,12 +193,12 @@ static void parse_string_literal(struct Lexer* lexer, struct Token* token) { } dest[len] = '\0'; lexer->cur_ptr = peek + 1; - token->constant.have = 1; + token->val.have = 1; token->type = TOKEN_STRING_LITERAL; } // FIXME it write by AI maybe error -static void parse_number(struct Lexer* lexer, struct Token* token) { +static void parse_number(lexer_t* lexer, tok_t* token) { unsigned char* peek = lexer->cur_ptr; int base = 10; int is_float = 0; @@ -255,12 +257,12 @@ static void parse_number(struct Lexer* lexer, struct Token* token) { if ((*peek == 'e' || *peek == 'E') && base == 10) { is_float = 1; peek++; - int exp_sign = 1; + // int exp_sign = 1; int exponent = 0; if (*peek == '+') peek++; else if (*peek == '-') { - exp_sign = -1; + // exp_sign = -1; peek++; } @@ -273,19 +275,19 @@ static void parse_number(struct Lexer* lexer, struct Token* token) { // 存储结果 lexer->cur_ptr = peek; - token->constant.have = 1; + token->val.have = 1; if (is_float) { - token->constant.d = float_val; + token->val.d = float_val; token->type = TOKEN_FLOAT_LITERAL; } else { - token->constant.ll = int_val; + token->val.ll = int_val; token->type = TOKEN_INT_LITERAL; } } #define GOT_ONE_TOKEN_BUF_SIZE 64 // /zh/c/language/operator_arithmetic.html -void get_token(struct Lexer* lexer, struct Token* token) { +void get_token(lexer_t* lexer, tok_t* token) { // 需要保证缓冲区始终可读 if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) { flush_buffer(lexer); @@ -305,8 +307,8 @@ void get_token(struct Lexer* lexer, struct Token* token) { token->type = TOKEN_FLUSH; } - enum TokenType tok = TOKEN_INIT; - struct TokenConstant constant; + tok_type_t tok = TOKEN_INIT; + tok_val_t 
constant; constant.have = 0; // once step @@ -392,7 +394,7 @@ void get_token(struct Lexer* lexer, struct Token* token) { switch (*peek++) { case '=': tok = TOKEN_NEQ; break; default: peek--, tok = TOKEN_NOT; break; - } + } break; case '[': tok = TOKEN_L_BRACKET; break; case ']': @@ -454,7 +456,7 @@ void get_token(struct Lexer* lexer, struct Token* token) { case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z': case '_': // TOKEN_IDENT - if (*peek == 'L' && *peek == '\'' || *peek == 'L' && *peek == '"') { + if ((*peek == 'L' && *peek == '\'') || (*peek == 'L' && *peek == '"')) { error("unsupport wide-character char literal by `L` format"); } while (1) { @@ -469,18 +471,18 @@ void get_token(struct Lexer* lexer, struct Token* token) { break; } - int res = keyword_cmp(lexer->cur_ptr, peek - (lexer->cur_ptr)); + int res = keyword_cmp((const char*)lexer->cur_ptr, peek - (lexer->cur_ptr)); if (res == -1) { int strlen = peek - lexer->cur_ptr; unsigned char* str = xmalloc(strlen + 1); constant.have = 1; - constant.str = str; + constant.str = (char*)str; for (int i = 0; i < strlen; i++) { str[i] = lexer->cur_ptr[i]; } str[strlen] = '\0'; constant.have = 1; - constant.str = str; + constant.str = (char*)str; tok = TOKEN_IDENT; break; } else { tok = keywords[res].tok; break; @@ -492,32 +494,16 @@ void get_token(struct Lexer* lexer, struct Token* token) { lexer->cur_ptr = peek; END: - token->constant = constant; + token->val = constant; token->type = tok; } // get_token maybe got invalid (with parser) -void get_valid_token(struct Lexer* lexer, struct Token* token) { - enum TokenType type; +void get_valid_token(lexer_t* lexer, tok_t* token) { + tok_type_t type; do { get_token(lexer, token); type = token->type; } while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT); } -// 生成字符串映射(根据需求选择#str或#name) -static const char* token_strings[] = { - // 普通token使用#str - #define X(str, tok) [tok] = #str, - TOKEN_TABLE - #undef X - - // 
关键字使用#name - #define X(name, std, tok) [tok] = #name, - KEYWORD_TABLE - #undef X -}; - -const char* get_token_name(enum TokenType type) { - return token_strings[type]; -} diff --git a/ccompiler/frontend/lexer/lexer.h b/ccompiler/frontend/lexer/lexer.h index 57a8d26..726f4d0 100644 --- a/ccompiler/frontend/lexer/lexer.h +++ b/ccompiler/frontend/lexer/lexer.h @@ -2,13 +2,17 @@ #define __LEXER_H__ #include "token.h" +#ifndef LEXER_MAX_TOKEN_SIZE #define LEXER_MAX_TOKEN_SIZE 63 +#endif +#ifndef LEXER_BUFFER_SIZE #define LEXER_BUFFER_SIZE 4095 +#endif typedef int (*lexer_sread_fn)(void *dst_buf, int dst_size, int elem_size, int count, void *stream); -struct Lexer { +typedef struct lexer { int line; int index; // const char current_file_name[LEXER_BUFFER_SIZE+1]; @@ -19,22 +23,15 @@ struct Lexer { lexer_sread_fn sread; void* stream; -}; +} lexer_t; -struct Token { - enum TokenType type; - struct TokenConstant constant; -}; - -void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, +void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread); -// -void get_token(struct Lexer* lexer, struct Token* token); +// pure token getter it will included empty token like TOKEN_FLUSH +void get_token(lexer_t* lexer, tok_t* token); // get_token maybe got invalid (with parser as TOKEN_FLUSH) -void get_valid_token(struct Lexer* lexer, struct Token* token); - -const char* get_token_name(enum TokenType token); +void get_valid_token(lexer_t* lexer, tok_t* token); #endif diff --git a/ccompiler/frontend/lexer/tests/Makefile b/ccompiler/frontend/lexer/tests/Makefile new file mode 100644 index 0000000..91c4737 --- /dev/null +++ b/ccompiler/frontend/lexer/tests/Makefile @@ -0,0 +1,17 @@ +CC = gcc +CFLAGS = -g -Wall +SRC = ../lexer.c ../token.c + +all = test_all + +test_all: test + ./test + +run: + $(CC) $(CFLAGS) $(SRC) run.c -o run + +test: + $(CC) $(CFLAGS) $(SRC) -o test test.c + +clean: + rm -f test run diff --git 
a/ccompiler/frontend/lexer/tests/test_lexer.c b/ccompiler/frontend/lexer/tests/run.c similarity index 80% rename from ccompiler/frontend/lexer/tests/test_lexer.c rename to ccompiler/frontend/lexer/tests/run.c index ffdb335..3e4cab9 100644 --- a/ccompiler/frontend/lexer/tests/test_lexer.c +++ b/ccompiler/frontend/lexer/tests/run.c @@ -1,8 +1,8 @@ #include "../lexer.h" #include -// gcc -g ../lexer.c test_lexer.c -o test_lexer +// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer /* -struct TokenConstant { +tok_tConstant { int have; union { char ch; @@ -31,9 +31,9 @@ int main(int argc, char* argv[]) { } printf("open file success\n"); - struct Lexer lexer; + lexer_t lexer; init_lexer(&lexer, "test_lexter.c", fp, (lexer_sread_fn)fread_s); - struct Token tok; + tok_t tok; while (1) { get_valid_token(&lexer, &tok); @@ -41,6 +41,6 @@ int main(int argc, char* argv[]) { break; } printf("line: %d, column: %d, type: %3d, typename: %s\n", - lexer.line, lexer.index, tok.type, get_token_name(tok.type)); + lexer.line, lexer.index, tok.type, get_tok_name(tok.type)); } } diff --git a/ccompiler/frontend/lexer/tests/test.c b/ccompiler/frontend/lexer/tests/test.c new file mode 100644 index 0000000..8a1f866 --- /dev/null +++ b/ccompiler/frontend/lexer/tests/test.c @@ -0,0 +1,178 @@ +// test_lexer.c +#include "../../../../libcore/acutest.h" +#include "../lexer.h" +#include + +int test_read(void *dst_buf, int dst_size, int elem_size, int count, void *stream) { + if (stream == NULL) { + return 0; + } + int size = dst_size > elem_size * count ? 
elem_size * count : dst_size; + memcpy(dst_buf, stream, size); + return size; +} + +// 测试辅助函数 +static inline void test_lexer_string(const char* input, tok_type_t expected_type) { + lexer_t lexer; + tok_t token; + + init_lexer(&lexer, "test.c", (void*)input, test_read); + get_valid_token(&lexer, &token); + + TEST_CHECK(token.type == expected_type); + TEST_MSG("Expected: %s", get_tok_name(expected_type)); + TEST_MSG("Got: %s", get_tok_name(token.type)); +} + +// 基础运算符测试 +void test_operators() { + TEST_CASE("Arithmetic operators"); { + test_lexer_string("+", TOKEN_ADD); + test_lexer_string("++", TOKEN_ADD_ADD); + test_lexer_string("+=", TOKEN_ASSIGN_ADD); + test_lexer_string("-", TOKEN_SUB); + test_lexer_string("--", TOKEN_SUB_SUB); + test_lexer_string("-=", TOKEN_ASSIGN_SUB); + test_lexer_string("*", TOKEN_MUL); + test_lexer_string("*=", TOKEN_ASSIGN_MUL); + test_lexer_string("/", TOKEN_DIV); + test_lexer_string("/=", TOKEN_ASSIGN_DIV); + test_lexer_string("%", TOKEN_MOD); + test_lexer_string("%=", TOKEN_ASSIGN_MOD); + } + + TEST_CASE("Bitwise operators"); { + test_lexer_string("&", TOKEN_AND); + test_lexer_string("&&", TOKEN_AND_AND); + test_lexer_string("&=", TOKEN_ASSIGN_AND); + test_lexer_string("|", TOKEN_OR); + test_lexer_string("||", TOKEN_OR_OR); + test_lexer_string("|=", TOKEN_ASSIGN_OR); + test_lexer_string("^", TOKEN_XOR); + test_lexer_string("^=", TOKEN_ASSIGN_XOR); + test_lexer_string("~", TOKEN_BIT_NOT); + test_lexer_string("<<", TOKEN_L_SH); + test_lexer_string("<<=", TOKEN_ASSIGN_L_SH); + test_lexer_string(">>", TOKEN_R_SH); + test_lexer_string(">>=", TOKEN_ASSIGN_R_SH); + } + + TEST_CASE("Comparison operators"); { + test_lexer_string("==", TOKEN_EQ); + test_lexer_string("!=", TOKEN_NEQ); + test_lexer_string("<", TOKEN_LT); + test_lexer_string("<=", TOKEN_LE); + test_lexer_string(">", TOKEN_GT); + test_lexer_string(">=", TOKEN_GE); + } + + TEST_CASE("Special symbols"); { + test_lexer_string("(", TOKEN_L_PAREN); + test_lexer_string(")", TOKEN_R_PAREN); 
+ test_lexer_string("[", TOKEN_L_BRACKET); + test_lexer_string("]", TOKEN_R_BRACKET); + test_lexer_string("{", TOKEN_L_BRACE); + test_lexer_string("}", TOKEN_R_BRACE); + test_lexer_string(";", TOKEN_SEMICOLON); + test_lexer_string(",", TOKEN_COMMA); + test_lexer_string(":", TOKEN_COLON); + test_lexer_string(".", TOKEN_DOT); + test_lexer_string("...", TOKEN_ELLIPSIS); + test_lexer_string("->", TOKEN_DEREF); + test_lexer_string("?", TOKEN_COND); + } +} + +// 关键字测试 +void test_keywords() { + TEST_CASE("C89 keywords"); + test_lexer_string("while", TOKEN_WHILE); + test_lexer_string("sizeof", TOKEN_SIZEOF); + + // TEST_CASE("C99 keywords"); + // test_lexer_string("restrict", TOKEN_RESTRICT); + // test_lexer_string("_Bool", TOKEN_INT); // 需确认你的类型定义 +} + +// 字面量测试 +void test_literals() { + TEST_CASE("Integer literals"); { + // 十进制 + test_lexer_string("0", TOKEN_INT_LITERAL); + test_lexer_string("123", TOKEN_INT_LITERAL); + // test_lexer_string("2147483647", TOKEN_INT_LITERAL); + + // // 十六进制 + // test_lexer_string("0x0", TOKEN_INT_LITERAL); + // test_lexer_string("0x1A3F", TOKEN_INT_LITERAL); + // test_lexer_string("0XABCDEF", TOKEN_INT_LITERAL); + + // // 八进制 + // test_lexer_string("0123", TOKEN_INT_LITERAL); + // test_lexer_string("0777", TOKEN_INT_LITERAL); + + // // 边界值测试 + // test_lexer_string("2147483647", TOKEN_INT_LITERAL); // INT_MAX + // test_lexer_string("4294967295", TOKEN_INT_LITERAL); // UINT_MAX + } + + // TEST_CASE("Character literals"); { + // test_lexer_string("'a'", TOKEN_CHAR_LITERAL); + // test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL); + // test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL); + // test_lexer_string("'\\\\'", TOKEN_CHAR_LITERAL); + // test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL); + // } + + TEST_CASE("String literals"); { + test_lexer_string("\"hello\"", TOKEN_STRING_LITERAL); + test_lexer_string("\"multi-line\\nstring\"", TOKEN_STRING_LITERAL); + test_lexer_string("\"escape\\\"quote\"", TOKEN_STRING_LITERAL); + } + + // TEST_CASE("Integer 
literals"); + // test_lexer_string("123", TOKEN_INT_LITERAL); + // test_lexer_string("0x1F", TOKEN_INT_LITERAL); + + // TEST_CASE("Floating literals"); + // test_lexer_string("3.14e-5", TOKEN_FLOAT_LITERAL); + + // TEST_CASE("Character literals"); + // test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL); +} + +// 边界测试 +void test_edge_cases() { + // TEST_CASE("Long identifiers"); + // char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0}; + // memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1); + // test_lexer_string(long_id, TOKEN_IDENT); + + // TEST_CASE("Buffer boundary"); + // char boundary[LEXER_BUFFER_SIZE*2] = {0}; + // memset(boundary, '+', LEXER_BUFFER_SIZE*2-1); + // test_lexer_string(boundary, TOKEN_ADD); +} + +// 错误处理测试 +void test_error_handling() { + TEST_CASE("Invalid characters"); + lexer_t lexer; + tok_t token; + + init_lexer(&lexer, "test.c", NULL, test_read); + get_valid_token(&lexer, &token); + + TEST_CHECK(token.type == TOKEN_EOF); // 应触发错误处理 +} + +// 测试列表 +TEST_LIST = { + {"operators", test_operators}, + {"keywords", test_keywords}, + {"literals", test_literals}, + {"edge_cases", test_edge_cases}, + {"error_handling", test_error_handling}, + {NULL, NULL} +}; \ No newline at end of file diff --git a/ccompiler/frontend/lexer/token.c b/ccompiler/frontend/lexer/token.c new file mode 100644 index 0000000..39ed3bf --- /dev/null +++ b/ccompiler/frontend/lexer/token.c @@ -0,0 +1,86 @@ +#define FRONTEND_IMPLEMENTATION +#include "../frontend.h" +#include "token.h" + +#define ROUND_IDX(idx) ((idx) % tokbuf->cap) + +tok_t* pop_tok(tok_buf_t* tokbuf) { + if (tokbuf->size == 0) { + error("no token to pop"); + return NULL; + } + int idx = tokbuf->cur; + tokbuf->cur = ROUND_IDX(idx + 1); + tokbuf->size -= 1; + return tokbuf->buf + idx; +} + +void flush_peek_tok(tok_buf_t* tokbuf) { + tokbuf->peek = tokbuf->cur; +} + +void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) { + tokbuf->cur = 0; + tokbuf->end = 0; + tokbuf->peek = 0; + tokbuf->size = 0; + 
tokbuf->stream = stream; + tokbuf->gettok = gettok; + tokbuf->buf = NULL; + tokbuf->cap = 0; +} + +tok_t *peek_tok(tok_buf_t *tokbuf) +{ + int idx = tokbuf->peek; + idx = ROUND_IDX(idx + 1); + if (tokbuf->size >= tokbuf->cap) { + error("peek too deep, outof array size"); + } + if (tokbuf->peek == tokbuf->end) { + if (tokbuf->size == tokbuf->cap) { + error("peek_tok buffer overflow"); + } + if (tokbuf->gettok == NULL) { + error("peek_tok can not got tok"); + } + tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx])); + tokbuf->size++; + tokbuf->end = idx; + } + + tokbuf->peek = idx; + return &(tokbuf->buf[idx]); +} + +tok_type_t peek_tok_type(tok_buf_t* tokbuf) { + return peek_tok(tokbuf)->type; +} + +int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type) { + flush_peek_tok(tokbuf); + tok_t* tok = peek_tok(tokbuf); + if (tok->type != type) { + error("expected tok: %s, got %s", get_tok_name(type), get_tok_name(tok->type)); + } else { + pop_tok(tokbuf); + } + return 0; +} + +// 生成字符串映射(根据需求选择#str或#name) +static const char* token_strings[] = { + // 普通token使用#str + #define X(str, tok) [tok] = #str, + TOKEN_TABLE + #undef X + + // 关键字使用#name + #define X(name, std, tok) [tok] = #name, + KEYWORD_TABLE + #undef X +}; + +const char* get_tok_name(tok_type_t type) { + return token_strings[type]; +} diff --git a/ccompiler/frontend/lexer/token.h b/ccompiler/frontend/lexer/token.h index 81458dd..ae8bec9 100644 --- a/ccompiler/frontend/lexer/token.h +++ b/ccompiler/frontend/lexer/token.h @@ -105,7 +105,7 @@ enum CSTD_KEYWORD { // END // 定义TokenType枚举 -enum TokenType { +typedef enum tok_type { // 处理普通token #define X(str, tok) tok, TOKEN_TABLE @@ -115,9 +115,9 @@ enum TokenType { #define X(name, std, tok) tok, KEYWORD_TABLE #undef X -}; +} tok_type_t; -struct TokenConstant { +typedef struct tok_val { int have; union { char ch; @@ -127,124 +127,31 @@ struct TokenConstant { long long ll; char* str; }; -}; +} tok_val_t; -// "break" -// "case" -// "char" -// "const" -// "continue" -// 
"default" -// "do" -// "double" -// "else" -// "enum" -// "extern" -// "float" -// "for" -// "goto" -// "if" -// "inline (C99)" -// "int" -// "long" -// "register" -// "restrict (C99)" -// "return" -// "short" -// "signed" -// "sizeof" -// "static" -// "struct" -// "switch" -// "typedef" -// "union" -// "unsigned" -// "void" -// "volatile" -// "while" +typedef struct tok { + tok_type_t type; + tok_val_t val; +} tok_t; -// alignas (C23) -// alignof (C23) -// auto -// bool (C23) -// constexpr (C23) -// false (C23) -// nullptr (C23) -// static_assert (C23) -// thread_local (C23) -// true (C23) -// typeof (C23) -// typeof_unqual (C23) -// _Alignas (C11) -// _Alignof (C11) -// _Atomic (C11) -// _BitInt (C23) -// _Bool (C99) -// _Complex (C99) -// _Decimal128 (C23) -// _Decimal32 (C23) -// _Decimal64 (C23) -// _Generic (C11) -// _Imaginary (C99) -// _Noreturn (C11) -// _Static_assert (C11) -// _Thread_local (C11) +typedef struct tok_buf { + int cur; + int end; + int peek; + int size; + int cap; + tok_t* buf; + void* stream; + void (*gettok)(void* stream, tok_t* token); +} tok_buf_t; -// a = b -// a += b -// a -= b -// a *= b -// a /= b -// a %= b -// a &= b -// a |= b -// a ^= b -// a <<= b -// a >>= b - -// ++a -// --a -// a++ -// a-- - -// +a -// -a -// a + b -// a - b -// a * b -// a / b -// a % b -// ~a -// a & b -// a | b -// a ^ b -// a << b -// a >> b - -// !a -// a && b -// a || b - -// a == b -// a != b -// a < b -// a > b -// a <= b -// a >= b - -// a[b] -// *a -// &a -// a->b -// a.b - -// a(...) -// a, b -// (type) a -// a ? 
b : c -// sizeof - -// _Alignof -// (C11) +typedef void(*get_tokbuf_func)(void* stream, tok_t* token); +void init_tokbuf(tok_buf_t* tokbuf, void* stream, get_tokbuf_func gettok); +tok_t* peek_tok(tok_buf_t* tokbuf); +tok_t* pop_tok(tok_buf_t* tokbuf); +void flush_peek_tok(tok_buf_t* tokbuf); +tok_type_t peek_tok_type(tok_buf_t* tokbuf); +int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type); +const char* get_tok_name(tok_type_t type); #endif \ No newline at end of file diff --git a/ccompiler/frontend/parser/ast/ast.c b/ccompiler/frontend/parser/ast/ast.c index 9612c6b..2db771d 100644 --- a/ccompiler/frontend/parser/ast/ast.c +++ b/ccompiler/frontend/parser/ast/ast.c @@ -14,9 +14,9 @@ void init_ast_node(struct ASTNode* node) { } } -struct ASTNode* find_ast_node(struct ASTNode* node, enum ASTType type) { +// struct ASTNode* find_ast_node(struct ASTNode* node, ast_type_t type) { -} +// } #include static void pnt_depth(int depth) { @@ -25,149 +25,149 @@ static void pnt_depth(int depth) { } } -void pnt_ast(struct ASTNode* node, int depth) { - if (!node) return; - pnt_depth(depth); - switch (node->type) { - case NT_ROOT: - for (int i = 0; i < node->root.child_size; i++) { - pnt_ast(node->root.children[i], depth); - } - return; +// void pnt_ast(struct ASTNode* node, int depth) { +// if (!node) return; +// pnt_depth(depth); +// switch (node->type) { +// case NT_ROOT: +// for (int i = 0; i < node->root.child_size; i++) { +// pnt_ast(node->root.children[i], depth); +// } +// return; - case NT_ADD : printf("+ \n"); break; // (expr) + (expr) - case NT_SUB : printf("- \n"); break; // (expr) - (expr) - case NT_MUL : printf("* \n"); break; // (expr) * (expr) - case NT_DIV : printf("/ \n"); break; // (expr) / (expr) - case NT_MOD : printf("%%\n"); break; // (expr) % (expr) - case NT_AND : printf("& \n"); break; // (expr) & (expr) - case NT_OR : printf("| \n"); break; // (expr) | (expr) - case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr) - case NT_L_SH : printf("<<\n"); 
break; // (expr) << (expr) - case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr) - case NT_EQ : printf("==\n"); break; // (expr) == (expr) - case NT_NEQ : printf("!=\n"); break; // (expr) != (expr) - case NT_LE : printf("<=\n"); break; // (expr) <= (expr) - case NT_GE : printf(">=\n"); break; // (expr) >= (expr) - case NT_LT : printf("< \n"); break; // (expr) < (expr) - case NT_GT : printf("> \n"); break; // (expr) > (expr) - case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr) - case NT_OR_OR : printf("||\n"); break; // (expr) || (expr) - case NT_NOT : printf("! \n"); break; // ! (expr) - case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr) - case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符 - case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr) - // case NT_COND : // (expr) ? (expr) : (expr) +// case NT_ADD : printf("+ \n"); break; // (expr) + (expr) +// case NT_SUB : printf("- \n"); break; // (expr) - (expr) +// case NT_MUL : printf("* \n"); break; // (expr) * (expr) +// case NT_DIV : printf("/ \n"); break; // (expr) / (expr) +// case NT_MOD : printf("%%\n"); break; // (expr) % (expr) +// case NT_AND : printf("& \n"); break; // (expr) & (expr) +// case NT_OR : printf("| \n"); break; // (expr) | (expr) +// case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr) +// case NT_L_SH : printf("<<\n"); break; // (expr) << (expr) +// case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr) +// case NT_EQ : printf("==\n"); break; // (expr) == (expr) +// case NT_NEQ : printf("!=\n"); break; // (expr) != (expr) +// case NT_LE : printf("<=\n"); break; // (expr) <= (expr) +// case NT_GE : printf(">=\n"); break; // (expr) >= (expr) +// case NT_LT : printf("< \n"); break; // (expr) < (expr) +// case NT_GT : printf("> \n"); break; // (expr) > (expr) +// case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr) +// case NT_OR_OR : printf("||\n"); break; // (expr) || (expr) +// case NT_NOT : printf("! \n"); break; // ! 
(expr) +// case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr) +// case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符 +// case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr) +// // case NT_COND : // (expr) ? (expr) : (expr) - case NT_STMT_EMPTY : // ; - printf(";\n"); - break; - case NT_STMT_IF : // if (cond) { ... } [else {...}] - printf("if"); - pnt_ast(node->if_stmt.cond, depth+1); - pnt_ast(node->if_stmt.if_stmt, depth+1); - if (node->if_stmt.else_stmt) { - pnt_depth(depth); - printf("else"); - pnt_ast(node->if_stmt.else_stmt, depth+1); - } - break; - case NT_STMT_WHILE : // while (cond) { ... } - printf("while\n"); - pnt_ast(node->while_stmt.cond, depth+1); - pnt_ast(node->while_stmt.body, depth+1); - break; - case NT_STMT_DOWHILE : // do {...} while (cond) - printf("do-while\n"); - pnt_ast(node->do_while_stmt.body, depth+1); - pnt_ast(node->do_while_stmt.cond, depth+1); - break; - case NT_STMT_FOR : // for (init; cond; iter) {...} - printf("for\n"); - if (node->for_stmt.init) - pnt_ast(node->for_stmt.init, depth+1); - if (node->for_stmt.cond) - pnt_ast(node->for_stmt.cond, depth+1); - if (node->for_stmt.iter) - pnt_ast(node->for_stmt.iter, depth+1); - pnt_ast(node->for_stmt.body, depth+1); - break; - case NT_STMT_SWITCH : // switch (expr) { case ... } - case NT_STMT_BREAK : // break; - case NT_STMT_CONTINUE : // continue; - case NT_STMT_GOTO : // goto label; - case NT_STMT_CASE : // case const_expr: - case NT_STMT_DEFAULT : // default: - case NT_STMT_LABEL : // label: - break; - case NT_STMT_BLOCK : // { ... 
} - printf("{\n"); - for (int i = 0; i < node->block.child_size; i++) { - pnt_ast(node->block.children[i], depth+1); - } - pnt_depth(depth); - printf("}\n"); - break; - case NT_STMT_RETURN : // return expr; - printf("return"); - if (node->return_stmt.expr_stmt) { - printf(" "); - pnt_ast(node->return_stmt.expr_stmt, depth+1); - } else { - printf("\n"); - } - break; - case NT_STMT_EXPR : // expr; - printf("stmt\n"); - pnt_ast(node->expr_stmt.expr_stmt, depth); - pnt_depth(depth); - printf(";\n"); - break; - case NT_DECL_VAR : // type name; or type name = expr; - printf("decl_val\n"); - break; - case NT_DECL_FUNC: // type func_name(param_list); - printf("decl func %s\n", node->func.name->syms.tok.constant.str); - break; - case NT_FUNC : // type func_name(param_list) {...} - printf("def func %s\n", node->func.name->syms.tok.constant.str); - // pnt_ast(node->child.func.params, depth); - pnt_ast(node->func.body, depth); - // pnt_ast(node->child.func.ret, depth); - break; - case NT_PARAM : // 函数形参 - printf("param\n"); - case NT_ARG_LIST : // 实参列表(需要与NT_CALL配合) - printf("arg_list\n"); - case NT_TERM_CALL : // func (expr) - printf("call\n"); - break; - case NT_TERM_IDENT: - printf("%s\n", node->syms.tok.constant.str); - break; - case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword - struct Token * tok = &node->syms.tok; - switch (tok->type) { - case TOKEN_CHAR_LITERAL: - printf("%c\n", tok->constant.ch); - break; - case TOKEN_INT_LITERAL: - printf("%d\n", tok->constant.i); - break; - case TOKEN_STRING_LITERAL: - printf("%s\n", tok->constant.str); - break; - default: - printf("unknown term val\n"); - break; - } - default: - break; - } +// case NT_STMT_EMPTY : // ; +// printf(";\n"); +// break; +// case NT_STMT_IF : // if (cond) { ... 
} [else {...}] +// printf("if"); +// pnt_ast(node->if_stmt.cond, depth+1); +// pnt_ast(node->if_stmt.if_stmt, depth+1); +// if (node->if_stmt.else_stmt) { +// pnt_depth(depth); +// printf("else"); +// pnt_ast(node->if_stmt.else_stmt, depth+1); +// } +// break; +// case NT_STMT_WHILE : // while (cond) { ... } +// printf("while\n"); +// pnt_ast(node->while_stmt.cond, depth+1); +// pnt_ast(node->while_stmt.body, depth+1); +// break; +// case NT_STMT_DOWHILE : // do {...} while (cond) +// printf("do-while\n"); +// pnt_ast(node->do_while_stmt.body, depth+1); +// pnt_ast(node->do_while_stmt.cond, depth+1); +// break; +// case NT_STMT_FOR : // for (init; cond; iter) {...} +// printf("for\n"); +// if (node->for_stmt.init) +// pnt_ast(node->for_stmt.init, depth+1); +// if (node->for_stmt.cond) +// pnt_ast(node->for_stmt.cond, depth+1); +// if (node->for_stmt.iter) +// pnt_ast(node->for_stmt.iter, depth+1); +// pnt_ast(node->for_stmt.body, depth+1); +// break; +// case NT_STMT_SWITCH : // switch (expr) { case ... } +// case NT_STMT_BREAK : // break; +// case NT_STMT_CONTINUE : // continue; +// case NT_STMT_GOTO : // goto label; +// case NT_STMT_CASE : // case const_expr: +// case NT_STMT_DEFAULT : // default: +// case NT_STMT_LABEL : // label: +// break; +// case NT_STMT_BLOCK : // { ... 
} +// printf("{\n"); +// for (int i = 0; i < node->block.child_size; i++) { +// pnt_ast(node->block.children[i], depth+1); +// } +// pnt_depth(depth); +// printf("}\n"); +// break; +// case NT_STMT_RETURN : // return expr; +// printf("return"); +// if (node->return_stmt.expr_stmt) { +// printf(" "); +// pnt_ast(node->return_stmt.expr_stmt, depth+1); +// } else { +// printf("\n"); +// } +// break; +// case NT_STMT_EXPR : // expr; +// printf("stmt\n"); +// pnt_ast(node->expr_stmt.expr_stmt, depth); +// pnt_depth(depth); +// printf(";\n"); +// break; +// case NT_DECL_VAR : // type name; or type name = expr; +// printf("decl_val\n"); +// break; +// case NT_DECL_FUNC: // type func_name(param_list); +// printf("decl func %s\n", node->func.name->syms.tok.val.str); +// break; +// case NT_FUNC : // type func_name(param_list) {...} +// printf("def func %s\n", node->func.name->syms.tok.val.str); +// // pnt_ast(node->child.func.params, depth); +// pnt_ast(node->func.body, depth); +// // pnt_ast(node->child.func.ret, depth); +// break; +// case NT_PARAM : // 函数形参 +// printf("param\n"); +// case NT_ARG_LIST : // 实参列表(需要与NT_CALL配合) +// printf("arg_list\n"); +// case NT_TERM_CALL : // func (expr) +// printf("call\n"); +// break; +// case NT_TERM_IDENT: +// printf("%s\n", node->syms.tok.val.str); +// break; +// case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword +// tok_t * tok = &node->syms.tok; +// switch (tok->type) { +// case TOKEN_CHAR_LITERAL: +// printf("%c\n", tok->val.ch); +// break; +// case TOKEN_INT_LITERAL: +// printf("%d\n", tok->val.i); +// break; +// case TOKEN_STRING_LITERAL: +// printf("%s\n", tok->val.str); +// break; +// default: +// printf("unknown term val\n"); +// break; +// } +// default: +// break; +// } - // 通用子节点递归处理 - if (node->type <= NT_ASSIGN) { // 表达式类统一处理子节点 - if (node->expr.left) pnt_ast(node->expr.left, depth+1); - if (node->expr.right) pnt_ast(node->expr.right, depth + 1); - } -} +// // 通用子节点递归处理 +// if (node->type <= 
NT_ASSIGN) { // 表达式类统一处理子节点 +// if (node->expr.left) pnt_ast(node->expr.left, depth+1); +// if (node->expr.right) pnt_ast(node->expr.right, depth + 1); +// } +// } diff --git a/ccompiler/frontend/parser/ast/ast.h b/ccompiler/frontend/parser/ast/ast.h index cf77d59..e2e53cc 100644 --- a/ccompiler/frontend/parser/ast/ast.h +++ b/ccompiler/frontend/parser/ast/ast.h @@ -3,9 +3,10 @@ #include "../../frontend.h" #include "../../lexer/lexer.h" +#include "../../../../libcore/vector.h" #include "../type.h" -enum ASTType { +typedef enum { NT_INIT, NT_ROOT, // global scope in root node NT_ADD, // (expr) + (expr) @@ -75,31 +76,28 @@ enum ASTType { NT_TERM_VAL, NT_TERM_IDENT, NT_TERM_TYPE, -}; +} ast_type_t; -struct ASTNode { - enum ASTType type; +typedef struct ASTNode { + ast_type_t type; union { void *children[6]; struct { - struct ASTNode** children; - int child_size; + vector_header(children, struct ASTNode*); } root; struct { - struct ASTNode** children; // array of children - int child_size; + vector_header(children, struct ASTNode*); } block; struct { struct ASTNode* decl_node; - struct Token tok; + tok_t tok; } syms; struct { - struct ASTNode *arr; - int size; + vector_header(params, struct ASTNode*); } params; struct { - const char* name; + struct ASTNode* name; struct ASTNode* params; struct ASTNode* func_decl; } call; @@ -113,13 +111,12 @@ struct ASTNode { struct ASTNode *ret; struct ASTNode *name; struct ASTNode *params; // array of params - void* data; - } func_decl; + struct ASTNode *def; + } decl_func; struct { - struct ASTNode *ret; - struct ASTNode *name; - struct ASTNode *params; // array of params + struct ASTNode *decl; struct ASTNode *body; // optional + void* data; } func; struct { struct ASTNode *left; @@ -165,27 +162,26 @@ struct ASTNode { struct ASTNode *expr_stmt; } expr_stmt; }; -}; +} ast_node_t; struct ASTNode* new_ast_node(void); void init_ast_node(struct ASTNode* node); void pnt_ast(struct ASTNode* node, int depth); -struct Parser; -typedef 
struct ASTNode* (*parse_func_t) (struct Parser*); +typedef struct parser parser_t; +typedef struct ASTNode* (*parse_func_t) (parser_t*); -void parse_prog(struct Parser* parser); -struct ASTNode* parse_block(struct Parser* parser); -struct ASTNode* parse_stmt(struct Parser* parser); -struct ASTNode* parse_expr(struct Parser* parser); -struct ASTNode* parse_func(struct Parser* parser); -struct ASTNode* parse_decl(struct Parser* parser); +void parse_prog(parser_t* parser); +ast_node_t* parse_decl(parser_t* parser); +ast_node_t* parse_block(parser_t* parser); +ast_node_t* parse_stmt(parser_t* parser); +ast_node_t* parse_expr(parser_t* parser); -struct ASTNode* parse_ident(struct Parser* parser); -struct ASTNode* parse_type(struct Parser* parser); +ast_node_t* parse_type(parser_t* parser); -int peek_decl(struct Parser* parser); +ast_node_t* new_ast_ident_node(tok_t* tok); +ast_node_t* expect_pop_ident(tok_buf_t* tokbuf); -struct ASTNode* parser_ident_without_pop(struct Parser* parser); +int peek_decl(tok_buf_t* tokbuf); #endif diff --git a/ccompiler/frontend/parser/ast/block.c b/ccompiler/frontend/parser/ast/block.c index 525c795..2e362c7 100644 --- a/ccompiler/frontend/parser/ast/block.c +++ b/ccompiler/frontend/parser/ast/block.c @@ -1,48 +1,49 @@ -#include "../parser.h" #include "ast.h" +#include "../parser.h" #include "../symtab/symtab.h" + #ifndef BLOCK_MAX_NODE #define BLOCK_MAX_NODE (1024) #endif -struct ASTNode* parse_block(struct Parser* parser) { - symtab_enter_scope(parser->symtab); - - // parse_decl(parser); // decl_var - enum TokenType ttype; - struct ASTNode* node = new_ast_node(); +ast_node_t* new_ast_node_block() { + ast_node_t* node = new_ast_node(); node->type = NT_BLOCK; - flushpeektok(parser); - ttype = peektoktype(parser); - if (ttype != TOKEN_L_BRACE) { - error("block need '{' start"); - } - poptok(parser); + vector_init(node->block.children); + return node; +} - node->block.children = malloc(sizeof(struct ASTNode*) * BLOCK_MAX_NODE); - struct 
ASTNode* child = NULL; +ast_node_t* parse_block(parser_t* parser) { + symtab_enter_scope(parser->symtab); + tok_buf_t *tokbuf = &parser->tokbuf; + flush_peek_tok(tokbuf); + tok_type_t ttype; + ast_node_t* node = new_ast_node_block(); + + expect_pop_tok(tokbuf, TOKEN_L_BRACE); + ast_node_t* child = NULL; while (1) { - if (peek_decl(parser) == 1) { + if (peek_decl(tokbuf)) { child = parse_decl(parser); - goto ADD_CHILD; + vector_push(node->block.children, child); + continue; } - flushpeektok(parser); - ttype = peektoktype(parser); + flush_peek_tok(tokbuf); + ttype = peek_tok_type(tokbuf); switch (ttype) { - case TOKEN_R_BRACE: - poptok(parser); - goto END; - default: - child = parse_stmt(parser); - goto ADD_CHILD; - break; + case TOKEN_R_BRACE: { + pop_tok(tokbuf); + goto END; + } + default: { + child = parse_stmt(parser); + vector_push(node->block.children, child); + break; + } } - continue; - ADD_CHILD: - node->block.children[node->block.child_size++] = child; } END: symtab_leave_scope(parser->symtab); diff --git a/ccompiler/frontend/parser/ast/decl.c b/ccompiler/frontend/parser/ast/decl.c index 56b1414..9a2580c 100644 --- a/ccompiler/frontend/parser/ast/decl.c +++ b/ccompiler/frontend/parser/ast/decl.c @@ -6,9 +6,9 @@ * 0 false * 1 true */ -int peek_decl(struct Parser* parser) { - flushpeektok(parser); - switch (peektoktype(parser)) { +int peek_decl(tok_buf_t* tokbuf) { + flush_peek_tok(tokbuf); + switch (peek_tok_type(tokbuf)) { case TOKEN_STATIC: case TOKEN_EXTERN: case TOKEN_REGISTER: @@ -16,10 +16,10 @@ int peek_decl(struct Parser* parser) { error("not impliment"); break; default: - flushpeektok(parser); + flush_peek_tok(tokbuf); } - switch (peektoktype(parser)) { + switch (peek_tok_type(tokbuf)) { case TOKEN_VOID: case TOKEN_CHAR: case TOKEN_SHORT: @@ -27,60 +27,62 @@ int peek_decl(struct Parser* parser) { case TOKEN_LONG: case TOKEN_FLOAT: case TOKEN_DOUBLE: + // FIXME Ptr return 1; default: - flushpeektok(parser); + flush_peek_tok(tokbuf); } + return 0; } 
-struct ASTNode* parse_decl_val(struct Parser* parser) { - flushpeektok(parser); - // parse_type - enum TokenType ttype; - struct ASTNode* node; +ast_node_t* parse_decl_val(parser_t* parser) { + tok_buf_t* tokbuf = &parser->tokbuf; + tok_type_t ttype; + flush_peek_tok(tokbuf); - struct ASTNode* type_node = parse_type(parser); - struct ASTNode* name_node = parser_ident_without_pop(parser); + ast_node_t* node; + ast_node_t* type_node = parse_type(parser); + flush_peek_tok(tokbuf); + ast_node_t* name_node = new_ast_ident_node(peek_tok(tokbuf)); node = new_ast_node(); node->decl_val.type = type_node; node->decl_val.name = name_node; node->type = NT_DECL_VAR; - symtab_add_symbol(parser->symtab, name_node->syms.tok.constant.str, node); + symtab_add_symbol(parser->symtab, name_node->syms.tok.val.str, node, 0); - ttype = peektoktype(parser); + ttype = peek_tok_type(tokbuf); if (ttype == TOKEN_ASSIGN) { node->decl_val.expr_stmt = parse_stmt(parser); if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) { error("parser_decl_val want stmt_expr"); } } else if (ttype == TOKEN_SEMICOLON) { - poptok(parser); - expecttok(parser, TOKEN_SEMICOLON); + pop_tok(tokbuf); + expect_pop_tok(tokbuf, TOKEN_SEMICOLON); } else { error("parser_decl_val syntax error"); } return node; } -// 类型解析入口改进 -struct ASTNode* parse_decl(struct Parser* parser) { - flushpeektok(parser); - int idx; - enum TokenType ttype; - struct ASTNode* node; +ast_node_t* parse_decl(parser_t* parser) { + tok_buf_t* tokbuf = &parser->tokbuf; + flush_peek_tok(tokbuf); + tok_type_t ttype; + ast_node_t* node; - if (peek_decl(parser) == 0) { + if (peek_decl(tokbuf) == 0) { error("syntax error expect decl_val TYPE"); } - if (peektoktype(parser) != TOKEN_IDENT) { + if (peek_tok_type(tokbuf) != TOKEN_IDENT) { error("syntax error expect decl_val IDENT"); } - ttype = peektoktype(parser); + ttype = peek_tok_type(tokbuf); switch (ttype) { case TOKEN_L_PAREN: // ( - node = parse_func(parser); + return NULL; break; case TOKEN_ASSIGN: case 
TOKEN_SEMICOLON: diff --git a/ccompiler/frontend/parser/ast/expr.c b/ccompiler/frontend/parser/ast/expr.c index a34368e..e92d0b0 100644 --- a/ccompiler/frontend/parser/ast/expr.c +++ b/ccompiler/frontend/parser/ast/expr.c @@ -33,14 +33,18 @@ enum ParseType { PREFIX_PARSER, }; -static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec); +static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec); +#define NEXT(prec) parse_subexpression(tokbuf, symtab, prec) -static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right, - enum ASTType type) { - struct ASTNode* node = new_ast_node(); +static ast_node_t* gen_node2(ast_node_t* left, ast_node_t* right, + ast_type_t type) { + ast_node_t* node = new_ast_node(); node->type = type; node->expr.left = left; node->expr.right = right; + return node; + // FIXME + // switch (type) { // case NT_ADD : printf("+ \n"); break; // (expr) + (expr) // case NT_SUB : printf("- \n"); break; // (expr) - (expr) @@ -68,154 +72,157 @@ static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right, // } } -static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) { - struct ASTNode* node = new_ast_node(); +static ast_node_t* parse_comma(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { + ast_node_t* node = new_ast_node(); node->type = NT_COMMA; node->expr.left = left; - node->expr.right = parse_subexpression(parser, PREC_EXPRESSION); + node->expr.right = NEXT(PREC_EXPRESSION); + return node; } -static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) { - flushpeektok(parser); - enum TokenType ttype = peektoktype(parser); - poptok(parser); - struct ASTNode* node = new_ast_node(); +static ast_node_t* parse_assign(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { + flush_peek_tok(tokbuf); + tok_type_t ttype = peek_tok_type(tokbuf); + pop_tok(tokbuf); + ast_node_t* node = new_ast_node(); 
node->type = NT_ASSIGN; // saved left node->expr.left = left; enum Precedence next = PREC_ASSIGNMENT + 1; switch (ttype) { case TOKEN_ASSIGN : - left = parse_subexpression(parser, next); + left = NEXT(next); break; case TOKEN_ASSIGN_ADD : - left = gen_node2(left, parse_subexpression(parser, next), NT_ADD); + left = gen_node2(left, NEXT(next), NT_ADD); break; case TOKEN_ASSIGN_SUB : - left = gen_node2(left, parse_subexpression(parser, next), NT_SUB); + left = gen_node2(left, NEXT(next), NT_SUB); break; case TOKEN_ASSIGN_MUL : - left = gen_node2(left, parse_subexpression(parser, next), NT_MUL); + left = gen_node2(left, NEXT(next), NT_MUL); break; case TOKEN_ASSIGN_DIV : - left = gen_node2(left, parse_subexpression(parser, next), NT_DIV); + left = gen_node2(left, NEXT(next), NT_DIV); break; case TOKEN_ASSIGN_MOD : - left = gen_node2(left, parse_subexpression(parser, next), NT_MOD); + left = gen_node2(left, NEXT(next), NT_MOD); break; case TOKEN_ASSIGN_L_SH : - left = gen_node2(left, parse_subexpression(parser, next), NT_L_SH); + left = gen_node2(left, NEXT(next), NT_L_SH); break; case TOKEN_ASSIGN_R_SH : - left = gen_node2(left, parse_subexpression(parser, next), NT_R_SH); + left = gen_node2(left, NEXT(next), NT_R_SH); break; case TOKEN_ASSIGN_AND : - left = gen_node2(left, parse_subexpression(parser, next), NT_AND); + left = gen_node2(left, NEXT(next), NT_AND); break; case TOKEN_ASSIGN_OR : - left = gen_node2(left, parse_subexpression(parser, next), NT_OR); + left = gen_node2(left, NEXT(next), NT_OR); break; case TOKEN_ASSIGN_XOR : - left = gen_node2(left, parse_subexpression(parser, next), NT_XOR); + left = gen_node2(left, NEXT(next), NT_XOR); break; default: error("unsupported operator"); break; } node->expr.right = left; + return node; } -static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) { - flushpeektok(parser); - enum TokenType ttype = peektoktype(parser); - poptok(parser); - struct ASTNode* node = new_ast_node(); +static ast_node_t* 
parse_cmp(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { + flush_peek_tok(tokbuf); + tok_type_t ttype = peek_tok_type(tokbuf); + pop_tok(tokbuf); + ast_node_t* node = new_ast_node(); // saved left node->expr.left = left; switch (ttype) { case TOKEN_EQ: node->type = NT_EQ; - node->expr.right = parse_subexpression(parser, PREC_EQUALITY); + node->expr.right = NEXT(PREC_EQUALITY); break; case TOKEN_NEQ: node->type = NT_NEQ; - node->expr.right = parse_subexpression(parser, PREC_EQUALITY); + node->expr.right = NEXT(PREC_EQUALITY); break; case TOKEN_LT: node->type = NT_LT; - node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); + node->expr.right = NEXT(PREC_RELATIONAL); break; case TOKEN_GT: node->type = NT_GT; - node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); + node->expr.right = NEXT(PREC_RELATIONAL); break; case TOKEN_LE: node->type = NT_LE; - node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); + node->expr.right = NEXT(PREC_RELATIONAL); break; case TOKEN_GE: node->type = NT_GE; - node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); + node->expr.right = NEXT(PREC_RELATIONAL); break; default: error("invalid operator"); } + return node; } -static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) { - flushpeektok(parser); - enum TokenType ttype = peektoktype(parser); - poptok(parser); - struct ASTNode* node = new_ast_node(); +static ast_node_t* parse_cal(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { + flush_peek_tok(tokbuf); + tok_type_t ttype = peek_tok_type(tokbuf); + pop_tok(tokbuf); + ast_node_t* node = new_ast_node(); node->expr.left = left; switch (ttype) { case TOKEN_OR_OR: node->type = NT_OR_OR; - node->expr.right = parse_subexpression(parser, PREC_LOGICAL_OR); + node->expr.right = NEXT(PREC_LOGICAL_OR); break; case TOKEN_AND_AND: node->type = NT_AND_AND; - node->expr.right = parse_subexpression(parser, PREC_LOGICAL_AND); + node->expr.right = NEXT(PREC_LOGICAL_AND); 
break; case TOKEN_OR: node->type = NT_OR; - node->expr.right = parse_subexpression(parser, PREC_OR); + node->expr.right = NEXT(PREC_OR); break; case TOKEN_XOR: node->type = NT_XOR; - node->expr.right = parse_subexpression(parser, PREC_XOR); + node->expr.right = NEXT(PREC_XOR); break; case TOKEN_AND: node->type = NT_AND; - node->expr.right = parse_subexpression(parser, PREC_AND); + node->expr.right = NEXT(PREC_AND); break; case TOKEN_L_SH: node->type = NT_L_SH; - node->expr.right = parse_subexpression(parser, PREC_SHIFT); + node->expr.right = NEXT(PREC_SHIFT); break; case TOKEN_R_SH: node->type = NT_R_SH; - node->expr.right = parse_subexpression(parser, PREC_SHIFT); + node->expr.right = NEXT(PREC_SHIFT); break; case TOKEN_ADD: node->type = NT_ADD; - node->expr.right = parse_subexpression(parser, PREC_ADDITIVE); + node->expr.right = NEXT(PREC_ADDITIVE); break; case TOKEN_SUB: node->type = NT_SUB; - node->expr.right = parse_subexpression(parser, PREC_ADDITIVE); + node->expr.right = NEXT(PREC_ADDITIVE); break; case TOKEN_MUL: node->type = NT_MUL; - node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE); + node->expr.right = NEXT(PREC_MULTIPLICATIVE); break; case TOKEN_DIV: node->type = NT_DIV; - node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE); + node->expr.right = NEXT(PREC_MULTIPLICATIVE); break; case TOKEN_MOD: node->type = NT_MOD; - node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE); + node->expr.right = NEXT(PREC_MULTIPLICATIVE); break; default: break; @@ -223,44 +230,50 @@ static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) { return node; } - -// 新增函数调用解析 -static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) { - struct ASTNode* node = new_ast_node(); +static ast_node_t* parse_call(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* ident) { + ast_node_t* node = new_ast_node(); node->type = NT_TERM_CALL; - poptok(parser); // 跳过 '(' + node->call.name = ident; + 
node->call.params = new_ast_node(); + vector_init(node->call.params->params.params); + pop_tok(tokbuf); // 跳过 '(' - enum TokenType ttype; - // 解析参数列表 - while ((ttype = peektoktype(parser)) != TOKEN_R_PAREN) { - // add_arg(node, parse_expr(parser)); - if (ttype == TOKEN_COMMA) poptok(parser); - else poptok(parser); + tok_type_t ttype; + while (1) { + flush_peek_tok(tokbuf); + ttype = peek_tok_type(tokbuf); + if (ttype == TOKEN_R_PAREN) { + break; + } + ast_node_t* param = NEXT(PREC_EXPRESSION); + vector_push(node->call.params->params.params, param); + flush_peek_tok(tokbuf); + ttype = peek_tok_type(tokbuf); + if (ttype == TOKEN_COMMA) pop_tok(tokbuf); } - poptok(parser); // 跳过 ')' + pop_tok(tokbuf); // 跳过 ')' - char* name = ident->syms.tok.constant.str; - void* sym = symtab_lookup_symbol(parser->symtab, name); - if (sym == NULL) { + const char* name = ident->syms.tok.val.str; + ast_node_t* sym = symtab_lookup_symbol(symtab, name); + // TODO check func is match + if (sym == NULL || sym->type != NT_DECL_FUNC) { error("function not decl %s", name); } - node->call.name = name; - node->call.params = NULL; + node->call.name = ident; node->call.func_decl = sym; return node; } -static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) { - flushpeektok(parser); - enum TokenType ttype; - expecttok(parser, TOKEN_L_PAREN); - left = parse_subexpression(parser, PREC_EXPRESSION); - flushpeektok(parser); - expecttok(parser, TOKEN_R_PAREN); +static ast_node_t* parse_paren(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { + flush_peek_tok(tokbuf); + expect_pop_tok(tokbuf, TOKEN_L_PAREN); + left = NEXT(PREC_EXPRESSION); + flush_peek_tok(tokbuf); + expect_pop_tok(tokbuf, TOKEN_R_PAREN); return left; } -typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*); +typedef ast_node_t* (*parse_expr_fun_t)(tok_buf_t*, symtab_t* , ast_node_t*); static struct expr_prec_table_t { parse_expr_fun_t parser; enum Precedence prec; @@ -309,11 +322,11 @@ 
static struct expr_prec_table_t { [TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER}, }; -static struct ASTNode *parse_primary_expression(struct Parser* parser) { - flushpeektok(parser); +static ast_node_t *parse_primary_expression(tok_buf_t* tokbuf, symtab_t *symtab) { + flush_peek_tok(tokbuf); - struct Token* tok = peektok(parser); - struct ASTNode *node = new_ast_node(); + tok_t* tok = peek_tok(tokbuf); + ast_node_t *node = new_ast_node(); node->type = NT_TERM_VAL; node->syms.tok = *tok; @@ -330,34 +343,35 @@ static struct ASTNode *parse_primary_expression(struct Parser* parser) { case TOKEN_STRING_LITERAL: // node->data.data_type = TYPE_POINTER; case TOKEN_IDENT: - node = parse_ident(parser); - if (peektoktype(parser) == TOKEN_L_PAREN) { - node = parse_call(parser, node); + node = expect_pop_ident(tokbuf); + tok_type_t ttype = peek_tok_type(tokbuf); + if (ttype == TOKEN_L_PAREN) { + node = parse_call(tokbuf, symtab, node); } else { - void *sym = symtab_lookup_symbol(parser->symtab, tok->constant.str); + void *sym = symtab_lookup_symbol(symtab, tok->val.str); if (sym == NULL) { - error("undefined symbol but use %s", tok->constant.str); + error("undefined symbol but use %s", tok->val.str); } node->type = NT_TERM_IDENT; node->syms.decl_node = sym; - goto END; } + goto END; default: return NULL; } - poptok(parser); + pop_tok(tokbuf); END: return node; } -static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) { - enum TokenType ttype; - struct expr_prec_table_t* work; - struct ASTNode* left; +static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec) { + tok_type_t ttype; + struct expr_prec_table_t* work; + ast_node_t* left; while (1) { - flushpeektok(parser); - ttype = peektoktype(parser); + flush_peek_tok(tokbuf); + ttype = peek_tok_type(tokbuf); work = &expr_table[ttype]; // FIXME if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) { @@ -365,16 +379,16 @@ static struct ASTNode 
*parse_subexpression(struct Parser* parser, enum Precedenc } if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) { if (work->parser != NULL) { - left = work->parser(parser, NULL); + left = work->parser(tokbuf, symtab, NULL); } else { - left = parse_primary_expression(parser); + left = parse_primary_expression(tokbuf, symtab); } } else if (work->ptype == INFIX_PARSER) { if (work->parser == NULL) break; if (work->prec <= prec) break; - left = work->parser(parser, left); + left = work->parser(tokbuf, symtab, left); } // assert(left != NULL); } @@ -382,9 +396,11 @@ static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedenc return left; } -struct ASTNode* parse_expr(struct Parser* parser) { - flushpeektok(parser); - enum TokenType ttype = peektoktype(parser); +ast_node_t* parse_expr(parser_t* parser) { + tok_buf_t* tokbuf = &(parser->tokbuf); + symtab_t *symtab = parser->symtab; + flush_peek_tok(tokbuf); + tok_type_t ttype = peek_tok_type(tokbuf); switch (ttype) { case TOKEN_NOT: case TOKEN_AND: @@ -401,9 +417,9 @@ struct ASTNode* parse_expr(struct Parser* parser) { case TOKEN_SUB_SUB: case TOKEN_SIZEOF: case TOKEN_IDENT: - return parse_subexpression(parser, PREC_EXPRESSION); + return NEXT(PREC_EXPRESSION); default: - error("Want expr but not got %s", get_token_name(ttype)); + error("Want expr but not got %s", get_tok_name(ttype)); break; } } diff --git a/ccompiler/frontend/parser/ast/func.c b/ccompiler/frontend/parser/ast/func.c index 293daab..2c31d15 100644 --- a/ccompiler/frontend/parser/ast/func.c +++ b/ccompiler/frontend/parser/ast/func.c @@ -6,34 +6,21 @@ #define FUNC_PARAM_CACHE_SIZE 32 // 合理初始值,可覆盖99%常见情况 #endif -struct FuncParamCache { - struct Token tokens[FUNC_PARAM_CACHE_SIZE]; - int read_pos; // 当前读取位置 - int write_pos; // 写入位置 - int depth; // 当前缓存深度 -}; - -static enum TokenType peekcachetype(struct FuncParamCache* cache) { - return cache->tokens[cache->read_pos++].type; -} - // TODO 语义分析压入符号表 -static void 
parse_params(struct Parser* parser, struct FuncParamCache* cache, struct ASTNode* node) { - // = peekcachetype(cache); - enum TokenType ttype; - // if (ttype != TOKEN_L_PAREN) { - // error("function expected '('\n"); - // } - struct ASTNode *params = new_ast_node(); - node->func.params = params; - int params_size = 0; +static void parse_params(parser_t* parser, tok_buf_t* cache, ast_node_t* node) { + tok_type_t ttype; + ast_node_t *params = new_ast_node(); + node->decl_func.params = params; + vector_init(params->params.params); - while ((ttype = peekcachetype(cache)) != TOKEN_R_PAREN) { + int depth = 1; + while (depth) { + ttype = peek_tok_type(cache); switch (ttype) { case TOKEN_COMMA: break; case TOKEN_ELLIPSIS: - ttype = peekcachetype(cache); + ttype = peek_tok_type(cache); if (ttype != TOKEN_R_PAREN) { error("... must be a last parameter list (expect ')')"); } @@ -41,9 +28,29 @@ static void parse_params(struct Parser* parser, struct FuncParamCache* cache, st error("not implement"); break; case TOKEN_IDENT: - params->children[params_size++] = NULL; + // TODO 静态数组 + flush_peek_tok(cache); + ast_node_t* id_node = new_ast_ident_node(peek_tok(cache)); + ast_node_t* node = new_ast_node(); + node->type = NT_DECL_VAR; + node->decl_val.name = id_node; + // TODO typing sys + node->decl_val.type = NULL; + node->decl_val.expr_stmt = NULL; + node->decl_val.data = NULL; + vector_push(params->params.params, node); + symtab_add_symbol(parser->symtab, id_node->syms.tok.val.str, node, 0); break; + case TOKEN_L_PAREN: { + depth++; + break; + } + case TOKEN_R_PAREN: { + depth--; + break; + } default: + break; // TODO 使用cache的类型解析 // parse_type(parser); // TODO type parse @@ -51,39 +58,42 @@ static void parse_params(struct Parser* parser, struct FuncParamCache* cache, st // ttype = peekcachetype(cache); // if (ttype != TOKEN_IDENT) { // node->node_type = NT_DECL_FUNC; - // flushpeektok(parser); + // flush_peek_tok(tokbuf); // continue; // } // error("function expected ')' or 
','\n"); } + pop_tok(cache); } } -enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* cache) { - cache->depth = 1; - cache->read_pos = 0; - cache->write_pos = 0; +ast_type_t check_is_func_decl(tok_buf_t* tokbuf, tok_buf_t* cache) { + expect_pop_tok(tokbuf, TOKEN_L_PAREN); + int depth = 1; - while (cache->depth) { - struct Token* tok = peektok(parser); - poptok(parser); - if (cache->write_pos >= FUNC_PARAM_CACHE_SIZE - 1) { + while (depth) { + tok_t* tok = peek_tok(tokbuf); + pop_tok(tokbuf); + if (cache->size >= cache->cap - 1) { error("function parameter list too long"); } - cache->tokens[cache->write_pos++] = *tok; + cache->buf[cache->size++] = *tok; switch (tok->type) { case TOKEN_L_PAREN: - cache->depth++; + depth++; break; case TOKEN_R_PAREN: - cache->depth--; + depth--; + break; + default: break; } } + cache->end = cache->size; - switch (peektoktype(parser)) { + switch (peek_tok_type(tokbuf)) { case TOKEN_SEMICOLON: - poptok(parser); + pop_tok(tokbuf); return NT_DECL_FUNC; case TOKEN_L_BRACE: return NT_FUNC; @@ -93,28 +103,66 @@ enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* ca } } -struct ASTNode* parse_func(struct Parser* parser) { - struct ASTNode* ret_type = parse_type(parser); - struct ASTNode* func_name = parse_ident(parser); +static ast_node_t* new_ast_node_funcdecl(ast_node_t* ret, ast_node_t* name) { + ast_node_t* node = new_ast_node(); + node->type = NT_DECL_FUNC; + node->decl_func.ret = ret; + node->decl_func.name = name; + node->decl_func.def = NULL; + return node; +} - struct ASTNode* node = new_ast_node(); - node->func.ret = ret_type; - node->func.name = func_name; +void parse_func(parser_t* parser) { + tok_buf_t* tokbuf = &(parser->tokbuf); + flush_peek_tok(tokbuf); + ast_node_t* ret_node = parse_type(parser); + ast_node_t* name_node = expect_pop_ident(tokbuf); + const char* func_name = name_node->syms.tok.val.str; + ast_node_t* decl = new_ast_node_funcdecl(ret_node, name_node); - 
flushpeektok(parser); - expecttok(parser, TOKEN_L_PAREN); - struct FuncParamCache cache; - node->type = check_is_func_decl(parser, &cache); + tok_buf_t cache; + init_tokbuf(&cache, NULL, NULL); + cache.cap = FUNC_PARAM_CACHE_SIZE; + tok_t buf[FUNC_PARAM_CACHE_SIZE]; + cache.buf = buf; + + ast_type_t type = check_is_func_decl(&(parser->tokbuf), &cache); - symtab_add_symbol(parser->symtab, func_name->syms.tok.constant.str, node); - if (node->type == NT_DECL_FUNC) { - return node; + ast_node_t* prev = symtab_add_symbol(parser->symtab, func_name, decl, 1); + if (prev != NULL) { + if (prev->type != NT_DECL_FUNC) { + error("the symbol duplicate old is %d, new is func", prev->type); + } + // TODO check redeclare func is match + if (type == NT_FUNC) { + // TODO Free decl; + free(decl); + decl = prev; + goto FUNC; + } + return; + } + vector_push(parser->root->root.children, decl); + if (type == NT_DECL_FUNC) { + return; } +FUNC: + // 该data临时用于判断是否重复定义 + if (decl->decl_func.def != NULL) { + error("redefinition of function %s", func_name); + } + + ast_node_t* node = new_ast_node(); + node->type = NT_FUNC; + node->func.decl = decl; + node->func.data = NULL; + + decl->decl_func.def = node; symtab_enter_scope(parser->symtab); - parse_params(parser, &cache, node); + parse_params(parser, &cache, decl); node->func.body = parse_block(parser); symtab_leave_scope(parser->symtab); - return node; + vector_push(parser->root->root.children, node); } diff --git a/ccompiler/frontend/parser/ast/program.c b/ccompiler/frontend/parser/ast/program.c index 08281cd..a6d3fc8 100644 --- a/ccompiler/frontend/parser/ast/program.c +++ b/ccompiler/frontend/parser/ast/program.c @@ -5,25 +5,30 @@ #define PROG_MAX_NODE_SIZE (1024 * 4) #endif -void parse_prog(struct Parser* parser) { +void parse_func(parser_t* parser); + +void parse_prog(parser_t* parser) { /** * Program := (Declaration | Definition)* * same as * Program := Declaration* Definition* */ - int child_size = 0; + tok_buf_t *tokbuf = 
&(parser->tokbuf); parser->root = new_ast_node(); - struct ASTNode* node; - parser->root->root.children = xmalloc(sizeof(struct ASTNode*) * PROG_MAX_NODE_SIZE); + ast_node_t* node; + parser->root->type = NT_ROOT; + vector_init(parser->root->root.children); while (1) { - flushpeektok(parser); - if (peektoktype(parser) == TOKEN_EOF) { + flush_peek_tok(tokbuf); + if (peek_tok_type(tokbuf) == TOKEN_EOF) { break; } node = parse_decl(parser); - parser->root->root.children[child_size++] = node; + if (node == NULL) { + parse_func(parser); + } else { + vector_push(parser->root->root.children, node); + } } - parser->root->type = NT_ROOT; - parser->root->root.child_size = child_size; return; } diff --git a/ccompiler/frontend/parser/ast/stmt.c b/ccompiler/frontend/parser/ast/stmt.c index 6797e8b..e6b3903 100644 --- a/ccompiler/frontend/parser/ast/stmt.c +++ b/ccompiler/frontend/parser/ast/stmt.c @@ -1,27 +1,28 @@ #include "../parser.h" #include "ast.h" -struct ASTNode* parse_stmt(struct Parser* parser) { - flushpeektok(parser); - enum TokenType ttype = peektoktype(parser); - struct ASTNode* node = new_ast_node(); +ast_node_t* parse_stmt(parser_t* parser) { + tok_buf_t* tokbuf = &parser->tokbuf; + flush_peek_tok(tokbuf); + tok_type_t ttype = peek_tok_type(tokbuf); + ast_node_t* node = new_ast_node(); switch (ttype) { case TOKEN_IF: { /** * if (exp) stmt * if (exp) stmt else stmt */ - poptok(parser); + pop_tok(tokbuf); - expecttok(parser, TOKEN_L_PAREN); + expect_pop_tok(tokbuf, TOKEN_L_PAREN); node->if_stmt.cond = parse_expr(parser); - flushpeektok(parser); - expecttok(parser, TOKEN_R_PAREN); + flush_peek_tok(tokbuf); + expect_pop_tok(tokbuf, TOKEN_R_PAREN); node->if_stmt.if_stmt = parse_stmt(parser); - ttype = peektoktype(parser); + ttype = peek_tok_type(tokbuf); if (ttype == TOKEN_ELSE) { - poptok(parser); + pop_tok(tokbuf); node->if_stmt.else_stmt = parse_stmt(parser); } else { node->if_stmt.else_stmt = NULL; @@ -33,11 +34,11 @@ struct ASTNode* parse_stmt(struct Parser* 
parser) { /** * switch (exp) stmt */ - poptok(parser); + pop_tok(tokbuf); - expecttok(parser, TOKEN_L_PAREN); + expect_pop_tok(tokbuf, TOKEN_L_PAREN); node->switch_stmt.cond = parse_expr(parser); - expecttok(parser, TOKEN_R_PAREN); + expect_pop_tok(tokbuf, TOKEN_R_PAREN); node->switch_stmt.body = parse_stmt(parser); node->type = NT_STMT_SWITCH; @@ -47,11 +48,11 @@ struct ASTNode* parse_stmt(struct Parser* parser) { /** * while (exp) stmt */ - poptok(parser); + pop_tok(tokbuf); - expecttok(parser, TOKEN_L_PAREN); + expect_pop_tok(tokbuf, TOKEN_L_PAREN); node->while_stmt.cond = parse_expr(parser); - expecttok(parser, TOKEN_R_PAREN); + expect_pop_tok(tokbuf, TOKEN_R_PAREN); node->while_stmt.body = parse_stmt(parser); node->type = NT_STMT_WHILE; @@ -61,16 +62,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) { /** * do stmt while (exp) */ - poptok(parser); + pop_tok(tokbuf); node->do_while_stmt.body = parse_stmt(parser); - ttype = peektoktype(parser); + ttype = peek_tok_type(tokbuf); if (ttype != TOKEN_WHILE) { error("expected while after do"); } - poptok(parser); - expecttok(parser, TOKEN_L_PAREN); + pop_tok(tokbuf); + expect_pop_tok(tokbuf, TOKEN_L_PAREN); node->do_while_stmt.cond = parse_expr(parser); - expecttok(parser, TOKEN_R_PAREN); + expect_pop_tok(tokbuf, TOKEN_R_PAREN); node->type = NT_STMT_DOWHILE; break; } @@ -79,36 +80,36 @@ struct ASTNode* parse_stmt(struct Parser* parser) { * for (init; [cond]; [iter]) stmt */ // node->children.stmt.for_stmt.init - poptok(parser); - ttype = peektoktype(parser); + pop_tok(tokbuf); + ttype = peek_tok_type(tokbuf); if (ttype != TOKEN_L_PAREN) { error("expected ( after for"); } - poptok(parser); + pop_tok(tokbuf); // init expr or init decl_var // TODO need add this feature node->for_stmt.init = parse_expr(parser); - expecttok(parser, TOKEN_SEMICOLON); + expect_pop_tok(tokbuf, TOKEN_SEMICOLON); // cond expr or null - ttype = peektoktype(parser); + ttype = peek_tok_type(tokbuf); if (ttype != TOKEN_SEMICOLON) { 
node->for_stmt.cond = parse_expr(parser); - expecttok(parser, TOKEN_SEMICOLON); + expect_pop_tok(tokbuf, TOKEN_SEMICOLON); } else { node->for_stmt.cond = NULL; - poptok(parser); + pop_tok(tokbuf); } // iter expr or null - ttype = peektoktype(parser); + ttype = peek_tok_type(tokbuf); if (ttype != TOKEN_R_PAREN) { node->for_stmt.iter = parse_expr(parser); - expecttok(parser, TOKEN_R_PAREN); + expect_pop_tok(tokbuf, TOKEN_R_PAREN); } else { node->for_stmt.iter = NULL; - poptok(parser); + pop_tok(tokbuf); } node->for_stmt.body = parse_stmt(parser); @@ -120,8 +121,8 @@ struct ASTNode* parse_stmt(struct Parser* parser) { * break ; */ // TODO check 导致外围 for、while 或 do-while 循环或 switch 语句终止。 - poptok(parser); - expecttok(parser, TOKEN_SEMICOLON); + pop_tok(tokbuf); + expect_pop_tok(tokbuf, TOKEN_SEMICOLON); node->type = NT_STMT_BREAK; break; @@ -131,8 +132,8 @@ struct ASTNode* parse_stmt(struct Parser* parser) { * continue ; */ // TODO check 导致跳过整个 for、 while 或 do-while 循环体的剩余部分。 - poptok(parser); - expecttok(parser, TOKEN_SEMICOLON); + pop_tok(tokbuf); + expect_pop_tok(tokbuf, TOKEN_SEMICOLON); node->type = NT_STMT_CONTINUE; break; @@ -142,16 +143,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) { * return [exp] ; */ // TODO 终止当前函数并返回指定值给调用方函数。 - poptok(parser); - ttype = peektoktype(parser); + pop_tok(tokbuf); + ttype = peek_tok_type(tokbuf); if (ttype != TOKEN_SEMICOLON) { node->return_stmt.expr_stmt = parse_expr(parser); - flushpeektok(parser); - expecttok(parser, TOKEN_SEMICOLON); + flush_peek_tok(tokbuf); + expect_pop_tok(tokbuf, TOKEN_SEMICOLON); } else { node->return_stmt.expr_stmt = NULL; + pop_tok(tokbuf); } - poptok(parser); node->type = NT_STMT_RETURN; break; } @@ -161,15 +162,15 @@ struct ASTNode* parse_stmt(struct Parser* parser) { */ // TODO check label 将控制无条件转移到所欲位置。 //在无法用约定的构造将控制转移到所欲位置时使用。 - poptok(parser); + pop_tok(tokbuf); // find symbol table - ttype = peektoktype(parser); + ttype = peek_tok_type(tokbuf); if (ttype != TOKEN_IDENT) { 
error("expect identifier after goto"); } - expecttok(parser, TOKEN_SEMICOLON); + expect_pop_tok(tokbuf, TOKEN_SEMICOLON); // TODO filling label - node->goto_stmt.label = parse_ident(parser); + node->goto_stmt.label = expect_pop_ident(tokbuf); node->type = NT_STMT_GOTO; break; } @@ -181,7 +182,7 @@ struct ASTNode* parse_stmt(struct Parser* parser) { * if () ; * for () ; */ - poptok(parser); + pop_tok(tokbuf); node->type = NT_STMT_EMPTY; break; } @@ -193,30 +194,30 @@ struct ASTNode* parse_stmt(struct Parser* parser) { node->type = NT_STMT_BLOCK; break; } - case TOKEN_IDENT: { + case TOKEN_IDENT: { // TODO label goto - if (peektoktype(parser) != TOKEN_COLON) { + if (peek_tok_type(tokbuf) != TOKEN_COLON) { goto EXP; } - node->label_stmt.label = parse_ident(parser); - expecttok(parser, TOKEN_COLON); + node->label_stmt.label = expect_pop_ident(tokbuf); + expect_pop_tok(tokbuf, TOKEN_COLON); node->type = NT_STMT_LABEL; break; } case TOKEN_CASE: { // TODO label switch - poptok(parser); + pop_tok(tokbuf); error("unimplemented switch label"); node->label_stmt.label = parse_expr(parser); // TODO 该表达式为const int - expecttok(parser, TOKEN_COLON); + expect_pop_tok(tokbuf, TOKEN_COLON); node->type = NT_STMT_CASE; break; } case TOKEN_DEFAULT: { // TODO label switch default - poptok(parser); - expecttok(parser, TOKEN_COLON); + pop_tok(tokbuf); + expect_pop_tok(tokbuf, TOKEN_COLON); node->type = NT_STMT_DEFAULT; break; } @@ -226,15 +227,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) { */ EXP: node->expr_stmt.expr_stmt = parse_expr(parser); - flushpeektok(parser); - ttype = peektoktype(parser); + flush_peek_tok(tokbuf); + ttype = peek_tok_type(tokbuf); if (ttype != TOKEN_SEMICOLON) { error("exp must end with \";\""); } - poptok(parser); + pop_tok(tokbuf); node->type = NT_STMT_EXPR; break; } } + return node; } diff --git a/ccompiler/frontend/parser/ast/term.c b/ccompiler/frontend/parser/ast/term.c index bff6b2a..797c45c 100644 --- a/ccompiler/frontend/parser/ast/term.c +++ 
b/ccompiler/frontend/parser/ast/term.c @@ -2,162 +2,30 @@ #include "../type.h" #include "ast.h" -// /* 状态跳转表定义 */ -// typedef void (*StateHandler)(struct Parser*, struct ASTNode**); - -// enum TypeParseState { -// TPS_BASE_TYPE, // 解析基础类型 (int/char等) -// TPS_QUALIFIER, // 解析限定符 (const/volatile) -// TPS_POINTER, // 解析指针 (*) -// TPS_ARRAY, // 解析数组维度 ([n]) -// TPS_FUNC_PARAMS, // 解析函数参数列表 -// TPS_END, -// }; - -// ; - -// /* 状态处理函数前置声明 */ -// static void handle_base_type(struct Parser*, struct ASTNode**); -// static void handle_qualifier(struct Parser*, struct ASTNode**); -// static void handle_pointer(struct Parser*, struct ASTNode**); -// static void handle_array(struct Parser*, struct ASTNode**); -// static void handle_func_params(struct Parser*, struct ASTNode**); -// static void handle_error(struct Parser*, struct ASTNode**); - -// /* 状态跳转表(核心优化部分) */ -// static const struct StateTransition { -// enum TokenType tok; // 触发token -// StateHandler handler; // 处理函数 -// enum TypeParseState next_state; // 下一个状态 -// } state_table[][8] = { -// [TPS_QUALIFIER] = { -// {TOKEN_CONST, handle_qualifier, TPS_QUALIFIER}, -// {TOKEN_VOLATILE, handle_qualifier, TPS_QUALIFIER}, -// {TOKEN_VOID, handle_base_type, TPS_POINTER}, -// {TOKEN_CHAR, handle_base_type, TPS_POINTER}, -// {TOKEN_INT, handle_base_type, TPS_POINTER}, -// {TOKEN_EOF, handle_error, TPS_QUALIFIER}, -// /* 其他token默认处理 */ -// {0, NULL, TPS_BASE_TYPE} -// }, -// [TPS_BASE_TYPE] = { -// {TOKEN_MUL, handle_pointer, TPS_POINTER}, -// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY}, -// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS}, -// {TOKEN_EOF, NULL, TPS_END}, -// {0, NULL, TPS_POINTER} -// }, -// [TPS_POINTER] = { -// {TOKEN_MUL, handle_pointer, TPS_POINTER}, -// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY}, -// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS}, -// {0, NULL, TPS_END} -// }, -// [TPS_ARRAY] = { -// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY}, -// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS}, 
-// {0, NULL, TPS_END} -// }, -// [TPS_FUNC_PARAMS] = { -// {0, NULL, TPS_END} -// } -// }; - -// /* 新的类型解析函数 */ -// struct ASTNode* parse_type(struct Parser* p) { -// struct ASTNode* type_root = NULL; -// struct ASTNode** current = &type_root; -// enum TypeParseState state = TPS_QUALIFIER; - -// while (state != TPS_END) { -// enum TokenType t = peektoktype(p); -// const struct StateTransition* trans = state_table[state]; - -// // 查找匹配的转换规则 -// while (trans->tok != 0 && trans->tok != t) { -// trans++; -// } - -// if (trans->handler) { -// trans->handler(p, current); -// } else if (trans->tok == 0) { // 默认规则 -// state = trans->next_state; -// continue; -// } else { -// error("syntax error type parse error"); -// } - -// state = trans->next_state; -// } - -// return type_root; -// } - -// /* 具体状态处理函数实现 */ -// static void handle_qualifier(struct Parser* p, struct ASTNode** current) { -// struct ASTNode* node = new_ast_node(); -// node->node_type = NT_TYPE_QUAL; -// node->data.data_type = poptok(p).type; - -// if (*current) { -// (*current)->child.decl.type = node; -// } else { -// *current = node; -// } -// } - -// static void handle_base_type(struct Parser* p, struct ASTNode** current) { -// struct ASTNode* node = new_ast_node(); -// node->node_type = NT_TYPE_BASE; -// node->data.data_type = poptok(p).type; - -// // 链接到当前节点链的末端 -// while (*current && (*current)->child.decl.type) { -// current = &(*current)->child.decl.type; -// } - -// if (*current) { -// (*current)->child.decl.type = node; -// } else { -// *current = node; -// } -// } - -// static void handle_pointer(struct Parser* p, struct ASTNode** current) { -// poptok(p); // 吃掉* -// struct ASTNode* node = new_ast_node(); -// node->node_type = NT_TYPE_PTR; - -// // 插入到当前节点之前 -// node->child.decl.type = *current; -// *current = node; -// } - -// /* 其他处理函数类似实现... 
*/ - -struct ASTNode* parser_ident_without_pop(struct Parser* parser) { - flushpeektok(parser); - struct Token* tok = peektok(parser); +ast_node_t* new_ast_ident_node(tok_t* tok) { if (tok->type != TOKEN_IDENT) { error("syntax error: want identifier but got %d", tok->type); } - struct ASTNode* node = new_ast_node(); + ast_node_t* node = new_ast_node(); node->type = NT_TERM_IDENT; node->syms.tok = *tok; node->syms.decl_node = NULL; return node; } -struct ASTNode* parse_ident(struct Parser* parser) { - struct ASTNode* node = parser_ident_without_pop(parser); - poptok(parser); +ast_node_t* expect_pop_ident(tok_buf_t* tokbuf) { + flush_peek_tok(tokbuf); + tok_t* tok = peek_tok(tokbuf); + ast_node_t* node = new_ast_ident_node(tok); + pop_tok(tokbuf); return node; } -struct ASTNode* parse_type(struct Parser* parser) { - flushpeektok(parser); - enum TokenType ttype = peektoktype(parser); - enum DataType dtype; +ast_node_t* parse_type(parser_t* parser) { + tok_buf_t* tokbuf = &parser->tokbuf; + flush_peek_tok(tokbuf); + tok_type_t ttype = peek_tok_type(tokbuf); + data_type_t dtype; switch(ttype) { case TOKEN_VOID: dtype = TYPE_VOID; break; case TOKEN_CHAR: dtype = TYPE_CHAR; break; @@ -170,13 +38,14 @@ struct ASTNode* parse_type(struct Parser* parser) { error("无效的类型说明符"); } - struct ASTNode* node = new_ast_node(); + ast_node_t* node = new_ast_node(); node->type = NT_TERM_TYPE; - // node->data.data_type = dtype; - poptok(parser); + // TODO added by disable warning, will add typing system + dtype += 1; + pop_tok(tokbuf); - if (peektoktype(parser) == TOKEN_MUL) { - poptok(parser); + if (peek_tok_type(tokbuf) == TOKEN_MUL) { + pop_tok(tokbuf); } return node; } diff --git a/ccompiler/frontend/parser/ast/type.c b/ccompiler/frontend/parser/ast/type.c index 6e10717..582abc1 100644 --- a/ccompiler/frontend/parser/ast/type.c +++ b/ccompiler/frontend/parser/ast/type.c @@ -1,136 +1,136 @@ -#include "../parser.h" -#include "../type.h" +// #include "../parser.h" +// #include "../type.h" 
-enum TypeParseState { - TPS_BASE_TYPE, // 解析基础类型 (int/char等) - TPS_QUALIFIER, // 解析限定符 (const/volatile) - TPS_POINTER, // 解析指针 (*) - TPS_ARRAY, // 解析数组维度 ([n]) - TPS_FUNC_PARAMS // 解析函数参数列表 -}; +// enum TypeParseState { +// TPS_BASE_TYPE, // 解析基础类型 (int/char等) +// TPS_QUALIFIER, // 解析限定符 (const/volatile) +// TPS_POINTER, // 解析指针 (*) +// TPS_ARRAY, // 解析数组维度 ([n]) +// TPS_FUNC_PARAMS // 解析函数参数列表 +// }; -struct ASTNode* parse_type(struct Parser* p) { - struct ASTNode* type_root = new_ast_node(); - struct ASTNode* current = type_root; - current->type = NT_TYPE_BASE; +// ast_node_t* parse_type(parser_t* p) { +// ast_node_t* type_root = new_ast_node(); +// ast_node_t* current = type_root; +// current->type = NT_TYPE_BASE; - enum TypeParseState state = TPS_QUALIFIER; - int pointer_level = 0; +// enum TypeParseState state = TPS_QUALIFIER; +// int pointer_level = 0; - while (1) { - enum TokenType t = peektoktype(p); +// while (1) { +// tok_type_t t = peektoktype(p); - switch (state) { - // 基础类型解析 (int, char等) - case TPS_BASE_TYPE: - if (is_base_type(t)) { - // current->data.data_type = token_to_datatype(t); - poptok(p); - state = TPS_POINTER; - } else { - error("Expected type specifier"); - } - break; +// switch (state) { +// // 基础类型解析 (int, char等) +// case TPS_BASE_TYPE: +// if (is_base_type(t)) { +// // current->data.data_type = token_to_datatype(t); +// pop_tok(p); +// state = TPS_POINTER; +// } else { +// error("Expected type specifier"); +// } +// break; - // 类型限定符 (const/volatile) - case TPS_QUALIFIER: - if (t == TOKEN_CONST || t == TOKEN_VOLATILE) { - struct ASTNode* qual_node = new_ast_node(); - qual_node->type = NT_TYPE_QUAL; - qual_node->data.data_type = t; // 复用data_type字段存储限定符 - current->child.decl.type = qual_node; - current = qual_node; - poptok(p); - } else { - state = TPS_BASE_TYPE; - } - break; +// // 类型限定符 (const/volatile) +// case TPS_QUALIFIER: +// if (t == TOKEN_CONST || t == TOKEN_VOLATILE) { +// ast_node_t* qual_node = new_ast_node(); +// 
qual_node->type = NT_TYPE_QUAL; +// qual_node->data.data_type = t; // 复用data_type字段存储限定符 +// current->child.decl.type = qual_node; +// current = qual_node; +// pop_tok(p); +// } else { +// state = TPS_BASE_TYPE; +// } +// break; - // 指针解析 (*) - case TPS_POINTER: - if (t == TOKEN_MUL) { - struct ASTNode* ptr_node = new_ast_node(); - ptr_node->type = NT_TYPE_PTR; - current->child.decl.type = ptr_node; - current = ptr_node; - pointer_level++; - poptok(p); - } else { - state = TPS_ARRAY; - } - break; +// // 指针解析 (*) +// case TPS_POINTER: +// if (t == TOKEN_MUL) { +// ast_node_t* ptr_node = new_ast_node(); +// ptr_node->type = NT_TYPE_PTR; +// current->child.decl.type = ptr_node; +// current = ptr_node; +// pointer_level++; +// pop_tok(p); +// } else { +// state = TPS_ARRAY; +// } +// break; - // 数组维度 ([n]) - case TPS_ARRAY: - if (t == TOKEN_L_BRACKET) { - poptok(p); // 吃掉[ - struct ASTNode* arr_node = new_ast_node(); - arr_node->type = NT_TYPE_ARRAY; +// // 数组维度 ([n]) +// case TPS_ARRAY: +// if (t == TOKEN_L_BRACKET) { +// pop_tok(p); // 吃掉[ +// ast_node_t* arr_node = new_ast_node(); +// arr_node->type = NT_TYPE_ARRAY; - // 解析数组大小(仅语法检查) - if (peektoktype(p) != TOKEN_R_BRACKET) { - parse_expr(p); // 不计算实际值 - } +// // 解析数组大小(仅语法检查) +// if (peektoktype(p) != TOKEN_R_BRACKET) { +// parse_expr(p); // 不计算实际值 +// } - expecttok(p, TOKEN_R_BRACKET); - current->child.decl.type = arr_node; - current = arr_node; - } else { - state = TPS_FUNC_PARAMS; - } - break; +// expecttok(p, TOKEN_R_BRACKET); +// current->child.decl.type = arr_node; +// current = arr_node; +// } else { +// state = TPS_FUNC_PARAMS; +// } +// break; - // 函数参数列表 - case TPS_FUNC_PARAMS: - if (t == TOKEN_L_PAREN) { - struct ASTNode* func_node = new_ast_node(); - func_node->type = NT_TYPE_FUNC; - current->child.decl.type = func_node; +// // 函数参数列表 +// case TPS_FUNC_PARAMS: +// if (t == TOKEN_L_PAREN) { +// ast_node_t* func_node = new_ast_node(); +// func_node->type = NT_TYPE_FUNC; +// current->child.decl.type = 
func_node; - // 解析参数列表(仅结构,不验证类型) - parse_param_list(p, func_node); - current = func_node; - } else { - return type_root; // 类型解析结束 - } - break; - } - } -} - -// 判断是否是基础类型 -static int is_base_type(enum TokenType t) { - return t >= TOKEN_VOID && t <= TOKEN_DOUBLE; -} - -// // 转换token到数据类型(简化版) -// static enum DataType token_to_datatype(enum TokenType t) { -// static enum DataType map[] = { -// [TOKEN_VOID] = DT_VOID, -// [TOKEN_CHAR] = DT_CHAR, -// [TOKEN_INT] = DT_INT, -// // ...其他类型映射 -// }; -// return map[t]; +// // 解析参数列表(仅结构,不验证类型) +// parse_param_list(p, func_node); +// current = func_node; +// } else { +// return type_root; // 类型解析结束 +// } +// break; +// } +// } // } -// 解析参数列表(轻量级) -static void parse_param_list(struct Parser* p, struct ASTNode* func) { - expecttok(p, TOKEN_L_PAREN); - - while (peektoktype(p) != TOKEN_R_PAREN) { - struct ASTNode* param = parse_type(p); // 递归解析类型 - - // 允许可选参数名(仅语法检查) - if (peektoktype(p) == TOKEN_IDENT) { - poptok(p); // 吃掉参数名 - } - - if (peektoktype(p) == TOKEN_COMMA) { - poptok(p); - } - } - - expecttok(p, TOKEN_R_PAREN); -} +// // 判断是否是基础类型 +// static int is_base_type(tok_type_t t) { +// return t >= TOKEN_VOID && t <= TOKEN_DOUBLE; +// } + +// // // 转换token到数据类型(简化版) +// // static enum DataType token_to_datatype(tok_type_t t) { +// // static enum DataType map[] = { +// // [TOKEN_VOID] = DT_VOID, +// // [TOKEN_CHAR] = DT_CHAR, +// // [TOKEN_INT] = DT_INT, +// // // ...其他类型映射 +// // }; +// // return map[t]; +// // } + +// // 解析参数列表(轻量级) +// static void parse_param_list(parser_t* p, ast_node_t* func) { +// expecttok(p, TOKEN_L_PAREN); + +// while (peektoktype(p) != TOKEN_R_PAREN) { +// ast_node_t* param = parse_type(p); // 递归解析类型 + +// // 允许可选参数名(仅语法检查) +// if (peektoktype(p) == TOKEN_IDENT) { +// pop_tok(p); // 吃掉参数名 +// } + +// if (peektoktype(p) == TOKEN_COMMA) { +// pop_tok(p); +// } +// } + +// expecttok(p, TOKEN_R_PAREN); +// } diff --git a/ccompiler/frontend/parser/parser.c b/ccompiler/frontend/parser/parser.c index 
d8cceb8..bbeb3a7 100644 --- a/ccompiler/frontend/parser/parser.c +++ b/ccompiler/frontend/parser/parser.c @@ -1,67 +1,17 @@ #include "parser.h" #include "type.h" -#include "ast/ast.h" -int poptok(struct Parser* parser) { - if (parser->size == 0) { - return -1; - } - int idx = parser->cur_idx; - parser->cur_idx = (idx + 1) % PARSER_MAX_TOKEN_QUEUE; - parser->size--; - return 0; -} - -void flushpeektok(struct Parser* parser) { - parser->peek_idx = parser->cur_idx; -} - -struct Token* peektok(struct Parser* parser) { - int idx = parser->peek_idx; - idx = (idx + 1) % PARSER_MAX_TOKEN_QUEUE; - if (parser->size >= PARSER_MAX_TOKEN_QUEUE) { - warn("peek maybe too deep"); - } - if (parser->peek_idx == parser->end_idx) { - if (parser->size == PARSER_MAX_TOKEN_QUEUE) { - // FIXME - error("buffer overflow"); - } - get_valid_token(parser->lexer, &(parser->TokenBuffer[idx])); - parser->size++; - parser->end_idx = idx; - } - - parser->peek_idx = idx; - return &(parser->TokenBuffer[idx]); -} - -enum TokenType peektoktype(struct Parser* parser) { - return peektok(parser)->type; -} - -void expecttok(struct Parser* parser, enum TokenType type) { - struct Token* tok = peektok(parser); - if (tok->type != type) { - error("expected tok: %s, got %s", get_token_name(type), get_token_name(tok->type)); - } else { - poptok(parser); - } -} - -void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab) { +void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab) { parser->cur_node = NULL; parser->root = NULL; - parser->cur_idx = 0; - parser->peek_idx = 0; - parser->end_idx = 0; - parser->size = 0; parser->lexer = lexer; parser->symtab = symtab; - // TODO + init_tokbuf(&parser->tokbuf, lexer, (get_tokbuf_func)get_valid_token); + parser->tokbuf.cap = sizeof(parser->TokenBuffer) / sizeof(parser->TokenBuffer[0]); + parser->tokbuf.buf = parser->TokenBuffer; } -void run_parser(struct Parser* parser) { +void run_parser(parser_t* parser) { parse_prog(parser); } 
diff --git a/ccompiler/frontend/parser/parser.h b/ccompiler/frontend/parser/parser.h index b409393..ede7ceb 100644 --- a/ccompiler/frontend/parser/parser.h +++ b/ccompiler/frontend/parser/parser.h @@ -2,32 +2,24 @@ #define __PARSER_H__ #include "../frontend.h" -#include "../lexer/lexer.h" -// #include "symbol_table/symtab.h" -// #include "ast/ast.h" +#include "../lexer/lexer.h" +typedef struct lexer lexer_t; +typedef struct symtab symtab_t; #define PARSER_MAX_TOKEN_QUEUE 16 -struct Parser { +typedef struct parser { struct ASTNode* root; struct ASTNode* cur_node; - struct Lexer* lexer; - struct SymbolTable* symtab; - int cur_idx; - int peek_idx; - int end_idx; - int size; - struct Token TokenBuffer[PARSER_MAX_TOKEN_QUEUE]; + lexer_t* lexer; + symtab_t* symtab; + tok_buf_t tokbuf; + tok_t TokenBuffer[PARSER_MAX_TOKEN_QUEUE]; int err_level; -}; +} parser_t; -void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab); -void run_parser(struct Parser* parser); -void flushpeektok(struct Parser* parser); -int poptok(struct Parser* parser); -struct Token* peektok(struct Parser* parser); -enum TokenType peektoktype(struct Parser* parser); -void expecttok(struct Parser* parser, enum TokenType type); +void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab); +void run_parser(parser_t* parser); #endif diff --git a/ccompiler/frontend/parser/symtab/symtab.c b/ccompiler/frontend/parser/symtab/symtab.c index 026632e..5cab0cc 100644 --- a/ccompiler/frontend/parser/symtab/symtab.c +++ b/ccompiler/frontend/parser/symtab/symtab.c @@ -3,25 +3,25 @@ #include "scope.h" #include "symtab.h" -typedef struct SymbolTable SymbolTable; +typedef symtab_t symtab_t; typedef struct Scope Scope; -void init_symtab(SymbolTable* symtab) { +void init_symtab(symtab_t* symtab) { symtab->global_scope = scope_create(NULL); symtab->cur_scope = symtab->global_scope; } -void del_symtab(SymbolTable* symtab) { +void del_symtab(symtab_t* symtab) { 
scope_destroy(symtab->global_scope); } -void symtab_enter_scope(SymbolTable* symtab) { +void symtab_enter_scope(symtab_t* symtab) { struct Scope* scope = scope_create(symtab->cur_scope); scope->base_offset = symtab->cur_scope->base_offset + symtab->cur_scope->cur_offset; symtab->cur_scope = scope; } -void symtab_leave_scope(SymbolTable* symtab) { +void symtab_leave_scope(symtab_t* symtab) { Scope * scope = symtab->cur_scope; if (scope == NULL) { error("cannot leave NULL scope or global scope"); @@ -30,16 +30,20 @@ void symtab_leave_scope(SymbolTable* symtab) { scope_destroy(scope); } -void symtab_add_symbol(SymbolTable* symtab, const char* name, void* ast_node) { +void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate) { struct Scope* scope = symtab->cur_scope; - if (scope_lookup_current(scope, name) != NULL) { - // TODO WARNING - // return NULL; + void* node = scope_lookup_current(scope, name); + if (node != NULL) { + if (!can_duplicate) { + error("duplicate symbol %s", name); + } + return node; } scope_insert(scope, name, ast_node); + return node; } -void* symtab_lookup_symbol(SymbolTable* symtab, const char* name) { +void* symtab_lookup_symbol(symtab_t* symtab, const char* name) { return scope_lookup(symtab->cur_scope, name); } diff --git a/ccompiler/frontend/parser/symtab/symtab.h b/ccompiler/frontend/parser/symtab/symtab.h index 8f0a6a6..97d7c00 100644 --- a/ccompiler/frontend/parser/symtab/symtab.h +++ b/ccompiler/frontend/parser/symtab/symtab.h @@ -2,17 +2,17 @@ #ifndef __SYMTAB_H__ #define __SYMTAB_H__ -struct SymbolTable { +typedef struct symtab { struct Scope* cur_scope; struct Scope* global_scope; -}; +} symtab_t; -void init_symtab(struct SymbolTable* symtab); -void del_symtab(struct SymbolTable* symtab); +void init_symtab(symtab_t* symtab); +void del_symtab(symtab_t* symtab); -void symtab_enter_scope(struct SymbolTable* symtab); -void symtab_leave_scope(struct SymbolTable* symtab); -void symtab_add_symbol(struct 
SymbolTable* symtab, const char* name, void* ast_node); -void* symtab_lookup_symbol(struct SymbolTable* symtab, const char* name); +void symtab_enter_scope(symtab_t* symtab); +void symtab_leave_scope(symtab_t* symtab); +void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate); +void* symtab_lookup_symbol(symtab_t* symtab, const char* name); #endif diff --git a/ccompiler/frontend/parser/tests/test_parser.c b/ccompiler/frontend/parser/tests/test_parser.c index adfd084..b03ddcc 100644 --- a/ccompiler/frontend/parser/tests/test_parser.c +++ b/ccompiler/frontend/parser/tests/test_parser.c @@ -23,7 +23,7 @@ int main(int argc, char** argv) { struct SymbolTable symtab; init_symtab(&symtab); - struct Parser parser; + struct parser parser; init_parser(&parser, &lexer, &symtab); parse_prog(&parser); diff --git a/ccompiler/frontend/parser/type.h b/ccompiler/frontend/parser/type.h index 7c22572..b45a991 100644 --- a/ccompiler/frontend/parser/type.h +++ b/ccompiler/frontend/parser/type.h @@ -3,7 +3,7 @@ #include "../lexer/token.h" -enum DataType { +typedef enum { TYPE_VOID, TYPE_CHAR, TYPE_SHORT, @@ -30,6 +30,6 @@ enum DataType { TYPE_ATOMIC, TYPE_TYPEDEF, -}; +} data_type_t; #endif diff --git a/ccompiler/middleend/Makefile b/ccompiler/middleend/Makefile new file mode 100644 index 0000000..e69de29 diff --git a/ccompiler/middleend/ir.c b/ccompiler/middleend/ir.c index 98be859..479a15a 100644 --- a/ccompiler/middleend/ir.c +++ b/ccompiler/middleend/ir.c @@ -5,48 +5,61 @@ typedef struct ASTNode ASTNode; // 上下文结构,记录生成过程中的状态 typedef struct { - ir_func_t* current_func; // 当前处理的函数 - ir_bblock_t* current_block; // 当前基本块 - uint32_t vreg_counter; // 虚拟寄存器计数器 + ir_func_t* cur_func; // 当前处理的函数 + ir_bblock_t* cur_block; // 当前基本块 } IRGenContext; IRGenContext ctx; ir_prog_t prog; ir_type_t type_i32 = { .tag = IR_TYPE_INT32, }; + static inline void init_ir_node_t(ir_node_t* node) { + node->name = NULL; + node->type = NULL; vector_init(node->used_by); } 
-static inline ir_node_t* new_ir_node_t() { +static inline ir_node_t* new_irnode() { ir_node_t* node = xmalloc(sizeof(ir_node_t)); init_ir_node_t(node); } +static inline ir_bblock_t* new_irbblock(const char* name) { + ir_bblock_t* block = xmalloc(sizeof(ir_bblock_t)); + block->label = name; + vector_init(block->instrs); + return block; +} + ir_node_t* emit_instr(ir_bblock_t* block) { - if (block == NULL) block = ctx.current_block; - ir_node_t *node = new_ir_node_t(); + if (block == NULL) block = ctx.cur_block; + ir_node_t *node = new_irnode(); vector_push(block->instrs, node); return vector_at(block->instrs, block->instrs.size - 1); } -void emit_br(ir_node_t cond, const char* true_lable, const char* false_lable) { - ir_node_t br = { - .tag = IR_NODE_RET, - .data = { +ir_node_t* emit_br(ir_node_t* cond, ir_bblock_t* trueb, ir_bblock_t* falseb) { + ir_node_t* br = emit_instr(NULL); + *br = (ir_node_t) { + .tag = IR_NODE_BRANCH, + .data.branch = { + .cond = cond, + .true_bblock = trueb, + .false_bblock = falseb, } }; - // emit_instr(br, NULL); + return br; } ir_node_t* gen_ir_expr(ASTNode* node) { switch (node->type) { case NT_TERM_VAL: { - ir_node_t* ir = new_ir_node_t(); + ir_node_t* ir = new_irnode(); *ir = (ir_node_t) { .tag = IR_NODE_CONST_INT, .data.const_int = { - .val = node->syms.tok.constant.i, + .val = node->syms.tok.val.i, }, }; return ir; @@ -56,15 +69,18 @@ ir_node_t* gen_ir_expr(ASTNode* node) { return decl; } case NT_TERM_CALL: { - // TODO - ir_node_t* ir = new_ir_node_t(); + ir_node_t* ir = emit_instr(NULL); *ir = (ir_node_t) { .tag = IR_NODE_CALL, .data.call = { - .callee = NULL, + .callee = node->call.func_decl->decl_func.def->func.data, }, }; vector_init(ir->data.call.args); + for (int i = 0; i < node->call.params->params.params.size; i++) { + vector_push(ir->data.call.args, \ + gen_ir_expr(node->call.params->params.params.data[i])); + } return ir; } default: @@ -191,42 +207,75 @@ NEXT: } return ret; } +static ir_func_t* new_irfunc(const char* 
name) { + ir_func_t *func = xmalloc(sizeof(ir_func_t)); + + vector_init(func->bblocks); + vector_init(func->params); + *func = (ir_func_t) { + .name = name, + // TODO typing system + .type = &type_i32, + }; + return func; +} + +static void gen_ir_func(ASTNode* node, ir_func_t* func) { + assert(node->type == NT_FUNC); + ir_bblock_t *entry = new_irbblock("entry"); + vector_push(func->bblocks, entry); + + vector_push(prog.funcs, func); + IRGenContext prev_ctx = ctx; + ctx.cur_func = func; + ctx.cur_block = entry; + + ast_node_t* params = node->func.decl->decl_func.params; + for (int i = 0; i < params->params.params.size; i ++) { + ir_node_t* decl = emit_instr(entry); + ast_node_t* param = params->params.params.data[i]; + vector_push(func->params, decl); + *decl = (ir_node_t) { + .tag = IR_NODE_ALLOC, + .name = param->decl_val.name->syms.tok.val.str, + .type = &type_i32, + }; + param->decl_val.data = decl; + } + gen_ir_from_ast(node->func.body); + + ctx = prev_ctx; +} void gen_ir_from_ast(struct ASTNode* node) { switch (node->type) { case NT_ROOT: { - for (int i = 0; i < node->root.child_size; i ++) { - gen_ir_from_ast(node->root.children[i]); + for (int i = 0; i < node->root.children.size; i ++) { + gen_ir_from_ast(node->root.children.data[i]); } - } break; + break; + } + case NT_DECL_FUNC: { + ir_func_t* func = new_irfunc(node->decl_func.name->syms.tok.val.str); + if (node->decl_func.def == NULL) { + ast_node_t* def = new_ast_node(); + def->func.body = NULL; + def->func.decl = node; + node->decl_func.def = def; + vector_push(prog.extern_funcs, func); + } + node->decl_func.def->func.data = func; + break; + } case NT_FUNC: { - ir_func_t *func = xmalloc(sizeof(ir_func_t)); - *func = (ir_func_t) { - .name = node->func.name->syms.tok.constant.str, - }; - vector_init(func->bblocks); - - ir_bblock_t *entry = xmalloc(sizeof(ir_bblock_t)); - *entry = (ir_bblock_t) { - .label = "entry", - }; - vector_init(entry->instrs); - vector_push(func->bblocks, entry); - - IRGenContext 
prev_ctx = ctx; - ctx = (IRGenContext) { - .current_func = func, - .current_block = vector_at(func->bblocks, 0), - .vreg_counter = 0, - }; - - gen_ir_from_ast(node->func.body); - - ctx = prev_ctx; - vector_push(prog.funcs, func); - } break; + gen_ir_func(node, node->func.data); + break; + } case NT_STMT_RETURN: { - ir_node_t* ret = gen_ir_expr(node->return_stmt.expr_stmt); + ir_node_t* ret = NULL; + if (node->return_stmt.expr_stmt != NULL) { + ret = gen_ir_expr(node->return_stmt.expr_stmt); + } ir_node_t* ir = emit_instr(NULL); *ir = (ir_node_t) { .tag = IR_NODE_RET, @@ -236,22 +285,54 @@ void gen_ir_from_ast(struct ASTNode* node) { } } }; + + vector_push(ctx.cur_func->bblocks, new_irbblock(NULL)); + break; + } + case NT_STMT_BLOCK: { + gen_ir_from_ast(node->block_stmt.block); break; } case NT_BLOCK: { - for (int i = 0; i < node->block.child_size; i ++) { - gen_ir_from_ast(node->block.children[i]); + for (int i = 0; i < node->block.children.size; i ++) { + gen_ir_from_ast(node->block.children.data[i]); } break; } case NT_STMT_IF: { ir_node_t *cond = gen_ir_expr(node->if_stmt.cond); + ir_bblock_t* trueb = new_irbblock("true_block"); + ir_bblock_t* falseb = new_irbblock("false_block"); + emit_br(cond, trueb, falseb); - // xmalloc(); - // ir_bblock_t then_block = { - // }; - node->if_stmt.if_stmt; - node->if_stmt.else_stmt; + vector_push(ctx.cur_func->bblocks, trueb); + ctx.cur_block = trueb; + gen_ir_from_ast(node->if_stmt.if_stmt); + ir_node_t* jmp = emit_instr(NULL); + + if (node->if_stmt.else_stmt != NULL) { + vector_push(ctx.cur_func->bblocks, falseb); + ctx.cur_block = falseb; + gen_ir_from_ast(node->if_stmt.else_stmt); + ir_node_t* jmp = emit_instr(NULL); + + ctx.cur_block = new_irbblock("jmp_block"); + vector_push(ctx.cur_func->bblocks, ctx.cur_block); + *jmp = (ir_node_t) { + .tag = IR_NODE_JUMP, + .data.jump = { + .target_bblock = ctx.cur_block, + }, + }; + } else { + ctx.cur_block = falseb; + } + *jmp = (ir_node_t) { + .tag = IR_NODE_JUMP, + .data.jump = { 
+ .target_bblock = ctx.cur_block, + }, + }; break; } case NT_STMT_WHILE: { @@ -275,7 +356,7 @@ void gen_ir_from_ast(struct ASTNode* node) { ir_node_t* ret_node = emit_instr(NULL); *ret_node = (ir_node_t) { .tag = IR_NODE_ALLOC, - .name = node->decl_val.name->syms.tok.constant.str, + .name = node->decl_val.name->syms.tok.val.str, .type = &type_i32, }; node->decl_val.data = ret_node; diff --git a/ccompiler/middleend/ir.h b/ccompiler/middleend/ir.h index 0c96308..b06891c 100644 --- a/ccompiler/middleend/ir.h +++ b/ccompiler/middleend/ir.h @@ -54,6 +54,7 @@ typedef struct { typedef struct { vector_header(global, ir_node_t*); vector_header(funcs, ir_func_t*); + vector_header(extern_funcs, ir_func_t*); } ir_prog_t; struct ir_node { @@ -131,15 +132,15 @@ struct ir_node { } op; struct { ir_node_t* cond; - ir_bblock_t true_bblock; - ir_bblock_t false_bblock; + ir_bblock_t* true_bblock; + ir_bblock_t* false_bblock; } branch; struct { - ir_bblock_t target_bblock; + ir_bblock_t* target_bblock; } jump; struct { - ir_func_t callee; - vector_header(args, ir_node_t); + ir_func_t* callee; + vector_header(args, ir_node_t*); } call; struct { ir_node_t* ret_val; diff --git a/ccompiler/middleend/tests/test_file.c b/ccompiler/middleend/tests/test_file.c index 1886a0a..045475b 100644 --- a/ccompiler/middleend/tests/test_file.c +++ b/ccompiler/middleend/tests/test_file.c @@ -1,5 +1,7 @@ -int main(void) { - int a; - a = 1 + 2 * 3; - return a; +int add(int a, int b) { + return a + b; +} + +int main(void) { + return add(1, 2); } diff --git a/test_rv_vm/README.md b/test_rv_vm/README.md new file mode 100644 index 0000000..2cab07b --- /dev/null +++ b/test_rv_vm/README.md @@ -0,0 +1,192 @@ +# riscv_emufun (mini-rv32ima) + +Click below for the YouTube video introducing this project: + +[![Writing a Really Tiny RISC-V Emulator](https://img.youtube.com/vi/YT5vB3UqU_E/0.jpg)](https://www.youtube.com/watch?v=YT5vB3UqU_E) [![But Will It Run 
Doom?](https://img.youtube.com/vi/uZMNK17VCMU/0.jpg)](https://www.youtube.com/watch?v=uZMNK17VCMU) + +## What + +mini-rv32ima is a single-file-header, [mini-rv32ima.h](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.h), in the [STB Style library](https://github.com/nothings/stb) that: + * Implements a RISC-V **rv32ima/Zifencei†+Zicsr** (and partial su), with CLINT and MMIO. + * Is about **400 lines** of actual code. + * Has **no dependencies**, not even libc. + * Is **easily extensible**. So you can easily add CSRs, instructions, MMIO, etc! + * Is pretty **performant**. (~450 coremark on my laptop, about 1/2 the speed of QEMU) + * Is human-readable and in **basic C** code. + * Is "**incomplete**" in that it didn't implement the tons of the spec that Linux doesn't (and you shouldn't) use. + * Is trivially **embeddable** in applications. + +It has a [demo wrapper](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.c) that: + * Implements a CLI, SYSCON, UART, DTB and Kernel image loading. + * And it is only around **250 lines** of code, itself. + * Compiles down to a **~18kB executable** and only relies on libc. + +†: Zifence+RV32A are stubbed. So, tweaks will need to be made if you want to emulate a multiprocessor system with this emulator. + +Just see the `mini-rv32ima` folder. + +It's "fully functional" now in that I can run Linux, apps, etc. Compile flat binaries and drop them in an image. + +## Why + +I'm working on a really really simple C Risc-V emulator. So simple it doesn't even have an MMU (Memory Management Unit). I have a few goals, they include: + * Furthering RV32-NOMMU work to improve Linux support for RV32-NOMMU. (Imagine if we could run Linux on the $1 ESP32-C3) + * Learning more about RV32 and writing emulators.
+ * Being further inspired by @pimaker's amazing work on [Running Linux in a Pixel Shader](https://blog.pimaker.at/texts/rvc1/) and having the sneaking suspicion performance could be even better! + * Hoping to port it to some weird places. + * Understand the *most simplistic* system you can run Linux on and trying to push that boundary. + * Continue to include my [education of people about assembly language](https://www.youtube.com/watch?v=Gelf0AyVGy4). + +## How + +Windows instructions (Just playing with the image) + * Clone this repo. + * Install or have TinyCC. [Powershell Installer](https://github.com/cntools/Install-TCC) or [Regular Windows Installer](https://github.com/cnlohr/tinycc-win64-installer/releases/tag/v0_0.9.27) + * Run `winrun.ps` in the `windows` folder. + +WSL (For full toolchain and image build): + * You will need to remove all spaces from your path i.e. `export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/mnt/c/Windows/system32:/snap/bin` and continue the instructions. P.S. What in the world was Windows thinking, putting a space between "Program" and "Files"??!? + +Linux instructions (both): + * Clone this repo. + * Install `git build-essential` and/or whatever other requirements are in place for [buildroot](https://buildroot.org/). + * `make testdlimage` + * It automatically downloads the image (~1MB) and runs the emulator. + * Should be up and running in about 2.5s depending on internet speed. + +You can do in-depth work on Linux by: + * `make everything` + +If you want to play with the bare metal system, see below, or if you have the toolchain installed, just: + * `make testbare` + +If you just want to play emdoom, and use the prebuilt image: + * On Windows, run `windows\winrundoom.ps1` + * On Linux, `cd mini-rv32ima`, and type `make testdoom` + +## Questions? + * Why not rv64? + * Because then I can't run it as easily in a pixel shader if I ever hope to. + * Can I add an MMU? + * Yes.
It actually probably wouldn't be too difficult. + * Should I add an MMU? + * No. It is important to further support for nommu systems to empower minimal Risc-V designs! + +Everything else: Contact us on my Discord: https://discord.com/invite/CCeyWyZ + +## How do I use this in my own project? + +You should not need to modify `mini-rv32ima.h`, but instead, use `mini-rv32ima.c` as a template for what you are trying to do in your own project. + +You can override all functionality by defining the following macros. Here are examples of what `mini-rv32ima.c` does with them. You can see the definition of the functions, or augment their definitions, by altering `mini-rv32ima.c`. + +| Macro | Definition / Comment | +| --- | --- | +| `MINIRV32WARN( x... )` | `printf( x );`
Warnings emitted from mini-rv32ima.h | +| `MINIRV32_DECORATE` | `static`
How to decorate the functions. | +| `MINI_RV32_RAM_SIZE` | `ram_amt`
A variable, how big is system RAM? | +| `MINIRV32_IMPLEMENTATION` | If using mini-rv32ima.h, need to define this. | +| `MINIRV32_POSTEXEC( pc, ir, retval )` | `{ if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } }`
If you want to execute something every time slice. | +| `MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val )` | `if( HandleControlStore( addy, val ) ) return val;`
Called on non-RAM memory access. | +| `MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval )` | `rval = HandleControlLoad( addy );`
Called on non-RAM memory access; returns a value. | +| `MINIRV32_OTHERCSR_WRITE( csrno, value )` | `HandleOtherCSRWrite( image, csrno, value );`
You can use CSRs for control requests. | +| `MINIRV32_OTHERCSR_READ( csrno, value )` | `value = HandleOtherCSRRead( image, csrno );`
You can use CSRs for control requests. | + +## Hopeful goals? + * Further drive down needed features to run Linux. + * Remove need for RV32A extension on systems with only one CPU. + * Support for relocatable ELF executables. + * Add support for an unreal UART. One that's **much** simpler than the current 8250 driver. + * Maybe run this in a pixelshader too! + * Get opensbi working with this. + * Be able to "embed" rv32 emulators in random projects. + * Can I use early console to be a full system console? + * Can I increase the maximum contiguous memory allocatable? + +## Special Thanks + * For @regymm and their [patches to buildroot](https://github.com/regymm/buildroot) and help! + * callout: Regymm's [quasiSoC project](https://github.com/regymm/quasiSoC/). + * Buildroot (For being so helpful). + * @vowstar and their team working on [k210-linux-nommu](https://github.com/vowstar/k210-linux-nommu). + * This [guide](https://jborza.com/emulation/2020/04/09/riscv-environment.html) + * [rvcodecjs](https://luplab.gitlab.io/rvcodecjs/) I probably went through over 1,000 codes here. + * @splinedrive from the [KianV RISC-V noMMU SoC](https://github.com/splinedrive/kianRiscV/tree/master/linux_socs/kianv_harris_mcycle_edition?s=09) project. + +## More details + +If you want to build the kernel yourself: + * `make everything` + * About 20 minutes. (Or 4+ hours if you're on [Windows Subsystem for Linux 2](https://github.com/microsoft/WSL/issues/4197)) + * And you should be dropped into a Linux busybox shell with some little tools that were compiled here. + +## Emdoom notes + * Emdoom building is in the `experiments/emdoom` folder + * You *MUST* build your kernel with `MAX_ORDER` set to >12 in `buildroot/output/build/linux-5.19/include/linux/mmzone.h` if you are building your own image. + * You CAN use the pre-existing image that is described above. + * On Windows, it will be very slow. Not sure why. 
+ +If you want to use bare metal to build your binaries so you don't need buildroot, you can use the rv64 gcc in 32-bit mode built into Ubuntu 20.04 and up. +``` +sudo apt-get install gcc-multilib gcc-riscv64-unknown-elf make +``` + +## Links + * "Hackaday Supercon 2022: Charles Lohr - Assembly in 2022: Yes! We Still Use it and Here's Why" : https://www.youtube.com/watch?v=Gelf0AyVGy4 + +## Attic + + +## General notes: + * https://github.com/cnlohr/riscv_emufun/commit/2f09cdeb378dc0215c07eb63f5a6fb43dbbf1871#diff-b48ccd795ae9aced07d022bf010bf9376232c4d78210c3113d90a8d349c59b3dL440 + + +(These things don't currently work) + +### Building Tests + +(This does not work, now) +``` +cd riscv-tests +export CROSS_COMPILE=riscv64-linux-gnu- +export PLATFORM_RISCV_XLEN=32 +CC=riscv64-linux-gnu-gcc ./configure +make XLEN=32 RISCV_PREFIX=riscv64-unknown-elf- RISCV_GCC_OPTS="-g -O1 -march=rv32imaf -mabi=ilp32f -I/usr/include" +``` + +### Building OpenSBI + +(This does not currently work!) +``` +cd opensbi +export CROSS_COMPILE=riscv64-unknown-elf- +export PLATFORM_RISCV_XLEN=32 +make +``` + +### Extra links + * Clear outline of CSRs: https://five-embeddev.com/riscv-isa-manual/latest/priv-csrs.html + * Fonts used in videos: https://audiolink.dev/ + +### Using custom build + +Where yminpatch is the patch from the mailing list. +``` +rm -rf buildroot +git clone git://git.buildroot.net/buildroot +cd buildroot +git am < ../yminpatch.txt +make qemu_riscv32_nommu_virt_defconfig +make +# Or use our configs. +``` + +Note: For emdoom you will need to modify include/linux/mmzone.h and change MAX_ORDER to 13. 
+ +### Buildroot Notes + +Add this: +https://github.com/cnlohr/buildroot/pull/1/commits/bc890f74354e7e2f2b1cf7715f6ef334ff6ed1b2 + +Use this: +https://github.com/cnlohr/buildroot/commit/e97714621bfae535d947817e98956b112eb80a75 + diff --git a/test_rv_vm/mini-rv32ima.c b/test_rv_vm/mini-rv32ima.c new file mode 100644 index 0000000..f443891 --- /dev/null +++ b/test_rv_vm/mini-rv32ima.c @@ -0,0 +1,520 @@ +// Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses. + +#include +#include +#include +#include +#include + +#include "default64mbdtc.h" + +// Just default RAM amount is 64MB. +uint32_t ram_amt = 64*1024*1024; +int fail_on_all_faults = 0; + +static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber ); +static uint64_t GetTimeMicroseconds(); +static void ResetKeyboardInput(); +static void CaptureKeyboardInput(); +static uint32_t HandleException( uint32_t ir, uint32_t retval ); +static uint32_t HandleControlStore( uint32_t addy, uint32_t val ); +static uint32_t HandleControlLoad( uint32_t addy ); +static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value ); +static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno ); +static void MiniSleep(); +static int IsKBHit(); +static int ReadKBByte(); + +// This is the functionality we want to override in the emulator. +// think of this as the way the emulator's processor is connected to the outside world. +#define MINIRV32WARN( x... 
) printf( x ); +#define MINIRV32_DECORATE static +#define MINI_RV32_RAM_SIZE ram_amt +#define MINIRV32_IMPLEMENTATION +#define MINIRV32_POSTEXEC( pc, ir, retval ) { if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } } +#define MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val ) if( HandleControlStore( addy, val ) ) return val; +#define MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval ) rval = HandleControlLoad( addy ); +#define MINIRV32_OTHERCSR_WRITE( csrno, value ) HandleOtherCSRWrite( image, csrno, value ); +#define MINIRV32_OTHERCSR_READ( csrno, value ) value = HandleOtherCSRRead( image, csrno ); + +#include "mini-rv32ima.h" + +uint8_t * ram_image = 0; +struct MiniRV32IMAState * core; +const char * kernel_command_line = 0; + +static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image ); + +int main( int argc, char ** argv ) +{ + int i; + long long instct = -1; + int show_help = 0; + int time_divisor = 1; + int fixed_update = 0; + int do_sleep = 1; + int single_step = 0; + int dtb_ptr = 0; + const char * image_file_name = 0; + const char * dtb_file_name = 0; + for( i = 1; i < argc; i++ ) + { + const char * param = argv[i]; + int param_continue = 0; // Can combine parameters, like -lpt x + do + { + if( param[0] == '-' || param_continue ) + { + switch( param[1] ) + { + case 'm': if( ++i < argc ) ram_amt = SimpleReadNumberInt( argv[i], ram_amt ); break; + case 'c': if( ++i < argc ) instct = SimpleReadNumberInt( argv[i], -1 ); break; + case 'k': if( ++i < argc ) kernel_command_line = argv[i]; break; + case 'f': image_file_name = (++i ram_amt ) + { + fprintf( stderr, "Error: Could not fit RAM image (%ld bytes) into %d\n", flen, ram_amt ); + return -6; + } + + memset( ram_image, 0, ram_amt ); + if( fread( ram_image, flen, 1, f ) != 1) + { + fprintf( stderr, "Error: Could not load image.\n" ); + return -7; + } + fclose( f ); + + if( dtb_file_name ) + { + if( strcmp( dtb_file_name, 
"disable" ) == 0 ) + { + // No DTB reading. + } + else + { + f = fopen( dtb_file_name, "rb" ); + if( !f || ferror( f ) ) + { + fprintf( stderr, "Error: \"%s\" not found\n", dtb_file_name ); + return -5; + } + fseek( f, 0, SEEK_END ); + long dtblen = ftell( f ); + fseek( f, 0, SEEK_SET ); + dtb_ptr = ram_amt - dtblen - sizeof( struct MiniRV32IMAState ); + if( fread( ram_image + dtb_ptr, dtblen, 1, f ) != 1 ) + { + fprintf( stderr, "Error: Could not open dtb \"%s\"\n", dtb_file_name ); + return -9; + } + fclose( f ); + } + } + else + { + // Load a default dtb. + dtb_ptr = ram_amt - sizeof(default64mbdtb) - sizeof( struct MiniRV32IMAState ); + memcpy( ram_image + dtb_ptr, default64mbdtb, sizeof( default64mbdtb ) ); + if( kernel_command_line ) + { + strncpy( (char*)( ram_image + dtb_ptr + 0xc0 ), kernel_command_line, 54 ); + } + } + } + + CaptureKeyboardInput(); + + // The core lives at the end of RAM. + core = (struct MiniRV32IMAState *)(ram_image + ram_amt - sizeof( struct MiniRV32IMAState )); + core->pc = MINIRV32_RAM_IMAGE_OFFSET; + core->regs[10] = 0x00; //hart ID + core->regs[11] = dtb_ptr?(dtb_ptr+MINIRV32_RAM_IMAGE_OFFSET):0; //dtb_pa (Must be valid pointer) (Should be pointer to dtb) + core->extraflags |= 3; // Machine-mode. + + if( dtb_file_name == 0 ) + { + // Update system ram size in DTB (but if and only if we're using the default DTB) + // Warning - this will need to be updated if the skeleton DTB is ever modified. + uint32_t * dtb = (uint32_t*)(ram_image + dtb_ptr); + if( dtb[0x13c/4] == 0x00c0ff03 ) + { + uint32_t validram = dtb_ptr; + dtb[0x13c/4] = (validram>>24) | ((( validram >> 16 ) & 0xff) << 8 ) | (((validram>>8) & 0xff ) << 16 ) | ( ( validram & 0xff) << 24 ); + } + } + + // Image is loaded. 
+ uint64_t rt; + uint64_t lastTime = (fixed_update)?0:(GetTimeMicroseconds()/time_divisor); + int instrs_per_flip = single_step?1:1024; + for( rt = 0; rt < instct+1 || instct < 0; rt += instrs_per_flip ) + { + uint64_t * this_ccount = ((uint64_t*)&core->cyclel); + uint32_t elapsedUs = 0; + if( fixed_update ) + elapsedUs = *this_ccount / time_divisor - lastTime; + else + elapsedUs = GetTimeMicroseconds()/time_divisor - lastTime; + lastTime += elapsedUs; + + if( single_step ) + DumpState( core, ram_image); + + int ret = MiniRV32IMAStep( core, ram_image, 0, elapsedUs, instrs_per_flip ); // Execute upto 1024 cycles before breaking out. + switch( ret ) + { + case 0: break; + case 1: if( do_sleep ) MiniSleep(); *this_ccount += instrs_per_flip; break; + case 3: instct = 0; break; + case 0x7777: goto restart; //syscon code for restart + case 0x5555: printf( "POWEROFF@0x%08x%08x\n", core->cycleh, core->cyclel ); return 0; //syscon code for power-off + default: printf( "Unknown failure\n" ); break; + } + } + + DumpState( core, ram_image); +} + + +////////////////////////////////////////////////////////////////////////// +// Platform-specific functionality +////////////////////////////////////////////////////////////////////////// + + +#if defined(WINDOWS) || defined(WIN32) || defined(_WIN32) + +#include +#include + +#define strtoll _strtoi64 + +static void CaptureKeyboardInput() +{ + system(""); // Poorly documented tick: Enable VT100 Windows mode. +} + +static void ResetKeyboardInput() +{ +} + +static void MiniSleep() +{ + Sleep(1); +} + +static uint64_t GetTimeMicroseconds() +{ + static LARGE_INTEGER lpf; + LARGE_INTEGER li; + + if( !lpf.QuadPart ) + QueryPerformanceFrequency( &lpf ); + + QueryPerformanceCounter( &li ); + return ((uint64_t)li.QuadPart * 1000000LL) / (uint64_t)lpf.QuadPart; +} + + +static int IsKBHit() +{ + return _kbhit(); +} + +static int ReadKBByte() +{ + // This code is kind of tricky, but used to convert windows arrow keys + // to VT100 arrow keys. 
+ static int is_escape_sequence = 0; + int r; + if( is_escape_sequence == 1 ) + { + is_escape_sequence++; + return '['; + } + + r = _getch(); + + if( is_escape_sequence ) + { + is_escape_sequence = 0; + switch( r ) + { + case 'H': return 'A'; // Up + case 'P': return 'B'; // Down + case 'K': return 'D'; // Left + case 'M': return 'C'; // Right + case 'G': return 'H'; // Home + case 'O': return 'F'; // End + default: return r; // Unknown code. + } + } + else + { + switch( r ) + { + case 13: return 10; //cr->lf + case 224: is_escape_sequence = 1; return 27; // Escape arrow keys + default: return r; + } + } +} + +#else + +#include +#include +#include +#include +#include + +static void CtrlC() +{ + DumpState( core, ram_image); + exit( 0 ); +} + +// Override keyboard, so we can capture all keyboard input for the VM. +static void CaptureKeyboardInput() +{ + // Hook exit, because we want to re-enable keyboard. + atexit(ResetKeyboardInput); + signal(SIGINT, CtrlC); + + struct termios term; + tcgetattr(0, &term); + term.c_lflag &= ~(ICANON | ECHO); // Disable echo as well + tcsetattr(0, TCSANOW, &term); +} + +static void ResetKeyboardInput() +{ + // Re-enable echo, etc. on keyboard. + struct termios term; + tcgetattr(0, &term); + term.c_lflag |= ICANON | ECHO; + tcsetattr(0, TCSANOW, &term); +} + +static void MiniSleep() +{ + usleep(500); +} + +static uint64_t GetTimeMicroseconds() +{ + struct timeval tv; + gettimeofday( &tv, 0 ); + return tv.tv_usec + ((uint64_t)(tv.tv_sec)) * 1000000LL; +} + +static int is_eofd; + +static int ReadKBByte() +{ + if( is_eofd ) return 0xffffffff; + char rxchar = 0; + int rread = read(fileno(stdin), (char*)&rxchar, 1); + + if( rread > 0 ) // Tricky: getchar can't be used with arrow keys. 
+ return rxchar; + else + return -1; +} + +static int IsKBHit() +{ + if( is_eofd ) return -1; + int byteswaiting; + ioctl(0, FIONREAD, &byteswaiting); + if( !byteswaiting && write( fileno(stdin), 0, 0 ) != 0 ) { is_eofd = 1; return -1; } // Is end-of-file for + return !!byteswaiting; +} + + +#endif + + +////////////////////////////////////////////////////////////////////////// +// Rest of functions functionality +////////////////////////////////////////////////////////////////////////// + +static uint32_t HandleException( uint32_t ir, uint32_t code ) +{ + // Weird opcode emitted by duktape on exit. + if( code == 3 ) + { + // Could handle other opcodes here. + } + return code; +} + +static uint32_t HandleControlStore( uint32_t addy, uint32_t val ) +{ + if( addy == 0x10000000 ) //UART 8250 / 16550 Data Buffer + { + printf( "%c", val ); + fflush( stdout ); + } + else if( addy == 0x11004004 ) //CLNT + core->timermatchh = val; + else if( addy == 0x11004000 ) //CLNT + core->timermatchl = val; + else if( addy == 0x11100000 ) //SYSCON (reboot, poweroff, etc.) + { + core->pc = core->pc + 4; + return val; // NOTE: PC will be PC of Syscon. 
+ } + return 0; +} + + +static uint32_t HandleControlLoad( uint32_t addy ) +{ + // Emulating a 8250 / 16550 UART + if( addy == 0x10000005 ) + return 0x60 | IsKBHit(); + else if( addy == 0x10000000 && IsKBHit() ) + return ReadKBByte(); + else if( addy == 0x1100bffc ) // https://chromitem-soc.readthedocs.io/en/latest/clint.html + return core->timerh; + else if( addy == 0x1100bff8 ) + return core->timerl; + return 0; +} + +static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value ) +{ + if( csrno == 0x136 ) + { + printf( "%d", value ); fflush( stdout ); + } + if( csrno == 0x137 ) + { + printf( "%08x", value ); fflush( stdout ); + } + else if( csrno == 0x138 ) + { + //Print "string" + uint32_t ptrstart = value - MINIRV32_RAM_IMAGE_OFFSET; + uint32_t ptrend = ptrstart; + if( ptrstart >= ram_amt ) + printf( "DEBUG PASSED INVALID PTR (%08x)\n", value ); + while( ptrend < ram_amt ) + { + if( image[ptrend] == 0 ) break; + ptrend++; + } + if( ptrend != ptrstart ) + fwrite( image + ptrstart, ptrend - ptrstart, 1, stdout ); + } + else if( csrno == 0x139 ) + { + putchar( value ); fflush( stdout ); + } +} + +static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno ) +{ + if( csrno == 0x140 ) + { + if( !IsKBHit() ) return -1; + return ReadKBByte(); + } + return 0; +} + +static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber ) +{ + if( !number || !number[0] ) return defaultNumber; + int radix = 10; + if( number[0] == '0' ) + { + char nc = number[1]; + number+=2; + if( nc == 0 ) return 0; + else if( nc == 'x' ) radix = 16; + else if( nc == 'b' ) radix = 2; + else { number--; radix = 8; } + } + char * endptr; + uint64_t ret = strtoll( number, &endptr, radix ); + if( endptr == number ) + { + return defaultNumber; + } + else + { + return ret; + } +} + +static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image ) +{ + uint32_t pc = core->pc; + uint32_t pc_offset = pc - MINIRV32_RAM_IMAGE_OFFSET; + uint32_t ir = 0; 
+ + printf( "PC: %08x ", pc ); + if( pc_offset >= 0 && pc_offset < ram_amt - 3 ) + { + ir = *((uint32_t*)(&((uint8_t*)ram_image)[pc_offset])); + printf( "[0x%08x] ", ir ); + } + else + printf( "[xxxxxxxxxx] " ); + uint32_t * regs = core->regs; + printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ", + regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7], + regs[8], regs[9], regs[10], regs[11], regs[12], regs[13], regs[14], regs[15] ); + printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n", + regs[16], regs[17], regs[18], regs[19], regs[20], regs[21], regs[22], regs[23], + regs[24], regs[25], regs[26], regs[27], regs[28], regs[29], regs[30], regs[31] ); +} + diff --git a/test_rv_vm/mini-rv32ima.h b/test_rv_vm/mini-rv32ima.h new file mode 100644 index 0000000..28d1580 --- /dev/null +++ b/test_rv_vm/mini-rv32ima.h @@ -0,0 +1,547 @@ +// Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses. + +#ifndef _MINI_RV32IMAH_H +#define _MINI_RV32IMAH_H + +/** + To use mini-rv32ima.h for the bare minimum, the following: + + #define MINI_RV32_RAM_SIZE ram_amt + #define MINIRV32_IMPLEMENTATION + + #include "mini-rv32ima.h" + + Though, that's not _that_ interesting. You probably want I/O! + + + Notes: + * There is a dedicated CLNT at 0x10000000. + * There is free MMIO from there to 0x12000000. + * You can put things like a UART, or whatever there. + * Feel free to override any of the functionality with macros. +*/ + +#ifndef MINIRV32WARN + #define MINIRV32WARN( x... 
); +#endif + +#ifndef MINIRV32_DECORATE + #define MINIRV32_DECORATE static +#endif + +#ifndef MINIRV32_RAM_IMAGE_OFFSET + #define MINIRV32_RAM_IMAGE_OFFSET 0x80000000 +#endif + +#ifndef MINIRV32_MMIO_RANGE + #define MINIRV32_MMIO_RANGE(n) (0x10000000 <= (n) && (n) < 0x12000000) +#endif + +#ifndef MINIRV32_POSTEXEC + #define MINIRV32_POSTEXEC(...); +#endif + +#ifndef MINIRV32_HANDLE_MEM_STORE_CONTROL + #define MINIRV32_HANDLE_MEM_STORE_CONTROL(...); +#endif + +#ifndef MINIRV32_HANDLE_MEM_LOAD_CONTROL + #define MINIRV32_HANDLE_MEM_LOAD_CONTROL(...); +#endif + +#ifndef MINIRV32_OTHERCSR_WRITE + #define MINIRV32_OTHERCSR_WRITE(...); +#endif + +#ifndef MINIRV32_OTHERCSR_READ + #define MINIRV32_OTHERCSR_READ(...); +#endif + +#ifndef MINIRV32_CUSTOM_MEMORY_BUS + #define MINIRV32_STORE4( ofs, val ) *(uint32_t*)(image + ofs) = val + #define MINIRV32_STORE2( ofs, val ) *(uint16_t*)(image + ofs) = val + #define MINIRV32_STORE1( ofs, val ) *(uint8_t*)(image + ofs) = val + #define MINIRV32_LOAD4( ofs ) *(uint32_t*)(image + ofs) + #define MINIRV32_LOAD2( ofs ) *(uint16_t*)(image + ofs) + #define MINIRV32_LOAD1( ofs ) *(uint8_t*)(image + ofs) + #define MINIRV32_LOAD2_SIGNED( ofs ) *(int16_t*)(image + ofs) + #define MINIRV32_LOAD1_SIGNED( ofs ) *(int8_t*)(image + ofs) +#endif + +// As a note: We quouple-ify these, because in HLSL, we will be operating with +// uint4's. We are going to uint4 data to/from system RAM. +// +// We're going to try to keep the full processor state to 12 x uint4. +struct MiniRV32IMAState +{ + uint32_t regs[32]; + + uint32_t pc; + uint32_t mstatus; + uint32_t cyclel; + uint32_t cycleh; + + uint32_t timerl; + uint32_t timerh; + uint32_t timermatchl; + uint32_t timermatchh; + + uint32_t mscratch; + uint32_t mtvec; + uint32_t mie; + uint32_t mip; + + uint32_t mepc; + uint32_t mtval; + uint32_t mcause; + + // Note: only a few bits are used. (Machine = 3, User = 0) + // Bits 0..1 = privilege. 
+ // Bit 2 = WFI (Wait for interrupt) + // Bit 3+ = Load/Store reservation LSBs. + uint32_t extraflags; +}; + +#ifndef MINIRV32_STEPPROTO +MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count ); +#endif + +#ifdef MINIRV32_IMPLEMENTATION + +#ifndef MINIRV32_CUSTOM_INTERNALS +#define CSR( x ) state->x +#define SETCSR( x, val ) { state->x = val; } +#define REG( x ) state->regs[x] +#define REGSET( x, val ) { state->regs[x] = val; } +#endif + +#ifndef MINIRV32_STEPPROTO +MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count ) +#else +MINIRV32_STEPPROTO +#endif +{ + uint32_t new_timer = CSR( timerl ) + elapsedUs; + if( new_timer < CSR( timerl ) ) CSR( timerh )++; + CSR( timerl ) = new_timer; + + // Handle Timer interrupt. + if( ( CSR( timerh ) > CSR( timermatchh ) || ( CSR( timerh ) == CSR( timermatchh ) && CSR( timerl ) > CSR( timermatchl ) ) ) && ( CSR( timermatchh ) || CSR( timermatchl ) ) ) + { + CSR( extraflags ) &= ~4; // Clear WFI + CSR( mip ) |= 1<<7; //MTIP of MIP // https://stackoverflow.com/a/61916199/2926815 Fire interrupt. + } + else + CSR( mip ) &= ~(1<<7); + + // If WFI, don't run processor. + if( CSR( extraflags ) & 4 ) + return 1; + + uint32_t trap = 0; + uint32_t rval = 0; + uint32_t pc = CSR( pc ); + uint32_t cycle = CSR( cyclel ); + + if( ( CSR( mip ) & (1<<7) ) && ( CSR( mie ) & (1<<7) /*mtie*/ ) && ( CSR( mstatus ) & 0x8 /*mie*/) ) + { + // Timer interrupt. + trap = 0x80000007; + pc -= 4; + } + else // No timer interrupt? Execute a bunch of instructions. + for( int icount = 0; icount < count; icount++ ) + { + uint32_t ir = 0; + rval = 0; + cycle++; + uint32_t ofs_pc = pc - MINIRV32_RAM_IMAGE_OFFSET; + + if( ofs_pc >= MINI_RV32_RAM_SIZE ) + { + trap = 1 + 1; // Handle access violation on instruction read. 
+ break; + } + else if( ofs_pc & 3 ) + { + trap = 1 + 0; //Handle PC-misaligned access + break; + } + else + { + ir = MINIRV32_LOAD4( ofs_pc ); + uint32_t rdid = (ir >> 7) & 0x1f; + + switch( ir & 0x7f ) + { + case 0x37: // LUI (0b0110111) + rval = ( ir & 0xfffff000 ); + break; + case 0x17: // AUIPC (0b0010111) + rval = pc + ( ir & 0xfffff000 ); + break; + case 0x6F: // JAL (0b1101111) + { + int32_t reladdy = ((ir & 0x80000000)>>11) | ((ir & 0x7fe00000)>>20) | ((ir & 0x00100000)>>9) | ((ir&0x000ff000)); + if( reladdy & 0x00100000 ) reladdy |= 0xffe00000; // Sign extension. + rval = pc + 4; + pc = pc + reladdy - 4; + break; + } + case 0x67: // JALR (0b1100111) + { + uint32_t imm = ir >> 20; + int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0); + rval = pc + 4; + pc = ( (REG( (ir >> 15) & 0x1f ) + imm_se) & ~1) - 4; + break; + } + case 0x63: // Branch (0b1100011) + { + uint32_t immm4 = ((ir & 0xf00)>>7) | ((ir & 0x7e000000)>>20) | ((ir & 0x80) << 4) | ((ir >> 31)<<12); + if( immm4 & 0x1000 ) immm4 |= 0xffffe000; + int32_t rs1 = REG((ir >> 15) & 0x1f); + int32_t rs2 = REG((ir >> 20) & 0x1f); + immm4 = pc + immm4 - 4; + rdid = 0; + switch( ( ir >> 12 ) & 0x7 ) + { + // BEQ, BNE, BLT, BGE, BLTU, BGEU + case 0: if( rs1 == rs2 ) pc = immm4; break; + case 1: if( rs1 != rs2 ) pc = immm4; break; + case 4: if( rs1 < rs2 ) pc = immm4; break; + case 5: if( rs1 >= rs2 ) pc = immm4; break; //BGE + case 6: if( (uint32_t)rs1 < (uint32_t)rs2 ) pc = immm4; break; //BLTU + case 7: if( (uint32_t)rs1 >= (uint32_t)rs2 ) pc = immm4; break; //BGEU + default: trap = (2+1); + } + break; + } + case 0x03: // Load (0b0000011) + { + uint32_t rs1 = REG((ir >> 15) & 0x1f); + uint32_t imm = ir >> 20; + int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0); + uint32_t rsval = rs1 + imm_se; + + rsval -= MINIRV32_RAM_IMAGE_OFFSET; + if( rsval >= MINI_RV32_RAM_SIZE-3 ) + { + rsval += MINIRV32_RAM_IMAGE_OFFSET; + if( MINIRV32_MMIO_RANGE( rsval ) ) // UART, CLNT + { + MINIRV32_HANDLE_MEM_LOAD_CONTROL( 
rsval, rval ); + } + else + { + trap = (5+1); + rval = rsval; + } + } + else + { + switch( ( ir >> 12 ) & 0x7 ) + { + //LB, LH, LW, LBU, LHU + case 0: rval = MINIRV32_LOAD1_SIGNED( rsval ); break; + case 1: rval = MINIRV32_LOAD2_SIGNED( rsval ); break; + case 2: rval = MINIRV32_LOAD4( rsval ); break; + case 4: rval = MINIRV32_LOAD1( rsval ); break; + case 5: rval = MINIRV32_LOAD2( rsval ); break; + default: trap = (2+1); + } + } + break; + } + case 0x23: // Store 0b0100011 + { + uint32_t rs1 = REG((ir >> 15) & 0x1f); + uint32_t rs2 = REG((ir >> 20) & 0x1f); + uint32_t addy = ( ( ir >> 7 ) & 0x1f ) | ( ( ir & 0xfe000000 ) >> 20 ); + if( addy & 0x800 ) addy |= 0xfffff000; + addy += rs1 - MINIRV32_RAM_IMAGE_OFFSET; + rdid = 0; + + if( addy >= MINI_RV32_RAM_SIZE-3 ) + { + addy += MINIRV32_RAM_IMAGE_OFFSET; + if( MINIRV32_MMIO_RANGE( addy ) ) + { + MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, rs2 ); + } + else + { + trap = (7+1); // Store access fault. + rval = addy; + } + } + else + { + switch( ( ir >> 12 ) & 0x7 ) + { + //SB, SH, SW + case 0: MINIRV32_STORE1( addy, rs2 ); break; + case 1: MINIRV32_STORE2( addy, rs2 ); break; + case 2: MINIRV32_STORE4( addy, rs2 ); break; + default: trap = (2+1); + } + } + break; + } + case 0x13: // Op-immediate 0b0010011 + case 0x33: // Op 0b0110011 + { + uint32_t imm = ir >> 20; + imm = imm | (( imm & 0x800 )?0xfffff000:0); + uint32_t rs1 = REG((ir >> 15) & 0x1f); + uint32_t is_reg = !!( ir & 0x20 ); + uint32_t rs2 = is_reg ? REG(imm & 0x1f) : imm; + + if( is_reg && ( ir & 0x02000000 ) ) + { + switch( (ir>>12)&7 ) //0x02000000 = RV32M + { + case 0: rval = rs1 * rs2; break; // MUL +#ifndef CUSTOM_MULH // If compiling on a system that doesn't natively, or via libgcc support 64-bit math. 
+ case 1: rval = ((int64_t)((int32_t)rs1) * (int64_t)((int32_t)rs2)) >> 32; break; // MULH + case 2: rval = ((int64_t)((int32_t)rs1) * (uint64_t)rs2) >> 32; break; // MULHSU + case 3: rval = ((uint64_t)rs1 * (uint64_t)rs2) >> 32; break; // MULHU +#else + CUSTOM_MULH +#endif + case 4: if( rs2 == 0 ) rval = -1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? rs1 : ((int32_t)rs1 / (int32_t)rs2); break; // DIV + case 5: if( rs2 == 0 ) rval = 0xffffffff; else rval = rs1 / rs2; break; // DIVU + case 6: if( rs2 == 0 ) rval = rs1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? 0 : ((uint32_t)((int32_t)rs1 % (int32_t)rs2)); break; // REM + case 7: if( rs2 == 0 ) rval = rs1; else rval = rs1 % rs2; break; // REMU + } + } + else + { + switch( (ir>>12)&7 ) // These could be either op-immediate or op commands. Be careful. + { + case 0: rval = (is_reg && (ir & 0x40000000) ) ? ( rs1 - rs2 ) : ( rs1 + rs2 ); break; + case 1: rval = rs1 << (rs2 & 0x1F); break; + case 2: rval = (int32_t)rs1 < (int32_t)rs2; break; + case 3: rval = rs1 < rs2; break; + case 4: rval = rs1 ^ rs2; break; + case 5: rval = (ir & 0x40000000 ) ? ( ((int32_t)rs1) >> (rs2 & 0x1F) ) : ( rs1 >> (rs2 & 0x1F) ); break; + case 6: rval = rs1 | rs2; break; + case 7: rval = rs1 & rs2; break; + } + } + break; + } + case 0x0f: // 0b0001111 + rdid = 0; // fencetype = (ir >> 12) & 0b111; We ignore fences in this impl. + break; + case 0x73: // Zifencei+Zicsr (0b1110011) + { + uint32_t csrno = ir >> 20; + uint32_t microop = ( ir >> 12 ) & 0x7; + if( (microop & 3) ) // It's a Zicsr function. 
+ { + int rs1imm = (ir >> 15) & 0x1f; + uint32_t rs1 = REG(rs1imm); + uint32_t writeval = rs1; + + // https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf + // Generally, support for Zicsr + switch( csrno ) + { + case 0x340: rval = CSR( mscratch ); break; + case 0x305: rval = CSR( mtvec ); break; + case 0x304: rval = CSR( mie ); break; + case 0xC00: rval = cycle; break; + case 0x344: rval = CSR( mip ); break; + case 0x341: rval = CSR( mepc ); break; + case 0x300: rval = CSR( mstatus ); break; //mstatus + case 0x342: rval = CSR( mcause ); break; + case 0x343: rval = CSR( mtval ); break; + case 0xf11: rval = 0xff0ff0ff; break; //mvendorid + case 0x301: rval = 0x40401101; break; //misa (XLEN=32, IMA+X) + //case 0x3B0: rval = 0; break; //pmpaddr0 + //case 0x3a0: rval = 0; break; //pmpcfg0 + //case 0xf12: rval = 0x00000000; break; //marchid + //case 0xf13: rval = 0x00000000; break; //mimpid + //case 0xf14: rval = 0x00000000; break; //mhartid + default: + MINIRV32_OTHERCSR_READ( csrno, rval ); + break; + } + + switch( microop ) + { + case 1: writeval = rs1; break; //CSRRW + case 2: writeval = rval | rs1; break; //CSRRS + case 3: writeval = rval & ~rs1; break; //CSRRC + case 5: writeval = rs1imm; break; //CSRRWI + case 6: writeval = rval | rs1imm; break; //CSRRSI + case 7: writeval = rval & ~rs1imm; break; //CSRRCI + } + + switch( csrno ) + { + case 0x340: SETCSR( mscratch, writeval ); break; + case 0x305: SETCSR( mtvec, writeval ); break; + case 0x304: SETCSR( mie, writeval ); break; + case 0x344: SETCSR( mip, writeval ); break; + case 0x341: SETCSR( mepc, writeval ); break; + case 0x300: SETCSR( mstatus, writeval ); break; //mstatus + case 0x342: SETCSR( mcause, writeval ); break; + case 0x343: SETCSR( mtval, writeval ); break; + //case 0x3a0: break; //pmpcfg0 + //case 0x3B0: break; //pmpaddr0 + //case 0xf11: break; //mvendorid + //case 0xf12: break; //marchid + //case 0xf13: break; //mimpid + //case 0xf14: break; //mhartid + //case 0x301: 
break; //misa + default: + MINIRV32_OTHERCSR_WRITE( csrno, writeval ); + break; + } + } + else if( microop == 0x0 ) // "SYSTEM" 0b000 + { + rdid = 0; + if( ( ( csrno & 0xff ) == 0x02 ) ) // MRET + { + //https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf + //Table 7.6. MRET then in mstatus/mstatush sets MPV=0, MPP=0, MIE=MPIE, and MPIE=1. La + // Should also update mstatus to reflect correct mode. + uint32_t startmstatus = CSR( mstatus ); + uint32_t startextraflags = CSR( extraflags ); + SETCSR( mstatus , (( startmstatus & 0x80) >> 4) | ((startextraflags&3) << 11) | 0x80 ); + SETCSR( extraflags, (startextraflags & ~3) | ((startmstatus >> 11) & 3) ); + pc = CSR( mepc ) -4; + } else { + switch (csrno) { + case 0: + #ifndef ECALL_HANDLER + trap = ( CSR( extraflags ) & 3) ? (11+1) : (8+1); // ECALL; 8 = "Environment call from U-mode"; 11 = "Environment call from M-mode" + #else + ECALL_HANDLER(state); + trap = 0; + #endif + break; + case 1: + trap = (3+1); break; // EBREAK 3 = "Breakpoint" + case 0x105: //WFI (Wait for interrupts) + CSR( mstatus ) |= 8; //Enable interrupts + CSR( extraflags ) |= 4; //Infor environment we want to go to sleep. + SETCSR( pc, pc + 4 ); + return 1; + default: + trap = (2+1); break; // Illegal opcode. + } + } + } + else + trap = (2+1); // Note micrrop 0b100 == undefined. + break; + } + case 0x2f: // RV32A (0b00101111) + { + uint32_t rs1 = REG((ir >> 15) & 0x1f); + uint32_t rs2 = REG((ir >> 20) & 0x1f); + uint32_t irmid = ( ir>>27 ) & 0x1f; + + rs1 -= MINIRV32_RAM_IMAGE_OFFSET; + + // We don't implement load/store from UART or CLNT with RV32A here. 
+ + if( rs1 >= MINI_RV32_RAM_SIZE-3 ) + { + trap = (7+1); //Store/AMO access fault + rval = rs1 + MINIRV32_RAM_IMAGE_OFFSET; + } + else + { + rval = MINIRV32_LOAD4( rs1 ); + + // Referenced a little bit of https://github.com/franzflasch/riscv_em/blob/master/src/core/core.c + uint32_t dowrite = 1; + switch( irmid ) + { + case 2: //LR.W (0b00010) + dowrite = 0; + CSR( extraflags ) = (CSR( extraflags ) & 0x07) | (rs1<<3); + break; + case 3: //SC.W (0b00011) (Make sure we have a slot, and, it's valid) + rval = ( CSR( extraflags ) >> 3 != ( rs1 & 0x1fffffff ) ); // Validate that our reservation slot is OK. + dowrite = !rval; // Only write if slot is valid. + break; + case 1: break; //AMOSWAP.W (0b00001) + case 0: rs2 += rval; break; //AMOADD.W (0b00000) + case 4: rs2 ^= rval; break; //AMOXOR.W (0b00100) + case 12: rs2 &= rval; break; //AMOAND.W (0b01100) + case 8: rs2 |= rval; break; //AMOOR.W (0b01000) + case 16: rs2 = ((int32_t)rs2<(int32_t)rval)?rs2:rval; break; //AMOMIN.W (0b10000) + case 20: rs2 = ((int32_t)rs2>(int32_t)rval)?rs2:rval; break; //AMOMAX.W (0b10100) + case 24: rs2 = (rs2rval)?rs2:rval; break; //AMOMAXU.W (0b11100) + default: trap = (2+1); dowrite = 0; break; //Not supported. + } + if( dowrite ) MINIRV32_STORE4( rs1, rs2 ); + } + break; + } + default: trap = (2+1); // Fault: Invalid opcode. + } + + // If there was a trap, do NOT allow register writeback. + if( trap ) { + SETCSR( pc, pc ); + MINIRV32_POSTEXEC( pc, ir, trap ); + break; + } + + if( rdid ) + { + REGSET( rdid, rval ); // Write back register. + } + } + + MINIRV32_POSTEXEC( pc, ir, trap ); + + pc += 4; + } + + // Handle traps and interrupts. + if( trap ) + { + if( trap & 0x80000000 ) // If prefixed with 1 in MSB, it's an interrupt, not a trap. + { + SETCSR( mcause, trap ); + SETCSR( mtval, 0 ); + pc += 4; // PC needs to point to where the PC will return to. + } + else + { + SETCSR( mcause, trap - 1 ); + SETCSR( mtval, (trap > 5 && trap <= 8)? 
rval : pc ); + } + SETCSR( mepc, pc ); //TRICKY: The kernel advances mepc automatically. + //CSR( mstatus ) & 8 = MIE, & 0x80 = MPIE + // On an interrupt, the system moves current MIE into MPIE + SETCSR( mstatus, (( CSR( mstatus ) & 0x08) << 4) | (( CSR( extraflags ) & 3 ) << 11) ); + pc = (CSR( mtvec ) - 4); + + // If trapping, always enter machine mode. + CSR( extraflags ) |= 3; + + trap = 0; + pc += 4; + } + + if( CSR( cyclel ) > cycle ) CSR( cycleh )++; + SETCSR( cyclel, cycle ); + SETCSR( pc, pc ); + return 0; +} + +#endif + +#endif + + diff --git a/test_rv_vm/ripes-vm.c b/test_rv_vm/ripes-vm.c new file mode 100644 index 0000000..3944090 --- /dev/null +++ b/test_rv_vm/ripes-vm.c @@ -0,0 +1,139 @@ +#include +#include +#include +#include + +struct MiniRV32IMAState; +void ecall_handler(struct MiniRV32IMAState *state); +#define ECALL_HANDLER(state) ecall_handler(state) +#define MINIRV32WARN( x... ) printf( x ); +#define MINIRV32_DECORATE static +#define MINI_RV32_RAM_SIZE (32 * 1024 * 1024) +#define MINIRV32_IMPLEMENTATION + +#define MINIRV32_RAM_IMAGE_OFFSET 0x0 +#include "mini-rv32ima.h" + +#define SYSCALL(num) (1025 + num) +void ecall_handler(struct MiniRV32IMAState *state) { + uint32_t a0 = REG(10); + uint32_t a1 = REG(11); + switch (state->regs[17]) // x17 | a7 + { + case 1: + // PrintInt + printf("%d", a0); + break; + case 4: + // PrintString + printf("%s", a0); + break; + case 10: + fprintf(stderr, "\nexit: %d\n", a0); + exit(a0); + case 93: + fprintf(stderr, "\nmain return code: %d\n", a0); + exit(a0); + case SYSCALL(0): + // getchar(); + REGSET(10, getchar()); + case SYSCALL(1): + // putchar + putchar(a0); + + case SYSCALL(4): + // input int + scanf("%d", &a0); + REGSET(10, a0); + break; + case SYSCALL(5): + // input string + scanf("%s", a0); + REGSET(10, a0); + break; + default: + MINIRV32WARN("Unhandled ECALL: %d\n", state->regs[17]); + exit(1); + break; + } +} + +int main(int argc, char *argv[]) { + // gcc -DDEFAULT_FILE='\"flat.bin\"' .\ripes-vm.c -o 
rv32-vm.exe + struct MiniRV32IMAState state; + uint8_t *image = (uint8_t *)malloc(MINI_RV32_RAM_SIZE); + + // 初始化状态 + memset(&state, 0, sizeof(state)); + state.pc = 0; // 程序计数器从0开始 + state.mstatus = 0x80000000; // 设置机器模式 + state.mtvec = 0x1000; + state.mie = 0x7; // 启用所有中断 + + // 初始化内存 + memset(image, 0, MINI_RV32_RAM_SIZE); + + #ifndef DEFAULT_FILE + #define DEFAULT_FILE "../ccompiler/backend/test_rv.bin" + #endif + const char* filename = DEFAULT_FILE; + // 加载 flatbin 文件 + if (argc == 2) { + filename = argv[1]; + } + + FILE *file = fopen(filename, "rb"); + if (!file) { + fprintf(stderr, "Usage: %s \n", argv[0]); + printf("Failed to open file %s\n", filename); + return 1; + } + + fseek(file, 0, SEEK_END); + long flen = ftell(file); + fseek(file, 0, SEEK_SET); + + if (flen > MINI_RV32_RAM_SIZE) { + fprintf(stderr, "Flatbin file is too large\n"); + fclose(file); + return 1; + } + + fread(image, flen, 1, file); + fclose(file); + + // 运行模拟器 + while (1) { + int32_t ret = MiniRV32IMAStep(&state, image, MINIRV32_RAM_IMAGE_OFFSET, 0, 1); + if (ret != 0) { + printf("Exception or interrupt occurred at PC: %d\n", state.pc); + return ret; + } + } + + free(image); + return 0; +} + +// static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image ) +// { +// uint32_t pc = core->pc; +// uint32_t pc_offset = pc - MINIRV32_RAM_IMAGE_OFFSET; +// uint32_t ir = 0; + +// printf( "PC: %08x ", pc ); +// if( pc_offset >= 0 && pc_offset < ram_amt - 3 ) +// { +// ir = *((uint32_t*)(&((uint8_t*)ram_image)[pc_offset])); +// printf( "[0x%08x] ", ir ); +// } +// else +// printf( "[xxxxxxxxxx] " ); +// uint32_t * regs = core->regs; +// printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ", +// regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7], +// regs[8], regs[9], regs[10], regs[11], regs[12], regs[13], regs[14], regs[15] ); +// printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x 
s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n", +// regs[16], regs[17], regs[18], regs[19], regs[20], regs[21], regs[22], regs[23], +// regs[24], regs[25], regs[26], regs[27], regs[28], regs[29], regs[30], regs[31] ); +// }