feat add func call and rewrite codes
This commit is contained in:
		
							
								
								
									
										13
									
								
								ccompiler/backend/riscv32/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								ccompiler/backend/riscv32/Makefile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | ||||
# Build the RV32IMA back end of ccompiler against the front-end library.
# None of these targets except `ccompiler` names a real file.
.PHONY: all run frontend clean

all: ccompiler

# Compile test.c into the flat binary image flat.bin.
run: ccompiler
	./ccompiler test.c flat.bin

# Depends on the phony `frontend` target, so the link step runs every time.
ccompiler: frontend
	gcc -g rv32ima_codegen.c ../../middleend/ir.c -L../../frontend -lfrontend -o ccompiler

# $(MAKE) lets command-line flags and the jobserver reach the sub-make.
frontend:
	$(MAKE) -C ../../frontend

clean:
	rm -f ccompiler flat.bin
							
								
								
									
										338
									
								
								ccompiler/backend/riscv32/rv32gen.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										338
									
								
								ccompiler/backend/riscv32/rv32gen.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,338 @@ | ||||
| #ifndef __RV32I_GEN_H__ | ||||
| #define __RV32I_GEN_H__ | ||||
|  | ||||
| /** | ||||
| 31                  25 24        20 19        15 14    12 11            7 6        0  | ||||
| imm[31:12] rd 0110111 U lui  | ||||
| imm[31:12] rd 0010111 U auipc  | ||||
| imm[20|10:1|11|19:12] rd 1101111 J jal  | ||||
| imm[11:0] rs1 000 rd 1100111 I jalr  | ||||
| imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 B beq  | ||||
| imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 B bne  | ||||
| imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 B blt  | ||||
| imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 B bge  | ||||
| imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 B bltu  | ||||
| imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 B bgeu  | ||||
| imm[11:0] rs1 000 rd 0000011 I lb  | ||||
| imm[11:0] rs1 001 rd 0000011 I lh  | ||||
| imm[11:0] rs1 010 rd 0000011 I lw  | ||||
| imm[11:0] rs1 100 rd 0000011 I lbu  | ||||
| imm[11:0] rs1 101 rd 0000011 I lhu  | ||||
| imm[11:5] rs2 rs1 000 imm[4:0] 0100011 S sb  | ||||
| imm[11:5] rs2 rs1 001 imm[4:0] 0100011 S sh  | ||||
| imm[11:5] rs2 rs1 010 imm[4:0] 0100011 S sw  | ||||
| imm[11:0] rs1 000 rd 0010011 I addi  | ||||
| imm[11:0] rs1 010 rd 0010011 I slti  | ||||
| imm[11:0] rs1 011 rd 0010011 I sltiu  | ||||
| imm[11:0] rs1 100 rd 0010011 I xori  | ||||
| imm[11:0] rs1 110 rd 0010011 I ori  | ||||
| imm[11:0] rs1 111 rd 0010011 I andi  | ||||
| 0000000 shamt rs1 001 rd 0010011 I slli  | ||||
| 0000000 shamt rs1 101 rd 0010011 I srli  | ||||
| 0100000 shamt rs1 101 rd 0010011 I srai  | ||||
| 0000000 rs2 rs1 000 rd 0110011 R add  | ||||
| 0100000 rs2 rs1 000 rd 0110011 R sub  | ||||
| 0000000 rs2 rs1 001 rd 0110011 R sll  | ||||
| 0000000 rs2 rs1 010 rd 0110011 R slt  | ||||
| 0000000 rs2 rs1 011 rd 0110011 R sltu  | ||||
| 0000000 rs2 rs1 100 rd 0110011 R xor  | ||||
| 0000000 rs2 rs1 101 rd 0110011 R srl  | ||||
| 0100000 rs2 rs1 101 rd 0110011 R sra  | ||||
| 0000000 rs2 rs1 110 rd 0110011 R or  | ||||
| 0000000 rs2 rs1 111 rd 0110011 R and  | ||||
| 0000 pred succ 00000 000 00000 0001111 I fence  | ||||
| 0000 0000 0000 00000 001 00000 0001111 I fence.i  | ||||
| 000000000000 00000 000 00000 1110011 I ecall  | ||||
| 000000000000 00000 000 00000 1110011 I ebreak  | ||||
| csr rs1 001 rd 1110011 I csrrw  | ||||
| csr rs1 010 rd 1110011 I csrrs  | ||||
| csr rs1 011 rd 1110011 I csrrc  | ||||
| csr zimm 101 rd 1110011 I csrrwi  | ||||
| csr zimm 110 rd 1110011 I csrrsi  | ||||
| csr zimm 111 rd 1110011 I csrrci | ||||
|  */ | ||||
| #include <stdint.h> | ||||
|  | ||||
// Integer register numbers. The architectural names x0..x31 come first;
// the aliases below give the same numbers their ABI names. Each value is
// the register index placed in the rd/rs1/rs2 fields by the encoder macros.
typedef enum {
    /* architectural names */
    REG_X0 = 0,
    REG_X1,  REG_X2,  REG_X3,  REG_X4,  REG_X5,  REG_X6,  REG_X7,  REG_X8,
    REG_X9,  REG_X10, REG_X11, REG_X12, REG_X13, REG_X14, REG_X15, REG_X16,
    REG_X17, REG_X18, REG_X19, REG_X20, REG_X21, REG_X22, REG_X23, REG_X24,
    REG_X25, REG_X26, REG_X27, REG_X28, REG_X29, REG_X30, REG_X31,

    /* zero, return address and pointer registers */
    REG_ZERO = REG_X0,
    REG_RA   = REG_X1,
    REG_SP   = REG_X2,
    REG_GP   = REG_X3,
    REG_TP   = REG_X4,

    /* temporaries t0..t6 */
    REG_T0 = REG_X5,  REG_T1 = REG_X6,  REG_T2 = REG_X7,
    REG_T3 = REG_X28, REG_T4 = REG_X29, REG_T5 = REG_X30, REG_T6 = REG_X31,

    /* saved registers s0..s11 */
    REG_S0 = REG_X8,  REG_S1 = REG_X9,
    REG_S2 = REG_X18, REG_S3 = REG_X19, REG_S4  = REG_X20, REG_S5  = REG_X21,
    REG_S6 = REG_X22, REG_S7 = REG_X23, REG_S8  = REG_X24, REG_S9  = REG_X25,
    REG_S10 = REG_X26, REG_S11 = REG_X27,

    /* argument registers a0..a7 */
    REG_A0 = REG_X10, REG_A1 = REG_X11, REG_A2 = REG_X12, REG_A3 = REG_X13,
    REG_A4 = REG_X14, REG_A5 = REG_X15, REG_A6 = REG_X16, REG_A7 = REG_X17,
} RV32Reg;
|  | ||||
/******************** Immediate field helpers ********************/
// Each helper converts its argument to uint32_t before masking. The result
// is therefore unsigned, so shifting a field up to bit 31 (for example
// IMM_12BITS(-16) << 20) is well-defined instead of overflowing a signed int.
#define IMM_12BITS(imm)     ((uint32_t)(imm) & 0xFFFu)   /* low 12 bits */
#define IMM_20BITS(imm)     ((uint32_t)(imm) & 0xFFFFFu) /* low 20 bits */
#define SHAMT_VAL(imm)      ((uint32_t)(imm) & 0x1Fu)    /* 5-bit shift amount */
#define CSR_VAL(csr)        ((uint32_t)(csr) & 0xFFFu)   /* 12-bit CSR number */
|  | ||||
// B-type immediate scatter ([12|10:5] ... [4:1|11]).
// The byte offset is converted to uint32_t first, so negative offsets and
// the shift into bit 31 are well-defined. Bit 0 of the offset is dropped.
#define ENCODE_B_IMM(imm) ( \
    ((((uint32_t)(imm) >> 12) & 0x1u)  << 31) |  /* imm[12]   -> instr[31]    */ \
    ((((uint32_t)(imm) >> 5)  & 0x3Fu) << 25) |  /* imm[10:5] -> instr[30:25] */ \
    ((((uint32_t)(imm) >> 1)  & 0xFu)  << 8)  |  /* imm[4:1]  -> instr[11:8]  */ \
    ((((uint32_t)(imm) >> 11) & 0x1u)  << 7))    /* imm[11]   -> instr[7]     */
|  | ||||
// J-type immediate scatter ([20|10:1|11|19:12]).
// The byte offset is converted to uint32_t first, so negative offsets and
// the shift into bit 31 are well-defined. Bit 0 of the offset is dropped.
#define ENCODE_J_IMM(imm) ( \
    ((((uint32_t)(imm) >> 20) & 0x1u)   << 31) |  /* imm[20]    -> instr[31]    */ \
    ((((uint32_t)(imm) >> 1)  & 0x3FFu) << 21) |  /* imm[10:1]  -> instr[30:21] */ \
    ((((uint32_t)(imm) >> 11) & 0x1u)   << 20) |  /* imm[11]    -> instr[20]    */ \
    ((((uint32_t)(imm) >> 12) & 0xFFu)  << 12))   /* imm[19:12] -> instr[19:12] */
/******************** Instruction format builders ********************/
// R-type: funct7 | rs2 | rs1 | funct3 | rd | opcode.
// `op` is now honoured (it was previously ignored in favour of a literal
// 0x33); every field is widened to uint32_t before shifting.
#define RV32_RTYPE(op, f3, f7, rd, rs1, rs2) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ((uint32_t)(f3) << 12) | \
    ((uint32_t)(rs1) << 15) | ((uint32_t)(rs2) << 20) | ((uint32_t)(f7) << 25) ))
|  | ||||
// I-type: imm[11:0] | rs1 | funct3 | rd | opcode.
// The 12-bit immediate is widened to uint32_t before being shifted to
// bits [31:20], so negative immediates do not overflow a signed int.
#define RV32_ITYPE(op, f3, rd, rs1, imm) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ((uint32_t)(f3) << 12) | \
    ((uint32_t)(rs1) << 15) | ((uint32_t)IMM_12BITS(imm) << 20) ))
|  | ||||
// S-type: imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode.
// Fields are widened to uint32_t so placing imm[11:5] in bits [31:25]
// cannot overflow a signed int.
#define RV32_STYPE(op, f3, rs1, rs2, imm) ((uint32_t)( \
    (uint32_t)(op) | (((uint32_t)IMM_12BITS(imm) & 0x1Fu) << 7) | \
    ((uint32_t)(f3) << 12) | ((uint32_t)(rs1) << 15) | ((uint32_t)(rs2) << 20) | \
    (((uint32_t)IMM_12BITS(imm) & 0xFE0u) << 20) ))
|  | ||||
// B-type: scattered 13-bit branch offset | rs2 | rs1 | funct3 | opcode.
// `op` is parenthesized and every field is widened to uint32_t.
#define RV32_BTYPE(op, f3, rs1, rs2, imm) ((uint32_t)( \
    (uint32_t)(op) | (uint32_t)(ENCODE_B_IMM(imm)) | ((uint32_t)(f3) << 12) | \
    ((uint32_t)(rs1) << 15) | ((uint32_t)(rs2) << 20) ))
|  | ||||
// U-type: imm[31:12] | rd | opcode.
// `imm` is the full 32-bit value; only its bits [31:12] are encoded. The
// mask is applied on uint32_t so values with bit 31 set stay well-defined.
#define RV32_UTYPE(op, rd, imm) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ((uint32_t)(imm) & 0xFFFFF000u) ))
|  | ||||
// J-type: scattered 21-bit jump offset | rd | opcode.
// `op` is parenthesized and every field is widened to uint32_t.
#define RV32_JTYPE(op, rd, imm) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | (uint32_t)(ENCODE_J_IMM(imm)) ))
|  | ||||
/******************** U-type ********************/
// `value` is a full 32-bit quantity; RV32_UTYPE keeps its bits [31:12].
#define LUI(dst, value)    RV32_UTYPE(0x37, dst, value)
#define AUIPC(dst, value)  RV32_UTYPE(0x17, dst, value)

/******************** J-type ********************/
// Jump and link: `off` is the byte offset handed to RV32_JTYPE.
#define JAL(dst, off)    RV32_JTYPE(0x6F, dst, off)
|  | ||||
/******************** I-type ********************/
// Indirect jump and link: target is base + off.
#define JALR(dst, base, off)  RV32_ITYPE(0x67, 0x0, dst, base, off)

// Loads: dst = memory[base + off]; funct3 selects width and signedness.
#define LB(dst, base, off)  RV32_ITYPE(0x03, 0x0, dst, base, off)
#define LH(dst, base, off)  RV32_ITYPE(0x03, 0x1, dst, base, off)
#define LW(dst, base, off)  RV32_ITYPE(0x03, 0x2, dst, base, off)
#define LBU(dst, base, off) RV32_ITYPE(0x03, 0x4, dst, base, off)
#define LHU(dst, base, off) RV32_ITYPE(0x03, 0x5, dst, base, off)
|  | ||||
// Register-immediate ALU operations: dst = src <op> val (12-bit immediate).
#define ADDI(dst, src, val)  RV32_ITYPE(0x13, 0x0, dst, src, val)
#define SLTI(dst, src, val)  RV32_ITYPE(0x13, 0x2, dst, src, val)
#define SLTIU(dst, src, val) RV32_ITYPE(0x13, 0x3, dst, src, val)
#define XORI(dst, src, val)  RV32_ITYPE(0x13, 0x4, dst, src, val)
#define ORI(dst, src, val)   RV32_ITYPE(0x13, 0x6, dst, src, val)
#define ANDI(dst, src, val)  RV32_ITYPE(0x13, 0x7, dst, src, val)
|  | ||||
// Shift-by-immediate instructions.
// The 5-bit shift amount sits in the low bits of the I-type immediate, and
// SRAI additionally sets immediate bit 10 (instruction bit 30). The earlier
// form pre-shifted these values by 20, so RV32_ITYPE's 12-bit mask erased them.
#define SLLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x1, rd, rs1, SHAMT_VAL(shamt))
#define SRLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, SHAMT_VAL(shamt))
#define SRAI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, (0x400 | SHAMT_VAL(shamt)))
|  | ||||
/******************** B-type ********************/
// Conditional branches: compare `lhs` with `rhs`, branch by byte offset `off`.
#define BEQ(lhs, rhs, off)  RV32_BTYPE(0x63, 0x0, lhs, rhs, off)
#define BNE(lhs, rhs, off)  RV32_BTYPE(0x63, 0x1, lhs, rhs, off)
#define BLT(lhs, rhs, off)  RV32_BTYPE(0x63, 0x4, lhs, rhs, off)
#define BGE(lhs, rhs, off)  RV32_BTYPE(0x63, 0x5, lhs, rhs, off)
#define BLTU(lhs, rhs, off) RV32_BTYPE(0x63, 0x6, lhs, rhs, off)
#define BGEU(lhs, rhs, off) RV32_BTYPE(0x63, 0x7, lhs, rhs, off)
|  | ||||
/******************** S-type ********************/
// Stores: memory[base + off] = src. Note the argument order (src, base, off),
// which is swapped when forwarded to RV32_STYPE(op, f3, rs1, rs2, imm).
#define SB(src, base, off) RV32_STYPE(0x23, 0x0, base, src, off)
#define SH(src, base, off) RV32_STYPE(0x23, 0x1, base, src, off)
#define SW(src, base, off) RV32_STYPE(0x23, 0x2, base, src, off)
|  | ||||
/******************** R-type ********************/
// Register-register ALU operations: dst = lhs <op> rhs.
// funct3 selects the operation; funct7 = 0x20 selects the SUB/SRA variants.
#define ADD(dst, lhs, rhs)  RV32_RTYPE(0x33, 0x0, 0x00, dst, lhs, rhs)
#define SUB(dst, lhs, rhs)  RV32_RTYPE(0x33, 0x0, 0x20, dst, lhs, rhs)
#define SLL(dst, lhs, rhs)  RV32_RTYPE(0x33, 0x1, 0x00, dst, lhs, rhs)
#define SLT(dst, lhs, rhs)  RV32_RTYPE(0x33, 0x2, 0x00, dst, lhs, rhs)
#define SLTU(dst, lhs, rhs) RV32_RTYPE(0x33, 0x3, 0x00, dst, lhs, rhs)
#define XOR(dst, lhs, rhs)  RV32_RTYPE(0x33, 0x4, 0x00, dst, lhs, rhs)
#define SRL(dst, lhs, rhs)  RV32_RTYPE(0x33, 0x5, 0x00, dst, lhs, rhs)
#define SRA(dst, lhs, rhs)  RV32_RTYPE(0x33, 0x5, 0x20, dst, lhs, rhs)
#define OR(dst, lhs, rhs)   RV32_RTYPE(0x33, 0x6, 0x00, dst, lhs, rhs)
#define AND(dst, lhs, rhs)  RV32_RTYPE(0x33, 0x7, 0x00, dst, lhs, rhs)
|  | ||||
/******************** I-type (system) ********************/
// FENCE layout: fm[31:28] | pred[27:24] | succ[23:20] | ... | opcode 0x0F.
// pred and succ are 4-bit sets; they were previously shifted to bits 23 and
// 27, which does not match this layout.
#define FENCE(pred, succ) ((uint32_t)( 0x0Fu | (((uint32_t)(pred) & 0xFu) << 24) | \
                                       (((uint32_t)(succ) & 0xFu) << 20) ))
#define FENCE_I()         (uint32_t)( 0x100F )
#define ECALL()           (uint32_t)( 0x73 )
#define EBREAK()          (uint32_t)( 0x100073 )
|  | ||||
// CSR instructions: csr[31:20] | rs1 or zimm[19:15] | funct3 | rd | opcode 0x73.
// Register forms pass the source register in the rs1 field.
#define CSRRW(rd, csr, rs)  RV32_ITYPE(0x73, 0x1, rd, rs, CSR_VAL(csr))
#define CSRRS(rd, csr, rs)  RV32_ITYPE(0x73, 0x2, rd, rs, CSR_VAL(csr))
#define CSRRC(rd, csr, rs)  RV32_ITYPE(0x73, 0x3, rd, rs, CSR_VAL(csr))
// Immediate forms put the 5-bit zimm in the rs1 field. It used to be OR-ed
// into the immediate at bit 15, where RV32_ITYPE's 12-bit mask discarded it.
#define CSRRWI(rd, csr, zimm) RV32_ITYPE(0x73, 0x5, rd, ((zimm) & 0x1F), CSR_VAL(csr))
#define CSRRSI(rd, csr, zimm) RV32_ITYPE(0x73, 0x6, rd, ((zimm) & 0x1F), CSR_VAL(csr))
#define CSRRCI(rd, csr, zimm) RV32_ITYPE(0x73, 0x7, rd, ((zimm) & 0x1F), CSR_VAL(csr))
|  | ||||
|  | ||||
/* M extension */
// All M-extension operations share funct7 = 0x01; funct3 selects the
// operation (MUL = 0, DIV = 4, REM = 6). DIV and REM previously varied
// funct7 while leaving funct3 at 0, which is not their encoding.
#define MUL(rd, rs1, rs2) RV32_RTYPE(0x33, 0x0, 0x01, rd, rs1, rs2)
#define DIV(rd, rs1, rs2) RV32_RTYPE(0x33, 0x4, 0x01, rd, rs1, rs2)
#define REM(rd, rs1, rs2) RV32_RTYPE(0x33, 0x6, 0x01, rd, rs1, rs2)
|  | ||||
/******************** Pseudo-instructions ********************/
// Each macro expands to the base instruction that implements the pseudo-op.

// nop (No operation)
#define NOP() ADDI(REG_X0, REG_X0, 0)

// neg rd, rs (Two's complement of rs)
#define NEG(rd, rs) SUB(rd, REG_ZERO, rs)

// negw rd, rs (Two's complement word of rs)
// NOTE(review): expands to SUBW, which this header does not define, so the
// macro only compiles where a SUBW encoder is supplied elsewhere.
#define NEGW(rd, rs) SUBW(rd, REG_ZERO, rs)

// snez rd, rs (Set if != zero)
#define SNEZ(rd, rs) SLTU(rd, REG_X0, rs)

// sltz rd, rs (Set if < zero)
#define SLTZ(rd, rs) SLT(rd, rs, REG_X0)

// sgtz rd, rs (Set if > zero)
// The name was previously split as "SG TZ", which defined an object-like
// macro SG instead of the function-like SGTZ.
#define SGTZ(rd, rs) SLT(rd, REG_X0, rs)
|  | ||||
// Compare-with-zero branches, built from the B-type macros with x0 as the
// other operand. `src` is the tested register, `disp` the byte offset.

// beqz rs, offset (Branch if = zero)
#define BEQZ(src, disp) BEQ(src, REG_X0, disp)

// bnez rs, offset (Branch if != zero)
#define BNEZ(src, disp) BNE(src, REG_X0, disp)

// blez rs, offset (Branch if <= zero): 0 >= rs
#define BLEZ(src, disp) BGE(REG_X0, src, disp)

// bgez rs, offset (Branch if >= zero)
#define BGEZ(src, disp) BGE(src, REG_X0, disp)

// bltz rs, offset (Branch if < zero)
#define BLTZ(src, disp) BLT(src, REG_X0, disp)

// bgtz rs, offset (Branch if > zero): 0 < rs
#define BGTZ(src, disp) BLT(REG_X0, src, disp)
|  | ||||
// j offset (Jump)
#define J(offset) JAL(REG_X0, offset)

// jr rs (Jump register)
#define JR(rs) JALR(REG_X0, rs, 0)

// ret (Return from subroutine)
#define RET() JALR(REG_X0, REG_RA, 0)

// tail offset (Tail call far-away subroutine) -- expands to TWO instructions:
//   auipc x6, hi(offset) ; jalr x0, lo(offset)(x6)
// JALR sign-extends its 12-bit immediate, so 0x800 is added before taking
// the upper 20 bits; this keeps hi + lo equal to offset when bit 11 is set.
// The second instruction was previously a three-argument JAL, which does
// not match the two-argument JAL macro.
#define TAIL_2(offset) AUIPC(REG_X6, (uint32_t)(offset) + 0x800u), JALR(REG_X0, REG_X6, offset)
#define TAIL(offset) TAIL_2(offset) // Warning: this emits 2 instructions
|  | ||||
// CSR pseudo-instructions. All of them forward to the CSRR* macros with x0
// as the unused register operand.

// Read CSR into rd. Note the macro's argument order is (csr, rd).
#define CSRR(csr, rd) CSRRS(rd, csr, REG_X0)

// csrw csr, rs (Write CSR)
// The name was previously split as "CSR W", and the arguments were passed
// to CSRRW(rd, csr, rs) in the wrong order.
#define CSRW(csr, rs) CSRRW(REG_X0, csr, rs)

// csrs csr, rs (Set bits in CSR)
#define CSRS(csr, rs) CSRRS(REG_X0, csr, rs)

// csrc csr, rs (Clear bits in CSR)
#define CSRC(csr, rs) CSRRC(REG_X0, csr, rs)

// csrci csr, imm (Immediate clear bits in CSR)
#define CSRCI(csr, imm) CSRRCI(REG_X0, csr, imm)

// csrwi csr, imm (Write CSR immediate)
#define CSRRWI2(csr, imm) CSRRWI(REG_X0, csr, imm)

// csrsi csr, imm (Immediate set bits in CSR)
#define CSRRSI2(csr, imm) CSRRSI(REG_X0, csr, imm)

// csrci csr, imm (Immediate clear bits in CSR); same expansion as CSRCI
#define CSRRCI2(csr, imm) CSRRCI(REG_X0, csr, imm)
|  | ||||
| // // frcsr rd (Read FP control/status register) | ||||
| // #define FRCSR(rd) CSRRS(rd, FCSR, REG_X0) // 读取FP控制/状态寄存器 | ||||
|  | ||||
| // // fscsr rs (Write FP control/status register) | ||||
| // #define FSCSR(rs) CSRRW(REG_X0, FCSR, rs) // 写入FP控制/状态寄存器 | ||||
|  | ||||
| // // frrm rd (Read FP rounding mode) | ||||
| // #define FRRM(rd) CSRRS(rd, FRM, REG_X0) // 读取FP舍入模式 | ||||
|  | ||||
| // // fsrm rs (Write FP rounding mode) | ||||
| // #define FSRM(rs) CSRRW(REG_X0, FRM, rs) // 写入FP舍入模式 | ||||
|  | ||||
| // // frflags rd (Read FP exception flags) | ||||
| // #define FRFLAGS(rd) CSRRS(rd, FFLAGS, REG_X0) // 读取FP例外标志 | ||||
|  | ||||
| // // fsflags rs (Write FP exception flags) | ||||
| // #define FSFLAGS(rs) CSRRW(REG_X0, FFLAGS, rs) // 写入FP例外标志 | ||||
|  | ||||
|  | ||||
// Multi-instruction sequences. LI, CALL and CALL_ABS each expand to TWO
// comma-separated instruction words, intended for use inside an initializer
// list.
//
// ADDI and JALR sign-extend their 12-bit immediate. The upper part is
// therefore computed from (value + 0x800), so that hi + sign_extend(lo)
// equals the requested value even when bit 11 of the value is set.

// li rd, num: load a 32-bit constant (lui + addi).
#define LI(rd, num) \
    LUI(rd, (uint32_t)(num) + 0x800u), \
    ADDI(rd, rd, num)

// mv rd, rs: copy a register.
#define MV(rd, rs)  ADDI(rd, rs, 0)
// not rd, rs: bitwise complement.
#define NOT(rd, rs) XORI(rd, rs, -1)

// call offset: pc-relative call through ra (auipc + jalr).
#define CALL(offset) \
    AUIPC(REG_X1, (uint32_t)(offset) + 0x800u), \
    JALR(REG_X1, REG_X1, offset)

// Call an absolute address: ra = hi(addr), then jalr ra, lo(addr)(ra).
// The previous auipc-to-x0 form could only reach addresses that fit the
// signed 12-bit JALR immediate; targets in that range are unchanged.
#define CALL_ABS(addr) \
    LUI(REG_X1, (uint32_t)(addr) + 0x800u), \
    JALR(REG_X1, REG_X1, addr)
|  | ||||
| #ifdef RISCV_VM_BUILDIN_ECALL | ||||
// Environment-call helper sequences. Each expands to several comma-separated
// instruction words: load the service number into a7 (and an argument into
// a0 where one is taken), then ECALL. The service numbers are those used by
// the original code; their meaning is defined by the target VM.

// Print integer: a0 = num, a7 = 1.
#define ECALL_PNT_INT(num) \
    ADDI(REG_A0, REG_X0, num), \
    ADDI(REG_A7, REG_X0, 0x1), \
    ECALL()

// Print string: a0 = str, a7 = 4. `str` must fit a 12-bit immediate.
#define ECALL_PNT_STR(str) \
    ADDI(REG_A0, REG_X0, str), \
    ADDI(REG_A7, REG_X0, 0x4), \
    ECALL()

// Exit: a7 = 93; a0 is left as is.
#define ECALL_EXIT2() \
    ADDI(REG_A7, REG_X0, 93), \
    ECALL()

// Exit with status: a0 = code, then ECALL_EXIT2.
#define ECALL_EXIT_ARG(code) \
    ADDI(REG_A0, REG_X0, code), \
    ECALL_EXIT2()

// Same expansion as ECALL_EXIT2.
#define ECALL_EXIT() \
    ADDI(REG_A7, REG_X0, 93), \
    ECALL()

// Scan integer: a7 = 1025 + 4. The argument is accepted but unused.
// The ADDI previously lacked its source-register argument and could not expand.
#define ECALL_SCAN_INT(unused) \
    ADDI(REG_A7, REG_X0, (1025 + 4)), \
    ECALL()

// Scan string: a0 = str, a7 = 1025 + 5.
#define ECALL_SCAN_STR(str) \
    ADDI(REG_A0, REG_X0, str), \
    ADDI(REG_A7, REG_X0, (1025 + 5)), \
    ECALL()
| #endif | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										413
									
								
								ccompiler/backend/riscv32/rv32ima_codegen.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										413
									
								
								ccompiler/backend/riscv32/rv32ima_codegen.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,413 @@ | ||||
#define RISCV_VM_BUILDIN_ECALL
#include "rv32gen.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
|  | ||||
// One encoded instruction, viewable either as a 32-bit word or as its four
// bytes in the host's byte order. `code` is the first member, so a brace
// initializer holding a plain uint32_t value fills the word.
union rv32code {
    uint32_t code;
    uint8_t bytes[4];
};
typedef union rv32code rv32code_t;
|  | ||||
// Number of 32-bit instruction slots reserved at the start of the output
// image for the startup stub below. main() writes the generated code at
// byte offset CRT_CODE_SIZE * 4.
#define CRT_CODE_SIZE 16

// Startup stub placed at offset 0 of the output image.
rv32code_t gcodes[] = {
    // sp = 0x1000 (two instructions: lui + addi)
    LI(REG_SP, 0x1000),
    // ra = 0
    LI(REG_RA, 0x0),

    // Call byte address CRT_CODE_SIZE * 4, i.e. the first word after the stub area.
    CALL_ABS(CRT_CODE_SIZE << 2),
    // Exit: a7 = 93, ecall
    ECALL_EXIT2(),
};
|  | ||||
| void test_raw_gen(FILE* out) { | ||||
|     fwrite(gcodes, sizeof(rv32code_t), sizeof(gcodes)/sizeof(gcodes[0]), out); | ||||
| } | ||||
|  | ||||
| #include "../../frontend/frontend.h" | ||||
| #include "../../middleend/ir.h" | ||||
// A pending control-transfer fixup: an instruction that was emitted with a
// placeholder offset and is rewritten once the target's position is known.
typedef struct {
    int code_pos;     // index into ctx.codes of the instruction to rewrite
    int to_idx;       // index of the target basic block (branch/jump) or function (call)
    int cur_idx;      // index of the basic block or function that contains the instruction
    int base_offset;  // byte adjustment used when the final offset is computed
    enum {
        JMP_BRANCH,   // conditional branch (re-emitted as BNEZ)
        JMP_JUMP,     // unconditional jump (re-emitted as J)
        JMP_CALL,     // function call (re-emitted as the two-word CALL)
    } type;
} jmp_t;
|  | ||||
// Code generator state shared by all gen_* functions in this file.
static struct {
    vector_header(codes, rv32code_t);  // instruction words emitted so far
    int stack_offset;                  // frame size in bytes of the current function
    int stack_base;                    // byte offset of the first per-instruction slot in the frame
    int tmp_reg;                       // not referenced by the code in this file
    ir_bblock_t* cur_block;            // basic block being generated
    ir_func_t* cur_func;               // function being generated
    ir_prog_t* prog;                   // program being generated
    vector_header(jmp, jmp_t*);        // branch/jump fixups of the current function
    vector_header(call, jmp_t*);       // call fixups, resolved after all functions are emitted

    int cur_func_offset;               // bytes emitted in the current function before the current block
    int cur_block_offset;              // bytes emitted in the current block before the current instruction
} ctx;
|  | ||||
| int write_inst(union rv32code ins, FILE* fp) { | ||||
|     return fwrite(&ins, sizeof(union rv32code), 1, fp); | ||||
| } | ||||
|  | ||||
// Append one 32-bit instruction word to ctx.codes and add its four bytes to
// the `len` counter of the enclosing function (a local named `len` must be
// in scope). Wrapped in do/while so it behaves as a single statement, and
// built with a compound literal instead of a cast to the union type.
#define GENCODE(code) do { \
        vector_push(ctx.codes, (rv32code_t){ .code = (code) }); \
        len += 4; \
    } while (0)

// Append a comma-separated sequence of instruction words (for example the
// two words produced by LI or CALL). The helper locals carry a gencodes_
// prefix so they cannot capture names used inside the `code` argument.
#define GENCODES(code) do { \
        const uint32_t gencodes_words_[] = { \
            code \
        }; \
        for (size_t gencodes_i_ = 0; \
             gencodes_i_ < sizeof(gencodes_words_) / sizeof(gencodes_words_[0]); \
             gencodes_i_++) { \
            GENCODE(gencodes_words_[gencodes_i_]); \
        } \
    } while (0)
|  | ||||
| static int stack_offset(ir_node_t* ptr) { | ||||
|     int offset = ctx.stack_base; | ||||
|     for (int i = 0; i < ctx.cur_func->bblocks.size; i ++) { | ||||
|         ir_bblock_t* block = vector_at(ctx.cur_func->bblocks, i); | ||||
|         for (int i = 0; i < block->instrs.size; i++) { | ||||
|             if (vector_at(block->instrs, i) == ptr) { | ||||
|                 offset += i * 4; | ||||
|                 assert(offset >= 0 && offset < ctx.stack_offset); | ||||
|                 return offset; | ||||
|             } | ||||
|         } | ||||
|         offset += block->instrs.size * 4; | ||||
|     } | ||||
|     assert(0); | ||||
| } | ||||
|  | ||||
| static int block_idx(ir_bblock_t* toblock) { | ||||
|     for (int i = 0; i < ctx.cur_func->bblocks.size; i ++) { | ||||
|         ir_bblock_t* block = vector_at(ctx.cur_func->bblocks, i); | ||||
|         if (toblock == block) { | ||||
|             return i; | ||||
|         } | ||||
|     } | ||||
|     assert(0); | ||||
| } | ||||
|  | ||||
| static int func_idx(ir_func_t* tofunc) { | ||||
|     for (int i = 0; i < ctx.prog->funcs.size; i ++) { | ||||
|         ir_func_t* func = vector_at(ctx.prog->funcs, i); | ||||
|         if (tofunc == func) { | ||||
|             return i; | ||||
|         } | ||||
|     } | ||||
|     assert(0); | ||||
| } | ||||
|  | ||||
// Look up `name` in the table of built-in (ecall-backed) functions.
// Returns the table index of the match, or -1 if `name` is not built in.
static int system_func(const char* name) {
    static const char* const builtin_names[] = {
        "ecall_pnt_int",
    };
    const int builtin_count = (int)(sizeof(builtin_names) / sizeof(builtin_names[0]));

    int idx = 0;
    while (idx < builtin_count) {
        if (!strcmp(name, builtin_names[idx])) {
            return idx;
        }
        idx++;
    }
    return -1;
}
|  | ||||
| static int get_node_val(ir_node_t* ptr, int reg) { | ||||
|     int len = 0; | ||||
|     if (ptr->tag == IR_NODE_CONST_INT) { | ||||
|         GENCODES(LI(reg, ptr->data.const_int.val)); | ||||
|     } else { | ||||
|         int offset = stack_offset(ptr); | ||||
|         GENCODE(LW(reg, REG_SP, offset)); | ||||
|     } | ||||
|     return len; | ||||
| } | ||||
|  | ||||
| static int gen_instr(ir_bblock_t* block, ir_node_t* instr) { | ||||
|     int len = 0; | ||||
|     int offset; | ||||
|     switch (instr->tag) { | ||||
|         case IR_NODE_ALLOC: { | ||||
|             break; | ||||
|         } | ||||
|         case IR_NODE_LOAD: { | ||||
|             // S1 = *(S0 + imm) | ||||
|             offset = stack_offset(instr->data.load.target); | ||||
|             GENCODE(LW(REG_T0, REG_SP, offset)); | ||||
|             // offset = STACK_OFFSET(instr); | ||||
|             // GENCODE(SW(REG_T0, REG_SP, offset)); | ||||
|             break; | ||||
|         } | ||||
|         case IR_NODE_STORE: { | ||||
|             // *(S0 + imm) = S1 | ||||
|             len += get_node_val(instr->data.store.value,  REG_T0); | ||||
|             offset = stack_offset(instr->data.store.target); | ||||
|             GENCODE(SW(REG_T0, REG_SP, offset)); | ||||
|             break; | ||||
|         } | ||||
|         case IR_NODE_RET: { | ||||
|             // A0 = S0 | ||||
|             if (instr->data.ret.ret_val != NULL) { | ||||
|                 len += get_node_val(instr->data.ret.ret_val, REG_A0); | ||||
|             } | ||||
|             GENCODE(LW(REG_RA, REG_SP, 0)); | ||||
|             GENCODE(ADDI(REG_SP, REG_SP, ctx.stack_offset)); | ||||
|             GENCODE(RET()); | ||||
|             break; | ||||
|         } | ||||
|         case IR_NODE_OP: { | ||||
|             len += get_node_val(instr->data.op.lhs, REG_T1); | ||||
|             len += get_node_val(instr->data.op.rhs, REG_T2); | ||||
|  | ||||
|             switch (instr->data.op.op) { | ||||
|             case IR_OP_ADD: | ||||
|                 GENCODE(ADD(REG_T0, REG_T1, REG_T2)); | ||||
|                 break; | ||||
|             case IR_OP_SUB: | ||||
|                 GENCODE(SUB(REG_T0, REG_T1, REG_T2)); | ||||
|                 break; | ||||
|             case IR_OP_MUL: | ||||
|                 GENCODE(MUL(REG_T0, REG_T1, REG_T2)); | ||||
|                 break; | ||||
|             case IR_OP_DIV: | ||||
|                 GENCODE(DIV(REG_T0, REG_T1, REG_T2)); | ||||
|                 break; | ||||
|             case IR_OP_MOD: | ||||
|                 GENCODE(REM(REG_T0, REG_T1, REG_T2)); | ||||
|                 break; | ||||
|             default: | ||||
|                 error("ERROR gen_instr op in riscv"); | ||||
|                 break; | ||||
|             } | ||||
|             offset = stack_offset(instr); | ||||
|             GENCODE(SW(REG_T0, REG_SP, offset)); | ||||
|             break; | ||||
|         } | ||||
|         case IR_NODE_BRANCH: { | ||||
|             len += get_node_val(instr->data.branch.cond, REG_T0); | ||||
|             int tidx = block_idx(instr->data.branch.true_bblock); | ||||
|             int fidx = block_idx(instr->data.branch.false_bblock); | ||||
|             int cidx = block_idx(ctx.cur_block); | ||||
|             jmp_t* jmp; | ||||
|             jmp = xmalloc(sizeof(jmp_t)); | ||||
|             *jmp = (jmp_t) { | ||||
|                 .base_offset = 8, | ||||
|                 .code_pos = ctx.codes.size, | ||||
|                 .type = JMP_BRANCH, | ||||
|                 .to_idx = tidx, | ||||
|                 .cur_idx=cidx, | ||||
|             }; | ||||
|             vector_push(ctx.jmp, jmp); | ||||
|             GENCODE(BNEZ(REG_T0, 0)); | ||||
|             jmp = xmalloc(sizeof(jmp_t)); | ||||
|             *jmp = (jmp_t) { | ||||
|                 .base_offset = 4, | ||||
|                 .code_pos = ctx.codes.size, | ||||
|                 .type = JMP_JUMP, | ||||
|                 .to_idx = fidx, | ||||
|                 .cur_idx=cidx, | ||||
|             }; | ||||
|             vector_push(ctx.jmp, jmp); | ||||
|             GENCODE(J(0)); | ||||
|             break; | ||||
|         } | ||||
|         case IR_NODE_JUMP: { | ||||
|             int idx = block_idx(instr->data.jump.target_bblock); | ||||
|             jmp_t* jmp = xmalloc(sizeof(jmp_t)); | ||||
|             *jmp = (jmp_t) { | ||||
|                 .base_offset = 4, | ||||
|                 .code_pos = ctx.codes.size, | ||||
|                 .type = JMP_JUMP, | ||||
|                 .to_idx = idx, | ||||
|                 .cur_idx=block_idx(ctx.cur_block), | ||||
|             }; | ||||
|             vector_push(ctx.jmp, jmp); | ||||
|             GENCODE(J(0)); | ||||
|             break; | ||||
|         } | ||||
|         case IR_NODE_CALL: { | ||||
|             if (instr->data.call.args.size > 8) { | ||||
|                 error("can't add so much params"); | ||||
|             } | ||||
|             int param_regs[8] = { | ||||
|                 REG_A0, REG_A1, REG_A2, REG_A3, | ||||
|                 REG_A4, REG_A5, REG_A6, REG_A7 | ||||
|             }; | ||||
|             for (int i = 0; i < instr->data.call.args.size; i++) { | ||||
|                 ir_node_t* param = vector_at(instr->data.call.args, i); | ||||
|                 len += get_node_val(param, param_regs[i]); | ||||
|             } | ||||
|  | ||||
|             int system_func_idx = system_func(instr->data.call.callee->name); | ||||
|             if (system_func_idx == 0) { | ||||
|                 // ecall_pnt_int | ||||
|                 GENCODE(ADDI(REG_A7, REG_X0, 0x1)); | ||||
|                 GENCODE(ECALL()); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             jmp_t* jmp = xmalloc(sizeof(jmp_t)); | ||||
|             *jmp = (jmp_t) { | ||||
|                 .base_offset = ctx.cur_func_offset + ctx.cur_block_offset + len, | ||||
|                 .code_pos = ctx.codes.size, | ||||
|                 .type = JMP_CALL, | ||||
|                 .to_idx = func_idx(instr->data.call.callee), | ||||
|                 .cur_idx = func_idx(ctx.cur_func), | ||||
|             }; | ||||
|             vector_push(ctx.call, jmp); | ||||
|  | ||||
|             GENCODES(( | ||||
|                 CALL(0) | ||||
|             )); | ||||
|             break; | ||||
|         } | ||||
|         default: | ||||
|             error("ERROR gen_instr in riscv"); | ||||
|     } | ||||
|     return len; | ||||
| } | ||||
|  | ||||
| static int gen_block(ir_bblock_t* block) { | ||||
|     int len = 0; | ||||
|     ctx.cur_block = block; | ||||
|     for (int i = 0; i < block->instrs.size; i ++) { | ||||
|         ctx.cur_block_offset = len; | ||||
|         len += gen_instr(block, vector_at(block->instrs, i)); | ||||
|     } | ||||
|     return len; | ||||
| } | ||||
|  | ||||
| static int gen_func(ir_func_t* func) { | ||||
|     int len = 0; | ||||
|     ctx.cur_func = func; | ||||
|     ctx.stack_base = 16; | ||||
|     ctx.stack_offset = ctx.stack_base; | ||||
|     for (int i = 0; i < func->bblocks.size; i++) { | ||||
|         ctx.stack_offset += 4 * (*vector_at(func->bblocks, i)).instrs.size; | ||||
|     } | ||||
|     GENCODE(ADDI(REG_SP, REG_SP, -ctx.stack_offset)); | ||||
|     GENCODE(SW(REG_RA, REG_SP, 0)); | ||||
|  | ||||
|     int param_regs[8] = { | ||||
|         REG_A0, REG_A1, REG_A2, REG_A3, | ||||
|         REG_A4, REG_A5, REG_A6, REG_A7 | ||||
|     }; | ||||
|     if (func->params.size > 8) { | ||||
|         error("can't add so much params"); | ||||
|     } | ||||
|     for (int i = 0; i < func->params.size; i++) { | ||||
|         int offset = stack_offset(vector_at(func->params, i)); | ||||
|         GENCODE(SW(param_regs[i], REG_SP, offset)); | ||||
|     } | ||||
|      | ||||
|     int jmp_cache[func->bblocks.size + 1]; | ||||
|      | ||||
|     if (ctx.jmp.data != NULL) vector_free(ctx.jmp); | ||||
|     vector_init(ctx.jmp); | ||||
|     jmp_cache[0] = 0; | ||||
|     for(int i = 0; i < func->bblocks.size; i ++) { | ||||
|         ctx.cur_func_offset = len; | ||||
|         jmp_cache[i + 1] = jmp_cache[i]; | ||||
|         int ret = gen_block(vector_at(func->bblocks, i)); | ||||
|         jmp_cache[i + 1] += ret; | ||||
|         len += ret; | ||||
|     } | ||||
|  | ||||
|     for (int i = 0; i < ctx.jmp.size; i++) { | ||||
|         jmp_t* jmp = vector_at(ctx.jmp, i); | ||||
|         int32_t code = 0; | ||||
|         int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx + 1] - jmp->base_offset); | ||||
|         if (jmp->type == JMP_JUMP) { | ||||
|             code = J(offset); | ||||
|         } else { | ||||
|             code = BNEZ(REG_T0, offset); | ||||
|         } | ||||
|         ctx.codes.data[jmp->code_pos] = (rv32code_t) { | ||||
|             .code = code, | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     return len; | ||||
| } | ||||
|  | ||||
| static void gen_code(ir_prog_t* prog) { | ||||
|     ctx.prog = prog; | ||||
|  | ||||
|  | ||||
|     for (int i = 0; i < prog->extern_funcs.size; i++) { | ||||
|         if (system_func(prog->extern_funcs.data[i]->name) == -1) { | ||||
|             error("func %s not defined and not a system func", prog->extern_funcs.data[i]->name); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     int len = 0; | ||||
|     int jmp_cache[prog->funcs.size + 1]; | ||||
|     for(int i = 0; i < prog->funcs.size; i ++) { | ||||
|         jmp_cache[i + 1] = jmp_cache[i]; | ||||
|         int ret = gen_func(vector_at(prog->funcs, i)); | ||||
|         jmp_cache[i + 1] += ret; | ||||
|         len += ret; | ||||
|     } | ||||
|  | ||||
|  | ||||
|     for (int i = 0; i < ctx.call.size; i++) { | ||||
|         jmp_t* jmp = vector_at(ctx.call, i); | ||||
|         int32_t code = 0; | ||||
|         // FIXME ERROR | ||||
|         int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx] + jmp->base_offset); | ||||
|         int32_t codes[2] = { | ||||
|             CALL(offset) | ||||
|         }; | ||||
|         for (int i = 0; i < 2; i++) { | ||||
|             ctx.codes.data[jmp->code_pos + i] = (rv32code_t) { | ||||
|                 .code = codes[i], | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| int main(int argc, char** argv) { | ||||
|     // gcc rv32ima_codegen.c -o rv32gen.exe | ||||
|     const char* infilename = "test.c"; | ||||
|     const char* outfilename = "flat.bin"; | ||||
|     if (argc >= 2) { | ||||
|         infilename = argv[1]; | ||||
|     } | ||||
|     if (argc >= 3) { | ||||
|         outfilename = argv[2]; | ||||
|     } | ||||
|     FILE* in = fopen(infilename, "r"); | ||||
|     FILE* out = fopen(outfilename, "wb"); | ||||
|     if (in == NULL || out == NULL) { | ||||
|         printf("Failed to open file\n"); | ||||
|         return 1; | ||||
|     } | ||||
|  | ||||
|     struct ASTNode* root = frontend(infilename, in, (sread_fn)fread_s); | ||||
|     gen_ir_from_ast(root); | ||||
|     gen_code(&prog); | ||||
|  | ||||
|     for (int i = 0; i < CRT_CODE_SIZE; i++) { | ||||
|         write_inst((union rv32code) { | ||||
|             .code = NOP(), | ||||
|         }, out); | ||||
|     } | ||||
|     fflush(out); | ||||
|     assert(CRT_CODE_SIZE >= sizeof(gcodes) / sizeof(gcodes[0])); | ||||
|     fseek(out, 0, SEEK_SET); | ||||
|     fwrite(gcodes, sizeof(gcodes), 1, out); | ||||
|     fflush(out); | ||||
|     fseek(out, CRT_CODE_SIZE * 4, SEEK_SET); | ||||
|      | ||||
|     fwrite(ctx.codes.data, sizeof(ctx.codes.data[0]), ctx.codes.size, out); | ||||
|     fflush(out); | ||||
|     fclose(in); | ||||
|     fclose(out); | ||||
|     // printf("comiler end out: %s\n", outfilename); | ||||
|     return 0; | ||||
| } | ||||
| @ -1,7 +1,7 @@ | ||||
| # 编译器设置 | ||||
| CC = gcc | ||||
| AR = ar | ||||
| CFLAGS = -g | ||||
| CFLAGS = -g -Wall | ||||
|  | ||||
| # 源文件路径 | ||||
| LEXER_DIR = ./lexer | ||||
| @ -13,6 +13,7 @@ SYMTAB_DIR = ./parser/symtab | ||||
| SRCS = \ | ||||
|     frontend.c \ | ||||
|     $(LEXER_DIR)/lexer.c \ | ||||
|     $(LEXER_DIR)/token.c \ | ||||
|     $(PARSER_DIR)/parser.c \ | ||||
|     $(AST_DIR)/ast.c \ | ||||
|     $(AST_DIR)/block.c \ | ||||
|  | ||||
| @ -3,13 +3,13 @@ | ||||
| #include "frontend.h" | ||||
|  | ||||
| struct ASTNode* frontend(const char* file, void* stream, sread_fn sread) { | ||||
|     struct Lexer lexer; | ||||
|     lexer_t lexer; | ||||
|     init_lexer(&lexer, file, stream, sread); | ||||
|      | ||||
|     struct SymbolTable symtab; | ||||
|     symtab_t symtab; | ||||
|     init_symtab(&symtab); | ||||
|  | ||||
|     struct Parser parser; | ||||
|     parser_t  parser; | ||||
|     init_parser(&parser, &lexer, &symtab); | ||||
|     parse_prog(&parser); | ||||
|  | ||||
|  | ||||
| @ -4,8 +4,9 @@ | ||||
| #ifndef error | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <assert.h> | ||||
| #define STD_LIBRARY | ||||
| #define error(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while (0) | ||||
| #define error(...) do { fprintf(stderr, __VA_ARGS__); assert(0); } while (0) | ||||
| #endif | ||||
| #ifndef warn | ||||
| #include <stdio.h> | ||||
| @ -15,10 +16,12 @@ | ||||
|  | ||||
| #define xmalloc(size) malloc(size) | ||||
|  | ||||
| #ifndef FRONTEND_IMPLEMENTATION | ||||
| #include "parser/parser.h" | ||||
| #include "parser/ast/ast.h" | ||||
|  | ||||
| typedef int (*sread_fn)(void *dst_buf, int dst_size, int elem_size, int count, void *stream); | ||||
| struct ASTNode* frontend(const char* file, void* stream, sread_fn sread); | ||||
| #endif | ||||
|  | ||||
| #endif | ||||
| @ -26,13 +26,15 @@ the distribution and installation instructions. | ||||
| Chris Fraser / cwf@aya.yale.edu | ||||
| David Hanson / drh@drhanson.net | ||||
|  */ | ||||
| #define FRONTEND_IMPLEMENTATION | ||||
| #include "../frontend.h" | ||||
| #include "token.h" | ||||
| #include "lexer.h" | ||||
|  | ||||
| static const struct { | ||||
|     const char* name; | ||||
|     enum CSTD_KEYWORD std_type; | ||||
|     enum TokenType tok; | ||||
|     tok_type_t tok; | ||||
| } keywords[] = { | ||||
|     #define X(name, std_type, tok, ...) { #name, std_type, tok }, | ||||
|     KEYWORD_TABLE | ||||
| @ -72,7 +74,7 @@ static inline int keyword_cmp(const char* name, int len) { | ||||
|     return -1; // Not a keyword. | ||||
| } | ||||
|  | ||||
| void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_sread_fn sread) | ||||
| void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread) | ||||
| { | ||||
|     lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer); | ||||
|     lexer->index = 1; | ||||
| @ -86,12 +88,12 @@ void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_ | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void flush_buffer(struct Lexer* lexer) { | ||||
| static void flush_buffer(lexer_t* lexer) { | ||||
|     int num = lexer->end_ptr - lexer->cur_ptr; | ||||
|     for (int i = 0; i < num; i++) { | ||||
|         lexer->buffer[i] = lexer->cur_ptr[i]; | ||||
|     } | ||||
|     lexer->cur_ptr = lexer->buffer; | ||||
|     lexer->cur_ptr = (unsigned char*)lexer->buffer; | ||||
|  | ||||
|     int read_size = LEXER_BUFFER_SIZE - num; | ||||
|     // TODO size_t to int maybe lose precision | ||||
| @ -109,7 +111,7 @@ static void flush_buffer(struct Lexer* lexer) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void goto_newline(struct Lexer* lexer) { | ||||
| static void goto_newline(lexer_t* lexer) { | ||||
|     do { | ||||
|         if (lexer->cur_ptr == lexer->end_ptr) { | ||||
|             flush_buffer(lexer); | ||||
| @ -119,7 +121,7 @@ static void goto_newline(struct Lexer* lexer) { | ||||
|     } while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0'); | ||||
| } | ||||
|  | ||||
| static void goto_block_comment(struct Lexer* lexer) { | ||||
| static void goto_block_comment(lexer_t* lexer) { | ||||
|     while (1) { | ||||
|         if (lexer->end_ptr - lexer->cur_ptr < 2) { | ||||
|             flush_buffer(lexer); | ||||
| @ -155,7 +157,7 @@ static char got_slash(unsigned char* peek) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| static void parse_char_literal(struct Lexer* lexer, struct Token* token) { | ||||
| static void parse_char_literal(lexer_t* lexer, tok_t* token) { | ||||
|     char val = 0; | ||||
|     unsigned char* peek = lexer->cur_ptr + 1; | ||||
|     if (*peek == '\\') { | ||||
| @ -166,16 +168,16 @@ static void parse_char_literal(struct Lexer* lexer, struct Token* token) { | ||||
|     } | ||||
|  | ||||
|     if (*peek != '\'') error("Unclosed character literal"); | ||||
|     token->constant.ch = val; | ||||
|     token->val.ch = val; | ||||
|     lexer->cur_ptr = peek + 1; | ||||
|     token->constant.have = 1; | ||||
|     token->val.have = 1; | ||||
|     token->type = TOKEN_CHAR_LITERAL; | ||||
| } | ||||
|  | ||||
| static void parse_string_literal(struct Lexer* lexer, struct Token* token) { | ||||
| static void parse_string_literal(lexer_t* lexer, tok_t* token) { | ||||
|     unsigned char* peek = lexer->cur_ptr + 1; | ||||
|     // TODO string literal size check | ||||
|     char* dest = token->constant.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1); | ||||
|     char* dest = token->val.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1); | ||||
|     int len = 0; | ||||
|  | ||||
|     while (*peek != '"') { | ||||
| @ -191,12 +193,12 @@ static void parse_string_literal(struct Lexer* lexer, struct Token* token) { | ||||
|     } | ||||
|     dest[len] = '\0'; | ||||
|     lexer->cur_ptr = peek + 1; | ||||
|     token->constant.have = 1; | ||||
|     token->val.have = 1; | ||||
|     token->type = TOKEN_STRING_LITERAL; | ||||
| } | ||||
|  | ||||
| // FIXME: this function was written by AI and may contain errors | ||||
| static void parse_number(struct Lexer* lexer, struct Token* token) { | ||||
| static void parse_number(lexer_t* lexer, tok_t* token) { | ||||
|     unsigned char* peek = lexer->cur_ptr; | ||||
|     int base = 10; | ||||
|     int is_float = 0; | ||||
| @ -255,12 +257,12 @@ static void parse_number(struct Lexer* lexer, struct Token* token) { | ||||
|     if ((*peek == 'e' || *peek == 'E') && base == 10) { | ||||
|         is_float = 1; | ||||
|         peek++; | ||||
|         int exp_sign = 1; | ||||
|         // int exp_sign = 1; | ||||
|         int exponent = 0; | ||||
|  | ||||
|         if (*peek == '+') peek++; | ||||
|         else if (*peek == '-') { | ||||
|             exp_sign = -1; | ||||
|             // exp_sign = -1; | ||||
|             peek++; | ||||
|         } | ||||
|  | ||||
| @ -273,19 +275,19 @@ static void parse_number(struct Lexer* lexer, struct Token* token) { | ||||
|  | ||||
|     // 存储结果 | ||||
|     lexer->cur_ptr = peek; | ||||
|     token->constant.have = 1; | ||||
|     token->val.have = 1; | ||||
|     if (is_float) { | ||||
|         token->constant.d = float_val; | ||||
|         token->val.d = float_val; | ||||
|         token->type = TOKEN_FLOAT_LITERAL; | ||||
|     } else { | ||||
|         token->constant.ll = int_val; | ||||
|         token->val.ll = int_val; | ||||
|         token->type = TOKEN_INT_LITERAL; | ||||
|     } | ||||
| } | ||||
|  | ||||
| #define GOT_ONE_TOKEN_BUF_SIZE 64 | ||||
| // /zh/c/language/operator_arithmetic.html | ||||
| void get_token(struct Lexer* lexer, struct Token* token) { | ||||
| void get_token(lexer_t* lexer, tok_t* token) { | ||||
|     // 需要保证缓冲区始终可读 | ||||
|     if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) { | ||||
|         flush_buffer(lexer); | ||||
| @ -305,8 +307,8 @@ void get_token(struct Lexer* lexer, struct Token* token) { | ||||
|         token->type = TOKEN_FLUSH; | ||||
|     } | ||||
|      | ||||
|     enum TokenType tok = TOKEN_INIT; | ||||
|     struct TokenConstant constant; | ||||
|     tok_type_t tok = TOKEN_INIT; | ||||
|     tok_val_t constant; | ||||
|     constant.have = 0; | ||||
|      | ||||
|     // once step | ||||
| @ -392,7 +394,7 @@ void get_token(struct Lexer* lexer, struct Token* token) { | ||||
|         switch (*peek++) { | ||||
|             case '=': tok = TOKEN_NEQ; break; | ||||
|             default: peek--, tok = TOKEN_NOT; break; | ||||
|         } | ||||
|         } break; | ||||
|     case '[': | ||||
|         tok = TOKEN_L_BRACKET; break; | ||||
|     case ']': | ||||
| @ -454,7 +456,7 @@ void get_token(struct Lexer* lexer, struct Token* token) { | ||||
|     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z': | ||||
|     case '_': | ||||
|         // TOKEN_IDENT | ||||
|         if (*peek == 'L' && *peek == '\'' || *peek == 'L' && *peek == '"') { | ||||
|         if ((*peek == 'L' && *peek == '\'') || (*peek == 'L' && *peek == '"')) { | ||||
|             error("unsupport wide-character char literal by `L` format"); | ||||
|         } | ||||
|         while (1) { | ||||
| @ -469,18 +471,18 @@ void get_token(struct Lexer* lexer, struct Token* token) { | ||||
|             break; | ||||
|         } | ||||
|      | ||||
|         int res = keyword_cmp(lexer->cur_ptr, peek - (lexer->cur_ptr)); | ||||
|         int res = keyword_cmp((const char*)lexer->cur_ptr, peek - (lexer->cur_ptr)); | ||||
|         if (res == -1) { | ||||
|             int strlen = peek - lexer->cur_ptr; | ||||
|             unsigned char* str = xmalloc(strlen + 1); | ||||
|             constant.have = 1; | ||||
|             constant.str = str; | ||||
|             constant.str = (char*)str; | ||||
|             for (int i = 0; i < strlen; i++) { | ||||
|                 str[i] = lexer->cur_ptr[i]; | ||||
|             } | ||||
|             str[strlen] = '\0'; | ||||
|             constant.have = 1; | ||||
|             constant.str = str; | ||||
|             constant.str = (char*)str; | ||||
|             tok = TOKEN_IDENT; break; | ||||
|         } else { | ||||
|             tok = keywords[res].tok; break; | ||||
| @ -492,32 +494,16 @@ void get_token(struct Lexer* lexer, struct Token* token) { | ||||
|  | ||||
|     lexer->cur_ptr = peek; | ||||
| END: | ||||
|     token->constant = constant; | ||||
|     token->val = constant; | ||||
|     token->type = tok; | ||||
| } | ||||
|  | ||||
| // get_token may return tokens the parser cannot use (TOKEN_FLUSH, comments); this wrapper skips them | ||||
| void get_valid_token(struct Lexer* lexer, struct Token* token) { | ||||
|     enum TokenType type; | ||||
| void get_valid_token(lexer_t* lexer, tok_t* token) { | ||||
|     tok_type_t type; | ||||
|     do { | ||||
|         get_token(lexer, token); | ||||
|         type = token->type; | ||||
|     } while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT); | ||||
| } | ||||
|  | ||||
| // 生成字符串映射(根据需求选择#str或#name) | ||||
| static const char* token_strings[] = { | ||||
|     // 普通token使用#str | ||||
|     #define X(str, tok) [tok] = #str, | ||||
|     TOKEN_TABLE | ||||
|     #undef X | ||||
|      | ||||
|     // 关键字使用#name | ||||
|     #define X(name, std, tok) [tok] = #name, | ||||
|     KEYWORD_TABLE | ||||
|     #undef X | ||||
| }; | ||||
|  | ||||
| const char* get_token_name(enum TokenType type) { | ||||
|     return token_strings[type]; | ||||
| } | ||||
|  | ||||
| @ -2,13 +2,17 @@ | ||||
| #define __LEXER_H__ | ||||
|  | ||||
| #include "token.h" | ||||
| #ifndef LEXER_MAX_TOKEN_SIZE  | ||||
| #define LEXER_MAX_TOKEN_SIZE 63 | ||||
| #endif | ||||
| #ifndef LEXER_BUFFER_SIZE  | ||||
| #define LEXER_BUFFER_SIZE 4095 | ||||
| #endif | ||||
|  | ||||
| typedef int (*lexer_sread_fn)(void *dst_buf, int dst_size, | ||||
|         int elem_size, int count, void *stream); | ||||
|  | ||||
| struct Lexer { | ||||
| typedef struct lexer { | ||||
|     int line; | ||||
|     int index; | ||||
|     // const char current_file_name[LEXER_BUFFER_SIZE+1]; | ||||
| @ -19,22 +23,15 @@ struct Lexer { | ||||
|  | ||||
|     lexer_sread_fn sread; | ||||
|     void* stream; | ||||
| }; | ||||
| } lexer_t; | ||||
|  | ||||
| struct Token { | ||||
|     enum TokenType type; | ||||
|     struct TokenConstant constant; | ||||
| }; | ||||
|  | ||||
| void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, | ||||
| void init_lexer(lexer_t* lexer, const char* file_name, void* stream, | ||||
|     lexer_sread_fn sread); | ||||
|  | ||||
| //  | ||||
| void get_token(struct Lexer* lexer, struct Token* token); | ||||
| // Raw token getter: it also returns placeholder tokens such as TOKEN_FLUSH | ||||
| void get_token(lexer_t* lexer, tok_t* token); | ||||
|  | ||||
| // get_token may return tokens the parser cannot use (such as TOKEN_FLUSH); this wrapper skips them | ||||
| void get_valid_token(struct Lexer* lexer, struct Token* token); | ||||
|  | ||||
| const char* get_token_name(enum TokenType token); | ||||
| void get_valid_token(lexer_t* lexer, tok_t* token); | ||||
|  | ||||
| #endif | ||||
|  | ||||
							
								
								
									
										17
									
								
								ccompiler/frontend/lexer/tests/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								ccompiler/frontend/lexer/tests/Makefile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | ||||
# Build and run the lexer unit tests.
CC = gcc
CFLAGS = -g -Wall
SRC = ../lexer.c ../token.c

# These targets do not produce a file of the same name.
.PHONY: all test_all clean

# Default target: build the test binary and run it.
all: test_all

test_all: test
	./test

# Rebuild whenever the lexer sources or the driver change.
run: $(SRC) run.c
	$(CC) $(CFLAGS) $(SRC) run.c -o run

test: $(SRC) test.c
	$(CC) $(CFLAGS) $(SRC) -o test test.c

clean:
	rm -f test run
| @ -1,8 +1,8 @@ | ||||
| #include "../lexer.h" | ||||
| #include <stdio.h> | ||||
| // gcc -g ../lexer.c test_lexer.c -o test_lexer
 | ||||
| // gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
 | ||||
| /*
 | ||||
| struct TokenConstant { | ||||
| tok_tConstant { | ||||
|    int have; | ||||
|    union { | ||||
|        char ch; | ||||
| @ -31,9 +31,9 @@ int main(int argc, char* argv[]) { | ||||
|     } | ||||
|     printf("open file success\n"); | ||||
| 
 | ||||
|     struct Lexer lexer; | ||||
|     lexer_t lexer; | ||||
|     init_lexer(&lexer, "test_lexter.c", fp, (lexer_sread_fn)fread_s); | ||||
|     struct Token tok; | ||||
|     tok_t tok; | ||||
| 
 | ||||
|     while (1) { | ||||
|         get_valid_token(&lexer, &tok); | ||||
| @ -41,6 +41,6 @@ int main(int argc, char* argv[]) { | ||||
|             break; | ||||
|         } | ||||
|         printf("line: %d, column: %d, type: %3d, typename: %s\n", | ||||
|             lexer.line, lexer.index, tok.type, get_token_name(tok.type)); | ||||
|             lexer.line, lexer.index, tok.type, get_tok_name(tok.type)); | ||||
|     } | ||||
| } | ||||
							
								
								
									
										178
									
								
								ccompiler/frontend/lexer/tests/test.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										178
									
								
								ccompiler/frontend/lexer/tests/test.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,178 @@ | ||||
| // test_lexer.c | ||||
| #include "../../../../libcore/acutest.h" | ||||
| #include "../lexer.h" | ||||
| #include <string.h> | ||||
|  | ||||
/**
 * In-memory replacement for the lexer's stream reader, used by the tests.
 *
 * `stream` is treated as a NUL-terminated string. Up to
 * min(dst_size, elem_size * count) bytes are produced: the string and its
 * terminator are copied, and any remaining requested bytes are zero-filled,
 * so the source string is never read past its end.
 *
 * The read position is not tracked; every call starts again at the beginning
 * of the string.
 *
 * @return number of bytes placed in dst_buf; 0 when stream is NULL or the
 *         requested size is not positive.
 */
int test_read(void *dst_buf, int dst_size, int elem_size, int count, void *stream) {
    if (stream == NULL) {
        return 0;
    }
    int size = dst_size > elem_size * count ? elem_size * count : dst_size;
    if (size <= 0) {
        return 0;
    }

    // Copy at most the string plus its terminator.
    size_t avail = strlen((const char *)stream) + 1;
    size_t ncopy = (size_t)size < avail ? (size_t)size : avail;
    memcpy(dst_buf, stream, ncopy);
    // Pad the rest so the caller still receives `size` well-defined bytes.
    memset((char *)dst_buf + ncopy, 0, (size_t)size - ncopy);
    return size;
}
|  | ||||
| // 测试辅助函数 | ||||
| static inline void test_lexer_string(const char* input, tok_type_t expected_type) { | ||||
|     lexer_t lexer; | ||||
|     tok_t token; | ||||
|      | ||||
|     init_lexer(&lexer, "test.c", (void*)input, test_read); | ||||
|     get_valid_token(&lexer, &token); | ||||
|      | ||||
|     TEST_CHECK(token.type == expected_type); | ||||
|     TEST_MSG("Expected: %s", get_tok_name(expected_type)); | ||||
|     TEST_MSG("Got: %s", get_tok_name(token.type)); | ||||
| } | ||||
|  | ||||
| // 基础运算符测试 | ||||
| void test_operators() { | ||||
|     TEST_CASE("Arithmetic operators"); { | ||||
|         test_lexer_string("+", TOKEN_ADD); | ||||
|         test_lexer_string("++", TOKEN_ADD_ADD); | ||||
|         test_lexer_string("+=", TOKEN_ASSIGN_ADD); | ||||
|         test_lexer_string("-", TOKEN_SUB); | ||||
|         test_lexer_string("--", TOKEN_SUB_SUB); | ||||
|         test_lexer_string("-=", TOKEN_ASSIGN_SUB); | ||||
|         test_lexer_string("*", TOKEN_MUL); | ||||
|         test_lexer_string("*=", TOKEN_ASSIGN_MUL); | ||||
|         test_lexer_string("/", TOKEN_DIV); | ||||
|         test_lexer_string("/=", TOKEN_ASSIGN_DIV); | ||||
|         test_lexer_string("%", TOKEN_MOD); | ||||
|         test_lexer_string("%=", TOKEN_ASSIGN_MOD); | ||||
|     } | ||||
|  | ||||
|     TEST_CASE("Bitwise operators"); { | ||||
|         test_lexer_string("&", TOKEN_AND); | ||||
|         test_lexer_string("&&", TOKEN_AND_AND); | ||||
|         test_lexer_string("&=", TOKEN_ASSIGN_AND); | ||||
|         test_lexer_string("|", TOKEN_OR); | ||||
|         test_lexer_string("||", TOKEN_OR_OR); | ||||
|         test_lexer_string("|=", TOKEN_ASSIGN_OR); | ||||
|         test_lexer_string("^", TOKEN_XOR); | ||||
|         test_lexer_string("^=", TOKEN_ASSIGN_XOR); | ||||
|         test_lexer_string("~", TOKEN_BIT_NOT); | ||||
|         test_lexer_string("<<", TOKEN_L_SH); | ||||
|         test_lexer_string("<<=", TOKEN_ASSIGN_L_SH); | ||||
|         test_lexer_string(">>", TOKEN_R_SH); | ||||
|         test_lexer_string(">>=", TOKEN_ASSIGN_R_SH); | ||||
|     } | ||||
|  | ||||
|     TEST_CASE("Comparison operators"); { | ||||
|         test_lexer_string("==", TOKEN_EQ); | ||||
|         test_lexer_string("!=", TOKEN_NEQ); | ||||
|         test_lexer_string("<", TOKEN_LT); | ||||
|         test_lexer_string("<=", TOKEN_LE); | ||||
|         test_lexer_string(">", TOKEN_GT); | ||||
|         test_lexer_string(">=", TOKEN_GE); | ||||
|     } | ||||
|  | ||||
|     TEST_CASE("Special symbols"); { | ||||
|         test_lexer_string("(", TOKEN_L_PAREN); | ||||
|         test_lexer_string(")", TOKEN_R_PAREN); | ||||
|         test_lexer_string("[", TOKEN_L_BRACKET); | ||||
|         test_lexer_string("]", TOKEN_R_BRACKET); | ||||
|         test_lexer_string("{", TOKEN_L_BRACE); | ||||
|         test_lexer_string("}", TOKEN_R_BRACE); | ||||
|         test_lexer_string(";", TOKEN_SEMICOLON); | ||||
|         test_lexer_string(",", TOKEN_COMMA); | ||||
|         test_lexer_string(":", TOKEN_COLON); | ||||
|         test_lexer_string(".", TOKEN_DOT); | ||||
|         test_lexer_string("...", TOKEN_ELLIPSIS); | ||||
|         test_lexer_string("->", TOKEN_DEREF); | ||||
|         test_lexer_string("?", TOKEN_COND); | ||||
|     } | ||||
| } | ||||
|  | ||||
| // 关键字测试 | ||||
| void test_keywords() { | ||||
|     TEST_CASE("C89 keywords"); | ||||
|     test_lexer_string("while", TOKEN_WHILE); | ||||
|     test_lexer_string("sizeof", TOKEN_SIZEOF); | ||||
|      | ||||
|     // TEST_CASE("C99 keywords"); | ||||
|     // test_lexer_string("restrict", TOKEN_RESTRICT); | ||||
|     // test_lexer_string("_Bool", TOKEN_INT); // 需确认你的类型定义 | ||||
| } | ||||
|  | ||||
| // 字面量测试 | ||||
| void test_literals() { | ||||
|     TEST_CASE("Integer literals"); { | ||||
|         // 十进制 | ||||
|         test_lexer_string("0", TOKEN_INT_LITERAL); | ||||
|         test_lexer_string("123", TOKEN_INT_LITERAL); | ||||
|         // test_lexer_string("2147483647", TOKEN_INT_LITERAL); | ||||
|          | ||||
|         // // 十六进制 | ||||
|         // test_lexer_string("0x0", TOKEN_INT_LITERAL); | ||||
|         // test_lexer_string("0x1A3F", TOKEN_INT_LITERAL); | ||||
|         // test_lexer_string("0XABCDEF", TOKEN_INT_LITERAL); | ||||
|          | ||||
|         // // 八进制 | ||||
|         // test_lexer_string("0123", TOKEN_INT_LITERAL); | ||||
|         // test_lexer_string("0777", TOKEN_INT_LITERAL); | ||||
|          | ||||
|         // // 边界值测试 | ||||
|         // test_lexer_string("2147483647", TOKEN_INT_LITERAL); // INT_MAX | ||||
|         // test_lexer_string("4294967295", TOKEN_INT_LITERAL); // UINT_MAX | ||||
|     } | ||||
|  | ||||
|     // TEST_CASE("Character literals"); { | ||||
|     //     test_lexer_string("'a'", TOKEN_CHAR_LITERAL); | ||||
|     //     test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL); | ||||
|     //     test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL); | ||||
|     //     test_lexer_string("'\\\\'", TOKEN_CHAR_LITERAL); | ||||
|     //     test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL); | ||||
|     // } | ||||
|  | ||||
|     TEST_CASE("String literals"); { | ||||
|         test_lexer_string("\"hello\"", TOKEN_STRING_LITERAL); | ||||
|         test_lexer_string("\"multi-line\\nstring\"", TOKEN_STRING_LITERAL); | ||||
|         test_lexer_string("\"escape\\\"quote\"", TOKEN_STRING_LITERAL); | ||||
|     } | ||||
|  | ||||
|     // TEST_CASE("Integer literals"); | ||||
|     // test_lexer_string("123", TOKEN_INT_LITERAL); | ||||
|     // test_lexer_string("0x1F", TOKEN_INT_LITERAL); | ||||
|      | ||||
|     // TEST_CASE("Floating literals"); | ||||
|     // test_lexer_string("3.14e-5", TOKEN_FLOAT_LITERAL); | ||||
|      | ||||
|     // TEST_CASE("Character literals"); | ||||
|     // test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL); | ||||
| } | ||||
|  | ||||
// Edge-case tests. Currently a placeholder: both cases below are disabled.
void test_edge_cases() {
    // Disabled "Long identifiers" case:
    //   an identifier of LEXER_MAX_TOKEN_SIZE + 1 'a' characters,
    //   expected to lex as TOKEN_IDENT.
    //
    // Disabled "Buffer boundary" case:
    //   LEXER_BUFFER_SIZE * 2 - 1 '+' characters,
    //   expected to lex as TOKEN_ADD.
}
|  | ||||
| // 错误处理测试 | ||||
| void test_error_handling() { | ||||
|     TEST_CASE("Invalid characters"); | ||||
|     lexer_t lexer; | ||||
|     tok_t token; | ||||
|      | ||||
|     init_lexer(&lexer, "test.c", NULL, test_read); | ||||
|     get_valid_token(&lexer, &token); | ||||
|      | ||||
|     TEST_CHECK(token.type == TOKEN_EOF); // 应触发错误处理 | ||||
| } | ||||
|  | ||||
| // 测试列表 | ||||
| TEST_LIST = { | ||||
|     {"operators", test_operators}, | ||||
|     {"keywords", test_keywords}, | ||||
|     {"literals", test_literals}, | ||||
|     {"edge_cases", test_edge_cases}, | ||||
|     {"error_handling", test_error_handling}, | ||||
|     {NULL, NULL} | ||||
| }; | ||||
							
								
								
									
										86
									
								
								ccompiler/frontend/lexer/token.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								ccompiler/frontend/lexer/token.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,86 @@ | ||||
| #define FRONTEND_IMPLEMENTATION | ||||
| #include "../frontend.h" | ||||
| #include "token.h" | ||||
|  | ||||
| #define ROUND_IDX(idx) ((idx) % tokbuf->cap) | ||||
|  | ||||
| tok_t* pop_tok(tok_buf_t* tokbuf) { | ||||
|     if (tokbuf->size == 0) { | ||||
|         error("no token to pop"); | ||||
|         return NULL; | ||||
|     } | ||||
|     int idx = tokbuf->cur; | ||||
|     tokbuf->cur = ROUND_IDX(idx + 1); | ||||
|     tokbuf->size -= 1; | ||||
|     return tokbuf->buf + idx; | ||||
| } | ||||
|  | ||||
| void flush_peek_tok(tok_buf_t* tokbuf) { | ||||
|     tokbuf->peek = tokbuf->cur; | ||||
| } | ||||
|  | ||||
| void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) { | ||||
|     tokbuf->cur = 0; | ||||
|     tokbuf->end = 0; | ||||
|     tokbuf->peek = 0; | ||||
|     tokbuf->size = 0; | ||||
|     tokbuf->stream = stream; | ||||
|     tokbuf->gettok = gettok; | ||||
|     tokbuf->buf = NULL; | ||||
|     tokbuf->cap = 0; | ||||
| } | ||||
|  | ||||
| tok_t *peek_tok(tok_buf_t *tokbuf) | ||||
| { | ||||
|     int idx = tokbuf->peek; | ||||
|     idx = ROUND_IDX(idx + 1); | ||||
|     if (tokbuf->size >= tokbuf->cap) { | ||||
|         error("peek too deep, outof array size"); | ||||
|     } | ||||
|     if (tokbuf->peek == tokbuf->end) { | ||||
|         if (tokbuf->size == tokbuf->cap) { | ||||
|             error("peek_tok buffer overflow"); | ||||
|         } | ||||
|         if (tokbuf->gettok == NULL) { | ||||
|             error("peek_tok can not got tok"); | ||||
|         } | ||||
|         tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx])); | ||||
|         tokbuf->size++; | ||||
|         tokbuf->end = idx; | ||||
|     } | ||||
|      | ||||
|     tokbuf->peek = idx; | ||||
|     return &(tokbuf->buf[idx]); | ||||
| } | ||||
|  | ||||
| tok_type_t peek_tok_type(tok_buf_t* tokbuf) { | ||||
|     return peek_tok(tokbuf)->type; | ||||
| } | ||||
|  | ||||
| int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type) { | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_t* tok = peek_tok(tokbuf); | ||||
|     if (tok->type != type) { | ||||
|         error("expected tok: %s, got %s", get_tok_name(type), get_tok_name(tok->type)); | ||||
|     } else { | ||||
|         pop_tok(tokbuf); | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| // 生成字符串映射(根据需求选择#str或#name) | ||||
| static const char* token_strings[] = { | ||||
|     // 普通token使用#str | ||||
|     #define X(str, tok) [tok] = #str, | ||||
|     TOKEN_TABLE | ||||
|     #undef X | ||||
|      | ||||
|     // 关键字使用#name | ||||
|     #define X(name, std, tok) [tok] = #name, | ||||
|     KEYWORD_TABLE | ||||
|     #undef X | ||||
| }; | ||||
|  | ||||
| const char* get_tok_name(tok_type_t type) { | ||||
|     return token_strings[type]; | ||||
| } | ||||
| @ -105,7 +105,7 @@ enum CSTD_KEYWORD { | ||||
|     // END | ||||
|  | ||||
| // 定义TokenType枚举 | ||||
| enum TokenType { | ||||
| typedef enum tok_type { | ||||
|     // 处理普通token | ||||
|     #define X(str, tok) tok, | ||||
|     TOKEN_TABLE | ||||
| @ -115,9 +115,9 @@ enum TokenType { | ||||
|     #define X(name, std, tok) tok, | ||||
|     KEYWORD_TABLE | ||||
|     #undef X | ||||
| }; | ||||
| } tok_type_t; | ||||
|  | ||||
| struct TokenConstant { | ||||
| typedef struct tok_val { | ||||
|     int have; | ||||
|     union { | ||||
|         char ch; | ||||
| @ -127,124 +127,31 @@ struct TokenConstant { | ||||
|         long long ll; | ||||
|         char* str; | ||||
|     }; | ||||
| }; | ||||
| } tok_val_t; | ||||
|  | ||||
| // "break" | ||||
| // "case" | ||||
| // "char" | ||||
| // "const" | ||||
| // "continue" | ||||
| // "default" | ||||
| // "do" | ||||
| // "double" | ||||
| // "else" | ||||
| // "enum" | ||||
| // "extern" | ||||
| // "float" | ||||
| // "for" | ||||
| // "goto" | ||||
| // "if" | ||||
| // "inline (C99)" | ||||
| // "int" | ||||
| // "long" | ||||
| // "register" | ||||
| // "restrict (C99)" | ||||
| // "return" | ||||
| // "short" | ||||
| // "signed" | ||||
| // "sizeof" | ||||
| // "static" | ||||
| // "struct" | ||||
| // "switch" | ||||
| // "typedef" | ||||
| // "union" | ||||
| // "unsigned" | ||||
| // "void" | ||||
| // "volatile" | ||||
| // "while" | ||||
| typedef struct tok { | ||||
|     tok_type_t type; | ||||
|     tok_val_t val; | ||||
| } tok_t; | ||||
|  | ||||
| // alignas (C23) | ||||
| // alignof (C23) | ||||
| // auto | ||||
| // bool (C23) | ||||
| // constexpr (C23) | ||||
| // false (C23) | ||||
| // nullptr (C23) | ||||
| // static_assert (C23) | ||||
| // thread_local (C23) | ||||
| // true (C23) | ||||
| // typeof (C23) | ||||
| // typeof_unqual (C23) | ||||
| // _Alignas (C11) | ||||
| // _Alignof (C11) | ||||
| // _Atomic (C11) | ||||
| // _BitInt (C23) | ||||
| // _Bool (C99) | ||||
| // _Complex (C99) | ||||
| // _Decimal128 (C23) | ||||
| // _Decimal32 (C23) | ||||
| // _Decimal64 (C23) | ||||
| // _Generic (C11) | ||||
| // _Imaginary (C99) | ||||
| // _Noreturn (C11) | ||||
| // _Static_assert (C11) | ||||
| // _Thread_local (C11) | ||||
| typedef struct tok_buf { | ||||
|     int cur; | ||||
|     int end; | ||||
|     int peek; | ||||
|     int size; | ||||
|     int cap; | ||||
|     tok_t* buf; | ||||
|     void* stream; | ||||
|     void (*gettok)(void* stream, tok_t* token); | ||||
| } tok_buf_t; | ||||
|  | ||||
| // a = b | ||||
| // a += b | ||||
| // a -= b | ||||
| // a *= b | ||||
| // a /= b | ||||
| // a %= b | ||||
| // a &= b | ||||
| // a |= b | ||||
| // a ^= b | ||||
| // a <<= b | ||||
| // a >>= b | ||||
|  | ||||
| // ++a | ||||
| // --a | ||||
| // a++ | ||||
| // a-- | ||||
|  | ||||
| // +a | ||||
| // -a | ||||
| // a + b | ||||
| // a - b | ||||
| // a * b | ||||
| // a / b | ||||
| // a % b | ||||
| // ~a | ||||
| // a & b | ||||
| // a | b | ||||
| // a ^ b | ||||
| // a << b | ||||
| // a >> b | ||||
|  | ||||
| // !a | ||||
| // a && b | ||||
| // a || b | ||||
|  | ||||
| // a == b | ||||
| // a != b | ||||
| // a < b | ||||
| // a > b | ||||
| // a <= b | ||||
| // a >= b | ||||
|  | ||||
| // a[b] | ||||
| // *a | ||||
| // &a | ||||
| // a->b | ||||
| // a.b | ||||
|  | ||||
| // a(...) | ||||
| // a, b | ||||
| // (type) a | ||||
| // a ? b : c | ||||
| // sizeof | ||||
|  | ||||
| // _Alignof | ||||
| // (C11) | ||||
| typedef void(*get_tokbuf_func)(void* stream, tok_t* token); | ||||
| void init_tokbuf(tok_buf_t* tokbuf, void* stream, get_tokbuf_func gettok); | ||||
| tok_t* peek_tok(tok_buf_t* tokbuf); | ||||
| tok_t* pop_tok(tok_buf_t* tokbuf); | ||||
| void flush_peek_tok(tok_buf_t* tokbuf); | ||||
| tok_type_t peek_tok_type(tok_buf_t* tokbuf); | ||||
| int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type); | ||||
| const char* get_tok_name(tok_type_t type); | ||||
|  | ||||
| #endif | ||||
| @ -14,9 +14,9 @@ void init_ast_node(struct ASTNode* node) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct ASTNode* find_ast_node(struct ASTNode* node, enum ASTType type) { | ||||
| // struct ASTNode* find_ast_node(struct ASTNode* node, ast_type_t type) { | ||||
|      | ||||
| } | ||||
| // } | ||||
|  | ||||
| #include <stdio.h> | ||||
| static void pnt_depth(int depth) { | ||||
| @ -25,149 +25,149 @@ static void pnt_depth(int depth) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| void pnt_ast(struct ASTNode* node, int depth) { /* Recursively dumps the subtree at node with printf; depth is handed to pnt_depth() before each label is printed. A NULL node is ignored. */ | ||||
|     if (!node) return; | ||||
|     pnt_depth(depth); | ||||
|     switch (node->type) { | ||||
|         case NT_ROOT: | ||||
|             for (int i = 0; i < node->root.child_size; i++) { | ||||
|                 pnt_ast(node->root.children[i], depth); | ||||
|             } | ||||
|             return; /* root children are printed at the same depth; returning here skips the shared child handling at the bottom */ | ||||
| // void pnt_ast(struct ASTNode* node, int depth) { | ||||
| //     if (!node) return; | ||||
| //     pnt_depth(depth); | ||||
| //     switch (node->type) { | ||||
| //         case NT_ROOT: | ||||
| //             for (int i = 0; i < node->root.child_size; i++) { | ||||
| //                 pnt_ast(node->root.children[i], depth); | ||||
| //             } | ||||
| //             return; | ||||
|  | ||||
|         case NT_ADD     : printf("+ \n"); break; // (expr) + (expr) | ||||
|         case NT_SUB     : printf("- \n"); break; // (expr) - (expr) | ||||
|         case NT_MUL     : printf("* \n"); break; // (expr) * (expr) | ||||
|         case NT_DIV     : printf("/ \n"); break; // (expr) / (expr) | ||||
|         case NT_MOD     : printf("%%\n"); break; // (expr) % (expr) | ||||
|         case NT_AND     : printf("& \n"); break; // (expr) & (expr) | ||||
|         case NT_OR      : printf("| \n"); break; // (expr) | (expr) | ||||
|         case NT_XOR     : printf("^ \n"); break; // (expr) ^ (expr) | ||||
|         case NT_L_SH    : printf("<<\n"); break; // (expr) << (expr) | ||||
|         case NT_R_SH    : printf(">>\n"); break; // (expr) >> (expr) | ||||
|         case NT_EQ      : printf("==\n"); break; // (expr) == (expr) | ||||
|         case NT_NEQ     : printf("!=\n"); break; // (expr) != (expr) | ||||
|         case NT_LE      : printf("<=\n"); break; // (expr) <= (expr) | ||||
|         case NT_GE      : printf(">=\n"); break; // (expr) >= (expr) | ||||
|         case NT_LT      : printf("< \n"); break; // (expr) < (expr) | ||||
|         case NT_GT      : printf("> \n"); break; // (expr) > (expr) | ||||
|         case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr) | ||||
|         case NT_OR_OR   : printf("||\n"); break; // (expr) || (expr) | ||||
|         case NT_NOT     : printf("! \n"); break; // ! (expr) | ||||
|         case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr) | ||||
|         case NT_COMMA   : printf(", \n"); break; // expr, expr (comma operator) | ||||
|         case NT_ASSIGN  : printf("= \n"); break; // (expr) = (expr) | ||||
|         // case NT_COND    : // (expr) ? (expr) : (expr) | ||||
| //         case NT_ADD     : printf("+ \n"); break; // (expr) + (expr) | ||||
| //         case NT_SUB     : printf("- \n"); break; // (expr) - (expr) | ||||
| //         case NT_MUL     : printf("* \n"); break; // (expr) * (expr) | ||||
| //         case NT_DIV     : printf("/ \n"); break; // (expr) / (expr) | ||||
| //         case NT_MOD     : printf("%%\n"); break; // (expr) % (expr) | ||||
| //         case NT_AND     : printf("& \n"); break; // (expr) & (expr) | ||||
| //         case NT_OR      : printf("| \n"); break; // (expr) | (expr) | ||||
| //         case NT_XOR     : printf("^ \n"); break; // (expr) ^ (expr) | ||||
| //         case NT_L_SH    : printf("<<\n"); break; // (expr) << (expr) | ||||
| //         case NT_R_SH    : printf(">>\n"); break; // (expr) >> (expr) | ||||
| //         case NT_EQ      : printf("==\n"); break; // (expr) == (expr) | ||||
| //         case NT_NEQ     : printf("!=\n"); break; // (expr) != (expr) | ||||
| //         case NT_LE      : printf("<=\n"); break; // (expr) <= (expr) | ||||
| //         case NT_GE      : printf(">=\n"); break; // (expr) >= (expr) | ||||
| //         case NT_LT      : printf("< \n"); break; // (expr) < (expr) | ||||
| //         case NT_GT      : printf("> \n"); break; // (expr) > (expr) | ||||
| //         case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr) | ||||
| //         case NT_OR_OR   : printf("||\n"); break; // (expr) || (expr) | ||||
| //         case NT_NOT     : printf("! \n"); break; // ! (expr) | ||||
| //         case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr) | ||||
| //         case NT_COMMA   : printf(", \n"); break; // expr, expr (comma operator) | ||||
| //         case NT_ASSIGN  : printf("= \n"); break; // (expr) = (expr) | ||||
| //         // case NT_COND    : // (expr) ? (expr) : (expr) | ||||
|          | ||||
|         case NT_STMT_EMPTY    : // ; | ||||
|             printf(";\n"); | ||||
|             break; | ||||
|         case NT_STMT_IF       : // if (cond) { ... } [else {...}] | ||||
|             printf("if"); /* no newline is printed after this label, unlike the while and for labels below */ | ||||
|             pnt_ast(node->if_stmt.cond, depth+1); | ||||
|             pnt_ast(node->if_stmt.if_stmt, depth+1); | ||||
|             if (node->if_stmt.else_stmt) { | ||||
|                 pnt_depth(depth); | ||||
|                 printf("else"); | ||||
|                 pnt_ast(node->if_stmt.else_stmt, depth+1);             | ||||
|             } | ||||
|             break; | ||||
|         case NT_STMT_WHILE    : // while (cond) { ... } | ||||
|             printf("while\n"); | ||||
|             pnt_ast(node->while_stmt.cond, depth+1); | ||||
|             pnt_ast(node->while_stmt.body, depth+1); | ||||
|             break; | ||||
|         case NT_STMT_DOWHILE  : // do {...} while (cond) | ||||
|             printf("do-while\n"); | ||||
|             pnt_ast(node->do_while_stmt.body, depth+1); | ||||
|             pnt_ast(node->do_while_stmt.cond, depth+1); | ||||
|             break; | ||||
|         case NT_STMT_FOR      : // for (init; cond; iter) {...} | ||||
|             printf("for\n"); | ||||
|             if (node->for_stmt.init) | ||||
|                 pnt_ast(node->for_stmt.init, depth+1); | ||||
|             if (node->for_stmt.cond) | ||||
|                 pnt_ast(node->for_stmt.cond, depth+1); | ||||
|             if (node->for_stmt.iter) | ||||
|                 pnt_ast(node->for_stmt.iter, depth+1); | ||||
|             pnt_ast(node->for_stmt.body, depth+1); | ||||
|             break; | ||||
|         case NT_STMT_SWITCH   : // switch (expr) { case ... } | ||||
|         case NT_STMT_BREAK    : // break; | ||||
|         case NT_STMT_CONTINUE : // continue; | ||||
|         case NT_STMT_GOTO     : // goto label; | ||||
|         case NT_STMT_CASE     : // case const_expr: | ||||
|         case NT_STMT_DEFAULT  : // default: | ||||
|         case NT_STMT_LABEL    : // label: | ||||
|             break; | ||||
|         case NT_STMT_BLOCK    : // { ... } | ||||
|             printf("{\n"); | ||||
|             for (int i = 0; i < node->block.child_size; i++) { | ||||
|                 pnt_ast(node->block.children[i], depth+1); | ||||
|             } | ||||
|             pnt_depth(depth); | ||||
|             printf("}\n"); | ||||
|             break; | ||||
|         case NT_STMT_RETURN   : // return expr; | ||||
|             printf("return"); | ||||
|             if (node->return_stmt.expr_stmt) { | ||||
|                 printf(" "); | ||||
|                 pnt_ast(node->return_stmt.expr_stmt, depth+1); | ||||
|             } else { | ||||
|                 printf("\n"); | ||||
|             } | ||||
|             break; | ||||
|         case NT_STMT_EXPR     : // expr; | ||||
|             printf("stmt\n"); | ||||
|             pnt_ast(node->expr_stmt.expr_stmt, depth); | ||||
|             pnt_depth(depth); | ||||
|             printf(";\n"); | ||||
|             break; | ||||
|         case NT_DECL_VAR : // type name; or type name = expr; | ||||
|             printf("decl_val\n"); | ||||
|             break; | ||||
|         case NT_DECL_FUNC: // type func_name(param_list); | ||||
|             printf("decl func %s\n", node->func.name->syms.tok.constant.str); | ||||
|             break; | ||||
|         case NT_FUNC      : // type func_name(param_list) {...} | ||||
|             printf("def func %s\n", node->func.name->syms.tok.constant.str); | ||||
|             // pnt_ast(node->child.func.params, depth); | ||||
|             pnt_ast(node->func.body, depth); | ||||
|             // pnt_ast(node->child.func.ret, depth); | ||||
|             break; | ||||
|         case NT_PARAM     : // function formal parameter | ||||
|             printf("param\n"); /* NOTE(review): no break here -- execution falls through and also prints the arg_list and call labels */ | ||||
|         case NT_ARG_LIST  : // argument list (used together with NT_CALL) | ||||
|             printf("arg_list\n"); /* NOTE(review): no break here -- execution falls through and also prints the call label */ | ||||
|         case NT_TERM_CALL      : // func (expr) | ||||
|             printf("call\n"); | ||||
|             break; | ||||
|         case NT_TERM_IDENT: | ||||
|             printf("%s\n", node->syms.tok.constant.str); | ||||
|             break; | ||||
|         case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword | ||||
|             struct Token * tok = &node->syms.tok; /* NOTE(review): declaration directly after a case label; C11 requires a statement there (accepted only as a compiler extension or from C23) */ | ||||
|             switch (tok->type) { | ||||
|                 case TOKEN_CHAR_LITERAL: | ||||
|                     printf("%c\n", tok->constant.ch); | ||||
|                     break; | ||||
|                 case TOKEN_INT_LITERAL: | ||||
|                     printf("%d\n", tok->constant.i); | ||||
|                     break; | ||||
|                 case TOKEN_STRING_LITERAL: | ||||
|                     printf("%s\n", tok->constant.str); | ||||
|                     break; | ||||
|                 default: | ||||
|                     printf("unknown term val\n"); | ||||
|                     break; | ||||
|             } /* no break after the inner switch: control falls into default below, which only breaks */ | ||||
|         default: | ||||
|             break; | ||||
|     } | ||||
| //         case NT_STMT_EMPTY    : // ; | ||||
| //             printf(";\n"); | ||||
| //             break; | ||||
| //         case NT_STMT_IF       : // if (cond) { ... } [else {...}] | ||||
| //             printf("if"); | ||||
| //             pnt_ast(node->if_stmt.cond, depth+1); | ||||
| //             pnt_ast(node->if_stmt.if_stmt, depth+1); | ||||
| //             if (node->if_stmt.else_stmt) { | ||||
| //                 pnt_depth(depth); | ||||
| //                 printf("else"); | ||||
| //                 pnt_ast(node->if_stmt.else_stmt, depth+1);             | ||||
| //             } | ||||
| //             break; | ||||
| //         case NT_STMT_WHILE    : // while (cond) { ... } | ||||
| //             printf("while\n"); | ||||
| //             pnt_ast(node->while_stmt.cond, depth+1); | ||||
| //             pnt_ast(node->while_stmt.body, depth+1); | ||||
| //             break; | ||||
| //         case NT_STMT_DOWHILE  : // do {...} while (cond) | ||||
| //             printf("do-while\n"); | ||||
| //             pnt_ast(node->do_while_stmt.body, depth+1); | ||||
| //             pnt_ast(node->do_while_stmt.cond, depth+1); | ||||
| //             break; | ||||
| //         case NT_STMT_FOR      : // for (init; cond; iter) {...} | ||||
| //             printf("for\n"); | ||||
| //             if (node->for_stmt.init) | ||||
| //                 pnt_ast(node->for_stmt.init, depth+1); | ||||
| //             if (node->for_stmt.cond) | ||||
| //                 pnt_ast(node->for_stmt.cond, depth+1); | ||||
| //             if (node->for_stmt.iter) | ||||
| //                 pnt_ast(node->for_stmt.iter, depth+1); | ||||
| //             pnt_ast(node->for_stmt.body, depth+1); | ||||
| //             break; | ||||
| //         case NT_STMT_SWITCH   : // switch (expr) { case ... } | ||||
| //         case NT_STMT_BREAK    : // break; | ||||
| //         case NT_STMT_CONTINUE : // continue; | ||||
| //         case NT_STMT_GOTO     : // goto label; | ||||
| //         case NT_STMT_CASE     : // case const_expr: | ||||
| //         case NT_STMT_DEFAULT  : // default: | ||||
| //         case NT_STMT_LABEL    : // label: | ||||
| //             break; | ||||
| //         case NT_STMT_BLOCK    : // { ... } | ||||
| //             printf("{\n"); | ||||
| //             for (int i = 0; i < node->block.child_size; i++) { | ||||
| //                 pnt_ast(node->block.children[i], depth+1); | ||||
| //             } | ||||
| //             pnt_depth(depth); | ||||
| //             printf("}\n"); | ||||
| //             break; | ||||
| //         case NT_STMT_RETURN   : // return expr; | ||||
| //             printf("return"); | ||||
| //             if (node->return_stmt.expr_stmt) { | ||||
| //                 printf(" "); | ||||
| //                 pnt_ast(node->return_stmt.expr_stmt, depth+1); | ||||
| //             } else { | ||||
| //                 printf("\n"); | ||||
| //             } | ||||
| //             break; | ||||
| //         case NT_STMT_EXPR     : // expr; | ||||
| //             printf("stmt\n"); | ||||
| //             pnt_ast(node->expr_stmt.expr_stmt, depth); | ||||
| //             pnt_depth(depth); | ||||
| //             printf(";\n"); | ||||
| //             break; | ||||
| //         case NT_DECL_VAR : // type name; or type name = expr; | ||||
| //             printf("decl_val\n"); | ||||
| //             break; | ||||
| //         case NT_DECL_FUNC: // type func_name(param_list); | ||||
| //             printf("decl func %s\n", node->func.name->syms.tok.val.str); | ||||
| //             break; | ||||
| //         case NT_FUNC      : // type func_name(param_list) {...} | ||||
| //             printf("def func %s\n", node->func.name->syms.tok.val.str); | ||||
| //             // pnt_ast(node->child.func.params, depth); | ||||
| //             pnt_ast(node->func.body, depth); | ||||
| //             // pnt_ast(node->child.func.ret, depth); | ||||
| //             break; | ||||
| //         case NT_PARAM     : // function formal parameter | ||||
| //             printf("param\n"); | ||||
| //         case NT_ARG_LIST  : // argument list (used together with NT_CALL) | ||||
| //             printf("arg_list\n"); | ||||
| //         case NT_TERM_CALL      : // func (expr) | ||||
| //             printf("call\n"); | ||||
| //             break; | ||||
| //         case NT_TERM_IDENT: | ||||
| //             printf("%s\n", node->syms.tok.val.str); | ||||
| //             break; | ||||
| //         case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword | ||||
| //             tok_t * tok = &node->syms.tok; | ||||
| //             switch (tok->type) { | ||||
| //                 case TOKEN_CHAR_LITERAL: | ||||
| //                     printf("%c\n", tok->val.ch); | ||||
| //                     break; | ||||
| //                 case TOKEN_INT_LITERAL: | ||||
| //                     printf("%d\n", tok->val.i); | ||||
| //                     break; | ||||
| //                 case TOKEN_STRING_LITERAL: | ||||
| //                     printf("%s\n", tok->val.str); | ||||
| //                     break; | ||||
| //                 default: | ||||
| //                     printf("unknown term val\n"); | ||||
| //                     break; | ||||
| //             } | ||||
| //         default: | ||||
| //             break; | ||||
| //     } | ||||
|      | ||||
|     // generic recursive handling of child nodes | ||||
|     if (node->type <= NT_ASSIGN) { // expression nodes share one child-handling path; relies on the expression kinds being ordered at or before NT_ASSIGN in the enum (TODO confirm against ast.h) | ||||
|         if (node->expr.left) pnt_ast(node->expr.left, depth+1); | ||||
|         if (node->expr.right) pnt_ast(node->expr.right, depth + 1); | ||||
|     } | ||||
| } | ||||
| //     // generic recursive handling of child nodes | ||||
| //     if (node->type <= NT_ASSIGN) { // expression nodes share one child-handling path | ||||
| //         if (node->expr.left) pnt_ast(node->expr.left, depth+1); | ||||
| //         if (node->expr.right) pnt_ast(node->expr.right, depth + 1); | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| @ -3,9 +3,10 @@ | ||||
|  | ||||
| #include "../../frontend.h" | ||||
| #include "../../lexer/lexer.h" | ||||
| #include "../../../../libcore/vector.h" | ||||
| #include "../type.h" | ||||
|  | ||||
| enum ASTType { | ||||
| typedef enum { | ||||
|     NT_INIT, | ||||
|     NT_ROOT, // global scope in root node | ||||
|     NT_ADD, // (expr) + (expr) | ||||
| @ -75,31 +76,28 @@ enum ASTType { | ||||
|     NT_TERM_VAL, | ||||
|     NT_TERM_IDENT, | ||||
|     NT_TERM_TYPE, | ||||
| }; | ||||
| } ast_type_t; | ||||
|  | ||||
| struct ASTNode { | ||||
|     enum ASTType type; | ||||
| typedef struct ASTNode { | ||||
|     ast_type_t type; | ||||
|  | ||||
|     union { | ||||
|         void *children[6]; | ||||
|         struct { | ||||
|             struct ASTNode** children; | ||||
|             int child_size; | ||||
|             vector_header(children, struct ASTNode*); | ||||
|         } root; | ||||
|         struct { | ||||
|             struct ASTNode** children; // array of children | ||||
|             int child_size; | ||||
|             vector_header(children, struct ASTNode*); | ||||
|         } block; | ||||
|         struct { | ||||
|             struct ASTNode* decl_node; | ||||
|             struct Token tok; | ||||
|             tok_t tok; | ||||
|         } syms; | ||||
|         struct { | ||||
|             struct ASTNode *arr; | ||||
|             int size; | ||||
|             vector_header(params, struct ASTNode*); | ||||
|         } params; | ||||
|         struct { | ||||
|             const char* name; | ||||
|             struct ASTNode* name; | ||||
|             struct ASTNode* params; | ||||
|             struct ASTNode* func_decl; | ||||
|         } call; | ||||
| @ -113,13 +111,12 @@ struct ASTNode { | ||||
|             struct ASTNode *ret; | ||||
|             struct ASTNode *name; | ||||
|             struct ASTNode *params; // array of params | ||||
|             void* data; | ||||
|         } func_decl; | ||||
|             struct ASTNode *def; | ||||
|         } decl_func; | ||||
|         struct { | ||||
|             struct ASTNode *ret; | ||||
|             struct ASTNode *name; | ||||
|             struct ASTNode *params; // array of params | ||||
|             struct ASTNode *decl; | ||||
|             struct ASTNode *body; // optional | ||||
|             void* data; | ||||
|         } func; | ||||
|         struct { | ||||
|             struct ASTNode *left; | ||||
| @ -165,27 +162,26 @@ struct ASTNode { | ||||
|             struct ASTNode *expr_stmt; | ||||
|         } expr_stmt; | ||||
|     }; | ||||
| }; | ||||
| } ast_node_t; | ||||
|  | ||||
| struct ASTNode* new_ast_node(void); /* returns a node for the caller to fill in; callers in the parser set its type right after the call */ | ||||
| void init_ast_node(struct ASTNode* node); | ||||
| void pnt_ast(struct ASTNode* node, int depth); /* NOTE(review): the definition of pnt_ast in ast.c is commented out by this change, so this prototype may be left without a definition -- confirm */ | ||||
|  | ||||
| struct Parser; | ||||
| typedef struct ASTNode* (*parse_func_t) (struct Parser*); | ||||
| typedef struct parser parser_t; | ||||
| typedef struct ASTNode* (*parse_func_t) (parser_t*); /* pointer type matching the parse_* entry points declared below */ | ||||
|  | ||||
| void parse_prog(struct Parser* parser); | ||||
| struct ASTNode* parse_block(struct Parser* parser); | ||||
| struct ASTNode* parse_stmt(struct Parser* parser); | ||||
| struct ASTNode* parse_expr(struct Parser* parser); | ||||
| struct ASTNode* parse_func(struct Parser* parser); | ||||
| struct ASTNode* parse_decl(struct Parser* parser); | ||||
| void parse_prog(parser_t* parser); | ||||
| ast_node_t* parse_decl(parser_t* parser); | ||||
| ast_node_t* parse_block(parser_t* parser); | ||||
| ast_node_t* parse_stmt(parser_t* parser); | ||||
| ast_node_t* parse_expr(parser_t* parser); | ||||
|  | ||||
| struct ASTNode* parse_ident(struct Parser* parser); | ||||
| struct ASTNode* parse_type(struct Parser* parser); | ||||
| ast_node_t* parse_type(parser_t* parser); | ||||
|  | ||||
| int peek_decl(struct Parser* parser); | ||||
| ast_node_t* new_ast_ident_node(tok_t* tok); /* builds an identifier node from a token; parse_decl_val passes it a peeked token */ | ||||
| ast_node_t* expect_pop_ident(tok_buf_t* tokbuf); /* presumably pops the next token, requires an identifier and wraps it in a node -- confirm */ | ||||
|  | ||||
| struct ASTNode* parser_ident_without_pop(struct Parser* parser); | ||||
| int peek_decl(tok_buf_t* tokbuf); /* returns 1 when the lookahead is a type keyword that starts a declaration, otherwise 0 */ | ||||
|  | ||||
| #endif | ||||
|  | ||||
| @ -1,48 +1,49 @@ | ||||
|  | ||||
| #include "../parser.h" | ||||
| #include "ast.h" | ||||
| #include "../parser.h" | ||||
| #include "../symtab/symtab.h" | ||||
|  | ||||
|  | ||||
| #ifndef BLOCK_MAX_NODE | ||||
| #define BLOCK_MAX_NODE (1024) | ||||
| #endif | ||||
|  | ||||
| struct ASTNode* parse_block(struct Parser* parser) { | ||||
|     symtab_enter_scope(parser->symtab); | ||||
|  | ||||
|     // parse_decl(parser); // decl_var | ||||
|     enum TokenType ttype; | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
| ast_node_t* new_ast_node_block() { | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->type = NT_BLOCK; | ||||
|     flushpeektok(parser); | ||||
|     ttype = peektoktype(parser); | ||||
|     if (ttype != TOKEN_L_BRACE) { | ||||
|         error("block need '{' start"); | ||||
|     } | ||||
|     poptok(parser); | ||||
|     vector_init(node->block.children); | ||||
|     return node; | ||||
| } | ||||
|  | ||||
|     node->block.children = malloc(sizeof(struct ASTNode*) * BLOCK_MAX_NODE); | ||||
|     struct ASTNode* child = NULL; | ||||
| ast_node_t* parse_block(parser_t* parser) { | ||||
|     symtab_enter_scope(parser->symtab); | ||||
|     tok_buf_t *tokbuf = &parser->tokbuf; | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_type_t ttype; | ||||
|     ast_node_t* node = new_ast_node_block(); | ||||
|  | ||||
|     expect_pop_tok(tokbuf, TOKEN_L_BRACE); | ||||
|     ast_node_t* child = NULL; | ||||
|     while (1) { | ||||
|         if (peek_decl(parser) == 1) { | ||||
|         if (peek_decl(tokbuf)) { | ||||
|             child = parse_decl(parser); | ||||
|             goto ADD_CHILD; | ||||
|             vector_push(node->block.children, child); | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         flushpeektok(parser); | ||||
|         ttype = peektoktype(parser); | ||||
|         flush_peek_tok(tokbuf); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         switch (ttype) { | ||||
|         case TOKEN_R_BRACE: | ||||
|             poptok(parser); | ||||
|             goto END; | ||||
|         default: | ||||
|             child = parse_stmt(parser); | ||||
|             goto ADD_CHILD; | ||||
|             break; | ||||
|             case TOKEN_R_BRACE: { | ||||
|                 pop_tok(tokbuf); | ||||
|                 goto END; | ||||
|             } | ||||
|             default: { | ||||
|                 child = parse_stmt(parser); | ||||
|                 vector_push(node->block.children, child); | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|         continue; | ||||
|     ADD_CHILD: | ||||
|         node->block.children[node->block.child_size++] = child; | ||||
|     } | ||||
| END: | ||||
|     symtab_leave_scope(parser->symtab); | ||||
|  | ||||
| @ -6,9 +6,9 @@ | ||||
|  * 0 false | ||||
|  * 1 true | ||||
|  */ | ||||
| int peek_decl(struct Parser* parser) { | ||||
|     flushpeektok(parser); | ||||
|     switch (peektoktype(parser)) { | ||||
| int peek_decl(tok_buf_t* tokbuf) { | ||||
|     flush_peek_tok(tokbuf); | ||||
|     switch (peek_tok_type(tokbuf)) { | ||||
|         case TOKEN_STATIC: | ||||
|         case TOKEN_EXTERN: | ||||
|         case TOKEN_REGISTER: | ||||
| @ -16,10 +16,10 @@ int peek_decl(struct Parser* parser) { | ||||
|             error("not impliment"); | ||||
|             break; | ||||
|         default: | ||||
|             flushpeektok(parser); | ||||
|             flush_peek_tok(tokbuf); | ||||
|     } | ||||
|  | ||||
|     switch (peektoktype(parser)) { | ||||
|     switch (peek_tok_type(tokbuf)) { | ||||
|         case TOKEN_VOID: | ||||
|         case TOKEN_CHAR: | ||||
|         case TOKEN_SHORT: | ||||
| @ -27,60 +27,62 @@ int peek_decl(struct Parser* parser) { | ||||
|         case TOKEN_LONG: | ||||
|         case TOKEN_FLOAT: | ||||
|         case TOKEN_DOUBLE: | ||||
|         // FIXME Ptr | ||||
|             return 1; | ||||
|         default: | ||||
|             flushpeektok(parser); | ||||
|             flush_peek_tok(tokbuf); | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| struct ASTNode* parse_decl_val(struct Parser* parser) { | ||||
|     flushpeektok(parser); | ||||
|     // parse_type | ||||
|     enum TokenType ttype; | ||||
|     struct ASTNode* node; | ||||
| ast_node_t* parse_decl_val(parser_t* parser) { /* Parses one variable declaration into an NT_DECL_VAR node, registers its name in parser->symtab and returns the node. */ | ||||
|     tok_buf_t* tokbuf = &parser->tokbuf; | ||||
|     tok_type_t ttype; | ||||
|     flush_peek_tok(tokbuf); | ||||
|  | ||||
|     struct ASTNode* type_node = parse_type(parser); | ||||
|     struct ASTNode* name_node = parser_ident_without_pop(parser); | ||||
|     ast_node_t* node; | ||||
|     ast_node_t* type_node = parse_type(parser); | ||||
|     flush_peek_tok(tokbuf); | ||||
|     ast_node_t* name_node = new_ast_ident_node(peek_tok(tokbuf)); /* the identifier token is only peeked here, not popped */ | ||||
|      | ||||
|     node = new_ast_node(); | ||||
|     node->decl_val.type = type_node; | ||||
|     node->decl_val.name = name_node; | ||||
|     node->type = NT_DECL_VAR; | ||||
|     symtab_add_symbol(parser->symtab, name_node->syms.tok.constant.str, node); | ||||
|     symtab_add_symbol(parser->symtab, name_node->syms.tok.val.str, node, 0); /* keyed by the identifier string; the meaning of the trailing 0 is not visible here */ | ||||
|      | ||||
|     ttype = peektoktype(parser); | ||||
|     ttype = peek_tok_type(tokbuf); /* presumably the token after the identifier, since the identifier was already peeked -- TODO confirm */ | ||||
|     if (ttype == TOKEN_ASSIGN) { | ||||
|         node->decl_val.expr_stmt = parse_stmt(parser); /* the initializer is parsed as a whole statement; presumably it re-reads the unpopped identifier as name = expr; -- TODO confirm */ | ||||
|         if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) { | ||||
|             error("parser_decl_val want stmt_expr"); | ||||
|         } | ||||
|     } else if (ttype == TOKEN_SEMICOLON) { | ||||
|         poptok(parser); | ||||
|         expecttok(parser, TOKEN_SEMICOLON); | ||||
|         pop_tok(tokbuf); /* pops one token (presumably the identifier peeked above) before the semicolon check -- confirm */ | ||||
|         expect_pop_tok(tokbuf, TOKEN_SEMICOLON); | ||||
|     } else { | ||||
|         error("parser_decl_val syntax error"); | ||||
|     } | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| // Type-parsing entry point (improved) | ||||
| struct ASTNode* parse_decl(struct Parser* parser) { | ||||
|     flushpeektok(parser); | ||||
|     int idx; | ||||
|     enum TokenType ttype; | ||||
|     struct ASTNode* node; | ||||
| ast_node_t* parse_decl(parser_t* parser) { | ||||
|     tok_buf_t* tokbuf = &parser->tokbuf; | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_type_t ttype; | ||||
|     ast_node_t* node; | ||||
|      | ||||
|     if (peek_decl(parser) == 0) { | ||||
|     if (peek_decl(tokbuf) == 0) { | ||||
|         error("syntax error expect decl_val TYPE"); | ||||
|     } | ||||
|     if (peektoktype(parser) != TOKEN_IDENT) { | ||||
|     if (peek_tok_type(tokbuf) != TOKEN_IDENT) { | ||||
|         error("syntax error expect decl_val IDENT"); | ||||
|     } | ||||
|  | ||||
|     ttype = peektoktype(parser); | ||||
|     ttype = peek_tok_type(tokbuf); | ||||
|     switch (ttype) { | ||||
|         case TOKEN_L_PAREN: // ( | ||||
|             node = parse_func(parser); | ||||
|             return NULL; | ||||
|             break; | ||||
|         case TOKEN_ASSIGN: | ||||
|         case TOKEN_SEMICOLON: | ||||
|  | ||||
| @ -33,14 +33,18 @@ enum ParseType { | ||||
|     PREFIX_PARSER, | ||||
| }; | ||||
|  | ||||
| static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec); | ||||
| static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec); | ||||
| #define NEXT(prec) parse_subexpression(tokbuf, symtab, prec) | ||||
|  | ||||
| static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right, | ||||
|     enum ASTType type) { | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
| static ast_node_t* gen_node2(ast_node_t* left, ast_node_t* right, | ||||
|     ast_type_t type) { | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->type = type; | ||||
|     node->expr.left = left; | ||||
|     node->expr.right = right; | ||||
|     return node; | ||||
|     // FIXME | ||||
|  | ||||
|     // switch (type) { | ||||
|     //     case NT_ADD     : printf("+ \n"); break; // (expr) + (expr) | ||||
|     //     case NT_SUB     : printf("- \n"); break; // (expr) - (expr) | ||||
| @ -68,154 +72,157 @@ static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right, | ||||
|     // } | ||||
| } | ||||
|  | ||||
| static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) { | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
| static ast_node_t* parse_comma(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { /* Builds an NT_COMMA node: left is the already-parsed operand, right is parsed at PREC_EXPRESSION. */ | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->type = NT_COMMA; | ||||
|     node->expr.left = left; | ||||
|     node->expr.right = parse_subexpression(parser, PREC_EXPRESSION); | ||||
|     node->expr.right = NEXT(PREC_EXPRESSION); /* NEXT expands to parse_subexpression(tokbuf, symtab, prec). NOTE(review): no token is popped in this function, so the comma is presumably consumed by the caller -- confirm */ | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) { | ||||
|     flushpeektok(parser); | ||||
|     enum TokenType ttype = peektoktype(parser); | ||||
|     poptok(parser); | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
| static ast_node_t* parse_assign(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { /* Builds an NT_ASSIGN node for a plain or compound assignment whose target (left) is already parsed: reads the operator type, pops it, then parses the right-hand side. */ | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_type_t ttype = peek_tok_type(tokbuf); | ||||
|     pop_tok(tokbuf); | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->type = NT_ASSIGN; | ||||
|     // store the assignment target now; the local left is reused below to hold the right-hand side | ||||
|     node->expr.left = left; | ||||
|     enum Precedence next = PREC_ASSIGNMENT + 1; /* the right operand is parsed one precedence level above assignment */ | ||||
|     switch (ttype) { | ||||
|         case TOKEN_ASSIGN      : | ||||
|             left = parse_subexpression(parser, next); | ||||
|             left = NEXT(next); | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_ADD  : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_ADD); | ||||
|             left = gen_node2(left, NEXT(next), NT_ADD); /* compound forms are desugared: a += b becomes a = (a + b), with the same target node used in both places */ | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_SUB  : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_SUB); | ||||
|             left = gen_node2(left, NEXT(next), NT_SUB); | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_MUL  : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_MUL); | ||||
|             left = gen_node2(left, NEXT(next), NT_MUL); | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_DIV  : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_DIV); | ||||
|             left = gen_node2(left, NEXT(next), NT_DIV); | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_MOD  : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_MOD); | ||||
|             left = gen_node2(left, NEXT(next), NT_MOD); | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_L_SH : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_L_SH); | ||||
|             left = gen_node2(left, NEXT(next), NT_L_SH); | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_R_SH : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_R_SH); | ||||
|             left = gen_node2(left, NEXT(next), NT_R_SH); | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_AND  : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_AND); | ||||
|             left = gen_node2(left, NEXT(next), NT_AND); | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_OR   : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_OR); | ||||
|             left = gen_node2(left, NEXT(next), NT_OR); | ||||
|             break; | ||||
|         case TOKEN_ASSIGN_XOR  : | ||||
|             left = gen_node2(left, parse_subexpression(parser, next), NT_XOR); | ||||
|             left = gen_node2(left, NEXT(next), NT_XOR); | ||||
|             break; | ||||
|         default: | ||||
|             error("unsupported operator"); /* if error() returns, left is unchanged and the target also becomes the right-hand side below */ | ||||
|             break; | ||||
|     } | ||||
|     node->expr.right = left; /* left now holds the right-hand side built in the switch */ | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) { | ||||
|     flushpeektok(parser); | ||||
|     enum TokenType ttype = peektoktype(parser); | ||||
|     poptok(parser); | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
| static ast_node_t* parse_cmp(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { /* Builds a comparison node: reads the operator type at the lookahead, pops it, stores left and parses the right operand. */ | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_type_t ttype = peek_tok_type(tokbuf); | ||||
|     pop_tok(tokbuf); | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     // the already-parsed left operand is stored first | ||||
|     node->expr.left = left; | ||||
|     switch (ttype) { | ||||
|         case TOKEN_EQ: | ||||
|             node->type = NT_EQ; | ||||
|             node->expr.right = parse_subexpression(parser, PREC_EQUALITY); | ||||
|             node->expr.right = NEXT(PREC_EQUALITY); /* == and != parse the right operand at PREC_EQUALITY; the four relational operators below use PREC_RELATIONAL */ | ||||
|             break; | ||||
|         case TOKEN_NEQ: | ||||
|             node->type = NT_NEQ; | ||||
|             node->expr.right = parse_subexpression(parser, PREC_EQUALITY); | ||||
|             node->expr.right = NEXT(PREC_EQUALITY); | ||||
|             break; | ||||
|         case TOKEN_LT: | ||||
|             node->type = NT_LT; | ||||
|             node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); | ||||
|             node->expr.right = NEXT(PREC_RELATIONAL); | ||||
|             break; | ||||
|         case TOKEN_GT: | ||||
|             node->type = NT_GT; | ||||
|             node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); | ||||
|             node->expr.right = NEXT(PREC_RELATIONAL); | ||||
|             break; | ||||
|         case TOKEN_LE: | ||||
|             node->type = NT_LE; | ||||
|             node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); | ||||
|             node->expr.right = NEXT(PREC_RELATIONAL); | ||||
|             break; | ||||
|         case TOKEN_GE: | ||||
|             node->type = NT_GE; | ||||
|             node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); | ||||
|             node->expr.right = NEXT(PREC_RELATIONAL); | ||||
|             break; | ||||
|         default: | ||||
|             error("invalid operator"); /* node->type and node->expr.right are not assigned by this function on this path */ | ||||
|     } | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) { | ||||
|     flushpeektok(parser); | ||||
|     enum TokenType ttype = peektoktype(parser); | ||||
|     poptok(parser); | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
| static ast_node_t* parse_cal(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_type_t ttype = peek_tok_type(tokbuf); | ||||
|     pop_tok(tokbuf); | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->expr.left = left; | ||||
|     switch (ttype) { | ||||
|     case TOKEN_OR_OR: | ||||
|         node->type = NT_OR_OR; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_LOGICAL_OR); | ||||
|         node->expr.right = NEXT(PREC_LOGICAL_OR); | ||||
|         break; | ||||
|     case TOKEN_AND_AND: | ||||
|         node->type = NT_AND_AND; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_LOGICAL_AND); | ||||
|         node->expr.right = NEXT(PREC_LOGICAL_AND); | ||||
|         break; | ||||
|     case TOKEN_OR: | ||||
|         node->type = NT_OR; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_OR); | ||||
|         node->expr.right = NEXT(PREC_OR); | ||||
|         break; | ||||
|     case TOKEN_XOR: | ||||
|         node->type = NT_XOR; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_XOR); | ||||
|         node->expr.right = NEXT(PREC_XOR); | ||||
|         break; | ||||
|     case TOKEN_AND: | ||||
|         node->type = NT_AND; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_AND); | ||||
|         node->expr.right = NEXT(PREC_AND); | ||||
|         break; | ||||
|     case TOKEN_L_SH: | ||||
|         node->type = NT_L_SH; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_SHIFT); | ||||
|         node->expr.right = NEXT(PREC_SHIFT); | ||||
|         break; | ||||
|     case TOKEN_R_SH: | ||||
|         node->type = NT_R_SH; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_SHIFT); | ||||
|         node->expr.right = NEXT(PREC_SHIFT); | ||||
|         break; | ||||
|     case TOKEN_ADD: | ||||
|         node->type = NT_ADD; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_ADDITIVE); | ||||
|         node->expr.right = NEXT(PREC_ADDITIVE); | ||||
|         break; | ||||
|     case TOKEN_SUB: | ||||
|         node->type = NT_SUB; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_ADDITIVE); | ||||
|         node->expr.right = NEXT(PREC_ADDITIVE); | ||||
|         break; | ||||
|     case TOKEN_MUL: | ||||
|         node->type = NT_MUL; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE); | ||||
|         node->expr.right = NEXT(PREC_MULTIPLICATIVE); | ||||
|         break; | ||||
|     case TOKEN_DIV: | ||||
|         node->type = NT_DIV; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE); | ||||
|         node->expr.right = NEXT(PREC_MULTIPLICATIVE); | ||||
|         break; | ||||
|     case TOKEN_MOD: | ||||
|         node->type = NT_MOD; | ||||
|         node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE); | ||||
|         node->expr.right = NEXT(PREC_MULTIPLICATIVE); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
| @ -223,44 +230,50 @@ static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) { | ||||
|     return node; | ||||
| } | ||||
|  | ||||
|  | ||||
| // Newly added: parse a function call. Consumes the parenthesised argument list that follows `ident`, then looks the callee name up in the symbol table and reports an error through error() when no matching symbol is found. | ||||
| static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) { | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
| static ast_node_t* parse_call(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* ident) { | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->type = NT_TERM_CALL; | ||||
|     poptok(parser); // skip '(' | ||||
|     node->call.name = ident; | ||||
|     node->call.params = new_ast_node(); | ||||
|     vector_init(node->call.params->params.params); | ||||
|     pop_tok(tokbuf); // skip '(' | ||||
|      | ||||
|     enum TokenType ttype; | ||||
|     // Parse the argument list. NOTE(review): the while (1) loop only pops a ',' when one is present, so a missing comma between two arguments does not appear to be rejected here -- confirm against NEXT()/parse_subexpression. | ||||
|     while ((ttype = peektoktype(parser)) != TOKEN_R_PAREN) { | ||||
|         // add_arg(node, parse_expr(parser)); | ||||
|         if (ttype == TOKEN_COMMA) poptok(parser); | ||||
|         else poptok(parser); | ||||
|     tok_type_t ttype; | ||||
|     while (1) { | ||||
|         flush_peek_tok(tokbuf); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype == TOKEN_R_PAREN) { | ||||
|             break; | ||||
|         } | ||||
|         ast_node_t* param = NEXT(PREC_EXPRESSION); | ||||
|         vector_push(node->call.params->params.params, param); | ||||
|         flush_peek_tok(tokbuf); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype == TOKEN_COMMA) pop_tok(tokbuf); | ||||
|     } | ||||
|     poptok(parser); // skip ')' | ||||
|     pop_tok(tokbuf); // skip ')' | ||||
|  | ||||
|     char* name = ident->syms.tok.constant.str; | ||||
|     void* sym = symtab_lookup_symbol(parser->symtab, name); | ||||
|     if (sym == NULL) { | ||||
|     const char* name = ident->syms.tok.val.str; | ||||
|     ast_node_t* sym = symtab_lookup_symbol(symtab, name); | ||||
|     // TODO check func is match | ||||
|     if (sym == NULL || sym->type != NT_DECL_FUNC) { | ||||
|         error("function not decl %s", name); | ||||
|     } | ||||
|     node->call.name = name; | ||||
|     node->call.params = NULL; | ||||
|     node->call.name = ident; | ||||
|     node->call.func_decl = sym; | ||||
|     return node; | ||||
| } | ||||
|  | ||||
// parse_paren: parse a parenthesised sub-expression. Expects TOKEN_L_PAREN, parses an expression at PREC_EXPRESSION, expects TOKEN_R_PAREN, and returns the inner expression node; the incoming `left` argument is overwritten, not used.
| static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) { | ||||
|     flushpeektok(parser); | ||||
|     enum TokenType ttype; | ||||
|     expecttok(parser, TOKEN_L_PAREN); | ||||
|     left = parse_subexpression(parser, PREC_EXPRESSION); | ||||
|     flushpeektok(parser); | ||||
|     expecttok(parser, TOKEN_R_PAREN); | ||||
| static ast_node_t* parse_paren(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) { | ||||
|     flush_peek_tok(tokbuf); | ||||
|     expect_pop_tok(tokbuf, TOKEN_L_PAREN); | ||||
|     left = NEXT(PREC_EXPRESSION); | ||||
|     flush_peek_tok(tokbuf); | ||||
|     expect_pop_tok(tokbuf, TOKEN_R_PAREN); | ||||
|     return left; | ||||
| } | ||||
|  | ||||
| typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*); | ||||
| typedef ast_node_t* (*parse_expr_fun_t)(tok_buf_t*, symtab_t* , ast_node_t*); | ||||
| static struct expr_prec_table_t { | ||||
|     parse_expr_fun_t parser; | ||||
|     enum Precedence prec; | ||||
| @ -309,11 +322,11 @@ static struct expr_prec_table_t { | ||||
|     [TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER}, | ||||
| }; | ||||
|  | ||||
| static struct ASTNode *parse_primary_expression(struct Parser* parser) { | ||||
|     flushpeektok(parser); | ||||
| static ast_node_t *parse_primary_expression(tok_buf_t* tokbuf, symtab_t *symtab) { | ||||
|     flush_peek_tok(tokbuf); | ||||
|  | ||||
|     struct Token* tok = peektok(parser); | ||||
|     struct ASTNode *node = new_ast_node(); | ||||
|     tok_t* tok = peek_tok(tokbuf); | ||||
|     ast_node_t *node = new_ast_node(); | ||||
|     node->type = NT_TERM_VAL; | ||||
|     node->syms.tok = *tok; | ||||
|  | ||||
| @ -330,34 +343,35 @@ static struct ASTNode *parse_primary_expression(struct Parser* parser) { | ||||
|     case TOKEN_STRING_LITERAL: | ||||
|         // node->data.data_type = TYPE_POINTER; | ||||
|     case TOKEN_IDENT: | ||||
|         node = parse_ident(parser); | ||||
|         if (peektoktype(parser) == TOKEN_L_PAREN) { | ||||
|             node = parse_call(parser, node); | ||||
|         node = expect_pop_ident(tokbuf); | ||||
|         tok_type_t ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype == TOKEN_L_PAREN) { | ||||
|             node = parse_call(tokbuf, symtab, node); | ||||
|         } else { | ||||
|             void *sym = symtab_lookup_symbol(parser->symtab, tok->constant.str); | ||||
|             void *sym = symtab_lookup_symbol(symtab, tok->val.str); | ||||
|             if (sym == NULL) { | ||||
|                 error("undefined symbol but use %s", tok->constant.str); | ||||
|                 error("undefined symbol but use %s", tok->val.str); | ||||
|             } | ||||
|             node->type = NT_TERM_IDENT; | ||||
|             node->syms.decl_node = sym; | ||||
|             goto END; | ||||
|         } | ||||
|         goto END; | ||||
|     default: | ||||
|         return NULL; | ||||
|     } | ||||
|     poptok(parser); | ||||
|     pop_tok(tokbuf); | ||||
| END: | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) { | ||||
|     enum TokenType 					ttype; | ||||
|     struct expr_prec_table_t* 		work; | ||||
|     struct ASTNode*                 left; | ||||
| static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec) { | ||||
|     tok_type_t                  ttype; | ||||
|     struct expr_prec_table_t*   work; | ||||
|     ast_node_t*                 left; | ||||
|  | ||||
|     while (1) { | ||||
|         flushpeektok(parser); | ||||
|         ttype = peektoktype(parser); | ||||
|         flush_peek_tok(tokbuf); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         work = &expr_table[ttype]; | ||||
|         // FIXME | ||||
|         if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) { | ||||
| @ -365,16 +379,16 @@ static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedenc | ||||
|         } | ||||
|         if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) { | ||||
|             if (work->parser != NULL) { | ||||
|                 left = work->parser(parser, NULL); | ||||
|                 left = work->parser(tokbuf, symtab, NULL); | ||||
|             } else { | ||||
|                 left = parse_primary_expression(parser); | ||||
|                 left = parse_primary_expression(tokbuf, symtab); | ||||
|             } | ||||
|         } else if (work->ptype == INFIX_PARSER) { | ||||
|             if (work->parser == NULL) | ||||
|                 break; | ||||
|             if (work->prec <= prec) | ||||
|                 break; | ||||
|             left = work->parser(parser, left); | ||||
|             left = work->parser(tokbuf, symtab, left); | ||||
|         } | ||||
|         // assert(left != NULL); | ||||
|     } | ||||
| @ -382,9 +396,11 @@ static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedenc | ||||
|     return left; | ||||
| } | ||||
|  | ||||
| struct ASTNode* parse_expr(struct Parser* parser) { | ||||
|     flushpeektok(parser); | ||||
|     enum TokenType ttype = peektoktype(parser); | ||||
| ast_node_t* parse_expr(parser_t* parser) { | ||||
|     tok_buf_t* tokbuf = &(parser->tokbuf); | ||||
|     symtab_t *symtab = parser->symtab; | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_type_t ttype = peek_tok_type(tokbuf); | ||||
|     switch (ttype) { | ||||
|     case TOKEN_NOT: | ||||
|     case TOKEN_AND: | ||||
| @ -401,9 +417,9 @@ struct ASTNode* parse_expr(struct Parser* parser) { | ||||
|     case TOKEN_SUB_SUB: | ||||
|     case TOKEN_SIZEOF: | ||||
|     case TOKEN_IDENT: | ||||
|         return parse_subexpression(parser, PREC_EXPRESSION); | ||||
|         return NEXT(PREC_EXPRESSION); | ||||
|     default: | ||||
|         error("Want expr but not got %s", get_token_name(ttype)); | ||||
|         error("Want expr but not got %s", get_tok_name(ttype)); | ||||
|         break; | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -6,34 +6,21 @@ | ||||
| #define FUNC_PARAM_CACHE_SIZE 32  // reasonable initial value; covers 99% of common cases | ||||
| #endif | ||||
|  | ||||
| struct FuncParamCache { | ||||
|     struct Token tokens[FUNC_PARAM_CACHE_SIZE]; | ||||
|     int read_pos;    // current read position (post-incremented by every peekcachetype() call) | ||||
|     int write_pos;   // write position | ||||
|     int depth;       // current cache depth | ||||
| }; | ||||
|  | ||||
| static enum TokenType peekcachetype(struct FuncParamCache* cache) { | ||||
|     return cache->tokens[cache->read_pos++].type; | ||||
| } | ||||
|  | ||||
| // TODO 语义分析压入符号表 | ||||
| static void parse_params(struct Parser* parser, struct FuncParamCache* cache, struct ASTNode* node) { | ||||
|     //  = peekcachetype(cache); | ||||
|     enum TokenType ttype; | ||||
|     // if (ttype != TOKEN_L_PAREN) { | ||||
|     //     error("function expected '('\n"); | ||||
|     // } | ||||
|     struct ASTNode *params = new_ast_node(); | ||||
|     node->func.params = params; | ||||
|     int params_size = 0; | ||||
| static void parse_params(parser_t* parser, tok_buf_t* cache, ast_node_t* node) { | ||||
|     tok_type_t ttype; | ||||
|     ast_node_t *params = new_ast_node(); | ||||
|     node->decl_func.params = params; | ||||
|     vector_init(params->params.params); | ||||
|  | ||||
|     while ((ttype = peekcachetype(cache)) != TOKEN_R_PAREN) { | ||||
|     int depth = 1; | ||||
|     while (depth) { | ||||
|         ttype = peek_tok_type(cache); | ||||
|         switch (ttype) { | ||||
|         case TOKEN_COMMA: | ||||
|             break; | ||||
|         case TOKEN_ELLIPSIS: | ||||
|             ttype = peekcachetype(cache); | ||||
|             ttype = peek_tok_type(cache); | ||||
|             if (ttype != TOKEN_R_PAREN) { | ||||
|                 error("... must be a last parameter list (expect ')')"); | ||||
|             } | ||||
| @ -41,9 +28,29 @@ static void parse_params(struct Parser* parser, struct FuncParamCache* cache, st | ||||
|             error("not implement"); | ||||
|             break; | ||||
|         case TOKEN_IDENT: | ||||
|             params->children[params_size++] = NULL; | ||||
|             // TODO 静态数组 | ||||
|             flush_peek_tok(cache); | ||||
|             ast_node_t* id_node = new_ast_ident_node(peek_tok(cache)); | ||||
|             ast_node_t* node = new_ast_node(); | ||||
|             node->type = NT_DECL_VAR; | ||||
|             node->decl_val.name = id_node; | ||||
|             // TODO typing sys | ||||
|             node->decl_val.type = NULL; | ||||
|             node->decl_val.expr_stmt = NULL; | ||||
|             node->decl_val.data = NULL; | ||||
|             vector_push(params->params.params, node); | ||||
|             symtab_add_symbol(parser->symtab, id_node->syms.tok.val.str, node, 0); | ||||
|             break; | ||||
|         case TOKEN_L_PAREN: { | ||||
|             depth++; | ||||
|             break; | ||||
|         } | ||||
|         case TOKEN_R_PAREN: { | ||||
|             depth--; | ||||
|             break; | ||||
|         } | ||||
|         default: | ||||
|             break; | ||||
|             // TODO 使用cache的类型解析 | ||||
|             // parse_type(parser); | ||||
|             // TODO type parse | ||||
| @ -51,39 +58,42 @@ static void parse_params(struct Parser* parser, struct FuncParamCache* cache, st | ||||
|             // ttype = peekcachetype(cache); | ||||
|             // if (ttype != TOKEN_IDENT) { | ||||
|             //     node->node_type = NT_DECL_FUNC; | ||||
|             //     flushpeektok(parser); | ||||
|             //     flush_peek_tok(tokbuf); | ||||
|             //     continue; | ||||
|             // } | ||||
|             // error("function expected ')' or ','\n"); | ||||
|         } | ||||
|         pop_tok(cache); | ||||
|     } | ||||
| } | ||||
|  | ||||
| enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* cache) { | ||||
|     cache->depth = 1; | ||||
|     cache->read_pos = 0; | ||||
|     cache->write_pos = 0; | ||||
| ast_type_t check_is_func_decl(tok_buf_t* tokbuf, tok_buf_t* cache) { | ||||
|     expect_pop_tok(tokbuf, TOKEN_L_PAREN); | ||||
|     int depth = 1; | ||||
|      | ||||
|     while (cache->depth) { | ||||
|         struct Token* tok = peektok(parser); | ||||
|         poptok(parser); | ||||
|         if (cache->write_pos >= FUNC_PARAM_CACHE_SIZE - 1) { | ||||
|     while (depth) { | ||||
|         tok_t* tok = peek_tok(tokbuf); | ||||
|         pop_tok(tokbuf); | ||||
|         if (cache->size >= cache->cap - 1) { | ||||
|             error("function parameter list too long"); | ||||
|         } | ||||
|         cache->tokens[cache->write_pos++] = *tok; | ||||
|         cache->buf[cache->size++] = *tok; | ||||
|         switch (tok->type) { | ||||
|         case TOKEN_L_PAREN: | ||||
|             cache->depth++; | ||||
|             depth++; | ||||
|             break; | ||||
|         case TOKEN_R_PAREN: | ||||
|             cache->depth--; | ||||
|             depth--; | ||||
|             break; | ||||
|         default: | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
|     cache->end = cache->size; | ||||
|  | ||||
|     switch (peektoktype(parser)) { | ||||
|     switch (peek_tok_type(tokbuf)) { | ||||
|         case TOKEN_SEMICOLON: | ||||
|             poptok(parser); | ||||
|             pop_tok(tokbuf); | ||||
|             return NT_DECL_FUNC; | ||||
|         case TOKEN_L_BRACE: | ||||
|             return NT_FUNC; | ||||
| @ -93,28 +103,66 @@ enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* ca | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct ASTNode* parse_func(struct Parser* parser) { | ||||
|     struct ASTNode* ret_type = parse_type(parser); | ||||
|     struct ASTNode* func_name = parse_ident(parser); | ||||
| static ast_node_t* new_ast_node_funcdecl(ast_node_t* ret, ast_node_t* name) { | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->type = NT_DECL_FUNC; | ||||
|     node->decl_func.ret = ret; | ||||
|     node->decl_func.name = name; | ||||
|     node->decl_func.def = NULL; | ||||
|     return node; | ||||
| } | ||||
|  | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
|     node->func.ret = ret_type; | ||||
|     node->func.name = func_name; | ||||
| void parse_func(parser_t* parser) { | ||||
|     tok_buf_t* tokbuf = &(parser->tokbuf); | ||||
|     flush_peek_tok(tokbuf); | ||||
|     ast_node_t* ret_node = parse_type(parser); | ||||
|     ast_node_t* name_node = expect_pop_ident(tokbuf); | ||||
|     const char* func_name = name_node->syms.tok.val.str; | ||||
|     ast_node_t* decl = new_ast_node_funcdecl(ret_node, name_node); | ||||
|  | ||||
|     flushpeektok(parser); | ||||
|     expecttok(parser, TOKEN_L_PAREN); | ||||
|     struct FuncParamCache cache; | ||||
|     node->type = check_is_func_decl(parser, &cache); | ||||
|     tok_buf_t cache; | ||||
|     init_tokbuf(&cache, NULL, NULL); | ||||
|     cache.cap = FUNC_PARAM_CACHE_SIZE; | ||||
|     tok_t buf[FUNC_PARAM_CACHE_SIZE]; | ||||
|     cache.buf = buf; | ||||
|      | ||||
|     ast_type_t type = check_is_func_decl(&(parser->tokbuf), &cache); | ||||
|  | ||||
|     symtab_add_symbol(parser->symtab, func_name->syms.tok.constant.str, node); | ||||
|     if (node->type == NT_DECL_FUNC) { | ||||
|         return node; | ||||
|     ast_node_t* prev = symtab_add_symbol(parser->symtab, func_name, decl, 1); | ||||
|     if (prev != NULL) { | ||||
|         if (prev->type != NT_DECL_FUNC) { | ||||
|             error("the symbol duplicate old is %d, new is func", prev->type); | ||||
|         } | ||||
|         // TODO: check that the redeclared function matches the previous declaration | ||||
|         if (type == NT_FUNC) { | ||||
|             // TODO Free decl; (only the decl node itself is freed below, then the earlier declaration `prev` is reused) | ||||
|             free(decl); | ||||
|             decl = prev; | ||||
|             goto FUNC; | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     vector_push(parser->root->root.children, decl); | ||||
|     if (type == NT_DECL_FUNC) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
| FUNC: | ||||
|     // This data (decl->decl_func.def) is temporarily used to detect a duplicate definition of the function | ||||
|     if (decl->decl_func.def != NULL) { | ||||
|         error("redefinition of function %s", func_name); | ||||
|     } | ||||
|  | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->type = NT_FUNC; | ||||
|     node->func.decl = decl; | ||||
|     node->func.data = NULL; | ||||
|  | ||||
|     decl->decl_func.def = node; | ||||
|     symtab_enter_scope(parser->symtab); | ||||
|     parse_params(parser, &cache, node); | ||||
|     parse_params(parser, &cache, decl); | ||||
|     node->func.body = parse_block(parser); | ||||
|     symtab_leave_scope(parser->symtab); | ||||
|  | ||||
|     return node; | ||||
|     vector_push(parser->root->root.children, node); | ||||
| } | ||||
|  | ||||
| @ -5,25 +5,30 @@ | ||||
| #define PROG_MAX_NODE_SIZE (1024 * 4) | ||||
| #endif | ||||
|  | ||||
| void parse_prog(struct Parser* parser) { | ||||
| void parse_func(parser_t* parser); | ||||
|  | ||||
| void parse_prog(parser_t* parser) { | ||||
|     /** | ||||
|      * Parse the whole token stream into parser->root (type NT_ROOT). | ||||
|      * | ||||
|      * Program := (Declaration | Definition)* | ||||
|      * same as | ||||
|      * Program := Declaration* Definition* | ||||
|      * NOTE(review): the loop below accepts declarations and definitions in | ||||
|      * any order, so the first form looks like the accurate one -- confirm | ||||
|      * the "same as" claim. | ||||
|      * | ||||
|      * Until TOKEN_EOF is peeked: call parse_decl(); a NULL result falls | ||||
|      * back to parse_func(), any other result is stored in root's children. | ||||
|      */ | ||||
|     int child_size = 0; | ||||
|     tok_buf_t *tokbuf = &(parser->tokbuf); | ||||
|     parser->root = new_ast_node(); | ||||
|     struct ASTNode* node; | ||||
|     parser->root->root.children = xmalloc(sizeof(struct ASTNode*) * PROG_MAX_NODE_SIZE); | ||||
|     ast_node_t* node; | ||||
|     parser->root->type = NT_ROOT; | ||||
|     vector_init(parser->root->root.children); | ||||
|     while (1) { | ||||
|         flushpeektok(parser); | ||||
|         if (peektoktype(parser) == TOKEN_EOF) { | ||||
|         flush_peek_tok(tokbuf); | ||||
|         if (peek_tok_type(tokbuf) == TOKEN_EOF) { | ||||
|             break; | ||||
|         } | ||||
|         node = parse_decl(parser); | ||||
|         parser->root->root.children[child_size++] = node; | ||||
|         if (node == NULL) { | ||||
|             parse_func(parser); | ||||
|         } else { | ||||
|             vector_push(parser->root->root.children, node); | ||||
|         } | ||||
|     } | ||||
|     parser->root->type = NT_ROOT; | ||||
|     parser->root->root.child_size = child_size; | ||||
|     return; | ||||
| } | ||||
|  | ||||
| @ -1,27 +1,28 @@ | ||||
| #include "../parser.h" | ||||
| #include "ast.h" | ||||
|  | ||||
| struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|     flushpeektok(parser); | ||||
|     enum TokenType ttype = peektoktype(parser); | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
| ast_node_t* parse_stmt(parser_t* parser) { | ||||
|     tok_buf_t* tokbuf = &parser->tokbuf; | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_type_t ttype = peek_tok_type(tokbuf); | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     switch (ttype) { | ||||
|     case TOKEN_IF: { | ||||
|         /** | ||||
|          * if (exp) stmt | ||||
|          * if (exp) stmt else stmt | ||||
|          */ | ||||
|         poptok(parser); | ||||
|         pop_tok(tokbuf); | ||||
|  | ||||
|         expecttok(parser, TOKEN_L_PAREN); | ||||
|         expect_pop_tok(tokbuf, TOKEN_L_PAREN); | ||||
|         node->if_stmt.cond = parse_expr(parser); | ||||
|         flushpeektok(parser); | ||||
|         expecttok(parser, TOKEN_R_PAREN); | ||||
|         flush_peek_tok(tokbuf); | ||||
|         expect_pop_tok(tokbuf, TOKEN_R_PAREN); | ||||
|  | ||||
|         node->if_stmt.if_stmt = parse_stmt(parser); | ||||
|         ttype = peektoktype(parser); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype == TOKEN_ELSE) { | ||||
|             poptok(parser); | ||||
|             pop_tok(tokbuf); | ||||
|             node->if_stmt.else_stmt = parse_stmt(parser); | ||||
|         } else { | ||||
|             node->if_stmt.else_stmt = NULL; | ||||
| @ -33,11 +34,11 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|         /** | ||||
|          * switch (exp) stmt | ||||
|          */ | ||||
|         poptok(parser); | ||||
|         pop_tok(tokbuf); | ||||
|  | ||||
|         expecttok(parser, TOKEN_L_PAREN); | ||||
|         expect_pop_tok(tokbuf, TOKEN_L_PAREN); | ||||
|         node->switch_stmt.cond = parse_expr(parser); | ||||
|         expecttok(parser, TOKEN_R_PAREN); | ||||
|         expect_pop_tok(tokbuf, TOKEN_R_PAREN); | ||||
|  | ||||
|         node->switch_stmt.body = parse_stmt(parser); | ||||
|         node->type = NT_STMT_SWITCH; | ||||
| @ -47,11 +48,11 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|         /** | ||||
|          * while (exp) stmt | ||||
|          */ | ||||
|         poptok(parser); | ||||
|         pop_tok(tokbuf); | ||||
|          | ||||
|         expecttok(parser, TOKEN_L_PAREN); | ||||
|         expect_pop_tok(tokbuf, TOKEN_L_PAREN); | ||||
|         node->while_stmt.cond = parse_expr(parser); | ||||
|         expecttok(parser, TOKEN_R_PAREN); | ||||
|         expect_pop_tok(tokbuf, TOKEN_R_PAREN); | ||||
|  | ||||
|         node->while_stmt.body = parse_stmt(parser); | ||||
|         node->type = NT_STMT_WHILE; | ||||
| @ -61,16 +62,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|         /** | ||||
|          * do stmt while (exp) | ||||
|          */ | ||||
|         poptok(parser); | ||||
|         pop_tok(tokbuf); | ||||
|         node->do_while_stmt.body = parse_stmt(parser); | ||||
|         ttype = peektoktype(parser); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype != TOKEN_WHILE) { | ||||
|             error("expected while after do"); | ||||
|         } | ||||
|         poptok(parser); | ||||
|         expecttok(parser, TOKEN_L_PAREN); | ||||
|         pop_tok(tokbuf); | ||||
|         expect_pop_tok(tokbuf, TOKEN_L_PAREN); | ||||
|         node->do_while_stmt.cond = parse_expr(parser); | ||||
|         expecttok(parser, TOKEN_R_PAREN); | ||||
|         expect_pop_tok(tokbuf, TOKEN_R_PAREN); | ||||
|         node->type = NT_STMT_DOWHILE; | ||||
|         break; | ||||
|     } | ||||
| @ -79,36 +80,36 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|          * for (init; [cond]; [iter]) stmt | ||||
|          */ | ||||
|         // node->children.stmt.for_stmt.init | ||||
|         poptok(parser); | ||||
|         ttype = peektoktype(parser); | ||||
|         pop_tok(tokbuf); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype != TOKEN_L_PAREN) { | ||||
|             error("expected ( after for"); | ||||
|         } | ||||
|         poptok(parser); | ||||
|         pop_tok(tokbuf); | ||||
|  | ||||
|         // init expr or init decl_var | ||||
|         // TODO need add this feature | ||||
|         node->for_stmt.init = parse_expr(parser); | ||||
|         expecttok(parser, TOKEN_SEMICOLON); | ||||
|         expect_pop_tok(tokbuf, TOKEN_SEMICOLON); | ||||
|  | ||||
|         // cond expr or null | ||||
|         ttype = peektoktype(parser); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype != TOKEN_SEMICOLON) { | ||||
|             node->for_stmt.cond = parse_expr(parser); | ||||
|             expecttok(parser, TOKEN_SEMICOLON); | ||||
|             expect_pop_tok(tokbuf, TOKEN_SEMICOLON); | ||||
|         } else { | ||||
|             node->for_stmt.cond = NULL; | ||||
|             poptok(parser); | ||||
|             pop_tok(tokbuf); | ||||
|         } | ||||
|  | ||||
|         // iter expr or null | ||||
|         ttype = peektoktype(parser); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype != TOKEN_R_PAREN) { | ||||
|             node->for_stmt.iter = parse_expr(parser); | ||||
|             expecttok(parser, TOKEN_R_PAREN); | ||||
|             expect_pop_tok(tokbuf, TOKEN_R_PAREN); | ||||
|         } else { | ||||
|             node->for_stmt.iter = NULL; | ||||
|             poptok(parser); | ||||
|             pop_tok(tokbuf); | ||||
|         } | ||||
|  | ||||
|         node->for_stmt.body = parse_stmt(parser); | ||||
| @ -120,8 +121,8 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|          * break ; | ||||
|          */ | ||||
|         // TODO check 导致外围 for、while 或 do-while 循环或 switch 语句终止。 | ||||
|         poptok(parser); | ||||
|         expecttok(parser, TOKEN_SEMICOLON); | ||||
|         pop_tok(tokbuf); | ||||
|         expect_pop_tok(tokbuf, TOKEN_SEMICOLON); | ||||
|  | ||||
|         node->type = NT_STMT_BREAK; | ||||
|         break; | ||||
| @ -131,8 +132,8 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|          * continue ; | ||||
|          */ | ||||
|         // TODO check 导致跳过整个 for、 while 或 do-while 循环体的剩余部分。 | ||||
|         poptok(parser); | ||||
|         expecttok(parser, TOKEN_SEMICOLON); | ||||
|         pop_tok(tokbuf); | ||||
|         expect_pop_tok(tokbuf, TOKEN_SEMICOLON); | ||||
|  | ||||
|         node->type = NT_STMT_CONTINUE; | ||||
|         break; | ||||
| @ -142,16 +143,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|          * return [exp] ; | ||||
|          */ | ||||
|         // TODO 终止当前函数并返回指定值给调用方函数。 | ||||
|         poptok(parser); | ||||
|         ttype = peektoktype(parser); | ||||
|         pop_tok(tokbuf); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype != TOKEN_SEMICOLON) { | ||||
|             node->return_stmt.expr_stmt = parse_expr(parser); | ||||
|             flushpeektok(parser); | ||||
|             expecttok(parser, TOKEN_SEMICOLON); | ||||
|             flush_peek_tok(tokbuf); | ||||
|             expect_pop_tok(tokbuf, TOKEN_SEMICOLON); | ||||
|         } else { | ||||
|             node->return_stmt.expr_stmt = NULL; | ||||
|             pop_tok(tokbuf); | ||||
|         } | ||||
|         poptok(parser); | ||||
|         node->type = NT_STMT_RETURN; | ||||
|         break; | ||||
|     } | ||||
| @ -161,15 +162,15 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|          */ | ||||
|         // TODO check label 将控制无条件转移到所欲位置。 | ||||
|         //在无法用约定的构造将控制转移到所欲位置时使用。 | ||||
|         poptok(parser); | ||||
|         pop_tok(tokbuf); | ||||
|         // find symbol table | ||||
|         ttype = peektoktype(parser); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype != TOKEN_IDENT) { | ||||
|             error("expect identifier after goto"); | ||||
|         } | ||||
|         expecttok(parser, TOKEN_SEMICOLON); | ||||
|         expect_pop_tok(tokbuf, TOKEN_SEMICOLON); | ||||
|         // TODO filling label | ||||
|         node->goto_stmt.label = parse_ident(parser); | ||||
|         node->goto_stmt.label = expect_pop_ident(tokbuf); | ||||
|         node->type = NT_STMT_GOTO; | ||||
|         break; | ||||
|     } | ||||
| @ -181,7 +182,7 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|          *      if () ; | ||||
|          *      for () ; | ||||
|          */ | ||||
|         poptok(parser); | ||||
|         pop_tok(tokbuf); | ||||
|         node->type = NT_STMT_EMPTY; | ||||
|         break; | ||||
|     } | ||||
| @ -193,30 +194,30 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|         node->type = NT_STMT_BLOCK; | ||||
|         break; | ||||
|     } | ||||
|     case TOKEN_IDENT: {         | ||||
|     case TOKEN_IDENT: { | ||||
|         // TODO label goto | ||||
|         if (peektoktype(parser) != TOKEN_COLON) { | ||||
|         if (peek_tok_type(tokbuf) != TOKEN_COLON) { | ||||
|             goto EXP; | ||||
|         } | ||||
|         node->label_stmt.label = parse_ident(parser); | ||||
|         expecttok(parser, TOKEN_COLON); | ||||
|         node->label_stmt.label = expect_pop_ident(tokbuf); | ||||
|         expect_pop_tok(tokbuf, TOKEN_COLON); | ||||
|         node->type = NT_STMT_LABEL; | ||||
|         break; | ||||
|     } | ||||
|     case TOKEN_CASE: { | ||||
|         // TODO label switch | ||||
|         poptok(parser); | ||||
|         pop_tok(tokbuf); | ||||
|         error("unimplemented switch label"); | ||||
|         node->label_stmt.label = parse_expr(parser); | ||||
|         // TODO 该表达式为const int | ||||
|         expecttok(parser, TOKEN_COLON); | ||||
|         expect_pop_tok(tokbuf, TOKEN_COLON); | ||||
|         node->type = NT_STMT_CASE; | ||||
|         break; | ||||
|     } | ||||
|     case TOKEN_DEFAULT: { | ||||
|         // TODO label switch default | ||||
|         poptok(parser); | ||||
|         expecttok(parser, TOKEN_COLON); | ||||
|         pop_tok(tokbuf); | ||||
|         expect_pop_tok(tokbuf, TOKEN_COLON); | ||||
|         node->type = NT_STMT_DEFAULT; | ||||
|         break; | ||||
|     } | ||||
| @ -226,15 +227,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) { | ||||
|          */ | ||||
|     EXP: | ||||
|         node->expr_stmt.expr_stmt = parse_expr(parser); | ||||
|         flushpeektok(parser); | ||||
|         ttype = peektoktype(parser); | ||||
|         flush_peek_tok(tokbuf); | ||||
|         ttype = peek_tok_type(tokbuf); | ||||
|         if (ttype != TOKEN_SEMICOLON) { | ||||
|             error("exp must end with \";\""); | ||||
|         } | ||||
|         poptok(parser); | ||||
|         pop_tok(tokbuf); | ||||
|         node->type = NT_STMT_EXPR; | ||||
|         break; | ||||
|     } | ||||
|  | ||||
|     } | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| @ -2,162 +2,30 @@ | ||||
| #include "../type.h" | ||||
| #include "ast.h" | ||||
|  | ||||
| // /* 状态跳转表定义 */ | ||||
| // typedef void (*StateHandler)(struct Parser*, struct ASTNode**); | ||||
|  | ||||
| // enum TypeParseState { | ||||
| //     TPS_BASE_TYPE,    // 解析基础类型 (int/char等) | ||||
| //     TPS_QUALIFIER,    // 解析限定符 (const/volatile) | ||||
| //     TPS_POINTER,      // 解析指针 (*) | ||||
| //     TPS_ARRAY,        // 解析数组维度 ([n]) | ||||
| //     TPS_FUNC_PARAMS,  // 解析函数参数列表 | ||||
| //     TPS_END, | ||||
| // }; | ||||
|  | ||||
| // ; | ||||
|  | ||||
| // /* 状态处理函数前置声明 */ | ||||
| // static void handle_base_type(struct Parser*, struct ASTNode**); | ||||
| // static void handle_qualifier(struct Parser*, struct ASTNode**); | ||||
| // static void handle_pointer(struct Parser*, struct ASTNode**); | ||||
| // static void handle_array(struct Parser*, struct ASTNode**); | ||||
| // static void handle_func_params(struct Parser*, struct ASTNode**); | ||||
| // static void handle_error(struct Parser*, struct ASTNode**); | ||||
|  | ||||
| // /* 状态跳转表(核心优化部分) */ | ||||
| // static const struct StateTransition { | ||||
| //     enum TokenType tok;      // 触发token | ||||
| //     StateHandler handler;    // 处理函数 | ||||
| //     enum TypeParseState next_state; // 下一个状态 | ||||
| // } state_table[][8] = { | ||||
| //     [TPS_QUALIFIER] = { | ||||
| //         {TOKEN_CONST,      handle_qualifier, TPS_QUALIFIER}, | ||||
| //         {TOKEN_VOLATILE,   handle_qualifier, TPS_QUALIFIER}, | ||||
| //         {TOKEN_VOID,       handle_base_type, TPS_POINTER}, | ||||
| //         {TOKEN_CHAR,       handle_base_type, TPS_POINTER}, | ||||
| //         {TOKEN_INT,        handle_base_type, TPS_POINTER}, | ||||
| //         {TOKEN_EOF,        handle_error,     TPS_QUALIFIER}, | ||||
| //         /* 其他token默认处理 */ | ||||
| //         {0,                NULL,             TPS_BASE_TYPE} | ||||
| //     }, | ||||
| //     [TPS_BASE_TYPE] = { | ||||
| //         {TOKEN_MUL,        handle_pointer,   TPS_POINTER}, | ||||
| //         {TOKEN_L_BRACKET,  handle_array,     TPS_ARRAY}, | ||||
| //         {TOKEN_L_PAREN,    handle_func_params,TPS_FUNC_PARAMS}, | ||||
| //         {TOKEN_EOF,        NULL,             TPS_END}, | ||||
| //         {0,                NULL,             TPS_POINTER} | ||||
| //     }, | ||||
| //     [TPS_POINTER] = { | ||||
| //         {TOKEN_MUL,        handle_pointer,   TPS_POINTER}, | ||||
| //         {TOKEN_L_BRACKET,  handle_array,     TPS_ARRAY}, | ||||
| //         {TOKEN_L_PAREN,    handle_func_params,TPS_FUNC_PARAMS}, | ||||
| //         {0,                NULL,             TPS_END} | ||||
| //     }, | ||||
| //     [TPS_ARRAY] = { | ||||
| //         {TOKEN_L_BRACKET,  handle_array,     TPS_ARRAY}, | ||||
| //         {TOKEN_L_PAREN,    handle_func_params,TPS_FUNC_PARAMS}, | ||||
| //         {0,                NULL,             TPS_END} | ||||
| //     }, | ||||
| //     [TPS_FUNC_PARAMS] = { | ||||
| //         {0,                NULL,             TPS_END} | ||||
| //     } | ||||
| // }; | ||||
|  | ||||
| // /* 新的类型解析函数 */ | ||||
| // struct ASTNode* parse_type(struct Parser* p) { | ||||
| //     struct ASTNode* type_root = NULL; | ||||
| //     struct ASTNode** current = &type_root; | ||||
| //     enum TypeParseState state = TPS_QUALIFIER; | ||||
|  | ||||
| //     while (state != TPS_END) { | ||||
| //         enum TokenType t = peektoktype(p); | ||||
| //         const struct StateTransition* trans = state_table[state]; | ||||
|  | ||||
| //         // 查找匹配的转换规则 | ||||
| //         while (trans->tok != 0 && trans->tok != t) { | ||||
| //             trans++; | ||||
| //         } | ||||
|  | ||||
| //         if (trans->handler) { | ||||
| //             trans->handler(p, current); | ||||
| //         } else if (trans->tok == 0) { // 默认规则 | ||||
| //             state = trans->next_state; | ||||
| //             continue; | ||||
| //         } else { | ||||
| //             error("syntax error type parse error"); | ||||
| //         } | ||||
|  | ||||
| //         state = trans->next_state; | ||||
| //     } | ||||
|  | ||||
| //     return type_root; | ||||
| // } | ||||
|  | ||||
| // /* 具体状态处理函数实现 */ | ||||
| // static void handle_qualifier(struct Parser* p, struct ASTNode** current) { | ||||
| //     struct ASTNode* node = new_ast_node(); | ||||
| //     node->node_type = NT_TYPE_QUAL; | ||||
| //     node->data.data_type = poptok(p).type; | ||||
|  | ||||
| //     if (*current) { | ||||
| //         (*current)->child.decl.type = node; | ||||
| //     } else { | ||||
| //         *current = node; | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // static void handle_base_type(struct Parser* p, struct ASTNode** current) { | ||||
| //     struct ASTNode* node = new_ast_node(); | ||||
| //     node->node_type = NT_TYPE_BASE; | ||||
| //     node->data.data_type = poptok(p).type; | ||||
|  | ||||
| //     // 链接到当前节点链的末端 | ||||
| //     while (*current && (*current)->child.decl.type) { | ||||
| //         current = &(*current)->child.decl.type; | ||||
| //     } | ||||
|      | ||||
| //     if (*current) { | ||||
| //         (*current)->child.decl.type = node; | ||||
| //     } else { | ||||
| //         *current = node; | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // static void handle_pointer(struct Parser* p, struct ASTNode** current) { | ||||
| //     poptok(p); // 吃掉* | ||||
| //     struct ASTNode* node = new_ast_node(); | ||||
| //     node->node_type = NT_TYPE_PTR; | ||||
|  | ||||
| //     // 插入到当前节点之前 | ||||
| //     node->child.decl.type = *current; | ||||
| //     *current = node; | ||||
| // } | ||||
|  | ||||
| // /* 其他处理函数类似实现... */ | ||||
|  | ||||
| struct ASTNode* parser_ident_without_pop(struct Parser* parser) { | ||||
|     flushpeektok(parser); | ||||
|     struct Token* tok = peektok(parser); | ||||
| ast_node_t* new_ast_ident_node(tok_t* tok) { | ||||
|     if (tok->type != TOKEN_IDENT) { | ||||
|         error("syntax error: want identifier but got %d", tok->type); | ||||
|     } | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->type = NT_TERM_IDENT; | ||||
|     node->syms.tok = *tok; | ||||
|     node->syms.decl_node = NULL; | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| struct ASTNode* parse_ident(struct Parser* parser) { | ||||
|     struct ASTNode* node = parser_ident_without_pop(parser); | ||||
|     poptok(parser); | ||||
| ast_node_t* expect_pop_ident(tok_buf_t* tokbuf) { | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_t* tok = peek_tok(tokbuf); | ||||
|     ast_node_t* node = new_ast_ident_node(tok); | ||||
|     pop_tok(tokbuf); | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| struct ASTNode* parse_type(struct Parser* parser) { | ||||
|     flushpeektok(parser); | ||||
|     enum TokenType ttype = peektoktype(parser); | ||||
|     enum DataType dtype; | ||||
| ast_node_t* parse_type(parser_t* parser) { | ||||
|     tok_buf_t* tokbuf = &parser->tokbuf; | ||||
|     flush_peek_tok(tokbuf); | ||||
|     tok_type_t ttype = peek_tok_type(tokbuf); | ||||
|     data_type_t dtype; | ||||
|     switch(ttype) { | ||||
|         case TOKEN_VOID:    dtype = TYPE_VOID; break; | ||||
|         case TOKEN_CHAR:    dtype = TYPE_CHAR; break; | ||||
| @ -170,13 +38,14 @@ struct ASTNode* parse_type(struct Parser* parser) { | ||||
|             error("无效的类型说明符"); | ||||
|     } | ||||
|  | ||||
|     struct ASTNode* node = new_ast_node(); | ||||
|     ast_node_t* node = new_ast_node(); | ||||
|     node->type = NT_TERM_TYPE; | ||||
|     // node->data.data_type = dtype; | ||||
|     poptok(parser); | ||||
|     // TODO added by disable warning, will add typing system | ||||
|     dtype += 1; | ||||
|     pop_tok(tokbuf); | ||||
|  | ||||
|     if (peektoktype(parser) == TOKEN_MUL) { | ||||
|         poptok(parser); | ||||
|     if (peek_tok_type(tokbuf) == TOKEN_MUL) { | ||||
|         pop_tok(tokbuf); | ||||
|     } | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| @ -1,136 +1,136 @@ | ||||
| #include "../parser.h" | ||||
| #include "../type.h" | ||||
| // #include "../parser.h" | ||||
| // #include "../type.h" | ||||
|  | ||||
| enum TypeParseState { | ||||
|     TPS_BASE_TYPE,    // 解析基础类型 (int/char等) | ||||
|     TPS_QUALIFIER,    // 解析限定符 (const/volatile) | ||||
|     TPS_POINTER,      // 解析指针 (*) | ||||
|     TPS_ARRAY,        // 解析数组维度 ([n]) | ||||
|     TPS_FUNC_PARAMS   // 解析函数参数列表 | ||||
| }; | ||||
| // enum TypeParseState { | ||||
| //     TPS_BASE_TYPE,    // 解析基础类型 (int/char等) | ||||
| //     TPS_QUALIFIER,    // 解析限定符 (const/volatile) | ||||
| //     TPS_POINTER,      // 解析指针 (*) | ||||
| //     TPS_ARRAY,        // 解析数组维度 ([n]) | ||||
| //     TPS_FUNC_PARAMS   // 解析函数参数列表 | ||||
| // }; | ||||
|  | ||||
| struct ASTNode* parse_type(struct Parser* p) { | ||||
|     struct ASTNode* type_root = new_ast_node(); | ||||
|     struct ASTNode* current = type_root; | ||||
|     current->type = NT_TYPE_BASE; | ||||
| // ast_node_t* parse_type(parser_t* p) { | ||||
| //     ast_node_t* type_root = new_ast_node(); | ||||
| //     ast_node_t* current = type_root; | ||||
| //     current->type = NT_TYPE_BASE; | ||||
|  | ||||
|     enum TypeParseState state = TPS_QUALIFIER; | ||||
|     int pointer_level = 0; | ||||
| //     enum TypeParseState state = TPS_QUALIFIER; | ||||
| //     int pointer_level = 0; | ||||
|  | ||||
|     while (1) { | ||||
|         enum TokenType t = peektoktype(p); | ||||
| //     while (1) { | ||||
| //         tok_type_t t = peektoktype(p); | ||||
|          | ||||
|         switch (state) { | ||||
|         // 基础类型解析 (int, char等) | ||||
|         case TPS_BASE_TYPE: | ||||
|             if (is_base_type(t)) { | ||||
|                 // current->data.data_type = token_to_datatype(t); | ||||
|                 poptok(p); | ||||
|                 state = TPS_POINTER; | ||||
|             } else { | ||||
|                 error("Expected type specifier"); | ||||
|             } | ||||
|             break; | ||||
| //         switch (state) { | ||||
| //         // 基础类型解析 (int, char等) | ||||
| //         case TPS_BASE_TYPE: | ||||
| //             if (is_base_type(t)) { | ||||
| //                 // current->data.data_type = token_to_datatype(t); | ||||
| //                 pop_tok(p); | ||||
| //                 state = TPS_POINTER; | ||||
| //             } else { | ||||
| //                 error("Expected type specifier"); | ||||
| //             } | ||||
| //             break; | ||||
|  | ||||
|         // 类型限定符 (const/volatile) | ||||
|         case TPS_QUALIFIER: | ||||
|             if (t == TOKEN_CONST || t == TOKEN_VOLATILE) { | ||||
|                 struct ASTNode* qual_node = new_ast_node(); | ||||
|                 qual_node->type = NT_TYPE_QUAL; | ||||
|                 qual_node->data.data_type = t; // 复用data_type字段存储限定符 | ||||
|                 current->child.decl.type = qual_node; | ||||
|                 current = qual_node; | ||||
|                 poptok(p); | ||||
|             } else { | ||||
|                 state = TPS_BASE_TYPE; | ||||
|             } | ||||
|             break; | ||||
| //         // 类型限定符 (const/volatile) | ||||
| //         case TPS_QUALIFIER: | ||||
| //             if (t == TOKEN_CONST || t == TOKEN_VOLATILE) { | ||||
| //                 ast_node_t* qual_node = new_ast_node(); | ||||
| //                 qual_node->type = NT_TYPE_QUAL; | ||||
| //                 qual_node->data.data_type = t; // 复用data_type字段存储限定符 | ||||
| //                 current->child.decl.type = qual_node; | ||||
| //                 current = qual_node; | ||||
| //                 pop_tok(p); | ||||
| //             } else { | ||||
| //                 state = TPS_BASE_TYPE; | ||||
| //             } | ||||
| //             break; | ||||
|  | ||||
|         // 指针解析 (*) | ||||
|         case TPS_POINTER: | ||||
|             if (t == TOKEN_MUL) { | ||||
|                 struct ASTNode* ptr_node = new_ast_node(); | ||||
|                 ptr_node->type = NT_TYPE_PTR; | ||||
|                 current->child.decl.type = ptr_node; | ||||
|                 current = ptr_node; | ||||
|                 pointer_level++; | ||||
|                 poptok(p); | ||||
|             } else { | ||||
|                 state = TPS_ARRAY; | ||||
|             } | ||||
|             break; | ||||
| //         // 指针解析 (*) | ||||
| //         case TPS_POINTER: | ||||
| //             if (t == TOKEN_MUL) { | ||||
| //                 ast_node_t* ptr_node = new_ast_node(); | ||||
| //                 ptr_node->type = NT_TYPE_PTR; | ||||
| //                 current->child.decl.type = ptr_node; | ||||
| //                 current = ptr_node; | ||||
| //                 pointer_level++; | ||||
| //                 pop_tok(p); | ||||
| //             } else { | ||||
| //                 state = TPS_ARRAY; | ||||
| //             } | ||||
| //             break; | ||||
|  | ||||
|         // 数组维度 ([n]) | ||||
|         case TPS_ARRAY: | ||||
|             if (t == TOKEN_L_BRACKET) { | ||||
|                 poptok(p); // 吃掉[ | ||||
|                 struct ASTNode* arr_node = new_ast_node(); | ||||
|                 arr_node->type = NT_TYPE_ARRAY; | ||||
| //         // 数组维度 ([n]) | ||||
| //         case TPS_ARRAY: | ||||
| //             if (t == TOKEN_L_BRACKET) { | ||||
| //                 pop_tok(p); // 吃掉[ | ||||
| //                 ast_node_t* arr_node = new_ast_node(); | ||||
| //                 arr_node->type = NT_TYPE_ARRAY; | ||||
|                  | ||||
|                 // 解析数组大小(仅语法检查) | ||||
|                 if (peektoktype(p) != TOKEN_R_BRACKET) { | ||||
|                     parse_expr(p); // 不计算实际值 | ||||
|                 } | ||||
| //                 // 解析数组大小(仅语法检查) | ||||
| //                 if (peektoktype(p) != TOKEN_R_BRACKET) { | ||||
| //                     parse_expr(p); // 不计算实际值 | ||||
| //                 } | ||||
|                  | ||||
|                 expecttok(p, TOKEN_R_BRACKET); | ||||
|                 current->child.decl.type = arr_node; | ||||
|                 current = arr_node; | ||||
|             } else { | ||||
|                 state = TPS_FUNC_PARAMS; | ||||
|             } | ||||
|             break; | ||||
| //                 expecttok(p, TOKEN_R_BRACKET); | ||||
| //                 current->child.decl.type = arr_node; | ||||
| //                 current = arr_node; | ||||
| //             } else { | ||||
| //                 state = TPS_FUNC_PARAMS; | ||||
| //             } | ||||
| //             break; | ||||
|  | ||||
|         // 函数参数列表 | ||||
|         case TPS_FUNC_PARAMS: | ||||
|             if (t == TOKEN_L_PAREN) { | ||||
|                 struct ASTNode* func_node = new_ast_node(); | ||||
|                 func_node->type = NT_TYPE_FUNC; | ||||
|                 current->child.decl.type = func_node; | ||||
| //         // 函数参数列表 | ||||
| //         case TPS_FUNC_PARAMS: | ||||
| //             if (t == TOKEN_L_PAREN) { | ||||
| //                 ast_node_t* func_node = new_ast_node(); | ||||
| //                 func_node->type = NT_TYPE_FUNC; | ||||
| //                 current->child.decl.type = func_node; | ||||
|                  | ||||
|                 // 解析参数列表(仅结构,不验证类型) | ||||
|                 parse_param_list(p, func_node); | ||||
|                 current = func_node; | ||||
|             } else { | ||||
|                 return type_root; // 类型解析结束 | ||||
|             } | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // 判断是否是基础类型 | ||||
| static int is_base_type(enum TokenType t) { | ||||
|     return t >= TOKEN_VOID && t <= TOKEN_DOUBLE; | ||||
| } | ||||
|  | ||||
| // // 转换token到数据类型(简化版) | ||||
| // static enum DataType token_to_datatype(enum TokenType t) { | ||||
| //     static enum DataType map[] = { | ||||
| //         [TOKEN_VOID] = DT_VOID, | ||||
| //         [TOKEN_CHAR] = DT_CHAR, | ||||
| //         [TOKEN_INT] = DT_INT, | ||||
| //         // ...其他类型映射 | ||||
| //     }; | ||||
| //     return map[t]; | ||||
| //                 // 解析参数列表(仅结构,不验证类型) | ||||
| //                 parse_param_list(p, func_node); | ||||
| //                 current = func_node; | ||||
| //             } else { | ||||
| //                 return type_root; // 类型解析结束 | ||||
| //             } | ||||
| //             break; | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
| // 解析参数列表(轻量级) | ||||
| static void parse_param_list(struct Parser* p, struct ASTNode* func) { | ||||
|     expecttok(p, TOKEN_L_PAREN); | ||||
|      | ||||
|     while (peektoktype(p) != TOKEN_R_PAREN) { | ||||
|         struct ASTNode* param = parse_type(p); // 递归解析类型 | ||||
|          | ||||
|         // 允许可选参数名(仅语法检查) | ||||
|         if (peektoktype(p) == TOKEN_IDENT) { | ||||
|             poptok(p); // 吃掉参数名 | ||||
|         } | ||||
|          | ||||
|         if (peektoktype(p) == TOKEN_COMMA) { | ||||
|             poptok(p); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     expecttok(p, TOKEN_R_PAREN); | ||||
| } | ||||
| // // 判断是否是基础类型 | ||||
| // static int is_base_type(tok_type_t t) { | ||||
| //     return t >= TOKEN_VOID && t <= TOKEN_DOUBLE; | ||||
| // } | ||||
|  | ||||
| // // // 转换token到数据类型(简化版) | ||||
| // // static enum DataType token_to_datatype(tok_type_t t) { | ||||
| // //     static enum DataType map[] = { | ||||
| // //         [TOKEN_VOID] = DT_VOID, | ||||
| // //         [TOKEN_CHAR] = DT_CHAR, | ||||
| // //         [TOKEN_INT] = DT_INT, | ||||
| // //         // ...其他类型映射 | ||||
| // //     }; | ||||
| // //     return map[t]; | ||||
| // // } | ||||
|  | ||||
| // // 解析参数列表(轻量级) | ||||
| // static void parse_param_list(parser_t* p, ast_node_t* func) { | ||||
| //     expecttok(p, TOKEN_L_PAREN); | ||||
|      | ||||
| //     while (peektoktype(p) != TOKEN_R_PAREN) { | ||||
| //         ast_node_t* param = parse_type(p); // 递归解析类型 | ||||
|          | ||||
| //         // 允许可选参数名(仅语法检查) | ||||
| //         if (peektoktype(p) == TOKEN_IDENT) { | ||||
| //             pop_tok(p); // 吃掉参数名 | ||||
| //         } | ||||
|          | ||||
| //         if (peektoktype(p) == TOKEN_COMMA) { | ||||
| //             pop_tok(p); | ||||
| //         } | ||||
| //     } | ||||
|      | ||||
| //     expecttok(p, TOKEN_R_PAREN); | ||||
| // } | ||||
|  | ||||
|  | ||||
| @ -1,67 +1,17 @@ | ||||
| #include "parser.h" | ||||
| #include "type.h" | ||||
| #include "ast/ast.h" | ||||
|  | ||||
| int poptok(struct Parser* parser) { | ||||
|     if (parser->size == 0) { | ||||
|         return -1; | ||||
|     } | ||||
|     int idx = parser->cur_idx; | ||||
|     parser->cur_idx = (idx + 1) % PARSER_MAX_TOKEN_QUEUE; | ||||
|     parser->size--; | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| void flushpeektok(struct Parser* parser) { | ||||
|     parser->peek_idx = parser->cur_idx; | ||||
| } | ||||
|  | ||||
| struct Token* peektok(struct Parser* parser) { | ||||
|     int idx = parser->peek_idx; | ||||
|     idx = (idx + 1) % PARSER_MAX_TOKEN_QUEUE; | ||||
|     if (parser->size >= PARSER_MAX_TOKEN_QUEUE) { | ||||
|         warn("peek maybe too deep"); | ||||
|     } | ||||
|     if (parser->peek_idx == parser->end_idx) { | ||||
|         if (parser->size == PARSER_MAX_TOKEN_QUEUE) { | ||||
|             // FIXME | ||||
|             error("buffer overflow"); | ||||
|         } | ||||
|         get_valid_token(parser->lexer, &(parser->TokenBuffer[idx])); | ||||
|         parser->size++; | ||||
|         parser->end_idx = idx; | ||||
|     } | ||||
|      | ||||
|     parser->peek_idx = idx; | ||||
|     return &(parser->TokenBuffer[idx]); | ||||
| } | ||||
|  | ||||
| enum TokenType peektoktype(struct Parser* parser) { | ||||
|     return peektok(parser)->type; | ||||
| } | ||||
|  | ||||
| void expecttok(struct Parser* parser, enum TokenType type) { | ||||
|     struct Token* tok = peektok(parser); | ||||
|     if (tok->type != type) { | ||||
|         error("expected tok: %s, got %s", get_token_name(type), get_token_name(tok->type)); | ||||
|     } else { | ||||
|         poptok(parser); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab) { | ||||
| void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab) { | ||||
|     parser->cur_node = NULL; | ||||
|     parser->root = NULL; | ||||
|  | ||||
|     parser->cur_idx = 0; | ||||
|     parser->peek_idx = 0; | ||||
|     parser->end_idx = 0; | ||||
|     parser->size = 0; | ||||
|     parser->lexer = lexer; | ||||
|     parser->symtab = symtab; | ||||
|     // TODO | ||||
|     init_tokbuf(&parser->tokbuf, lexer, (get_tokbuf_func)get_valid_token); | ||||
|     parser->tokbuf.cap = sizeof(parser->TokenBuffer) / sizeof(parser->TokenBuffer[0]); | ||||
|     parser->tokbuf.buf = parser->TokenBuffer; | ||||
| } | ||||
|  | ||||
| void run_parser(struct Parser* parser) { | ||||
| void run_parser(parser_t* parser) { | ||||
|     parse_prog(parser); | ||||
| } | ||||
|  | ||||
| @ -2,32 +2,24 @@ | ||||
| #define __PARSER_H__ | ||||
|  | ||||
| #include "../frontend.h" | ||||
| #include "../lexer/lexer.h" | ||||
| // #include "symbol_table/symtab.h" | ||||
| // #include "ast/ast.h" | ||||
|  | ||||
| #include "../lexer/lexer.h" | ||||
| typedef struct lexer lexer_t; | ||||
| typedef struct symtab symtab_t; | ||||
| #define PARSER_MAX_TOKEN_QUEUE 16 | ||||
|  | ||||
| struct Parser { | ||||
| typedef struct parser { | ||||
|     struct ASTNode* root; | ||||
|     struct ASTNode* cur_node; | ||||
|      | ||||
|     struct Lexer* lexer; | ||||
|     struct SymbolTable* symtab; | ||||
|     int cur_idx; | ||||
|     int peek_idx; | ||||
|     int end_idx; | ||||
|     int size; | ||||
|     struct Token TokenBuffer[PARSER_MAX_TOKEN_QUEUE]; | ||||
|     lexer_t* lexer; | ||||
|     symtab_t* symtab; | ||||
|     tok_buf_t tokbuf; | ||||
|     tok_t TokenBuffer[PARSER_MAX_TOKEN_QUEUE]; | ||||
|     int err_level; | ||||
| }; | ||||
| } parser_t; | ||||
|  | ||||
| void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab); | ||||
| void run_parser(struct Parser* parser); | ||||
| void flushpeektok(struct Parser* parser); | ||||
| int poptok(struct Parser* parser); | ||||
| struct Token* peektok(struct Parser* parser); | ||||
| enum TokenType peektoktype(struct Parser* parser); | ||||
| void expecttok(struct Parser* parser, enum TokenType type); | ||||
| void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab); | ||||
| void run_parser(parser_t* parser); | ||||
|  | ||||
| #endif | ||||
|  | ||||
| @ -3,25 +3,25 @@ | ||||
| #include "scope.h" | ||||
| #include "symtab.h" | ||||
|  | ||||
| typedef struct SymbolTable SymbolTable; | ||||
| typedef symtab_t symtab_t; | ||||
| typedef struct Scope Scope; | ||||
|  | ||||
| void init_symtab(SymbolTable* symtab) { | ||||
| void init_symtab(symtab_t* symtab) { | ||||
|     symtab->global_scope = scope_create(NULL); | ||||
|     symtab->cur_scope = symtab->global_scope; | ||||
| } | ||||
|  | ||||
| void del_symtab(SymbolTable* symtab) { | ||||
| void del_symtab(symtab_t* symtab) { | ||||
|     scope_destroy(symtab->global_scope); | ||||
| } | ||||
|  | ||||
| void symtab_enter_scope(SymbolTable* symtab) { | ||||
| void symtab_enter_scope(symtab_t* symtab) { | ||||
|     struct Scope* scope = scope_create(symtab->cur_scope); | ||||
|     scope->base_offset = symtab->cur_scope->base_offset + symtab->cur_scope->cur_offset; | ||||
|     symtab->cur_scope = scope; | ||||
| } | ||||
|  | ||||
| void symtab_leave_scope(SymbolTable* symtab) { | ||||
| void symtab_leave_scope(symtab_t* symtab) { | ||||
|     Scope * scope = symtab->cur_scope; | ||||
|     if (scope == NULL) { | ||||
|         error("cannot leave NULL scope or global scope"); | ||||
| @ -30,16 +30,20 @@ void symtab_leave_scope(SymbolTable* symtab) { | ||||
|     scope_destroy(scope); | ||||
| } | ||||
|  | ||||
| void symtab_add_symbol(SymbolTable* symtab, const char* name, void* ast_node) { | ||||
| void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate) { | ||||
|     struct Scope* scope = symtab->cur_scope; | ||||
|     if (scope_lookup_current(scope, name) != NULL) { | ||||
|         // TODO WARNING | ||||
|         // return NULL; | ||||
|     void* node = scope_lookup_current(scope, name); | ||||
|     if (node != NULL) { | ||||
|         if (!can_duplicate) { | ||||
|             error("duplicate symbol %s", name); | ||||
|         } | ||||
|         return node; | ||||
|     } | ||||
|  | ||||
|     scope_insert(scope, name, ast_node); | ||||
|     return node; | ||||
| } | ||||
|  | ||||
| void* symtab_lookup_symbol(SymbolTable* symtab, const char* name) { | ||||
| void* symtab_lookup_symbol(symtab_t* symtab, const char* name) { | ||||
|     return scope_lookup(symtab->cur_scope, name); | ||||
| } | ||||
|  | ||||
| @ -2,17 +2,17 @@ | ||||
| #ifndef __SYMTAB_H__ | ||||
| #define __SYMTAB_H__ | ||||
|  | ||||
| struct SymbolTable { | ||||
| typedef struct symtab { | ||||
|     struct Scope* cur_scope; | ||||
|     struct Scope* global_scope; | ||||
| }; | ||||
| } symtab_t; | ||||
|  | ||||
| void init_symtab(struct SymbolTable* symtab); | ||||
| void del_symtab(struct SymbolTable* symtab); | ||||
| void init_symtab(symtab_t* symtab); | ||||
| void del_symtab(symtab_t* symtab); | ||||
|  | ||||
| void symtab_enter_scope(struct SymbolTable* symtab); | ||||
| void symtab_leave_scope(struct SymbolTable* symtab); | ||||
| void symtab_add_symbol(struct SymbolTable* symtab, const char* name, void* ast_node); | ||||
| void* symtab_lookup_symbol(struct SymbolTable* symtab, const char* name); | ||||
| void symtab_enter_scope(symtab_t* symtab); | ||||
| void symtab_leave_scope(symtab_t* symtab); | ||||
| void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate); | ||||
| void* symtab_lookup_symbol(symtab_t* symtab, const char* name); | ||||
|  | ||||
| #endif | ||||
|  | ||||
| @ -23,7 +23,7 @@ int main(int argc, char** argv) { | ||||
|     struct SymbolTable symtab; | ||||
|     init_symtab(&symtab); | ||||
|  | ||||
|     struct Parser parser; | ||||
|     struct parser parser; | ||||
|     init_parser(&parser, &lexer, &symtab); | ||||
|     parse_prog(&parser); | ||||
|  | ||||
|  | ||||
| @ -3,7 +3,7 @@ | ||||
|  | ||||
| #include "../lexer/token.h" | ||||
|  | ||||
| enum DataType { | ||||
| typedef enum { | ||||
|     TYPE_VOID, | ||||
|     TYPE_CHAR, | ||||
|     TYPE_SHORT, | ||||
| @ -30,6 +30,6 @@ enum DataType { | ||||
|     TYPE_ATOMIC, | ||||
|  | ||||
|     TYPE_TYPEDEF, | ||||
| }; | ||||
| } data_type_t; | ||||
|  | ||||
| #endif | ||||
|  | ||||
							
								
								
									
										0
									
								
								ccompiler/middleend/Makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								ccompiler/middleend/Makefile
									
									
									
									
									
										Normal file
									
								
							| @ -5,48 +5,61 @@ typedef struct ASTNode ASTNode; | ||||
|  | ||||
| // 上下文结构,记录生成过程中的状态 | ||||
| typedef struct { | ||||
|     ir_func_t* current_func;    // 当前处理的函数 | ||||
|     ir_bblock_t* current_block; // 当前基本块 | ||||
|     uint32_t vreg_counter;      // 虚拟寄存器计数器 | ||||
|     ir_func_t* cur_func;    // 当前处理的函数 | ||||
|     ir_bblock_t* cur_block; // 当前基本块 | ||||
| } IRGenContext; | ||||
| IRGenContext ctx; | ||||
| ir_prog_t prog; | ||||
| ir_type_t type_i32 = { | ||||
|     .tag = IR_TYPE_INT32, | ||||
| }; | ||||
|  | ||||
| static inline void init_ir_node_t(ir_node_t* node) { | ||||
|     node->name = NULL; | ||||
|     node->type = NULL; | ||||
|     vector_init(node->used_by); | ||||
| } | ||||
|  | ||||
| static inline ir_node_t* new_ir_node_t() { | ||||
| static inline ir_node_t* new_irnode() { | ||||
|     ir_node_t* node = xmalloc(sizeof(ir_node_t)); | ||||
|     init_ir_node_t(node); | ||||
| } | ||||
|  | ||||
| static inline ir_bblock_t* new_irbblock(const char* name) { | ||||
|     ir_bblock_t* block = xmalloc(sizeof(ir_bblock_t)); | ||||
|     block->label = name; | ||||
|     vector_init(block->instrs); | ||||
|     return block; | ||||
| } | ||||
|  | ||||
| ir_node_t* emit_instr(ir_bblock_t* block) { | ||||
|     if (block == NULL) block = ctx.current_block; | ||||
|     ir_node_t *node = new_ir_node_t(); | ||||
|     if (block == NULL) block = ctx.cur_block; | ||||
|     ir_node_t *node = new_irnode(); | ||||
|     vector_push(block->instrs, node); | ||||
|     return vector_at(block->instrs, block->instrs.size - 1); | ||||
| } | ||||
|  | ||||
| void emit_br(ir_node_t cond, const char* true_lable, const char* false_lable) { | ||||
|     ir_node_t br = { | ||||
|         .tag = IR_NODE_RET, | ||||
|         .data = { | ||||
| // Emit a conditional branch into the current block (emit_instr(NULL)) and return the new node. | ||||
| // `cond` selects between `trueb` and `falseb`; the blocks are referenced by pointer, not copied. | ||||
| ir_node_t* emit_br(ir_node_t* cond, ir_bblock_t* trueb, ir_bblock_t* falseb) { | ||||
|     ir_node_t* br = emit_instr(NULL); | ||||
|     // Whole-node assignment: members not named below (name, type, used_by) are zeroed. | ||||
|     *br = (ir_node_t) { | ||||
|         .tag = IR_NODE_BRANCH, | ||||
|         .data.branch = { | ||||
|             .cond = cond, | ||||
|             .true_bblock = trueb, | ||||
|             .false_bblock = falseb, | ||||
|         } | ||||
|     }; | ||||
|     // emit_instr(br, NULL); | ||||
|     return br; | ||||
| } | ||||
|  | ||||
| ir_node_t* gen_ir_expr(ASTNode* node) { | ||||
|     switch (node->type) { | ||||
|         case NT_TERM_VAL: { | ||||
|             ir_node_t* ir = new_ir_node_t(); | ||||
|             ir_node_t* ir = new_irnode(); | ||||
|             *ir = (ir_node_t) { | ||||
|                 .tag = IR_NODE_CONST_INT, | ||||
|                 .data.const_int = { | ||||
|                     .val = node->syms.tok.constant.i, | ||||
|                     .val = node->syms.tok.val.i, | ||||
|                 }, | ||||
|             }; | ||||
|             return ir; | ||||
| @ -56,15 +69,18 @@ ir_node_t* gen_ir_expr(ASTNode* node) { | ||||
|             return decl; | ||||
|         } | ||||
|         case NT_TERM_CALL: { | ||||
|             // TODO | ||||
|             ir_node_t* ir = new_ir_node_t(); | ||||
|             ir_node_t* ir = emit_instr(NULL); | ||||
|             *ir = (ir_node_t) { | ||||
|                 .tag = IR_NODE_CALL, | ||||
|                 .data.call = { | ||||
|                     .callee = NULL, | ||||
|                     .callee = node->call.func_decl->decl_func.def->func.data, | ||||
|                 }, | ||||
|             }; | ||||
|             vector_init(ir->data.call.args); | ||||
|             for (int i = 0; i < node->call.params->params.params.size; i++) { | ||||
|                 vector_push(ir->data.call.args, \ | ||||
|                     gen_ir_expr(node->call.params->params.params.data[i])); | ||||
|             } | ||||
|             return ir; | ||||
|         } | ||||
|         default: | ||||
| @ -191,42 +207,75 @@ NEXT: | ||||
|     } | ||||
|     return ret; | ||||
| } | ||||
| // Allocate an ir_func_t named `name` with empty bblocks and params vectors. | ||||
| // The name pointer is stored as given (not copied); the type is always &type_i32 for now. | ||||
| static ir_func_t* new_irfunc(const char* name) { | ||||
|     ir_func_t *func = xmalloc(sizeof(ir_func_t)); | ||||
|  | ||||
|     // Assign the whole struct first: the compound literal zeroes every member it does not name, | ||||
|     // so initialising the vectors before this assignment would have that work overwritten. | ||||
|     *func = (ir_func_t) { | ||||
|         .name = name, | ||||
|         // TODO typing system | ||||
|         .type = &type_i32, | ||||
|     }; | ||||
|     vector_init(func->bblocks); | ||||
|     vector_init(func->params); | ||||
|     return func; | ||||
| } | ||||
|  | ||||
| // Lower one function definition into `func`: create its "entry" block, emit one ALLOC node | ||||
| // per declared parameter, then generate the body. The global ctx is saved on entry and | ||||
| // restored before returning. | ||||
| static void gen_ir_func(ASTNode* node, ir_func_t* func) { | ||||
|     assert(node->type == NT_FUNC); | ||||
|  | ||||
|     ir_bblock_t *entry_bb = new_irbblock("entry"); | ||||
|     vector_push(func->bblocks, entry_bb); | ||||
|     vector_push(prog.funcs, func); | ||||
|  | ||||
|     // Point the generator at this function until its body has been emitted. | ||||
|     IRGenContext saved_ctx = ctx; | ||||
|     ctx.cur_block = entry_bb; | ||||
|     ctx.cur_func = func; | ||||
|  | ||||
|     ast_node_t* param_list = node->func.decl->decl_func.params; | ||||
|     int idx = 0; | ||||
|     while (idx < param_list->params.params.size) { | ||||
|         ast_node_t* param_ast = param_list->params.params.data[idx]; | ||||
|         ir_node_t* slot = emit_instr(entry_bb); | ||||
|         *slot = (ir_node_t) { | ||||
|             .tag = IR_NODE_ALLOC, | ||||
|             .name = param_ast->decl_val.name->syms.tok.val.str, | ||||
|             .type = &type_i32, | ||||
|         }; | ||||
|         vector_push(func->params, slot); | ||||
|         // Record the IR node on the AST declaration of the parameter. | ||||
|         param_ast->decl_val.data = slot; | ||||
|         idx++; | ||||
|     } | ||||
|     gen_ir_from_ast(node->func.body); | ||||
|  | ||||
|     ctx = saved_ctx; | ||||
| } | ||||
|  | ||||
| void gen_ir_from_ast(struct ASTNode* node) { | ||||
|     switch (node->type) { | ||||
|         case NT_ROOT: { | ||||
|             for (int i = 0; i < node->root.child_size; i ++) { | ||||
|                 gen_ir_from_ast(node->root.children[i]); | ||||
|             for (int i = 0; i < node->root.children.size; i ++) { | ||||
|                 gen_ir_from_ast(node->root.children.data[i]); | ||||
|             } | ||||
|         } break; | ||||
|             break; | ||||
|         } | ||||
|         case NT_DECL_FUNC: { | ||||
|             ir_func_t* func = new_irfunc(node->decl_func.name->syms.tok.val.str); | ||||
|             if (node->decl_func.def == NULL) { | ||||
|                 ast_node_t* def = new_ast_node(); | ||||
|                 def->func.body = NULL; | ||||
|                 def->func.decl = node; | ||||
|                 node->decl_func.def = def; | ||||
|                 vector_push(prog.extern_funcs, func); | ||||
|             } | ||||
|             node->decl_func.def->func.data = func; | ||||
|             break; | ||||
|         } | ||||
|         case NT_FUNC: { | ||||
|             ir_func_t *func = xmalloc(sizeof(ir_func_t)); | ||||
|             *func = (ir_func_t) { | ||||
|                 .name = node->func.name->syms.tok.constant.str, | ||||
|             }; | ||||
|             vector_init(func->bblocks); | ||||
|  | ||||
|             ir_bblock_t *entry = xmalloc(sizeof(ir_bblock_t)); | ||||
|             *entry = (ir_bblock_t) { | ||||
|                 .label = "entry", | ||||
|             }; | ||||
|             vector_init(entry->instrs); | ||||
|             vector_push(func->bblocks, entry); | ||||
|  | ||||
|             IRGenContext prev_ctx = ctx; | ||||
|             ctx = (IRGenContext) { | ||||
|                 .current_func =  func, | ||||
|                 .current_block = vector_at(func->bblocks, 0), | ||||
|                 .vreg_counter = 0, | ||||
|             }; | ||||
|  | ||||
|             gen_ir_from_ast(node->func.body); | ||||
|  | ||||
|             ctx = prev_ctx; | ||||
|             vector_push(prog.funcs, func); | ||||
|         } break; | ||||
|             gen_ir_func(node, node->func.data); | ||||
|             break; | ||||
|         } | ||||
|         case NT_STMT_RETURN: { | ||||
|             ir_node_t* ret = gen_ir_expr(node->return_stmt.expr_stmt); | ||||
|             ir_node_t* ret = NULL; | ||||
|             if (node->return_stmt.expr_stmt != NULL) { | ||||
|                 ret = gen_ir_expr(node->return_stmt.expr_stmt); | ||||
|             } | ||||
|             ir_node_t* ir = emit_instr(NULL); | ||||
|             *ir = (ir_node_t) { | ||||
|                 .tag = IR_NODE_RET, | ||||
| @ -236,22 +285,54 @@ void gen_ir_from_ast(struct ASTNode* node) { | ||||
|                     } | ||||
|                 } | ||||
|             }; | ||||
|              | ||||
|             vector_push(ctx.cur_func->bblocks, new_irbblock(NULL)); | ||||
|             break; | ||||
|         } | ||||
|         case NT_STMT_BLOCK: { | ||||
|             gen_ir_from_ast(node->block_stmt.block); | ||||
|             break; | ||||
|         } | ||||
|         case NT_BLOCK: { | ||||
|             for (int i = 0; i < node->block.child_size; i ++) { | ||||
|                 gen_ir_from_ast(node->block.children[i]); | ||||
|             for (int i = 0; i < node->block.children.size; i ++) { | ||||
|                 gen_ir_from_ast(node->block.children.data[i]); | ||||
|             } | ||||
|             break; | ||||
|         } | ||||
|         case NT_STMT_IF: { | ||||
|             ir_node_t *cond = gen_ir_expr(node->if_stmt.cond); | ||||
|             ir_bblock_t* trueb = new_irbblock("true_block"); | ||||
|             ir_bblock_t* falseb = new_irbblock("false_block"); | ||||
|             emit_br(cond, trueb, falseb); | ||||
|  | ||||
|             // xmalloc(); | ||||
|             // ir_bblock_t then_block = { | ||||
|             // }; | ||||
|             node->if_stmt.if_stmt; | ||||
|             node->if_stmt.else_stmt; | ||||
|             vector_push(ctx.cur_func->bblocks, trueb); | ||||
|             ctx.cur_block = trueb; | ||||
|             gen_ir_from_ast(node->if_stmt.if_stmt); | ||||
|             ir_node_t* jmp = emit_instr(NULL); | ||||
|              | ||||
|             if (node->if_stmt.else_stmt != NULL) { | ||||
|                 vector_push(ctx.cur_func->bblocks, falseb); | ||||
|                 ctx.cur_block = falseb; | ||||
|                 gen_ir_from_ast(node->if_stmt.else_stmt); | ||||
|                 ir_node_t* jmp = emit_instr(NULL); | ||||
|  | ||||
|                 ctx.cur_block = new_irbblock("jmp_block"); | ||||
|                 vector_push(ctx.cur_func->bblocks, ctx.cur_block); | ||||
|                 *jmp = (ir_node_t) { | ||||
|                     .tag = IR_NODE_JUMP, | ||||
|                     .data.jump = { | ||||
|                         .target_bblock = ctx.cur_block, | ||||
|                     }, | ||||
|                 }; | ||||
|             } else { | ||||
|                 ctx.cur_block = falseb; | ||||
|             } | ||||
|             *jmp = (ir_node_t) { | ||||
|                 .tag = IR_NODE_JUMP, | ||||
|                 .data.jump = { | ||||
|                     .target_bblock = ctx.cur_block, | ||||
|                 }, | ||||
|             }; | ||||
|             break; | ||||
|         } | ||||
|         case NT_STMT_WHILE: { | ||||
| @ -275,7 +356,7 @@ void gen_ir_from_ast(struct ASTNode* node) { | ||||
|             ir_node_t* ret_node = emit_instr(NULL); | ||||
|             *ret_node = (ir_node_t) { | ||||
|                 .tag = IR_NODE_ALLOC, | ||||
|                 .name = node->decl_val.name->syms.tok.constant.str, | ||||
|                 .name = node->decl_val.name->syms.tok.val.str, | ||||
|                 .type = &type_i32, | ||||
|             }; | ||||
|             node->decl_val.data = ret_node; | ||||
|  | ||||
| @ -54,6 +54,7 @@ typedef struct { | ||||
| // Whole-program IR container. | ||||
| typedef struct { | ||||
|     // Program-level nodes (element type ir_node_t*). | ||||
|     vector_header(global, ir_node_t*); | ||||
|     // Functions that have a body; the generator appends to this when lowering a definition. | ||||
|     vector_header(funcs, ir_func_t*); | ||||
|     // Functions that were declared but have no definition in the translated source. | ||||
|     vector_header(extern_funcs, ir_func_t*); | ||||
| } ir_prog_t; | ||||
|  | ||||
| struct ir_node { | ||||
| @ -131,15 +132,15 @@ struct ir_node { | ||||
|         } op; | ||||
|         struct { | ||||
|             ir_node_t* cond; | ||||
|             ir_bblock_t true_bblock; | ||||
|             ir_bblock_t false_bblock; | ||||
|             ir_bblock_t* true_bblock; | ||||
|             ir_bblock_t* false_bblock; | ||||
|         } branch; | ||||
|         struct { | ||||
|             ir_bblock_t target_bblock; | ||||
|             ir_bblock_t* target_bblock; | ||||
|         } jump; | ||||
|         struct { | ||||
|             ir_func_t callee; | ||||
|             vector_header(args, ir_node_t); | ||||
|             ir_func_t* callee; | ||||
|             vector_header(args, ir_node_t*); | ||||
|         } call; | ||||
|         struct { | ||||
|             ir_node_t* ret_val; | ||||
|  | ||||
| @ -1,5 +1,7 @@ | ||||
| int main(void) { | ||||
|     int a; | ||||
|     a = 1 + 2 * 3; | ||||
|     return a; | ||||
| int add(int a, int b) { | ||||
|     return a + b; | ||||
| } | ||||
|  | ||||
| int main(void) { | ||||
|     return add(1, 2); | ||||
| } | ||||
|  | ||||
							
								
								
									
										192
									
								
								test_rv_vm/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										192
									
								
								test_rv_vm/README.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,192 @@ | ||||
| # riscv_emufun (mini-rv32ima) | ||||
|  | ||||
| Click below for the YouTube video introducing this project: | ||||
|  | ||||
| [](https://www.youtube.com/watch?v=YT5vB3UqU_E) [](https://www.youtube.com/watch?v=uZMNK17VCMU)  | ||||
|  | ||||
| ## What | ||||
|  | ||||
| mini-rv32ima is a single-file-header, [mini-rv32ima.h](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.h), in the [STB Style library](https://github.com/nothings/stb) that: | ||||
|  * Implements a RISC-V **rv32ima/Zifencei†+Zicsr** (and partial su), with CLINT and MMIO. | ||||
|  * Is about **400 lines** of actual code. | ||||
|  * Has **no dependencies**, not even libc. | ||||
|  * Is **easily extensible**.  So you can easily add CSRs, instructions, MMIO, etc! | ||||
|  * Is pretty **performant**. (~450 coremark on my laptop, about 1/2 the speed of QEMU) | ||||
|  * Is human-readable and in **basic C** code. | ||||
|  * Is "**incomplete**" in that it didn't implement the tons of the spec that Linux doesn't (and you shouldn't) use. | ||||
|  * Is trivially **embeddable** in applications. | ||||
|  | ||||
| It has a [demo wrapper](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.c) that: | ||||
|  * Implements a CLI, SYSCON, UART, DTB and Kernel image loading. | ||||
|  * And is only around **250 lines** of code itself. | ||||
|  * Compiles down to a **~18kB executable** and only relies on libc. | ||||
|  | ||||
| †: Zifencei+RV32A are stubbed.  So, tweaks will need to be made if you want to emulate a multiprocessor system with this emulator. | ||||
|  | ||||
| Just see the `mini-rv32ima` folder. | ||||
|  | ||||
| It's "fully functional" now in that I can run Linux, apps, etc.  Compile flat binaries and drop them in an image. | ||||
|  | ||||
| ## Why | ||||
|  | ||||
| I'm working on a really really simple C Risc-V emulator. So simple it doesn't even have an MMU (Memory Management Unit). I have a few goals, they include: | ||||
|  * Furthering RV32-NOMMU work to improve Linux support for RV32-NOMMU.  (Imagine if we could run Linux on the $1 ESP32-C3) | ||||
|  * Learning more about RV32 and writing emulators. | ||||
|  * Being further inspired by @pimaker's amazing work on [Running Linux in a Pixel Shader](https://blog.pimaker.at/texts/rvc1/) and having the sneaking suspicion performance could be even better! | ||||
|  * Hoping to port it to some weird places. | ||||
|  * Understand the *most simplistic* system you can run Linux on and trying to push that boundary. | ||||
|  * Continue to include my [education of people about assembly language](https://www.youtube.com/watch?v=Gelf0AyVGy4). | ||||
|  | ||||
| ## How | ||||
|  | ||||
| Windows instructions (Just playing with the image) | ||||
|  * Clone this repo. | ||||
|  * Install or have TinyCC.  [Powershell Installer](https://github.com/cntools/Install-TCC) or [Regular Windows Installer](https://github.com/cnlohr/tinycc-win64-installer/releases/tag/v0_0.9.27) | ||||
|  * Run `winrun.ps` in the `windows` folder. | ||||
|  | ||||
| WSL (for full toolchain and image build): | ||||
|  * You will need to remove all spaces from your path i.e. `export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/mnt/c/Windows/system32:/snap/bin` and continue the instructions.  P.S. What in the world was Windows thinking, putting a space between "Program" and "Files"??!? | ||||
|  | ||||
| Linux instructions (both):  | ||||
|  * Clone this repo. | ||||
|  * Install `git build-essential` and/or whatever other requirements are in place for [buildroot](https://buildroot.org/). | ||||
|  * `make testdlimage` | ||||
|  * It automatically downloads the image (~1MB) and runs the emulator. | ||||
|  * Should be up and running in about 2.5s depending on internet speed. | ||||
|  | ||||
| You can do in-depth work on Linux by: | ||||
|  * `make everything` | ||||
|  | ||||
| If you want to play with the bare metal system, see below, or if you have the toolchain installed, just: | ||||
|  * `make testbare` | ||||
|  | ||||
| If you just want to play emdoom, and use the prebuilt image: | ||||
|  * On Windows, run `windows\winrundoom.ps1` | ||||
|  * On Linux, `cd mini-rv32ima`, and type `make testdoom` | ||||
|  | ||||
| ## Questions? | ||||
|  * Why not rv64? | ||||
|    * Because then I can't run it as easily in a pixel shader if I ever hope to. | ||||
|  * Can I add an MMU? | ||||
|    * Yes.  It actually probably wouldn't be too difficult. | ||||
|  * Should I add an MMU? | ||||
|    * No.  It is important to further support for nommu systems to empower minimal Risc-V designs! | ||||
|  | ||||
| Everything else: Contact us on my Discord: https://discord.com/invite/CCeyWyZ | ||||
|  | ||||
| ## How do I use this in my own project? | ||||
|  | ||||
| You should not need to modify `mini-rv32ima.h`, but instead, use `mini-rv32ima.c` as a template for what you are trying to do in your own project. | ||||
|  | ||||
| You can override all functionality by defining the following macros. Here are examples of what `mini-rv32ima.c` does with them.  You can see the definition of the functions, or augment their definitions, by altering `mini-rv32ima.c`. | ||||
|  | ||||
| | Macro | Definition / Comment | | ||||
| | --- | --- | | ||||
| | `MINIRV32WARN( x... )` | `printf( x );` <br> Warnings emitted from mini-rv32ima.h | | ||||
| | `MINIRV32_DECORATE` | `static` <br> How to decorate the functions. | | ||||
| | `MINI_RV32_RAM_SIZE` | `ram_amt` <br> A variable, how big is system RAM? | | ||||
| | `MINIRV32_IMPLEMENTATION` | If using mini-rv32ima.h, need to define this. | | ||||
| | `MINIRV32_POSTEXEC( pc, ir, retval )` | `{ if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } }` <br> If you want to execute something every time slice. | | ||||
| | `MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val )` | `if( HandleControlStore( addy, val ) ) return val;` <br> Called on non-RAM memory access. | | ||||
| | `MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval )` | `rval = HandleControlLoad( addy );` <br> Called on non-RAM memory access return a value. | | ||||
| | `MINIRV32_OTHERCSR_WRITE( csrno, value )` | `HandleOtherCSRWrite( image, csrno, value );` <br> You can use CSRs for control requests. | | ||||
| | `MINIRV32_OTHERCSR_READ( csrno, value )` |  `value = HandleOtherCSRRead( image, csrno );` <br> You can use CSRs for control requests. | | ||||
|  | ||||
| ## Hopeful goals? | ||||
|  * Further drive down needed features to run Linux. | ||||
|    * Remove need for RV32A extension on systems with only one CPU. | ||||
|    * Support for relocatable ELF executables. | ||||
|    * Add support for an unreal UART.  One that's **much** simpler than the current 8250 driver. | ||||
|  * Maybe run this in a pixelshader too! | ||||
|  * Get opensbi working with this. | ||||
|  * Be able to "embed" rv32 emulators in random projects. | ||||
|  * Can I use early console to be a full system console? | ||||
|  * Can I increase the maximum contiguous memory allocatable? | ||||
|  | ||||
| ## Special Thanks | ||||
|  * For @regymm and their [patches to buildroot](https://github.com/regymm/buildroot) and help! | ||||
|    * callout: Regymm's [quasiSoC project](https://github.com/regymm/quasiSoC/). | ||||
|  * Buildroot (For being so helpful). | ||||
|  * @vowstar and their team working on [k210-linux-nommu](https://github.com/vowstar/k210-linux-nommu). | ||||
|  * This [guide](https://jborza.com/emulation/2020/04/09/riscv-environment.html) | ||||
|  * [rvcodecjs](https://luplab.gitlab.io/rvcodecjs/) I probably went through over 1,000 codes here. | ||||
|  * @splinedrive from the [KianV RISC-V noMMU SoC](https://github.com/splinedrive/kianRiscV/tree/master/linux_socs/kianv_harris_mcycle_edition?s=09) project. | ||||
|   | ||||
| ## More details | ||||
|  | ||||
| If you want to build the kernel yourself: | ||||
|  * `make everything` | ||||
|  * About 20 minutes.  (Or 4+ hours if you're on [Windows Subsystem for Linux 2](https://github.com/microsoft/WSL/issues/4197)) | ||||
|  * And you should be dropped into a Linux busybox shell with some little tools that were compiled here. | ||||
|  | ||||
| ## Emdoom notes | ||||
|  * Emdoom building is in the `experiments/emdoom` folder | ||||
|  * You *MUST* build your kernel with `MAX_ORDER` set to >12 in `buildroot/output/build/linux-5.19/include/linux/mmzone.h` if you are building your own image. | ||||
|  * You CAN use the pre-existing image that is described above. | ||||
|  * On Windows, it will be very slow.  Not sure why. | ||||
|  | ||||
| If you want to use bare metal to build your binaries so you don't need buildroot, you can use the rv64 gcc in 32-bit mode built into Ubuntu 20.04 and up. | ||||
| ``` | ||||
| sudo apt-get install gcc-multilib gcc-riscv64-unknown-elf make | ||||
| ``` | ||||
|  | ||||
| ## Links | ||||
|  * "Hackaday Supercon 2022: Charles Lohr - Assembly in 2022: Yes! We Still Use it and Here's Why" : https://www.youtube.com/watch?v=Gelf0AyVGy4 | ||||
|   | ||||
| ## Attic | ||||
|  | ||||
|  | ||||
| ## General notes: | ||||
|  * https://github.com/cnlohr/riscv_emufun/commit/2f09cdeb378dc0215c07eb63f5a6fb43dbbf1871#diff-b48ccd795ae9aced07d022bf010bf9376232c4d78210c3113d90a8d349c59b3dL440 | ||||
|  | ||||
|  | ||||
| (These things don't currently work) | ||||
|  | ||||
| ### Building Tests | ||||
|  | ||||
| (This does not work, now) | ||||
| ``` | ||||
| cd riscv-tests | ||||
| export CROSS_COMPILE=riscv64-linux-gnu- | ||||
| export PLATFORM_RISCV_XLEN=32 | ||||
| CC=riscv64-linux-gnu-gcc ./configure | ||||
| make XLEN=32 RISCV_PREFIX=riscv64-unknown-elf- RISCV_GCC_OPTS="-g -O1 -march=rv32imaf -mabi=ilp32f -I/usr/include" | ||||
| ``` | ||||
|  | ||||
| ### Building OpenSBI | ||||
|  | ||||
| (This does not currently work!) | ||||
| ``` | ||||
| cd opensbi | ||||
| export CROSS_COMPILE=riscv64-unknown-elf- | ||||
| export PLATFORM_RISCV_XLEN=32 | ||||
| make | ||||
| ``` | ||||
|  | ||||
| ### Extra links | ||||
|  * Clear outline of CSRs: https://five-embeddev.com/riscv-isa-manual/latest/priv-csrs.html | ||||
|  * Fonts used in videos: https://audiolink.dev/ | ||||
|  | ||||
| ### Using custom build | ||||
|  | ||||
| Where yminpatch is the patch from the mailing list. | ||||
| ``` | ||||
| rm -rf buildroot | ||||
| git clone git://git.buildroot.net/buildroot | ||||
| cd buildroot | ||||
| git am < ../yminpatch.txt | ||||
| make qemu_riscv32_nommu_virt_defconfig | ||||
| make | ||||
| # Or use our configs. | ||||
| ``` | ||||
|  | ||||
| Note: For emdoom you will need to modify include/linux/mmzone.h and change MAX_ORDER to 13. | ||||
|  | ||||
| ### Buildroot Notes | ||||
|  | ||||
| Add this: | ||||
| https://github.com/cnlohr/buildroot/pull/1/commits/bc890f74354e7e2f2b1cf7715f6ef334ff6ed1b2 | ||||
|  | ||||
| Use this: | ||||
| https://github.com/cnlohr/buildroot/commit/e97714621bfae535d947817e98956b112eb80a75 | ||||
|  | ||||
							
								
								
									
										520
									
								
								test_rv_vm/mini-rv32ima.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										520
									
								
								test_rv_vm/mini-rv32ima.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,520 @@ | ||||
| // Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses. | ||||
|  | ||||
| #include <stdio.h> | ||||
| #include <stdint.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <math.h> | ||||
|  | ||||
| #include "default64mbdtc.h" | ||||
|  | ||||
| // Guest RAM size in bytes; defaults to 64MB and can be overridden with -m. | ||||
| uint32_t ram_amt = 64*1024*1024; | ||||
| // Set by -d: MINIRV32_POSTEXEC then prints "FAULT" and returns 3 instead of calling HandleException(). | ||||
| int fail_on_all_faults = 0; | ||||
|  | ||||
| // Forward declarations of the file-local helpers. They are declared up front because main() | ||||
| // and the MINIRV32_* hook macros below refer to them before their definitions appear. | ||||
| static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber ); | ||||
| static uint64_t GetTimeMicroseconds(); | ||||
| static void ResetKeyboardInput(); | ||||
| static void CaptureKeyboardInput(); | ||||
| // Hooks invoked through the MINIRV32_* macros defined below. | ||||
| static uint32_t HandleException( uint32_t ir, uint32_t retval ); | ||||
| static uint32_t HandleControlStore( uint32_t addy, uint32_t val ); | ||||
| static uint32_t HandleControlLoad( uint32_t addy ); | ||||
| static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value ); | ||||
| static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno ); | ||||
| // Platform-specific helpers (sleep and keyboard polling). | ||||
| static void MiniSleep(); | ||||
| static int IsKBHit(); | ||||
| static int ReadKBByte(); | ||||
|  | ||||
| // This is the functionality we want to override in the emulator. | ||||
| // Think of this as the way the emulator's processor is connected to the outside world. | ||||
| // The macros are defined before mini-rv32ima.h is included so that the header picks them up. | ||||
| // Warnings emitted from the header go to printf. | ||||
| #define MINIRV32WARN( x... ) printf( x ); | ||||
| // How the header's functions are decorated. | ||||
| #define MINIRV32_DECORATE  static | ||||
| // System RAM size is a runtime variable rather than a constant. | ||||
| #define MINI_RV32_RAM_SIZE ram_amt | ||||
| // Request the implementation, not just the declarations, from the header. | ||||
| #define MINIRV32_IMPLEMENTATION | ||||
| // After execution: on a positive retval either fail out with 3 (when fail_on_all_faults is set) or let HandleException() rewrite it. | ||||
| #define MINIRV32_POSTEXEC( pc, ir, retval ) { if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } } | ||||
| // Non-RAM stores and loads are routed to the control handlers. | ||||
| #define MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val ) if( HandleControlStore( addy, val ) ) return val; | ||||
| #define MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval ) rval = HandleControlLoad( addy ); | ||||
| // CSR numbers the core does not handle itself are forwarded to these handlers. | ||||
| #define MINIRV32_OTHERCSR_WRITE( csrno, value ) HandleOtherCSRWrite( image, csrno, value ); | ||||
| #define MINIRV32_OTHERCSR_READ( csrno, value ) value = HandleOtherCSRRead( image, csrno ); | ||||
|  | ||||
| #include "mini-rv32ima.h" | ||||
|  | ||||
| // Guest RAM; main() allocates ram_amt bytes for it. | ||||
| uint8_t * ram_image = 0; | ||||
| // Processor state; main() places it in the last sizeof(struct MiniRV32IMAState) bytes of ram_image. | ||||
| struct MiniRV32IMAState * core; | ||||
| // Optional -k argument; main() copies it into the default DTB at offset 0xc0. | ||||
| const char * kernel_command_line = 0; | ||||
|  | ||||
| // Defined later in this file; main() calls it in single-step mode and once after the run loop. | ||||
| static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image ); | ||||
|  | ||||
| // Entry point of the demo wrapper. | ||||
| // | ||||
| // Parses the command line, loads the guest image (-f) and a device tree (-b, or the built-in | ||||
| // default) into RAM, places the core state in the last bytes of RAM and then steps the | ||||
| // emulator until the instruction budget (-c) is used up or the guest powers off. | ||||
| // | ||||
| // Returns 0 on power-off or when the run loop ends, 1 on a usage error, and a negative code | ||||
| // when RAM is unusable (-4), a file cannot be opened (-5), an image or DTB does not fit into | ||||
| // RAM (-6), or a file cannot be read (-7 for the image, -9 for the DTB). | ||||
| int main( int argc, char ** argv ) | ||||
| { | ||||
| 	int i; | ||||
| 	long long instct = -1; | ||||
| 	int show_help = 0; | ||||
| 	int time_divisor = 1; | ||||
| 	int fixed_update = 0; | ||||
| 	int do_sleep = 1; | ||||
| 	int single_step = 0; | ||||
| 	int dtb_ptr = 0; | ||||
| 	const char * image_file_name = 0; | ||||
| 	const char * dtb_file_name = 0; | ||||
| 	for( i = 1; i < argc; i++ ) | ||||
| 	{ | ||||
| 		const char * param = argv[i]; | ||||
| 		int param_continue = 0; // Can combine parameters, like -lpt x | ||||
| 		do | ||||
| 		{ | ||||
| 			if( param[0] == '-' || param_continue ) | ||||
| 			{ | ||||
| 				switch( param[1] ) | ||||
| 				{ | ||||
| 				case 'm': if( ++i < argc ) ram_amt = SimpleReadNumberInt( argv[i], ram_amt ); break; | ||||
| 				case 'c': if( ++i < argc ) instct = SimpleReadNumberInt( argv[i], -1 ); break; | ||||
| 				case 'k': if( ++i < argc ) kernel_command_line = argv[i]; break; | ||||
| 				case 'f': image_file_name = (++i<argc)?argv[i]:0; break; | ||||
| 				case 'b': dtb_file_name = (++i<argc)?argv[i]:0; break; | ||||
| 				case 'l': param_continue = 1; fixed_update = 1; break; | ||||
| 				case 'p': param_continue = 1; do_sleep = 0; break; | ||||
| 				case 's': param_continue = 1; single_step = 1; break; | ||||
| 				case 'd': param_continue = 1; fail_on_all_faults = 1; break; | ||||
| 				case 't': if( ++i < argc ) time_divisor = SimpleReadNumberInt( argv[i], 1 ); break; | ||||
| 				default: | ||||
| 					// Reaching the end of a combined flag group is not an error. | ||||
| 					if( param_continue ) | ||||
| 						param_continue = 0; | ||||
| 					else | ||||
| 						show_help = 1; | ||||
| 					break; | ||||
| 				} | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				show_help = 1; | ||||
| 				break; | ||||
| 			} | ||||
| 			param++; | ||||
| 		} while( param_continue ); | ||||
| 	} | ||||
| 	if( show_help || image_file_name == 0 || time_divisor <= 0 ) | ||||
| 	{ | ||||
| 		fprintf( stderr, "./mini-rv32imaf [parameters]\n\t-m [ram amount]\n\t-f [running image]\n\t-k [kernel command line]\n\t-b [dtb file, or 'disable']\n\t-c instruction count\n\t-s single step with full processor state\n\t-t time divion base\n\t-l lock time base to instruction count\n\t-p disable sleep when wfi\n\t-d fail out immediately on all faults\n" ); | ||||
| 		return 1; | ||||
| 	} | ||||
|  | ||||
| 	// The core state is stored in the last bytes of RAM, so RAM must be able to hold at least that. | ||||
| 	if( ram_amt < sizeof( struct MiniRV32IMAState ) ) | ||||
| 	{ | ||||
| 		fprintf( stderr, "Error: RAM amount (%u bytes) is too small.\n", (unsigned)ram_amt ); | ||||
| 		return -4; | ||||
| 	} | ||||
|  | ||||
| 	ram_image = malloc( ram_amt ); | ||||
| 	if( !ram_image ) | ||||
| 	{ | ||||
| 		fprintf( stderr, "Error: could not allocate system image.\n" ); | ||||
| 		return -4; | ||||
| 	} | ||||
|  | ||||
| 	// The syscon restart code (0x7777) jumps back here to reload the image and DTB from disk. | ||||
| restart: | ||||
| 	{ | ||||
| 		FILE * f = fopen( image_file_name, "rb" ); | ||||
| 		if( !f || ferror( f ) ) | ||||
| 		{ | ||||
| 			fprintf( stderr, "Error: \"%s\" not found\n", image_file_name ); | ||||
| 			if( f ) fclose( f ); | ||||
| 			return -5; | ||||
| 		} | ||||
| 		fseek( f, 0, SEEK_END ); | ||||
| 		long flen = ftell( f ); | ||||
| 		fseek( f, 0, SEEK_SET ); | ||||
| 		// ftell reports failure as a negative value; treat that like an image that does not fit. | ||||
| 		if( flen < 0 || (unsigned long)flen > ram_amt ) | ||||
| 		{ | ||||
| 			fprintf( stderr, "Error: Could not fit RAM image (%ld bytes) into %u\n", flen, (unsigned)ram_amt ); | ||||
| 			fclose( f ); | ||||
| 			return -6; | ||||
| 		} | ||||
|  | ||||
| 		memset( ram_image, 0, ram_amt ); | ||||
| 		if( fread( ram_image, flen, 1, f ) != 1) | ||||
| 		{ | ||||
| 			fprintf( stderr, "Error: Could not load image.\n" ); | ||||
| 			fclose( f ); | ||||
| 			return -7; | ||||
| 		} | ||||
| 		fclose( f ); | ||||
|  | ||||
| 		if( dtb_file_name ) | ||||
| 		{ | ||||
| 			if( strcmp( dtb_file_name, "disable" ) == 0 ) | ||||
| 			{ | ||||
| 				// No DTB reading. | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				f = fopen( dtb_file_name, "rb" ); | ||||
| 				if( !f || ferror( f ) ) | ||||
| 				{ | ||||
| 					fprintf( stderr, "Error: \"%s\" not found\n", dtb_file_name ); | ||||
| 					if( f ) fclose( f ); | ||||
| 					return -5; | ||||
| 				} | ||||
| 				fseek( f, 0, SEEK_END ); | ||||
| 				long dtblen = ftell( f ); | ||||
| 				fseek( f, 0, SEEK_SET ); | ||||
| 				// The DTB sits directly below the core state; reject one that would start before RAM. | ||||
| 				if( dtblen < 0 || (unsigned long)dtblen > ram_amt - sizeof( struct MiniRV32IMAState ) ) | ||||
| 				{ | ||||
| 					fprintf( stderr, "Error: Could not fit dtb \"%s\" (%ld bytes) into RAM\n", dtb_file_name, dtblen ); | ||||
| 					fclose( f ); | ||||
| 					return -6; | ||||
| 				} | ||||
| 				dtb_ptr = ram_amt - dtblen - sizeof( struct MiniRV32IMAState ); | ||||
| 				if( fread( ram_image + dtb_ptr, dtblen, 1, f ) != 1 ) | ||||
| 				{ | ||||
| 					fprintf( stderr, "Error: Could not open dtb \"%s\"\n", dtb_file_name ); | ||||
| 					fclose( f ); | ||||
| 					return -9; | ||||
| 				} | ||||
| 				fclose( f ); | ||||
| 			} | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			// Load a default dtb, provided RAM is large enough to hold it below the core state. | ||||
| 			if( sizeof( default64mbdtb ) > ram_amt - sizeof( struct MiniRV32IMAState ) ) | ||||
| 			{ | ||||
| 				fprintf( stderr, "Error: Could not fit default dtb into %u bytes of RAM\n", (unsigned)ram_amt ); | ||||
| 				return -6; | ||||
| 			} | ||||
| 			dtb_ptr = ram_amt - sizeof(default64mbdtb) - sizeof( struct MiniRV32IMAState ); | ||||
| 			memcpy( ram_image + dtb_ptr, default64mbdtb, sizeof( default64mbdtb ) ); | ||||
| 			if( kernel_command_line ) | ||||
| 			{ | ||||
| 				// NOTE(review): copies at most 54 bytes and adds no terminator for longer input; | ||||
| 				// presumably the DTB field at 0xc0 is sized so that a NUL follows - confirm. | ||||
| 				strncpy( (char*)( ram_image + dtb_ptr + 0xc0 ), kernel_command_line, 54 ); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	CaptureKeyboardInput(); | ||||
|  | ||||
| 	// The core lives at the end of RAM. | ||||
| 	core = (struct MiniRV32IMAState *)(ram_image + ram_amt - sizeof( struct MiniRV32IMAState )); | ||||
| 	core->pc = MINIRV32_RAM_IMAGE_OFFSET; | ||||
| 	core->regs[10] = 0x00; //hart ID | ||||
| 	core->regs[11] = dtb_ptr?(dtb_ptr+MINIRV32_RAM_IMAGE_OFFSET):0; //dtb_pa (Must be valid pointer) (Should be pointer to dtb) | ||||
| 	core->extraflags |= 3; // Machine-mode. | ||||
|  | ||||
| 	if( dtb_file_name == 0 ) | ||||
| 	{ | ||||
| 		// Update system ram size in DTB (but if and only if we're using the default DTB) | ||||
| 		// Warning - this will need to be updated if the skeleton DTB is ever modified. | ||||
| 		uint32_t * dtb = (uint32_t*)(ram_image + dtb_ptr); | ||||
| 		if( dtb[0x13c/4] == 0x00c0ff03 ) | ||||
| 		{ | ||||
| 			// Store the usable RAM size with its bytes swapped. | ||||
| 			uint32_t validram = dtb_ptr; | ||||
| 			dtb[0x13c/4] = (validram>>24) | ((( validram >> 16 ) & 0xff) << 8 ) | (((validram>>8) & 0xff ) << 16 ) | ( ( validram & 0xff) << 24 ); | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Image is loaded. | ||||
| 	uint64_t rt; | ||||
| 	uint64_t lastTime = (fixed_update)?0:(GetTimeMicroseconds()/time_divisor); | ||||
| 	int instrs_per_flip = single_step?1:1024; | ||||
| 	// A negative instct means "run without an instruction limit". | ||||
| 	for( rt = 0; rt < instct+1 || instct < 0; rt += instrs_per_flip ) | ||||
| 	{ | ||||
| 		uint64_t * this_ccount = ((uint64_t*)&core->cyclel); | ||||
| 		uint32_t elapsedUs = 0; | ||||
| 		// With -l the time base follows the cycle counter instead of the host clock. | ||||
| 		if( fixed_update ) | ||||
| 			elapsedUs = *this_ccount / time_divisor - lastTime; | ||||
| 		else | ||||
| 			elapsedUs = GetTimeMicroseconds()/time_divisor - lastTime; | ||||
| 		lastTime += elapsedUs; | ||||
|  | ||||
| 		if( single_step ) | ||||
| 			DumpState( core, ram_image); | ||||
|  | ||||
| 		int ret = MiniRV32IMAStep( core, ram_image, 0, elapsedUs, instrs_per_flip ); // Execute upto 1024 cycles before breaking out. | ||||
| 		switch( ret ) | ||||
| 		{ | ||||
| 			case 0: break; | ||||
| 			case 1: if( do_sleep ) MiniSleep(); *this_ccount += instrs_per_flip; break; | ||||
| 			case 3: instct = 0; break; | ||||
| 			case 0x7777: goto restart;	//syscon code for restart | ||||
| 			case 0x5555: printf( "POWEROFF@0x%08x%08x\n", core->cycleh, core->cyclel ); return 0; //syscon code for power-off | ||||
| 			default: printf( "Unknown failure\n" ); break; | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	DumpState( core, ram_image); | ||||
| } | ||||
|  | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Platform-specific functionality | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|  | ||||
| #if defined(WINDOWS) || defined(WIN32) || defined(_WIN32) | ||||
|  | ||||
| #include <windows.h> | ||||
| #include <conio.h> | ||||
|  | ||||
| #define strtoll _strtoi64 | ||||
|  | ||||
| // Windows console setup performed before the VM starts using the terminal. | ||||
| // The only action is an empty system() call (see the note on that line). | ||||
| static void CaptureKeyboardInput() | ||||
| { | ||||
| 	system(""); // Poorly documented trick: Enable VT100 Windows mode. | ||||
| } | ||||
|  | ||||
| // Intentionally empty on Windows; it exists so that code shared with the | ||||
| // non-Windows build can refer to the same function name. | ||||
| static void ResetKeyboardInput() | ||||
| { | ||||
| } | ||||
|  | ||||
| // Short host-side pause; the run loop calls MiniSleep() when the step | ||||
| // function returns 1 and do_sleep is set. | ||||
| static void MiniSleep() | ||||
| { | ||||
| 	Sleep(1); | ||||
| } | ||||
|  | ||||
| // Return the current value of the Windows performance counter converted | ||||
| // to microseconds. | ||||
| static uint64_t GetTimeMicroseconds() | ||||
| { | ||||
| 	static LARGE_INTEGER lpf; // Counter frequency in ticks per second; queried on first use. | ||||
| 	LARGE_INTEGER li; | ||||
|  | ||||
| 	if( !lpf.QuadPart ) | ||||
| 		QueryPerformanceFrequency( &lpf ); | ||||
|  | ||||
| 	QueryPerformanceCounter( &li ); | ||||
|  | ||||
| 	// Convert whole seconds and the sub-second remainder separately. | ||||
| 	// Multiplying the raw tick count by 1000000 first overflows 64 bits once | ||||
| 	// the counter has grown large enough; this form yields the same value | ||||
| 	// whenever the direct product would not have overflowed. | ||||
| 	uint64_t ticks = (uint64_t)li.QuadPart; | ||||
| 	uint64_t freq = (uint64_t)lpf.QuadPart; | ||||
| 	return ( ticks / freq ) * 1000000ULL + ( ( ticks % freq ) * 1000000ULL ) / freq; | ||||
| } | ||||
|  | ||||
|  | ||||
| // Report whether a key press is waiting, as returned by _kbhit(). | ||||
| static int IsKBHit() | ||||
| { | ||||
| 	return _kbhit(); | ||||
| } | ||||
|  | ||||
| // Fetch one byte of keyboard input for the VM. | ||||
| // Keys that arrive with the 224 prefix are rewritten as the three-byte | ||||
| // VT100 sequence ESC '[' <letter>, delivered over three consecutive calls. | ||||
| // Carriage return (13) is delivered as line feed (10). | ||||
| static int ReadKBByte() | ||||
| { | ||||
| 	// 0: idle. 1: ESC was returned, '[' is due on this call. | ||||
| 	// 2: '[' was returned, the next key read is the code to translate. | ||||
| 	static int esc_phase = 0; | ||||
|  | ||||
| 	if( esc_phase == 1 ) | ||||
| 	{ | ||||
| 		esc_phase = 2; | ||||
| 		return '['; | ||||
| 	} | ||||
|  | ||||
| 	int key = _getch(); | ||||
|  | ||||
| 	if( !esc_phase ) | ||||
| 	{ | ||||
| 		if( key == 13 ) | ||||
| 			return 10; // cr->lf | ||||
| 		if( key == 224 ) | ||||
| 		{ | ||||
| 			// Prefix byte of an arrow/navigation key: hand out ESC now. | ||||
| 			esc_phase = 1; | ||||
| 			return 27; | ||||
| 		} | ||||
| 		return key; | ||||
| 	} | ||||
|  | ||||
| 	// Final byte of the sequence: map the Windows key code to its VT100 letter. | ||||
| 	esc_phase = 0; | ||||
| 	static const char win_codes[6] = { 'H', 'P', 'K', 'M', 'G', 'O' };   // Up, Down, Left, Right, Home, End | ||||
| 	static const char vt100_codes[6] = { 'A', 'B', 'D', 'C', 'H', 'F' }; | ||||
| 	for( int i = 0; i < 6; i++ ) | ||||
| 	{ | ||||
| 		if( key == win_codes[i] ) | ||||
| 			return vt100_codes[i]; | ||||
| 	} | ||||
| 	return key; // Unknown code. | ||||
| } | ||||
|  | ||||
| #else | ||||
|  | ||||
| #include <sys/ioctl.h> | ||||
| #include <termios.h> | ||||
| #include <unistd.h> | ||||
| #include <signal.h> | ||||
| #include <sys/time.h> | ||||
|  | ||||
| // Handler registered for SIGINT by CaptureKeyboardInput(): dumps the state of | ||||
| // the file-level core / ram_image and terminates the process with status 0. | ||||
| // NOTE(review): the handler is declared without the int parameter that signal() | ||||
| // handlers receive, and it calls printf/exit from signal context - confirm this | ||||
| // is acceptable for this tool. | ||||
| static void CtrlC() | ||||
| { | ||||
| 	DumpState( core, ram_image); | ||||
| 	exit( 0 ); | ||||
| } | ||||
|  | ||||
| // Override keyboard, so we can capture all keyboard input for the VM. | ||||
| // Registers the exit and SIGINT hooks, then turns off canonical (line-buffered) | ||||
| // mode and echo on file descriptor 0. | ||||
| static void CaptureKeyboardInput() | ||||
| { | ||||
| 	// Hook exit, because we want to re-enable keyboard. | ||||
| 	atexit(ResetKeyboardInput); | ||||
| 	signal(SIGINT, CtrlC); | ||||
|  | ||||
| 	struct termios term; | ||||
| 	// If the attributes cannot be read (for example stdin is not a terminal), | ||||
| 	// term holds no valid data, so there is nothing to modify or write back. | ||||
| 	if( tcgetattr(0, &term) != 0 ) | ||||
| 		return; | ||||
| 	term.c_lflag &= ~(ICANON | ECHO); // Disable echo as well | ||||
| 	tcsetattr(0, TCSANOW, &term); | ||||
| } | ||||
|  | ||||
| // Restore canonical mode and echo on file descriptor 0. | ||||
| // Registered with atexit() by CaptureKeyboardInput(). | ||||
| static void ResetKeyboardInput() | ||||
| { | ||||
| 	// Re-enable echo, etc. on keyboard. | ||||
| 	struct termios term; | ||||
| 	// Skip the write-back when the current attributes cannot be read | ||||
| 	// (for example stdin is not a terminal): term would hold no valid data. | ||||
| 	if( tcgetattr(0, &term) != 0 ) | ||||
| 		return; | ||||
| 	term.c_lflag |= ICANON | ECHO; | ||||
| 	tcsetattr(0, TCSANOW, &term); | ||||
| } | ||||
|  | ||||
| // Short host-side pause (usleep takes microseconds); the run loop calls | ||||
| // MiniSleep() when the step function returns 1 and do_sleep is set. | ||||
| static void MiniSleep() | ||||
| { | ||||
| 	usleep(500); | ||||
| } | ||||
|  | ||||
| // Current time from gettimeofday(), expressed as a single microsecond count. | ||||
| static uint64_t GetTimeMicroseconds() | ||||
| { | ||||
| 	struct timeval now; | ||||
| 	gettimeofday( &now, 0 ); | ||||
| 	uint64_t whole_seconds_us = ((uint64_t)(now.tv_sec)) * 1000000LL; | ||||
| 	return now.tv_usec + whole_seconds_us; | ||||
| } | ||||
|  | ||||
| // Latched by IsKBHit() once stdin is considered to have reached end-of-input. | ||||
| static int is_eofd; | ||||
|  | ||||
| // Read one byte from stdin for the VM. | ||||
| // Returns the byte as a value in 0..255, or -1 when nothing could be read | ||||
| // or end-of-input has already been latched. | ||||
| static int ReadKBByte() | ||||
| { | ||||
| 	if( is_eofd ) return -1; | ||||
| 	// Use an unsigned byte: with plain char, bytes >= 0x80 (e.g. UTF-8 input) would | ||||
| 	// be sign-extended to negative values, and 0xFF would be indistinguishable | ||||
| 	// from the -1 "no data" result. | ||||
| 	unsigned char rxchar = 0; | ||||
| 	int rread = read(fileno(stdin), &rxchar, 1); | ||||
|  | ||||
| 	if( rread > 0 ) // Tricky: getchar can't be used with arrow keys. | ||||
| 		return rxchar; | ||||
| 	else | ||||
| 		return -1; | ||||
| } | ||||
|  | ||||
| // Report whether stdin has unread input. | ||||
| // Returns 1 if bytes are waiting, 0 if none are, and -1 once end-of-input | ||||
| // has been detected (the condition is latched in is_eofd). | ||||
| static int IsKBHit() | ||||
| { | ||||
| 	if( is_eofd ) return -1; | ||||
| 	// Start from zero and fall back to zero if the query fails, so the tests | ||||
| 	// below never read an indeterminate value. | ||||
| 	int byteswaiting = 0; | ||||
| 	if( ioctl(0, FIONREAD, &byteswaiting) != 0 ) | ||||
| 		byteswaiting = 0; | ||||
| 	// With nothing pending, a zero-length write to stdin is used as a probe; a | ||||
| 	// non-zero result is treated as end-of-file. | ||||
| 	// NOTE(review): presumably this distinguishes a redirected/closed stdin from an idle terminal - confirm. | ||||
| 	if( !byteswaiting && write( fileno(stdin), 0, 0 ) != 0 ) { is_eofd = 1; return -1; } // Is end-of-file for stdin | ||||
| 	return !!byteswaiting; | ||||
| } | ||||
|  | ||||
|  | ||||
| #endif | ||||
|  | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Remaining helper functions | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| // Exception hook: receives the instruction word (ir) and the trap code and | ||||
| // returns the code to continue with. Currently every code is passed through | ||||
| // unchanged; ir is not used. | ||||
| static uint32_t HandleException( uint32_t ir, uint32_t code ) | ||||
| { | ||||
| 	switch( code ) | ||||
| 	{ | ||||
| 		case 3: | ||||
| 			// Weird opcode emitted by duktape on exit. | ||||
| 			// Could handle other opcodes here. | ||||
| 			break; | ||||
| 		default: | ||||
| 			break; | ||||
| 	} | ||||
| 	return code; | ||||
| } | ||||
|  | ||||
| // Handle a guest store to a memory-mapped control address. | ||||
| // addy is the guest address and val the value being stored. | ||||
| // Returns val for a SYSCON write (after advancing core->pc by 4) and 0 for | ||||
| // every other address, including ones that are not recognised. | ||||
| static uint32_t HandleControlStore( uint32_t addy, uint32_t val ) | ||||
| { | ||||
| 	switch( addy ) | ||||
| 	{ | ||||
| 		case 0x10000000: //UART 8250 / 16550 Data Buffer | ||||
| 			printf( "%c", val ); | ||||
| 			fflush( stdout ); | ||||
| 			break; | ||||
| 		case 0x11004004: //CLNT, high word of the timer match value | ||||
| 			core->timermatchh = val; | ||||
| 			break; | ||||
| 		case 0x11004000: //CLNT, low word of the timer match value | ||||
| 			core->timermatchl = val; | ||||
| 			break; | ||||
| 		case 0x11100000: //SYSCON (reboot, poweroff, etc.) | ||||
| 			core->pc = core->pc + 4; | ||||
| 			return val; // NOTE: PC will be PC of Syscon. | ||||
| 		default: | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
|  | ||||
| // Handle a guest load from a memory-mapped control address and return the | ||||
| // value the guest should see. Unrecognised addresses read as 0. | ||||
| static uint32_t HandleControlLoad( uint32_t addy ) | ||||
| { | ||||
| 	// Emulating a 8250 / 16550 UART | ||||
| 	switch( addy ) | ||||
| 	{ | ||||
| 		case 0x10000005: // Status: 0x60 combined with the keyboard-hit result. | ||||
| 			return 0x60 | IsKBHit(); | ||||
| 		case 0x10000000: // Data: only consume a byte when one is reported waiting. | ||||
| 			if( IsKBHit() ) | ||||
| 				return ReadKBByte(); | ||||
| 			break; | ||||
| 		case 0x1100bffc: // https://chromitem-soc.readthedocs.io/en/latest/clint.html | ||||
| 			return core->timerh; | ||||
| 		case 0x1100bff8: | ||||
| 			return core->timerl; | ||||
| 		default: | ||||
| 			break; | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| // Handle a write to one of the CSR numbers used here as debug-print channels: | ||||
| //   0x136 - print value as a decimal integer | ||||
| //   0x137 - print value as 8 hex digits | ||||
| //   0x138 - print the NUL-terminated string stored at guest address `value` | ||||
| //   0x139 - print value as a single character | ||||
| // Any other csrno is ignored. image is the host pointer to guest RAM. | ||||
| // Every channel flushes stdout so the output appears immediately. | ||||
| static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value ) | ||||
| { | ||||
| 	if( csrno == 0x136 ) | ||||
| 	{ | ||||
| 		printf( "%d", (int)value ); fflush( stdout ); | ||||
| 	} | ||||
| 	else if( csrno == 0x137 ) | ||||
| 	{ | ||||
| 		printf( "%08x", value ); fflush( stdout ); | ||||
| 	} | ||||
| 	else if( csrno == 0x138 ) | ||||
| 	{ | ||||
| 		//Print "string" | ||||
| 		uint32_t ptrstart = value - MINIRV32_RAM_IMAGE_OFFSET; | ||||
| 		uint32_t ptrend = ptrstart; | ||||
| 		if( ptrstart >= ram_amt ) | ||||
| 		{ | ||||
| 			// The address lies outside guest RAM: report it and stop here. | ||||
| 			printf( "DEBUG PASSED INVALID PTR (%08x)\n", value ); | ||||
| 			fflush( stdout ); | ||||
| 			return; | ||||
| 		} | ||||
| 		// Scan for the terminator, never reading past the end of RAM. | ||||
| 		while( ptrend < ram_amt && image[ptrend] != 0 ) | ||||
| 			ptrend++; | ||||
| 		if( ptrend != ptrstart ) | ||||
| 		{ | ||||
| 			fwrite( image + ptrstart, ptrend - ptrstart, 1, stdout ); | ||||
| 			fflush( stdout ); | ||||
| 		} | ||||
| 	} | ||||
| 	else if( csrno == 0x139 ) | ||||
| 	{ | ||||
| 		putchar( value ); fflush( stdout ); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // Handle a read of CSR 0x140, used here as a keyboard-input channel: | ||||
| // yields the next input byte, or -1 when no key is waiting. | ||||
| // Every other csrno reads as 0. image is not used. | ||||
| static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno ) | ||||
| { | ||||
| 	if( csrno != 0x140 ) | ||||
| 		return 0; | ||||
|  | ||||
| 	return IsKBHit() ? ReadKBByte() : -1; | ||||
| } | ||||
|  | ||||
| // Parse an integer from text, choosing the base from its prefix: | ||||
| //   "0x..." -> hexadecimal, "0b..." -> binary, "0..." -> octal, otherwise decimal. | ||||
| // Returns defaultNumber when number is NULL, empty, or no digits could be parsed; | ||||
| // the bare string "0" yields 0. | ||||
| static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber ) | ||||
| { | ||||
| 	if( number == 0 || number[0] == 0 ) | ||||
| 		return defaultNumber; | ||||
|  | ||||
| 	const char * digits = number; | ||||
| 	int base = 10; | ||||
| 	if( digits[0] == '0' ) | ||||
| 	{ | ||||
| 		switch( digits[1] ) | ||||
| 		{ | ||||
| 			case 0:   return 0;                       // Just "0". | ||||
| 			case 'x': base = 16; digits += 2; break;  // Skip "0x". | ||||
| 			case 'b': base = 2;  digits += 2; break;  // Skip "0b". | ||||
| 			default:  base = 8;  digits += 1; break;  // Skip the leading zero only. | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	char * stop; | ||||
| 	uint64_t parsed = strtoll( digits, &stop, base ); | ||||
| 	// strtoll leaves stop == digits when it consumed nothing. | ||||
| 	return ( stop == digits ) ? defaultNumber : (int64_t)parsed; | ||||
| } | ||||
|  | ||||
| // Print a one-shot snapshot of the processor to stdout: the program counter, | ||||
| // the instruction word at that address (or a placeholder when the PC does not | ||||
| // point into RAM), and all 32 integer registers under their ABI names. | ||||
| static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image ) | ||||
| { | ||||
| 	uint32_t * x = core->regs; | ||||
| 	uint32_t pc = core->pc; | ||||
| 	uint32_t image_ofs = pc - MINIRV32_RAM_IMAGE_OFFSET; // PC as an offset into ram_image. | ||||
|  | ||||
| 	printf( "PC: %08x ", pc ); | ||||
|  | ||||
| 	// image_ofs is unsigned, so a PC below the RAM base wraps to a large value | ||||
| 	// and is rejected by the same upper-bound test. | ||||
| 	if( image_ofs < ram_amt - 3 ) | ||||
| 	{ | ||||
| 		uint32_t ir = *((uint32_t*)(&((uint8_t*)ram_image)[image_ofs])); | ||||
| 		printf( "[0x%08x] ", ir ); | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		printf( "[xxxxxxxxxx] " ); | ||||
| 	} | ||||
|  | ||||
| 	printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ", | ||||
| 		x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], | ||||
| 		x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15] ); | ||||
| 	printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n", | ||||
| 		x[16], x[17], x[18], x[19], x[20], x[21], x[22], x[23], | ||||
| 		x[24], x[25], x[26], x[27], x[28], x[29], x[30], x[31] ); | ||||
| } | ||||
|  | ||||
							
								
								
									
										547
									
								
								test_rv_vm/mini-rv32ima.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										547
									
								
								test_rv_vm/mini-rv32ima.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,547 @@ | ||||
| // Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses. | ||||
|  | ||||
| #ifndef _MINI_RV32IMAH_H | ||||
| #define _MINI_RV32IMAH_H | ||||
|  | ||||
| /** | ||||
|     To use mini-rv32ima.h for the bare minimum, do the following: | ||||
|  | ||||
| 	#define MINI_RV32_RAM_SIZE ram_amt | ||||
| 	#define MINIRV32_IMPLEMENTATION | ||||
|  | ||||
| 	#include "mini-rv32ima.h" | ||||
|  | ||||
| 	Though, that's not _that_ interesting. You probably want I/O! | ||||
|  | ||||
|  | ||||
| 	Notes: | ||||
| 		* There is a dedicated CLNT at 0x10000000. | ||||
| 		* There is free MMIO from there to 0x12000000. | ||||
| 		* You can put things like a UART, or whatever there. | ||||
| 		* Feel free to override any of the functionality with macros. | ||||
| */ | ||||
|  | ||||
| // Every macro below is a default that applies only when the including file | ||||
| // has not already defined a macro of the same name. | ||||
|  | ||||
| // Diagnostic hook; the default expands to an empty statement. | ||||
| #ifndef MINIRV32WARN | ||||
| 	#define MINIRV32WARN( x... ); | ||||
| #endif | ||||
|  | ||||
| // Storage-class prefix placed in front of MiniRV32IMAStep. | ||||
| #ifndef MINIRV32_DECORATE | ||||
| 	#define MINIRV32_DECORATE static | ||||
| #endif | ||||
|  | ||||
| // Guest address that corresponds to offset 0 of the RAM image. | ||||
| #ifndef MINIRV32_RAM_IMAGE_OFFSET | ||||
| 	#define MINIRV32_RAM_IMAGE_OFFSET  0x80000000 | ||||
| #endif | ||||
|  | ||||
| // Predicate: true when guest address n lies in the memory-mapped I/O window. | ||||
| #ifndef MINIRV32_MMIO_RANGE | ||||
| 	#define MINIRV32_MMIO_RANGE(n)  (0x10000000 <= (n) && (n) < 0x12000000) | ||||
| #endif | ||||
|  | ||||
| // Hook invoked as MINIRV32_POSTEXEC( pc, ir, trap ) after an instruction; empty by default. | ||||
| #ifndef MINIRV32_POSTEXEC | ||||
| 	#define MINIRV32_POSTEXEC(...); | ||||
| #endif | ||||
|  | ||||
| // Hook invoked for stores that land in the MMIO window; empty by default. | ||||
| #ifndef MINIRV32_HANDLE_MEM_STORE_CONTROL | ||||
| 	#define MINIRV32_HANDLE_MEM_STORE_CONTROL(...); | ||||
| #endif | ||||
|  | ||||
| // Hook invoked for loads that land in the MMIO window; empty by default. | ||||
| #ifndef MINIRV32_HANDLE_MEM_LOAD_CONTROL | ||||
| 	#define MINIRV32_HANDLE_MEM_LOAD_CONTROL(...); | ||||
| #endif | ||||
|  | ||||
| // Hook invoked for writes to CSR numbers the step function does not handle itself; empty by default. | ||||
| #ifndef MINIRV32_OTHERCSR_WRITE | ||||
| 	#define MINIRV32_OTHERCSR_WRITE(...); | ||||
| #endif | ||||
|  | ||||
| // Hook invoked for reads of CSR numbers the step function does not handle itself; empty by default. | ||||
| #ifndef MINIRV32_OTHERCSR_READ | ||||
| 	#define MINIRV32_OTHERCSR_READ(...); | ||||
| #endif | ||||
|  | ||||
| // Default memory bus: direct host accesses at byte offset ofs from `image`, | ||||
| // which must be a variable in scope where these macros are used. | ||||
| #ifndef MINIRV32_CUSTOM_MEMORY_BUS | ||||
| 	#define MINIRV32_STORE4( ofs, val ) *(uint32_t*)(image + ofs) = val | ||||
| 	#define MINIRV32_STORE2( ofs, val ) *(uint16_t*)(image + ofs) = val | ||||
| 	#define MINIRV32_STORE1( ofs, val ) *(uint8_t*)(image + ofs) = val | ||||
| 	#define MINIRV32_LOAD4( ofs ) *(uint32_t*)(image + ofs) | ||||
| 	#define MINIRV32_LOAD2( ofs ) *(uint16_t*)(image + ofs) | ||||
| 	#define MINIRV32_LOAD1( ofs ) *(uint8_t*)(image + ofs) | ||||
| 	#define MINIRV32_LOAD2_SIGNED( ofs ) *(int16_t*)(image + ofs) | ||||
| 	#define MINIRV32_LOAD1_SIGNED( ofs ) *(int8_t*)(image + ofs) | ||||
| #endif | ||||
|  | ||||
| // As a note: We group these fields in fours, because in HLSL, we will be operating | ||||
| // with uint4's.  We are going to move uint4 data to/from system RAM. | ||||
| // | ||||
| // We're going to try to keep the full processor state to 12 x uint4. | ||||
| // (32 registers + 16 further words = 48 x uint32_t.) | ||||
| struct MiniRV32IMAState | ||||
| { | ||||
| 	// Integer registers x0..x31. | ||||
| 	uint32_t regs[32]; | ||||
|  | ||||
| 	uint32_t pc; | ||||
| 	uint32_t mstatus; | ||||
| 	// Low and high words of the instruction/cycle counter. | ||||
| 	uint32_t cyclel; | ||||
| 	uint32_t cycleh; | ||||
|  | ||||
| 	// Low and high words of the timer, advanced by elapsedUs on every step call, | ||||
| 	// and of the match value it is compared against to raise the timer interrupt. | ||||
| 	uint32_t timerl; | ||||
| 	uint32_t timerh; | ||||
| 	uint32_t timermatchl; | ||||
| 	uint32_t timermatchh; | ||||
|  | ||||
| 	uint32_t mscratch; | ||||
| 	uint32_t mtvec; | ||||
| 	uint32_t mie; | ||||
| 	uint32_t mip; | ||||
|  | ||||
| 	uint32_t mepc; | ||||
| 	uint32_t mtval; | ||||
| 	uint32_t mcause; | ||||
|  | ||||
| 	// Note: only a few bits are used.  (Machine = 3, User = 0) | ||||
| 	// Bits 0..1 = privilege. | ||||
| 	// Bit 2 = WFI (Wait for interrupt) | ||||
| 	// Bit 3+ = Load/Store reservation LSBs. | ||||
| 	uint32_t extraflags; | ||||
| }; | ||||
|  | ||||
| // Forward declaration of the step function. It is omitted when the including | ||||
| // file supplies its own prototype through MINIRV32_STEPPROTO. | ||||
| #ifndef MINIRV32_STEPPROTO | ||||
| MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count ); | ||||
| #endif | ||||
|  | ||||
| #ifdef MINIRV32_IMPLEMENTATION | ||||
|  | ||||
| // Default accessors for processor state; they expect a variable named `state` | ||||
| // (pointer to struct MiniRV32IMAState) to be in scope. Define | ||||
| // MINIRV32_CUSTOM_INTERNALS to supply different ones. | ||||
| #ifndef MINIRV32_CUSTOM_INTERNALS | ||||
| #define CSR( x ) state->x | ||||
| #define SETCSR( x, val ) { state->x = val; } | ||||
| #define REG( x ) state->regs[x] | ||||
| #define REGSET( x, val ) { state->regs[x] = val; } | ||||
| #endif | ||||
|  | ||||
| // Advance the emulated RV32IMA core. | ||||
| // | ||||
| //   state        - processor state (registers, CSRs, timer); updated in place. | ||||
| //   image        - host pointer to guest RAM; guest address MINIRV32_RAM_IMAGE_OFFSET is image[0]. | ||||
| //   vProcAddress - not referenced anywhere in this function body. | ||||
| //   elapsedUs    - amount added to the 64-bit timerh:timerl counter before executing. | ||||
| //   count        - maximum number of instructions to execute in this call. | ||||
| // | ||||
| // Returns 1 when the core is in, or has just entered, wait-for-interrupt; otherwise 0. | ||||
| // A trap raised while executing ends the instruction loop early and is delivered | ||||
| // (mcause/mtval/mepc/mstatus updated, pc set to mtvec) before returning. | ||||
| // NOTE(review): the hook macros (MINIRV32_HANDLE_MEM_STORE_CONTROL etc.) are supplied by the | ||||
| // including file and may expand to code that returns other values - confirm there. | ||||
| // | ||||
| // Internal conventions used throughout the body: | ||||
| //   * Exceptions are recorded in `trap` as (mcause + 1) so that 0 can mean "no trap"; | ||||
| //     interrupts have bit 31 set. | ||||
| //   * The loop always ends an instruction with pc += 4, so control transfers store | ||||
| //     (target - 4) into pc. | ||||
| #ifndef MINIRV32_STEPPROTO | ||||
| MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count ) | ||||
| #else | ||||
| MINIRV32_STEPPROTO | ||||
| #endif | ||||
| { | ||||
| 	// Advance the 64-bit timer, carrying into the high word on wrap-around. | ||||
| 	uint32_t new_timer = CSR( timerl ) + elapsedUs; | ||||
| 	if( new_timer < CSR( timerl ) ) CSR( timerh )++; | ||||
| 	CSR( timerl ) = new_timer; | ||||
|  | ||||
| 	// Handle Timer interrupt. | ||||
| 	// (A match value of zero in both words means the comparison is disabled.) | ||||
| 	if( ( CSR( timerh ) > CSR( timermatchh ) || ( CSR( timerh ) == CSR( timermatchh ) && CSR( timerl ) > CSR( timermatchl ) ) ) && ( CSR( timermatchh ) || CSR( timermatchl ) ) ) | ||||
| 	{ | ||||
| 		CSR( extraflags ) &= ~4; // Clear WFI | ||||
| 		CSR( mip ) |= 1<<7; //MTIP of MIP // https://stackoverflow.com/a/61916199/2926815  Fire interrupt. | ||||
| 	} | ||||
| 	else | ||||
| 		CSR( mip ) &= ~(1<<7); | ||||
|  | ||||
| 	// If WFI, don't run processor. | ||||
| 	if( CSR( extraflags ) & 4 ) | ||||
| 		return 1; | ||||
|  | ||||
| 	uint32_t trap = 0; | ||||
| 	uint32_t rval = 0; | ||||
| 	uint32_t pc = CSR( pc ); | ||||
| 	uint32_t cycle = CSR( cyclel ); | ||||
|  | ||||
| 	if( ( CSR( mip ) & (1<<7) ) && ( CSR( mie ) & (1<<7) /*mtie*/ ) && ( CSR( mstatus ) & 0x8 /*mie*/) ) | ||||
| 	{ | ||||
| 		// Timer interrupt. | ||||
| 		// pc is pre-decremented because the interrupt path below adds 4 back before saving mepc. | ||||
| 		trap = 0x80000007; | ||||
| 		pc -= 4; | ||||
| 	} | ||||
| 	else // No timer interrupt?  Execute a bunch of instructions. | ||||
| 	for( int icount = 0; icount < count; icount++ ) | ||||
| 	{ | ||||
| 		uint32_t ir = 0; | ||||
| 		rval = 0; | ||||
| 		cycle++; | ||||
| 		uint32_t ofs_pc = pc - MINIRV32_RAM_IMAGE_OFFSET; | ||||
|  | ||||
| 		if( ofs_pc >= MINI_RV32_RAM_SIZE ) | ||||
| 		{ | ||||
| 			trap = 1 + 1;  // Handle access violation on instruction read. | ||||
| 			break; | ||||
| 		} | ||||
| 		else if( ofs_pc & 3 ) | ||||
| 		{ | ||||
| 			trap = 1 + 0;  //Handle PC-misaligned access | ||||
| 			break; | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			ir = MINIRV32_LOAD4( ofs_pc ); | ||||
| 			// Destination register index; cleared below by instructions that write no register. | ||||
| 			uint32_t rdid = (ir >> 7) & 0x1f; | ||||
|  | ||||
| 			switch( ir & 0x7f ) | ||||
| 			{ | ||||
| 				case 0x37: // LUI (0b0110111) | ||||
| 					rval = ( ir & 0xfffff000 ); | ||||
| 					break; | ||||
| 				case 0x17: // AUIPC (0b0010111) | ||||
| 					rval = pc + ( ir & 0xfffff000 ); | ||||
| 					break; | ||||
| 				case 0x6F: // JAL (0b1101111) | ||||
| 				{ | ||||
| 					int32_t reladdy = ((ir & 0x80000000)>>11) | ((ir & 0x7fe00000)>>20) | ((ir & 0x00100000)>>9) | ((ir&0x000ff000)); | ||||
| 					if( reladdy & 0x00100000 ) reladdy |= 0xffe00000; // Sign extension. | ||||
| 					rval = pc + 4; | ||||
| 					pc = pc + reladdy - 4; | ||||
| 					break; | ||||
| 				} | ||||
| 				case 0x67: // JALR (0b1100111) | ||||
| 				{ | ||||
| 					uint32_t imm = ir >> 20; | ||||
| 					int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0); | ||||
| 					rval = pc + 4; | ||||
| 					pc = ( (REG( (ir >> 15) & 0x1f ) + imm_se) & ~1) - 4; | ||||
| 					break; | ||||
| 				} | ||||
| 				case 0x63: // Branch (0b1100011) | ||||
| 				{ | ||||
| 					uint32_t immm4 = ((ir & 0xf00)>>7) | ((ir & 0x7e000000)>>20) | ((ir & 0x80) << 4) | ((ir >> 31)<<12); | ||||
| 					if( immm4 & 0x1000 ) immm4 |= 0xffffe000; | ||||
| 					int32_t rs1 = REG((ir >> 15) & 0x1f); | ||||
| 					int32_t rs2 = REG((ir >> 20) & 0x1f); | ||||
| 					// From here on immm4 holds (branch target - 4). | ||||
| 					immm4 = pc + immm4 - 4; | ||||
| 					rdid = 0; | ||||
| 					switch( ( ir >> 12 ) & 0x7 ) | ||||
| 					{ | ||||
| 						// BEQ, BNE, BLT, BGE, BLTU, BGEU | ||||
| 						case 0: if( rs1 == rs2 ) pc = immm4; break; | ||||
| 						case 1: if( rs1 != rs2 ) pc = immm4; break; | ||||
| 						case 4: if( rs1 < rs2 ) pc = immm4; break; | ||||
| 						case 5: if( rs1 >= rs2 ) pc = immm4; break; //BGE | ||||
| 						case 6: if( (uint32_t)rs1 < (uint32_t)rs2 ) pc = immm4; break;   //BLTU | ||||
| 						case 7: if( (uint32_t)rs1 >= (uint32_t)rs2 ) pc = immm4; break;  //BGEU | ||||
| 						default: trap = (2+1); | ||||
| 					} | ||||
| 					break; | ||||
| 				} | ||||
| 				case 0x03: // Load (0b0000011) | ||||
| 				{ | ||||
| 					uint32_t rs1 = REG((ir >> 15) & 0x1f); | ||||
| 					uint32_t imm = ir >> 20; | ||||
| 					int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0); | ||||
| 					uint32_t rsval = rs1 + imm_se; | ||||
|  | ||||
| 					// Unsigned compare: addresses below the RAM base wrap around and are rejected too. | ||||
| 					rsval -= MINIRV32_RAM_IMAGE_OFFSET; | ||||
| 					if( rsval >= MINI_RV32_RAM_SIZE-3 ) | ||||
| 					{ | ||||
| 						rsval += MINIRV32_RAM_IMAGE_OFFSET; | ||||
| 						if( MINIRV32_MMIO_RANGE( rsval ) )  // UART, CLNT | ||||
| 						{ | ||||
| 							MINIRV32_HANDLE_MEM_LOAD_CONTROL( rsval, rval ); | ||||
| 						} | ||||
| 						else | ||||
| 						{ | ||||
| 							trap = (5+1); | ||||
| 							rval = rsval; | ||||
| 						} | ||||
| 					} | ||||
| 					else | ||||
| 					{ | ||||
| 						switch( ( ir >> 12 ) & 0x7 ) | ||||
| 						{ | ||||
| 							//LB, LH, LW, LBU, LHU | ||||
| 							case 0: rval = MINIRV32_LOAD1_SIGNED( rsval ); break; | ||||
| 							case 1: rval = MINIRV32_LOAD2_SIGNED( rsval ); break; | ||||
| 							case 2: rval = MINIRV32_LOAD4( rsval ); break; | ||||
| 							case 4: rval = MINIRV32_LOAD1( rsval ); break; | ||||
| 							case 5: rval = MINIRV32_LOAD2( rsval ); break; | ||||
| 							default: trap = (2+1); | ||||
| 						} | ||||
| 					} | ||||
| 					break; | ||||
| 				} | ||||
| 				case 0x23: // Store 0b0100011 | ||||
| 				{ | ||||
| 					uint32_t rs1 = REG((ir >> 15) & 0x1f); | ||||
| 					uint32_t rs2 = REG((ir >> 20) & 0x1f); | ||||
| 					uint32_t addy = ( ( ir >> 7 ) & 0x1f ) | ( ( ir & 0xfe000000 ) >> 20 ); | ||||
| 					if( addy & 0x800 ) addy |= 0xfffff000; | ||||
| 					addy += rs1 - MINIRV32_RAM_IMAGE_OFFSET; | ||||
| 					rdid = 0; | ||||
|  | ||||
| 					if( addy >= MINI_RV32_RAM_SIZE-3 ) | ||||
| 					{ | ||||
| 						addy += MINIRV32_RAM_IMAGE_OFFSET; | ||||
| 						if( MINIRV32_MMIO_RANGE( addy ) ) | ||||
| 						{ | ||||
| 							MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, rs2 ); | ||||
| 						} | ||||
| 						else | ||||
| 						{ | ||||
| 							trap = (7+1); // Store access fault. | ||||
| 							rval = addy; | ||||
| 						} | ||||
| 					} | ||||
| 					else | ||||
| 					{ | ||||
| 						switch( ( ir >> 12 ) & 0x7 ) | ||||
| 						{ | ||||
| 							//SB, SH, SW | ||||
| 							case 0: MINIRV32_STORE1( addy, rs2 ); break; | ||||
| 							case 1: MINIRV32_STORE2( addy, rs2 ); break; | ||||
| 							case 2: MINIRV32_STORE4( addy, rs2 ); break; | ||||
| 							default: trap = (2+1); | ||||
| 						} | ||||
| 					} | ||||
| 					break; | ||||
| 				} | ||||
| 				case 0x13: // Op-immediate 0b0010011 | ||||
| 				case 0x33: // Op           0b0110011 | ||||
| 				{ | ||||
| 					uint32_t imm = ir >> 20; | ||||
| 					imm = imm | (( imm & 0x800 )?0xfffff000:0); | ||||
| 					uint32_t rs1 = REG((ir >> 15) & 0x1f); | ||||
| 					// Opcode bit 5 distinguishes register-register (0x33) from immediate (0x13) forms. | ||||
| 					uint32_t is_reg = !!( ir & 0x20 ); | ||||
| 					uint32_t rs2 = is_reg ? REG(imm & 0x1f) : imm; | ||||
|  | ||||
| 					if( is_reg && ( ir & 0x02000000 ) ) | ||||
| 					{ | ||||
| 						switch( (ir>>12)&7 ) //0x02000000 = RV32M | ||||
| 						{ | ||||
| 							case 0: rval = rs1 * rs2; break; // MUL | ||||
| #ifndef CUSTOM_MULH // If compiling on a system that doesn't natively, or via libgcc support 64-bit math. | ||||
| 							case 1: rval = ((int64_t)((int32_t)rs1) * (int64_t)((int32_t)rs2)) >> 32; break; // MULH | ||||
| 							case 2: rval = ((int64_t)((int32_t)rs1) * (uint64_t)rs2) >> 32; break; // MULHSU | ||||
| 							case 3: rval = ((uint64_t)rs1 * (uint64_t)rs2) >> 32; break; // MULHU | ||||
| #else | ||||
| 							CUSTOM_MULH | ||||
| #endif | ||||
| 							case 4: if( rs2 == 0 ) rval = -1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? rs1 : ((int32_t)rs1 / (int32_t)rs2); break; // DIV | ||||
| 							case 5: if( rs2 == 0 ) rval = 0xffffffff; else rval = rs1 / rs2; break; // DIVU | ||||
| 							case 6: if( rs2 == 0 ) rval = rs1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? 0 : ((uint32_t)((int32_t)rs1 % (int32_t)rs2)); break; // REM | ||||
| 							case 7: if( rs2 == 0 ) rval = rs1; else rval = rs1 % rs2; break; // REMU | ||||
| 						} | ||||
| 					} | ||||
| 					else | ||||
| 					{ | ||||
| 						switch( (ir>>12)&7 ) // These could be either op-immediate or op commands.  Be careful. | ||||
| 						{ | ||||
| 							case 0: rval = (is_reg && (ir & 0x40000000) ) ? ( rs1 - rs2 ) : ( rs1 + rs2 ); break;  | ||||
| 							case 1: rval = rs1 << (rs2 & 0x1F); break; | ||||
| 							case 2: rval = (int32_t)rs1 < (int32_t)rs2; break; | ||||
| 							case 3: rval = rs1 < rs2; break; | ||||
| 							case 4: rval = rs1 ^ rs2; break; | ||||
| 							case 5: rval = (ir & 0x40000000 ) ? ( ((int32_t)rs1) >> (rs2 & 0x1F) ) : ( rs1 >> (rs2 & 0x1F) ); break; | ||||
| 							case 6: rval = rs1 | rs2; break; | ||||
| 							case 7: rval = rs1 & rs2; break; | ||||
| 						} | ||||
| 					} | ||||
| 					break; | ||||
| 				} | ||||
| 				case 0x0f: // 0b0001111 | ||||
| 					rdid = 0;   // fencetype = (ir >> 12) & 0b111; We ignore fences in this impl. | ||||
| 					break; | ||||
| 				case 0x73: // Zifencei+Zicsr  (0b1110011) | ||||
| 				{ | ||||
| 					uint32_t csrno = ir >> 20; | ||||
| 					uint32_t microop = ( ir >> 12 ) & 0x7; | ||||
| 					if( (microop & 3) ) // It's a Zicsr function. | ||||
| 					{ | ||||
| 						int rs1imm = (ir >> 15) & 0x1f; | ||||
| 						uint32_t rs1 = REG(rs1imm); | ||||
| 						uint32_t writeval = rs1; | ||||
|  | ||||
| 						// https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf | ||||
| 						// Generally, support for Zicsr | ||||
| 						// Step 1: read the current CSR value into rval (written back to rd). | ||||
| 						switch( csrno ) | ||||
| 						{ | ||||
| 						case 0x340: rval = CSR( mscratch ); break; | ||||
| 						case 0x305: rval = CSR( mtvec ); break; | ||||
| 						case 0x304: rval = CSR( mie ); break; | ||||
| 						case 0xC00: rval = cycle; break; | ||||
| 						case 0x344: rval = CSR( mip ); break; | ||||
| 						case 0x341: rval = CSR( mepc ); break; | ||||
| 						case 0x300: rval = CSR( mstatus ); break; //mstatus | ||||
| 						case 0x342: rval = CSR( mcause ); break; | ||||
| 						case 0x343: rval = CSR( mtval ); break; | ||||
| 						case 0xf11: rval = 0xff0ff0ff; break; //mvendorid | ||||
| 						case 0x301: rval = 0x40401101; break; //misa (XLEN=32, IMA+X) | ||||
| 						//case 0x3B0: rval = 0; break; //pmpaddr0 | ||||
| 						//case 0x3a0: rval = 0; break; //pmpcfg0 | ||||
| 						//case 0xf12: rval = 0x00000000; break; //marchid | ||||
| 						//case 0xf13: rval = 0x00000000; break; //mimpid | ||||
| 						//case 0xf14: rval = 0x00000000; break; //mhartid | ||||
| 						default: | ||||
| 							MINIRV32_OTHERCSR_READ( csrno, rval ); | ||||
| 							break; | ||||
| 						} | ||||
|  | ||||
| 						// Step 2: compute the value to write from the old value and the operand. | ||||
| 						switch( microop ) | ||||
| 						{ | ||||
| 							case 1: writeval = rs1; break;  			//CSRRW | ||||
| 							case 2: writeval = rval | rs1; break;		//CSRRS | ||||
| 							case 3: writeval = rval & ~rs1; break;		//CSRRC | ||||
| 							case 5: writeval = rs1imm; break;			//CSRRWI | ||||
| 							case 6: writeval = rval | rs1imm; break;	//CSRRSI | ||||
| 							case 7: writeval = rval & ~rs1imm; break;	//CSRRCI | ||||
| 						} | ||||
|  | ||||
| 						// Step 3: store the new value into the CSR. | ||||
| 						switch( csrno ) | ||||
| 						{ | ||||
| 						case 0x340: SETCSR( mscratch, writeval ); break; | ||||
| 						case 0x305: SETCSR( mtvec, writeval ); break; | ||||
| 						case 0x304: SETCSR( mie, writeval ); break; | ||||
| 						case 0x344: SETCSR( mip, writeval ); break; | ||||
| 						case 0x341: SETCSR( mepc, writeval ); break; | ||||
| 						case 0x300: SETCSR( mstatus, writeval ); break; //mstatus | ||||
| 						case 0x342: SETCSR( mcause, writeval ); break; | ||||
| 						case 0x343: SETCSR( mtval, writeval ); break; | ||||
| 						//case 0x3a0: break; //pmpcfg0 | ||||
| 						//case 0x3B0: break; //pmpaddr0 | ||||
| 						//case 0xf11: break; //mvendorid | ||||
| 						//case 0xf12: break; //marchid | ||||
| 						//case 0xf13: break; //mimpid | ||||
| 						//case 0xf14: break; //mhartid | ||||
| 						//case 0x301: break; //misa | ||||
| 						default: | ||||
| 							MINIRV32_OTHERCSR_WRITE( csrno, writeval ); | ||||
| 							break; | ||||
| 						} | ||||
| 					} | ||||
| 					else if( microop == 0x0 ) // "SYSTEM" 0b000 | ||||
| 					{ | ||||
| 						rdid = 0; | ||||
| 						if( ( ( csrno & 0xff ) == 0x02 ) )  // MRET | ||||
| 						{ | ||||
| 							//https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf | ||||
| 							//Table 7.6. MRET then in mstatus/mstatush sets MPV=0, MPP=0, MIE=MPIE, and MPIE=1. La | ||||
| 							// Should also update mstatus to reflect correct mode. | ||||
| 							uint32_t startmstatus = CSR( mstatus ); | ||||
| 							uint32_t startextraflags = CSR( extraflags ); | ||||
| 							SETCSR( mstatus , (( startmstatus & 0x80) >> 4) | ((startextraflags&3) << 11) | 0x80 ); | ||||
| 							SETCSR( extraflags, (startextraflags & ~3) | ((startmstatus >> 11) & 3) ); | ||||
| 							pc = CSR( mepc ) -4; | ||||
| 						} else { | ||||
| 							switch (csrno) { | ||||
| 							case 0: | ||||
| 								// When ECALL_HANDLER is defined, the call is handed to it and no trap is raised. | ||||
| 								#ifndef ECALL_HANDLER | ||||
| 								trap = ( CSR( extraflags ) & 3) ? (11+1) : (8+1); // ECALL; 8 = "Environment call from U-mode"; 11 = "Environment call from M-mode" | ||||
| 								#else | ||||
| 								ECALL_HANDLER(state); | ||||
| 								trap = 0; | ||||
| 								#endif | ||||
| 								break; | ||||
| 							case 1: | ||||
| 								trap = (3+1); break; // EBREAK 3 = "Breakpoint" | ||||
| 							case 0x105: //WFI (Wait for interrupts) | ||||
| 								CSR( mstatus ) |= 8;    //Enable interrupts | ||||
| 								CSR( extraflags ) |= 4; //Inform environment we want to go to sleep. | ||||
| 								SETCSR( pc, pc + 4 ); | ||||
| 								return 1; | ||||
| 							default: | ||||
| 								trap = (2+1); break; // Illegal opcode. | ||||
| 							} | ||||
| 						} | ||||
| 					} | ||||
| 					else | ||||
| 						trap = (2+1); 				// Note microop 0b100 == undefined. | ||||
| 					break; | ||||
| 				} | ||||
| 				case 0x2f: // RV32A (0b00101111) | ||||
| 				{ | ||||
| 					uint32_t rs1 = REG((ir >> 15) & 0x1f); | ||||
| 					uint32_t rs2 = REG((ir >> 20) & 0x1f); | ||||
| 					uint32_t irmid = ( ir>>27 ) & 0x1f; | ||||
|  | ||||
| 					rs1 -= MINIRV32_RAM_IMAGE_OFFSET; | ||||
|  | ||||
| 					// We don't implement load/store from UART or CLNT with RV32A here. | ||||
|  | ||||
| 					if( rs1 >= MINI_RV32_RAM_SIZE-3 ) | ||||
| 					{ | ||||
| 						trap = (7+1); //Store/AMO access fault | ||||
| 						rval = rs1 + MINIRV32_RAM_IMAGE_OFFSET; | ||||
| 					} | ||||
| 					else | ||||
| 					{ | ||||
| 						// rval (the old memory word) is what ends up in rd; rs2 becomes the word to store. | ||||
| 						rval = MINIRV32_LOAD4( rs1 ); | ||||
|  | ||||
| 						// Referenced a little bit of https://github.com/franzflasch/riscv_em/blob/master/src/core/core.c | ||||
| 						uint32_t dowrite = 1; | ||||
| 						switch( irmid ) | ||||
| 						{ | ||||
| 							case 2: //LR.W (0b00010) | ||||
| 								dowrite = 0; | ||||
| 								// Remember the reserved RAM offset in bits 3+ of extraflags. | ||||
| 								CSR( extraflags ) = (CSR( extraflags ) & 0x07) | (rs1<<3); | ||||
| 								break; | ||||
| 							case 3:  //SC.W (0b00011) (Make sure we have a slot, and, it's valid) | ||||
| 								rval = ( CSR( extraflags ) >> 3 != ( rs1 & 0x1fffffff ) );  // Validate that our reservation slot is OK. | ||||
| 								dowrite = !rval; // Only write if slot is valid. | ||||
| 								break; | ||||
| 							case 1: break; //AMOSWAP.W (0b00001) | ||||
| 							case 0: rs2 += rval; break; //AMOADD.W (0b00000) | ||||
| 							case 4: rs2 ^= rval; break; //AMOXOR.W (0b00100) | ||||
| 							case 12: rs2 &= rval; break; //AMOAND.W (0b01100) | ||||
| 							case 8: rs2 |= rval; break; //AMOOR.W (0b01000) | ||||
| 							case 16: rs2 = ((int32_t)rs2<(int32_t)rval)?rs2:rval; break; //AMOMIN.W (0b10000) | ||||
| 							case 20: rs2 = ((int32_t)rs2>(int32_t)rval)?rs2:rval; break; //AMOMAX.W (0b10100) | ||||
| 							case 24: rs2 = (rs2<rval)?rs2:rval; break; //AMOMINU.W (0b11000) | ||||
| 							case 28: rs2 = (rs2>rval)?rs2:rval; break; //AMOMAXU.W (0b11100) | ||||
| 							default: trap = (2+1); dowrite = 0; break; //Not supported. | ||||
| 						} | ||||
| 						if( dowrite ) MINIRV32_STORE4( rs1, rs2 ); | ||||
| 					} | ||||
| 					break; | ||||
| 				} | ||||
| 				default: trap = (2+1); // Fault: Invalid opcode. | ||||
| 			} | ||||
|  | ||||
| 			// If there was a trap, do NOT allow register writeback. | ||||
| 			if( trap ) { | ||||
| 				SETCSR( pc, pc ); | ||||
| 				MINIRV32_POSTEXEC( pc, ir, trap ); | ||||
| 				break; | ||||
| 			} | ||||
|  | ||||
| 			// rdid == 0 covers both x0 and instructions that produce no register result. | ||||
| 			if( rdid ) | ||||
| 			{ | ||||
| 				REGSET( rdid, rval ); // Write back register. | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		MINIRV32_POSTEXEC( pc, ir, trap ); | ||||
|  | ||||
| 		pc += 4; | ||||
| 	} | ||||
|  | ||||
| 	// Handle traps and interrupts. | ||||
| 	if( trap ) | ||||
| 	{ | ||||
| 		if( trap & 0x80000000 ) // If prefixed with 1 in MSB, it's an interrupt, not a trap. | ||||
| 		{ | ||||
| 			SETCSR( mcause, trap ); | ||||
| 			SETCSR( mtval, 0 ); | ||||
| 			pc += 4; // PC needs to point to where the PC will return to. | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			// Exceptions were recorded as (cause + 1); rval carries the faulting | ||||
| 			// address for the load/store access faults. | ||||
| 			SETCSR( mcause,  trap - 1 ); | ||||
| 			SETCSR( mtval, (trap > 5 && trap <= 8)? rval : pc ); | ||||
| 		} | ||||
| 		SETCSR( mepc, pc ); //TRICKY: The kernel advances mepc automatically. | ||||
| 		//CSR( mstatus ) & 8 = MIE, & 0x80 = MPIE | ||||
| 		// On an interrupt, the system moves current MIE into MPIE | ||||
| 		SETCSR( mstatus, (( CSR( mstatus ) & 0x08) << 4) | (( CSR( extraflags ) & 3 ) << 11) ); | ||||
| 		pc = (CSR( mtvec ) - 4); | ||||
|  | ||||
| 		// If trapping, always enter machine mode. | ||||
| 		CSR( extraflags ) |= 3; | ||||
|  | ||||
| 		trap = 0; | ||||
| 		pc += 4; | ||||
| 	} | ||||
|  | ||||
| 	// Carry into the high word when the 32-bit cycle counter wrapped during this call. | ||||
| 	if( CSR( cyclel ) > cycle ) CSR( cycleh )++; | ||||
| 	SETCSR( cyclel, cycle ); | ||||
| 	SETCSR( pc, pc ); | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| #endif | ||||
|  | ||||
| #endif | ||||
|  | ||||
|  | ||||
							
								
								
									
										139
									
								
								test_rv_vm/ripes-vm.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								test_rv_vm/ripes-vm.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,139 @@ | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <stdint.h> | ||||
| #include <string.h> | ||||
|  | ||||
| struct MiniRV32IMAState; | ||||
| void ecall_handler(struct MiniRV32IMAState *state); | ||||
| #define ECALL_HANDLER(state) ecall_handler(state) | ||||
| #define MINIRV32WARN( x... ) printf( x ); | ||||
| #define MINIRV32_DECORATE  static | ||||
| #define MINI_RV32_RAM_SIZE (32 * 1024 * 1024) | ||||
| #define MINIRV32_IMPLEMENTATION | ||||
|  | ||||
| #define MINIRV32_RAM_IMAGE_OFFSET  0x0 | ||||
| #include "mini-rv32ima.h" | ||||
|  | ||||
| #define SYSCALL(num) (1025 + num) | ||||
| void ecall_handler(struct MiniRV32IMAState *state) { | ||||
|     uint32_t a0 = REG(10); | ||||
|     uint32_t a1 = REG(11); | ||||
|     switch (state->regs[17]) // x17 | a7 | ||||
|     { | ||||
|     case 1: | ||||
|         // PrintInt | ||||
|         printf("%d", a0); | ||||
|         break; | ||||
|     case 4: | ||||
|         // PrintString | ||||
|         printf("%s", a0); | ||||
|         break; | ||||
|     case 10: | ||||
|         fprintf(stderr, "\nexit: %d\n", a0); | ||||
|         exit(a0); | ||||
|     case 93: | ||||
|         fprintf(stderr, "\nmain return code: %d\n", a0); | ||||
|         exit(a0); | ||||
|     case SYSCALL(0): | ||||
|         // getchar(); | ||||
|         REGSET(10, getchar()); | ||||
|     case SYSCALL(1): | ||||
|         // putchar | ||||
|         putchar(a0); | ||||
|  | ||||
|     case SYSCALL(4): | ||||
|         // input int | ||||
|         scanf("%d", &a0); | ||||
|         REGSET(10, a0); | ||||
|         break; | ||||
|     case SYSCALL(5): | ||||
|         // input string | ||||
|         scanf("%s", a0); | ||||
|         REGSET(10, a0); | ||||
|         break; | ||||
|     default: | ||||
|         MINIRV32WARN("Unhandled ECALL: %d\n", state->regs[17]); | ||||
|         exit(1); | ||||
|         break; | ||||
|     } | ||||
| } | ||||
|  | ||||
| int main(int argc, char *argv[]) { | ||||
|     // gcc -DDEFAULT_FILE='\"flat.bin\"' .\ripes-vm.c -o rv32-vm.exe | ||||
|     struct MiniRV32IMAState state; | ||||
|     uint8_t *image = (uint8_t *)malloc(MINI_RV32_RAM_SIZE); | ||||
|  | ||||
|     // 初始化状态 | ||||
|     memset(&state, 0, sizeof(state)); | ||||
|     state.pc = 0; // 程序计数器从0开始 | ||||
|     state.mstatus = 0x80000000; // 设置机器模式 | ||||
|     state.mtvec = 0x1000; | ||||
|     state.mie = 0x7; // 启用所有中断 | ||||
|  | ||||
|     // 初始化内存 | ||||
|     memset(image, 0, MINI_RV32_RAM_SIZE); | ||||
|  | ||||
|     #ifndef DEFAULT_FILE | ||||
|     #define DEFAULT_FILE "../ccompiler/backend/test_rv.bin" | ||||
|     #endif | ||||
|     const char* filename = DEFAULT_FILE; | ||||
|     // 加载 flatbin 文件 | ||||
|     if (argc == 2) { | ||||
|         filename = argv[1]; | ||||
|     } | ||||
|      | ||||
|     FILE *file = fopen(filename, "rb"); | ||||
|     if (!file) { | ||||
|         fprintf(stderr, "Usage: %s <flatbin_file>\n", argv[0]); | ||||
|         printf("Failed to open file %s\n", filename); | ||||
|         return 1; | ||||
|     } | ||||
|  | ||||
|     fseek(file, 0, SEEK_END); | ||||
|     long flen = ftell(file); | ||||
|     fseek(file, 0, SEEK_SET); | ||||
|  | ||||
|     if (flen > MINI_RV32_RAM_SIZE) { | ||||
|         fprintf(stderr, "Flatbin file is too large\n"); | ||||
|         fclose(file); | ||||
|         return 1; | ||||
|     } | ||||
|  | ||||
|     fread(image, flen, 1, file); | ||||
|     fclose(file); | ||||
|  | ||||
|     // 运行模拟器 | ||||
|     while (1) { | ||||
|         int32_t ret = MiniRV32IMAStep(&state, image, MINIRV32_RAM_IMAGE_OFFSET, 0, 1); | ||||
|         if (ret != 0) { | ||||
|             printf("Exception or interrupt occurred at PC: %d\n", state.pc); | ||||
|             return ret; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     free(image); | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| // static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image ) | ||||
| // { | ||||
| // 	uint32_t pc = core->pc; | ||||
| // 	uint32_t pc_offset = pc - MINIRV32_RAM_IMAGE_OFFSET; | ||||
| // 	uint32_t ir = 0; | ||||
|  | ||||
| // 	printf( "PC: %08x ", pc ); | ||||
| // 	if( pc_offset >= 0 && pc_offset < ram_amt - 3 ) | ||||
| // 	{ | ||||
| // 		ir = *((uint32_t*)(&((uint8_t*)ram_image)[pc_offset])); | ||||
| // 		printf( "[0x%08x] ", ir );  | ||||
| // 	} | ||||
| // 	else | ||||
| // 		printf( "[xxxxxxxxxx] " );  | ||||
| // 	uint32_t * regs = core->regs; | ||||
| // 	printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ", | ||||
| // 		regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7], | ||||
| // 		regs[8], regs[9], regs[10], regs[11], regs[12], regs[13], regs[14], regs[15] ); | ||||
| // 	printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n", | ||||
| // 		regs[16], regs[17], regs[18], regs[19], regs[20], regs[21], regs[22], regs[23], | ||||
| // 		regs[24], regs[25], regs[26], regs[27], regs[28], regs[29], regs[30], regs[31] ); | ||||
| // } | ||||
		Reference in New Issue
	
	Block a user