feat add func call and rewrite codes

This commit is contained in:
ZZY
2025-03-07 12:29:53 +08:00
parent 09299e339c
commit 95bf44eb3f
37 changed files with 3369 additions and 1063 deletions

View File

@ -0,0 +1,13 @@
# Build the RV32IMA code generator ("ccompiler") against the frontend library.
# None of these targets except `ccompiler` names a file it produces.
.PHONY: all run frontend clean

all: ccompiler

# Compile test.c into a flat binary image.
run: ccompiler
	./ccompiler test.c flat.bin

ccompiler: frontend
	gcc -g rv32ima_codegen.c ../../middleend/ir.c -L../../frontend -lfrontend -o ccompiler

# Delegate to the frontend's own Makefile; $(MAKE) propagates flags/jobserver.
frontend:
	$(MAKE) -C ../../frontend

clean:
	rm -f ccompiler flat.bin

View File

@ -0,0 +1,338 @@
#ifndef __RV32I_GEN_H__
#define __RV32I_GEN_H__
/**
31 25 24 20 19 15 14 12 11 7 6 0
imm[31:12] rd 0110111 U lui
imm[31:12] rd 0010111 U auipc
imm[20|10:1|11|19:12] rd 1101111 J jal
imm[11:0] rs1 000 rd 1100111 I jalr
imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 B beq
imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 B bne
imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 B blt
imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 B bge
imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 B bltu
imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 B bgeu
imm[11:0] rs1 000 rd 0000011 I lb
imm[11:0] rs1 001 rd 0000011 I lh
imm[11:0] rs1 010 rd 0000011 I lw
imm[11:0] rs1 100 rd 0000011 I lbu
imm[11:0] rs1 101 rd 0000011 I lhu
imm[11:5] rs2 rs1 000 imm[4:0] 0100011 S sb
imm[11:5] rs2 rs1 001 imm[4:0] 0100011 S sh
imm[11:5] rs2 rs1 010 imm[4:0] 0100011 S sw
imm[11:0] rs1 000 rd 0010011 I addi
imm[11:0] rs1 010 rd 0010011 I slti
imm[11:0] rs1 011 rd 0010011 I sltiu
imm[11:0] rs1 100 rd 0010011 I xori
imm[11:0] rs1 110 rd 0010011 I ori
imm[11:0] rs1 111 rd 0010011 I andi
0000000 shamt rs1 001 rd 0010011 I slli
0000000 shamt rs1 101 rd 0010011 I srli
0100000 shamt rs1 101 rd 0010011 I srai
0000000 rs2 rs1 000 rd 0110011 R add
0100000 rs2 rs1 000 rd 0110011 R sub
0000000 rs2 rs1 001 rd 0110011 R sll
0000000 rs2 rs1 010 rd 0110011 R slt
0000000 rs2 rs1 011 rd 0110011 R sltu
0000000 rs2 rs1 100 rd 0110011 R xor
0000000 rs2 rs1 101 rd 0110011 R srl
0100000 rs2 rs1 101 rd 0110011 R sra
0000000 rs2 rs1 110 rd 0110011 R or
0000000 rs2 rs1 111 rd 0110011 R and
0000 pred succ 00000 000 00000 0001111 I fence
0000 0000 0000 00000 001 00000 0001111 I fence.i
000000000000 00000 000 00000 1110011 I ecall
000000000001 00000 000 00000 1110011 I ebreak
csr rs1 001 rd 1110011 I csrrw
csr rs1 010 rd 1110011 I csrrs
csr rs1 011 rd 1110011 I csrrc
csr zimm 101 rd 1110011 I csrrwi
csr zimm 110 rd 1110011 I csrrsi
csr zimm 111 rd 1110011 I csrrci
*/
#include <stdint.h>
// Register enumeration: the 32 integer registers by architectural name
// (REG_X0..REG_X31, value == register number) followed by mnemonic aliases.
typedef enum {
    REG_X0 = 0,
    REG_X1,  REG_X2,  REG_X3,  REG_X4,  REG_X5,  REG_X6,  REG_X7,  REG_X8,
    REG_X9,  REG_X10, REG_X11, REG_X12, REG_X13, REG_X14, REG_X15, REG_X16,
    REG_X17, REG_X18, REG_X19, REG_X20, REG_X21, REG_X22, REG_X23, REG_X24,
    REG_X25, REG_X26, REG_X27, REG_X28, REG_X29, REG_X30, REG_X31,

    /* aliases, listed in register-number order */
    REG_ZERO = REG_X0,
    REG_RA   = REG_X1,
    REG_SP   = REG_X2,
    REG_GP   = REG_X3,
    REG_TP   = REG_X4,
    REG_T0   = REG_X5,
    REG_T1   = REG_X6,
    REG_T2   = REG_X7,
    REG_S0   = REG_X8,
    REG_S1   = REG_X9,
    REG_A0   = REG_X10,
    REG_A1   = REG_X11,
    REG_A2   = REG_X12,
    REG_A3   = REG_X13,
    REG_A4   = REG_X14,
    REG_A5   = REG_X15,
    REG_A6   = REG_X16,
    REG_A7   = REG_X17,
    REG_S2   = REG_X18,
    REG_S3   = REG_X19,
    REG_S4   = REG_X20,
    REG_S5   = REG_X21,
    REG_S6   = REG_X22,
    REG_S7   = REG_X23,
    REG_S8   = REG_X24,
    REG_S9   = REG_X25,
    REG_S10  = REG_X26,
    REG_S11  = REG_X27,
    REG_T3   = REG_X28,
    REG_T4   = REG_X29,
    REG_T5   = REG_X30,
    REG_T6   = REG_X31,
} RV32Reg;
/******************** Immediate helpers ********************/
/*
 * All field extraction is done on uint32_t: a negative immediate is used as
 * its two's-complement bit pattern, and no shift ever moves a bit into the
 * sign position of a signed int.
 */
#define IMM_12BITS(imm) ((uint32_t)(imm) & 0xFFFu)
#define IMM_20BITS(imm) ((uint32_t)(imm) & 0xFFFFFu)
#define SHAMT_VAL(imm)  ((uint32_t)(imm) & 0x1Fu)
#define CSR_VAL(csr)    ((uint32_t)(csr) & 0xFFFu)
// B-type immediate layout [12|10:5|4:1|11]; `imm` is a byte offset
#define ENCODE_B_IMM(imm) ( \
    ((((uint32_t)(imm) >> 12) & 0x1u)  << 31) | /* imm[12]   -> instr[31]    */ \
    ((((uint32_t)(imm) >> 5)  & 0x3Fu) << 25) | /* imm[10:5] -> instr[30:25] */ \
    ((((uint32_t)(imm) >> 1)  & 0xFu)  << 8)  | /* imm[4:1]  -> instr[11:8]  */ \
    ((((uint32_t)(imm) >> 11) & 0x1u)  << 7))   /* imm[11]   -> instr[7]     */
// J-type immediate layout [20|10:1|11|19:12]; `imm` is a byte offset
#define ENCODE_J_IMM(imm) ( \
    ((((uint32_t)(imm) >> 20) & 0x1u)   << 31) | /* imm[20]    -> instr[31]    */ \
    ((((uint32_t)(imm) >> 1)  & 0x3FFu) << 21) | /* imm[10:1]  -> instr[30:21] */ \
    ((((uint32_t)(imm) >> 11) & 0x1u)   << 20) | /* imm[11]    -> instr[20]    */ \
    ((((uint32_t)(imm) >> 12) & 0xFFu)  << 12))  /* imm[19:12] -> instr[19:12] */
/******************** Instruction format builders ********************/
/* Each builder yields one 32-bit instruction word as a uint32_t. */
// R-type: funct7 | rs2 | rs1 | funct3 | rd | opcode
#define RV32_RTYPE(op, f3, f7, rd, rs1, rs2) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ((uint32_t)(f3) << 12) | \
    ((uint32_t)(rs1) << 15) | ((uint32_t)(rs2) << 20) | ((uint32_t)(f7) << 25)))
// I-type: imm[11:0] | rs1 | funct3 | rd | opcode
#define RV32_ITYPE(op, f3, rd, rs1, imm) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ((uint32_t)(f3) << 12) | \
    ((uint32_t)(rs1) << 15) | (IMM_12BITS(imm) << 20)))
// S-type: imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode
// (note: `imm` is expanded twice)
#define RV32_STYPE(op, f3, rs1, rs2, imm) ((uint32_t)( \
    (uint32_t)(op) | ((IMM_12BITS(imm) & 0xFE0u) << 20) | ((uint32_t)(rs1) << 15) | \
    ((uint32_t)(rs2) << 20) | ((uint32_t)(f3) << 12) | ((IMM_12BITS(imm) & 0x1Fu) << 7)))
// B-type: scrambled 13-bit offset | rs2 | rs1 | funct3 | opcode
#define RV32_BTYPE(op, f3, rs1, rs2, imm) ((uint32_t)( \
    (uint32_t)(op) | ENCODE_B_IMM(imm) | ((uint32_t)(rs1) << 15) | \
    ((uint32_t)(rs2) << 20) | ((uint32_t)(f3) << 12)))
// U-type: imm[31:12] | rd | opcode (the low 12 bits of `imm` are dropped)
#define RV32_UTYPE(op, rd, imm) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | (IMM_20BITS((uint32_t)(imm) >> 12) << 12)))
// J-type: scrambled 21-bit offset | rd | opcode
#define RV32_JTYPE(op, rd, imm) ((uint32_t)( \
    (uint32_t)(op) | ((uint32_t)(rd) << 7) | ENCODE_J_IMM(imm)))
/******************** U-type ********************/
#define LUI(rd, imm)   RV32_UTYPE(0x37, rd, imm)
#define AUIPC(rd, imm) RV32_UTYPE(0x17, rd, imm)
/******************** J-type ********************/
#define JAL(rd, imm) RV32_JTYPE(0x6F, rd, imm)
/******************** I-type ********************/
#define JALR(rd, rs1, imm) RV32_ITYPE(0x67, 0x0, rd, rs1, imm)
// Load instructions
#define LB(rd, rs1, imm)  RV32_ITYPE(0x03, 0x0, rd, rs1, imm)
#define LH(rd, rs1, imm)  RV32_ITYPE(0x03, 0x1, rd, rs1, imm)
#define LW(rd, rs1, imm)  RV32_ITYPE(0x03, 0x2, rd, rs1, imm)
#define LBU(rd, rs1, imm) RV32_ITYPE(0x03, 0x4, rd, rs1, imm)
#define LHU(rd, rs1, imm) RV32_ITYPE(0x03, 0x5, rd, rs1, imm)
// Immediate arithmetic
#define ADDI(rd, rs1, imm)  RV32_ITYPE(0x13, 0x0, rd, rs1, imm)
#define SLTI(rd, rs1, imm)  RV32_ITYPE(0x13, 0x2, rd, rs1, imm)
#define SLTIU(rd, rs1, imm) RV32_ITYPE(0x13, 0x3, rd, rs1, imm)
#define XORI(rd, rs1, imm)  RV32_ITYPE(0x13, 0x4, rd, rs1, imm)
#define ORI(rd, rs1, imm)   RV32_ITYPE(0x13, 0x6, rd, rs1, imm)
#define ANDI(rd, rs1, imm)  RV32_ITYPE(0x13, 0x7, rd, rs1, imm)
// Shift instructions: the 12-bit immediate field holds funct7[11:5] | shamt[4:0].
// The value passed to RV32_ITYPE must be that 12-bit field itself (not already
// shifted to bit 20), otherwise IMM_12BITS masks the shift amount away.
#define SLLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x1, rd, rs1, SHAMT_VAL(shamt))
#define SRLI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, SHAMT_VAL(shamt))
#define SRAI(rd, rs1, shamt) RV32_ITYPE(0x13, 0x5, rd, rs1, (0x400u | SHAMT_VAL(shamt)))
/******************** B-type ********************/
#define BEQ(rs1, rs2, imm)  RV32_BTYPE(0x63, 0x0, rs1, rs2, imm)
#define BNE(rs1, rs2, imm)  RV32_BTYPE(0x63, 0x1, rs1, rs2, imm)
#define BLT(rs1, rs2, imm)  RV32_BTYPE(0x63, 0x4, rs1, rs2, imm)
#define BGE(rs1, rs2, imm)  RV32_BTYPE(0x63, 0x5, rs1, rs2, imm)
#define BLTU(rs1, rs2, imm) RV32_BTYPE(0x63, 0x6, rs1, rs2, imm)
#define BGEU(rs1, rs2, imm) RV32_BTYPE(0x63, 0x7, rs1, rs2, imm)
/******************** S-type ********************/
/* Argument order follows assembly syntax: source register first, then base. */
#define SB(rs2, rs1, imm) RV32_STYPE(0x23, 0x0, rs1, rs2, imm)
#define SH(rs2, rs1, imm) RV32_STYPE(0x23, 0x1, rs1, rs2, imm)
#define SW(rs2, rs1, imm) RV32_STYPE(0x23, 0x2, rs1, rs2, imm)
/******************** R-type ********************/
#define ADD(rd, rs1, rs2)  RV32_RTYPE(0x33, 0x0, 0x00, rd, rs1, rs2)
#define SUB(rd, rs1, rs2)  RV32_RTYPE(0x33, 0x0, 0x20, rd, rs1, rs2)
#define SLL(rd, rs1, rs2)  RV32_RTYPE(0x33, 0x1, 0x00, rd, rs1, rs2)
#define SLT(rd, rs1, rs2)  RV32_RTYPE(0x33, 0x2, 0x00, rd, rs1, rs2)
#define SLTU(rd, rs1, rs2) RV32_RTYPE(0x33, 0x3, 0x00, rd, rs1, rs2)
#define XOR(rd, rs1, rs2)  RV32_RTYPE(0x33, 0x4, 0x00, rd, rs1, rs2)
#define SRL(rd, rs1, rs2)  RV32_RTYPE(0x33, 0x5, 0x00, rd, rs1, rs2)
#define SRA(rd, rs1, rs2)  RV32_RTYPE(0x33, 0x5, 0x20, rd, rs1, rs2)
#define OR(rd, rs1, rs2)   RV32_RTYPE(0x33, 0x6, 0x00, rd, rs1, rs2)
#define AND(rd, rs1, rs2)  RV32_RTYPE(0x33, 0x7, 0x00, rd, rs1, rs2)
/******************** I-type (system) ********************/
// fence: fm[31:28]=0 | pred[27:24] | succ[23:20] | 0 | 000 | 0 | 0001111
#define FENCE(pred, succ) ((uint32_t)( 0x0Fu | \
    (((uint32_t)(pred) & 0xFu) << 24) | (((uint32_t)(succ) & 0xFu) << 20) ))
#define FENCE_I() (uint32_t)( 0x100F )
#define ECALL()   (uint32_t)( 0x73 )
#define EBREAK()  (uint32_t)( 0x100073 )
// CSR instructions: the CSR number occupies the 12-bit immediate field.
#define CSRRW(rd, csr, rs) RV32_ITYPE(0x73, 0x1, rd, rs, CSR_VAL(csr))
#define CSRRS(rd, csr, rs) RV32_ITYPE(0x73, 0x2, rd, rs, CSR_VAL(csr))
#define CSRRC(rd, csr, rs) RV32_ITYPE(0x73, 0x3, rd, rs, CSR_VAL(csr))
// Immediate forms: the 5-bit zimm is carried in the rs1 field (bits 19:15).
#define CSRRWI(rd, csr, zimm) RV32_ITYPE(0x73, 0x5, rd, ((uint32_t)(zimm) & 0x1Fu), CSR_VAL(csr))
#define CSRRSI(rd, csr, zimm) RV32_ITYPE(0x73, 0x6, rd, ((uint32_t)(zimm) & 0x1Fu), CSR_VAL(csr))
#define CSRRCI(rd, csr, zimm) RV32_ITYPE(0x73, 0x7, rd, ((uint32_t)(zimm) & 0x1Fu), CSR_VAL(csr))
/* M-Extension: all share funct7 = 0x01 and are distinguished by funct3
 * (mul = 000, div = 100, rem = 110). */
#define MUL(rd, rs1, rs2) RV32_RTYPE(0x33, 0x0, 0x01, rd, rs1, rs2)
#define DIV(rd, rs1, rs2) RV32_RTYPE(0x33, 0x4, 0x01, rd, rs1, rs2)
#define REM(rd, rs1, rs2) RV32_RTYPE(0x33, 0x6, 0x01, rd, rs1, rs2)
/******************** Pseudo-instructions ********************/
// Each macro expands to the base instruction(s) the assembler would emit.
// nop (No operation)
#define NOP() ADDI(REG_X0, REG_X0, 0)
// neg rd, rs (Two's complement of rs)
#define NEG(rd, rs) SUB(rd, REG_ZERO, rs)
// negw rd, rs (Two's complement word of rs)
// NOTE(review): expands to SUBW, which this header does not define (it is an
// RV64 instruction); using NEGW will not compile until SUBW exists.
#define NEGW(rd, rs) SUBW(rd, REG_ZERO, rs)
// snez rd, rs (Set if != zero)
#define SNEZ(rd, rs) SLTU(rd, REG_X0, rs)
// sltz rd, rs (Set if < zero)
#define SLTZ(rd, rs) SLT(rd, rs, REG_X0)
// sgtz rd, rs (Set if > zero)
#define SGTZ(rd, rs) SLT(rd, REG_X0, rs)
// beqz rs, offset (Branch if = zero)
#define BEQZ(rs, offset) BEQ(rs, REG_X0, offset)
// bnez rs, offset (Branch if != zero)
#define BNEZ(rs, offset) BNE(rs, REG_X0, offset)
// blez rs, offset (Branch if <= zero)
#define BLEZ(rs, offset) BGE(REG_X0, rs, offset)
// bgez rs, offset (Branch if >= zero)
#define BGEZ(rs, offset) BGE(rs, REG_X0, offset)
// bltz rs, offset (Branch if < zero)
#define BLTZ(rs, offset) BLT(rs, REG_X0, offset)
// bgtz rs, offset (Branch if > zero)
#define BGTZ(rs, offset) BLT(REG_X0, rs, offset)
// j offset (Jump)
#define J(offset) JAL(REG_X0, offset)
// jr rs (Jump register)
#define JR(rs) JALR(REG_X0, rs, 0)
// ret (Return from subroutine)
#define RET() JALR(REG_X0, REG_RA, 0)
// tail offset (Tail call far-away subroutine) -- expands to TWO instructions.
// The register-indirect jump must be JALR (JAL takes no base register), and
// the AUIPC part is rounded by +0x800 because JALR sign-extends its low 12 bits.
#define TAIL_2(offset) AUIPC(REG_X6, (uint32_t)(offset) + 0x800u), JALR(REG_X0, REG_X6, offset)
#define TAIL(offset) TAIL_2(offset) // Warning: this is 2 instructions
// csrr (Read CSR); note the macro's parameter order is (csr, rd)
#define CSRR(csr, rd) CSRRS(rd, csr, REG_X0)
// csrw csr, rs (Write CSR)
#define CSRW(csr, rs) CSRRW(REG_X0, csr, rs)
// csrs csr, rs (Set bits in CSR)
#define CSRS(csr, rs) CSRRS(REG_X0, csr, rs)
// csrc csr, rs (Clear bits in CSR)
#define CSRC(csr, rs) CSRRC(REG_X0, csr, rs)
// csrci csr, imm (Immediate clear bits in CSR)
#define CSRCI(csr, imm) CSRRCI(REG_X0, csr, imm)
// csrrwi csr, imm (Write CSR immediate, result discarded)
#define CSRRWI2(csr, imm) CSRRWI(REG_X0, csr, imm)
// csrrsi csr, imm (Immediate set bits in CSR, result discarded)
#define CSRRSI2(csr, imm) CSRRSI(REG_X0, csr, imm)
// csrrci csr, imm (Immediate clear bits in CSR, result discarded)
#define CSRRCI2(csr, imm) CSRRCI(REG_X0, csr, imm)
// Floating-point CSR helpers, kept disabled (FCSR/FRM/FFLAGS are not defined here):
// // frcsr rd (Read FP control/status register)
// #define FRCSR(rd) CSRRS(rd, FCSR, REG_X0)
// // fscsr rs (Write FP control/status register)
// #define FSCSR(rs) CSRRW(REG_X0, FCSR, rs)
// // frrm rd (Read FP rounding mode)
// #define FRRM(rd) CSRRS(rd, FRM, REG_X0)
// // fsrm rs (Write FP rounding mode)
// #define FSRM(rs) CSRRW(REG_X0, FRM, rs)
// // frflags rd (Read FP exception flags)
// #define FRFLAGS(rd) CSRRS(rd, FFLAGS, REG_X0)
// // fsflags rs (Write FP exception flags)
// #define FSFLAGS(rs) CSRRW(REG_X0, FFLAGS, rs)
// Multi-instruction sequences. Each of LI / CALL / CALL_ABS expands to a
// comma-separated pair of instruction words (two array elements).
//
// The second instruction of each pair sign-extends its 12-bit immediate, so
// the upper part is built from (value + 0x800): when bit 11 of the value is
// set the low part contributes -0x1000 and the upper part must be one higher.
// li rd, num : load a 32-bit constant
#define LI(rd, num) \
    LUI(rd, (uint32_t)(num) + 0x800u), \
    ADDI(rd, rd, num)
// mv rd, rs
#define MV(rd, rs) ADDI(rd, rs, 0)
// not rd, rs
#define NOT(rd, rs) XORI(rd, rs, -1)
// call offset : pc-relative call, `offset` is relative to the AUIPC instruction
#define CALL(offset) \
    AUIPC(REG_X1, (uint32_t)(offset) + 0x800u), \
    JALR(REG_X1, REG_X1, offset)
// Call an absolute address. The upper bits are materialised in ra with LUI
// (x0 cannot hold them), then JALR adds the low 12 bits and links into ra.
#define CALL_ABS(addr) \
    LUI(REG_X1, (uint32_t)(addr) + 0x800u), \
    JALR(REG_X1, REG_X1, addr)
#ifdef RISCV_VM_BUILDIN_ECALL
// Environment-call helper sequences, available only when
// RISCV_VM_BUILDIN_ECALL is defined before including this header.
// Each expands to a comma-separated list of instruction words: the service
// number is placed in a7, an optional argument in a0, then ECALL is issued.
// All values are loaded with a single ADDI, so they must fit in 12 signed bits.
// Service 1 with a0 = num.
#define ECALL_PNT_INT(num) \
    ADDI(REG_A0, REG_X0, num), \
    ADDI(REG_A7, REG_X0, 0x1), \
    ECALL()
// Service 4 with a0 = str.
#define ECALL_PNT_STR(str) \
    ADDI(REG_A0, REG_X0, str), \
    ADDI(REG_A7, REG_X0, 0x4), \
    ECALL()
// Service 93, leaving a0 as it is.
#define ECALL_EXIT2() \
    ADDI(REG_A7, REG_X0, 93), \
    ECALL()
// Service 93 with a0 = errno.
#define ECALL_EXIT_ARG(errno) \
    ADDI(REG_A0, REG_X0, errno), \
    ECALL_EXIT2()
// Same expansion as ECALL_EXIT2.
#define ECALL_EXIT() \
    ADDI(REG_A7, REG_X0, 93), \
    ECALL()
// Service 1025 + 4; the macro argument is not used by the expansion.
// ADDI needs an explicit source register (x0) to load the service number.
#define ECALL_SCAN_INT(int) \
    ADDI(REG_A7, REG_X0, (1025 + 4)), \
    ECALL()
// Service 1025 + 5 with a0 = str.
#define ECALL_SCAN_STR(str) \
    ADDI(REG_A0, REG_X0, str), \
    ADDI(REG_A7, REG_X0, (1025 + 5)), \
    ECALL()
#endif
#endif

View File

@ -0,0 +1,413 @@
#define RISCV_VM_BUILDIN_ECALL
#include "rv32gen.h"
#include <stdio.h>
#include <assert.h>
#include <string.h>
// Instruction encoding union: one instruction viewed either as a 32-bit word
// (`code`) or as its four bytes in memory (`bytes`).
// NOTE(review): the byte view follows the host's byte order, so it is
// little-endian only when this program runs on a little-endian host.
// `code` must stay the first member: brace-initialised arrays of this union
// are filled with plain instruction words.
typedef union rv32code {
uint32_t code;
uint8_t bytes[4];
} rv32code_t;
// Number of instruction slots reserved at the start of the output image for
// the startup stub; main() pads this region with NOPs and places the generated
// code at byte offset CRT_CODE_SIZE * 4.
#define CRT_CODE_SIZE 16
// Startup stub written at offset 0 of the output image:
// set sp to 0x1000 and ra to 0, call the code located right after the stub
// region, then issue the exit environment call.
rv32code_t gcodes[] = {
LI(REG_SP, 0x1000),
LI(REG_RA, 0x0),
CALL_ABS(CRT_CODE_SIZE << 2),
// Exit
ECALL_EXIT2(),
};
/* Write the startup stub (gcodes) to `out` as raw instruction words. */
void test_raw_gen(FILE* out) {
    const size_t word_count = sizeof(gcodes) / sizeof(gcodes[0]);
    fwrite(gcodes, sizeof(rv32code_t), word_count, out);
}
#include "../../frontend/frontend.h"
#include "../../middleend/ir.h"
// A pending control-transfer fixup: a placeholder instruction was emitted and
// must be re-encoded once the position of its target is known.
typedef struct {
int code_pos; // index into ctx.codes of the placeholder instruction
int to_idx; // target: basic-block index (branch/jump) or function index (call)
int cur_idx; // origin: index of the emitting basic block, or of the emitting function for calls
int base_offset; // branch/jump: bytes from the end of the origin block back to the placeholder;
                 // call: bytes from the start of the origin function to the placeholder
enum {
JMP_BRANCH, // conditional branch placeholder
JMP_JUMP, // unconditional jump placeholder
JMP_CALL, // function call placeholder
} type;
} jmp_t;
// Code generator state shared by all gen_* functions in this file.
static struct {
vector_header(codes, rv32code_t); // every instruction emitted so far
int stack_offset; // frame size in bytes of the function being generated
int stack_base; // byte offset of the first value slot within the frame
int tmp_reg; // not referenced by the code visible in this file
ir_bblock_t* cur_block; // basic block currently being generated
ir_func_t* cur_func; // function currently being generated
ir_prog_t* prog; // program being generated
vector_header(jmp, jmp_t*); // branch/jump fixups of the current function
vector_header(call, jmp_t*); // call fixups, resolved after all functions are generated
int cur_func_offset; // bytes emitted for the current function before the current block
int cur_block_offset; // bytes emitted for the current block before the current instruction
} ctx;
/* Write one instruction word to `fp`; returns fwrite's item count (1 on success). */
int write_inst(union rv32code ins, FILE* fp) {
    size_t items_written = fwrite(&ins, sizeof ins, 1, fp);
    return items_written;
}
/*
 * Emission helpers. Both append to ctx.codes and add 4 bytes per instruction
 * to a variable named `len` that must exist in the calling scope.
 *
 * GENCODE  - append one instruction word.
 * GENCODES - append every word of a comma-separated instruction list
 *            (e.g. LI(...) or CALL(...)). Pass the list directly; wrapping it
 *            in an extra pair of parentheses turns it into a comma expression
 *            and only the last word would be emitted.
 *
 * Both are wrapped in do { } while (0) so they behave as a single statement,
 * and the locals carry a suffix so they cannot capture names used in `code`.
 */
#define GENCODE(code) do { \
    vector_push(ctx.codes, (rv32code_t)(code)); \
    len += 4; \
} while (0)
#define GENCODES(code) do { \
    rv32code_t gencodes_buf_[] = { \
        code \
    }; \
    for (size_t gencodes_i_ = 0; \
         gencodes_i_ < sizeof(gencodes_buf_) / sizeof(gencodes_buf_[0]); \
         gencodes_i_++) { \
        GENCODE(gencodes_buf_[gencodes_i_]); \
    } \
} while (0)
/*
 * Return the byte offset (relative to sp) of the stack slot that belongs to
 * IR node `ptr`. Every instruction of the current function owns one 4-byte
 * slot; slots are laid out in block order, then instruction order, starting
 * at ctx.stack_base. Asserts if `ptr` is not an instruction of ctx.cur_func.
 */
static int stack_offset(ir_node_t* ptr) {
    int offset = ctx.stack_base;
    for (int b = 0; b < ctx.cur_func->bblocks.size; b++) {
        ir_bblock_t* block = vector_at(ctx.cur_func->bblocks, b);
        for (int k = 0; k < block->instrs.size; k++) {
            if (vector_at(block->instrs, k) != ptr) {
                continue;
            }
            offset += k * 4;
            assert(offset >= 0 && offset < ctx.stack_offset);
            return offset;
        }
        // not in this block: skip over all of its slots
        offset += block->instrs.size * 4;
    }
    assert(0);
    return -1; // only reached when assertions are compiled out
}
/*
 * Return the index of `toblock` within the current function's block list.
 * Asserts if the block does not belong to ctx.cur_func.
 */
static int block_idx(ir_bblock_t* toblock) {
    for (int i = 0; i < ctx.cur_func->bblocks.size; i++) {
        if (vector_at(ctx.cur_func->bblocks, i) == toblock) {
            return i;
        }
    }
    assert(0);
    return -1; // only reached when assertions are compiled out
}
/*
 * Return the index of `tofunc` within the program's function list.
 * Asserts if the function is not part of ctx.prog.
 */
static int func_idx(ir_func_t* tofunc) {
    for (int i = 0; i < ctx.prog->funcs.size; i++) {
        if (vector_at(ctx.prog->funcs, i) == tofunc) {
            return i;
        }
    }
    assert(0);
    return -1; // only reached when assertions are compiled out
}
/*
 * Look `name` up in the table of built-in (environment-call backed) functions.
 * Returns the table index on a match, or -1 when `name` is not a built-in.
 */
static int system_func(const char* name) {
    static const char defined_func[][16] = {
        "ecall_pnt_int",
    };
    const int count = (int)(sizeof(defined_func) / sizeof(defined_func[0]));
    int idx = 0;
    while (idx < count) {
        if (strcmp(name, defined_func[idx]) == 0) {
            return idx;
        }
        idx++;
    }
    return -1;
}
/*
 * Emit code that places the value of IR node `ptr` into register `reg`:
 * an integer constant is loaded with LI, any other node is read from its
 * stack slot. Returns the number of bytes emitted.
 */
static int get_node_val(ir_node_t* ptr, int reg) {
    int len = 0; // advanced by GENCODE / GENCODES
    if (ptr->tag != IR_NODE_CONST_INT) {
        int offset = stack_offset(ptr);
        GENCODE(LW(reg, REG_SP, offset));
        return len;
    }
    GENCODES(LI(reg, ptr->data.const_int.val));
    return len;
}
/*
 * Emit machine code for one IR instruction of the current block.
 *
 * Operands are fetched with get_node_val(); results of arithmetic are written
 * to the instruction's own stack slot. Branches, jumps and calls emit
 * placeholder instructions and record a jmp_t fixup that is resolved later
 * (ctx.jmp per function, ctx.call per program).
 *
 * `block` is currently unused; the block is taken from ctx.cur_block.
 * Returns the number of bytes emitted for this instruction.
 */
static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
    int len = 0; // advanced by GENCODE / GENCODES
    int offset;
    switch (instr->tag) {
    case IR_NODE_ALLOC: {
        // storage is the node's stack slot; nothing to emit
        break;
    }
    case IR_NODE_LOAD: {
        // T0 = *(sp + slot(target))
        offset = stack_offset(instr->data.load.target);
        GENCODE(LW(REG_T0, REG_SP, offset));
        // NOTE(review): the loaded value is not written to this node's own
        // slot, yet get_node_val() reads non-constant operands from their
        // slot. The store below was disabled by the author -- confirm intent.
        // offset = STACK_OFFSET(instr);
        // GENCODE(SW(REG_T0, REG_SP, offset));
        break;
    }
    case IR_NODE_STORE: {
        // *(sp + slot(target)) = value
        len += get_node_val(instr->data.store.value, REG_T0);
        offset = stack_offset(instr->data.store.target);
        GENCODE(SW(REG_T0, REG_SP, offset));
        break;
    }
    case IR_NODE_RET: {
        // A0 = return value (if any), then restore ra, pop the frame, return
        if (instr->data.ret.ret_val != NULL) {
            len += get_node_val(instr->data.ret.ret_val, REG_A0);
        }
        GENCODE(LW(REG_RA, REG_SP, 0));
        GENCODE(ADDI(REG_SP, REG_SP, ctx.stack_offset));
        GENCODE(RET());
        break;
    }
    case IR_NODE_OP: {
        // T0 = T1 <op> T2, then spill T0 to this node's slot
        len += get_node_val(instr->data.op.lhs, REG_T1);
        len += get_node_val(instr->data.op.rhs, REG_T2);
        switch (instr->data.op.op) {
        case IR_OP_ADD:
            GENCODE(ADD(REG_T0, REG_T1, REG_T2));
            break;
        case IR_OP_SUB:
            GENCODE(SUB(REG_T0, REG_T1, REG_T2));
            break;
        case IR_OP_MUL:
            GENCODE(MUL(REG_T0, REG_T1, REG_T2));
            break;
        case IR_OP_DIV:
            GENCODE(DIV(REG_T0, REG_T1, REG_T2));
            break;
        case IR_OP_MOD:
            GENCODE(REM(REG_T0, REG_T1, REG_T2));
            break;
        default:
            error("ERROR gen_instr op in riscv");
            break;
        }
        offset = stack_offset(instr);
        GENCODE(SW(REG_T0, REG_SP, offset));
        break;
    }
    case IR_NODE_BRANCH: {
        // "bnez T0, <true>" followed by "j <false>"; both are placeholders.
        // base_offset is the distance from the end of this block back to the
        // placeholder, which assumes these are the block's last instructions.
        len += get_node_val(instr->data.branch.cond, REG_T0);
        int tidx = block_idx(instr->data.branch.true_bblock);
        int fidx = block_idx(instr->data.branch.false_bblock);
        int cidx = block_idx(ctx.cur_block);
        jmp_t* jmp;
        jmp = xmalloc(sizeof(jmp_t));
        *jmp = (jmp_t) {
            .base_offset = 8,
            .code_pos = ctx.codes.size,
            .type = JMP_BRANCH,
            .to_idx = tidx,
            .cur_idx = cidx,
        };
        vector_push(ctx.jmp, jmp);
        GENCODE(BNEZ(REG_T0, 0));
        jmp = xmalloc(sizeof(jmp_t));
        *jmp = (jmp_t) {
            .base_offset = 4,
            .code_pos = ctx.codes.size,
            .type = JMP_JUMP,
            .to_idx = fidx,
            .cur_idx = cidx,
        };
        vector_push(ctx.jmp, jmp);
        GENCODE(J(0));
        break;
    }
    case IR_NODE_JUMP: {
        // placeholder "j <target>", assumed to be the block's last instruction
        int idx = block_idx(instr->data.jump.target_bblock);
        jmp_t* jmp = xmalloc(sizeof(jmp_t));
        *jmp = (jmp_t) {
            .base_offset = 4,
            .code_pos = ctx.codes.size,
            .type = JMP_JUMP,
            .to_idx = idx,
            .cur_idx = block_idx(ctx.cur_block),
        };
        vector_push(ctx.jmp, jmp);
        GENCODE(J(0));
        break;
    }
    case IR_NODE_CALL: {
        if (instr->data.call.args.size > 8) {
            error("can't add so much params");
        }
        int param_regs[8] = {
            REG_A0, REG_A1, REG_A2, REG_A3,
            REG_A4, REG_A5, REG_A6, REG_A7
        };
        // arguments go to a0..a7 in order
        for (int i = 0; i < instr->data.call.args.size; i++) {
            ir_node_t* param = vector_at(instr->data.call.args, i);
            len += get_node_val(param, param_regs[i]);
        }
        int system_func_idx = system_func(instr->data.call.callee->name);
        if (system_func_idx == 0) {
            // ecall_pnt_int
            GENCODE(ADDI(REG_A7, REG_X0, 0x1));
            GENCODE(ECALL());
            break;
        }
        // base_offset: byte offset of the call sequence from the start of the
        // current function (block start + instruction start + bytes so far).
        jmp_t* jmp = xmalloc(sizeof(jmp_t));
        *jmp = (jmp_t) {
            .base_offset = ctx.cur_func_offset + ctx.cur_block_offset + len,
            .code_pos = ctx.codes.size,
            .type = JMP_CALL,
            .to_idx = func_idx(instr->data.call.callee),
            .cur_idx = func_idx(ctx.cur_func),
        };
        vector_push(ctx.call, jmp);
        // Reserve BOTH words of the auipc+jalr pair. The list must not be
        // wrapped in extra parentheses: that would make it a comma expression
        // and emit a single word, while the fixup later rewrites two slots.
        GENCODES(CALL(0));
        // NOTE(review): the callee's result (a0) is not stored to this node's
        // stack slot -- confirm whether call results are consumed elsewhere.
        break;
    }
    default:
        error("ERROR gen_instr in riscv");
    }
    return len;
}
/*
 * Emit code for every instruction of `block`, in order. Before each
 * instruction ctx.cur_block_offset is set to the bytes emitted so far for
 * this block. Returns the total number of bytes emitted.
 */
static int gen_block(ir_bblock_t* block) {
    int emitted = 0;
    ctx.cur_block = block;
    int idx = 0;
    while (idx < block->instrs.size) {
        ctx.cur_block_offset = emitted;
        emitted += gen_instr(block, vector_at(block->instrs, idx));
        idx++;
    }
    return emitted;
}
/*
 * Emit code for one function: prologue, parameter spills, every basic block,
 * then resolve the branch/jump placeholders recorded in ctx.jmp.
 *
 * Frame layout (relative to sp after the prologue): saved ra at offset 0,
 * value slots from ctx.stack_base upward, one 4-byte slot per IR instruction.
 * Returns the number of bytes emitted for the function.
 */
static int gen_func(ir_func_t* func) {
    int len = 0; // advanced by GENCODE
    ctx.cur_func = func;
    ctx.stack_base = 16;
    ctx.stack_offset = ctx.stack_base;
    for (int i = 0; i < func->bblocks.size; i++) {
        ctx.stack_offset += 4 * (*vector_at(func->bblocks, i)).instrs.size;
    }
    // The frame size and every slot offset are encoded in 12-bit signed
    // immediates (addi/lw/sw); anything larger would be silently truncated.
    if (ctx.stack_offset > 2047) {
        error("stack frame of %d bytes exceeds the 12-bit immediate range\n",
              ctx.stack_offset);
    }

    // prologue: allocate the frame and save the return address
    GENCODE(ADDI(REG_SP, REG_SP, -ctx.stack_offset));
    GENCODE(SW(REG_RA, REG_SP, 0));

    int param_regs[8] = {
        REG_A0, REG_A1, REG_A2, REG_A3,
        REG_A4, REG_A5, REG_A6, REG_A7
    };
    if (func->params.size > 8) {
        error("can't add so much params");
    }
    // spill incoming arguments to the slots of their parameter nodes
    for (int i = 0; i < func->params.size; i++) {
        int offset = stack_offset(vector_at(func->params, i));
        GENCODE(SW(param_regs[i], REG_SP, offset));
    }

    // jmp_cache[i] = byte offset of block i from the first block;
    // jmp_cache[n] = total size of all blocks
    int jmp_cache[func->bblocks.size + 1];
    if (ctx.jmp.data != NULL) vector_free(ctx.jmp);
    vector_init(ctx.jmp);
    jmp_cache[0] = 0;
    for (int i = 0; i < func->bblocks.size; i++) {
        ctx.cur_func_offset = len;
        int ret = gen_block(vector_at(func->bblocks, i));
        jmp_cache[i + 1] = jmp_cache[i] + ret;
        len += ret;
    }

    // patch placeholders: target start minus the placeholder's own position,
    // where the placeholder sits base_offset bytes before its block's end
    for (int i = 0; i < ctx.jmp.size; i++) {
        jmp_t* jmp = vector_at(ctx.jmp, i);
        int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx + 1] - jmp->base_offset);
        uint32_t code;
        if (jmp->type == JMP_JUMP) {
            // jal carries a 21-bit signed byte offset
            if (offset < -(1 << 20) || offset >= (1 << 20)) {
                error("jump offset %d out of range\n", offset);
            }
            code = J(offset);
        } else {
            // conditional branches carry a 13-bit signed byte offset
            if (offset < -4096 || offset >= 4096) {
                error("branch offset %d out of range\n", offset);
            }
            code = BNEZ(REG_T0, offset);
        }
        ctx.codes.data[jmp->code_pos] = (rv32code_t) {
            .code = code,
        };
    }
    return len;
}
/*
 * Generate code for the whole program into ctx.codes.
 *
 * Every external function must be one of the built-ins known to
 * system_func(). Functions are emitted in program order; afterwards each
 * recorded call placeholder is rewritten with the pc-relative distance from
 * the call site to the start of its callee.
 */
static void gen_code(ir_prog_t* prog) {
    ctx.prog = prog;
    for (int i = 0; i < prog->extern_funcs.size; i++) {
        if (system_func(prog->extern_funcs.data[i]->name) == -1) {
            error("func %s not defined and not a system func", prog->extern_funcs.data[i]->name);
        }
    }

    // jmp_cache[i] = byte offset of function i from the first function.
    // Slot 0 must be set explicitly: the loop below only derives later slots.
    int jmp_cache[prog->funcs.size + 1];
    jmp_cache[0] = 0;
    for (int i = 0; i < prog->funcs.size; i++) {
        int ret = gen_func(vector_at(prog->funcs, i));
        jmp_cache[i + 1] = jmp_cache[i] + ret;
    }

    for (int i = 0; i < ctx.call.size; i++) {
        jmp_t* jmp = vector_at(ctx.call, i);
        // FIXME (from the original author): marked as erroneous.
        // NOTE(review): this rewrites two instruction slots starting at
        // code_pos, so the emitter must have reserved two words there.
        int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx] + jmp->base_offset);
        uint32_t pair[2] = {
            CALL(offset)
        };
        for (int k = 0; k < 2; k++) {
            ctx.codes.data[jmp->code_pos + k] = (rv32code_t) {
                .code = pair[k],
            };
        }
    }
}
/*
 * Usage: ccompiler [input.c [output.bin]]   (defaults: test.c, flat.bin)
 *
 * Compiles the input through the frontend and IR generator, then writes a
 * flat image: CRT_CODE_SIZE instruction slots holding the startup stub
 * (padded with NOPs) followed by the generated code.
 * Returns 0 on success, 1 if a file cannot be opened or written.
 */
int main(int argc, char** argv) {
    // gcc rv32ima_codegen.c -o rv32gen.exe
    const char* infilename = "test.c";
    const char* outfilename = "flat.bin";
    if (argc >= 2) {
        infilename = argv[1];
    }
    if (argc >= 3) {
        outfilename = argv[2];
    }

    // Open the input first so a missing source file does not truncate an
    // existing output file.
    FILE* in = fopen(infilename, "r");
    if (in == NULL) {
        printf("Failed to open file\n");
        return 1;
    }
    FILE* out = fopen(outfilename, "wb");
    if (out == NULL) {
        printf("Failed to open file\n");
        fclose(in);
        return 1;
    }

    struct ASTNode* root = frontend(infilename, in, (sread_fn)fread_s);
    gen_ir_from_ast(root);
    gen_code(&prog);

    // The stub must fit in the region reserved for it.
    assert(CRT_CODE_SIZE >= sizeof(gcodes) / sizeof(gcodes[0]));

    // 1) fill the stub region with NOPs
    for (int i = 0; i < CRT_CODE_SIZE; i++) {
        write_inst((union rv32code) {
            .code = NOP(),
        }, out);
    }
    fflush(out);
    // 2) overwrite its beginning with the startup stub
    fseek(out, 0, SEEK_SET);
    fwrite(gcodes, sizeof(gcodes), 1, out);
    fflush(out);
    // 3) append the generated code right after the stub region
    fseek(out, CRT_CODE_SIZE * 4, SEEK_SET);
    fwrite(ctx.codes.data, sizeof(ctx.codes.data[0]), ctx.codes.size, out);
    fflush(out);

    int failed = ferror(out);
    fclose(in);
    if (fclose(out) != 0) {
        failed = 1;
    }
    if (failed) {
        fprintf(stderr, "Failed to write output file: %s\n", outfilename);
        return 1;
    }
    // printf("comiler end out: %s\n", outfilename);
    return 0;
}

View File

@ -1,7 +1,7 @@
# 编译器设置
CC = gcc
AR = ar
CFLAGS = -g
CFLAGS = -g -Wall
# 源文件路径
LEXER_DIR = ./lexer
@ -13,6 +13,7 @@ SYMTAB_DIR = ./parser/symtab
SRCS = \
frontend.c \
$(LEXER_DIR)/lexer.c \
$(LEXER_DIR)/token.c \
$(PARSER_DIR)/parser.c \
$(AST_DIR)/ast.c \
$(AST_DIR)/block.c \

View File

@ -3,13 +3,13 @@
#include "frontend.h"
struct ASTNode* frontend(const char* file, void* stream, sread_fn sread) {
struct Lexer lexer;
lexer_t lexer;
init_lexer(&lexer, file, stream, sread);
struct SymbolTable symtab;
symtab_t symtab;
init_symtab(&symtab);
struct Parser parser;
parser_t parser;
init_parser(&parser, &lexer, &symtab);
parse_prog(&parser);

View File

@ -4,8 +4,9 @@
#ifndef error
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define STD_LIBRARY
#define error(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while (0)
#define error(...) do { fprintf(stderr, __VA_ARGS__); assert(0); } while (0)
#endif
#ifndef warn
#include <stdio.h>
@ -15,10 +16,12 @@
#define xmalloc(size) malloc(size)
#ifndef FRONTEND_IMPLEMENTATION
#include "parser/parser.h"
#include "parser/ast/ast.h"
typedef int (*sread_fn)(void *dst_buf, int dst_size, int elem_size, int count, void *stream);
struct ASTNode* frontend(const char* file, void* stream, sread_fn sread);
#endif
#endif

View File

@ -26,13 +26,15 @@ the distribution and installation instructions.
Chris Fraser / cwf@aya.yale.edu
David Hanson / drh@drhanson.net
*/
#define FRONTEND_IMPLEMENTATION
#include "../frontend.h"
#include "token.h"
#include "lexer.h"
static const struct {
const char* name;
enum CSTD_KEYWORD std_type;
enum TokenType tok;
tok_type_t tok;
} keywords[] = {
#define X(name, std_type, tok, ...) { #name, std_type, tok },
KEYWORD_TABLE
@ -72,7 +74,7 @@ static inline int keyword_cmp(const char* name, int len) {
return -1; // Not a keyword.
}
void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
{
lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer);
lexer->index = 1;
@ -86,12 +88,12 @@ void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_
}
}
static void flush_buffer(struct Lexer* lexer) {
static void flush_buffer(lexer_t* lexer) {
int num = lexer->end_ptr - lexer->cur_ptr;
for (int i = 0; i < num; i++) {
lexer->buffer[i] = lexer->cur_ptr[i];
}
lexer->cur_ptr = lexer->buffer;
lexer->cur_ptr = (unsigned char*)lexer->buffer;
int read_size = LEXER_BUFFER_SIZE - num;
// TODO size_t to int maybe lose precision
@ -109,7 +111,7 @@ static void flush_buffer(struct Lexer* lexer) {
}
}
static void goto_newline(struct Lexer* lexer) {
static void goto_newline(lexer_t* lexer) {
do {
if (lexer->cur_ptr == lexer->end_ptr) {
flush_buffer(lexer);
@ -119,7 +121,7 @@ static void goto_newline(struct Lexer* lexer) {
} while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0');
}
static void goto_block_comment(struct Lexer* lexer) {
static void goto_block_comment(lexer_t* lexer) {
while (1) {
if (lexer->end_ptr - lexer->cur_ptr < 2) {
flush_buffer(lexer);
@ -155,7 +157,7 @@ static char got_slash(unsigned char* peek) {
}
}
static void parse_char_literal(struct Lexer* lexer, struct Token* token) {
static void parse_char_literal(lexer_t* lexer, tok_t* token) {
char val = 0;
unsigned char* peek = lexer->cur_ptr + 1;
if (*peek == '\\') {
@ -166,16 +168,16 @@ static void parse_char_literal(struct Lexer* lexer, struct Token* token) {
}
if (*peek != '\'') error("Unclosed character literal");
token->constant.ch = val;
token->val.ch = val;
lexer->cur_ptr = peek + 1;
token->constant.have = 1;
token->val.have = 1;
token->type = TOKEN_CHAR_LITERAL;
}
static void parse_string_literal(struct Lexer* lexer, struct Token* token) {
static void parse_string_literal(lexer_t* lexer, tok_t* token) {
unsigned char* peek = lexer->cur_ptr + 1;
// TODO string literal size check
char* dest = token->constant.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
char* dest = token->val.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
int len = 0;
while (*peek != '"') {
@ -191,12 +193,12 @@ static void parse_string_literal(struct Lexer* lexer, struct Token* token) {
}
dest[len] = '\0';
lexer->cur_ptr = peek + 1;
token->constant.have = 1;
token->val.have = 1;
token->type = TOKEN_STRING_LITERAL;
}
// FIXME it write by AI maybe error
static void parse_number(struct Lexer* lexer, struct Token* token) {
static void parse_number(lexer_t* lexer, tok_t* token) {
unsigned char* peek = lexer->cur_ptr;
int base = 10;
int is_float = 0;
@ -255,12 +257,12 @@ static void parse_number(struct Lexer* lexer, struct Token* token) {
if ((*peek == 'e' || *peek == 'E') && base == 10) {
is_float = 1;
peek++;
int exp_sign = 1;
// int exp_sign = 1;
int exponent = 0;
if (*peek == '+') peek++;
else if (*peek == '-') {
exp_sign = -1;
// exp_sign = -1;
peek++;
}
@ -273,19 +275,19 @@ static void parse_number(struct Lexer* lexer, struct Token* token) {
// 存储结果
lexer->cur_ptr = peek;
token->constant.have = 1;
token->val.have = 1;
if (is_float) {
token->constant.d = float_val;
token->val.d = float_val;
token->type = TOKEN_FLOAT_LITERAL;
} else {
token->constant.ll = int_val;
token->val.ll = int_val;
token->type = TOKEN_INT_LITERAL;
}
}
#define GOT_ONE_TOKEN_BUF_SIZE 64
// /zh/c/language/operator_arithmetic.html
void get_token(struct Lexer* lexer, struct Token* token) {
void get_token(lexer_t* lexer, tok_t* token) {
// 需要保证缓冲区始终可读
if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) {
flush_buffer(lexer);
@ -305,8 +307,8 @@ void get_token(struct Lexer* lexer, struct Token* token) {
token->type = TOKEN_FLUSH;
}
enum TokenType tok = TOKEN_INIT;
struct TokenConstant constant;
tok_type_t tok = TOKEN_INIT;
tok_val_t constant;
constant.have = 0;
// once step
@ -392,7 +394,7 @@ void get_token(struct Lexer* lexer, struct Token* token) {
switch (*peek++) {
case '=': tok = TOKEN_NEQ; break;
default: peek--, tok = TOKEN_NOT; break;
}
} break;
case '[':
tok = TOKEN_L_BRACKET; break;
case ']':
@ -454,7 +456,7 @@ void get_token(struct Lexer* lexer, struct Token* token) {
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z':
case '_':
// TOKEN_IDENT
if (*peek == 'L' && *peek == '\'' || *peek == 'L' && *peek == '"') {
if ((*peek == 'L' && *peek == '\'') || (*peek == 'L' && *peek == '"')) {
error("unsupport wide-character char literal by `L` format");
}
while (1) {
@ -469,18 +471,18 @@ void get_token(struct Lexer* lexer, struct Token* token) {
break;
}
int res = keyword_cmp(lexer->cur_ptr, peek - (lexer->cur_ptr));
int res = keyword_cmp((const char*)lexer->cur_ptr, peek - (lexer->cur_ptr));
if (res == -1) {
int strlen = peek - lexer->cur_ptr;
unsigned char* str = xmalloc(strlen + 1);
constant.have = 1;
constant.str = str;
constant.str = (char*)str;
for (int i = 0; i < strlen; i++) {
str[i] = lexer->cur_ptr[i];
}
str[strlen] = '\0';
constant.have = 1;
constant.str = str;
constant.str = (char*)str;
tok = TOKEN_IDENT; break;
} else {
tok = keywords[res].tok; break;
@ -492,32 +494,16 @@ void get_token(struct Lexer* lexer, struct Token* token) {
lexer->cur_ptr = peek;
END:
token->constant = constant;
token->val = constant;
token->type = tok;
}
// get_token maybe got invalid (with parser)
void get_valid_token(struct Lexer* lexer, struct Token* token) {
enum TokenType type;
void get_valid_token(lexer_t* lexer, tok_t* token) {
tok_type_t type;
do {
get_token(lexer, token);
type = token->type;
} while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT);
}
// 生成字符串映射(根据需求选择#str或#name
static const char* token_strings[] = {
// 普通token使用#str
#define X(str, tok) [tok] = #str,
TOKEN_TABLE
#undef X
// 关键字使用#name
#define X(name, std, tok) [tok] = #name,
KEYWORD_TABLE
#undef X
};
const char* get_token_name(enum TokenType type) {
return token_strings[type];
}

View File

@ -2,13 +2,17 @@
#define __LEXER_H__
#include "token.h"
#ifndef LEXER_MAX_TOKEN_SIZE
#define LEXER_MAX_TOKEN_SIZE 63
#endif
#ifndef LEXER_BUFFER_SIZE
#define LEXER_BUFFER_SIZE 4095
#endif
typedef int (*lexer_sread_fn)(void *dst_buf, int dst_size,
int elem_size, int count, void *stream);
struct Lexer {
typedef struct lexer {
int line;
int index;
// const char current_file_name[LEXER_BUFFER_SIZE+1];
@ -19,22 +23,15 @@ struct Lexer {
lexer_sread_fn sread;
void* stream;
};
} lexer_t;
struct Token {
enum TokenType type;
struct TokenConstant constant;
};
void init_lexer(struct Lexer* lexer, const char* file_name, void* stream,
void init_lexer(lexer_t* lexer, const char* file_name, void* stream,
lexer_sread_fn sread);
//
void get_token(struct Lexer* lexer, struct Token* token);
// pure token getter it will included empty token like TOKEN_FLUSH
void get_token(lexer_t* lexer, tok_t* token);
// get_token maybe got invalid (with parser as TOKEN_FLUSH)
void get_valid_token(struct Lexer* lexer, struct Token* token);
const char* get_token_name(enum TokenType token);
void get_valid_token(lexer_t* lexer, tok_t* token);
#endif

View File

@ -0,0 +1,17 @@
CC = gcc
CFLAGS = -g -Wall
SRC = ../lexer.c ../token.c

# `test` and `run` are real output files; everything else is a command.
.PHONY: all test_all clean

# Default goal: build and run the unit tests.
all: test_all

test_all: test
	./test

# Rebuild whenever the driver or the lexer sources change.
run: run.c $(SRC)
	$(CC) $(CFLAGS) $(SRC) run.c -o run

test: test.c $(SRC)
	$(CC) $(CFLAGS) $(SRC) -o test test.c

clean:
	rm -f test run

View File

@ -1,8 +1,8 @@
#include "../lexer.h"
#include <stdio.h>
// gcc -g ../lexer.c test_lexer.c -o test_lexer
// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
/*
struct TokenConstant {
tok_tConstant {
int have;
union {
char ch;
@ -31,9 +31,9 @@ int main(int argc, char* argv[]) {
}
printf("open file success\n");
struct Lexer lexer;
lexer_t lexer;
init_lexer(&lexer, "test_lexter.c", fp, (lexer_sread_fn)fread_s);
struct Token tok;
tok_t tok;
while (1) {
get_valid_token(&lexer, &tok);
@ -41,6 +41,6 @@ int main(int argc, char* argv[]) {
break;
}
printf("line: %d, column: %d, type: %3d, typename: %s\n",
lexer.line, lexer.index, tok.type, get_token_name(tok.type));
lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
}
}

View File

@ -0,0 +1,178 @@
// test_lexer.c
#include "../../../../libcore/acutest.h"
#include "../lexer.h"
#include <string.h>
/*
 * Reader callback for tests: `stream` is a NUL-terminated C string that acts
 * as the whole input.
 *
 * Copies at most min(dst_size, elem_size * count) bytes, and never more than
 * the string itself plus its terminating NUL, so short inputs are not read
 * past their end. Every call starts at the beginning of the string (no read
 * position is kept).
 *
 * Returns the number of bytes copied, or 0 when `stream` is NULL or the
 * requested size is not positive.
 */
int test_read(void *dst_buf, int dst_size, int elem_size, int count, void *stream) {
    if (stream == NULL) {
        return 0;
    }
    int size = dst_size > elem_size * count ? elem_size * count : dst_size;
    if (size <= 0) {
        return 0;
    }
    // bytes actually available: the text and its terminator
    size_t avail = strlen((const char *)stream) + 1;
    if ((size_t)size > avail) {
        size = (int)avail;
    }
    memcpy(dst_buf, stream, (size_t)size);
    return size;
}
// Test helper: lexes `input` with a fresh lexer and checks the type of the
// first token returned by get_valid_token against `expected_type`.
// `input` is handed to the lexer as its stream and is read through test_read.
static inline void test_lexer_string(const char* input, tok_type_t expected_type) {
lexer_t lexer;
tok_t token;
init_lexer(&lexer, "test.c", (void*)input, test_read);
get_valid_token(&lexer, &token);
TEST_CHECK(token.type == expected_type);
// Attach the expected and actual token names (via get_tok_name) to the check above.
TEST_MSG("Expected: %s", get_tok_name(expected_type));
TEST_MSG("Got: %s", get_tok_name(token.type));
}
// 基础运算符测试
void test_operators() {
TEST_CASE("Arithmetic operators"); {
test_lexer_string("+", TOKEN_ADD);
test_lexer_string("++", TOKEN_ADD_ADD);
test_lexer_string("+=", TOKEN_ASSIGN_ADD);
test_lexer_string("-", TOKEN_SUB);
test_lexer_string("--", TOKEN_SUB_SUB);
test_lexer_string("-=", TOKEN_ASSIGN_SUB);
test_lexer_string("*", TOKEN_MUL);
test_lexer_string("*=", TOKEN_ASSIGN_MUL);
test_lexer_string("/", TOKEN_DIV);
test_lexer_string("/=", TOKEN_ASSIGN_DIV);
test_lexer_string("%", TOKEN_MOD);
test_lexer_string("%=", TOKEN_ASSIGN_MOD);
}
TEST_CASE("Bitwise operators"); {
test_lexer_string("&", TOKEN_AND);
test_lexer_string("&&", TOKEN_AND_AND);
test_lexer_string("&=", TOKEN_ASSIGN_AND);
test_lexer_string("|", TOKEN_OR);
test_lexer_string("||", TOKEN_OR_OR);
test_lexer_string("|=", TOKEN_ASSIGN_OR);
test_lexer_string("^", TOKEN_XOR);
test_lexer_string("^=", TOKEN_ASSIGN_XOR);
test_lexer_string("~", TOKEN_BIT_NOT);
test_lexer_string("<<", TOKEN_L_SH);
test_lexer_string("<<=", TOKEN_ASSIGN_L_SH);
test_lexer_string(">>", TOKEN_R_SH);
test_lexer_string(">>=", TOKEN_ASSIGN_R_SH);
}
TEST_CASE("Comparison operators"); {
test_lexer_string("==", TOKEN_EQ);
test_lexer_string("!=", TOKEN_NEQ);
test_lexer_string("<", TOKEN_LT);
test_lexer_string("<=", TOKEN_LE);
test_lexer_string(">", TOKEN_GT);
test_lexer_string(">=", TOKEN_GE);
}
TEST_CASE("Special symbols"); {
test_lexer_string("(", TOKEN_L_PAREN);
test_lexer_string(")", TOKEN_R_PAREN);
test_lexer_string("[", TOKEN_L_BRACKET);
test_lexer_string("]", TOKEN_R_BRACKET);
test_lexer_string("{", TOKEN_L_BRACE);
test_lexer_string("}", TOKEN_R_BRACE);
test_lexer_string(";", TOKEN_SEMICOLON);
test_lexer_string(",", TOKEN_COMMA);
test_lexer_string(":", TOKEN_COLON);
test_lexer_string(".", TOKEN_DOT);
test_lexer_string("...", TOKEN_ELLIPSIS);
test_lexer_string("->", TOKEN_DEREF);
test_lexer_string("?", TOKEN_COND);
}
}
// 关键字测试
void test_keywords() {
TEST_CASE("C89 keywords");
test_lexer_string("while", TOKEN_WHILE);
test_lexer_string("sizeof", TOKEN_SIZEOF);
// TEST_CASE("C99 keywords");
// test_lexer_string("restrict", TOKEN_RESTRICT);
// test_lexer_string("_Bool", TOKEN_INT); // 需确认你的类型定义
}
// 字面量测试
void test_literals() {
TEST_CASE("Integer literals"); {
// 十进制
test_lexer_string("0", TOKEN_INT_LITERAL);
test_lexer_string("123", TOKEN_INT_LITERAL);
// test_lexer_string("2147483647", TOKEN_INT_LITERAL);
// // 十六进制
// test_lexer_string("0x0", TOKEN_INT_LITERAL);
// test_lexer_string("0x1A3F", TOKEN_INT_LITERAL);
// test_lexer_string("0XABCDEF", TOKEN_INT_LITERAL);
// // 八进制
// test_lexer_string("0123", TOKEN_INT_LITERAL);
// test_lexer_string("0777", TOKEN_INT_LITERAL);
// // 边界值测试
// test_lexer_string("2147483647", TOKEN_INT_LITERAL); // INT_MAX
// test_lexer_string("4294967295", TOKEN_INT_LITERAL); // UINT_MAX
}
// TEST_CASE("Character literals"); {
// test_lexer_string("'a'", TOKEN_CHAR_LITERAL);
// test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL);
// test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL);
// test_lexer_string("'\\\\'", TOKEN_CHAR_LITERAL);
// test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL);
// }
TEST_CASE("String literals"); {
test_lexer_string("\"hello\"", TOKEN_STRING_LITERAL);
test_lexer_string("\"multi-line\\nstring\"", TOKEN_STRING_LITERAL);
test_lexer_string("\"escape\\\"quote\"", TOKEN_STRING_LITERAL);
}
// TEST_CASE("Integer literals");
// test_lexer_string("123", TOKEN_INT_LITERAL);
// test_lexer_string("0x1F", TOKEN_INT_LITERAL);
// TEST_CASE("Floating literals");
// test_lexer_string("3.14e-5", TOKEN_FLOAT_LITERAL);
// TEST_CASE("Character literals");
// test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL);
}
// Edge-case tests. Every case below is currently commented out, so this
// test function runs no checks.
void test_edge_cases() {
// TEST_CASE("Long identifiers");
// char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0};
// memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1);
// test_lexer_string(long_id, TOKEN_IDENT);
// TEST_CASE("Buffer boundary");
// char boundary[LEXER_BUFFER_SIZE*2] = {0};
// memset(boundary, '+', LEXER_BUFFER_SIZE*2-1);
// test_lexer_string(boundary, TOKEN_ADD);
}
// Error-handling test: a lexer created with a NULL stream (for which
// test_read returns 0) is expected to yield TOKEN_EOF from get_valid_token.
// NOTE(review): the case is named "Invalid characters" but the input is a
// NULL stream, not invalid text — confirm the intended scenario.
void test_error_handling() {
TEST_CASE("Invalid characters");
lexer_t lexer;
tok_t token;
init_lexer(&lexer, "test.c", NULL, test_read);
get_valid_token(&lexer, &token);
TEST_CHECK(token.type == TOKEN_EOF); // should trigger the lexer's error handling
}
// Test list: maps each test name to its test function; the table is
// terminated by a {NULL, NULL} entry.
TEST_LIST = {
{"operators", test_operators},
{"keywords", test_keywords},
{"literals", test_literals},
{"edge_cases", test_edge_cases},
{"error_handling", test_error_handling},
{NULL, NULL}
};

View File

@ -0,0 +1,86 @@
#define FRONTEND_IMPLEMENTATION
#include "../frontend.h"
#include "token.h"
#define ROUND_IDX(idx) ((idx) % tokbuf->cap)
/**
 * Consume the oldest buffered token and return it.
 *
 * peek_tok stores the first unconsumed token in the slot after `cur`
 * (it pre-increments before fetching), so consuming a token means advancing
 * `cur` by one slot and returning the slot `cur` now designates. Returning
 * the slot at the old `cur` would hand back the previously consumed token.
 *
 * The look-ahead cursor (`peek`) is not touched here.
 * NOTE(review): the callers visible in the parser call flush_peek_tok before
 * peeking again after a pop — confirm every caller does.
 *
 * @param tokbuf token buffer to pop from
 * @return pointer to the consumed token inside the buffer, or NULL (after
 *         reporting an error) when no token is buffered.
 */
tok_t* pop_tok(tok_buf_t* tokbuf) {
    if (tokbuf->size == 0) {
        error("no token to pop");
        return NULL;
    }
    tokbuf->cur = ROUND_IDX(tokbuf->cur + 1);
    tokbuf->size -= 1;
    return tokbuf->buf + tokbuf->cur;
}
// Rewind the look-ahead cursor to the consume cursor, so that the next
// peek_tok call starts again from the first token that has not been popped.
void flush_peek_tok(tok_buf_t* tokbuf) {
tokbuf->peek = tokbuf->cur;
}
/**
 * Reset a token buffer to its empty state and bind its token source.
 *
 * All cursors and the token count start at zero. No slot storage is attached:
 * `buf` is NULL and `cap` is 0 after this call.
 *
 * @param tokbuf buffer to initialise
 * @param stream opaque handle passed back to `gettok` on every fetch
 * @param gettok callback that writes the next token into the given slot
 */
void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) {
    *tokbuf = (tok_buf_t){
        .cur    = 0,
        .end    = 0,
        .peek   = 0,
        .size   = 0,
        .cap    = 0,
        .buf    = NULL,
        .stream = stream,
        .gettok = gettok,
    };
}
/**
 * Advance the look-ahead cursor by one token and return that token.
 *
 * If the cursor already sits on the most recently fetched token (`peek ==
 * end`), a new token is pulled from `gettok` into the next ring slot;
 * otherwise the already buffered token in the next slot is returned.
 *
 * Every failure is reported through error() and then returns NULL, so that
 * (if error() returns) the buffer is never indexed modulo zero, a live slot
 * is never overwritten, and a NULL `gettok` is never called.
 *
 * @param tokbuf token buffer to peek into
 * @return pointer to the peeked token inside the buffer, or NULL on error
 *         (no storage attached, buffer full, or no token source).
 */
tok_t *peek_tok(tok_buf_t *tokbuf)
{
    // init_tokbuf leaves buf == NULL and cap == 0; ROUND_IDX would divide by zero.
    if (tokbuf->buf == NULL || tokbuf->cap <= 0) {
        error("peek_tok token buffer has no storage");
        return NULL;
    }
    // A full ring cannot be peeked further until something is popped.
    if (tokbuf->size >= tokbuf->cap) {
        error("peek too deep, outof array size");
        return NULL;
    }

    int idx = ROUND_IDX(tokbuf->peek + 1);
    if (tokbuf->peek == tokbuf->end) {
        // Every buffered token has been peeked already: fetch a new one.
        if (tokbuf->gettok == NULL) {
            error("peek_tok can not got tok");
            return NULL;
        }
        tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx]));
        tokbuf->size++;
        tokbuf->end = idx;
    }
    tokbuf->peek = idx;
    return &(tokbuf->buf[idx]);
}
// Convenience wrapper: advances the look-ahead with peek_tok and returns only
// the type of the peeked token. The result of peek_tok is dereferenced
// without a NULL check.
tok_type_t peek_tok_type(tok_buf_t* tokbuf) {
return peek_tok(tokbuf)->type;
}
/**
 * Require the next unconsumed token to be of type `type` and consume it.
 *
 * The look-ahead is rewound first, so the check always applies to the first
 * token that has not been popped. On a mismatch an error naming both token
 * types is reported and nothing is consumed.
 *
 * @param tokbuf token buffer to read from
 * @param type   token type that must come next
 * @return 0 when the token matched and was consumed, -1 otherwise, so callers
 *         that look at the result can tell the two outcomes apart.
 */
int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type) {
    flush_peek_tok(tokbuf);
    tok_t* tok = peek_tok(tokbuf);
    if (tok == NULL) {
        // Defensive: no token available counts as a failed expectation.
        return -1;
    }
    if (tok->type != type) {
        error("expected tok: %s, got %s", get_tok_name(type), get_tok_name(tok->type));
        return -1;
    }
    pop_tok(tokbuf);
    return 0;
}
// Token-type -> display-string table, generated from the X-macro tables
// (entries use #str or #name depending on the table).
static const char* token_strings[] = {
// Ordinary tokens use #str
#define X(str, tok) [tok] = #str,
TOKEN_TABLE
#undef X
// Keywords use #name
#define X(name, std, tok) [tok] = #name,
KEYWORD_TABLE
#undef X
};
/**
 * Return the display string of a token type.
 *
 * @param type token type to name
 * @return the string registered for `type` in token_strings, or
 *         "<unknown token>" when `type` lies outside the table or has no
 *         entry. The result is never NULL, so it is always safe to pass to
 *         a "%s" format.
 */
const char* get_tok_name(tok_type_t type) {
    const unsigned count = (unsigned)(sizeof token_strings / sizeof token_strings[0]);
    // The unsigned cast also rejects negative values.
    if ((unsigned)type >= count || token_strings[type] == NULL) {
        return "<unknown token>";
    }
    return token_strings[type];
}

View File

@ -105,7 +105,7 @@ enum CSTD_KEYWORD {
// END
// 定义TokenType枚举
enum TokenType {
typedef enum tok_type {
// 处理普通token
#define X(str, tok) tok,
TOKEN_TABLE
@ -115,9 +115,9 @@ enum TokenType {
#define X(name, std, tok) tok,
KEYWORD_TABLE
#undef X
};
} tok_type_t;
struct TokenConstant {
typedef struct tok_val {
int have;
union {
char ch;
@ -127,124 +127,31 @@ struct TokenConstant {
long long ll;
char* str;
};
};
} tok_val_t;
// "break"
// "case"
// "char"
// "const"
// "continue"
// "default"
// "do"
// "double"
// "else"
// "enum"
// "extern"
// "float"
// "for"
// "goto"
// "if"
// "inline (C99)"
// "int"
// "long"
// "register"
// "restrict (C99)"
// "return"
// "short"
// "signed"
// "sizeof"
// "static"
// "struct"
// "switch"
// "typedef"
// "union"
// "unsigned"
// "void"
// "volatile"
// "while"
// A single token: its type plus the associated value.
// (The parser reads val.str as the name of identifier tokens.)
typedef struct tok {
tok_type_t type;
tok_val_t val;
} tok_t;
// alignas (C23)
// alignof (C23)
// auto
// bool (C23)
// constexpr (C23)
// false (C23)
// nullptr (C23)
// static_assert (C23)
// thread_local (C23)
// true (C23)
// typeof (C23)
// typeof_unqual (C23)
// _Alignas (C11)
// _Alignof (C11)
// _Atomic (C11)
// _BitInt (C23)
// _Bool (C99)
// _Complex (C99)
// _Decimal128 (C23)
// _Decimal32 (C23)
// _Decimal64 (C23)
// _Generic (C11)
// _Imaginary (C99)
// _Noreturn (C11)
// _Static_assert (C11)
// _Thread_local (C11)
// Ring buffer of tokens with separate consume and look-ahead cursors.
// Slot indices wrap modulo `cap`.
typedef struct tok_buf {
// Consume cursor: advanced by one slot on every pop_tok.
int cur;
// Slot of the most recently fetched token (set by peek_tok after gettok).
int end;
// Look-ahead cursor: slot of the token last returned by peek_tok;
// flush_peek_tok resets it to `cur`.
int peek;
// Number of tokens fetched but not yet popped.
int size;
// Number of slots in `buf`.
int cap;
// Slot storage. init_tokbuf sets it to NULL (and `cap` to 0), so presumably
// the owner attaches storage afterwards — confirm against callers.
tok_t* buf;
// Opaque handle passed to `gettok` on every fetch.
void* stream;
// Token source: writes the next token into `token`.
void (*gettok)(void* stream, tok_t* token);
} tok_buf_t;
// a = b
// a += b
// a -= b
// a *= b
// a /= b
// a %= b
// a &= b
// a |= b
// a ^= b
// a <<= b
// a >>= b
// ++a
// --a
// a++
// a--
// +a
// -a
// a + b
// a - b
// a * b
// a / b
// a % b
// ~a
// a & b
// a | b
// a ^ b
// a << b
// a >> b
// !a
// a && b
// a || b
// a == b
// a != b
// a < b
// a > b
// a <= b
// a >= b
// a[b]
// *a
// &a
// a->b
// a.b
// a(...)
// a, b
// (type) a
// a ? b : c
// sizeof
// _Alignof
// (C11)
typedef void(*get_tokbuf_func)(void* stream, tok_t* token);
void init_tokbuf(tok_buf_t* tokbuf, void* stream, get_tokbuf_func gettok);
tok_t* peek_tok(tok_buf_t* tokbuf);
tok_t* pop_tok(tok_buf_t* tokbuf);
void flush_peek_tok(tok_buf_t* tokbuf);
tok_type_t peek_tok_type(tok_buf_t* tokbuf);
int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type);
const char* get_tok_name(tok_type_t type);
#endif

View File

@ -14,9 +14,9 @@ void init_ast_node(struct ASTNode* node) {
}
}
struct ASTNode* find_ast_node(struct ASTNode* node, enum ASTType type) {
// struct ASTNode* find_ast_node(struct ASTNode* node, ast_type_t type) {
}
// }
#include <stdio.h>
static void pnt_depth(int depth) {
@ -25,149 +25,149 @@ static void pnt_depth(int depth) {
}
}
void pnt_ast(struct ASTNode* node, int depth) {
if (!node) return;
pnt_depth(depth);
switch (node->type) {
case NT_ROOT:
for (int i = 0; i < node->root.child_size; i++) {
pnt_ast(node->root.children[i], depth);
}
return;
// void pnt_ast(struct ASTNode* node, int depth) {
// if (!node) return;
// pnt_depth(depth);
// switch (node->type) {
// case NT_ROOT:
// for (int i = 0; i < node->root.child_size; i++) {
// pnt_ast(node->root.children[i], depth);
// }
// return;
case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
case NT_SUB : printf("- \n"); break; // (expr) - (expr)
case NT_MUL : printf("* \n"); break; // (expr) * (expr)
case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
case NT_AND : printf("& \n"); break; // (expr) & (expr)
case NT_OR : printf("| \n"); break; // (expr) | (expr)
case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
case NT_EQ : printf("==\n"); break; // (expr) == (expr)
case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
case NT_LT : printf("< \n"); break; // (expr) < (expr)
case NT_GT : printf("> \n"); break; // (expr) > (expr)
case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
case NT_NOT : printf("! \n"); break; // ! (expr)
case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
// case NT_COND : // (expr) ? (expr) : (expr)
// case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
// case NT_SUB : printf("- \n"); break; // (expr) - (expr)
// case NT_MUL : printf("* \n"); break; // (expr) * (expr)
// case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
// case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
// case NT_AND : printf("& \n"); break; // (expr) & (expr)
// case NT_OR : printf("| \n"); break; // (expr) | (expr)
// case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
// case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
// case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
// case NT_EQ : printf("==\n"); break; // (expr) == (expr)
// case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
// case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
// case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
// case NT_LT : printf("< \n"); break; // (expr) < (expr)
// case NT_GT : printf("> \n"); break; // (expr) > (expr)
// case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
// case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
// case NT_NOT : printf("! \n"); break; // ! (expr)
// case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
// case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
// case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
// // case NT_COND : // (expr) ? (expr) : (expr)
case NT_STMT_EMPTY : // ;
printf(";\n");
break;
case NT_STMT_IF : // if (cond) { ... } [else {...}]
printf("if");
pnt_ast(node->if_stmt.cond, depth+1);
pnt_ast(node->if_stmt.if_stmt, depth+1);
if (node->if_stmt.else_stmt) {
pnt_depth(depth);
printf("else");
pnt_ast(node->if_stmt.else_stmt, depth+1);
}
break;
case NT_STMT_WHILE : // while (cond) { ... }
printf("while\n");
pnt_ast(node->while_stmt.cond, depth+1);
pnt_ast(node->while_stmt.body, depth+1);
break;
case NT_STMT_DOWHILE : // do {...} while (cond)
printf("do-while\n");
pnt_ast(node->do_while_stmt.body, depth+1);
pnt_ast(node->do_while_stmt.cond, depth+1);
break;
case NT_STMT_FOR : // for (init; cond; iter) {...}
printf("for\n");
if (node->for_stmt.init)
pnt_ast(node->for_stmt.init, depth+1);
if (node->for_stmt.cond)
pnt_ast(node->for_stmt.cond, depth+1);
if (node->for_stmt.iter)
pnt_ast(node->for_stmt.iter, depth+1);
pnt_ast(node->for_stmt.body, depth+1);
break;
case NT_STMT_SWITCH : // switch (expr) { case ... }
case NT_STMT_BREAK : // break;
case NT_STMT_CONTINUE : // continue;
case NT_STMT_GOTO : // goto label;
case NT_STMT_CASE : // case const_expr:
case NT_STMT_DEFAULT : // default:
case NT_STMT_LABEL : // label:
break;
case NT_STMT_BLOCK : // { ... }
printf("{\n");
for (int i = 0; i < node->block.child_size; i++) {
pnt_ast(node->block.children[i], depth+1);
}
pnt_depth(depth);
printf("}\n");
break;
case NT_STMT_RETURN : // return expr;
printf("return");
if (node->return_stmt.expr_stmt) {
printf(" ");
pnt_ast(node->return_stmt.expr_stmt, depth+1);
} else {
printf("\n");
}
break;
case NT_STMT_EXPR : // expr;
printf("stmt\n");
pnt_ast(node->expr_stmt.expr_stmt, depth);
pnt_depth(depth);
printf(";\n");
break;
case NT_DECL_VAR : // type name; or type name = expr;
printf("decl_val\n");
break;
case NT_DECL_FUNC: // type func_name(param_list);
printf("decl func %s\n", node->func.name->syms.tok.constant.str);
break;
case NT_FUNC : // type func_name(param_list) {...}
printf("def func %s\n", node->func.name->syms.tok.constant.str);
// pnt_ast(node->child.func.params, depth);
pnt_ast(node->func.body, depth);
// pnt_ast(node->child.func.ret, depth);
break;
case NT_PARAM : // 函数形参
printf("param\n");
case NT_ARG_LIST : // 实参列表需要与NT_CALL配合
printf("arg_list\n");
case NT_TERM_CALL : // func (expr)
printf("call\n");
break;
case NT_TERM_IDENT:
printf("%s\n", node->syms.tok.constant.str);
break;
case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword
struct Token * tok = &node->syms.tok;
switch (tok->type) {
case TOKEN_CHAR_LITERAL:
printf("%c\n", tok->constant.ch);
break;
case TOKEN_INT_LITERAL:
printf("%d\n", tok->constant.i);
break;
case TOKEN_STRING_LITERAL:
printf("%s\n", tok->constant.str);
break;
default:
printf("unknown term val\n");
break;
}
default:
break;
}
// case NT_STMT_EMPTY : // ;
// printf(";\n");
// break;
// case NT_STMT_IF : // if (cond) { ... } [else {...}]
// printf("if");
// pnt_ast(node->if_stmt.cond, depth+1);
// pnt_ast(node->if_stmt.if_stmt, depth+1);
// if (node->if_stmt.else_stmt) {
// pnt_depth(depth);
// printf("else");
// pnt_ast(node->if_stmt.else_stmt, depth+1);
// }
// break;
// case NT_STMT_WHILE : // while (cond) { ... }
// printf("while\n");
// pnt_ast(node->while_stmt.cond, depth+1);
// pnt_ast(node->while_stmt.body, depth+1);
// break;
// case NT_STMT_DOWHILE : // do {...} while (cond)
// printf("do-while\n");
// pnt_ast(node->do_while_stmt.body, depth+1);
// pnt_ast(node->do_while_stmt.cond, depth+1);
// break;
// case NT_STMT_FOR : // for (init; cond; iter) {...}
// printf("for\n");
// if (node->for_stmt.init)
// pnt_ast(node->for_stmt.init, depth+1);
// if (node->for_stmt.cond)
// pnt_ast(node->for_stmt.cond, depth+1);
// if (node->for_stmt.iter)
// pnt_ast(node->for_stmt.iter, depth+1);
// pnt_ast(node->for_stmt.body, depth+1);
// break;
// case NT_STMT_SWITCH : // switch (expr) { case ... }
// case NT_STMT_BREAK : // break;
// case NT_STMT_CONTINUE : // continue;
// case NT_STMT_GOTO : // goto label;
// case NT_STMT_CASE : // case const_expr:
// case NT_STMT_DEFAULT : // default:
// case NT_STMT_LABEL : // label:
// break;
// case NT_STMT_BLOCK : // { ... }
// printf("{\n");
// for (int i = 0; i < node->block.child_size; i++) {
// pnt_ast(node->block.children[i], depth+1);
// }
// pnt_depth(depth);
// printf("}\n");
// break;
// case NT_STMT_RETURN : // return expr;
// printf("return");
// if (node->return_stmt.expr_stmt) {
// printf(" ");
// pnt_ast(node->return_stmt.expr_stmt, depth+1);
// } else {
// printf("\n");
// }
// break;
// case NT_STMT_EXPR : // expr;
// printf("stmt\n");
// pnt_ast(node->expr_stmt.expr_stmt, depth);
// pnt_depth(depth);
// printf(";\n");
// break;
// case NT_DECL_VAR : // type name; or type name = expr;
// printf("decl_val\n");
// break;
// case NT_DECL_FUNC: // type func_name(param_list);
// printf("decl func %s\n", node->func.name->syms.tok.val.str);
// break;
// case NT_FUNC : // type func_name(param_list) {...}
// printf("def func %s\n", node->func.name->syms.tok.val.str);
// // pnt_ast(node->child.func.params, depth);
// pnt_ast(node->func.body, depth);
// // pnt_ast(node->child.func.ret, depth);
// break;
// case NT_PARAM : // 函数形参
// printf("param\n");
// case NT_ARG_LIST : // 实参列表需要与NT_CALL配合
// printf("arg_list\n");
// case NT_TERM_CALL : // func (expr)
// printf("call\n");
// break;
// case NT_TERM_IDENT:
// printf("%s\n", node->syms.tok.val.str);
// break;
// case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword
// tok_t * tok = &node->syms.tok;
// switch (tok->type) {
// case TOKEN_CHAR_LITERAL:
// printf("%c\n", tok->val.ch);
// break;
// case TOKEN_INT_LITERAL:
// printf("%d\n", tok->val.i);
// break;
// case TOKEN_STRING_LITERAL:
// printf("%s\n", tok->val.str);
// break;
// default:
// printf("unknown term val\n");
// break;
// }
// default:
// break;
// }
// 通用子节点递归处理
if (node->type <= NT_ASSIGN) { // 表达式类统一处理子节点
if (node->expr.left) pnt_ast(node->expr.left, depth+1);
if (node->expr.right) pnt_ast(node->expr.right, depth + 1);
}
}
// // 通用子节点递归处理
// if (node->type <= NT_ASSIGN) { // 表达式类统一处理子节点
// if (node->expr.left) pnt_ast(node->expr.left, depth+1);
// if (node->expr.right) pnt_ast(node->expr.right, depth + 1);
// }
// }

View File

@ -3,9 +3,10 @@
#include "../../frontend.h"
#include "../../lexer/lexer.h"
#include "../../../../libcore/vector.h"
#include "../type.h"
enum ASTType {
typedef enum {
NT_INIT,
NT_ROOT, // global scope in root node
NT_ADD, // (expr) + (expr)
@ -75,31 +76,28 @@ enum ASTType {
NT_TERM_VAL,
NT_TERM_IDENT,
NT_TERM_TYPE,
};
} ast_type_t;
struct ASTNode {
enum ASTType type;
typedef struct ASTNode {
ast_type_t type;
union {
void *children[6];
struct {
struct ASTNode** children;
int child_size;
vector_header(children, struct ASTNode*);
} root;
struct {
struct ASTNode** children; // array of children
int child_size;
vector_header(children, struct ASTNode*);
} block;
struct {
struct ASTNode* decl_node;
struct Token tok;
tok_t tok;
} syms;
struct {
struct ASTNode *arr;
int size;
vector_header(params, struct ASTNode*);
} params;
struct {
const char* name;
struct ASTNode* name;
struct ASTNode* params;
struct ASTNode* func_decl;
} call;
@ -113,13 +111,12 @@ struct ASTNode {
struct ASTNode *ret;
struct ASTNode *name;
struct ASTNode *params; // array of params
void* data;
} func_decl;
struct ASTNode *def;
} decl_func;
struct {
struct ASTNode *ret;
struct ASTNode *name;
struct ASTNode *params; // array of params
struct ASTNode *decl;
struct ASTNode *body; // optional
void* data;
} func;
struct {
struct ASTNode *left;
@ -165,27 +162,26 @@ struct ASTNode {
struct ASTNode *expr_stmt;
} expr_stmt;
};
};
} ast_node_t;
struct ASTNode* new_ast_node(void);
void init_ast_node(struct ASTNode* node);
void pnt_ast(struct ASTNode* node, int depth);
struct Parser;
typedef struct ASTNode* (*parse_func_t) (struct Parser*);
typedef struct parser parser_t;
typedef struct ASTNode* (*parse_func_t) (parser_t*);
void parse_prog(struct Parser* parser);
struct ASTNode* parse_block(struct Parser* parser);
struct ASTNode* parse_stmt(struct Parser* parser);
struct ASTNode* parse_expr(struct Parser* parser);
struct ASTNode* parse_func(struct Parser* parser);
struct ASTNode* parse_decl(struct Parser* parser);
void parse_prog(parser_t* parser);
ast_node_t* parse_decl(parser_t* parser);
ast_node_t* parse_block(parser_t* parser);
ast_node_t* parse_stmt(parser_t* parser);
ast_node_t* parse_expr(parser_t* parser);
struct ASTNode* parse_ident(struct Parser* parser);
struct ASTNode* parse_type(struct Parser* parser);
ast_node_t* parse_type(parser_t* parser);
int peek_decl(struct Parser* parser);
ast_node_t* new_ast_ident_node(tok_t* tok);
ast_node_t* expect_pop_ident(tok_buf_t* tokbuf);
struct ASTNode* parser_ident_without_pop(struct Parser* parser);
int peek_decl(tok_buf_t* tokbuf);
#endif

View File

@ -1,48 +1,49 @@
#include "../parser.h"
#include "ast.h"
#include "../parser.h"
#include "../symtab/symtab.h"
#ifndef BLOCK_MAX_NODE
#define BLOCK_MAX_NODE (1024)
#endif
struct ASTNode* parse_block(struct Parser* parser) {
symtab_enter_scope(parser->symtab);
// parse_decl(parser); // decl_var
enum TokenType ttype;
struct ASTNode* node = new_ast_node();
ast_node_t* new_ast_node_block() {
ast_node_t* node = new_ast_node();
node->type = NT_BLOCK;
flushpeektok(parser);
ttype = peektoktype(parser);
if (ttype != TOKEN_L_BRACE) {
error("block need '{' start");
}
poptok(parser);
vector_init(node->block.children);
return node;
}
node->block.children = malloc(sizeof(struct ASTNode*) * BLOCK_MAX_NODE);
struct ASTNode* child = NULL;
ast_node_t* parse_block(parser_t* parser) {
symtab_enter_scope(parser->symtab);
tok_buf_t *tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype;
ast_node_t* node = new_ast_node_block();
expect_pop_tok(tokbuf, TOKEN_L_BRACE);
ast_node_t* child = NULL;
while (1) {
if (peek_decl(parser) == 1) {
if (peek_decl(tokbuf)) {
child = parse_decl(parser);
goto ADD_CHILD;
vector_push(node->block.children, child);
continue;
}
flushpeektok(parser);
ttype = peektoktype(parser);
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
switch (ttype) {
case TOKEN_R_BRACE:
poptok(parser);
goto END;
default:
child = parse_stmt(parser);
goto ADD_CHILD;
break;
case TOKEN_R_BRACE: {
pop_tok(tokbuf);
goto END;
}
default: {
child = parse_stmt(parser);
vector_push(node->block.children, child);
break;
}
}
continue;
ADD_CHILD:
node->block.children[node->block.child_size++] = child;
}
END:
symtab_leave_scope(parser->symtab);

View File

@ -6,9 +6,9 @@
* 0 false
* 1 true
*/
int peek_decl(struct Parser* parser) {
flushpeektok(parser);
switch (peektoktype(parser)) {
int peek_decl(tok_buf_t* tokbuf) {
flush_peek_tok(tokbuf);
switch (peek_tok_type(tokbuf)) {
case TOKEN_STATIC:
case TOKEN_EXTERN:
case TOKEN_REGISTER:
@ -16,10 +16,10 @@ int peek_decl(struct Parser* parser) {
error("not impliment");
break;
default:
flushpeektok(parser);
flush_peek_tok(tokbuf);
}
switch (peektoktype(parser)) {
switch (peek_tok_type(tokbuf)) {
case TOKEN_VOID:
case TOKEN_CHAR:
case TOKEN_SHORT:
@ -27,60 +27,62 @@ int peek_decl(struct Parser* parser) {
case TOKEN_LONG:
case TOKEN_FLOAT:
case TOKEN_DOUBLE:
// FIXME Ptr
return 1;
default:
flushpeektok(parser);
flush_peek_tok(tokbuf);
}
return 0;
}
struct ASTNode* parse_decl_val(struct Parser* parser) {
flushpeektok(parser);
// parse_type
enum TokenType ttype;
struct ASTNode* node;
ast_node_t* parse_decl_val(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
tok_type_t ttype;
flush_peek_tok(tokbuf);
struct ASTNode* type_node = parse_type(parser);
struct ASTNode* name_node = parser_ident_without_pop(parser);
ast_node_t* node;
ast_node_t* type_node = parse_type(parser);
flush_peek_tok(tokbuf);
ast_node_t* name_node = new_ast_ident_node(peek_tok(tokbuf));
node = new_ast_node();
node->decl_val.type = type_node;
node->decl_val.name = name_node;
node->type = NT_DECL_VAR;
symtab_add_symbol(parser->symtab, name_node->syms.tok.constant.str, node);
symtab_add_symbol(parser->symtab, name_node->syms.tok.val.str, node, 0);
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_ASSIGN) {
node->decl_val.expr_stmt = parse_stmt(parser);
if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) {
error("parser_decl_val want stmt_expr");
}
} else if (ttype == TOKEN_SEMICOLON) {
poptok(parser);
expecttok(parser, TOKEN_SEMICOLON);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
} else {
error("parser_decl_val syntax error");
}
return node;
}
// 类型解析入口改进
struct ASTNode* parse_decl(struct Parser* parser) {
flushpeektok(parser);
int idx;
enum TokenType ttype;
struct ASTNode* node;
ast_node_t* parse_decl(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype;
ast_node_t* node;
if (peek_decl(parser) == 0) {
if (peek_decl(tokbuf) == 0) {
error("syntax error expect decl_val TYPE");
}
if (peektoktype(parser) != TOKEN_IDENT) {
if (peek_tok_type(tokbuf) != TOKEN_IDENT) {
error("syntax error expect decl_val IDENT");
}
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
switch (ttype) {
case TOKEN_L_PAREN: // (
node = parse_func(parser);
return NULL;
break;
case TOKEN_ASSIGN:
case TOKEN_SEMICOLON:

View File

@ -33,14 +33,18 @@ enum ParseType {
PREFIX_PARSER,
};
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec);
static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec);
#define NEXT(prec) parse_subexpression(tokbuf, symtab, prec)
static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right,
enum ASTType type) {
struct ASTNode* node = new_ast_node();
static ast_node_t* gen_node2(ast_node_t* left, ast_node_t* right,
ast_type_t type) {
ast_node_t* node = new_ast_node();
node->type = type;
node->expr.left = left;
node->expr.right = right;
return node;
// FIXME
// switch (type) {
// case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
// case NT_SUB : printf("- \n"); break; // (expr) - (expr)
@ -68,154 +72,157 @@ static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right,
// }
}
static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) {
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_comma(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
ast_node_t* node = new_ast_node();
node->type = NT_COMMA;
node->expr.left = left;
node->expr.right = parse_subexpression(parser, PREC_EXPRESSION);
node->expr.right = NEXT(PREC_EXPRESSION);
return node;
}
static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_assign(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
ast_node_t* node = new_ast_node();
node->type = NT_ASSIGN;
// saved left
node->expr.left = left;
enum Precedence next = PREC_ASSIGNMENT + 1;
switch (ttype) {
case TOKEN_ASSIGN :
left = parse_subexpression(parser, next);
left = NEXT(next);
break;
case TOKEN_ASSIGN_ADD :
left = gen_node2(left, parse_subexpression(parser, next), NT_ADD);
left = gen_node2(left, NEXT(next), NT_ADD);
break;
case TOKEN_ASSIGN_SUB :
left = gen_node2(left, parse_subexpression(parser, next), NT_SUB);
left = gen_node2(left, NEXT(next), NT_SUB);
break;
case TOKEN_ASSIGN_MUL :
left = gen_node2(left, parse_subexpression(parser, next), NT_MUL);
left = gen_node2(left, NEXT(next), NT_MUL);
break;
case TOKEN_ASSIGN_DIV :
left = gen_node2(left, parse_subexpression(parser, next), NT_DIV);
left = gen_node2(left, NEXT(next), NT_DIV);
break;
case TOKEN_ASSIGN_MOD :
left = gen_node2(left, parse_subexpression(parser, next), NT_MOD);
left = gen_node2(left, NEXT(next), NT_MOD);
break;
case TOKEN_ASSIGN_L_SH :
left = gen_node2(left, parse_subexpression(parser, next), NT_L_SH);
left = gen_node2(left, NEXT(next), NT_L_SH);
break;
case TOKEN_ASSIGN_R_SH :
left = gen_node2(left, parse_subexpression(parser, next), NT_R_SH);
left = gen_node2(left, NEXT(next), NT_R_SH);
break;
case TOKEN_ASSIGN_AND :
left = gen_node2(left, parse_subexpression(parser, next), NT_AND);
left = gen_node2(left, NEXT(next), NT_AND);
break;
case TOKEN_ASSIGN_OR :
left = gen_node2(left, parse_subexpression(parser, next), NT_OR);
left = gen_node2(left, NEXT(next), NT_OR);
break;
case TOKEN_ASSIGN_XOR :
left = gen_node2(left, parse_subexpression(parser, next), NT_XOR);
left = gen_node2(left, NEXT(next), NT_XOR);
break;
default:
error("unsupported operator");
break;
}
node->expr.right = left;
return node;
}
static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_cmp(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
ast_node_t* node = new_ast_node();
// saved left
node->expr.left = left;
switch (ttype) {
case TOKEN_EQ:
node->type = NT_EQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
node->expr.right = NEXT(PREC_EQUALITY);
break;
case TOKEN_NEQ:
node->type = NT_NEQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
node->expr.right = NEXT(PREC_EQUALITY);
break;
case TOKEN_LT:
node->type = NT_LT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
node->expr.right = NEXT(PREC_RELATIONAL);
break;
case TOKEN_GT:
node->type = NT_GT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
node->expr.right = NEXT(PREC_RELATIONAL);
break;
case TOKEN_LE:
node->type = NT_LE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
node->expr.right = NEXT(PREC_RELATIONAL);
break;
case TOKEN_GE:
node->type = NT_GE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
node->expr.right = NEXT(PREC_RELATIONAL);
break;
default:
error("invalid operator");
}
return node;
}
static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_cal(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
ast_node_t* node = new_ast_node();
node->expr.left = left;
switch (ttype) {
case TOKEN_OR_OR:
node->type = NT_OR_OR;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_OR);
node->expr.right = NEXT(PREC_LOGICAL_OR);
break;
case TOKEN_AND_AND:
node->type = NT_AND_AND;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_AND);
node->expr.right = NEXT(PREC_LOGICAL_AND);
break;
case TOKEN_OR:
node->type = NT_OR;
node->expr.right = parse_subexpression(parser, PREC_OR);
node->expr.right = NEXT(PREC_OR);
break;
case TOKEN_XOR:
node->type = NT_XOR;
node->expr.right = parse_subexpression(parser, PREC_XOR);
node->expr.right = NEXT(PREC_XOR);
break;
case TOKEN_AND:
node->type = NT_AND;
node->expr.right = parse_subexpression(parser, PREC_AND);
node->expr.right = NEXT(PREC_AND);
break;
case TOKEN_L_SH:
node->type = NT_L_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
node->expr.right = NEXT(PREC_SHIFT);
break;
case TOKEN_R_SH:
node->type = NT_R_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
node->expr.right = NEXT(PREC_SHIFT);
break;
case TOKEN_ADD:
node->type = NT_ADD;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
node->expr.right = NEXT(PREC_ADDITIVE);
break;
case TOKEN_SUB:
node->type = NT_SUB;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
node->expr.right = NEXT(PREC_ADDITIVE);
break;
case TOKEN_MUL:
node->type = NT_MUL;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
node->expr.right = NEXT(PREC_MULTIPLICATIVE);
break;
case TOKEN_DIV:
node->type = NT_DIV;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
node->expr.right = NEXT(PREC_MULTIPLICATIVE);
break;
case TOKEN_MOD:
node->type = NT_MOD;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
node->expr.right = NEXT(PREC_MULTIPLICATIVE);
break;
default:
break;
@ -223,44 +230,50 @@ static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) {
return node;
}
// Function-call parsing: builds an NT_TERM_CALL node for `ident(...)`.
// NOTE(review): this span appears to interleave the removed (struct Parser*)
// and the added (tok_buf_t*/symtab_t*) revision of the function — confirm
// which lines are live before editing.
// In the tok_buf_t revision every argument parsed through NEXT(PREC_EXPRESSION)
// is pushed onto node->call.params, and the callee name is then looked up in
// `symtab`; error() is called unless the lookup yields an NT_DECL_FUNC node.
static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) {
struct ASTNode* node = new_ast_node();
static ast_node_t* parse_call(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* ident) {
ast_node_t* node = new_ast_node();
node->type = NT_TERM_CALL;
poptok(parser); // skip '('
node->call.name = ident;
// The argument list lives in its own AST node that owns a vector of params.
node->call.params = new_ast_node();
vector_init(node->call.params->params.params);
pop_tok(tokbuf); // skip '('
enum TokenType ttype;
// Parse the argument list
while ((ttype = peektoktype(parser)) != TOKEN_R_PAREN) {
// add_arg(node, parse_expr(parser));
if (ttype == TOKEN_COMMA) poptok(parser);
else poptok(parser);
tok_type_t ttype;
while (1) {
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
// ')' ends the argument list (also covers an empty list).
if (ttype == TOKEN_R_PAREN) {
break;
}
ast_node_t* param = NEXT(PREC_EXPRESSION);
vector_push(node->call.params->params.params, param);
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
// A ',' separator is consumed; any other token is left for the next iteration.
if (ttype == TOKEN_COMMA) pop_tok(tokbuf);
}
poptok(parser); // skip ')'
pop_tok(tokbuf); // skip ')'
char* name = ident->syms.tok.constant.str;
void* sym = symtab_lookup_symbol(parser->symtab, name);
if (sym == NULL) {
const char* name = ident->syms.tok.val.str;
ast_node_t* sym = symtab_lookup_symbol(symtab, name);
// TODO check func is match
if (sym == NULL || sym->type != NT_DECL_FUNC) {
error("function not decl %s", name);
}
node->call.name = name;
node->call.params = NULL;
node->call.name = ident;
node->call.func_decl = sym;
return node;
}
// Parses a parenthesised expression: expects '(', parses an expression at
// PREC_EXPRESSION, expects ')', and returns the inner expression node.
// The incoming `left` value is overwritten, not combined with the result.
// NOTE(review): the struct Parser* and tok_buf_t* revisions of this function
// appear interleaved here — confirm which lines are live.
static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype;
expecttok(parser, TOKEN_L_PAREN);
left = parse_subexpression(parser, PREC_EXPRESSION);
flushpeektok(parser);
expecttok(parser, TOKEN_R_PAREN);
static ast_node_t* parse_paren(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
left = NEXT(PREC_EXPRESSION);
flush_peek_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
return left;
}
// Signature of the expression handlers stored in expr_prec_table_t::parser.
// The second form (tok_buf_t*, symtab_t*, ast_node_t*) matches the calls
// work->parser(tokbuf, symtab, left) made by parse_subexpression.
typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*);
typedef ast_node_t* (*parse_expr_fun_t)(tok_buf_t*, symtab_t* , ast_node_t*);
static struct expr_prec_table_t {
parse_expr_fun_t parser;
enum Precedence prec;
@ -309,11 +322,11 @@ static struct expr_prec_table_t {
[TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
};
static struct ASTNode *parse_primary_expression(struct Parser* parser) {
flushpeektok(parser);
static ast_node_t *parse_primary_expression(tok_buf_t* tokbuf, symtab_t *symtab) {
flush_peek_tok(tokbuf);
struct Token* tok = peektok(parser);
struct ASTNode *node = new_ast_node();
tok_t* tok = peek_tok(tokbuf);
ast_node_t *node = new_ast_node();
node->type = NT_TERM_VAL;
node->syms.tok = *tok;
@ -330,34 +343,35 @@ static struct ASTNode *parse_primary_expression(struct Parser* parser) {
case TOKEN_STRING_LITERAL:
// node->data.data_type = TYPE_POINTER;
case TOKEN_IDENT:
node = parse_ident(parser);
if (peektoktype(parser) == TOKEN_L_PAREN) {
node = parse_call(parser, node);
node = expect_pop_ident(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_L_PAREN) {
node = parse_call(tokbuf, symtab, node);
} else {
void *sym = symtab_lookup_symbol(parser->symtab, tok->constant.str);
void *sym = symtab_lookup_symbol(symtab, tok->val.str);
if (sym == NULL) {
error("undefined symbol but use %s", tok->constant.str);
error("undefined symbol but use %s", tok->val.str);
}
node->type = NT_TERM_IDENT;
node->syms.decl_node = sym;
goto END;
}
goto END;
default:
return NULL;
}
poptok(parser);
pop_tok(tokbuf);
END:
return node;
}
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) {
enum TokenType ttype;
struct expr_prec_table_t* work;
struct ASTNode* left;
static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec) {
tok_type_t ttype;
struct expr_prec_table_t* work;
ast_node_t* left;
while (1) {
flushpeektok(parser);
ttype = peektoktype(parser);
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
work = &expr_table[ttype];
// FIXME
if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
@ -365,16 +379,16 @@ static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedenc
}
if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) {
if (work->parser != NULL) {
left = work->parser(parser, NULL);
left = work->parser(tokbuf, symtab, NULL);
} else {
left = parse_primary_expression(parser);
left = parse_primary_expression(tokbuf, symtab);
}
} else if (work->ptype == INFIX_PARSER) {
if (work->parser == NULL)
break;
if (work->prec <= prec)
break;
left = work->parser(parser, left);
left = work->parser(tokbuf, symtab, left);
}
// assert(left != NULL);
}
@ -382,9 +396,11 @@ static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedenc
return left;
}
struct ASTNode* parse_expr(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
ast_node_t* parse_expr(parser_t* parser) {
tok_buf_t* tokbuf = &(parser->tokbuf);
symtab_t *symtab = parser->symtab;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
switch (ttype) {
case TOKEN_NOT:
case TOKEN_AND:
@ -401,9 +417,9 @@ struct ASTNode* parse_expr(struct Parser* parser) {
case TOKEN_SUB_SUB:
case TOKEN_SIZEOF:
case TOKEN_IDENT:
return parse_subexpression(parser, PREC_EXPRESSION);
return NEXT(PREC_EXPRESSION);
default:
error("Want expr but not got %s", get_token_name(ttype));
error("Want expr but not got %s", get_tok_name(ttype));
break;
}
}

View File

@ -6,34 +6,21 @@
#define FUNC_PARAM_CACHE_SIZE 32 // 合理初始值可覆盖99%常见情况
#endif
// Fixed-size token cache holding a function's parameter-list tokens.
struct FuncParamCache {
struct Token tokens[FUNC_PARAM_CACHE_SIZE];
int read_pos; // current read position
int write_pos; // write position
int depth; // current cache depth (used as the parenthesis nesting counter by check_is_func_decl)
};
// Returns the type of the token at the cache's read position and advances
// the read position by one. No bounds check is performed.
static enum TokenType peekcachetype(struct FuncParamCache* cache) {
    int slot = cache->read_pos;
    cache->read_pos = slot + 1;
    return cache->tokens[slot].type;
}
// TODO 语义分析压入符号表
static void parse_params(struct Parser* parser, struct FuncParamCache* cache, struct ASTNode* node) {
// = peekcachetype(cache);
enum TokenType ttype;
// if (ttype != TOKEN_L_PAREN) {
// error("function expected '('\n");
// }
struct ASTNode *params = new_ast_node();
node->func.params = params;
int params_size = 0;
static void parse_params(parser_t* parser, tok_buf_t* cache, ast_node_t* node) {
tok_type_t ttype;
ast_node_t *params = new_ast_node();
node->decl_func.params = params;
vector_init(params->params.params);
while ((ttype = peekcachetype(cache)) != TOKEN_R_PAREN) {
int depth = 1;
while (depth) {
ttype = peek_tok_type(cache);
switch (ttype) {
case TOKEN_COMMA:
break;
case TOKEN_ELLIPSIS:
ttype = peekcachetype(cache);
ttype = peek_tok_type(cache);
if (ttype != TOKEN_R_PAREN) {
error("... must be a last parameter list (expect ')')");
}
@ -41,9 +28,29 @@ static void parse_params(struct Parser* parser, struct FuncParamCache* cache, st
error("not implement");
break;
case TOKEN_IDENT:
params->children[params_size++] = NULL;
// TODO 静态数组
flush_peek_tok(cache);
ast_node_t* id_node = new_ast_ident_node(peek_tok(cache));
ast_node_t* node = new_ast_node();
node->type = NT_DECL_VAR;
node->decl_val.name = id_node;
// TODO typing sys
node->decl_val.type = NULL;
node->decl_val.expr_stmt = NULL;
node->decl_val.data = NULL;
vector_push(params->params.params, node);
symtab_add_symbol(parser->symtab, id_node->syms.tok.val.str, node, 0);
break;
case TOKEN_L_PAREN: {
depth++;
break;
}
case TOKEN_R_PAREN: {
depth--;
break;
}
default:
break;
// TODO 使用cache的类型解析
// parse_type(parser);
// TODO type parse
@ -51,39 +58,42 @@ static void parse_params(struct Parser* parser, struct FuncParamCache* cache, st
// ttype = peekcachetype(cache);
// if (ttype != TOKEN_IDENT) {
// node->node_type = NT_DECL_FUNC;
// flushpeektok(parser);
// flush_peek_tok(tokbuf);
// continue;
// }
// error("function expected ')' or ','\n");
}
pop_tok(cache);
}
}
enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* cache) {
cache->depth = 1;
cache->read_pos = 0;
cache->write_pos = 0;
ast_type_t check_is_func_decl(tok_buf_t* tokbuf, tok_buf_t* cache) {
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
int depth = 1;
while (cache->depth) {
struct Token* tok = peektok(parser);
poptok(parser);
if (cache->write_pos >= FUNC_PARAM_CACHE_SIZE - 1) {
while (depth) {
tok_t* tok = peek_tok(tokbuf);
pop_tok(tokbuf);
if (cache->size >= cache->cap - 1) {
error("function parameter list too long");
}
cache->tokens[cache->write_pos++] = *tok;
cache->buf[cache->size++] = *tok;
switch (tok->type) {
case TOKEN_L_PAREN:
cache->depth++;
depth++;
break;
case TOKEN_R_PAREN:
cache->depth--;
depth--;
break;
default:
break;
}
}
cache->end = cache->size;
switch (peektoktype(parser)) {
switch (peek_tok_type(tokbuf)) {
case TOKEN_SEMICOLON:
poptok(parser);
pop_tok(tokbuf);
return NT_DECL_FUNC;
case TOKEN_L_BRACE:
return NT_FUNC;
@ -93,28 +103,66 @@ enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* ca
}
}
struct ASTNode* parse_func(struct Parser* parser) {
struct ASTNode* ret_type = parse_type(parser);
struct ASTNode* func_name = parse_ident(parser);
// Allocates a function-declaration node (NT_DECL_FUNC) that records the
// return-type node `ret` and the identifier node `name`. The link to a
// definition (`def`) starts out NULL; parse_func fills it in when a body
// is parsed.
static ast_node_t* new_ast_node_funcdecl(ast_node_t* ret, ast_node_t* name) {
    ast_node_t* decl = new_ast_node();

    decl->type = NT_DECL_FUNC;
    decl->decl_func.def = NULL;
    decl->decl_func.name = name;
    decl->decl_func.ret = ret;

    return decl;
}
struct ASTNode* node = new_ast_node();
node->func.ret = ret_type;
node->func.name = func_name;
// Parses one function declaration or definition, starting at its return type,
// and appends the resulting node(s) to parser->root->root.children.
// NOTE(review): lines from the previous revision (flushpeektok/expecttok,
// struct FuncParamCache, `return node;`) appear interleaved with the
// parser_t revision — confirm which lines are live.
// Flow of the parser_t revision:
//   1. parse the return type and the name, build an NT_DECL_FUNC node;
//   2. check_is_func_decl() copies the parameter tokens into `cache` and
//      returns NT_DECL_FUNC when ';' follows or NT_FUNC when '{' follows;
//   3. register the name via symtab_add_symbol(..., 1); a non-NULL result is
//      treated as the previously registered node (presumably an earlier
//      declaration of the same name — verify against symtab_add_symbol);
//   4. for a definition, build an NT_FUNC node, parse parameters and body
//      inside a fresh scope, and push the node onto the root's children.
void parse_func(parser_t* parser) {
tok_buf_t* tokbuf = &(parser->tokbuf);
flush_peek_tok(tokbuf);
ast_node_t* ret_node = parse_type(parser);
ast_node_t* name_node = expect_pop_ident(tokbuf);
const char* func_name = name_node->syms.tok.val.str;
ast_node_t* decl = new_ast_node_funcdecl(ret_node, name_node);
flushpeektok(parser);
expecttok(parser, TOKEN_L_PAREN);
struct FuncParamCache cache;
node->type = check_is_func_decl(parser, &cache);
// The cache is backed by a stack array, so it is only valid during this call.
tok_buf_t cache;
init_tokbuf(&cache, NULL, NULL);
cache.cap = FUNC_PARAM_CACHE_SIZE;
tok_t buf[FUNC_PARAM_CACHE_SIZE];
cache.buf = buf;
ast_type_t type = check_is_func_decl(&(parser->tokbuf), &cache);
symtab_add_symbol(parser->symtab, func_name->syms.tok.constant.str, node);
if (node->type == NT_DECL_FUNC) {
return node;
ast_node_t* prev = symtab_add_symbol(parser->symtab, func_name, decl, 1);
if (prev != NULL) {
if (prev->type != NT_DECL_FUNC) {
error("the symbol duplicate old is %d, new is func", prev->type);
}
// TODO check redeclare func is match
if (type == NT_FUNC) {
// TODO Free decl;
// Drop the freshly built declaration and attach the body to the earlier one.
free(decl);
decl = prev;
goto FUNC;
}
// A repeated declaration without a body adds nothing new.
return;
}
vector_push(parser->root->root.children, decl);
if (type == NT_DECL_FUNC) {
return;
}
FUNC:
// This field is temporarily used to detect a duplicate definition.
if (decl->decl_func.def != NULL) {
error("redefinition of function %s", func_name);
}
ast_node_t* node = new_ast_node();
node->type = NT_FUNC;
node->func.decl = decl;
node->func.data = NULL;
decl->decl_func.def = node;
// Parameters and body share one scope that is left once the body is parsed.
symtab_enter_scope(parser->symtab);
parse_params(parser, &cache, node);
parse_params(parser, &cache, decl);
node->func.body = parse_block(parser);
symtab_leave_scope(parser->symtab);
return node;
vector_push(parser->root->root.children, node);
}

View File

@ -5,25 +5,30 @@
#define PROG_MAX_NODE_SIZE (1024 * 4)
#endif
// Parses a whole translation unit: allocates the NT_ROOT node and keeps
// consuming top-level constructs until TOKEN_EOF is peeked.
// NOTE(review): the struct Parser* revision (fixed children array with
// child_size) and the parser_t revision (vector-based children) appear
// interleaved here — confirm which lines are live.
void parse_prog(struct Parser* parser) {
void parse_func(parser_t* parser);
void parse_prog(parser_t* parser) {
/**
* Program := (Declaration | Definition)*
* same as
* Program := Declaration* Definition*
*/
int child_size = 0;
tok_buf_t *tokbuf = &(parser->tokbuf);
parser->root = new_ast_node();
struct ASTNode* node;
parser->root->root.children = xmalloc(sizeof(struct ASTNode*) * PROG_MAX_NODE_SIZE);
ast_node_t* node;
parser->root->type = NT_ROOT;
vector_init(parser->root->root.children);
while (1) {
flushpeektok(parser);
if (peektoktype(parser) == TOKEN_EOF) {
flush_peek_tok(tokbuf);
if (peek_tok_type(tokbuf) == TOKEN_EOF) {
break;
}
node = parse_decl(parser);
parser->root->root.children[child_size++] = node;
// When parse_decl yields NULL, fall back to parse_func, which pushes its
// own node(s) onto root.children; otherwise record the declaration here.
if (node == NULL) {
parse_func(parser);
} else {
vector_push(parser->root->root.children, node);
}
}
parser->root->type = NT_ROOT;
parser->root->root.child_size = child_size;
return;
}

View File

@ -1,27 +1,28 @@
#include "../parser.h"
#include "ast.h"
struct ASTNode* parse_stmt(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
struct ASTNode* node = new_ast_node();
ast_node_t* parse_stmt(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
ast_node_t* node = new_ast_node();
switch (ttype) {
case TOKEN_IF: {
/**
* if (exp) stmt
* if (exp) stmt else stmt
*/
poptok(parser);
pop_tok(tokbuf);
expecttok(parser, TOKEN_L_PAREN);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
node->if_stmt.cond = parse_expr(parser);
flushpeektok(parser);
expecttok(parser, TOKEN_R_PAREN);
flush_peek_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
node->if_stmt.if_stmt = parse_stmt(parser);
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_ELSE) {
poptok(parser);
pop_tok(tokbuf);
node->if_stmt.else_stmt = parse_stmt(parser);
} else {
node->if_stmt.else_stmt = NULL;
@ -33,11 +34,11 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
/**
* switch (exp) stmt
*/
poptok(parser);
pop_tok(tokbuf);
expecttok(parser, TOKEN_L_PAREN);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
node->switch_stmt.cond = parse_expr(parser);
expecttok(parser, TOKEN_R_PAREN);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
node->switch_stmt.body = parse_stmt(parser);
node->type = NT_STMT_SWITCH;
@ -47,11 +48,11 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
/**
* while (exp) stmt
*/
poptok(parser);
pop_tok(tokbuf);
expecttok(parser, TOKEN_L_PAREN);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
node->while_stmt.cond = parse_expr(parser);
expecttok(parser, TOKEN_R_PAREN);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
node->while_stmt.body = parse_stmt(parser);
node->type = NT_STMT_WHILE;
@ -61,16 +62,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
/**
* do stmt while (exp)
*/
poptok(parser);
pop_tok(tokbuf);
node->do_while_stmt.body = parse_stmt(parser);
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_WHILE) {
error("expected while after do");
}
poptok(parser);
expecttok(parser, TOKEN_L_PAREN);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
node->do_while_stmt.cond = parse_expr(parser);
expecttok(parser, TOKEN_R_PAREN);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
node->type = NT_STMT_DOWHILE;
break;
}
@ -79,36 +80,36 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* for (init; [cond]; [iter]) stmt
*/
// node->children.stmt.for_stmt.init
poptok(parser);
ttype = peektoktype(parser);
pop_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_L_PAREN) {
error("expected ( after for");
}
poptok(parser);
pop_tok(tokbuf);
// init expr or init decl_var
// TODO need add this feature
node->for_stmt.init = parse_expr(parser);
expecttok(parser, TOKEN_SEMICOLON);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
// cond expr or null
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_SEMICOLON) {
node->for_stmt.cond = parse_expr(parser);
expecttok(parser, TOKEN_SEMICOLON);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
} else {
node->for_stmt.cond = NULL;
poptok(parser);
pop_tok(tokbuf);
}
// iter expr or null
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_R_PAREN) {
node->for_stmt.iter = parse_expr(parser);
expecttok(parser, TOKEN_R_PAREN);
expect_pop_tok(tokbuf, TOKEN_R_PAREN);
} else {
node->for_stmt.iter = NULL;
poptok(parser);
pop_tok(tokbuf);
}
node->for_stmt.body = parse_stmt(parser);
@ -120,8 +121,8 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* break ;
*/
// TODO check 导致外围 for、while 或 do-while 循环或 switch 语句终止。
poptok(parser);
expecttok(parser, TOKEN_SEMICOLON);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
node->type = NT_STMT_BREAK;
break;
@ -131,8 +132,8 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* continue ;
*/
// TODO check 导致跳过整个 for、 while 或 do-while 循环体的剩余部分。
poptok(parser);
expecttok(parser, TOKEN_SEMICOLON);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
node->type = NT_STMT_CONTINUE;
break;
@ -142,16 +143,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* return [exp] ;
*/
// TODO 终止当前函数并返回指定值给调用方函数。
poptok(parser);
ttype = peektoktype(parser);
pop_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_SEMICOLON) {
node->return_stmt.expr_stmt = parse_expr(parser);
flushpeektok(parser);
expecttok(parser, TOKEN_SEMICOLON);
flush_peek_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
} else {
node->return_stmt.expr_stmt = NULL;
pop_tok(tokbuf);
}
poptok(parser);
node->type = NT_STMT_RETURN;
break;
}
@ -161,15 +162,15 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
*/
// TODO check label 将控制无条件转移到所欲位置。
//在无法用约定的构造将控制转移到所欲位置时使用。
poptok(parser);
pop_tok(tokbuf);
// find symbol table
ttype = peektoktype(parser);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_IDENT) {
error("expect identifier after goto");
}
expecttok(parser, TOKEN_SEMICOLON);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
// TODO filling label
node->goto_stmt.label = parse_ident(parser);
node->goto_stmt.label = expect_pop_ident(tokbuf);
node->type = NT_STMT_GOTO;
break;
}
@ -181,7 +182,7 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
* if () ;
* for () ;
*/
poptok(parser);
pop_tok(tokbuf);
node->type = NT_STMT_EMPTY;
break;
}
@ -193,30 +194,30 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
node->type = NT_STMT_BLOCK;
break;
}
case TOKEN_IDENT: {
case TOKEN_IDENT: {
// TODO label goto
if (peektoktype(parser) != TOKEN_COLON) {
if (peek_tok_type(tokbuf) != TOKEN_COLON) {
goto EXP;
}
node->label_stmt.label = parse_ident(parser);
expecttok(parser, TOKEN_COLON);
node->label_stmt.label = expect_pop_ident(tokbuf);
expect_pop_tok(tokbuf, TOKEN_COLON);
node->type = NT_STMT_LABEL;
break;
}
case TOKEN_CASE: {
// TODO label switch
poptok(parser);
pop_tok(tokbuf);
error("unimplemented switch label");
node->label_stmt.label = parse_expr(parser);
// TODO 该表达式为const int
expecttok(parser, TOKEN_COLON);
expect_pop_tok(tokbuf, TOKEN_COLON);
node->type = NT_STMT_CASE;
break;
}
case TOKEN_DEFAULT: {
// TODO label switch default
poptok(parser);
expecttok(parser, TOKEN_COLON);
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_COLON);
node->type = NT_STMT_DEFAULT;
break;
}
@ -226,15 +227,16 @@ struct ASTNode* parse_stmt(struct Parser* parser) {
*/
EXP:
node->expr_stmt.expr_stmt = parse_expr(parser);
flushpeektok(parser);
ttype = peektoktype(parser);
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_SEMICOLON) {
error("exp must end with \";\"");
}
poptok(parser);
pop_tok(tokbuf);
node->type = NT_STMT_EXPR;
break;
}
}
return node;
}

View File

@ -2,162 +2,30 @@
#include "../type.h"
#include "ast.h"
// /* 状态跳转表定义 */
// typedef void (*StateHandler)(struct Parser*, struct ASTNode**);
// enum TypeParseState {
// TPS_BASE_TYPE, // 解析基础类型 (int/char等)
// TPS_QUALIFIER, // 解析限定符 (const/volatile)
// TPS_POINTER, // 解析指针 (*)
// TPS_ARRAY, // 解析数组维度 ([n])
// TPS_FUNC_PARAMS, // 解析函数参数列表
// TPS_END,
// };
// ;
// /* 状态处理函数前置声明 */
// static void handle_base_type(struct Parser*, struct ASTNode**);
// static void handle_qualifier(struct Parser*, struct ASTNode**);
// static void handle_pointer(struct Parser*, struct ASTNode**);
// static void handle_array(struct Parser*, struct ASTNode**);
// static void handle_func_params(struct Parser*, struct ASTNode**);
// static void handle_error(struct Parser*, struct ASTNode**);
// /* 状态跳转表(核心优化部分) */
// static const struct StateTransition {
// enum TokenType tok; // 触发token
// StateHandler handler; // 处理函数
// enum TypeParseState next_state; // 下一个状态
// } state_table[][8] = {
// [TPS_QUALIFIER] = {
// {TOKEN_CONST, handle_qualifier, TPS_QUALIFIER},
// {TOKEN_VOLATILE, handle_qualifier, TPS_QUALIFIER},
// {TOKEN_VOID, handle_base_type, TPS_POINTER},
// {TOKEN_CHAR, handle_base_type, TPS_POINTER},
// {TOKEN_INT, handle_base_type, TPS_POINTER},
// {TOKEN_EOF, handle_error, TPS_QUALIFIER},
// /* 其他token默认处理 */
// {0, NULL, TPS_BASE_TYPE}
// },
// [TPS_BASE_TYPE] = {
// {TOKEN_MUL, handle_pointer, TPS_POINTER},
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {TOKEN_EOF, NULL, TPS_END},
// {0, NULL, TPS_POINTER}
// },
// [TPS_POINTER] = {
// {TOKEN_MUL, handle_pointer, TPS_POINTER},
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {0, NULL, TPS_END}
// },
// [TPS_ARRAY] = {
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {0, NULL, TPS_END}
// },
// [TPS_FUNC_PARAMS] = {
// {0, NULL, TPS_END}
// }
// };
// /* 新的类型解析函数 */
// struct ASTNode* parse_type(struct Parser* p) {
// struct ASTNode* type_root = NULL;
// struct ASTNode** current = &type_root;
// enum TypeParseState state = TPS_QUALIFIER;
// while (state != TPS_END) {
// enum TokenType t = peektoktype(p);
// const struct StateTransition* trans = state_table[state];
// // 查找匹配的转换规则
// while (trans->tok != 0 && trans->tok != t) {
// trans++;
// }
// if (trans->handler) {
// trans->handler(p, current);
// } else if (trans->tok == 0) { // 默认规则
// state = trans->next_state;
// continue;
// } else {
// error("syntax error type parse error");
// }
// state = trans->next_state;
// }
// return type_root;
// }
// /* 具体状态处理函数实现 */
// static void handle_qualifier(struct Parser* p, struct ASTNode** current) {
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_QUAL;
// node->data.data_type = poptok(p).type;
// if (*current) {
// (*current)->child.decl.type = node;
// } else {
// *current = node;
// }
// }
// static void handle_base_type(struct Parser* p, struct ASTNode** current) {
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_BASE;
// node->data.data_type = poptok(p).type;
// // 链接到当前节点链的末端
// while (*current && (*current)->child.decl.type) {
// current = &(*current)->child.decl.type;
// }
// if (*current) {
// (*current)->child.decl.type = node;
// } else {
// *current = node;
// }
// }
// static void handle_pointer(struct Parser* p, struct ASTNode** current) {
// poptok(p); // 吃掉*
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_PTR;
// // 插入到当前节点之前
// node->child.decl.type = *current;
// *current = node;
// }
// /* 其他处理函数类似实现... */
// Builds an NT_TERM_IDENT node that stores a copy of `tok`; error() is called
// when the token is not TOKEN_IDENT. The node's decl_node link starts as NULL.
// NOTE(review): the opening lines of the older parser_ident_without_pop
// (which peeked the token itself) appear interleaved with new_ast_ident_node
// (which receives the token from the caller) — confirm which lines are live.
struct ASTNode* parser_ident_without_pop(struct Parser* parser) {
flushpeektok(parser);
struct Token* tok = peektok(parser);
ast_node_t* new_ast_ident_node(tok_t* tok) {
if (tok->type != TOKEN_IDENT) {
error("syntax error: want identifier but got %d", tok->type);
}
struct ASTNode* node = new_ast_node();
ast_node_t* node = new_ast_node();
node->type = NT_TERM_IDENT;
// The token is copied by value, so the node does not alias the token buffer slot.
node->syms.tok = *tok;
node->syms.decl_node = NULL;
return node;
}
struct ASTNode* parse_ident(struct Parser* parser) {
struct ASTNode* node = parser_ident_without_pop(parser);
poptok(parser);
// Consumes the next token as an identifier: resets the peek cursor, builds an
// identifier node from the peeked token (new_ast_ident_node reports an error
// if it is not TOKEN_IDENT), pops the token, and returns the node.
ast_node_t* expect_pop_ident(tok_buf_t* tokbuf) {
    flush_peek_tok(tokbuf);
    ast_node_t* ident = new_ast_ident_node(peek_tok(tokbuf));
    pop_tok(tokbuf);
    return ident;
}
struct ASTNode* parse_type(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
enum DataType dtype;
ast_node_t* parse_type(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
data_type_t dtype;
switch(ttype) {
case TOKEN_VOID: dtype = TYPE_VOID; break;
case TOKEN_CHAR: dtype = TYPE_CHAR; break;
@ -170,13 +38,14 @@ struct ASTNode* parse_type(struct Parser* parser) {
error("无效的类型说明符");
}
struct ASTNode* node = new_ast_node();
ast_node_t* node = new_ast_node();
node->type = NT_TERM_TYPE;
// node->data.data_type = dtype;
poptok(parser);
// TODO added by disable warning, will add typing system
dtype += 1;
pop_tok(tokbuf);
if (peektoktype(parser) == TOKEN_MUL) {
poptok(parser);
if (peek_tok_type(tokbuf) == TOKEN_MUL) {
pop_tok(tokbuf);
}
return node;
}

View File

@ -1,136 +1,136 @@
#include "../parser.h"
#include "../type.h"
// #include "../parser.h"
// #include "../type.h"
enum TypeParseState {
TPS_BASE_TYPE, // 解析基础类型 (int/char等)
TPS_QUALIFIER, // 解析限定符 (const/volatile)
TPS_POINTER, // 解析指针 (*)
TPS_ARRAY, // 解析数组维度 ([n])
TPS_FUNC_PARAMS // 解析函数参数列表
};
// enum TypeParseState {
// TPS_BASE_TYPE, // 解析基础类型 (int/char等)
// TPS_QUALIFIER, // 解析限定符 (const/volatile)
// TPS_POINTER, // 解析指针 (*)
// TPS_ARRAY, // 解析数组维度 ([n])
// TPS_FUNC_PARAMS // 解析函数参数列表
// };
struct ASTNode* parse_type(struct Parser* p) {
struct ASTNode* type_root = new_ast_node();
struct ASTNode* current = type_root;
current->type = NT_TYPE_BASE;
// ast_node_t* parse_type(parser_t* p) {
// ast_node_t* type_root = new_ast_node();
// ast_node_t* current = type_root;
// current->type = NT_TYPE_BASE;
enum TypeParseState state = TPS_QUALIFIER;
int pointer_level = 0;
// enum TypeParseState state = TPS_QUALIFIER;
// int pointer_level = 0;
while (1) {
enum TokenType t = peektoktype(p);
// while (1) {
// tok_type_t t = peektoktype(p);
switch (state) {
// 基础类型解析 (int, char等)
case TPS_BASE_TYPE:
if (is_base_type(t)) {
// current->data.data_type = token_to_datatype(t);
poptok(p);
state = TPS_POINTER;
} else {
error("Expected type specifier");
}
break;
// switch (state) {
// // 基础类型解析 (int, char等)
// case TPS_BASE_TYPE:
// if (is_base_type(t)) {
// // current->data.data_type = token_to_datatype(t);
// pop_tok(p);
// state = TPS_POINTER;
// } else {
// error("Expected type specifier");
// }
// break;
// 类型限定符 (const/volatile)
case TPS_QUALIFIER:
if (t == TOKEN_CONST || t == TOKEN_VOLATILE) {
struct ASTNode* qual_node = new_ast_node();
qual_node->type = NT_TYPE_QUAL;
qual_node->data.data_type = t; // 复用data_type字段存储限定符
current->child.decl.type = qual_node;
current = qual_node;
poptok(p);
} else {
state = TPS_BASE_TYPE;
}
break;
// // 类型限定符 (const/volatile)
// case TPS_QUALIFIER:
// if (t == TOKEN_CONST || t == TOKEN_VOLATILE) {
// ast_node_t* qual_node = new_ast_node();
// qual_node->type = NT_TYPE_QUAL;
// qual_node->data.data_type = t; // 复用data_type字段存储限定符
// current->child.decl.type = qual_node;
// current = qual_node;
// pop_tok(p);
// } else {
// state = TPS_BASE_TYPE;
// }
// break;
// 指针解析 (*)
case TPS_POINTER:
if (t == TOKEN_MUL) {
struct ASTNode* ptr_node = new_ast_node();
ptr_node->type = NT_TYPE_PTR;
current->child.decl.type = ptr_node;
current = ptr_node;
pointer_level++;
poptok(p);
} else {
state = TPS_ARRAY;
}
break;
// // 指针解析 (*)
// case TPS_POINTER:
// if (t == TOKEN_MUL) {
// ast_node_t* ptr_node = new_ast_node();
// ptr_node->type = NT_TYPE_PTR;
// current->child.decl.type = ptr_node;
// current = ptr_node;
// pointer_level++;
// pop_tok(p);
// } else {
// state = TPS_ARRAY;
// }
// break;
// 数组维度 ([n])
case TPS_ARRAY:
if (t == TOKEN_L_BRACKET) {
poptok(p); // 吃掉[
struct ASTNode* arr_node = new_ast_node();
arr_node->type = NT_TYPE_ARRAY;
// // 数组维度 ([n])
// case TPS_ARRAY:
// if (t == TOKEN_L_BRACKET) {
// pop_tok(p); // 吃掉[
// ast_node_t* arr_node = new_ast_node();
// arr_node->type = NT_TYPE_ARRAY;
// 解析数组大小(仅语法检查)
if (peektoktype(p) != TOKEN_R_BRACKET) {
parse_expr(p); // 不计算实际值
}
// // 解析数组大小(仅语法检查)
// if (peektoktype(p) != TOKEN_R_BRACKET) {
// parse_expr(p); // 不计算实际值
// }
expecttok(p, TOKEN_R_BRACKET);
current->child.decl.type = arr_node;
current = arr_node;
} else {
state = TPS_FUNC_PARAMS;
}
break;
// expecttok(p, TOKEN_R_BRACKET);
// current->child.decl.type = arr_node;
// current = arr_node;
// } else {
// state = TPS_FUNC_PARAMS;
// }
// break;
// 函数参数列表
case TPS_FUNC_PARAMS:
if (t == TOKEN_L_PAREN) {
struct ASTNode* func_node = new_ast_node();
func_node->type = NT_TYPE_FUNC;
current->child.decl.type = func_node;
// // 函数参数列表
// case TPS_FUNC_PARAMS:
// if (t == TOKEN_L_PAREN) {
// ast_node_t* func_node = new_ast_node();
// func_node->type = NT_TYPE_FUNC;
// current->child.decl.type = func_node;
// 解析参数列表(仅结构,不验证类型)
parse_param_list(p, func_node);
current = func_node;
} else {
return type_root; // 类型解析结束
}
break;
}
}
}
// 判断是否是基础类型
static int is_base_type(enum TokenType t) {
return t >= TOKEN_VOID && t <= TOKEN_DOUBLE;
}
// // 转换token到数据类型简化版
// static enum DataType token_to_datatype(enum TokenType t) {
// static enum DataType map[] = {
// [TOKEN_VOID] = DT_VOID,
// [TOKEN_CHAR] = DT_CHAR,
// [TOKEN_INT] = DT_INT,
// // ...其他类型映射
// };
// return map[t];
// // 解析参数列表(仅结构,不验证类型)
// parse_param_list(p, func_node);
// current = func_node;
// } else {
// return type_root; // 类型解析结束
// }
// break;
// }
// }
// }
// 解析参数列表(轻量级)
static void parse_param_list(struct Parser* p, struct ASTNode* func) {
expecttok(p, TOKEN_L_PAREN);
while (peektoktype(p) != TOKEN_R_PAREN) {
struct ASTNode* param = parse_type(p); // 递归解析类型
// 允许可选参数名(仅语法检查)
if (peektoktype(p) == TOKEN_IDENT) {
poptok(p); // 吃掉参数名
}
if (peektoktype(p) == TOKEN_COMMA) {
poptok(p);
}
}
expecttok(p, TOKEN_R_PAREN);
}
// // 判断是否是基础类型
// static int is_base_type(tok_type_t t) {
// return t >= TOKEN_VOID && t <= TOKEN_DOUBLE;
// }
// // // 转换token到数据类型简化版
// // static enum DataType token_to_datatype(tok_type_t t) {
// // static enum DataType map[] = {
// // [TOKEN_VOID] = DT_VOID,
// // [TOKEN_CHAR] = DT_CHAR,
// // [TOKEN_INT] = DT_INT,
// // // ...其他类型映射
// // };
// // return map[t];
// // }
// // 解析参数列表(轻量级)
// static void parse_param_list(parser_t* p, ast_node_t* func) {
// expecttok(p, TOKEN_L_PAREN);
// while (peektoktype(p) != TOKEN_R_PAREN) {
// ast_node_t* param = parse_type(p); // 递归解析类型
// // 允许可选参数名(仅语法检查)
// if (peektoktype(p) == TOKEN_IDENT) {
// pop_tok(p); // 吃掉参数名
// }
// if (peektoktype(p) == TOKEN_COMMA) {
// pop_tok(p);
// }
// }
// expecttok(p, TOKEN_R_PAREN);
// }

View File

@ -1,67 +1,17 @@
#include "parser.h"
#include "type.h"
#include "ast/ast.h"
// Drops the token at the head of the parser's ring buffer.
// Returns 0 on success, or -1 when the buffer holds no tokens.
int poptok(struct Parser* parser) {
    if (parser->size != 0) {
        int head = parser->cur_idx;
        parser->cur_idx = (head + 1) % PARSER_MAX_TOKEN_QUEUE;
        parser->size--;
        return 0;
    }
    return -1;
}
// Rewinds the peek cursor to the current (head) index of the token buffer.
void flushpeektok(struct Parser* parser) {
    int head = parser->cur_idx;
    parser->peek_idx = head;
}
// Advances the peek cursor by one slot (wrapping at PARSER_MAX_TOKEN_QUEUE)
// and returns a pointer to the token in that slot. The pointer refers to
// storage inside parser->TokenBuffer, not to a copy.
struct Token* peektok(struct Parser* parser) {
int idx = parser->peek_idx;
idx = (idx + 1) % PARSER_MAX_TOKEN_QUEUE;
// A full buffer only triggers a warning here; the hard failure is below.
if (parser->size >= PARSER_MAX_TOKEN_QUEUE) {
warn("peek maybe too deep");
}
// The peek cursor has caught up with the last buffered token: fetch one more
// token from the lexer into the next slot.
if (parser->peek_idx == parser->end_idx) {
if (parser->size == PARSER_MAX_TOKEN_QUEUE) {
// FIXME
error("buffer overflow");
}
get_valid_token(parser->lexer, &(parser->TokenBuffer[idx]));
parser->size++;
parser->end_idx = idx;
}
parser->peek_idx = idx;
return &(parser->TokenBuffer[idx]);
}
enum TokenType peektoktype(struct Parser* parser) {
return peektok(parser)->type;
}
// Peeks the next token and pops one token from the buffer when the peeked
// type equals `type`; otherwise reports the mismatch through error() and
// leaves the buffer untouched.
void expecttok(struct Parser* parser, enum TokenType type) {
    struct Token* tok = peektok(parser);
    if (tok->type == type) {
        poptok(parser);
        return;
    }
    error("expected tok: %s, got %s", get_token_name(type), get_token_name(tok->type));
}
// Initialises a parser: clears the AST pointers and stores the lexer and
// symbol table it will use.
// NOTE(review): the struct Parser* revision (manual cur_idx/peek_idx/end_idx/
// size ring indices) and the parser_t revision (tok_buf_t based) appear
// interleaved here — confirm which lines are live.
void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab) {
void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab) {
parser->cur_node = NULL;
parser->root = NULL;
parser->cur_idx = 0;
parser->peek_idx = 0;
parser->end_idx = 0;
parser->size = 0;
parser->lexer = lexer;
parser->symtab = symtab;
// TODO
// The token buffer pulls tokens from `lexer` through get_valid_token and uses
// the parser's embedded TokenBuffer array as storage; cap is its element count.
init_tokbuf(&parser->tokbuf, lexer, (get_tokbuf_func)get_valid_token);
parser->tokbuf.cap = sizeof(parser->TokenBuffer) / sizeof(parser->TokenBuffer[0]);
parser->tokbuf.buf = parser->TokenBuffer;
}
// Runs the parser over the whole input by delegating to parse_prog.
void run_parser(struct Parser* parser) {
void run_parser(parser_t* parser) {
parse_prog(parser);
}

View File

@ -2,32 +2,24 @@
#define __PARSER_H__
#include "../frontend.h"
#include "../lexer/lexer.h"
// #include "symbol_table/symtab.h"
// #include "ast/ast.h"
#include "../lexer/lexer.h"
typedef struct lexer lexer_t;
typedef struct symtab symtab_t;
#define PARSER_MAX_TOKEN_QUEUE 16
// Parser state.
// NOTE(review): the fields of the older `struct Parser` and of the newer
// `struct parser` / parser_t appear interleaved here — confirm which lines
// are live.
struct Parser {
typedef struct parser {
struct ASTNode* root; // AST root; allocated by parse_prog
struct ASTNode* cur_node; // set to NULL by init_parser
struct Lexer* lexer;
struct SymbolTable* symtab;
// Ring-buffer bookkeeping used by poptok/peektok in the older revision.
int cur_idx;
int peek_idx;
int end_idx;
int size;
struct Token TokenBuffer[PARSER_MAX_TOKEN_QUEUE];
lexer_t* lexer;
symtab_t* symtab;
tok_buf_t tokbuf; // token buffer state; init_parser points tokbuf.buf at TokenBuffer
tok_t TokenBuffer[PARSER_MAX_TOKEN_QUEUE]; // backing storage for tokbuf
int err_level;
};
} parser_t;
// Parser entry points.
// NOTE(review): init_parser/run_parser are declared twice (struct Parser* and
// parser_t* forms); looks like an old/new pair — confirm which set is live.
void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab);
void run_parser(struct Parser* parser);
// Token look-ahead helpers operating on the parser's token ring.
void flushpeektok(struct Parser* parser);
int poptok(struct Parser* parser);
struct Token* peektok(struct Parser* parser);
enum TokenType peektoktype(struct Parser* parser);
void expecttok(struct Parser* parser, enum TokenType type);
void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab);
void run_parser(parser_t* parser);
#endif

View File

@ -3,25 +3,25 @@
#include "scope.h"
#include "symtab.h"
// Short aliases used by the functions in this file.
typedef struct SymbolTable SymbolTable;
// NOTE(review): re-declares symtab_t as itself; appears redundant — confirm.
typedef symtab_t symtab_t;
typedef struct Scope Scope;
// Initialise `symtab`: create the global scope (scope_create(NULL)) and make it
// the current scope.
// NOTE(review): two signature lines (SymbolTable* and symtab_t* forms) appear
// here; looks like an old/new pair — confirm which is live.
void init_symtab(SymbolTable* symtab) {
void init_symtab(symtab_t* symtab) {
symtab->global_scope = scope_create(NULL);
symtab->cur_scope = symtab->global_scope;
}
// Tear down `symtab` by destroying its global scope.
// NOTE(review): two signature lines (SymbolTable* and symtab_t* forms) appear
// here; looks like an old/new pair — confirm which is live.
void del_symtab(SymbolTable* symtab) {
void del_symtab(symtab_t* symtab) {
scope_destroy(symtab->global_scope);
}
// Enter a new scope created from the current one (presumably as its child —
// confirm against scope_create) and make it current.
// NOTE(review): two signature lines (SymbolTable* and symtab_t* forms) appear
// here; looks like an old/new pair — confirm which is live.
void symtab_enter_scope(SymbolTable* symtab) {
void symtab_enter_scope(symtab_t* symtab) {
struct Scope* scope = scope_create(symtab->cur_scope);
// The new scope's base offset continues where the enclosing scope currently ends.
scope->base_offset = symtab->cur_scope->base_offset + symtab->cur_scope->cur_offset;
symtab->cur_scope = scope;
}
void symtab_leave_scope(SymbolTable* symtab) {
void symtab_leave_scope(symtab_t* symtab) {
Scope * scope = symtab->cur_scope;
if (scope == NULL) {
error("cannot leave NULL scope or global scope");
@ -30,16 +30,20 @@ void symtab_leave_scope(SymbolTable* symtab) {
scope_destroy(scope);
}
// Register `name` in the current scope.
// If scope_lookup_current() already finds an entry for `name`, that entry is
// returned (after error("duplicate symbol ...") when can_duplicate is 0) and
// nothing is inserted. Otherwise `ast_node` is stored via scope_insert().
// NOTE(review): on the insert path `node` is still NULL, so NULL is returned
// rather than `ast_node` — confirm callers expect that.
// NOTE(review): both a void/3-argument form and a void*/4-argument form appear
// below; looks like an old/new pair — confirm which lines are live.
void symtab_add_symbol(SymbolTable* symtab, const char* name, void* ast_node) {
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate) {
struct Scope* scope = symtab->cur_scope;
if (scope_lookup_current(scope, name) != NULL) {
// TODO WARNING
// return NULL;
void* node = scope_lookup_current(scope, name);
if (node != NULL) {
if (!can_duplicate) {
error("duplicate symbol %s", name);
}
return node;
}
scope_insert(scope, name, ast_node);
return node;
}
// Look `name` up starting from the current scope; returns whatever
// scope_lookup() yields for it.
// NOTE(review): two signature lines (SymbolTable* and symtab_t* forms) appear
// here; looks like an old/new pair — confirm which is live.
void* symtab_lookup_symbol(SymbolTable* symtab, const char* name) {
void* symtab_lookup_symbol(symtab_t* symtab, const char* name) {
return scope_lookup(symtab->cur_scope, name);
}

View File

@ -2,17 +2,17 @@
#ifndef __SYMTAB_H__
#define __SYMTAB_H__
// Symbol table: tracks the scope currently being populated and the global scope.
// NOTE(review): two struct headers and two closers appear here; looks like an
// old/new pair — confirm which is live.
struct SymbolTable {
typedef struct symtab {
struct Scope* cur_scope;    // scope that add/lookup operate on
struct Scope* global_scope; // created by init_symtab, destroyed by del_symtab
};
} symtab_t;
// Symbol-table API.
// NOTE(review): each function is declared twice (struct SymbolTable* and
// symtab_t* forms, with differing symtab_add_symbol signatures); looks like an
// old/new pair — confirm which set is live.
void init_symtab(struct SymbolTable* symtab);
void del_symtab(struct SymbolTable* symtab);
void init_symtab(symtab_t* symtab);
void del_symtab(symtab_t* symtab);
void symtab_enter_scope(struct SymbolTable* symtab);
void symtab_leave_scope(struct SymbolTable* symtab);
void symtab_add_symbol(struct SymbolTable* symtab, const char* name, void* ast_node);
void* symtab_lookup_symbol(struct SymbolTable* symtab, const char* name);
void symtab_enter_scope(symtab_t* symtab);
void symtab_leave_scope(symtab_t* symtab);
// can_duplicate: when 0, an already-present name is reported as an error.
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate);
void* symtab_lookup_symbol(symtab_t* symtab, const char* name);
#endif

View File

@ -23,7 +23,7 @@ int main(int argc, char** argv) {
struct SymbolTable symtab;
init_symtab(&symtab);
struct Parser parser;
struct parser parser;
init_parser(&parser, &lexer, &symtab);
parse_prog(&parser);

View File

@ -3,7 +3,7 @@
#include "../lexer/token.h"
enum DataType {
typedef enum {
TYPE_VOID,
TYPE_CHAR,
TYPE_SHORT,
@ -30,6 +30,6 @@ enum DataType {
TYPE_ATOMIC,
TYPE_TYPEDEF,
};
} data_type_t;
#endif

View File

View File

@ -5,48 +5,61 @@ typedef struct ASTNode ASTNode;
// Context structure recording the state of IR generation.
// NOTE(review): both current_*/vreg_counter members and cur_* members appear
// here; looks like an old/new pair — confirm which lines are live.
typedef struct {
ir_func_t* current_func; // function currently being processed
ir_bblock_t* current_block; // current basic block
uint32_t vreg_counter; // virtual register counter
ir_func_t* cur_func; // function currently being processed
ir_bblock_t* cur_block; // current basic block
} IRGenContext;
// File-scope generator state: the active context (saved and restored around
// each function body) and the program being built.
IRGenContext ctx;
ir_prog_t prog;
// Shared 32-bit integer type descriptor; the visible code uses it for every
// function, parameter and variable.
ir_type_t type_i32 = {
.tag = IR_TYPE_INT32,
};
// Put `node` into its empty state: no type, no name, empty used_by list.
static inline void init_ir_node_t(ir_node_t* node) {
    node->type = NULL;
    node->name = NULL;
    vector_init(node->used_by);
}
// Allocate an IR node with xmalloc and reset it via init_ir_node_t().
// Returns the new node. The explicit return is required: callers such as
// emit_instr and gen_ir_expr store and dereference the result.
// NOTE(review): two signature lines (new_ir_node_t and new_irnode) appear
// here; looks like an old/new pair — confirm which is live.
static inline ir_node_t* new_ir_node_t() {
static inline ir_node_t* new_irnode() {
ir_node_t* node = xmalloc(sizeof(ir_node_t));
init_ir_node_t(node);
return node;
}
// Allocate a basic block labelled `name` (the pointer is stored, not copied)
// with an empty instruction list, and return it.
static inline ir_bblock_t* new_irbblock(const char* name) {
    ir_bblock_t* bb = xmalloc(sizeof *bb);
    vector_init(bb->instrs);
    bb->label = name;
    return bb;
}
// Append a freshly allocated IR node to `block` and return it so the caller
// can fill it in. Passing NULL targets the context's current basic block.
// NOTE(review): the NULL fallback and the allocation each appear twice
// (current_block/new_ir_node_t and cur_block/new_irnode); looks like an
// old/new pair — confirm which lines are live.
ir_node_t* emit_instr(ir_bblock_t* block) {
if (block == NULL) block = ctx.current_block;
ir_node_t *node = new_ir_node_t();
if (block == NULL) block = ctx.cur_block;
ir_node_t *node = new_irnode();
vector_push(block->instrs, node);
// Return the element just pushed (last slot of the instruction list).
return vector_at(block->instrs, block->instrs.size - 1);
}
// Emit a conditional branch into the current basic block: control goes to
// `trueb` or `falseb` depending on `cond`. Returns the emitted branch node.
// NOTE(review): a void, label-based variant (tagged IR_NODE_RET) and a
// pointer-based variant (tagged IR_NODE_BRANCH) are interleaved below; looks
// like an old/new pair — confirm which lines are live.
void emit_br(ir_node_t cond, const char* true_lable, const char* false_lable) {
ir_node_t br = {
.tag = IR_NODE_RET,
.data = {
ir_node_t* emit_br(ir_node_t* cond, ir_bblock_t* trueb, ir_bblock_t* falseb) {
// NULL selects the context's current block in emit_instr.
ir_node_t* br = emit_instr(NULL);
*br = (ir_node_t) {
.tag = IR_NODE_BRANCH,
.data.branch = {
.cond = cond,
.true_bblock = trueb,
.false_bblock = falseb,
}
};
// emit_instr(br, NULL);
return br;
}
ir_node_t* gen_ir_expr(ASTNode* node) {
switch (node->type) {
case NT_TERM_VAL: {
ir_node_t* ir = new_ir_node_t();
ir_node_t* ir = new_irnode();
*ir = (ir_node_t) {
.tag = IR_NODE_CONST_INT,
.data.const_int = {
.val = node->syms.tok.constant.i,
.val = node->syms.tok.val.i,
},
};
return ir;
@ -56,15 +69,18 @@ ir_node_t* gen_ir_expr(ASTNode* node) {
return decl;
}
case NT_TERM_CALL: {
// TODO
ir_node_t* ir = new_ir_node_t();
ir_node_t* ir = emit_instr(NULL);
*ir = (ir_node_t) {
.tag = IR_NODE_CALL,
.data.call = {
.callee = NULL,
.callee = node->call.func_decl->decl_func.def->func.data,
},
};
vector_init(ir->data.call.args);
for (int i = 0; i < node->call.params->params.params.size; i++) {
vector_push(ir->data.call.args, \
gen_ir_expr(node->call.params->params.params.data[i]));
}
return ir;
}
default:
@ -191,42 +207,75 @@ NEXT:
}
return ret;
}
// Allocate an IR function named `name` (the pointer is stored, not copied)
// with empty basic-block and parameter lists. Every function is currently
// typed as i32. Returns the new function.
static ir_func_t* new_irfunc(const char* name) {
    ir_func_t *func = xmalloc(sizeof(ir_func_t));
    // Assign the whole struct first: the compound literal overwrites every
    // member (unnamed ones become zero), so the vectors must be initialised
    // afterwards or their initialisation would be discarded.
    *func = (ir_func_t) {
        .name = name,
        // TODO typing system
        .type = &type_i32,
    };
    vector_init(func->bblocks);
    vector_init(func->params);
    return func;
}
// Lower the function definition `node` (must be NT_FUNC) into `func`.
//
// Creates the "entry" block, registers `func` in the program, emits one ALLOC
// per declared parameter into the entry block (recording it both in
// func->params and on the parameter's AST node), then generates the body.
// The generator context is saved on entry and restored on exit.
static void gen_ir_func(ASTNode* node, ir_func_t* func) {
    assert(node->type == NT_FUNC);

    ir_bblock_t *entry_bb = new_irbblock("entry");
    vector_push(func->bblocks, entry_bb);
    vector_push(prog.funcs, func);

    IRGenContext saved_ctx = ctx;
    ctx.cur_func = func;
    ctx.cur_block = entry_bb;

    ast_node_t* param_list = node->func.decl->decl_func.params;
    for (int idx = 0; idx < param_list->params.params.size; idx++) {
        ast_node_t* param_ast = param_list->params.params.data[idx];
        ir_node_t* slot = emit_instr(entry_bb);
        *slot = (ir_node_t) {
            .tag = IR_NODE_ALLOC,
            .name = param_ast->decl_val.name->syms.tok.val.str,
            .type = &type_i32,
        };
        vector_push(func->params, slot);
        // Let later uses of the parameter find its storage node.
        param_ast->decl_val.data = slot;
    }

    gen_ir_from_ast(node->func.body);
    ctx = saved_ctx;
}
void gen_ir_from_ast(struct ASTNode* node) {
switch (node->type) {
case NT_ROOT: {
for (int i = 0; i < node->root.child_size; i ++) {
gen_ir_from_ast(node->root.children[i]);
for (int i = 0; i < node->root.children.size; i ++) {
gen_ir_from_ast(node->root.children.data[i]);
}
} break;
break;
}
case NT_DECL_FUNC: {
ir_func_t* func = new_irfunc(node->decl_func.name->syms.tok.val.str);
if (node->decl_func.def == NULL) {
ast_node_t* def = new_ast_node();
def->func.body = NULL;
def->func.decl = node;
node->decl_func.def = def;
vector_push(prog.extern_funcs, func);
}
node->decl_func.def->func.data = func;
break;
}
case NT_FUNC: {
ir_func_t *func = xmalloc(sizeof(ir_func_t));
*func = (ir_func_t) {
.name = node->func.name->syms.tok.constant.str,
};
vector_init(func->bblocks);
ir_bblock_t *entry = xmalloc(sizeof(ir_bblock_t));
*entry = (ir_bblock_t) {
.label = "entry",
};
vector_init(entry->instrs);
vector_push(func->bblocks, entry);
IRGenContext prev_ctx = ctx;
ctx = (IRGenContext) {
.current_func = func,
.current_block = vector_at(func->bblocks, 0),
.vreg_counter = 0,
};
gen_ir_from_ast(node->func.body);
ctx = prev_ctx;
vector_push(prog.funcs, func);
} break;
gen_ir_func(node, node->func.data);
break;
}
case NT_STMT_RETURN: {
ir_node_t* ret = gen_ir_expr(node->return_stmt.expr_stmt);
ir_node_t* ret = NULL;
if (node->return_stmt.expr_stmt != NULL) {
ret = gen_ir_expr(node->return_stmt.expr_stmt);
}
ir_node_t* ir = emit_instr(NULL);
*ir = (ir_node_t) {
.tag = IR_NODE_RET,
@ -236,22 +285,54 @@ void gen_ir_from_ast(struct ASTNode* node) {
}
}
};
vector_push(ctx.cur_func->bblocks, new_irbblock(NULL));
break;
}
case NT_STMT_BLOCK: {
gen_ir_from_ast(node->block_stmt.block);
break;
}
case NT_BLOCK: {
for (int i = 0; i < node->block.child_size; i ++) {
gen_ir_from_ast(node->block.children[i]);
for (int i = 0; i < node->block.children.size; i ++) {
gen_ir_from_ast(node->block.children.data[i]);
}
break;
}
case NT_STMT_IF: {
ir_node_t *cond = gen_ir_expr(node->if_stmt.cond);
ir_bblock_t* trueb = new_irbblock("true_block");
ir_bblock_t* falseb = new_irbblock("false_block");
emit_br(cond, trueb, falseb);
// xmalloc();
// ir_bblock_t then_block = {
// };
node->if_stmt.if_stmt;
node->if_stmt.else_stmt;
vector_push(ctx.cur_func->bblocks, trueb);
ctx.cur_block = trueb;
gen_ir_from_ast(node->if_stmt.if_stmt);
ir_node_t* jmp = emit_instr(NULL);
if (node->if_stmt.else_stmt != NULL) {
vector_push(ctx.cur_func->bblocks, falseb);
ctx.cur_block = falseb;
gen_ir_from_ast(node->if_stmt.else_stmt);
ir_node_t* jmp = emit_instr(NULL);
ctx.cur_block = new_irbblock("jmp_block");
vector_push(ctx.cur_func->bblocks, ctx.cur_block);
*jmp = (ir_node_t) {
.tag = IR_NODE_JUMP,
.data.jump = {
.target_bblock = ctx.cur_block,
},
};
} else {
ctx.cur_block = falseb;
}
*jmp = (ir_node_t) {
.tag = IR_NODE_JUMP,
.data.jump = {
.target_bblock = ctx.cur_block,
},
};
break;
}
case NT_STMT_WHILE: {
@ -275,7 +356,7 @@ void gen_ir_from_ast(struct ASTNode* node) {
ir_node_t* ret_node = emit_instr(NULL);
*ret_node = (ir_node_t) {
.tag = IR_NODE_ALLOC,
.name = node->decl_val.name->syms.tok.constant.str,
.name = node->decl_val.name->syms.tok.val.str,
.type = &type_i32,
};
node->decl_val.data = ret_node;

View File

@ -54,6 +54,7 @@ typedef struct {
// Whole-program IR container.
typedef struct {
vector_header(global, ir_node_t*);       // list of ir_node_t*; not populated in the code visible here
vector_header(funcs, ir_func_t*);        // functions lowered by gen_ir_func
vector_header(extern_funcs, ir_func_t*); // functions declared without a definition at declaration time
} ir_prog_t;
struct ir_node {
@ -131,15 +132,15 @@ struct ir_node {
} op;
struct {
ir_node_t* cond;
ir_bblock_t true_bblock;
ir_bblock_t false_bblock;
ir_bblock_t* true_bblock;
ir_bblock_t* false_bblock;
} branch;
struct {
ir_bblock_t target_bblock;
ir_bblock_t* target_bblock;
} jump;
struct {
ir_func_t callee;
vector_header(args, ir_node_t);
ir_func_t* callee;
vector_header(args, ir_node_t*);
} call;
struct {
ir_node_t* ret_val;

View File

@ -1,5 +1,7 @@
int main(void) {
int a;
a = 1 + 2 * 3;
return a;
// Two-argument callee used to exercise function calls.
int add(int a, int b) {
return a + b;
}
// Calls add(1, 2) and returns its result (3).
int main(void) {
return add(1, 2);
}