diff --git a/ccompiler/backend/riscv32/Makefile b/ccompiler/backend/riscv32/Makefile index 4d3107f..abaf862 100644 --- a/ccompiler/backend/riscv32/Makefile +++ b/ccompiler/backend/riscv32/Makefile @@ -3,11 +3,16 @@ all: ccompiler run: ccompiler ./ccompiler test.c flat.bin -ccompiler: frontend - gcc -g rv32ima_codegen.c ../../middleend/ir.c -L../../frontend -lfrontend -o ccompiler +ccompiler: frontend ir + gcc -g rv32ima_codegen.c -L../../frontend -lfrontend -L../../middleend -lir -o ccompiler frontend: make -C ../../frontend +ir: + make -C ../../middleend + clean: - rm -f ccompiler flat.bin \ No newline at end of file + rm -f ccompiler flat.bin + make -C ../../frontend clean + make -C ../../middleend clean \ No newline at end of file diff --git a/ccompiler/backend/riscv32/rv32gen.h b/ccompiler/backend/riscv32/rv32gen.h index 9dd1dee..fe58ae3 100644 --- a/ccompiler/backend/riscv32/rv32gen.h +++ b/ccompiler/backend/riscv32/rv32gen.h @@ -211,7 +211,7 @@ typedef enum { #define SLTZ(rd, rs) SLT(rd, rs, REG_X0) // 小于0则置位 // sgtz rd, rs (Set if > zero) -#define SG TZ(rd, rs) SLT(rd, REG_X0, rs) // 大于0则置位 +#define SGTZ(rd, rs) SLT(rd, REG_X0, rs) // 大于0则置位 // beqz rs, offset (Branch if = zero) #define BEQZ(rs, offset) BEQ(rs, REG_X0, offset) // 为0则转移 @@ -291,13 +291,16 @@ typedef enum { #define LI(rd, num) \ LUI(rd, num), \ ADDI(rd, rd, num) - #define MV(rd, rs) ADDI(rd, rs, 0) #define NOT(rd, rs) XORI(rd, rs, -1) -#define CALL(offset) \ - AUIPC(REG_X1, offset), \ - JALR(REG_X1, REG_X1, offset) +#define SEQZ(rd, rs) SLTIU(rd, rs, 1) +#define SGT(rd, rs1, rs2) SLT(rd, rs2, rs1) + +// TODO call have error when outof jalr +#define CALL(offset) \ + AUIPC(REG_X1, REG_X0), \ + JALR(REG_X1, REG_X1, offset) #define CALL_ABS(addr) \ AUIPC(REG_X0, addr), \ JALR(REG_X1, REG_X0, addr) diff --git a/ccompiler/backend/riscv32/rv32ima_codegen.c b/ccompiler/backend/riscv32/rv32ima_codegen.c index 3dda238..40d3a24 100644 --- a/ccompiler/backend/riscv32/rv32ima_codegen.c +++ 
b/ccompiler/backend/riscv32/rv32ima_codegen.c @@ -9,22 +9,6 @@ typedef union rv32code { uint8_t bytes[4]; } rv32code_t; -#define CRT_CODE_SIZE 16 - -// 使用示例 -rv32code_t gcodes[] = { - LI(REG_SP, 0x1000), - LI(REG_RA, 0x0), - - CALL_ABS(CRT_CODE_SIZE << 2), - // Exit - ECALL_EXIT2(), -}; - -void test_raw_gen(FILE* out) { - fwrite(gcodes, sizeof(rv32code_t), sizeof(gcodes)/sizeof(gcodes[0]), out); -} - #include "../../frontend/frontend.h" #include "../../middleend/ir.h" typedef struct { @@ -59,9 +43,9 @@ int write_inst(union rv32code ins, FILE* fp) { } #define GENCODE(code) vector_push(ctx.codes, (rv32code_t)(code)); len += 4 -#define GENCODES(code) do { \ +#define GENCODES(...) do { \ rv32code_t codes[] = { \ - code \ + __VA_ARGS__ \ }; \ for (int i = 0; i < sizeof(codes) / sizeof(codes[0]); i ++) { \ GENCODE(codes[i]); \ @@ -105,13 +89,18 @@ static int func_idx(ir_func_t* tofunc) { } static int system_func(const char* name) { - static const char defined_func[][16] = { - "ecall_pnt_int", + static struct { + const char* name; + int ecall_num; + } defined_func[] = { + {"ecall_pnt_int", 1}, + {"ecall_pnt_char", 11}, + {"ecall_scan_int", 1025 + 4}, }; - for (int j = 0; j < sizeof(defined_func)/sizeof(defined_func[0]); j++) { - if (strcmp(name, defined_func[j]) == 0) { - return j; + for (int i = 0; i < sizeof(defined_func)/sizeof(defined_func[0]); i++) { + if (strcmp(name, defined_func[i].name) == 0) { + return defined_func[i].ecall_num; } } return -1; @@ -119,11 +108,22 @@ static int system_func(const char* name) { static int get_node_val(ir_node_t* ptr, int reg) { int len = 0; - if (ptr->tag == IR_NODE_CONST_INT) { - GENCODES(LI(reg, ptr->data.const_int.val)); - } else { - int offset = stack_offset(ptr); - GENCODE(LW(reg, REG_SP, offset)); + switch (ptr->tag) { + case IR_NODE_CONST_INT: { + GENCODES(LI(reg, ptr->data.const_int.val)); + break; + } + // case IR_NODE_CALL: { + // // GENCODE(SW(REG_A0, REG_SP, ctx.stack_offset)); + // // GENCODE() + // // break; + // } + 
default: { + int offset = stack_offset(ptr); + GENCODE(LW(reg, REG_SP, offset)); + break; + } + } return len; } @@ -139,8 +139,6 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) { // S1 = *(S0 + imm) offset = stack_offset(instr->data.load.target); GENCODE(LW(REG_T0, REG_SP, offset)); - // offset = STACK_OFFSET(instr); - // GENCODE(SW(REG_T0, REG_SP, offset)); break; } case IR_NODE_STORE: { @@ -180,6 +178,27 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) { case IR_OP_MOD: GENCODE(REM(REG_T0, REG_T1, REG_T2)); break; + case IR_OP_EQ: + GENCODE(XOR(REG_T0, REG_T1, REG_T2)); + GENCODE(SEQZ(REG_T0, REG_T0)); + break; + case IR_OP_GE: + GENCODE(SLT(REG_T0, REG_T1, REG_T2)); + GENCODE(SEQZ(REG_T0, REG_T0)); + break; + case IR_OP_GT: + GENCODE(SGT(REG_T0, REG_T1, REG_T2)); + break; + case IR_OP_LE: + GENCODE(SGT(REG_T0, REG_T1, REG_T2)); + GENCODE(SEQZ(REG_T0, REG_T0)); + break; + case IR_OP_LT: + GENCODE(SLT(REG_T0, REG_T1, REG_T2)); + break; + case IR_OP_NEQ: + GENCODE(XOR(REG_T0, REG_T1, REG_T2)); + break; default: error("ERROR gen_instr op in riscv"); break; @@ -244,13 +263,15 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) { } int system_func_idx = system_func(instr->data.call.callee->name); - if (system_func_idx == 0) { - // ecall_pnt_int - GENCODE(ADDI(REG_A7, REG_X0, 0x1)); - GENCODE(ECALL()); - break; + if (system_func_idx != -1) { + // ecall + GENCODES( + ADDI(REG_A7, REG_X0, system_func_idx), + ECALL() + ); + goto CALL_END; } - + jmp_t* jmp = xmalloc(sizeof(jmp_t)); *jmp = (jmp_t) { .base_offset = ctx.cur_func_offset + ctx.cur_block_offset + len, @@ -260,10 +281,11 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) { .cur_idx = func_idx(ctx.cur_func), }; vector_push(ctx.call, jmp); - - GENCODES(( - CALL(0) - )); + + GENCODES(CALL(0)); + CALL_END: + offset = stack_offset(instr); + GENCODE(SW(REG_A0, REG_SP, offset)); break; } default: @@ -335,10 +357,9 @@ static int gen_func(ir_func_t* func) { return len; } 
-static void gen_code(ir_prog_t* prog) { +static int gen_code(ir_prog_t* prog) { ctx.prog = prog; - for (int i = 0; i < prog->extern_funcs.size; i++) { if (system_func(prog->extern_funcs.data[i]->name) == -1) { error("func %s not defined and not a system func", prog->extern_funcs.data[i]->name); @@ -354,12 +375,12 @@ static void gen_code(ir_prog_t* prog) { len += ret; } - for (int i = 0; i < ctx.call.size; i++) { jmp_t* jmp = vector_at(ctx.call, i); int32_t code = 0; // FIXME ERROR int offset = jmp_cache[jmp->to_idx] - (jmp_cache[jmp->cur_idx] + jmp->base_offset); + assert(offset >= -0x800 && offset <= 0x7ff); /* CALL feeds this to JALR, whose immediate is signed 12-bit */ int32_t codes[2] = { CALL(offset) }; @@ -369,6 +390,14 @@ static void gen_code(ir_prog_t* prog) { }; } } + + // Got Main pos; + for (int i = 0; i < prog->funcs.size; i++) { + if (strcmp(vector_at(prog->funcs, i)->name, "main") == 0) { + return jmp_cache[i]; + } + } + error("main not found"); } int main(int argc, char** argv) { @@ -390,7 +419,23 @@ int main(int argc, char** argv) { struct ASTNode* root = frontend(infilename, in, (sread_fn)fread_s); gen_ir_from_ast(root); - gen_code(&prog); + int main_pos = gen_code(&prog); + + #define CRT_CODE_SIZE 16 + rv32code_t gcodes[] = { + LI(REG_SP, 0x1000), + LI(REG_RA, 0x0), + CALL(0), + // Exit + ECALL_EXIT2(), + }; + main_pos += (CRT_CODE_SIZE - 4) * 4; + assert(main_pos >= -0x800 && main_pos <= 0x7ff); /* CALL feeds this to JALR, whose immediate is signed 12-bit */ + rv32code_t call_main[2] = { + CALL(main_pos) + }; + gcodes[4] = call_main[0]; + gcodes[5] = call_main[1]; for (int i = 0; i < CRT_CODE_SIZE; i++) { write_inst((union rv32code) { diff --git a/ccompiler/backend/riscv32/tests/simple/01_return.c b/ccompiler/backend/riscv32/tests/simple/01_return.c new file mode 100644 index 0000000..9fd5dff --- /dev/null +++ b/ccompiler/backend/riscv32/tests/simple/01_return.c @@ -0,0 +1,3 @@ +int main() { + return 65536; +} diff --git a/ccompiler/backend/riscv32/tests/simple/02_decl_expr.c b/ccompiler/backend/riscv32/tests/simple/02_decl_expr.c new file mode 100644 index 0000000..50796b1 
--- /dev/null +++ b/ccompiler/backend/riscv32/tests/simple/02_decl_expr.c @@ -0,0 +1,8 @@ +int main() { + int a; + int b; + a = 1 + 2 * 3; + b = 7; + a = a - b + 1; + return a; +} diff --git a/ccompiler/backend/riscv32/tests/simple/03_decl_init.c b/ccompiler/backend/riscv32/tests/simple/03_decl_init.c new file mode 100644 index 0000000..45ad750 --- /dev/null +++ b/ccompiler/backend/riscv32/tests/simple/03_decl_init.c @@ -0,0 +1,6 @@ + +int main() { + int x = 10; + x = x + 1; + return x; +} \ No newline at end of file diff --git a/ccompiler/backend/riscv32/tests/simple/04_if.c b/ccompiler/backend/riscv32/tests/simple/04_if.c new file mode 100644 index 0000000..af66af7 --- /dev/null +++ b/ccompiler/backend/riscv32/tests/simple/04_if.c @@ -0,0 +1,10 @@ +int main(void) { + int a; + a = 1; + if (a) { + a = 1; + } else { + a = 2; + } + return a; +} diff --git a/ccompiler/backend/riscv32/tests/simple/05_else.c b/ccompiler/backend/riscv32/tests/simple/05_else.c new file mode 100644 index 0000000..6cda761 --- /dev/null +++ b/ccompiler/backend/riscv32/tests/simple/05_else.c @@ -0,0 +1,10 @@ +int main(void) { + int a; + a = 0; + if (a) { + a = 1; + } else { + a = 2; + } + return a; +} \ No newline at end of file diff --git a/ccompiler/backend/riscv32/tests/simple/06_fcall.c b/ccompiler/backend/riscv32/tests/simple/06_fcall.c new file mode 100644 index 0000000..18f817c --- /dev/null +++ b/ccompiler/backend/riscv32/tests/simple/06_fcall.c @@ -0,0 +1,9 @@ +int add(int, int); + +int main(void) { + return add(1, 2); +} + +int add(int a, int b) { + return a + b; +} diff --git a/ccompiler/backend/riscv32/tests/simple/07_while.c b/ccompiler/backend/riscv32/tests/simple/07_while.c new file mode 100644 index 0000000..93d6cd4 --- /dev/null +++ b/ccompiler/backend/riscv32/tests/simple/07_while.c @@ -0,0 +1,5 @@ +int main() { + int i = 0; + while (i < 10) i = i + 1; + return i; +} diff --git a/ccompiler/backend/riscv32/tests/simple/08_do_while.c 
b/ccompiler/backend/riscv32/tests/simple/08_do_while.c
new file mode 100644
index 0000000..a5e3245
--- /dev/null
+++ b/ccompiler/backend/riscv32/tests/simple/08_do_while.c
@@ -0,0 +1,12 @@
+// #include
+
+int main() {
+    int i = 0;
+    int pow = 1;
+    do {
+        pow = pow * 2;
+        i = i + 1;
+    } while(i < 7);
+    // printf("%d", pow);
+    return pow;
+}
diff --git a/ccompiler/backend/riscv32/tests/simple/09_for.c b/ccompiler/backend/riscv32/tests/simple/09_for.c
new file mode 100644
index 0000000..7095227
--- /dev/null
+++ b/ccompiler/backend/riscv32/tests/simple/09_for.c
@@ -0,0 +1,7 @@
+int main() {
+    int num = 0;
+    for (int i = 0; i < 10; i += 1) {
+        num = num + 1;
+    }
+    return num;
+}
diff --git a/ccompiler/backend/riscv32/tests/simple/10_main.c b/ccompiler/backend/riscv32/tests/simple/10_main.c
new file mode 100644
index 0000000..045475b
--- /dev/null
+++ b/ccompiler/backend/riscv32/tests/simple/10_main.c
@@ -0,0 +1,7 @@
+int add(int a, int b) {
+    return a + b;
+}
+
+int main(void) {
+    return add(1, 2);
+}
diff --git a/ccompiler/backend/riscv32/tests/simple/11_recursive.c b/ccompiler/backend/riscv32/tests/simple/11_recursive.c
new file mode 100644
index 0000000..e526db0
--- /dev/null
+++ b/ccompiler/backend/riscv32/tests/simple/11_recursive.c
@@ -0,0 +1,18 @@
+// #include
+
+int factorial(int num);
+
+int main() {
+    int num = 5;
+    int result = factorial(num);
+    // printf("%d", result);
+    return result;
+}
+
+int factorial(int num) {
+    if (num == 0) {
+        return 1;
+    } else {
+        return num * factorial(num - 1);
+    }
+}
diff --git a/ccompiler/backend/riscv32/tests/simple/Makefile b/ccompiler/backend/riscv32/tests/simple/Makefile
new file mode 100644
index 0000000..1e6afdb
--- /dev/null
+++ b/ccompiler/backend/riscv32/tests/simple/Makefile
@@ -0,0 +1,30 @@
+VM := ../../rv32-vm
+CC := ../../ccompiler
+STD_CC := gcc
+
+TESTS := $(wildcard *.c)
+
+# Define all test targets
+TEST_TARGETS := $(patsubst %.c, %_test, $(TESTS))
+
+all: $(TEST_TARGETS)
+
+# Run and compare in ONE shell invocation (lines joined with "\"): make runs each
+# recipe line in its own shell, so ret_gcc/ret_vm set on separate lines are lost.
+%_test: %.c
+	@$(STD_CC) -g -o $@ $<
+	@$(CC) $< flat.bin
+	@ret_gcc=0; ./$@ || ret_gcc=$$?; \
+	ret_vm=0; $(VM) flat.bin || ret_vm=$$?; \
+	echo "Testing $@"; \
+	if [ $$ret_gcc -eq $$ret_vm ]; then \
+		echo "$@ passed"; \
+	else \
+		echo "$@ failed: GCC returned $$ret_gcc, VM returned $$ret_vm"; \
+		exit 1; \
+	fi
+
+clean:
+	rm -f $(TEST_TARGETS) flat.bin
+
+.PHONY: all clean
diff --git a/ccompiler/backend/riscv32/tests/simple/hard_01.c b/ccompiler/backend/riscv32/tests/simple/hard_01.c
new file mode 100644
index 0000000..85ad74e
--- /dev/null
+++ b/ccompiler/backend/riscv32/tests/simple/hard_01.c
@@ -0,0 +1,6 @@
+int main() {
+    int a, b;
+    a = 1;
+    b = 2;
+    return a + b;
+}
\ No newline at end of file
diff --git a/ccompiler/backend/riscv32/tests/simple/test.py b/ccompiler/backend/riscv32/tests/simple/test.py new file mode 100644 index 0000000..66d6a57 --- /dev/null +++ b/ccompiler/backend/riscv32/tests/simple/test.py @@ -0,0 +1,86 @@ +import subprocess +import os +from pathlib import Path + +# 配置参数 +TEST_DIR = Path(".") +CC_PATH = Path("../../ccompiler.exe") +VM_PATH = Path("../../rv32-vm.exe") +WORKSPACE = Path(".") # 测试工作目录 + +# 测试用例映射表(示例) +TEST_CASE_MAP = { + "./01_return.c": 65536, + "./02_decl_expr.c": 1, + "./03_decl_init.c": 11, + "./04_if.c": 1, + "./05_else.c": 2, + "./06_fcall.c": 3, + "./07_while.c": 10, + "./08_do_while.c": 128, + "./09_for.c": 10, + "./10_main.c": 3, + "./11_recursive.c": 120, +} + +def run_command(cmd, capture_stderr=True): + """执行命令并捕获stderr""" + result = subprocess.run( + cmd, + cwd=WORKSPACE, + stderr=subprocess.PIPE if capture_stderr else None, + text=True, + timeout=1, + ) + return result.stderr.strip() if capture_stderr else None + +def run_test(test_file, expected): + print(f"\nTesting {test_file}...") + + # 1. 编译生成flat.bin + compile_cmd = [str(CC_PATH), str(test_file)] + compile_err = run_command(compile_cmd) + + if not (WORKSPACE / "flat.bin").exists(): + print(f" Compilation failed: {compile_err}") + return False + + # 2. 执行虚拟机 + vm_cmd = [str(VM_PATH), "flat.bin"] + + # 3. 
解析返回值(假设最后一行是返回值) + try: + vm_err = run_command(vm_cmd) + actual = int(vm_err.split()[-1]) + except (ValueError, IndexError) as e: + print(f" Invalid VM output: {vm_err}") + return False + except subprocess.TimeoutExpired: + print(" Timeout expired") + return False + + # 4. 验证结果 + if actual == expected: + print(f" PASSED {test_file}") + return True + else: + print(f" FAILED: Expected {expected}, got {actual}") + return False + +def main(): + passed = 0 + total = 0 + + for test_file, expected in TEST_CASE_MAP.items(): + total += 1 + if run_test(TEST_DIR / test_file, expected): + passed += 1 + + # 清理中间文件 + if (WORKSPACE / "flat.bin").exists(): + os.remove(WORKSPACE / "flat.bin") + + print(f"\nTest Summary: {passed}/{total} passed") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/ccompiler/frontend/lexer/lexer.c b/ccompiler/frontend/lexer/lexer.c index d0ca174..d6fe029 100644 --- a/ccompiler/frontend/lexer/lexer.c +++ b/ccompiler/frontend/lexer/lexer.c @@ -163,13 +163,14 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) { if (*peek == '\\') { peek++; val = got_slash(peek); + peek++; } else { - val = *peek; + val = *peek++; } - if (*peek != '\'') error("Unclosed character literal"); + if (*peek++ != '\'') error("Unclosed character literal"); token->val.ch = val; - lexer->cur_ptr = peek + 1; + lexer->cur_ptr = peek; token->val.have = 1; token->type = TOKEN_CHAR_LITERAL; } diff --git a/ccompiler/frontend/lexer/tests/test.c b/ccompiler/frontend/lexer/tests/test.c index 8a1f866..5a8142a 100644 --- a/ccompiler/frontend/lexer/tests/test.c +++ b/ccompiler/frontend/lexer/tests/test.c @@ -117,29 +117,23 @@ void test_literals() { // test_lexer_string("4294967295", TOKEN_INT_LITERAL); // UINT_MAX } - // TEST_CASE("Character literals"); { - // test_lexer_string("'a'", TOKEN_CHAR_LITERAL); - // test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL); - // test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL); - // test_lexer_string("'\\\\'", 
TOKEN_CHAR_LITERAL); - // test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL); - // } + TEST_CASE("Character literals"); { + test_lexer_string("'a'", TOKEN_CHAR_LITERAL); + test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL); + test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL); + test_lexer_string("'\\\\'", TOKEN_CHAR_LITERAL); + test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL); + } TEST_CASE("String literals"); { test_lexer_string("\"hello\"", TOKEN_STRING_LITERAL); test_lexer_string("\"multi-line\\nstring\"", TOKEN_STRING_LITERAL); test_lexer_string("\"escape\\\"quote\"", TOKEN_STRING_LITERAL); } - - // TEST_CASE("Integer literals"); - // test_lexer_string("123", TOKEN_INT_LITERAL); - // test_lexer_string("0x1F", TOKEN_INT_LITERAL); // TEST_CASE("Floating literals"); // test_lexer_string("3.14e-5", TOKEN_FLOAT_LITERAL); - // TEST_CASE("Character literals"); - // test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL); } // 边界测试 diff --git a/ccompiler/frontend/lexer/token.c b/ccompiler/frontend/lexer/token.c index 39ed3bf..fc6c30d 100644 --- a/ccompiler/frontend/lexer/token.c +++ b/ccompiler/frontend/lexer/token.c @@ -30,14 +30,13 @@ void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) { tokbuf->cap = 0; } -tok_t *peek_tok(tok_buf_t *tokbuf) -{ +tok_t *peek_tok(tok_buf_t *tokbuf) { int idx = tokbuf->peek; - idx = ROUND_IDX(idx + 1); + tokbuf->peek = ROUND_IDX(idx + 1); if (tokbuf->size >= tokbuf->cap) { error("peek too deep, outof array size"); } - if (tokbuf->peek == tokbuf->end) { + if (idx == tokbuf->end) { if (tokbuf->size == tokbuf->cap) { error("peek_tok buffer overflow"); } @@ -46,10 +45,9 @@ tok_t *peek_tok(tok_buf_t *tokbuf) } tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx])); tokbuf->size++; - tokbuf->end = idx; + tokbuf->end = tokbuf->peek; } - - tokbuf->peek = idx; + return &(tokbuf->buf[idx]); } diff --git a/ccompiler/frontend/parser/ast/ast.h b/ccompiler/frontend/parser/ast/ast.h index e2e53cc..985f6f2 100644 --- a/ccompiler/frontend/parser/ast/ast.h 
+++ b/ccompiler/frontend/parser/ast/ast.h @@ -173,6 +173,8 @@ typedef struct ASTNode* (*parse_func_t) (parser_t*); void parse_prog(parser_t* parser); ast_node_t* parse_decl(parser_t* parser); +ast_node_t* parse_decl_val(parser_t* parser); + ast_node_t* parse_block(parser_t* parser); ast_node_t* parse_stmt(parser_t* parser); ast_node_t* parse_expr(parser_t* parser); diff --git a/ccompiler/frontend/parser/ast/func.c b/ccompiler/frontend/parser/ast/func.c index 2c31d15..aaba29d 100644 --- a/ccompiler/frontend/parser/ast/func.c +++ b/ccompiler/frontend/parser/ast/func.c @@ -8,6 +8,7 @@ // TODO 语义分析压入符号表 static void parse_params(parser_t* parser, tok_buf_t* cache, ast_node_t* node) { + flush_peek_tok(cache); tok_type_t ttype; ast_node_t *params = new_ast_node(); node->decl_func.params = params; diff --git a/ccompiler/frontend/parser/ast/stmt.c b/ccompiler/frontend/parser/ast/stmt.c index e6b3903..f5dc2a3 100644 --- a/ccompiler/frontend/parser/ast/stmt.c +++ b/ccompiler/frontend/parser/ast/stmt.c @@ -89,8 +89,12 @@ ast_node_t* parse_stmt(parser_t* parser) { // init expr or init decl_var // TODO need add this feature - node->for_stmt.init = parse_expr(parser); - expect_pop_tok(tokbuf, TOKEN_SEMICOLON); + if (peek_decl(tokbuf)) { + node->for_stmt.init = parse_decl_val(parser); + } else { + node->for_stmt.init = parse_expr(parser); + expect_pop_tok(tokbuf, TOKEN_SEMICOLON); + } // cond expr or null ttype = peek_tok_type(tokbuf); diff --git a/ccompiler/middleend/Makefile b/ccompiler/middleend/Makefile index e69de29..77d19c6 100644 --- a/ccompiler/middleend/Makefile +++ b/ccompiler/middleend/Makefile @@ -0,0 +1,30 @@ +# 编译器设置 +CC = gcc +AR = ar +CFLAGS = -g -Wall + +# 源文件列表 +SRCS = \ + ir.c \ + ir_ast.c \ + ir_lib.c \ + ir_type.c + +# 生成目标文件列表 +OBJS = $(SRCS:.c=.o) + +# 最终目标 +TARGET = libir.a + +all: $(TARGET) + +$(TARGET): $(OBJS) + $(AR) rcs $@ $^ + +%.o: %.c + $(CC) $(CFLAGS) -c -o $@ $< + +clean: + rm -f $(OBJS) $(TARGET) + +.PHONY: all clean \ No newline at end of file 
diff --git a/ccompiler/middleend/ir.c b/ccompiler/middleend/ir.c index 479a15a..e69de29 100644 --- a/ccompiler/middleend/ir.c +++ b/ccompiler/middleend/ir.c @@ -1,380 +0,0 @@ -#include "ir.h" -#include "../frontend/frontend.h" - -typedef struct ASTNode ASTNode; - -// 上下文结构,记录生成过程中的状态 -typedef struct { - ir_func_t* cur_func; // 当前处理的函数 - ir_bblock_t* cur_block; // 当前基本块 -} IRGenContext; -IRGenContext ctx; -ir_prog_t prog; -ir_type_t type_i32 = { - .tag = IR_TYPE_INT32, -}; - -static inline void init_ir_node_t(ir_node_t* node) { - node->name = NULL; - node->type = NULL; - vector_init(node->used_by); -} - -static inline ir_node_t* new_irnode() { - ir_node_t* node = xmalloc(sizeof(ir_node_t)); - init_ir_node_t(node); -} - -static inline ir_bblock_t* new_irbblock(const char* name) { - ir_bblock_t* block = xmalloc(sizeof(ir_bblock_t)); - block->label = name; - vector_init(block->instrs); - return block; -} - -ir_node_t* emit_instr(ir_bblock_t* block) { - if (block == NULL) block = ctx.cur_block; - ir_node_t *node = new_irnode(); - vector_push(block->instrs, node); - return vector_at(block->instrs, block->instrs.size - 1); -} - -ir_node_t* emit_br(ir_node_t* cond, ir_bblock_t* trueb, ir_bblock_t* falseb) { - ir_node_t* br = emit_instr(NULL); - *br = (ir_node_t) { - .tag = IR_NODE_BRANCH, - .data.branch = { - .cond = cond, - .true_bblock = trueb, - .false_bblock = falseb, - } - }; - return br; -} - -ir_node_t* gen_ir_expr(ASTNode* node) { - switch (node->type) { - case NT_TERM_VAL: { - ir_node_t* ir = new_irnode(); - *ir = (ir_node_t) { - .tag = IR_NODE_CONST_INT, - .data.const_int = { - .val = node->syms.tok.val.i, - }, - }; - return ir; - } - case NT_TERM_IDENT: { - ir_node_t* decl = node->syms.decl_node->decl_val.data; - return decl; - } - case NT_TERM_CALL: { - ir_node_t* ir = emit_instr(NULL); - *ir = (ir_node_t) { - .tag = IR_NODE_CALL, - .data.call = { - .callee = node->call.func_decl->decl_func.def->func.data, - }, - }; - vector_init(ir->data.call.args); - for (int 
i = 0; i < node->call.params->params.params.size; i++) { - vector_push(ir->data.call.args, \ - gen_ir_expr(node->call.params->params.params.data[i])); - } - return ir; - } - default: - goto NEXT; - } - return NULL; -NEXT: - ir_node_t* lhs = gen_ir_expr(node->expr.left); - ir_node_t* rhs = node->expr.right ? gen_ir_expr(node->expr.right) : NULL; - - if (node->type == NT_COMMA) { - return rhs; - } - - ir_node_t* instr = emit_instr(NULL); - vector_push(lhs->used_by, instr); - if (rhs) { vector_push(rhs->used_by, instr); } - - ir_node_t* ret; -#define BINOP(operand) do { \ - *instr = (ir_node_t){ \ - .tag = IR_NODE_OP, \ - .data.op = { \ - .op = operand, \ - .lhs = lhs, \ - .rhs = rhs, \ - }, \ - }; \ - ret = instr; \ - } while (0) - - switch (node->type) { - case NT_ADD :// (expr) + (expr) - BINOP(IR_OP_ADD); - break; - case NT_SUB :// (expr) - (expr) - BINOP(IR_OP_SUB); - break; - case NT_MUL :// (expr) * (expr) - BINOP(IR_OP_MUL); - break; - case NT_DIV :// (expr) / (expr) - BINOP(IR_OP_DIV); - break; - case NT_MOD :// (expr) % (expr) - BINOP(IR_OP_MOD); - break; - case NT_AND :// (expr) & (expr) - BINOP(IR_OP_AND); - break; - case NT_OR :// (expr) | (expr) - BINOP(IR_OP_OR); - break; - case NT_XOR :// (expr) ^ (expr) - BINOP(IR_OP_XOR); - break; - case NT_BIT_NOT :// ~ (expr) - // TODO - // BINOP(IR_OP_NOT); - break; - case NT_L_SH :// (expr) << (expr) - BINOP(IR_OP_SHL); - break; - case NT_R_SH :// (expr) >> (expr) - BINOP(IR_OP_SHR); // Shift right logical. - // TODO - // BINOP(IR_OP_SAR); // Shift right arithmetic. 
- break; - case NT_EQ :// (expr) == (expr) - BINOP(IR_OP_EQ); - break; - case NT_NEQ :// (expr) != (expr) - BINOP(IR_OP_NEQ); - break; - case NT_LE :// (expr) <= (expr) - BINOP(IR_OP_LE); - break; - case NT_GE :// (expr) >= (expr) - BINOP(IR_OP_GE); - break; - case NT_LT :// (expr) < (expr) - BINOP(IR_OP_LT); - break; - case NT_GT :// (expr) > (expr) - BINOP(IR_OP_GE); - break; - case NT_AND_AND :// (expr) && (expr) - break; - case NT_OR_OR :// (expr) || (expr) - break; - case NT_NOT :// ! (expr) - ir_node_t* zero = xmalloc(sizeof(ir_node_t)); - *zero = (ir_node_t){ - .tag = IR_NODE_CONST_INT, - .data.const_int = { - .val = 0, - }, - }; - *instr = (ir_node_t){ - .tag = IR_NODE_OP, - .data.op = { - .op = IR_OP_EQ, - .lhs = zero, - .rhs = lhs, - }, - }; - ret = instr; - break; - case NT_ASSIGN :// (expr) = (expr) - *instr = (ir_node_t){ - .tag = IR_NODE_STORE, - .data.store = { - .target = lhs, - .value = rhs, - }, - }; - ret = rhs; - break; - // case NT_COND : // (expr) ? (expr) : (expr) - default: - // TODO self error msg - error("Unsupported IR generation for AST node type %d", node->type); - break; - } - return ret; -} -static ir_func_t* new_irfunc(const char* name) { - ir_func_t *func = xmalloc(sizeof(ir_func_t)); - - vector_init(func->bblocks); - vector_init(func->params); - *func = (ir_func_t) { - .name = name, - // TODO typing system - .type = &type_i32, - }; - return func; -} - -static void gen_ir_func(ASTNode* node, ir_func_t* func) { - assert(node->type == NT_FUNC); - ir_bblock_t *entry = new_irbblock("entry"); - vector_push(func->bblocks, entry); - - vector_push(prog.funcs, func); - IRGenContext prev_ctx = ctx; - ctx.cur_func = func; - ctx.cur_block = entry; - - ast_node_t* params = node->func.decl->decl_func.params; - for (int i = 0; i < params->params.params.size; i ++) { - ir_node_t* decl = emit_instr(entry); - ast_node_t* param = params->params.params.data[i]; - vector_push(func->params, decl); - *decl = (ir_node_t) { - .tag = IR_NODE_ALLOC, - .name = 
param->decl_val.name->syms.tok.val.str, - .type = &type_i32, - }; - param->decl_val.data = decl; - } - gen_ir_from_ast(node->func.body); - - ctx = prev_ctx; -} - -void gen_ir_from_ast(struct ASTNode* node) { - switch (node->type) { - case NT_ROOT: { - for (int i = 0; i < node->root.children.size; i ++) { - gen_ir_from_ast(node->root.children.data[i]); - } - break; - } - case NT_DECL_FUNC: { - ir_func_t* func = new_irfunc(node->decl_func.name->syms.tok.val.str); - if (node->decl_func.def == NULL) { - ast_node_t* def = new_ast_node(); - def->func.body = NULL; - def->func.decl = node; - node->decl_func.def = def; - vector_push(prog.extern_funcs, func); - } - node->decl_func.def->func.data = func; - break; - } - case NT_FUNC: { - gen_ir_func(node, node->func.data); - break; - } - case NT_STMT_RETURN: { - ir_node_t* ret = NULL; - if (node->return_stmt.expr_stmt != NULL) { - ret = gen_ir_expr(node->return_stmt.expr_stmt); - } - ir_node_t* ir = emit_instr(NULL); - *ir = (ir_node_t) { - .tag = IR_NODE_RET, - .data = { - .ret = { - .ret_val = ret, - } - } - }; - - vector_push(ctx.cur_func->bblocks, new_irbblock(NULL)); - break; - } - case NT_STMT_BLOCK: { - gen_ir_from_ast(node->block_stmt.block); - break; - } - case NT_BLOCK: { - for (int i = 0; i < node->block.children.size; i ++) { - gen_ir_from_ast(node->block.children.data[i]); - } - break; - } - case NT_STMT_IF: { - ir_node_t *cond = gen_ir_expr(node->if_stmt.cond); - ir_bblock_t* trueb = new_irbblock("true_block"); - ir_bblock_t* falseb = new_irbblock("false_block"); - emit_br(cond, trueb, falseb); - - vector_push(ctx.cur_func->bblocks, trueb); - ctx.cur_block = trueb; - gen_ir_from_ast(node->if_stmt.if_stmt); - ir_node_t* jmp = emit_instr(NULL); - - if (node->if_stmt.else_stmt != NULL) { - vector_push(ctx.cur_func->bblocks, falseb); - ctx.cur_block = falseb; - gen_ir_from_ast(node->if_stmt.else_stmt); - ir_node_t* jmp = emit_instr(NULL); - - ctx.cur_block = new_irbblock("jmp_block"); - 
vector_push(ctx.cur_func->bblocks, ctx.cur_block); - *jmp = (ir_node_t) { - .tag = IR_NODE_JUMP, - .data.jump = { - .target_bblock = ctx.cur_block, - }, - }; - } else { - ctx.cur_block = falseb; - } - *jmp = (ir_node_t) { - .tag = IR_NODE_JUMP, - .data.jump = { - .target_bblock = ctx.cur_block, - }, - }; - break; - } - case NT_STMT_WHILE: { - node->while_stmt.cond; - node->while_stmt.body; - break; - } - case NT_STMT_DOWHILE: { - node->do_while_stmt.cond; - node->do_while_stmt.body; - break; - } - case NT_STMT_FOR: { - node->for_stmt.init; - node->for_stmt.cond; - node->for_stmt.iter; - node->for_stmt.body; - break; - } - case NT_DECL_VAR: { - ir_node_t* ret_node = emit_instr(NULL); - *ret_node = (ir_node_t) { - .tag = IR_NODE_ALLOC, - .name = node->decl_val.name->syms.tok.val.str, - .type = &type_i32, - }; - node->decl_val.data = ret_node; - if (node->decl_val.expr_stmt != NULL) { - gen_ir_from_ast(node->decl_val.expr_stmt); - } - break; - } - case NT_STMT_EXPR: { - gen_ir_expr(node->expr_stmt.expr_stmt); - break; - } - case NT_STMT_EMPTY: { - break; - } - default: - // TODO: 错误处理 - error("unknown node type"); - break; - } -} diff --git a/ccompiler/middleend/ir.h b/ccompiler/middleend/ir.h index b06891c..1263df1 100644 --- a/ccompiler/middleend/ir.h +++ b/ccompiler/middleend/ir.h @@ -57,22 +57,25 @@ typedef struct { vector_header(extern_funcs, ir_func_t*); } ir_prog_t; +typedef enum ir_node_tag { + IR_NODE_NULL, + IR_NODE_CONST_INT, + IR_NODE_ALLOC, + IR_NODE_LOAD, + IR_NODE_STORE, + IR_NODE_GET_PTR, + IR_NODE_OP, + IR_NODE_BRANCH, + IR_NODE_JUMP, + IR_NODE_CALL, + IR_NODE_RET, +} ir_node_tag_t; + struct ir_node { const ir_type_t* type; const char* name; vector_header(used_by, ir_node_t*); - enum { - IR_NODE_CONST_INT, - IR_NODE_ALLOC, - IR_NODE_LOAD, - IR_NODE_STORE, - IR_NODE_GET_PTR, - IR_NODE_OP, - IR_NODE_BRANCH, - IR_NODE_JUMP, - IR_NODE_CALL, - IR_NODE_RET, - } tag; + ir_node_tag_t tag; union { struct { int32_t val; diff --git a/ccompiler/middleend/ir_ast.c 
b/ccompiler/middleend/ir_ast.c
new file mode 100644
index 0000000..ab75f56
--- /dev/null
+++ b/ccompiler/middleend/ir_ast.c
@@ -0,0 +1,455 @@
+#include "ir.h"
+#include "ir_lib.h"
+#include "ir_type.h"
+#include "../frontend/frontend.h"
+
+// Generation context: records where newly generated IR is being appended.
+typedef struct {
+    ir_func_t* cur_func;    // function currently being processed
+    ir_bblock_t* cur_block; // current basic block
+} IRGenContext;
+IRGenContext ctx;
+ir_prog_t prog;
+
+// Append `node` to `block`; a NULL `block` selects ctx.cur_block.
+static void emit_instr(ir_bblock_t* block, ir_node_t* node) {
+    if (block == NULL) block = ctx.cur_block;
+    vector_push(block->instrs, node);
+}
+
+// Build a conditional branch on `cond`, append it to the current block and return it.
+static ir_node_t* emit_br(ir_node_t* cond, ir_bblock_t* trueb, ir_bblock_t* falseb) {
+    ir_node_t* br = new_ir_node(NULL, IR_NODE_BRANCH);
+    emit_instr(NULL, br);
+    br->data.branch.cond = cond;
+    br->data.branch.true_bblock = trueb;
+    br->data.branch.false_bblock = falseb;
+    return br;
+}
+
+static ir_node_t* gen_ir_expr(ast_node_t* node);
+
+// Lower a terminal AST node: an integer constant, an identifier (yields the IR
+// node recorded on its declaration) or a call (arguments are lowered first,
+// then the call itself is appended to the current block).
+static ir_node_t* gen_ir_term(ast_node_t* node) {
+    switch (node->type) {
+        case NT_TERM_VAL: {
+            ir_node_t* ir = new_ir_node(NULL, IR_NODE_CONST_INT);
+            ir->data.const_int.val = node->syms.tok.val.i;
+            return ir;
+        }
+        case NT_TERM_IDENT: {
+            ir_node_t* decl = node->syms.decl_node->decl_val.data;
+            return decl;
+        }
+        case NT_TERM_CALL: {
+            ir_node_t* call = new_ir_node(NULL, IR_NODE_CALL);
+            call->data.call.callee = node->call.func_decl->decl_func.def->func.data;
+
+            for (int i = 0; i < node->call.params->params.params.size; i++) {
+                ast_node_t* param = vector_at(node->call.params->params.params, i);
+                ir_node_t *tmp = gen_ir_expr(param);
+                vector_push(call->data.call.args, tmp);
+            }
+
+            emit_instr(NULL, call);
+            return call;
+        }
+        default: {
+            assert(0);
+            return NULL; // not reached; keeps the result defined if assert is compiled out
+        }
+    }
+}
+
+// Lower an expression AST node to IR and return the node holding its value.
+// Terminals are delegated to gen_ir_term(); for operators both operands are
+// lowered first, then one instruction is built, linked into the operands'
+// used_by lists and appended to the current block. An assignment emits a
+// store and yields the stored value; a comma expression yields its right side.
+static ir_node_t* gen_ir_expr(ast_node_t* node) {
+    // term node
+    switch (node->type) {
+        case NT_TERM_VAL:
+        case NT_TERM_IDENT:
+        case NT_TERM_CALL:
+            return gen_ir_term(node);
+        default:
+            break;
+    }
+
+    ir_node_t* lhs = gen_ir_expr(node->expr.left);
+    ir_node_t* rhs = node->expr.right ? gen_ir_expr(node->expr.right) : NULL;
+    if (node->type == NT_COMMA) {
+        return rhs;
+    }
+
+    ir_node_t* instr = NULL;
+    ir_node_t* ret = NULL;
+#define BINOP(operand) do { \
+        instr = new_ir_node(NULL, IR_NODE_OP); \
+        instr->data.op.op = operand; \
+        instr->data.op.lhs = lhs; \
+        instr->data.op.rhs = rhs; \
+        ret = instr; \
+    } while (0)
+
+    switch (node->type) {
+        case NT_ADD: {
+            // (expr) + (expr)
+            BINOP(IR_OP_ADD); break;
+        }
+        case NT_SUB: {
+            // (expr) - (expr)
+            BINOP(IR_OP_SUB); break;
+        }
+        case NT_MUL: {
+            // (expr) * (expr)
+            BINOP(IR_OP_MUL); break;
+        }
+        case NT_DIV: {
+            // (expr) / (expr)
+            BINOP(IR_OP_DIV); break;
+        }
+        case NT_MOD: {
+            // (expr) % (expr)
+            BINOP(IR_OP_MOD); break;
+        }
+        case NT_AND: {
+            // (expr) & (expr)
+            BINOP(IR_OP_AND); break;
+        }
+        case NT_OR: {
+            // (expr) | (expr)
+            BINOP(IR_OP_OR); break;
+        }
+        case NT_XOR: {
+            // (expr) ^ (expr)
+            BINOP(IR_OP_XOR); break;
+        }
+        case NT_BIT_NOT: {
+            // ~ (expr)
+            // TODO BINOP(IR_OP_NOT);
+            error("bitwise NOT (~) is not implemented");
+            break;
+        }
+        case NT_L_SH: {
+            // (expr) << (expr)
+            BINOP(IR_OP_SHL);
+            break;
+        }
+        case NT_R_SH: {
+            // (expr) >> (expr)
+            BINOP(IR_OP_SHR); // Shift right logical.
+            // TODO
+            // BINOP(IR_OP_SAR); // Shift right arithmetic.
+            break;
+        }
+        case NT_EQ: {
+            // (expr) == (expr)
+            BINOP(IR_OP_EQ); break;
+        }
+        case NT_NEQ: {
+            // (expr) != (expr)
+            BINOP(IR_OP_NEQ); break;
+        }
+        case NT_LE: {
+            // (expr) <= (expr)
+            BINOP(IR_OP_LE); break;
+        }
+        case NT_GE: {
+            // (expr) >= (expr)
+            BINOP(IR_OP_GE); break;
+        }
+        case NT_LT: {
+            // (expr) < (expr)
+            BINOP(IR_OP_LT); break;
+        }
+        case NT_GT: {
+            // (expr) > (expr)
+            BINOP(IR_OP_GT); break;
+        }
+        case NT_AND_AND:// (expr) && (expr)
+            error("unimpliment");
+            break;
+        case NT_OR_OR:// (expr) || (expr)
+            error("unimpliment");
+            break;
+        case NT_NOT: {
+            // ! (expr)
+            instr = new_ir_node(NULL, IR_NODE_OP);
+            instr->data.op.op = IR_OP_EQ;
+            instr->data.op.lhs = &node_zero;
+            instr->data.op.rhs = lhs;
+            ret = instr;
+            break;
+        }
+        case NT_ASSIGN: {
+            // (expr) = (expr)
+            instr = new_ir_node(NULL, IR_NODE_STORE);
+            instr->data.store.target = lhs;
+            instr->data.store.value = rhs;
+            ret = rhs;
+            break;
+        }
+        // case NT_COND: // (expr) ? (expr) : (expr)
+        default: {
+            // TODO self error msg
+            error("Unsupported IR generation for AST node type %d", node->type);
+            break;
+        }
+    }
+    if (instr == NULL) {
+        // Only reached for operators that were rejected with error() above.
+        return NULL;
+    }
+    // Register the def-use edges now that the using instruction exists.
+    vector_push(lhs->used_by, instr);
+    if (rhs) { vector_push(rhs->used_by, instr); }
+    emit_instr(NULL, instr);
+    return ret;
+}
+
+// Generate IR for a function definition: register it in prog.funcs, open an
+// "entry" block, emit one IR_NODE_ALLOC per parameter, lower the body, restore ctx.
+static void gen_ir_func(ast_node_t* node, ir_func_t* func) {
+    assert(node->type == NT_FUNC);
+    ir_bblock_t *entry = new_ir_bblock("entry");
+    vector_push(func->bblocks, entry);
+
+    vector_push(prog.funcs, func);
+    IRGenContext prev_ctx = ctx;
+    ctx.cur_func = func;
+    ctx.cur_block = entry;
+
+    ast_node_t* params = node->func.decl->decl_func.params;
+    for (int i = 0; i < params->params.params.size; i ++) {
+        ast_node_t* param = params->params.params.data[i];
+        ir_node_t* decl = new_ir_node(param->decl_val.name->syms.tok.val.str, IR_NODE_ALLOC);
+        emit_instr(entry, decl);
+        vector_push(func->params, decl);
+        // TODO Typing system
+        decl->type = &type_i32;
+        param->decl_val.data = decl;
+    }
+    gen_ir_from_ast(node->func.body);
+
+    ctx = prev_ctx;
+}
+
+void gen_ir_jmp(ast_node_t* node) {
+    ir_bblock_t *bblocks[3];
+    for (int i = 0; i < sizeof(bblocks)/sizeof(bblocks[0]); i++) {
+        bblocks[i] = new_ir_bblock(NULL);
+        vector_push(ctx.cur_func->bblocks, bblocks[i]);
+    }
+
+#define NEW_IR_JMP(name, block) do { \
+        name = new_ir_node(NULL, IR_NODE_JUMP); \
+        name->data.jump.target_bblock = block; \
+    } while (0)
+
+    switch (node->type) {
+        case NT_STMT_IF: {
+            ir_bblock_t* trueb = bblocks[0];
+            ir_bblock_t* falseb = bblocks[1];
+            ir_bblock_t* endb = bblocks[2];
+            ir_node_t* jmp;
+
+            // cond
+            ir_node_t *cond = gen_ir_expr(node->if_stmt.cond);
+            emit_br(cond, trueb, falseb);
+
+            // true block
+
vector_push(ctx.cur_func->bblocks, trueb); + ctx.cur_block = trueb; + gen_ir_from_ast(node->if_stmt.if_stmt); + + // else block + if (node->if_stmt.else_stmt != NULL) { + vector_push(ctx.cur_func->bblocks, falseb); + ctx.cur_block = falseb; + gen_ir_from_ast(node->if_stmt.else_stmt); + ir_node_t* jmp; + + ctx.cur_block = endb; + vector_push(ctx.cur_func->bblocks, ctx.cur_block); + NEW_IR_JMP(jmp, ctx.cur_block); + emit_instr(falseb, jmp); + } else { + ctx.cur_block = falseb; + } + NEW_IR_JMP(jmp, ctx.cur_block); + emit_instr(trueb, jmp); + break; + } + case NT_STMT_WHILE: { + ir_bblock_t* entryb = bblocks[0]; + ir_bblock_t* bodyb = bblocks[1]; + ir_bblock_t* endb = bblocks[2]; + + ir_node_t* entry; + NEW_IR_JMP(entry, entryb); + emit_instr(NULL, entry); + + // Entry: + ctx.cur_block = entryb; + ir_node_t *cond = gen_ir_expr(node->while_stmt.cond); + emit_br(cond, bodyb, endb); + + // Body: + ir_node_t* jmp; + ctx.cur_block = bodyb; + gen_ir_from_ast(node->while_stmt.body); + NEW_IR_JMP(jmp, entryb); + emit_instr(NULL, jmp); + + // End: + ctx.cur_block = endb; + break; + } + case NT_STMT_DOWHILE: { + ir_bblock_t* entryb = bblocks[0]; + ir_bblock_t* bodyb = bblocks[1]; + ir_bblock_t* endb = bblocks[2]; + + ir_node_t* entry; + NEW_IR_JMP(entry, bodyb); + emit_instr(NULL, entry); + + // Body: + ctx.cur_block = bodyb; + gen_ir_from_ast(node->do_while_stmt.body); + ir_node_t* jmp; + NEW_IR_JMP(jmp, entryb); + emit_instr(NULL, jmp); + + // Entry: + ctx.cur_block = entryb; + ir_node_t *cond = gen_ir_expr(node->do_while_stmt.cond); + emit_br(cond, bodyb, endb); + + // End: + ctx.cur_block = endb; + break; + } + case NT_STMT_FOR: { + ir_bblock_t* entryb = bblocks[0]; + ir_bblock_t* bodyb = bblocks[1]; + ir_bblock_t* endb = bblocks[2]; + + if (node->for_stmt.init) { + gen_ir_from_ast(node->for_stmt.init); + } + ir_node_t* entry; + NEW_IR_JMP(entry, entryb); + emit_instr(NULL, entry); + + // Entry: + ctx.cur_block = entryb; + if (node->for_stmt.cond) { + ir_node_t *cond = 
gen_ir_expr(node->for_stmt.cond); + emit_br(cond, bodyb, endb); + } else { + ir_node_t* jmp; + NEW_IR_JMP(jmp, bodyb); + } + + // Body: + ctx.cur_block = bodyb; + gen_ir_from_ast(node->for_stmt.body); + if (node->for_stmt.iter) { + gen_ir_expr(node->for_stmt.iter); + } + ir_node_t* jmp; + NEW_IR_JMP(jmp, entryb); + emit_instr(NULL, jmp); + + // End: + ctx.cur_block = endb; + break; + } + default: + error("ir jmp can't hit here"); + } +} + +void gen_ir_from_ast(ast_node_t* node) { + switch (node->type) { + case NT_ROOT: { + for (int i = 0; i < node->root.children.size; i ++) { + gen_ir_from_ast(node->root.children.data[i]); + } + break; + } + case NT_DECL_FUNC: { + ir_func_t* func = new_ir_func(node->decl_func.name->syms.tok.val.str, &type_i32); + if (node->decl_func.def == NULL) { + ast_node_t* def = new_ast_node(); + def->func.body = NULL; + def->func.decl = node; + node->decl_func.def = def; + vector_push(prog.extern_funcs, func); + } + node->decl_func.def->func.data = func; + break; + } + case NT_FUNC: { + gen_ir_func(node, node->func.data); + break; + } + case NT_STMT_RETURN: { + ir_node_t* ret = NULL; + if (node->return_stmt.expr_stmt != NULL) { + ret = gen_ir_expr(node->return_stmt.expr_stmt); + } + ir_node_t* ir = new_ir_node(NULL, IR_NODE_RET); + ir->data.ret.ret_val = ret; + emit_instr(NULL, ir); + + ir_bblock_t* block = new_ir_bblock(NULL); + ctx.cur_block = block; + vector_push(ctx.cur_func->bblocks, block); + break; + } + case NT_STMT_BLOCK: { + gen_ir_from_ast(node->block_stmt.block); + break; + } + case NT_BLOCK: { + for (int i = 0; i < node->block.children.size; i ++) { + gen_ir_from_ast(node->block.children.data[i]); + } + break; + } + case NT_STMT_IF: + case NT_STMT_WHILE: + case NT_STMT_DOWHILE: + case NT_STMT_FOR: + gen_ir_jmp(node); + break; + case NT_DECL_VAR: { + ir_node_t* ir = new_ir_node(node->decl_val.name->syms.tok.val.str, IR_NODE_ALLOC); + emit_instr(NULL, ir); + // TODO Typing system + ir->type = &type_i32; + node->decl_val.data = ir; 
+ if (node->decl_val.expr_stmt != NULL) { + gen_ir_from_ast(node->decl_val.expr_stmt); + } + break; + } + case NT_STMT_EXPR: { + gen_ir_expr(node->expr_stmt.expr_stmt); + break; + } + case NT_STMT_EMPTY: { + break; + } + default: + // TODO: 错误处理 + error("unknown node type"); + break; + } +} diff --git a/ccompiler/middleend/ir_ast.h b/ccompiler/middleend/ir_ast.h new file mode 100644 index 0000000..e69de29 diff --git a/ccompiler/middleend/ir_lib.c b/ccompiler/middleend/ir_lib.c new file mode 100644 index 0000000..a6b3614 --- /dev/null +++ b/ccompiler/middleend/ir_lib.c @@ -0,0 +1,122 @@ +#include "ir.h" + +// FIXME using stdlib.h +#include + +static int total_alloc = 0; +typedef union ir_alloc_item { + ir_node_t node; + ir_bblock_t bblock; + ir_func_t func; + ir_prog_t prog; +} ir_alloc_item_t; + +ir_alloc_item_t* alloc_item() { + return malloc(sizeof(ir_alloc_item_t)); +} + +void free_item(ir_alloc_item_t* item) { + return free(item); +} + +ir_node_t* new_ir_node(const char* name, ir_node_tag_t tag) { + ir_node_t* node = (ir_node_t*)alloc_item(); + node->name = name; + node->type = NULL; + node->tag = tag; + switch (tag) { + case IR_NODE_ALLOC: { + node->type = NULL; + break; + } + case IR_NODE_BRANCH: { + node->data.branch.cond = NULL; + node->data.branch.true_bblock = NULL; + node->data.branch.false_bblock = NULL; + break; + } + case IR_NODE_CALL: { + vector_init(node->data.call.args); + node->data.call.callee = NULL; + break; + } + case IR_NODE_CONST_INT: { + node->data.const_int.val = 0; + break; + } + case IR_NODE_JUMP: { + node->data.jump.target_bblock = NULL; + break; + } + case IR_NODE_LOAD: { + node->data.load.target = NULL; + break; + } + case IR_NODE_STORE: { + node->data.store.target = NULL; + node->data.store.value = NULL; + break; + } + case IR_NODE_OP: { + node->data.op.op = 0; + node->data.op.lhs = NULL; + node->data.op.rhs = NULL; + break; + } + case IR_NODE_RET: { + node->data.ret.ret_val = NULL; + break; + } + case IR_NODE_GET_PTR: { + } + 
default: { + exit(0); + } + } + vector_init(node->used_by); + return node; +} + +void dump_ir_node(ir_node_t* node) { + +} + +void free_irnode() { + +} + +ir_bblock_t* new_ir_bblock(const char* name) { + ir_bblock_t* block = (ir_bblock_t*)alloc_item(); + block->label = name; + vector_init(block->instrs); + return block; +} + +void free_irbblock() { + +} + +ir_func_t* new_ir_func(const char* name, ir_type_t* type) { + ir_func_t* func = (ir_func_t*)alloc_item(); + func->name = name; + func->type = type; + vector_init(func->params); + vector_init(func->bblocks); + return func; +} + +void free_irfunc() { + +} + +ir_prog_t* new_ir_prog() { + ir_prog_t* prog = (ir_prog_t*)alloc_item(); + vector_init(prog->global); + vector_init(prog->funcs); + vector_init(prog->extern_funcs); + return prog; +} + +void free_irprog() { + +} diff --git a/ccompiler/middleend/ir_lib.h b/ccompiler/middleend/ir_lib.h new file mode 100644 index 0000000..e969c1c --- /dev/null +++ b/ccompiler/middleend/ir_lib.h @@ -0,0 +1,9 @@ +#ifndef __IR_LIB_H__ +#define __IR_LIB_H__ + +#include "ir.h" +ir_node_t* new_ir_node(const char* name, ir_node_tag_t tag); +ir_bblock_t* new_ir_bblock(const char* name); +ir_func_t* new_ir_func(const char* name, ir_type_t* type); + +#endif \ No newline at end of file diff --git a/ccompiler/middleend/ir_type.c b/ccompiler/middleend/ir_type.c new file mode 100644 index 0000000..2453747 --- /dev/null +++ b/ccompiler/middleend/ir_type.c @@ -0,0 +1,12 @@ +#include "ir.h" + +ir_type_t type_i32 = { + .tag = IR_TYPE_INT32, +}; + +ir_node_t node_zero = { + .tag = IR_NODE_CONST_INT, + .data.const_int = { + .val = 0, + }, +}; diff --git a/ccompiler/middleend/ir_type.h b/ccompiler/middleend/ir_type.h new file mode 100644 index 0000000..b666a02 --- /dev/null +++ b/ccompiler/middleend/ir_type.h @@ -0,0 +1,8 @@ +#ifndef __IR_TYPE_H__ +#define __IR_TYPE_H__ + +#include "ir.h" +extern ir_type_t type_i32; +extern ir_node_t node_zero; + +#endif \ No newline at end of file