stable 重构文件结构

抽象出Machine Code
This commit is contained in:
ZZY
2025-04-01 23:27:25 +08:00
parent 74f43a1ab7
commit b57f21556a
76 changed files with 657 additions and 260 deletions

View File

@ -0,0 +1,33 @@
# Compiler settings
CC = gcc
AR = ar
CFLAGS = -g -Wall -I../..
IR_DIR = ./ir
# Source file list
SRCS = \
middleend.c \
$(IR_DIR)/ir.c \
$(IR_DIR)/ir_ast.c \
$(IR_DIR)/ir_lib.c \
$(IR_DIR)/ir_type.c
# Object file list, derived from the sources (.c -> .o)
OBJS = $(SRCS:.c=.o)
# Final target: the static library archived from all objects
TARGET = libmiddleend.a
all: $(TARGET)
# NOTE(review): make requires recipe lines to start with a TAB; the leading
# whitespace of the recipes below appears to be missing here - confirm.
$(TARGET): $(OBJS)
$(AR) rcs $@ $^
# Pattern rule: compile each .c file into its .o
%.o: %.c
$(CC) $(CFLAGS) -c -o $@ $<
# Remove the objects and the library
clean:
rm -f $(OBJS) $(TARGET)
.PHONY: all clean

View File

View File

@ -0,0 +1,152 @@
// ir_core.h
#ifndef IR_CORE_H
#define IR_CORE_H
#include <lib/utils/ds/vector.h>
// Error codes of the IR layer. The numeric values are spelled out so they
// stay stable if an enumerator is ever inserted in the middle.
typedef enum {
    IR_EC_SUCCESS          = 0, // success
    IR_EC_MEMORY_ERROR     = 1, // memory allocation failed
    IR_EC_TYPE_MISMATCH    = 2, // type mismatch
    IR_EC_INVALID_OPERAND  = 3, // invalid operand
    IR_EC_DUPLICATE_SYMBOL = 4, // symbol redefinition
} ir_ecode_t;
// Description of an IR type.
//
// `tag` selects the kind of type; the anonymous union carries the extra data
// for array types (`arr`) and function types (`func`). The other kinds have
// no payload here.
// NOTE(review): IR_TYPE_PTR has no pointee field - confirm this is intended.
//
// The struct carries the tag `ir_type` so that the `struct ir_type *` members
// below refer to this very type. Without the tag they pointed at a distinct
// incomplete type that was never defined, so they could be neither
// dereferenced nor assigned from an `ir_type_t *` without a cast.
typedef struct ir_type {
    enum {
        IR_TYPE_INT32,
        IR_TYPE_PTR,
        IR_TYPE_ARRAY,
        IR_TYPE_FUNC,
        IR_TYPE_VOID,
    } tag;
    union {
        // Payload for IR_TYPE_ARRAY.
        struct {
            struct ir_type *base;   // element type
            rt_size_t len;          // array length
        } arr;
        // Payload for IR_TYPE_FUNC.
        struct {
            struct ir_type *ret;      // return type
            struct ir_type **params;  // parameter types
            rt_size_t param_cnt;      // number of entries in `params`
        } func;
    };
} ir_type_t;
// Forward declaration: the full definition of struct ir_node comes later in this header.
typedef struct ir_node ir_node_t;
// Basic block: an optional label plus a vector of instruction nodes.
typedef struct ir_bblock {
// Block label; blocks are also created with a NULL label.
const char *label;
// Instructions of the block, stored as pointers to IR nodes.
vector_header(instrs, ir_node_t*);
// ir_arr_t used_by;
} ir_bblock_t; // basic block
// IR function: name, type, parameter nodes and basic blocks.
typedef struct {
// Function name.
const char *name;
// Type pointer passed in when the function object is created.
ir_type_t *type;
// Nodes representing the parameters.
vector_header(params, ir_node_t*);
// Basic blocks of the function.
vector_header(bblocks, ir_bblock_t*);
} ir_func_t;
// Whole IR program: global nodes, functions and external functions.
typedef struct {
// Global nodes.
vector_header(global, ir_node_t*);
// Functions whose bodies were lowered.
vector_header(funcs, ir_func_t*);
// Functions that were declared without a definition.
vector_header(extern_funcs, ir_func_t*);
} ir_prog_t;
// Kind of an IR node; it selects which member of `ir_node.data` is meaningful.
// The numeric values are written out explicitly.
typedef enum ir_node_tag {
    IR_NODE_NULL      = 0,
    IR_NODE_CONST_INT = 1,
    IR_NODE_ALLOC     = 2,
    IR_NODE_LOAD      = 3,
    IR_NODE_STORE     = 4,
    IR_NODE_GET_PTR   = 5,
    IR_NODE_OP        = 6,
    IR_NODE_BRANCH    = 7,
    IR_NODE_JUMP      = 8,
    IR_NODE_CALL      = 9,
    IR_NODE_RET       = 10,
} ir_node_tag_t;
// IR node: a value or instruction.
// `tag` says which member of the `data` union is valid.
struct ir_node {
// Type of the node; NULL until it is assigned.
const ir_type_t* type;
// Optional name (may be NULL).
const char* name;
// Nodes recorded as referring to this node.
vector_header(used_by, ir_node_t*);
// Node kind; selects the active member of `data`.
ir_node_tag_t tag;
union {
// IR_NODE_CONST_INT: 32-bit integer constant.
struct {
int32_t val;
} const_int;
// IR_NODE_LOAD: node to read from.
struct {
ir_node_t* target;
} load;
// IR_NODE_STORE: node to write to and the value to write.
struct {
ir_node_t* target;
ir_node_t* value;
} store;
// IR_NODE_GET_PTR: source address and offset.
struct {
ir_node_t* src_addr;
ir_node_t* offset;
} get_ptr;
// IR_NODE_OP: operator plus its operands.
struct {
enum {
/// Not equal to.
IR_OP_NEQ,
/// Equal to.
IR_OP_EQ,
/// Greater than.
IR_OP_GT,
/// Less than.
IR_OP_LT,
/// Greater than or equal to.
IR_OP_GE,
/// Less than or equal to.
IR_OP_LE,
/// Addition.
IR_OP_ADD,
/// Subtraction.
IR_OP_SUB,
/// Multiplication.
IR_OP_MUL,
/// Division.
IR_OP_DIV,
/// Modulo.
IR_OP_MOD,
/// Bitwise AND.
IR_OP_AND,
/// Bitwise OR.
IR_OP_OR,
/// Bitwise XOR.
IR_OP_XOR,
/// Bitwise NOT.
IR_OP_NOT,
/// Shift left logical.
IR_OP_SHL,
/// Shift right logical.
IR_OP_SHR,
/// Shift right arithmetic.
IR_OP_SAR,
} op;
ir_node_t* lhs;
ir_node_t* rhs;
} op;
// IR_NODE_BRANCH: condition and the two successor blocks.
struct {
ir_node_t* cond;
ir_bblock_t* true_bblock;
ir_bblock_t* false_bblock;
} branch;
// IR_NODE_JUMP: unconditional jump target.
struct {
ir_bblock_t* target_bblock;
} jump;
// IR_NODE_CALL: called function and its argument nodes.
struct {
ir_func_t* callee;
vector_header(args, ir_node_t*);
} call;
// IR_NODE_RET: returned value; NULL when nothing is returned.
struct {
ir_node_t* ret_val;
} ret;
} data;
};
#endif // IR_CORE_H

View File

@ -0,0 +1,446 @@
#include "ir.h"
#include "ir_lib.h"
#include "ir_type.h"
#include "../../frontend/frontend.h"
#include "../../frontend/parser/ast.h"
// Context structure: records the state of the IR generation in progress
typedef struct {
ir_func_t* cur_func; // function currently being processed
ir_bblock_t* cur_block; // current basic block
} IRGenContext;
// Generation state shared by the routines of this file.
IRGenContext ctx;
// Program under construction; gen_ir_from_ast returns its address.
ir_prog_t prog;
// Forward declaration of the statement/declaration lowering routine defined below.
void _gen_ir_from_ast(ast_node_t* node);
// Append `node` to the instruction list of `block`.
// A NULL `block` selects the block the generator is currently filling
// (ctx.cur_block).
static void emit_instr(ir_bblock_t* block, ir_node_t* node) {
    ir_bblock_t* dest = (block != NULL) ? block : ctx.cur_block;
    vector_push(dest->instrs, node);
}
// Build a conditional branch on `cond` to `trueb` / `falseb`, append it to
// the current block and return the new branch node.
static ir_node_t* emit_br(ir_node_t* cond, ir_bblock_t* trueb, ir_bblock_t* falseb) {
    ir_node_t* branch = new_ir_node(NULL, IR_NODE_BRANCH);

    branch->data.branch.cond = cond;
    branch->data.branch.true_bblock = trueb;
    branch->data.branch.false_bblock = falseb;

    emit_instr(NULL, branch);
    return branch;
}
static ir_node_t* gen_ir_expr(ast_node_t* node);
// Lower a terminal AST node (literal value, identifier or call) and return
// the IR node that represents it.
//  - literal:    a new CONST_INT node carrying the token's integer value
//  - identifier: the IR node stored on the declaration it refers to
//  - call:       a CALL node whose arguments are lowered in order; the call
//                is appended to the current block
static ir_node_t* gen_ir_term(ast_node_t* node) {
    switch (node->type) {
    case NT_TERM_VAL: {
        ir_node_t* constant = new_ir_node(NULL, IR_NODE_CONST_INT);
        constant->data.const_int.val = node->syms.tok.val.i;
        return constant;
    }
    case NT_TERM_IDENT: {
        ir_node_t* declared = node->syms.decl_node->decl_val.data;
        return declared;
    }
    case NT_TERM_CALL: {
        ir_node_t* call_instr = new_ir_node(NULL, IR_NODE_CALL);
        call_instr->data.call.callee = node->call.func_decl->decl_func.def->func.data;

        int idx = 0;
        while (idx < node->call.params->params.params.size) {
            ast_node_t* arg_ast = vector_at(node->call.params->params.params, idx);
            ir_node_t* arg = gen_ir_expr(arg_ast);
            vector_push(call_instr->data.call.args, arg);
            idx++;
        }

        emit_instr(NULL, call_instr);
        return call_instr;
    }
    default: {
        Panic("gen_ir_expr: unknown node type");
    }
    }
    TODO();
    return NULL;
}
// Lower an expression AST node to IR and return the node holding its value.
//
// Terminals are delegated to gen_ir_term. For every other node the left
// operand and the right operand (if present) are lowered first, then one
// instruction is created, appended to the current block and returned.
// The comma operator returns the value of its right operand. An assignment
// emits a STORE and evaluates to the stored value.
//
// Returns NULL when the operator is not supported yet (~, &&, ||, unknown
// node types); in that case nothing is emitted.
//
// Fixes over the previous revision:
//  - `>` was lowered to IR_OP_GE instead of IR_OP_GT;
//  - operands recorded a NULL user in `used_by` because the registration ran
//    before the instruction existed;
//  - unsupported operators emitted a NULL instruction and returned an
//    uninitialised pointer.
static ir_node_t* gen_ir_expr(ast_node_t* node) {
    // term node
    switch (node->type) {
    case NT_TERM_VAL:
    case NT_TERM_IDENT:
    case NT_TERM_CALL:
        return gen_ir_term(node);
    default:
        break;
    }

    ir_node_t* lhs = gen_ir_expr(node->expr.left);
    ir_node_t* rhs = node->expr.right ? gen_ir_expr(node->expr.right) : NULL;

    if (node->type == NT_COMMA) {
        return rhs;
    }

    ir_node_t* instr = NULL; // instruction to emit; stays NULL if unsupported
    ir_node_t* ret = NULL;   // value of the whole expression

#define BINOP(operand) do { \
        instr = new_ir_node(NULL, IR_NODE_OP); \
        instr->data.op.op = operand; \
        instr->data.op.lhs = lhs; \
        instr->data.op.rhs = rhs; \
        ret = instr; \
    } while (0)

    switch (node->type) {
    case NT_ADD: // (expr) + (expr)
        BINOP(IR_OP_ADD); break;
    case NT_SUB: // (expr) - (expr)
        BINOP(IR_OP_SUB); break;
    case NT_MUL: // (expr) * (expr)
        BINOP(IR_OP_MUL); break;
    case NT_DIV: // (expr) / (expr)
        BINOP(IR_OP_DIV); break;
    case NT_MOD: // (expr) % (expr)
        BINOP(IR_OP_MOD); break;
    case NT_AND: // (expr) & (expr)
        BINOP(IR_OP_AND); break;
    case NT_OR: // (expr) | (expr)
        BINOP(IR_OP_OR); break;
    case NT_XOR: // (expr) ^ (expr)
        BINOP(IR_OP_XOR); break;
    case NT_BIT_NOT: // ~ (expr)
        // TODO: lower to IR_OP_NOT once unary operators are supported.
        LOG_ERROR("unimpliment");
        break;
    case NT_L_SH: // (expr) << (expr)
        BINOP(IR_OP_SHL); break;
    case NT_R_SH: // (expr) >> (expr)
        // Currently always a logical shift.
        // TODO: use IR_OP_SAR (arithmetic shift) where appropriate.
        BINOP(IR_OP_SHR); break;
    case NT_EQ: // (expr) == (expr)
        BINOP(IR_OP_EQ); break;
    case NT_NEQ: // (expr) != (expr)
        BINOP(IR_OP_NEQ); break;
    case NT_LE: // (expr) <= (expr)
        BINOP(IR_OP_LE); break;
    case NT_GE: // (expr) >= (expr)
        BINOP(IR_OP_GE); break;
    case NT_LT: // (expr) < (expr)
        BINOP(IR_OP_LT); break;
    case NT_GT: // (expr) > (expr)
        BINOP(IR_OP_GT); break;
    case NT_AND_AND: // (expr) && (expr)
        LOG_ERROR("unimpliment");
        break;
    case NT_OR_OR: // (expr) || (expr)
        LOG_ERROR("unimpliment");
        break;
    case NT_NOT: {
        // ! (expr)  is lowered as  (0 == expr)
        instr = new_ir_node(NULL, IR_NODE_OP);
        instr->data.op.op = IR_OP_EQ;
        instr->data.op.lhs = &node_zero;
        instr->data.op.rhs = lhs;
        ret = instr;
        break;
    }
    case NT_ASSIGN: {
        // (expr) = (expr)
        instr = new_ir_node(NULL, IR_NODE_STORE);
        instr->data.store.target = lhs;
        instr->data.store.value = rhs;
        ret = rhs;
        break;
    }
    // case NT_COND: // (expr) ? (expr) : (expr)
    default: {
        // TODO self error msg
        LOG_ERROR("Unsupported IR generation for AST node type %d", node->type);
        break;
    }
    }

    if (instr == NULL) {
        // Nothing was generated: do not emit a NULL instruction.
        return NULL;
    }

    // Register the new instruction as a user of its operands. An operand is
    // NULL when it is absent (unary operator) or could not be lowered.
    if (lhs) { vector_push(lhs->used_by, instr); }
    if (rhs) { vector_push(rhs->used_by, instr); }

    emit_instr(NULL, instr);
    return ret;
}
// Lower a function definition into `func`.
//
// Creates the "entry" block, appends `func` to the program's function list,
// emits one ALLOC node per parameter into the entry block (typed i32 for now)
// and then lowers the body. The generation context is saved on entry and
// restored on exit.
static void gen_ir_func(ast_node_t* node, ir_func_t* func) {
    Assert(node->type == NT_FUNC);

    ir_bblock_t* entry_block = new_ir_bblock("entry");
    vector_push(func->bblocks, entry_block);
    vector_push(prog.funcs, func);

    IRGenContext saved_ctx = ctx;
    ctx.cur_func = func;
    ctx.cur_block = entry_block;

    ast_node_t* param_list = node->func.decl->decl_func.params;
    int idx = 0;
    while (idx < param_list->params.params.size) {
        ast_node_t* param_ast = param_list->params.params.data[idx];
        ir_node_t* slot = new_ir_node(param_ast->decl_val.name->syms.tok.val.str, IR_NODE_ALLOC);
        emit_instr(entry_block, slot);
        vector_push(func->params, slot);
        // TODO Typing system
        slot->type = &type_i32;
        // Let identifier references find the parameter's IR node.
        param_ast->decl_val.data = slot;
        idx++;
    }

    _gen_ir_from_ast(node->func.body);

    ctx = saved_ctx;
}
// Lower a control-flow statement (if / while / do-while / for).
//
// Three fresh basic blocks are appended to the current function up front and
// wired together with branch and jump instructions. On return ctx.cur_block
// is the block in which code following the statement must be generated.
//
// Fixes over the previous revision:
//  - `for` without a condition created the jump into the body but never
//    emitted it, leaving the loop header without a terminator;
//  - the jumps closing the `if` branches were appended to the first block of
//    each branch; when a branch body contains nested control flow (or a
//    return) generation has moved on to another block, so the jump now goes
//    to the block that is current when the branch body ends.
//
// NOTE(review): for an `if` without `else` the "if_false" block doubles as the
// join block and "if_end" stays empty, as before.
void gen_ir_jmp(ast_node_t* node) {
    ir_bblock_t *bblocks[3];
    for (int i = 0; i < (int)(sizeof(bblocks)/sizeof(bblocks[0])); i++) {
        bblocks[i] = new_ir_bblock(NULL);
        vector_push(ctx.cur_func->bblocks, bblocks[i]);
    }

#define NEW_IR_JMP(name, block) do { \
        name = new_ir_node(NULL, IR_NODE_JUMP); \
        name->data.jump.target_bblock = block; \
    } while (0)

    switch (node->type) {
    case NT_STMT_IF: {
        ir_bblock_t* trueb = bblocks[0];
        trueb->label = "if_true";
        ir_bblock_t* falseb = bblocks[1];
        falseb->label = "if_false";
        ir_bblock_t* endb = bblocks[2];
        endb->label = "if_end";

        const int has_else = (node->if_stmt.else_stmt != NULL);
        // Block where both paths meet again.
        ir_bblock_t* joinb = has_else ? endb : falseb;
        ir_node_t* jmp;

        // cond
        ir_node_t *cond = gen_ir_expr(node->if_stmt.cond);
        emit_br(cond, trueb, falseb);

        // true block
        ctx.cur_block = trueb;
        _gen_ir_from_ast(node->if_stmt.if_stmt);
        NEW_IR_JMP(jmp, joinb);
        emit_instr(NULL, jmp); // into the block where the true branch ended

        // else block
        if (has_else) {
            ctx.cur_block = falseb;
            _gen_ir_from_ast(node->if_stmt.else_stmt);
            NEW_IR_JMP(jmp, endb);
            emit_instr(NULL, jmp); // into the block where the else branch ended
        }

        ctx.cur_block = joinb;
        break;
    }
    case NT_STMT_WHILE: {
        ir_bblock_t* entryb = bblocks[0];
        ir_bblock_t* bodyb = bblocks[1];
        ir_bblock_t* endb = bblocks[2];

        ir_node_t* entry;
        NEW_IR_JMP(entry, entryb);
        emit_instr(NULL, entry);

        // Entry:
        ctx.cur_block = entryb;
        ir_node_t *cond = gen_ir_expr(node->while_stmt.cond);
        emit_br(cond, bodyb, endb);

        // Body:
        ir_node_t* jmp;
        ctx.cur_block = bodyb;
        _gen_ir_from_ast(node->while_stmt.body);
        NEW_IR_JMP(jmp, entryb);
        emit_instr(NULL, jmp);

        // End:
        ctx.cur_block = endb;
        break;
    }
    case NT_STMT_DOWHILE: {
        ir_bblock_t* entryb = bblocks[0];
        ir_bblock_t* bodyb = bblocks[1];
        ir_bblock_t* endb = bblocks[2];

        ir_node_t* entry;
        NEW_IR_JMP(entry, bodyb);
        emit_instr(NULL, entry);

        // Body:
        ctx.cur_block = bodyb;
        _gen_ir_from_ast(node->do_while_stmt.body);
        ir_node_t* jmp;
        NEW_IR_JMP(jmp, entryb);
        emit_instr(NULL, jmp);

        // Entry:
        ctx.cur_block = entryb;
        ir_node_t *cond = gen_ir_expr(node->do_while_stmt.cond);
        emit_br(cond, bodyb, endb);

        // End:
        ctx.cur_block = endb;
        break;
    }
    case NT_STMT_FOR: {
        ir_bblock_t* entryb = bblocks[0];
        ir_bblock_t* bodyb = bblocks[1];
        ir_bblock_t* endb = bblocks[2];

        if (node->for_stmt.init) {
            _gen_ir_from_ast(node->for_stmt.init);
        }
        ir_node_t* entry;
        NEW_IR_JMP(entry, entryb);
        emit_instr(NULL, entry);

        // Entry:
        ctx.cur_block = entryb;
        if (node->for_stmt.cond) {
            ir_node_t *cond = gen_ir_expr(node->for_stmt.cond);
            emit_br(cond, bodyb, endb);
        } else {
            // No condition: always fall into the body.
            ir_node_t* to_body;
            NEW_IR_JMP(to_body, bodyb);
            emit_instr(NULL, to_body);
        }

        // Body:
        ctx.cur_block = bodyb;
        _gen_ir_from_ast(node->for_stmt.body);
        if (node->for_stmt.iter) {
            gen_ir_expr(node->for_stmt.iter);
        }
        ir_node_t* jmp;
        NEW_IR_JMP(jmp, entryb);
        emit_instr(NULL, jmp);

        // End:
        ctx.cur_block = endb;
        break;
    }
    default:
        LOG_ERROR("ir jmp can't hit here");
    }
}
// Entry point of IR generation: lower every child of the AST root in order
// and return the file-level program object.
ir_prog_t* gen_ir_from_ast(ast_node_t* root) {
    Assert(root->type == NT_ROOT);

    int idx = 0;
    while (idx < root->root.children.size) {
        _gen_ir_from_ast(root->root.children.data[idx]);
        idx++;
    }

    return &prog;
}
// Lower one declaration or statement AST node into IR, dispatching on the
// node type. Works on the file-level generation context (ctx) and program (prog).
void _gen_ir_from_ast(ast_node_t* node) {
switch (node->type) {
case NT_DECL_FUNC: {
// Create the IR function object (return type fixed to i32 for now).
ir_func_t* func = new_ir_func(node->decl_func.name->syms.tok.val.str, &type_i32);
if (node->decl_func.def == NULL) {
// Declaration without a definition: attach a body-less definition node
// and record the function as external.
ast_node_t* def = new_ast_node();
def->func.body = NULL;
def->func.decl = node;
node->decl_func.def = def;
vector_push(prog.extern_funcs, func);
}
// The definition node carries the IR function from now on.
node->decl_func.def->func.data = func;
break;
}
case NT_FUNC: {
// Uses the IR function stored on the node by the NT_DECL_FUNC case.
gen_ir_func(node, node->func.data);
break;
}
case NT_STMT_RETURN: {
ir_node_t* ret = NULL;
if (node->return_stmt.expr_stmt != NULL) {
ret = gen_ir_expr(node->return_stmt.expr_stmt);
}
ir_node_t* ir = new_ir_node(NULL, IR_NODE_RET);
ir->data.ret.ret_val = ret;
emit_instr(NULL, ir);
// Continue in a fresh block so later code does not follow the return.
ir_bblock_t* block = new_ir_bblock(NULL);
ctx.cur_block = block;
vector_push(ctx.cur_func->bblocks, block);
break;
}
case NT_STMT_BLOCK: {
_gen_ir_from_ast(node->block_stmt.block);
break;
}
case NT_BLOCK: {
// Lower the children in order.
for (int i = 0; i < node->block.children.size; i ++) {
_gen_ir_from_ast(node->block.children.data[i]);
}
break;
}
case NT_STMT_IF:
case NT_STMT_WHILE:
case NT_STMT_DOWHILE:
case NT_STMT_FOR:
gen_ir_jmp(node);
break;
case NT_DECL_VAR: {
// The variable becomes an ALLOC node in the current block.
// NOTE(review): emit_instr dereferences ctx.cur_block, which is only set
// while a function is being lowered - confirm file-scope variables cannot reach here.
ir_node_t* ir = new_ir_node(node->decl_val.name->syms.tok.val.str, IR_NODE_ALLOC);
emit_instr(NULL, ir);
// TODO Typing system
ir->type = &type_i32;
node->decl_val.data = ir;
if (node->decl_val.expr_stmt != NULL) {
_gen_ir_from_ast(node->decl_val.expr_stmt);
}
break;
}
case NT_STMT_EXPR: {
gen_ir_expr(node->expr_stmt.expr_stmt);
break;
}
case NT_STMT_EMPTY: {
break;
}
default:
// TODO: error handling
LOG_ERROR("unknown node type");
break;
}
}

View File

@ -0,0 +1,8 @@
#ifndef __IR_AST_H__
#define __IR_AST_H__
#include "ir.h"
// Forward declaration, so this header does not need the AST definition.
typedef struct ast_node ast_node_t;
// Generate the IR program for the AST rooted at `node` and return a pointer to it.
ir_prog_t* gen_ir_from_ast(ast_node_t* node);
#endif // __IR_AST_H__

View File

@ -0,0 +1,76 @@
#include "ir.h"
#include "ir_lib.h"
#include "ir_type.h"
#include <stdio.h>
#include <assert.h>
// Dump state: the stream the textual IR is written to.
typedef struct ir_dump {
FILE* fp;
} ir_dump_t;
// Write a one-line textual form of `node` to `dump->fp`.
//
// A node is identified by its address, printed as `%<address>`, followed by a
// mnemonic for its kind and its operands. The node is only read.
//
// The previous revision was a copy of the node initialiser: it reset every
// field of the node it was asked to print (and re-initialised the argument
// vector of calls), so dumping destroyed the IR. It also passed a non-void
// pointer to `%p`.
//
// Unknown tags, including IR_NODE_NULL, still trigger assert(0).
void dump_ir_node(ir_node_t* node, ir_dump_t* dump) {
    FILE* fp = dump->fp;

    fprintf(fp, "%%%p", (void*)node);
    switch (node->tag) {
    case IR_NODE_ALLOC: {
        fprintf(fp, " = alloc");
        break;
    }
    case IR_NODE_BRANCH: {
        fprintf(fp, " = br %%%p, %p, %p",
                (void*)node->data.branch.cond,
                (void*)node->data.branch.true_bblock,
                (void*)node->data.branch.false_bblock);
        break;
    }
    case IR_NODE_CALL: {
        const ir_func_t* callee = node->data.call.callee;
        fprintf(fp, " = call %s(", (callee && callee->name) ? callee->name : "?");
        for (int i = 0; i < node->data.call.args.size; i++) {
            ir_node_t* arg = vector_at(node->data.call.args, i);
            fprintf(fp, "%s%%%p", i ? ", " : "", (void*)arg);
        }
        fprintf(fp, ")");
        break;
    }
    case IR_NODE_CONST_INT: {
        fprintf(fp, " = const %ld", (long)node->data.const_int.val);
        break;
    }
    case IR_NODE_JUMP: {
        fprintf(fp, " = jmp %p", (void*)node->data.jump.target_bblock);
        break;
    }
    case IR_NODE_LOAD: {
        fprintf(fp, " = load %%%p", (void*)node->data.load.target);
        break;
    }
    case IR_NODE_STORE: {
        fprintf(fp, " = store %%%p, %%%p",
                (void*)node->data.store.target,
                (void*)node->data.store.value);
        break;
    }
    case IR_NODE_OP: {
        // The operator is printed as its numeric enum value.
        fprintf(fp, " = op(%d) %%%p, %%%p",
                (int)node->data.op.op,
                (void*)node->data.op.lhs,
                (void*)node->data.op.rhs);
        break;
    }
    case IR_NODE_RET: {
        if (node->data.ret.ret_val != NULL) {
            fprintf(fp, " = ret %%%p", (void*)node->data.ret.ret_val);
        } else {
            fprintf(fp, " = ret");
        }
        break;
    }
    case IR_NODE_GET_PTR: {
        fprintf(fp, " = get_ptr %%%p, %%%p",
                (void*)node->data.get_ptr.src_addr,
                (void*)node->data.get_ptr.offset);
        break;
    }
    default: {
        assert(0);
    }
    }
    fputc('\n', fp);
}
// Dump a basic block. Not implemented yet: the body is empty.
void dump_ir_bblock(ir_bblock_t* block) {
}
// Dump a function. Not implemented yet: the body is empty.
void dump_ir_func(ir_func_t* func) {
}
// Dump a whole program. Not implemented yet: the body is empty.
void dump_ir_prog(ir_prog_t* prog) {
}

View File

@ -0,0 +1,118 @@
#include "ir.h"
// FIXME using stdlib.h
#include <stdlib.h>
// Allocation counter. NOTE(review): nothing in the visible code updates or reads it.
static int total_alloc = 0;
// Allocation unit shared by all IR objects: a union of every object kind, so
// one allocation is large enough for any of them.
typedef union ir_alloc_item {
ir_node_t node;
ir_bblock_t bblock;
ir_func_t func;
ir_prog_t prog;
} ir_alloc_item_t;
// Allocate one uninitialised IR allocation unit.
//
// Never returns NULL: the callers in this file use the result without
// checking it, so running out of memory terminates the process via abort()
// instead of being dereferenced as a null pointer.
ir_alloc_item_t* alloc_item(void) {
    ir_alloc_item_t* item = malloc(sizeof *item);
    if (item == NULL) {
        abort();
    }
    return item;
}
// Release an allocation unit obtained from alloc_item(). NULL is accepted.
// (A void function must not `return` an expression, so free() is called as a
// plain statement.)
void free_item(ir_alloc_item_t* item) {
    free(item);
}
// Create an IR node of kind `tag`.
//
// name: stored as-is (not copied); may be NULL.
// tag:  node kind; the matching member of `data` is reset to NULL / 0.
//
// The node's type is NULL and its `used_by` list is empty. Returns NULL only
// if the allocator returned NULL.
//
// Fixes over the previous revision:
//  - IR_NODE_GET_PTR fell through into the error path, so creating such a
//    node terminated the process;
//  - the error path called exit(0), reporting success to the parent process;
//    it now exits with EXIT_FAILURE (still used for IR_NODE_NULL and unknown
//    tags).
ir_node_t* new_ir_node(const char* name, ir_node_tag_t tag) {
    ir_node_t* node = (ir_node_t*)alloc_item();
    if (node == NULL) {
        return NULL;
    }
    node->name = name;
    node->type = NULL;
    node->tag = tag;
    vector_init(node->used_by);

    switch (tag) {
    case IR_NODE_ALLOC: {
        // No payload; the type is filled in by the caller.
        break;
    }
    case IR_NODE_BRANCH: {
        node->data.branch.cond = NULL;
        node->data.branch.true_bblock = NULL;
        node->data.branch.false_bblock = NULL;
        break;
    }
    case IR_NODE_CALL: {
        vector_init(node->data.call.args);
        node->data.call.callee = NULL;
        break;
    }
    case IR_NODE_CONST_INT: {
        node->data.const_int.val = 0;
        break;
    }
    case IR_NODE_JUMP: {
        node->data.jump.target_bblock = NULL;
        break;
    }
    case IR_NODE_LOAD: {
        node->data.load.target = NULL;
        break;
    }
    case IR_NODE_STORE: {
        node->data.store.target = NULL;
        node->data.store.value = NULL;
        break;
    }
    case IR_NODE_OP: {
        node->data.op.op = 0;
        node->data.op.lhs = NULL;
        node->data.op.rhs = NULL;
        break;
    }
    case IR_NODE_RET: {
        node->data.ret.ret_val = NULL;
        break;
    }
    case IR_NODE_GET_PTR: {
        node->data.get_ptr.src_addr = NULL;
        node->data.get_ptr.offset = NULL;
        break;
    }
    case IR_NODE_NULL:
    default: {
        // Not a kind that can be instantiated: treat as a fatal error.
        exit(EXIT_FAILURE);
    }
    }
    return node;
}
// Placeholder for releasing an IR node. Not implemented yet: the body is empty.
void free_irnode() {
}
// Create an empty basic block labelled `name` (stored as-is, may be NULL).
ir_bblock_t* new_ir_bblock(const char* name) {
    ir_bblock_t* bb = (ir_bblock_t*)alloc_item();

    vector_init(bb->instrs);
    bb->label = name;

    return bb;
}
// Placeholder for releasing a basic block. Not implemented yet: the body is empty.
void free_irbblock() {
}
// Create an IR function called `name` with type `type`; both pointers are
// stored as-is. The parameter and basic-block lists start empty.
ir_func_t* new_ir_func(const char* name, ir_type_t* type) {
    ir_func_t* fn = (ir_func_t*)alloc_item();

    vector_init(fn->params);
    vector_init(fn->bblocks);
    fn->type = type;
    fn->name = name;

    return fn;
}
// Placeholder for releasing an IR function. Not implemented yet: the body is empty.
void free_irfunc() {
}
// Create an IR program whose global, function and external-function lists
// are all empty.
ir_prog_t* new_ir_prog() {
    ir_prog_t* program = (ir_prog_t*)alloc_item();

    vector_init(program->global);
    vector_init(program->funcs);
    vector_init(program->extern_funcs);

    return program;
}
// Placeholder for releasing an IR program. Not implemented yet: the body is empty.
void free_irprog() {
}

View File

@ -0,0 +1,9 @@
#ifndef __IR_LIB_H__
#define __IR_LIB_H__
#include "ir.h"
// Create an IR node of kind `tag`; `name` may be NULL.
ir_node_t* new_ir_node(const char* name, ir_node_tag_t tag);
// Create an empty basic block with label `name`.
ir_bblock_t* new_ir_bblock(const char* name);
// Create an IR function with the given name and type and no blocks.
ir_func_t* new_ir_func(const char* name, ir_type_t* type);
#endif

View File

@ -0,0 +1,12 @@
#include "ir.h"
// Shared descriptor of the 32-bit integer type.
ir_type_t type_i32 = { .tag = IR_TYPE_INT32 };
// Shared constant node holding the integer 0.
ir_node_t node_zero = {
    .tag  = IR_NODE_CONST_INT,
    .data = { .const_int = { .val = 0 } },
};

View File

@ -0,0 +1,8 @@
#ifndef __IR_TYPE_H__
#define __IR_TYPE_H__
#include "ir.h"
// Shared descriptor of the 32-bit integer type.
extern ir_type_t type_i32;
// Shared constant node with the value 0.
extern ir_node_t node_zero;
#endif

View File

@ -0,0 +1,5 @@
#include "middleend.h"
// Middle-end entry point: turn the AST rooted at `root` into an IR program.
// `conf` is accepted for future use and is not read yet.
ir_prog_t* cc_middleend(ast_node_t* root, cc_midend_conf_t* conf) {
    (void)conf;
    ir_prog_t* program = gen_ir_from_ast(root);
    return program;
}

View File

@ -0,0 +1,13 @@
#ifndef __SMCC_CC_MIDDLEEND_H__
#define __SMCC_CC_MIDDLEEND_H__
#include "ir/ir.h"
#include "ir/ir_ast.h"
// Middle-end configuration; it has no fields yet.
// NOTE(review): a struct without members is a compiler extension, not ISO C.
typedef struct cc_midend_conf {
// cc_arch_t arch;
} cc_midend_conf_t;
// TODO add some feature to cc_middleend like optimization
// Run the middle end on the AST rooted at `root` and return the IR program.
ir_prog_t* cc_middleend(ast_node_t* root, cc_midend_conf_t* conf);
#endif // __SMCC_CC_MIDDLEEND_H__

View File

View File

@ -0,0 +1,8 @@
#ifndef __REG_ALLOC_H__
#define __REG_ALLOC_H__
// Register allocator state; placeholder without fields for now.
// NOTE(review): a struct without members is a compiler extension, not ISO C.
typedef struct {
} reg_alloc_t;
#endif