stable 重构文件结构

抽象出Machine Code
This commit is contained in:
ZZY
2025-04-01 23:27:25 +08:00
parent 74f43a1ab7
commit b57f21556a
76 changed files with 657 additions and 260 deletions

57
src/ccompiler/Makefile Normal file
View File

@ -0,0 +1,57 @@
# all: cc
# # run: ccompiler
# # ./ccompiler test.c flat.bin
# # simple_test:
# # make -C tests/simple
# cc: frontend middleend backend ccompiler.c test_main.c
# gcc -g ccompiler.c test_main.c -I../ -L./frontend -lfrontend -L./middleend -lmiddleend -L./backend -lbackend -L../lib -lcore -o cc
# frontend:
# make -C ./frontend
# middleend:
# make -C ./middleend
# backend:
# make -C ./backend
# clean:
# rm -f cc
# make -C ./frontend clean
# make -C ./middleend clean
# make -C ./backend clean
# Top-level Makefile: builds every compiler stage into one static library (libcc.a).
CC = gcc
AR = ar
CFLAGS = -g -Wall -I.. -I../..
# Directories whose *.c files are compiled into the library.
MODULES = frontend middleend backend
FRONTEND_SUBDIRS = lexer parser parser/ast parser/symtab
MODULES += $(addprefix frontend/, $(FRONTEND_SUBDIRS))
MIDDLEEND_SUBDIRS = ir
MODULES += $(addprefix middleend/, $(MIDDLEEND_SUBDIRS))
BACKEND_SUBDIRS = riscv32
MODULES += $(addprefix backend/, $(BACKEND_SUBDIRS))
# Collect the sources of every module automatically, skipping test drivers.
# filter-out matches with `%` patterns (a shell-style `*` is taken literally),
# and the wildcard results carry their directory prefix, so one pattern is
# generated per module directory.
EXCLUDE = test%.c $(addsuffix /test%.c,$(MODULES))
SRCS = $(filter-out $(EXCLUDE), $(wildcard $(addsuffix /*.c,$(MODULES))))
SRCS += ccompiler.c
OBJS = $(SRCS:.c=.o)
libcc.a: $(OBJS)
	$(AR) rcs $@ $^
%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<
clean:
	rm -f libcc.a $(OBJS)
.PHONY: clean

View File

@ -0,0 +1,30 @@
# Backend Makefile: builds libbackend.a from the backend sources.
# Toolchain settings
CC = gcc
AR = ar
CFLAGS = -g -Wall -I../..
RISCV32_DIR = ./riscv32
# Source file list
SRCS = \
backend.c \
$(RISCV32_DIR)/riscv32.c
# Object file list derived from SRCS (one .o per .c)
OBJS = $(SRCS:.c=.o)
# Final target: static library archived from OBJS
TARGET = libbackend.a
all: $(TARGET)
$(TARGET): $(OBJS)
$(AR) rcs $@ $^
%.o: %.c
$(CC) $(CFLAGS) -c -o $@ $<
clean:
rm -f $(OBJS) $(TARGET)
.PHONY: all clean

View File

@ -0,0 +1,23 @@
#include "backend.h"
/**
 * Lower an IR program to assembly for the requested architecture.
 *
 * @param ir      IR program to translate
 * @param arch    target architecture; only CC_ARCH_RISCV32 is implemented
 * @param out_asm destination program; the member matching `arch` is filled in
 * @return the status of the architecture generator (0 on success), or -1 when
 *         `ir` or `out_asm` is NULL. Unsupported architectures call Panic().
 */
int gen_asm_from_ir(ir_prog_t* ir, cc_arch_t arch, asm_prog_t* out_asm) {
    if (ir == NULL || out_asm == NULL) {
        return -1;
    }
    switch (arch) {
    case CC_ARCH_RISCV32:
        // TODO: use a macro to choose the compiled-in targets.
        // gen_rv32_from_ir() initialises the program itself, so it must not be
        // initialised a second time here.
        return gen_rv32_from_ir(ir, &(out_asm->rv32));
    case CC_ARCH_X86_32:
    default:
        Panic("Unsupported arch");
        break;
    }
    return 0;
}
/**
 * Backend entry point: allocate an assembly program and generate code into it.
 *
 * @param ir   IR program produced by the middle end
 * @param conf backend configuration (selects the target architecture)
 * @return newly allocated program (from salloc_alloc), or NULL when an argument
 *         is NULL or the allocation fails
 */
asm_prog_t* cc_backend(ir_prog_t* ir, cc_backend_conf_t* conf) {
    // TODO
    if (ir == NULL || conf == NULL) {
        return NULL;
    }
    asm_prog_t* bin = (asm_prog_t*)salloc_alloc(sizeof(asm_prog_t));
    if (bin == NULL) {
        return NULL;
    }
    gen_asm_from_ir(ir, conf->arch, bin);
    return bin;
}

View File

@ -0,0 +1,30 @@
#ifndef __SMCC_CC_BACKEND_H__
#define __SMCC_CC_BACKEND_H__
// Public interface of the compiler backend: IR program in, assembly program out.
// TODO: use macros to choose the architecture
#ifndef __SMCC_CC_NO_RISCV32__
#include "riscv32/riscv32.h"
#endif
// #ifndef __SMCC_CC_NO_X86_32__
// #include "x86_32/x86_32.h"
// #endif
// TODO: unify the assembler interface
#include <src/assembler/assembler.h>
#include "../middleend/ir/ir.h"
// Target architectures selectable through cc_backend_conf_t.
typedef enum cc_arch {
CC_ARCH_RISCV32,
CC_ARCH_X86_32
} cc_arch_t;
// Assembly program container; only forward-declared here (defined outside this header).
typedef union asm_prog asm_prog_t;
// Translate `ir` for architecture `arch` into the caller-provided `asm_prog`.
int gen_asm_from_ir(ir_prog_t* ir, cc_arch_t arch, asm_prog_t* asm_prog);
// Backend configuration passed to cc_backend().
typedef struct cc_backend_conf {
cc_arch_t arch; // architecture to generate code for
} cc_backend_conf_t;
// Allocate an assembly program and generate code for `ir` according to `conf`.
asm_prog_t* cc_backend(ir_prog_t* ir, cc_backend_conf_t* conf);
#endif

View File

@ -0,0 +1,41 @@
# 后端代码生成
## riscv32i
> 仿照ripes的syscall实现了rv32-vm
### syscall ecall 系统调用
```c
// ecall 系统调用函数实现
#define ECALL_PNT_INT(num) \
ADDI(REG_A0, REG_X0, num), \
ADDI(REG_A7, REG_X0, 0x1), \
ECALL(),
#define ECALL_PNT_STR(str) \
ADDI(REG_A0, REG_X0, str), \
ADDI(REG_A7, REG_X0, 0x4), \
ECALL(),
#define ECALL_EXIT(errno) \
ADDI(REG_A0, REG_X0, errno), \
ADDI(REG_A7, REG_X0, 10), \
ECALL(),
#define ECALL_SCAN_INT(int) \
ADDI(REG_A7, REG_X0, (1025 + 4)), \
ECALL(),
#define ECALL_SCAN_STR(str) \
ADDI(REG_A0, REG_X0, str), \
ADDI(REG_A7, REG_X0, (1025 + 5)), \
ECALL(),
// 函数声明
void ecall_pnt_int(int num);
void ecall_pnt_str(char *str);
void ecall_exit(int errno);
int ecall_scani();
void ecall_scans(char *str);
```

View File

@ -0,0 +1,302 @@
#include "riscv32.h"
#include <src/mcode/riscv32/riscv32_instr.h>
// Per-function code generation context shared by the gen_* helpers.
typedef struct {
ir_func_t* func; // function being generated; stack_pos() walks its basic blocks
int stack_offset; // total frame size in bytes (amount sp is moved by in gen_func / RET)
int stack_base; // offset from sp of the first value slot (gen_func sets it to 4; slot 0 holds ra)
int func_idx; // set to 0 by gen_func; not read in this file
int block_idx; // set to 0 by gen_func; not read in this file
} gen_ctx_t;
/**
 * Compute the sp-relative byte offset of the stack slot that belongs to `ptr`.
 *
 * Slots are laid out in instruction order: starting at ctx->stack_base, each
 * instruction of each basic block owns one 4-byte slot.
 *
 * @param ptr IR node to look up among the instructions of ctx->func
 * @param ctx current function context
 * @return offset of the slot; calls Panic() when `ptr` is not an instruction
 *         of the function
 */
static inline int stack_pos(ir_node_t* ptr, gen_ctx_t *ctx) {
    int offset = ctx->stack_base;
    for (int blk = 0; blk < ctx->func->bblocks.size; blk++) {
        ir_bblock_t* bb = vector_at(ctx->func->bblocks, blk);
        int slot = 0;
        // linear search for the node inside this block
        while (slot < bb->instrs.size && vector_at(bb->instrs, slot) != ptr) {
            slot++;
        }
        if (slot < bb->instrs.size) {
            offset += slot * 4;
            Assert(offset >= 0 && offset < ctx->stack_offset);
            return offset;
        }
        // not in this block: skip all of its slots
        offset += bb->instrs.size * 4;
    }
    Panic("stack pos got error");
    return 0;
}
/**
 * Map the name of a built-in "system" function to its ecall number.
 *
 * @param name callee name to look up
 * @return the ecall number, or -1 when `name` is not a built-in
 */
static int system_func(const char* name) {
    typedef struct {
        const char* name;
        int ecall_num;
    } sys_entry_t;
    static const sys_entry_t table[] = {
        {"ecall_pnt_int", 1},
        {"ecall_pnt_char", 11},
        {"ecall_scan_int", 1025 + 4},
    };
    const sys_entry_t* const last = table + sizeof(table) / sizeof(table[0]);

    for (const sys_entry_t* entry = table; entry != last; entry++) {
        if (rt_strcmp(name, entry->name) == 0) {
            return entry->ecall_num;
        }
    }
    return -1;
}
/**
 * Emit code that materialises the value of an IR node in register `reg`.
 *
 * Integer constants are loaded as immediates; every other node is read back
 * from its stack slot (see stack_pos()).
 *
 * @return always 0
 */
static int get_node_val(mcode_rv32_t* out_asm, gen_ctx_t* ctx, ir_node_t* ptr, int reg) {
    if (ptr->tag == IR_NODE_CONST_INT) {
        // TODO
        rv32_li(out_asm, reg, ptr->data.const_int.val);
    } else {
        int slot_offset = stack_pos(ptr, ctx);
        rv32_lw(out_asm, reg, REG_SP, slot_offset);
    }
    return 0;
}
/**
 * Emit RV32 code for a single IR instruction.
 *
 * Non-constant operands are fetched from the stack slot of the instruction
 * that produced them (get_node_val() / stack_pos()), so every value-producing
 * instruction spills its result into its own slot.
 *
 * @param _out_asm program being generated (machine code and string pool)
 * @param ctx      context of the function being generated
 * @param instr    IR instruction to lower
 * @return sum of the get_node_val() results (currently always 0)
 */
static int gen_instr(rv32_prog_t* _out_asm, gen_ctx_t* ctx, ir_node_t* instr) {
    mcode_rv32_t* out_asm = &_out_asm->mcode;
    int idx = 0;
    int offset;
    char buf[1024];
    symasm_entry_t label;

    switch (instr->tag) {
    case IR_NODE_ALLOC: {
        // TODO
        break;
    }
    case IR_NODE_LOAD: {
        offset = stack_pos(instr->data.load.target, ctx);
        // t0 = M[sp + offset]
        rv32_lw(out_asm, REG_T0, REG_SP, offset);
        // Spill the loaded value into this instruction's own slot: later users
        // of the load read it from there through get_node_val().
        offset = stack_pos(instr, ctx);
        rv32_sw(out_asm, REG_T0, REG_SP, offset);
        break;
    }
    case IR_NODE_STORE: {
        idx += get_node_val(out_asm, ctx, instr->data.store.value, REG_T0);
        offset = stack_pos(instr->data.store.target, ctx);
        // M[sp + offset] = t0
        rv32_sw(out_asm, REG_T0, REG_SP, offset);
        break;
    }
    case IR_NODE_RET: {
        // a0 = return value (if any)
        if (instr->data.ret.ret_val != NULL) {
            idx += get_node_val(out_asm, ctx, instr->data.ret.ret_val, REG_A0);
        }
        // ra = M[sp + 0]
        rv32_lw(out_asm, REG_RA, REG_SP, 0);
        // sp = sp + stack_offset
        rv32_addi(out_asm, REG_SP, REG_SP, ctx->stack_offset);
        // ret == JALR(REG_X0, REG_RA, 0)
        rv32_ret(out_asm);
        break;
    }
    case IR_NODE_OP: {
        idx += get_node_val(out_asm, ctx, instr->data.op.lhs, REG_T1);
        idx += get_node_val(out_asm, ctx, instr->data.op.rhs, REG_T2);
        rv32_instr_t _instr = {
            .rd = REG_T0,
            .rs1 = REG_T1,
            .rs2 = REG_T2,
            .imm = 0
        };
        // Emit `t0 = t1 <type> t2` for a plain register-register instruction.
        #define GEN_BIN_OP(type) _instr.instr_type = type, \
            emit_rv32_instr(out_asm, &_instr, EMIT_PUSH_BACK, NULL)
        switch (instr->data.op.op) {
        case IR_OP_ADD:
            GEN_BIN_OP(RV_ADD);
            break;
        case IR_OP_SUB:
            GEN_BIN_OP(RV_SUB);
            break;
        case IR_OP_MUL:
            GEN_BIN_OP(RV_MUL);
            break;
        case IR_OP_DIV:
            GEN_BIN_OP(RV_DIV);
            break;
        case IR_OP_MOD:
            GEN_BIN_OP(RV_REM);
            break;
        case IR_OP_EQ:
            // t0 = ((t1 ^ t2) == 0)
            GEN_BIN_OP(RV_XOR);
            rv32_seqz(out_asm, REG_T0, REG_T0);
            break;
        case IR_OP_GE:
            // t0 = !(t1 < t2)
            GEN_BIN_OP(RV_SLT);
            rv32_seqz(out_asm, REG_T0, REG_T0);
            break;
        case IR_OP_GT:
            // SGT(rd, rs1, rs2) == SLT(rd, rs2, rs1)
            rv32_slt(out_asm, REG_T0, REG_T2, REG_T1);
            break;
        case IR_OP_LE:
            // t0 = !(t2 < t1)
            rv32_slt(out_asm, REG_T0, REG_T2, REG_T1);
            rv32_seqz(out_asm, REG_T0, REG_T0);
            break;
        case IR_OP_LT:
            rv32_slt(out_asm, REG_T0, REG_T1, REG_T2);
            break;
        case IR_OP_NEQ:
            // The xor alone is only "non-zero when different"; normalise it to
            // exactly 0 or 1 so the result can be used as a value:
            // t0 = !((t1 ^ t2) == 0)
            GEN_BIN_OP(RV_XOR);
            rv32_seqz(out_asm, REG_T0, REG_T0);
            rv32_seqz(out_asm, REG_T0, REG_T0);
            break;
        default:
            LOG_ERROR("ERROR gen_instr op in riscv");
            break;
        }
        // spill the result into this instruction's slot
        offset = stack_pos(instr, ctx);
        rv32_sw(out_asm, REG_T0, REG_SP, offset);
        break;
    }
    case IR_NODE_BRANCH: {
        get_node_val(out_asm, ctx, instr->data.branch.cond, REG_T0);
        // if (t0 != 0) goto true_bblock
        rt.snprintf(buf, sizeof(buf), "L%s%p", instr->data.branch.true_bblock->label, instr->data.branch.true_bblock);
        label.name = strpool_intern(_out_asm->strpool, buf);
        label.attr = LOCAL;
        rv32_bne_l(out_asm, REG_T0, REG_X0, &label);
        // else goto false_bblock
        rt.snprintf(buf, sizeof(buf), "L%s%p", instr->data.branch.false_bblock->label, instr->data.branch.false_bblock);
        label.name = strpool_intern(_out_asm->strpool, buf);
        label.attr = LOCAL;
        rv32_jal_l(out_asm, REG_X0, &label);
        break;
    }
    case IR_NODE_JUMP: {
        // TODO
        rt.snprintf(buf, sizeof(buf), "L%s%p", instr->data.jump.target_bblock->label, instr->data.jump.target_bblock);
        label.name = strpool_intern(_out_asm->strpool, buf);
        label.attr = LOCAL;
        rv32_jal_l(out_asm, REG_X0, &label);
        break;
    }
    case IR_NODE_CALL: {
        if (instr->data.call.args.size > 8) {
            LOG_ERROR("can't add so much params");
            // Only a0-a7 are available; stop here instead of indexing past
            // the end of param_regs below.
            break;
        }
        int param_regs[8] = {
            REG_A0, REG_A1, REG_A2, REG_A3,
            REG_A4, REG_A5, REG_A6, REG_A7
        };
        for (int i = 0; i < instr->data.call.args.size; i++) {
            ir_node_t* param = vector_at(instr->data.call.args, i);
            idx += get_node_val(out_asm, ctx, param, param_regs[i]);
        }
        int system_func_idx = system_func(instr->data.call.callee->name);
        if (system_func_idx != -1) {
            // built-in: a7 = ecall number, then trap
            rv32_li(out_asm, REG_A7, system_func_idx);
            rv32_ecall(out_asm);
            goto CALL_END;
        }
        /*
        // GENCODES(CALL(0));
        // AUIPC(REG_X1, REG_X0), \
        // JALR(REG_X1, REG_X1, offset)
        */
        // TODO CALL
        label.name = strpool_intern(_out_asm->strpool, instr->data.call.callee->name);
        label.attr = GLOBAL;
        rv32_call_l(out_asm, &label);
    CALL_END:
        // spill the returned value (a0) into this instruction's slot
        offset = stack_pos(instr, ctx);
        rv32_sw(out_asm, REG_A0, REG_SP, offset);
        break;
    }
    default:
        LOG_ERROR("ERROR gen_instr in riscv");
    }
    return idx;
}
/**
 * Emit one basic block: register its local label at the current code position,
 * then lower its instructions in order.
 *
 * @return always 0
 */
static int gen_block(rv32_prog_t* out_asm, gen_ctx_t* ctx, ir_bblock_t* block) {
    char name_buf[1024];
    symasm_entry_t entry;

    // Label text is "L<label><block address>", matching the branch/jump emitters.
    rt.snprintf(name_buf, sizeof(name_buf), "L%s%p", block->label, block);
    entry.name = strpool_intern(out_asm->strpool, name_buf);
    entry.attr = LOCAL;
    symtab_asm_put(&out_asm->symtab, &entry, out_asm->mcode.code.size);

    int n = 0;
    while (n < block->instrs.size) {
        gen_instr(out_asm, ctx, vector_at(block->instrs, n));
        n++;
    }
    return 0;
}
/**
 * Emit one function: global label, prologue, parameter spills, then all of its
 * basic blocks.
 *
 * Frame layout (offsets from the new sp): slot 0 holds ra, followed by one
 * 4-byte slot per IR instruction of the function.
 *
 * @return 0 on success, -1 when the function has more than 8 parameters
 *         (nothing is emitted in that case)
 */
static int gen_func(rv32_prog_t* out_asm, ir_func_t* func) {
    int param_regs[8] = {
        REG_A0, REG_A1, REG_A2, REG_A3,
        REG_A4, REG_A5, REG_A6, REG_A7
    };
    // Only a0-a7 carry arguments. Reject larger signatures up front so the
    // spill loop below can never index past param_regs.
    if (func->params.size > 8) {
        LOG_ERROR("can't add so much params");
        return -1;
    }

    gen_ctx_t ctx;
    symasm_entry_t label = {
        .name = strpool_intern(out_asm->strpool, func->name),
        .attr = GLOBAL,
    };
    symtab_asm_put(&out_asm->symtab, &label, out_asm->mcode.code.size);

    int stack_base = 4;
    int stack_offset = stack_base;
    for (int i = 0; i < func->bblocks.size; i++) {
        // TODO every instr push ret val to stack
        stack_offset += 4 * (*vector_at(func->bblocks, i)).instrs.size;
    }

    ctx.func = func;
    ctx.stack_base = stack_base;
    ctx.stack_offset = stack_offset;
    ctx.func_idx = 0;
    ctx.block_idx = 0;

    // TODO Alignment by 16
    // sp = sp - stack_offset;
    rv32_addi(&out_asm->mcode, REG_SP, REG_SP, -stack_offset);
    // M[sp] = ra;
    rv32_sw(&out_asm->mcode, REG_RA, REG_SP, 0);

    for (int i = 0; i < func->params.size; i++) {
        int offset = stack_pos(vector_at(func->params, i), &ctx);
        // M[sp + offset] = param[idx];
        rv32_sw(&out_asm->mcode, param_regs[i], REG_SP, offset);
    }

    for (int i = 0; i < func->bblocks.size; i++) {
        gen_block(out_asm, &ctx, vector_at(func->bblocks, i));
    }
    return 0;
}
/**
 * Generate RV32 code for every function of an IR program.
 *
 * The output program is (re)initialised first, then functions are emitted in
 * the order they appear in `ir`.
 *
 * @return always 0
 */
int gen_rv32_from_ir(ir_prog_t* ir, rv32_prog_t* out_asm) {
    init_rv32_prog(out_asm, NULL);

    int fn = 0;
    while (fn < ir->funcs.size) {
        gen_func(out_asm, vector_at(ir->funcs, fn));
        fn++;
    }
    // TODO: report the position of `main` (an earlier version looked it up
    // here and logged "main not found" when it was missing).
    return 0;
}

View File

@ -0,0 +1,9 @@
#ifndef __SMCC_CC_RISCV32_H__
#define __SMCC_CC_RISCV32_H__
// RISC-V 32-bit code generator interface of the compiler backend.
#include <src/assembler/assembler.h>
#include "../../middleend/ir/ir.h"
// Generate RV32 code for the IR program `ir` into `out_asm`.
int gen_rv32_from_ir(ir_prog_t* ir, rv32_prog_t* out_asm);
#endif

11
src/ccompiler/ccompiler.c Normal file
View File

@ -0,0 +1,11 @@
#include "ccompiler.h"
/**
 * Run the whole compiler pipeline: frontend (source -> AST), middle end
 * (AST -> IR) and backend (IR -> assembly program).
 *
 * @param cc compiler invocation: input stream plus per-stage configuration
 * @return the assembly program produced by the backend
 */
asm_prog_t* smcc_cc(smcc_cc_t* cc) {
    ast_node_t* root = cc_frontend(cc->file, cc->stream, cc->sread);
    // TODO add config
    ir_prog_t* prog = cc_middleend(root, &cc->midend_conf);
    // TODO add config
    asm_prog_t* asm_prog = cc_backend(prog, &cc->backend_conf);
    // The result was previously dropped: falling off the end of a non-void
    // function leaves the caller with an indeterminate pointer.
    return asm_prog;
}

21
src/ccompiler/ccompiler.h Normal file
View File

@ -0,0 +1,21 @@
#ifndef __SMCC_CC_H__
#define __SMCC_CC_H__
// Top-level C compiler interface: ties frontend, middle end and backend together.
// TODO
#include "frontend/frontend.h"
#include "middleend/middleend.h"
#include "backend/backend.h"
// One compiler invocation: where to read the source from and how to configure each stage.
typedef struct smcc_cc {
const char *file; // passed to cc_frontend() as the file name
void *stream; // opaque input handle handed to `sread`
sread_fn sread; // read callback used to pull source text from `stream`
cc_midend_conf_t midend_conf; // configuration passed to cc_middleend()
cc_backend_conf_t backend_conf; // configuration passed to cc_backend()
} smcc_cc_t;
typedef union asm_prog asm_prog_t;
// Compile the input described by `cc` and return the generated assembly program.
asm_prog_t* smcc_cc(smcc_cc_t* cc);
#endif

View File

@ -0,0 +1,43 @@
# Frontend Makefile: builds libfrontend.a from the lexer and parser sources.
# Toolchain settings
CC = gcc
AR = ar
CFLAGS = -g -Wall -I../..
# Source directories
LEXER_DIR = ./lexer
PARSER_DIR = ./parser
AST_DIR = ./parser/ast
# Source file list
SRCS = \
frontend.c \
$(LEXER_DIR)/lexer.c \
$(LEXER_DIR)/token.c \
$(PARSER_DIR)/parser.c \
$(PARSER_DIR)/ast.c \
$(AST_DIR)/block.c \
$(AST_DIR)/decl.c \
$(AST_DIR)/expr.c \
$(AST_DIR)/func.c \
$(AST_DIR)/program.c \
$(AST_DIR)/stmt.c \
$(AST_DIR)/term.c \
# Object file list derived from SRCS. NOTE(review): the trailing backslash above continues SRCS onto this comment line.
OBJS = $(SRCS:.c=.o)
# Final target: static library archived from OBJS
TARGET = libfrontend.a
all: $(TARGET)
$(TARGET): $(OBJS)
$(AR) rcs $@ $^
%.o: %.c
$(CC) $(CFLAGS) -c -o $@ $<
clean:
rm -f $(OBJS) $(TARGET)
.PHONY: all clean

View File

@ -0,0 +1,23 @@
#include <lib/core.h>
#include "frontend.h"
/**
 * Frontend entry point: lex and parse one source stream into an AST.
 *
 * @param file   name of the source, recorded by the lexer for locations
 * @param stream opaque input handle passed to `sread`
 * @param sread  callback used by the lexer to read source text
 * @return root of the parsed AST
 */
ast_node_t* cc_frontend(const char* file, void* stream, sread_fn sread) {
    strpool_t pool;
    cc_lexer_t lex;
    symtab_t symbols;
    parser_t prs;

    init_lib_core();

    // lexer reads from `stream` and interns strings into `pool`
    init_strpool(&pool);
    init_lexer(&lex, file, stream, sread, &pool);

    // TODO global scope
    init_symtab(&symbols);
    symtab_enter_scope(&symbols);

    init_parser(&prs, &lex, &symbols);
    parse_prog(&prs);

    // TODO Free the resourse
    return prs.root;
}

View File

@ -0,0 +1,9 @@
#ifndef __SMCC_CC_FRONTEND_H__
#define __SMCC_CC_FRONTEND_H__
// Compiler frontend interface: source stream in, AST out.
#include "lexer/lexer.h"
#include "parser/parser.h"
// Read callback: fills `dst_buf` from `stream`; same signature as lexer_sread_fn.
// NOTE(review): presumably fread-like (returns the number of elements read) — confirm.
typedef int (*sread_fn)(void *dst_buf, int elem_size, int count, void *stream);
// Lex and parse the source read through `sread` and return the AST root.
ast_node_t* cc_frontend(const char* file, void* stream, sread_fn sread);
#endif

View File

@ -0,0 +1,5 @@
# 词法分析
参考LCC的词法分析部分
主要使用 LL(n) 硬编码查找token

View File

@ -0,0 +1,525 @@
/**
* 仿照LCCompiler的词法分析部分
*
* 如下为LCC的README in 2025.2
This hierarchy is the distribution for lcc version 4.2.
lcc version 3.x is described in the book "A Retargetable C Compiler:
Design and Implementation" (Addison-Wesley, 1995, ISBN 0-8053-1670-1).
There are significant differences between 3.x and 4.x, most notably in
the intermediate code. For details, see
https://drh.github.io/lcc/documents/interface4.pdf.
VERSION 4.2 IS INCOMPATIBLE WITH EARLIER VERSIONS OF LCC. DO NOT
UNLOAD THIS DISTRIBUTION ON TOP OF A 3.X DISTRIBUTION.
LCC is a C89 ("ANSI C") compiler designed to be highly retargetable.
LOG describes the changes since the last release.
CPYRIGHT describes the conditions under you can use, copy, modify, and
distribute lcc or works derived from lcc.
doc/install.html is an HTML file that gives a complete description of
the distribution and installation instructions.
Chris Fraser / cwf@aya.yale.edu
David Hanson / drh@drhanson.net
*/
#include <lib/core.h>
#include "lexer_log.h"
#include "token.h"
#include "lexer.h"
// Keyword lookup table, expanded from KEYWORD_TABLE: spelling, the C standard
// (or extension) that introduced it, and the token type it lexes to.
// keyword_cmp() binary-searches this array, so entries must stay sorted by name.
static const struct {
const char* name;
enum CSTD_KEYWORD std_type;
cc_tktype_t tok;
} keywords[] = {
#define X(name, std_type, tok, ...) { #name, std_type, tok },
KEYWORD_TABLE
#undef X
};
/**
 * Binary-search the keyword table for the identifier name[0..len).
 *
 * `name` does not need to be NUL-terminated at `len`; only the first `len`
 * characters are compared.
 *
 * @return index into `keywords` on a match, -1 when it is not a keyword
 */
static inline int keyword_cmp(const char* name, int len) {
    int lo = 0;
    int hi = sizeof(keywords) / sizeof(keywords[0]) - 1;

    while (lo <= hi) {
        int mid = (lo + hi) / 2;
        const char *key = keywords[mid].name;
        int diff = 0;

        // compare the first `len` characters by hand
        int pos = 0;
        while (pos < len) {
            if (name[pos] != key[pos]) {
                diff = (unsigned char)name[pos] - (unsigned char)key[pos];
                break;
            }
            if (name[pos] == '\0') {
                break; // both strings ended early
            }
            pos++;
        }

        // equal prefix and the keyword ends right there: exact match
        if (diff == 0 && key[len] == '\0') {
            return mid;
        }
        // diff == 0 here means the keyword is longer than the input, which
        // therefore sorts before it
        if (diff <= 0) {
            hi = mid - 1;
        } else {
            lo = mid + 1;
        }
    }
    return -1; // Not a keyword.
}
/**
 * Initialise a lexer over an input stream.
 *
 * The buffer starts empty (cur_ptr == end_ptr) and zero-filled; the location
 * starts at line 1, column 1 of `file_name`.
 */
void init_lexer(cc_lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread, strpool_t* strpool) {
    // input source
    lexer->stream = stream;
    lexer->sread = sread;
    lexer->strpool = strpool;

    // empty, zeroed read window
    rt_memset(lexer->buffer, 0, sizeof(lexer->buffer));
    lexer->end_ptr = (char*)lexer->buffer;
    lexer->cur_ptr = lexer->end_ptr;

    // start of file
    lexer->loc.fname = strpool_intern(lexer->strpool, file_name);
    lexer->loc.line = 1;
    lexer->loc.col = 1;
}
/**
 * Refill the lexer buffer.
 *
 * The unread bytes [cur_ptr, end_ptr) are moved to the start of the buffer and
 * the free space behind them is filled from the input stream. When the stream
 * delivers fewer bytes than requested, a '\0' is appended as the EOF marker.
 * Pointers into the buffer other than cur_ptr/end_ptr are invalidated.
 */
static void flush_buffer(cc_lexer_t* lexer) {
    int num = lexer->end_ptr - lexer->cur_ptr;
    // move the unread tail to the front (forward copy is safe: dst <= src)
    for (int i = 0; i < num; i++) {
        lexer->buffer[i] = lexer->cur_ptr[i];
    }
    lexer->cur_ptr = lexer->buffer;
    // Re-base end_ptr onto the compacted data. Without this, the additions
    // below extend the old end position and end_ptr drifts past the buffer.
    lexer->end_ptr = lexer->buffer + num;

    int read_size = LEXER_BUFFER_SIZE - num;
    // TODO rt_size_t to int maybe lose precision
    int got_size = lexer->sread(lexer->buffer + num, 1, read_size, lexer->stream);

    if (got_size < 0) {
        LEX_ERROR("lexer read error");
    } else if (got_size < read_size) {
        lexer->end_ptr += got_size;
        lexer->end_ptr[0] = '\0'; // EOF
        lexer->end_ptr++;
    } else if (got_size == read_size) {
        lexer->end_ptr += got_size;
    } else {
        LEX_ERROR("lexer read error imposible got_size > read_size maybe overflow?");
    }
}
/**
 * Skip forward until cur_ptr points at a '\n' or at the '\0' EOF marker,
 * refilling the buffer whenever it runs dry. Always moves at least one step.
 */
static void goto_newline(cc_lexer_t* lexer) {
    do {
        if (lexer->cur_ptr == lexer->end_ptr) {
            // buffer exhausted: after the refill cur_ptr is at the buffer start
            flush_buffer(lexer);
        } else {
            lexer->cur_ptr++;
        }
    } while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0');
}
/**
 * Skip the body of a block comment. Stops just after the closing star-slash,
 * or at the '\0' EOF marker when the comment is unterminated. Newlines inside
 * the comment advance loc.line.
 */
static void goto_block_comment(cc_lexer_t* lexer) {
    for (;;) {
        // need two readable characters to recognise the terminator
        if (lexer->end_ptr - lexer->cur_ptr < 2) {
            flush_buffer(lexer);
        }
        char head = lexer->cur_ptr[0];
        if (head == '\0') {
            return;
        }
        if (head == '*' && lexer->cur_ptr[1] == '/') {
            lexer->cur_ptr += 2;
            return;
        }
        if (head == '\n') {
            lexer->loc.line++;
        }
        lexer->cur_ptr++;
    }
}
// TODO escape character not enough
/**
 * Decode the character that follows a backslash in a char/string literal.
 *
 * @param peek points at the character after the backslash
 * @return the decoded character; logs an error and returns -1 for an
 *         unsupported escape
 */
static char got_slash(char* peek) {
    // escape letters and, at the same index, the character each one stands for
    static const char letters[] = { '\\', '\'', '\"', '\?', '0',  'b',  'f',  'n',  'r',  't',  'v'  };
    static const char decoded[] = { '\\', '\'', '\"', '\?', '\0', '\b', '\f', '\n', '\r', '\t', '\v' };

    for (unsigned k = 0; k < sizeof(letters); k++) {
        if (*peek == letters[k]) {
            return decoded[k];
        }
    }
    LEX_ERROR("Unknown escape character");
    return -1;
}
/**
 * Lex a character literal. cur_ptr points at the opening quote on entry and
 * just past the closing quote on return.
 *
 * Stores the character in token->val.ch and the consumed source length in
 * lexer->loc.len (as the string and number parsers do), so the column is
 * advanced correctly by the caller.
 */
static void parse_char_literal(cc_lexer_t* lexer, tok_t* token) {
    char val = 0;
    char* peek = lexer->cur_ptr + 1;
    if (*peek == '\\') {
        peek++;
        val = got_slash(peek);
        peek++;
    } else {
        val = *peek++;
    }
    if (*peek++ != '\'') LEX_ERROR("Unclosed character literal");
    // Record the token length; it was left unset before, so the column never
    // moved past character literals.
    lexer->loc.len = peek - lexer->cur_ptr;
    lexer->cur_ptr = peek;
    token->val.ch = val;
}
/**
 * Lex a string literal. cur_ptr points at the opening quote on entry and just
 * past the closing quote on return (or at the EOF marker when the literal is
 * unterminated).
 *
 * Escape sequences are decoded with got_slash(). At most LEXER_MAX_TOKEN_SIZE
 * characters are kept: longer literals are reported once and truncated. The
 * text is interned into the lexer's string pool and stored in token->val.str;
 * lexer->loc.len receives the consumed source length.
 */
static void parse_string_literal(cc_lexer_t* lexer, tok_t* token) {
    char* peek = lexer->cur_ptr + 1;
    // TODO string literal size check
    static char dest[LEXER_MAX_TOKEN_SIZE + 1];
    int len = 0;
    int too_long = 0;

    for (;;) {
        // Keep two readable characters (escape + payload). flush_buffer()
        // moves the data, so `peek` is re-anchored relative to cur_ptr.
        if (lexer->end_ptr - peek < 2) {
            int consumed = peek - lexer->cur_ptr;
            flush_buffer(lexer);
            peek = lexer->cur_ptr + consumed;
            if (peek >= lexer->end_ptr) {
                // the literal fills the whole buffer; nothing more can be read
                LEX_ERROR("String too long");
                break;
            }
        }
        if (*peek == '"') {
            break;
        }
        if (*peek == '\0') {
            // EOF marker: stop instead of scanning past the buffer
            LEX_ERROR("Unclosed string literal");
            break;
        }

        char ch = *peek;
        if (ch == '\\') { // escape sequence
            peek++;
            ch = got_slash(peek);
        }

        if (len < LEXER_MAX_TOKEN_SIZE) {
            dest[len++] = ch;
        } else if (!too_long) {
            // report once and drop the rest rather than overflowing dest
            LEX_ERROR("String too long");
            too_long = 1;
        }
        peek++;
    }
    dest[len] = '\0';

    if (*peek == '"') {
        peek++; // closing quote
    }
    // source length including quotes and escapes
    lexer->loc.len = peek - lexer->cur_ptr;
    lexer->cur_ptr = peek;
    token->val.str = strpool_intern(lexer->strpool, dest);
}
// FIXME: originally AI-written; review carefully
/**
 * Lex an integer or floating-point literal starting at cur_ptr.
 *
 * Supported forms: decimal, octal (leading 0), hexadecimal (0x / 0X) integers,
 * and decimal floats with an optional fraction and exponent. Suffixes are not
 * handled.
 *
 * On return cur_ptr is past the literal, lexer->loc.len holds its length, and
 * the token carries either val.i with TOKEN_INT_LITERAL or val.f32 with
 * TOKEN_FLOAT_LITERAL.
 */
static void parse_number(cc_lexer_t* lexer, tok_t* token) {
    char* peek = lexer->cur_ptr;
    int base = 10;
    int is_float = 0;
    long long int_val = 0;
    double float_val = 0.0;

    // Determine the radix.
    if (peek[0] == '0') {
        if (peek[1] == 'x' || peek[1] == 'X') {
            // The 'x' case used to fall through to `default`, forcing base 8
            // and leaving the 'x' unconsumed.
            base = 16;
            peek += 2;
        } else {
            // A leading zero is still decimal when the digits run into a
            // fraction or an exponent (e.g. 0.5, 0e3).
            const char* scan = peek;
            while (*scan >= '0' && *scan <= '9') {
                scan++;
            }
            if (*scan == '.' || *scan == 'e' || *scan == 'E') {
                base = 10;
            } else {
                base = 8;
                peek++;
            }
        }
    }

    // Integer part.
    for (;;) {
        int digit = -1;
        if (*peek >= '0' && *peek <= '9') {
            digit = *peek - '0';
        } else if (base == 16) {
            if (*peek >= 'a' && *peek <= 'f') digit = *peek - 'a' + 10;
            else if (*peek >= 'A' && *peek <= 'F') digit = *peek - 'A' + 10;
        }
        if (digit < 0 || digit >= base) break;
        int_val = int_val * base + digit;
        peek++;
    }

    // Fraction.
    if (*peek == '.' && base == 10) {
        double fraction = 1.0;
        is_float = 1;
        float_val = (double)int_val;
        peek++;
        while (*peek >= '0' && *peek <= '9') {
            float_val = float_val * 10.0 + (*peek - '0');
            fraction *= 10.0;
            peek++;
        }
        float_val /= fraction;
    }

    // Exponent (scientific notation).
    if ((*peek == 'e' || *peek == 'E') && base == 10) {
        if (!is_float) {
            // no fraction seen (e.g. 1e5): start from the integer part
            is_float = 1;
            float_val = (double)int_val;
        }
        peek++;
        int exp_negative = 0;
        int exponent = 0;
        if (*peek == '+') {
            peek++;
        } else if (*peek == '-') {
            exp_negative = 1;
            peek++;
        }
        while (*peek >= '0' && *peek <= '9') {
            // Saturate: beyond this a double is already 0 or infinity, and the
            // cap keeps both the int and the loop below bounded.
            if (exponent < 1024) {
                exponent = exponent * 10 + (*peek - '0');
            }
            peek++;
        }
        // Scale by repeated multiplication/division, since pow() is not used
        // here; this can lose a little precision for large exponents.
        for (int k = 0; k < exponent; k++) {
            if (exp_negative) {
                float_val /= 10.0;
            } else {
                float_val *= 10.0;
            }
        }
    }

    // Store the result.
    // TODO
    lexer->loc.len = peek - lexer->cur_ptr;
    lexer->cur_ptr = peek;
    if (is_float) {
        token->val.f32 = float_val;
        token->sub_type = TOKEN_FLOAT_LITERAL;
    } else {
        token->val.i = int_val;
        token->sub_type = TOKEN_INT_LITERAL;
    }
}
#define GOT_ONE_TOKEN_BUF_SIZE 64
// /zh/c/language/operator_arithmetic.html
/**
 * Lex the next raw token, including blanks and comments.
 *
 * Fills `token` with the token type (sub_type), its basic class (type), the
 * literal value where applicable, and the lexer location after the token.
 * Use get_valid_token() to skip whitespace and comments.
 */
void get_token(cc_lexer_t* lexer, tok_t* token) {
    // Keep enough readable characters in the buffer for one token.
    if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) {
        flush_buffer(lexer);
    }
    register char* peek = lexer->cur_ptr;
    cc_tktype_t tk_type = TOKEN_INIT;
    ctype_t literal = { 0 };

    // Dispatch on the first character; `peek` always points one past the
    // characters consumed so far.
    switch (*peek++) {
    case '=':
        switch (*peek++) {
        case '=': tk_type = TOKEN_EQ; break;
        default: peek--, tk_type = TOKEN_ASSIGN; break;
        } break;
    case '+':
        switch (*peek++) {
        case '+': tk_type = TOKEN_ADD_ADD; break;
        case '=': tk_type = TOKEN_ASSIGN_ADD; break;
        default: peek--, tk_type = TOKEN_ADD; break;
        } break;
    case '-':
        switch (*peek++) {
        case '-': tk_type = TOKEN_SUB_SUB; break;
        case '=': tk_type = TOKEN_ASSIGN_SUB; break;
        case '>': tk_type = TOKEN_DEREF; break;
        default: peek--, tk_type = TOKEN_SUB; break;
        } break;
    case '*':
        switch (*peek++) {
        case '=': tk_type = TOKEN_ASSIGN_MUL; break;
        default: peek--, tk_type = TOKEN_MUL; break;
        } break;
    case '/':
        switch (*peek++) {
        case '=': tk_type = TOKEN_ASSIGN_DIV; break;
        case '/': {
            goto_newline(lexer);
            tk_type = TOKEN_LINE_COMMENT;
            goto END;
        }
        case '*': {
            lexer->cur_ptr = peek;
            goto_block_comment(lexer);
            tk_type = TOKEN_BLOCK_COMMENT;
            goto END;
        }
        default: peek--, tk_type = TOKEN_DIV; break;
        } break;
    case '%':
        switch (*peek++) {
        case '=': tk_type = TOKEN_ASSIGN_MOD; break;
        default: peek--, tk_type = TOKEN_MOD; break;
        } break;
    case '&':
        switch (*peek++) {
        case '&': tk_type = TOKEN_AND_AND; break;
        case '=': tk_type = TOKEN_ASSIGN_AND; break;
        default: peek--, tk_type = TOKEN_AND; break;
        } break;
    case '|':
        switch (*peek++) {
        case '|': tk_type = TOKEN_OR_OR; break;
        case '=': tk_type = TOKEN_ASSIGN_OR; break;
        default: peek--, tk_type = TOKEN_OR; break;
        } break;
    case '^':
        switch (*peek++) {
        case '=': tk_type = TOKEN_ASSIGN_XOR; break;
        default: peek--, tk_type = TOKEN_XOR; break;
        } break;
    case '<':
        switch (*peek++) {
        case '=': tk_type = TOKEN_LE; break;
        case '<': tk_type = (*peek == '=') ? (peek++, TOKEN_ASSIGN_L_SH) : TOKEN_L_SH; break;
        default: peek--, tk_type = TOKEN_LT; break;
        } break;
    case '>':
        switch (*peek++) {
        case '=': tk_type = TOKEN_GE; break;
        case '>': tk_type = (*peek == '=') ? (peek++, TOKEN_ASSIGN_R_SH) : TOKEN_R_SH; break;
        default: peek--, tk_type = TOKEN_GT; break;
        } break;
    case '~':
        tk_type = TOKEN_BIT_NOT; break;
    case '!':
        switch (*peek++) {
        case '=': tk_type = TOKEN_NEQ; break;
        default: peek--, tk_type = TOKEN_NOT; break;
        } break;
    case '[':
        tk_type = TOKEN_L_BRACKET; break;
    case ']':
        tk_type = TOKEN_R_BRACKET; break;
    case '(':
        tk_type = TOKEN_L_PAREN; break;
    case ')':
        tk_type = TOKEN_R_PAREN; break;
    case '{':
        tk_type = TOKEN_L_BRACE; break;
    case '}':
        tk_type = TOKEN_R_BRACE; break;
    case ';':
        tk_type = TOKEN_SEMICOLON; break;
    case ',':
        tk_type = TOKEN_COMMA; break;
    case ':':
        tk_type = TOKEN_COLON; break;
    case '.':
        if (peek[0] == '.' && peek[1] == '.') {
            peek += 2;
            tk_type = TOKEN_ELLIPSIS;
        } else {
            tk_type = TOKEN_DOT;
        }
        break;
    case '?':
        tk_type = TOKEN_COND; break;
    case '\v': case '\r': case '\f':
    case ' ': case '\t':
        tk_type = TOKEN_BLANK; break;
    case '\n':
        // you need to flush a newline or blank
        lexer->loc.line += 1;
        lexer->loc.col = -1;
        lexer->loc.len = 1;
        tk_type = TOKEN_BLANK;
        break;
    case '#':
        // TODO make line or file comment to change
        LEX_WARN("Maroc does not support in lexer rather in preprocessor, it will be ignored");
        goto_newline(lexer);
        tk_type = TOKEN_BLANK;
        goto END;
    case '\0':
        // EOF
        tk_type = TOKEN_EOF;
        goto END;
    case '\'':
        parse_char_literal(lexer, token);
        literal = token->val;
        tk_type = TOKEN_CHAR_LITERAL;
        goto END;
    case '"':
        parse_string_literal(lexer, token);
        literal = token->val;
        tk_type = TOKEN_STRING_LITERAL;
        goto END;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        parse_number(lexer, token);
        // TODO Make it easy
        literal = token->val;
        tk_type = token->sub_type;
        goto END;
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z':
    case '_':
        // TOKEN_IDENT
        // Wide literals start with `L` immediately followed by a quote. The
        // old test compared *peek against both 'L' and the quote, which can
        // never be true; the first character is at cur_ptr.
        if (lexer->cur_ptr[0] == 'L' && (*peek == '\'' || *peek == '"')) {
            LEX_ERROR("unsupport wide-character char literal by `L` format");
        }
        while (1) {
            if (peek == lexer->end_ptr) {
                LEX_ERROR("unsupport outof 64 length identifier");
                // stop here instead of scanning past the valid data
                break;
            }
            if ((*peek >= 'a' && *peek <= 'z') || (*peek >= 'A' && *peek <= 'Z') ||
                (*peek == '_') || (*peek >= '0' && *peek <= '9')) {
                peek++;
                continue;
            }
            break;
        }
        int ident_len = peek - lexer->cur_ptr;
        int res = keyword_cmp((const char*)lexer->cur_ptr, ident_len);
        if (res == -1) {
            // Temporarily NUL-terminate the identifier in place to intern it.
            char prev = lexer->cur_ptr[ident_len];
            lexer->cur_ptr[ident_len] = '\0';
            literal.str = strpool_intern(lexer->strpool, lexer->cur_ptr);
            lexer->cur_ptr[ident_len] = prev;
            tk_type = TOKEN_IDENT; break;
        } else {
            tk_type = keywords[res].tok; break;
        }
    default:
        LEX_ERROR("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
        break;
    }
    // Tokens lexed inline: consume the characters scanned by `peek`.
    lexer->loc.len = peek - lexer->cur_ptr;
    lexer->cur_ptr = peek;
END:
    lexer->loc.col += lexer->loc.len;
    lexer->loc.len = 0;
    token->val = literal;
    token->sub_type = tk_type;
    token->loc = lexer->loc;
    // Map every token type to its basic class.
    static const tok_basic_type_t tok_type_map[] = {
    // plain tokens: class taken from TOKEN_TABLE
    #define X(str, basic, tok) [tok] = basic,
    TOKEN_TABLE
    #undef X
    // keywords all share one class
    #define X(name, std, tok) [tok] = TK_BASIC_KEYWORD,
    KEYWORD_TABLE
    #undef X
    };
    token->type = tok_type_map[tk_type];
    LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(tk_type),
        token->loc.fname, token->loc.line, token->loc.col);
}
/**
 * Lex the next token that matters to the parser: whitespace and comment
 * tokens returned by get_token() are skipped. Asserts that no token of the
 * invalid class is produced.
 */
void get_valid_token(cc_lexer_t* lexer, tok_t* token) {
    for (;;) {
        get_token(lexer, token);
        tok_basic_type_t type = token->type;
        Assert(type != TK_BASIC_INVALID);
        if (type != TK_BASIC_WHITESPACE && type != TK_BASIC_COMMENT) {
            return;
        }
    }
}

View File

@ -0,0 +1,37 @@
#ifndef __SMCC_CC_LEXER_H__
#define __SMCC_CC_LEXER_H__
// Lexer interface: turns a character stream into tokens.
#include <lib/core.h>
#include "token.h"
// Capacity of the string-literal scratch buffer in lexer.c (overridable).
#ifndef LEXER_MAX_TOKEN_SIZE
#define LEXER_MAX_TOKEN_SIZE 63
#endif
// Number of source bytes buffered at once (overridable); the buffer has one extra byte.
#ifndef LEXER_BUFFER_SIZE
#define LEXER_BUFFER_SIZE 4095
#endif
// Read callback used to refill the buffer; the lexer calls it with elem_size 1
// and treats a negative result as a read error.
typedef int (*lexer_sread_fn)(void *dst_buf, int elem_size, int count, void *stream);
typedef struct cc_lexer {
loc_t loc; // current source location (file, line, column, token length)
char* cur_ptr; // next character to scan (not consumed yet)
char* end_ptr; // one past the last valid character in the buffer
char buffer[LEXER_BUFFER_SIZE+1];
lexer_sread_fn sread; // read callback
void* stream; // opaque handle passed to sread
strpool_t* strpool; // pool used to intern identifiers, strings and the file name
} cc_lexer_t;
// Initialise `lexer` to read `file_name` from `stream` through `sread`.
void init_lexer(cc_lexer_t* lexer, const char* file_name, void* stream,
lexer_sread_fn sread, strpool_t* strpool);
// Raw token getter: also returns non-significant tokens such as TOKEN_BLANK.
void get_token(cc_lexer_t* lexer, tok_t* token);
// Like get_token(), but skips tokens the parser does not want (e.g. TOKEN_BLANK).
void get_valid_token(cc_lexer_t* lexer, tok_t* token);
#endif

View File

@ -0,0 +1,46 @@
#ifndef __SMCC_LEXER_LOG_H__
#define __SMCC_LEXER_LOG_H__
// Lexer logging macros: thin wrappers over the LOG_* macros that prefix every
// message with "LEXER: " and can be compiled out per severity.
#include <lib/rt/rt.h>
// Minimum severity that is kept: a LEX_* macro is active when LEX_LOG_LEVEL is
// less than or equal to its level (NOTSET=1, DEBUG=2, INFO=3, WARN=4, ERROR=5,
// FATAL=6). With the default of 4, NOTSET/DEBUG/INFO expand to nothing.
#ifndef LEX_LOG_LEVEL
#define LEX_LOG_LEVEL 4
#endif
#if LEX_LOG_LEVEL <= 1
#define LEX_NOTSET( fmt, ...) LOG_NOTSET("LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_NOTSET( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 2
#define LEX_DEBUG( fmt, ...) LOG_DEBUG( "LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_DEBUG( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 3
#define LEX_INFO( fmt, ...) LOG_INFO( "LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_INFO( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 4
#define LEX_WARN( fmt, ...) LOG_WARN( "LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_WARN( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 5
#define LEX_ERROR( fmt, ...) LOG_ERROR("LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_ERROR( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 6
#define LEX_FATAL( fmt, ...) LOG_FATAL("LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_FATAL( fmt, ...)
#endif
#endif // __SMCC_LEXER_LOG_H__

View File

@ -0,0 +1,86 @@
#include <lib/core.h>
#include "lexer_log.h"
#include "token.h"
#define ROUND_IDX(idx) ((idx) % tokbuf->cap)
/**
 * Remove the oldest buffered token from the ring and return it.
 *
 * @return pointer to the popped token inside the ring buffer, or NULL (after
 *         logging an error) when no token is buffered
 */
tok_t* pop_tok(tok_stream_t* tokbuf) {
    if (tokbuf->size == 0) {
        LEX_ERROR("no token to pop");
        return NULL;
    }
    tok_t* front = tokbuf->buf + tokbuf->cur;
    // advance the read cursor, wrapping at the capacity
    tokbuf->cur = ROUND_IDX(tokbuf->cur + 1);
    tokbuf->size -= 1;
    return front;
}
// Reset the peek cursor to the read cursor, so the next peek_tok() returns the
// oldest token that has not been popped yet.
void flush_peek_tok(tok_stream_t* tokbuf) {
tokbuf->peek = tokbuf->cur;
}
/**
 * Reset a token stream to the empty state and attach its token source.
 *
 * No storage is attached here: `buf` is set to NULL and `cap` to 0.
 *
 * @param stream opaque handle passed to `gettok`
 * @param gettok callback that produces the next token
 */
void init_tokbuf(tok_stream_t *tokbuf, void *stream, tok_stream_get_func gettok) {
    // token source
    tokbuf->stream = stream;
    tokbuf->gettok = gettok;

    // no storage attached yet
    tokbuf->buf = NULL;
    tokbuf->cap = 0;

    // empty ring: all cursors at the start
    tokbuf->size = 0;
    tokbuf->cur = 0;
    tokbuf->peek = 0;
    tokbuf->end = 0;
}
/**
 * Return the token at the peek cursor and advance the cursor by one.
 *
 * When the cursor has reached the end of the buffered tokens, one more token
 * is fetched from the source and appended to the ring.
 *
 * @return pointer to the token inside the ring buffer, or NULL (after logging
 *         an error) when the ring has no storage, is full, or has no token
 *         source; the peek cursor is left unchanged in that case
 */
tok_t *peek_tok(tok_stream_t *tokbuf) {
    // init_tokbuf() leaves the ring without storage; ROUND_IDX would then
    // compute a modulo by zero.
    if (tokbuf->cap == 0 || tokbuf->buf == NULL) {
        LEX_ERROR("peek_tok buffer is not allocated");
        return NULL;
    }
    Assert(tokbuf->size <= tokbuf->cap);

    int idx = tokbuf->peek;
    if (idx == tokbuf->end) {
        // nothing buffered at the cursor: pull one token from the source
        if (tokbuf->size == tokbuf->cap) {
            LEX_ERROR("peek_tok buffer overflow");
            return NULL;
        }
        if (tokbuf->gettok == NULL) {
            LEX_ERROR("peek_tok can not got tok");
            return NULL;
        }
        tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx]));
        tokbuf->size++;
        tokbuf->end = ROUND_IDX(idx + 1);
    }
    // advance only after success, so a failed peek does not move the cursor
    tokbuf->peek = ROUND_IDX(idx + 1);
    return &(tokbuf->buf[idx]);
}
/**
 * Peek the next token and return its token type.
 *
 * @return the token's sub_type, or TOKEN_INIT (the invalid token) when
 *         peek_tok() fails
 */
cc_tktype_t peek_tok_type(tok_stream_t* tokbuf) {
    tok_t* tok = peek_tok(tokbuf);
    if (tok == NULL) {
        // peek_tok() already logged the reason
        return TOKEN_INIT;
    }
    return tok->sub_type;
}
/**
 * Consume the next token if it has the expected type; otherwise log an error
 * and leave it in the stream.
 *
 * @param type expected token type
 * @return always 0 (kept for compatibility; the result does not tell success
 *         from failure)
 */
int expect_pop_tok(tok_stream_t* tokbuf, cc_tktype_t type) {
    flush_peek_tok(tokbuf);
    tok_t* tok = peek_tok(tokbuf);
    if (tok == NULL) {
        // peek_tok() already logged the reason
        return 0;
    }
    if (tok->sub_type != type) {
        // Report the concrete token type (sub_type); `type` is the basic class
        // and does not index the token name table.
        LEX_ERROR("expected tok `%s` but got `%s`", get_tok_name(type), get_tok_name(tok->sub_type));
        return 0;
    } else {
        pop_tok(tokbuf);
    }
    return 0;
}
// Token type -> printable name, generated from the X-macro tables.
static const char* token_strings[] = {
// plain tokens: the stringified first column of TOKEN_TABLE (#str)
#define X(str, basic, tok) [tok] = #str,
TOKEN_TABLE
#undef X
// keywords: the stringified keyword spelling (#name)
#define X(name, std, tok) [tok] = #name,
KEYWORD_TABLE
#undef X
};
/**
 * Return the printable name of a token type.
 *
 * @return the name from the token tables, or "<unknown>" when `type` is out of
 *         range or has no table entry (never NULL, so the result is safe to
 *         pass to a "%s" format)
 */
const char* get_tok_name(cc_tktype_t type) {
    const int count = (int)(sizeof(token_strings) / sizeof(token_strings[0]));
    const int idx = (int)type;
    if (idx < 0 || idx >= count || token_strings[idx] == NULL) {
        return "<unknown>";
    }
    return token_strings[idx];
}

View File

@ -0,0 +1,142 @@
#ifndef __SMCC_CC_TOKEN_H__
#define __SMCC_CC_TOKEN_H__
#include <lib/utils/utils.h>
// Origin of a keyword, used as the second column of KEYWORD_TABLE.
enum CSTD_KEYWORD {
CSTD_C89, // keyword tagged as C89 in KEYWORD_TABLE
CSTD_C99, // keyword tagged as C99 (inline, restrict)
CEXT_ASM, // extension keyword (asm)
};
// X-macro table of keywords: X(spelling, origin, token type).
// The lexer finds keywords with a binary search over this table, so the rows
// must stay sorted by spelling in ascending byte order.
#define KEYWORD_TABLE \
X(asm , CEXT_ASM, TOKEN_ASM) \
X(break , CSTD_C89, TOKEN_BREAK) \
X(case , CSTD_C89, TOKEN_CASE) \
X(char , CSTD_C89, TOKEN_CHAR) \
X(const , CSTD_C89, TOKEN_CONST) \
X(continue , CSTD_C89, TOKEN_CONTINUE) \
X(default , CSTD_C89, TOKEN_DEFAULT) \
X(do , CSTD_C89, TOKEN_DO) \
X(double , CSTD_C89, TOKEN_DOUBLE) \
X(else , CSTD_C89, TOKEN_ELSE) \
X(enum , CSTD_C89, TOKEN_ENUM) \
X(extern , CSTD_C89, TOKEN_EXTERN) \
X(float , CSTD_C89, TOKEN_FLOAT) \
X(for , CSTD_C89, TOKEN_FOR) \
X(goto , CSTD_C89, TOKEN_GOTO) \
X(if , CSTD_C89, TOKEN_IF) \
X(inline , CSTD_C99, TOKEN_INLINE) \
X(int , CSTD_C89, TOKEN_INT) \
X(long , CSTD_C89, TOKEN_LONG) \
X(register , CSTD_C89, TOKEN_REGISTER) \
X(restrict , CSTD_C99, TOKEN_RESTRICT) \
X(return , CSTD_C89, TOKEN_RETURN) \
X(short , CSTD_C89, TOKEN_SHORT) \
X(signed , CSTD_C89, TOKEN_SIGNED) \
X(sizeof , CSTD_C89, TOKEN_SIZEOF) \
X(static , CSTD_C89, TOKEN_STATIC) \
X(struct , CSTD_C89, TOKEN_STRUCT) \
X(switch , CSTD_C89, TOKEN_SWITCH) \
X(typedef , CSTD_C89, TOKEN_TYPEDEF) \
X(union , CSTD_C89, TOKEN_UNION) \
X(unsigned , CSTD_C89, TOKEN_UNSIGNED) \
X(void , CSTD_C89, TOKEN_VOID) \
X(volatile , CSTD_C89, TOKEN_VOLATILE) \
X(while , CSTD_C89, TOKEN_WHILE) \
// KEYWORD_TABLE
// X-macro table of all non-keyword tokens: one
// X(display text, basic token class, token enumerator) per entry.
// Because enum cc_tktype expands this table first, TOKEN_INIT has the
// value 0 and the keyword enumerators follow these entries.
#define TOKEN_TABLE \
X(init , TK_BASIC_INVALID, TOKEN_INIT) \
X(EOF , TK_BASIC_EOF, TOKEN_EOF) \
X(blank , TK_BASIC_WHITESPACE, TOKEN_BLANK) \
X("==" , TK_BASIC_OPERATOR, TOKEN_EQ) \
X("=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN) \
X("++" , TK_BASIC_OPERATOR, TOKEN_ADD_ADD) \
X("+=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_ADD) \
X("+" , TK_BASIC_OPERATOR, TOKEN_ADD) \
X("--" , TK_BASIC_OPERATOR, TOKEN_SUB_SUB) \
X("-=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_SUB) \
X("->" , TK_BASIC_OPERATOR, TOKEN_DEREF) \
X("-" , TK_BASIC_OPERATOR, TOKEN_SUB) \
X("*=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MUL) \
X("*" , TK_BASIC_OPERATOR, TOKEN_MUL) \
X("/=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_DIV) \
X("/" , TK_BASIC_OPERATOR, TOKEN_DIV) \
X("//" , TK_BASIC_COMMENT , TOKEN_LINE_COMMENT) \
X("/* */" , TK_BASIC_COMMENT , TOKEN_BLOCK_COMMENT) \
X("%=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MOD) \
X("%" , TK_BASIC_OPERATOR, TOKEN_MOD) \
X("&&" , TK_BASIC_OPERATOR, TOKEN_AND_AND) \
X("&=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_AND) \
X("&" , TK_BASIC_OPERATOR, TOKEN_AND) \
X("||" , TK_BASIC_OPERATOR, TOKEN_OR_OR) \
X("|=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_OR) \
X("|" , TK_BASIC_OPERATOR, TOKEN_OR) \
X("^=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_XOR) \
X("^" , TK_BASIC_OPERATOR, TOKEN_XOR) \
X("<<=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_L_SH) \
X("<<" , TK_BASIC_OPERATOR, TOKEN_L_SH) \
X("<=" , TK_BASIC_OPERATOR, TOKEN_LE) \
X("<" , TK_BASIC_OPERATOR, TOKEN_LT) \
X(">>=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_R_SH) \
X(">>" , TK_BASIC_OPERATOR, TOKEN_R_SH) \
X(">=" , TK_BASIC_OPERATOR, TOKEN_GE) \
X(">" , TK_BASIC_OPERATOR, TOKEN_GT) \
X("!" , TK_BASIC_OPERATOR, TOKEN_NOT) \
X("!=" , TK_BASIC_OPERATOR, TOKEN_NEQ) \
X("~" , TK_BASIC_OPERATOR, TOKEN_BIT_NOT) \
X("[" , TK_BASIC_OPERATOR, TOKEN_L_BRACKET) \
X("]" , TK_BASIC_OPERATOR, TOKEN_R_BRACKET) \
X("(" , TK_BASIC_OPERATOR, TOKEN_L_PAREN) \
X(")" , TK_BASIC_OPERATOR, TOKEN_R_PAREN) \
X("{" , TK_BASIC_OPERATOR, TOKEN_L_BRACE) \
X("}" , TK_BASIC_OPERATOR, TOKEN_R_BRACE) \
X(";" , TK_BASIC_OPERATOR, TOKEN_SEMICOLON) \
X("," , TK_BASIC_OPERATOR, TOKEN_COMMA) \
X(":" , TK_BASIC_OPERATOR, TOKEN_COLON) \
X("." , TK_BASIC_OPERATOR, TOKEN_DOT) \
X("..." , TK_BASIC_OPERATOR, TOKEN_ELLIPSIS) \
X("?" , TK_BASIC_OPERATOR, TOKEN_COND) \
X(ident , TK_BASIC_IDENTIFIER, TOKEN_IDENT) \
X(int_literal , TK_BASIC_LITERAL, TOKEN_INT_LITERAL) \
X(float_literal , TK_BASIC_LITERAL, TOKEN_FLOAT_LITERAL) \
X(char_literal , TK_BASIC_LITERAL, TOKEN_CHAR_LITERAL) \
X(string_literal , TK_BASIC_LITERAL, TOKEN_STRING_LITERAL) \
// END
// Token kind enumeration, generated from the two X-macro tables above.
typedef enum cc_tktype {
// Ordinary tokens: emit the third column (the enumerator name).
#define X(str, basic, tok) tok,
TOKEN_TABLE
#undef X
// Keywords: same expansion, appended after the ordinary tokens.
#define X(name, std, tok) tok,
KEYWORD_TABLE
#undef X
} cc_tktype_t;
// Buffered token stream with look-ahead.
// NOTE(review): the field roles below are inferred from the names and from
// the one visible writer (check_is_func_decl in func.c); confirm against the
// implementation of init_tokbuf / peek_tok / pop_tok.
typedef struct tok_stream {
// presumably the index of the current (next to be popped) token
int cur;
// check_is_func_decl sets this to `size` once it has finished filling `buf`
int end;
// presumably the look-ahead cursor moved by the peek_* helpers
int peek;
// number of tokens stored in `buf` (used as the append index in func.c)
int size;
// capacity of `buf`, in tokens
int cap;
// token storage
tok_t* buf;
// opaque source handle, passed back to `gettok`
void* stream;
// callback that produces the next token from `stream` into `token`
void (*gettok)(void* stream, tok_t* token);
} tok_stream_t;
// Signature of the token producer callback stored in tok_stream_t.
typedef void(*tok_stream_get_func)(void* stream, tok_t* token);
// Initialise `tokbuf` to read from `stream` through `gettok`.
void init_tokbuf(tok_stream_t* tokbuf, void* stream, tok_stream_get_func gettok);
// Look at an upcoming token without consuming it.
// NOTE(review): callers peek repeatedly to see successive tokens, so this
// presumably advances a look-ahead cursor -- confirm.
tok_t* peek_tok(tok_stream_t* tokbuf);
// Consume a token from the stream.
tok_t* pop_tok(tok_stream_t* tokbuf);
// Reset the look-ahead state (presumably back to the current token).
void flush_peek_tok(tok_stream_t* tokbuf);
// Like peek_tok, but returns only the token kind.
cc_tktype_t peek_tok_type(tok_stream_t* tokbuf);
// Pop the next token if its kind is `type`, otherwise report a lexer error.
int expect_pop_tok(tok_stream_t* tokbuf, cc_tktype_t type);
// Display name of a token kind, taken from the X-macro tables.
const char* get_tok_name(cc_tktype_t type);
#endif

View File

View File

@ -0,0 +1,172 @@
#include "ast.h"
ast_node_t* new_ast_node(void) {
ast_node_t* node = rt._malloc(sizeof(ast_node_t));
init_ast_node(node);
return node;
}
/**
 * Reset a node to its initial state: type NT_INIT and every generic child
 * slot set to NULL.
 *
 * @param node node to initialise; must not be NULL
 */
void init_ast_node(ast_node_t* node) {
    const int slot_count = (int)(sizeof(node->children) / sizeof(node->children[0]));
    int slot = 0;

    node->type = NT_INIT;
    while (slot < slot_count) {
        node->children[slot] = NULL;
        slot++;
    }
}
// ast_node_t* find_ast_node(ast_node_t* node, ast_type_t type) {
// }
#include <stdio.h>
/* Print one indentation unit to stdout for each nesting level in `depth`;
 * a depth of zero or less prints nothing. */
static void pnt_depth(int depth) {
    int remaining = depth;
    while (remaining > 0) {
        printf(" ");
        remaining--;
    }
}
// void pnt_ast(ast_node_t* node, int depth) {
// if (!node) return;
// pnt_depth(depth);
// switch (node->type) {
// case NT_ROOT:
// for (int i = 0; i < node->root.child_size; i++) {
// pnt_ast(node->root.children[i], depth);
// }
// return;
// case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
// case NT_SUB : printf("- \n"); break; // (expr) - (expr)
// case NT_MUL : printf("* \n"); break; // (expr) * (expr)
// case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
// case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
// case NT_AND : printf("& \n"); break; // (expr) & (expr)
// case NT_OR : printf("| \n"); break; // (expr) | (expr)
// case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
// case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
// case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
// case NT_EQ : printf("==\n"); break; // (expr) == (expr)
// case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
// case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
// case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
// case NT_LT : printf("< \n"); break; // (expr) < (expr)
// case NT_GT : printf("> \n"); break; // (expr) > (expr)
// case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
// case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
// case NT_NOT : printf("! \n"); break; // ! (expr)
// case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
// case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
// case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
// // case NT_COND : // (expr) ? (expr) : (expr)
// case NT_STMT_EMPTY : // ;
// printf(";\n");
// break;
// case NT_STMT_IF : // if (cond) { ... } [else {...}]
// printf("if");
// pnt_ast(node->if_stmt.cond, depth+1);
// pnt_ast(node->if_stmt.if_stmt, depth+1);
// if (node->if_stmt.else_stmt) {
// pnt_depth(depth);
// printf("else");
// pnt_ast(node->if_stmt.else_stmt, depth+1);
// }
// break;
// case NT_STMT_WHILE : // while (cond) { ... }
// printf("while\n");
// pnt_ast(node->while_stmt.cond, depth+1);
// pnt_ast(node->while_stmt.body, depth+1);
// break;
// case NT_STMT_DOWHILE : // do {...} while (cond)
// printf("do-while\n");
// pnt_ast(node->do_while_stmt.body, depth+1);
// pnt_ast(node->do_while_stmt.cond, depth+1);
// break;
// case NT_STMT_FOR : // for (init; cond; iter) {...}
// printf("for\n");
// if (node->for_stmt.init)
// pnt_ast(node->for_stmt.init, depth+1);
// if (node->for_stmt.cond)
// pnt_ast(node->for_stmt.cond, depth+1);
// if (node->for_stmt.iter)
// pnt_ast(node->for_stmt.iter, depth+1);
// pnt_ast(node->for_stmt.body, depth+1);
// break;
// case NT_STMT_SWITCH : // switch (expr) { case ... }
// case NT_STMT_BREAK : // break;
// case NT_STMT_CONTINUE : // continue;
// case NT_STMT_GOTO : // goto label;
// case NT_STMT_CASE : // case const_expr:
// case NT_STMT_DEFAULT : // default:
// case NT_STMT_LABEL : // label:
// break;
// case NT_STMT_BLOCK : // { ... }
// printf("{\n");
// for (int i = 0; i < node->block.child_size; i++) {
// pnt_ast(node->block.children[i], depth+1);
// }
// pnt_depth(depth);
// printf("}\n");
// break;
// case NT_STMT_RETURN : // return expr;
// printf("return");
// if (node->return_stmt.expr_stmt) {
// printf(" ");
// pnt_ast(node->return_stmt.expr_stmt, depth+1);
// } else {
// printf("\n");
// }
// break;
// case NT_STMT_EXPR : // expr;
// printf("stmt\n");
// pnt_ast(node->expr_stmt.expr_stmt, depth);
// pnt_depth(depth);
// printf(";\n");
// break;
// case NT_DECL_VAR : // type name; or type name = expr;
// printf("decl_val\n");
// break;
// case NT_DECL_FUNC: // type func_name(param_list);
// printf("decl func %s\n", node->func.name->syms.tok.val.str);
// break;
// case NT_FUNC : // type func_name(param_list) {...}
// printf("def func %s\n", node->func.name->syms.tok.val.str);
// // pnt_ast(node->child.func.params, depth);
// pnt_ast(node->func.body, depth);
// // pnt_ast(node->child.func.ret, depth);
// break;
// case NT_PARAM : // 函数形参
// printf("param\n");
// case NT_ARG_LIST : // 实参列表需要与NT_CALL配合
// printf("arg_list\n");
// case NT_TERM_CALL : // func (expr)
// printf("call\n");
// break;
// case NT_TERM_IDENT:
// printf("%s\n", node->syms.tok.val.str);
// break;
// case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword
// tok_t * tok = &node->syms.tok;
// switch (tok->type) {
// case TOKEN_CHAR_LITERAL:
// printf("%c\n", tok->val.ch);
// break;
// case TOKEN_INT_LITERAL:
// printf("%d\n", tok->val.i);
// break;
// case TOKEN_STRING_LITERAL:
// printf("%s\n", tok->val.str);
// break;
// default:
// printf("unknown term val\n");
// break;
// }
// default:
// break;
// }
// // 通用子节点递归处理
// if (node->type <= NT_ASSIGN) { // 表达式类统一处理子节点
// if (node->expr.left) pnt_ast(node->expr.left, depth+1);
// if (node->expr.right) pnt_ast(node->expr.right, depth + 1);
// }
// }

View File

@ -0,0 +1,190 @@
#ifndef __AST_H__
#define __AST_H__
#include "../lexer/lexer.h"
#include <lib/utils/ds/vector.h>
#include "symtab/symtab.h"
#include "type.h"
// Kind tag of an AST node; selects which member of the union in
// struct ast_node is meaningful.
typedef enum {
NT_INIT,
NT_ROOT, // global scope in root node
NT_ADD, // (expr) + (expr)
NT_SUB, // (expr) - (expr)
NT_MUL, // (expr) * (expr)
NT_DIV, // (expr) / (expr)
NT_MOD, // (expr) % (expr)
NT_AND, // (expr) & (expr)
NT_OR, // (expr) | (expr)
NT_XOR, // (expr) ^ (expr)
NT_L_SH, // (expr) << (expr)
NT_R_SH, // (expr) >> (expr)
NT_EQ, // (expr) == (expr)
NT_NEQ, // (expr) != (expr)
NT_LE, // (expr) <= (expr)
NT_GE, // (expr) >= (expr)
NT_LT, // (expr) < (expr)
NT_GT, // (expr) > (expr)
NT_AND_AND, // (expr) && (expr)
NT_OR_OR, // (expr) || (expr)
NT_NOT, // ! (expr)
NT_BIT_NOT, // ~ (expr)
NT_COND, // (expr) ? (expr) : (expr)
NT_COMMA, // expr, expr (comma operator)
NT_ASSIGN, // (expr) = (expr)
NT_ADDRESS, // &expr (address-of)
NT_DEREF, // *expr (dereference)
NT_INDEX, // arr[index] (array access)
NT_MEMBER, // struct.member
NT_PTR_MEMBER,// ptr->member
NT_CAST, // (type)expr explicit cast
NT_SIZEOF, // sizeof(type|expr)
// NT_ALIGNOF, // _Alignof(type) (C11)
NT_STMT_EMPTY, // ;
NT_STMT_IF, // if (cond) { ... } [else {...}]
NT_STMT_WHILE, // while (cond) { ... }
NT_STMT_DOWHILE, // do {...} while (cond)
NT_STMT_FOR, // for (init; cond; iter) {...}
NT_STMT_SWITCH, // switch (expr) { case ... }
NT_STMT_BREAK, // break;
NT_STMT_CONTINUE, // continue;
NT_STMT_GOTO, // goto label;
NT_STMT_CASE, // case const_expr:
NT_STMT_DEFAULT, // default:
NT_STMT_LABEL, // label:
NT_STMT_BLOCK, // { ... }
NT_STMT_RETURN, // return expr;
NT_STMT_EXPR, // expr;
NT_BLOCK,
// NT_TYPE_BASE, // base type node
// NT_TYPE_PTR, // pointer type
// NT_TYPE_ARRAY, // array type
// NT_TYPE_FUNC, // function type
// NT_TYPE_QUAL, // qualifier node
NT_DECL_VAR, // type name; or type name = expr;
NT_DECL_FUNC, // type func_name(param_list);
NT_FUNC, // type func_name(param_list) {...}
NT_PARAM, // formal function parameter
NT_ARG_LIST, // argument list, used together with NT_CALL
NT_TERM_CALL, // func (expr)
NT_TERM_VAL,
NT_TERM_IDENT,
NT_TERM_TYPE,
} ast_type_t;
// One AST node: a kind tag plus an anonymous union holding the payload for
// that kind. All payload views share storage, so only the member that
// matches `type` may be read.
typedef struct ast_node {
ast_type_t type;
union {
// Generic view of the payload; init_ast_node() clears these six slots.
void *children[6];
// NT_ROOT: top-level declarations and function definitions.
struct {
vector_header(children, struct ast_node *);
} root;
// NT_BLOCK: declarations and statements of a `{ ... }` block.
struct {
vector_header(children, struct ast_node *);
} block;
// Terminal symbols (identifiers, literals): symbol-table key, the
// declaration the name resolved to, and a copy of the source token.
struct {
symtab_key_t key;
struct ast_node * decl_node;
tok_t tok;
} syms;
// Parameter or argument list.
struct {
vector_header(params, struct ast_node *);
} params;
// NT_TERM_CALL: callee identifier, argument list node and the
// function declaration found in the symbol table.
struct {
struct ast_node * name;
struct ast_node * params;
struct ast_node * func_decl;
} call;
// NT_DECL_VAR
struct {
struct ast_node *type;
struct ast_node *name;
struct ast_node *expr_stmt; // optional
void* data;
} decl_val;
// NT_DECL_FUNC; `def` points at the NT_FUNC node once a body is seen.
struct {
struct ast_node *ret;
struct ast_node *name;
struct ast_node *params; // array of params
struct ast_node *def;
} decl_func;
// NT_FUNC
struct {
struct ast_node *decl;
struct ast_node *body; // optional
void* data;
} func;
// Unary and binary expression nodes.
struct {
struct ast_node *left;
struct ast_node *right;
struct ast_node *optional; // optional
} expr;
struct {
struct ast_node *cond;
struct ast_node *if_stmt;
struct ast_node *else_stmt; // optional
} if_stmt;
struct {
struct ast_node *cond;
struct ast_node *body;
} switch_stmt;
struct {
struct ast_node *cond;
struct ast_node *body;
} while_stmt;
struct {
struct ast_node *body;
struct ast_node *cond;
} do_while_stmt;
struct {
struct ast_node *init;
struct ast_node *cond; // optional
struct ast_node *iter; // optional
struct ast_node *body;
} for_stmt;
struct {
struct ast_node *expr_stmt; // optional
} return_stmt;
struct {
struct ast_node *label;
} goto_stmt;
struct {
struct ast_node *label;
} label_stmt;
struct {
struct ast_node *block;
} block_stmt;
struct {
struct ast_node *expr_stmt;
} expr_stmt;
};
} ast_node_t;
// Allocate a node and initialise it with init_ast_node().
ast_node_t* new_ast_node(void);
// Set `type` to NT_INIT and clear the generic child slots.
void init_ast_node(ast_node_t* node);
// Debug printer for an AST.
// NOTE(review): the definition in ast.c is currently commented out, so
// calling this would fail at link time -- confirm before use.
void pnt_ast(ast_node_t* node, int depth);
typedef struct parser parser_t;
// Common signature of the node-returning parse routines.
typedef ast_node_t* (*parse_func_t) (parser_t*);
// Parse a whole translation unit into parser->root.
void parse_prog(parser_t* parser);
// Parse a declaration; returns NULL when the name is followed by '(' so the
// caller can handle it as a function.
ast_node_t* parse_decl(parser_t* parser);
// Parse a variable declaration, with optional initialiser.
ast_node_t* parse_decl_val(parser_t* parser);
// Parse a `{ ... }` block inside its own symbol-table scope.
ast_node_t* parse_block(parser_t* parser);
ast_node_t* parse_stmt(parser_t* parser);
ast_node_t* parse_expr(parser_t* parser);
ast_node_t* parse_type(parser_t* parser);
// Build an identifier node from a token.
ast_node_t* new_ast_ident_node(tok_t* tok);
// Pop an identifier token and return its node.
ast_node_t* expect_pop_ident(tok_stream_t* tokbuf);
// Return 1 when the upcoming tokens start a declaration, 0 otherwise.
int peek_decl(tok_stream_t* tokbuf);
#endif

View File

@ -0,0 +1,18 @@
- ast.c 作为抽象语法树的定义
- block.c 作为块的实现主要用于处理作用域,需要符号表
- decl.c 作为声明的实现,其中主要携带变量声明,函数声明见 func.c ,需要符号表
- func.c 作为函数的实现,其中主要携带函数声明,以及函数定义,需要符号表
- expr.c 作为表达式的实现。需要符号表
- stmt.c 作为语句的实现。需要表达式类型判断合法性
- term.c 作为终结符的实现。需要表达式类型判断合法性
- program.c 作为语法分析、语义分析的入口函数,可以根据 parser 结构生成 AST
其中stmt参考cppreference
其中expr参考AI以及CParser

View File

@ -0,0 +1,49 @@
#include "../ast.h"
#include "../parser.h"
#ifndef BLOCK_MAX_NODE
#define BLOCK_MAX_NODE (1024)
#endif
/**
 * Create an empty block node.
 *
 * The parameter list is spelled `(void)` so the definition is a real
 * prototype; an empty `()` leaves the parameters unspecified before C23.
 *
 * @return a new NT_BLOCK node whose child vector has been initialised
 */
ast_node_t* new_ast_node_block(void) {
    ast_node_t* node = new_ast_node();
    node->type = NT_BLOCK;
    vector_init(node->block.children);
    return node;
}
/**
 * Parse a `{ ... }` block.
 *
 * Opens a new symbol-table scope, then collects declarations and statements
 * into an NT_BLOCK node until the closing brace. The scope is left again
 * before returning, on every path.
 *
 * Parsing stops early, after reporting an error, in two cases:
 *  - parse_decl() returns NULL. It does so without consuming any token, so
 *    retrying would see the same input again and loop forever.
 *  - the input ends before the closing brace.
 *
 * @param parser parser state (token stream and symbol table)
 * @return the block node; it holds whatever was parsed before an error
 */
ast_node_t* parse_block(parser_t* parser) {
    symtab_enter_scope(parser->symtab);
    tok_stream_t *tokbuf = &parser->tokbuf;
    flush_peek_tok(tokbuf);
    ast_node_t* node = new_ast_node_block();
    expect_pop_tok(tokbuf, TOKEN_L_BRACE);

    int done = 0;
    while (!done) {
        if (peek_decl(tokbuf)) {
            ast_node_t* decl = parse_decl(parser);
            if (decl == NULL) {
                LOG_ERROR("unsupported declaration inside block");
                break;
            }
            vector_push(node->block.children, decl);
            continue;
        }
        flush_peek_tok(tokbuf);
        switch (peek_tok_type(tokbuf)) {
            case TOKEN_R_BRACE: {
                pop_tok(tokbuf);
                done = 1;
                break;
            }
            case TOKEN_EOF: {
                LOG_ERROR("unexpected end of input, block is missing '}'");
                done = 1;
                break;
            }
            default: {
                ast_node_t* stmt = parse_stmt(parser);
                vector_push(node->block.children, stmt);
                break;
            }
        }
    }

    symtab_leave_scope(parser->symtab);
    return node;
}

View File

@ -0,0 +1,98 @@
#include "../ast.h"
#include "../parser.h"
/**
 * Check whether the upcoming tokens start a declaration.
 *
 * Returns 1 when a basic type keyword (void, char, short, int, long, float,
 * double) is found, 0 otherwise.
 *
 * NOTE(review): on the "1" path the look-ahead is deliberately not flushed;
 * parse_decl() keeps peeking after this call to inspect the tokens that
 * follow the type. This presumably relies on peek_tok_type() advancing a
 * look-ahead cursor -- confirm against the token stream implementation.
 */
int peek_decl(tok_stream_t* tokbuf) {
flush_peek_tok(tokbuf);
// Storage-class specifiers are recognised but not supported yet.
switch (peek_tok_type(tokbuf)) {
case TOKEN_STATIC:
case TOKEN_EXTERN:
case TOKEN_REGISTER:
case TOKEN_TYPEDEF:
LOG_ERROR("not impliment");
break;
default:
// Not a storage class: rewind so the type check below sees this token.
flush_peek_tok(tokbuf);
}
switch (peek_tok_type(tokbuf)) {
case TOKEN_VOID:
case TOKEN_CHAR:
case TOKEN_SHORT:
case TOKEN_INT:
case TOKEN_LONG:
case TOKEN_FLOAT:
case TOKEN_DOUBLE:
// FIXME Ptr
return 1;
default:
flush_peek_tok(tokbuf);
}
return 0;
}
// Parse a variable declaration: `type name;` or `type name = expr;`.
// Builds an NT_DECL_VAR node and registers it in the symbol table under the
// current scope's uid and the variable name.
ast_node_t* parse_decl_val(parser_t* parser) {
tok_stream_t* tokbuf = &parser->tokbuf;
cc_tktype_t ttype;
flush_peek_tok(tokbuf);
ast_node_t* node;
ast_node_t* type_node = parse_type(parser);
flush_peek_tok(tokbuf);
// The identifier is only peeked here, not popped; both branches below rely
// on it still being in the stream.
ast_node_t* name_node = new_ast_ident_node(peek_tok(tokbuf));
node = new_ast_node();
node->decl_val.type = type_node;
node->decl_val.name = name_node;
node->type = NT_DECL_VAR;
// NOTE(review): the symbol key is stored in the *type* node's `syms`, while
// parse_params() and parse_func() keep it in the name node -- confirm
// whether this difference is intended.
type_node->syms.key.uid = parser->symtab->cur_scope->uid;
type_node->syms.key.strp_name = name_node->syms.tok.val.str;
symtab_add(parser->symtab, &type_node->syms.key, node);
ttype = peek_tok_type(tokbuf);
if (ttype == TOKEN_ASSIGN) {
// The unpopped identifier lets `name = expr;` be parsed as an ordinary
// expression statement, which becomes the initialiser.
node->decl_val.expr_stmt = parse_stmt(parser);
if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) {
LOG_ERROR("parser_decl_val want stmt_expr");
}
} else if (ttype == TOKEN_SEMICOLON) {
// No initialiser: drop the identifier, then the terminating ';'.
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
} else {
LOG_ERROR("parser_decl_val syntax error");
}
return node;
}
/**
 * Decide what kind of declaration starts at the current token.
 *
 * Looks at the type, the identifier and the token after it without
 * consuming anything:
 *  - '('        -> returns NULL so the caller can treat it as a function
 *  - '=' or ';' -> parses and returns a variable declaration
 *  - otherwise  -> reports a syntax error and returns NULL
 *
 * @param parser parser state
 * @return the variable declaration node, or NULL as described above
 */
ast_node_t* parse_decl(parser_t* parser) {
    tok_stream_t* stream = &parser->tokbuf;
    flush_peek_tok(stream);

    if (!peek_decl(stream)) {
        LOG_ERROR("syntax error expect decl_val TYPE");
    }
    if (peek_tok_type(stream) != TOKEN_IDENT) {
        LOG_ERROR("syntax error expect decl_val IDENT");
    }

    /* Token that follows the declared name. */
    const cc_tktype_t follow = peek_tok_type(stream);
    if (follow == TOKEN_L_PAREN) {
        return NULL;
    }
    if (follow == TOKEN_ASSIGN || follow == TOKEN_SEMICOLON) {
        return parse_decl_val(parser);
    }

    LOG_ERROR("syntax error expect decl_val ASSIGN or SEMICOLON");
    return NULL;
}

View File

@ -0,0 +1,425 @@
#include "../ast.h"
#include "../parser.h"
// Copy from `CParse`
/**
* Operator precedence classes, ordered from loosest to tightest binding.
* parse_subexpression() compares these values numerically, so the order of
* the enumerators is significant.
*/
enum Precedence {
PREC_BOTTOM,
PREC_EXPRESSION, /* , left to right */
PREC_ASSIGNMENT, /* = += -= *= /= %= <<= >>= &= ^= |= right to left */
PREC_CONDITIONAL, /* ?: right to left */
PREC_LOGICAL_OR, /* || left to right */
PREC_LOGICAL_AND, /* && left to right */
PREC_OR, /* | left to right */
PREC_XOR, /* ^ left to right */
PREC_AND, /* & left to right */
PREC_EQUALITY, /* == != left to right */
PREC_RELATIONAL, /* < <= > >= left to right */
PREC_SHIFT, /* << >> left to right */
PREC_ADDITIVE, /* + - left to right */
PREC_MULTIPLICATIVE, /* * / % left to right */
PREC_CAST, /* (type) right to left */
PREC_UNARY, /* ! ~ ++ -- + - * & sizeof right to left */
PREC_POSTFIX, /* () [] -> . left to right */
PREC_PRIMARY,
PREC_TOP
};
// How an expr_table entry is applied: an infix handler receives the operand
// parsed so far as its left side; a prefix handler starts a new operand.
enum ParseType {
INFIX_PARSER,
PREFIX_PARSER,
};
// Forward declaration: the operator handlers below recurse into it.
static ast_node_t *parse_subexpression(tok_stream_t* tokbuf, symtab_t *symtab, enum Precedence prec);
// Shorthand for parsing an operand at precedence `prec`; expects variables
// named `tokbuf` and `symtab` to be in scope at the point of use.
#define NEXT(prec) parse_subexpression(tokbuf, symtab, prec)
/*
 * Build a two-operand expression node of kind `type`.
 *
 * FIXME (carried over from the original): a disabled per-operator switch
 * used to follow the return statement; what it was meant to fix is not
 * visible here.
 */
static ast_node_t* gen_node2(ast_node_t* left, ast_node_t* right,
                             ast_type_t type) {
    ast_node_t* binary = new_ast_node();
    binary->expr.right = right;
    binary->expr.left = left;
    binary->type = type;
    return binary;
}
/*
 * Infix handler for the comma operator: `left , right`.
 *
 * The ',' token is popped before the right operand is parsed, as the other
 * infix handlers (parse_assign, parse_cmp, parse_cal) do for their
 * operators. Without the pop, parsing the right operand would see the same
 * ',' again and stop at once without producing an operand.
 */
static ast_node_t* parse_comma(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    flush_peek_tok(tokbuf);
    pop_tok(tokbuf); /* consume ',' */
    ast_node_t* node = new_ast_node();
    node->type = NT_COMMA;
    node->expr.left = left;
    node->expr.right = NEXT(PREC_EXPRESSION);
    return node;
}
/*
 * Infix handler for `=` and the compound assignments (`+=`, `-=`, ...).
 *
 * Always produces an NT_ASSIGN node whose left side is `left`. A compound
 * assignment `a op= b` is lowered to `a = a op b`; the lowered binary node
 * shares the `left` operand node with the assignment.
 *
 * Assignment is right-associative. parse_subexpression() keeps consuming
 * operators whose precedence is strictly greater than the level it is
 * given, so the right operand is parsed one level *below* PREC_ASSIGNMENT.
 * That lets a nested `=` be absorbed into it (`a = b = c` parses as
 * `a = (b = c)`), while a following ',' still ends the operand.
 *
 * For an unexpected token the error is reported and the node's right side
 * is set to `left`, as before.
 */
static ast_node_t* parse_assign(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    pop_tok(tokbuf);

    ast_node_t* node = new_ast_node();
    node->type = NT_ASSIGN;
    node->expr.left = left;

    const enum Precedence rhs_prec = PREC_ASSIGNMENT - 1;

    /* Binary operation implied by a compound assignment; NT_INIT means a
     * plain '='. */
    ast_type_t compound = NT_INIT;
    switch (ttype) {
        case TOKEN_ASSIGN:      break;
        case TOKEN_ASSIGN_ADD:  compound = NT_ADD;  break;
        case TOKEN_ASSIGN_SUB:  compound = NT_SUB;  break;
        case TOKEN_ASSIGN_MUL:  compound = NT_MUL;  break;
        case TOKEN_ASSIGN_DIV:  compound = NT_DIV;  break;
        case TOKEN_ASSIGN_MOD:  compound = NT_MOD;  break;
        case TOKEN_ASSIGN_L_SH: compound = NT_L_SH; break;
        case TOKEN_ASSIGN_R_SH: compound = NT_R_SH; break;
        case TOKEN_ASSIGN_AND:  compound = NT_AND;  break;
        case TOKEN_ASSIGN_OR:   compound = NT_OR;   break;
        case TOKEN_ASSIGN_XOR:  compound = NT_XOR;  break;
        default:
            LOG_ERROR("unsupported operator");
            node->expr.right = left;
            return node;
    }

    ast_node_t* rhs = NEXT(rhs_prec);
    if (compound != NT_INIT) {
        rhs = gen_node2(left, rhs, compound);
    }
    node->expr.right = rhs;
    return node;
}
/*
 * Infix handler for the comparison operators (== != < > <= >=).
 *
 * Pops the operator, then parses the right operand at the operator's own
 * precedence level. For any other token the error is reported and the node
 * is returned with only its left side set.
 */
static ast_node_t* parse_cmp(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    flush_peek_tok(tokbuf);
    const cc_tktype_t op = peek_tok_type(tokbuf);
    pop_tok(tokbuf);

    ast_node_t* cmp = new_ast_node();
    cmp->expr.left = left;

    ast_type_t kind;
    enum Precedence level;
    switch (op) {
        case TOKEN_EQ:  kind = NT_EQ;  level = PREC_EQUALITY;   break;
        case TOKEN_NEQ: kind = NT_NEQ; level = PREC_EQUALITY;   break;
        case TOKEN_LT:  kind = NT_LT;  level = PREC_RELATIONAL; break;
        case TOKEN_GT:  kind = NT_GT;  level = PREC_RELATIONAL; break;
        case TOKEN_LE:  kind = NT_LE;  level = PREC_RELATIONAL; break;
        case TOKEN_GE:  kind = NT_GE;  level = PREC_RELATIONAL; break;
        default:
            LOG_ERROR("invalid operator");
            return cmp;
    }

    cmp->type = kind;
    cmp->expr.right = NEXT(level);
    return cmp;
}
/*
 * Infix handler for the arithmetic, bitwise, shift and logical binary
 * operators.
 *
 * Pops the operator, then parses the right operand at the operator's own
 * precedence level. For any other token the node is returned with only its
 * left side set; no error is reported.
 */
static ast_node_t* parse_cal(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    flush_peek_tok(tokbuf);
    const cc_tktype_t op = peek_tok_type(tokbuf);
    pop_tok(tokbuf);

    ast_node_t* binary = new_ast_node();
    binary->expr.left = left;

    ast_type_t kind;
    enum Precedence level;
    switch (op) {
        case TOKEN_OR_OR:   kind = NT_OR_OR;   level = PREC_LOGICAL_OR;     break;
        case TOKEN_AND_AND: kind = NT_AND_AND; level = PREC_LOGICAL_AND;    break;
        case TOKEN_OR:      kind = NT_OR;      level = PREC_OR;             break;
        case TOKEN_XOR:     kind = NT_XOR;     level = PREC_XOR;            break;
        case TOKEN_AND:     kind = NT_AND;     level = PREC_AND;            break;
        case TOKEN_L_SH:    kind = NT_L_SH;    level = PREC_SHIFT;          break;
        case TOKEN_R_SH:    kind = NT_R_SH;    level = PREC_SHIFT;          break;
        case TOKEN_ADD:     kind = NT_ADD;     level = PREC_ADDITIVE;       break;
        case TOKEN_SUB:     kind = NT_SUB;     level = PREC_ADDITIVE;       break;
        case TOKEN_MUL:     kind = NT_MUL;     level = PREC_MULTIPLICATIVE; break;
        case TOKEN_DIV:     kind = NT_DIV;     level = PREC_MULTIPLICATIVE; break;
        case TOKEN_MOD:     kind = NT_MOD;     level = PREC_MULTIPLICATIVE; break;
        default:
            return binary;
    }

    binary->type = kind;
    binary->expr.right = NEXT(level);
    return binary;
}
/*
 * Parse a call `ident ( arg, arg, ... )`; the current token is the '('.
 *
 * Arguments are parsed at PREC_EXPRESSION, so a top-level ',' ends each
 * argument instead of being treated as the comma operator. After an
 * argument only ',' or ')' may follow; anything else is reported and ends
 * the list. Continuing would parse from the same token again without
 * making progress.
 *
 * After the list the callee is looked up in the symbol table; a missing or
 * non-function symbol is reported with LOG_FATAL.
 */
static ast_node_t* parse_call(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* ident) {
    ast_node_t* node = new_ast_node();
    node->type = NT_TERM_CALL;
    node->call.name = ident;
    node->call.params = new_ast_node();
    vector_init(node->call.params->params.params);
    pop_tok(tokbuf); // skip '('

    cc_tktype_t ttype;
    while (1) {
        flush_peek_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype == TOKEN_R_PAREN) {
            pop_tok(tokbuf); // skip ')'
            break;
        }
        ast_node_t* param = NEXT(PREC_EXPRESSION);
        vector_push(node->call.params->params.params, param);
        flush_peek_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype == TOKEN_COMMA) {
            pop_tok(tokbuf);
        } else if (ttype != TOKEN_R_PAREN) {
            LOG_ERROR("expected ',' or ')' in call to %s", ident->syms.key.strp_name);
            break;
        }
    }

    ast_node_t* sym = symtab_get(symtab, &ident->syms.key);
    // TODO check func is match
    if (sym == NULL || sym->type != NT_DECL_FUNC) {
        LOG_FATAL("function not decl %s", ident->syms.key.strp_name);
    }
    node->call.func_decl = sym;
    return node;
}
/*
 * Handler for a parenthesised expression `( expr )`. The incoming `left`
 * operand is ignored; the value of the inner expression is returned.
 */
static ast_node_t* parse_paren(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    ast_node_t* inner;

    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_L_PAREN);

    inner = NEXT(PREC_EXPRESSION);

    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_R_PAREN);
    return inner;
}
// Signature shared by all operator handlers: token stream, symbol table and
// the operand parsed so far.
typedef ast_node_t* (*parse_expr_fun_t)(tok_stream_t*, symtab_t* , ast_node_t*);
// Operator dispatch table, indexed by token kind. Entries not listed are
// zero-initialised: parser == NULL, prec == PREC_BOTTOM, ptype == INFIX_PARSER.
// parse_subexpression() hands any token whose entry has no parser to
// parse_primary_expression().
static struct expr_prec_table_t {
parse_expr_fun_t parser;
enum Precedence prec;
enum ParseType ptype;
} expr_table [256] = {
[TOKEN_COMMA] = {parse_comma, PREC_EXPRESSION, INFIX_PARSER},
[TOKEN_ASSIGN] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_ADD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_SUB] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_MUL] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_DIV] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_MOD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_AND] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_OR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_XOR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_OR_OR] = {parse_cal, PREC_LOGICAL_OR , INFIX_PARSER},
[TOKEN_AND_AND] = {parse_cal, PREC_LOGICAL_AND, INFIX_PARSER},
[TOKEN_OR] = {parse_cal, PREC_OR , INFIX_PARSER},
[TOKEN_XOR] = {parse_cal, PREC_XOR , INFIX_PARSER},
[TOKEN_AND] = {parse_cal, PREC_AND , INFIX_PARSER},
[TOKEN_EQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
[TOKEN_NEQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
[TOKEN_LT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_LE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_GT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_GE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_L_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
[TOKEN_R_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
[TOKEN_ADD] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
[TOKEN_SUB] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
[TOKEN_MUL] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
[TOKEN_DIV] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
[TOKEN_MOD] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
// Prefix operators are registered without a handler for now.
[TOKEN_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_BIT_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_ADD_ADD] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_SUB_SUB] = {NULL, PREC_UNARY, PREFIX_PARSER},
// + - * & sizeof
// '(' is registered as an infix entry; parse_paren ignores its left operand.
[TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
};
/*
 * Parse a primary expression: a literal, an identifier or a call.
 *
 * - int / char / string / float literal: the token is popped and copied into
 *   a new NT_TERM_VAL node (floats only produce a warning for now).
 * - identifier: popped through expect_pop_ident(); when '(' follows it is
 *   parsed as a call, otherwise it is resolved in the symbol table and
 *   becomes an NT_TERM_IDENT node.
 * - anything else: returns NULL and consumes nothing.
 *
 * Two defects of the earlier version are fixed here: a string literal no
 * longer falls through into the identifier case, and no node is allocated
 * on the paths that do not use it (identifier, unknown token).
 */
static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symtab) {
    flush_peek_tok(tokbuf);
    tok_t* tok = peek_tok(tokbuf);
    ast_node_t *node;

    switch (tok->sub_type) {
        case TOKEN_INT_LITERAL:
            // node->data.data_type = TYPE_INT;
            break;
        case TOKEN_FLOAT_LITERAL:
            LOG_WARN("float not supported");
            break;
        case TOKEN_CHAR_LITERAL:
            // node->data.data_type = TYPE_CHAR;
            break;
        case TOKEN_STRING_LITERAL:
            // node->data.data_type = TYPE_POINTER;
            break;
        case TOKEN_IDENT: {
            node = expect_pop_ident(tokbuf);
            cc_tktype_t ttype = peek_tok_type(tokbuf);
            node->syms.key.uid = 0;
            node->syms.key.strp_name = tok->val.str;
            if (ttype == TOKEN_L_PAREN) {
                node = parse_call(tokbuf, symtab, node);
            } else {
                void *sym = symtab_get(symtab, &node->syms.key);
                if (sym == NULL) {
                    LOG_ERROR("undefined symbol but use %s", tok->val.str);
                }
                node->type = NT_TERM_IDENT;
                node->syms.decl_node = sym;
            }
            return node;
        }
        default:
            return NULL;
    }

    /* Shared tail for every literal kind. */
    node = new_ast_node();
    node->type = NT_TERM_VAL;
    node->syms.tok = *tok;
    pop_tok(tokbuf);
    return node;
}
/*
 * Core expression loop, in precedence-climbing style.
 *
 * Builds up `left` token by token. An infix operator is applied only while
 * its precedence is strictly greater than `prec`; otherwise the loop stops
 * and the expression parsed so far is returned.
 *
 * The loop also stops at ';' and ')', and at any token from which no
 * primary expression can be parsed. parse_primary_expression() returns NULL
 * for such a token without consuming it, so retrying would never make
 * progress.
 *
 * @return the parsed expression, or NULL when no operand was parsed
 */
static ast_node_t *parse_subexpression(tok_stream_t* tokbuf, symtab_t *symtab, enum Precedence prec) {
    /* Initialised so that an immediate stop returns NULL, not garbage. */
    ast_node_t* left = NULL;

    while (1) {
        flush_peek_tok(tokbuf);
        cc_tktype_t ttype = peek_tok_type(tokbuf);
        // FIXME
        if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
            break;
        }

        struct expr_prec_table_t* work = &expr_table[ttype];
        if (work->parser == NULL || work->ptype == PREFIX_PARSER) {
            if (work->parser != NULL) {
                left = work->parser(tokbuf, symtab, NULL);
            } else {
                ast_node_t* primary = parse_primary_expression(tokbuf, symtab);
                if (primary == NULL) {
                    break;
                }
                left = primary;
            }
        } else {
            /* Infix operator that has a handler. */
            if (work->prec <= prec) {
                break;
            }
            left = work->parser(tokbuf, symtab, left);
        }
    }
    return left;
}
/**
 * Parse a full expression starting at the current token.
 *
 * @param parser parser state
 * @return the expression tree, or NULL when the current token cannot start
 *         an expression (an error is reported in that case)
 */
ast_node_t* parse_expr(parser_t* parser) {
    tok_stream_t* tokbuf = &(parser->tokbuf);
    symtab_t *symtab = parser->symtab;
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    switch (ttype) {
        case TOKEN_NOT:
        case TOKEN_AND:
        case TOKEN_L_PAREN:
        case TOKEN_MUL:
        case TOKEN_ADD:
        case TOKEN_SUB:
        case TOKEN_BIT_NOT:
        case TOKEN_AND_AND:
        case TOKEN_CHAR_LITERAL:
        case TOKEN_INT_LITERAL:
        case TOKEN_STRING_LITERAL:
        case TOKEN_ADD_ADD:
        case TOKEN_SUB_SUB:
        case TOKEN_SIZEOF:
        case TOKEN_IDENT:
            return NEXT(PREC_EXPRESSION);
        default:
            LOG_ERROR("Want expr but not got %s", get_tok_name(ttype));
            break;
    }
    /* Error path: return a defined value instead of falling off the end of
     * a non-void function. */
    return NULL;
}

View File

@ -0,0 +1,176 @@
#include "../ast.h"
#include "../parser.h"
#ifndef FUNC_PARAM_CACHE_SIZE
#define FUNC_PARAM_CACHE_SIZE 32 // 合理初始值可覆盖99%常见情况
#endif
// TODO: semantic analysis -- push the parameters into the symbol table
//
// Build the parameter list of a function from the tokens cached by
// check_is_func_decl(). `cache` holds everything after the opening '(' up to
// and including the matching ')'. Every identifier found becomes an
// NT_DECL_VAR node that is appended to node->decl_func.params and added to
// the symbol table under the current scope.
//
// NOTE(review): `cache` is created with a NULL producer callback in
// parse_func(), so this loop presumably must never read past the cached
// closing ')' -- confirm that the peek helpers cannot refill here.
static void parse_params(parser_t* parser, tok_stream_t* cache, ast_node_t* node) {
flush_peek_tok(cache);
cc_tktype_t ttype;
ast_node_t *params = new_ast_node();
node->decl_func.params = params;
vector_init(params->params.params);
// Parenthesis nesting depth; starts at 1 for the '(' that was already consumed.
int depth = 1;
while (depth) {
ttype = peek_tok_type(cache);
switch (ttype) {
case TOKEN_COMMA:
break;
case TOKEN_ELLIPSIS:
ttype = peek_tok_type(cache);
if (ttype != TOKEN_R_PAREN) {
LOG_ERROR("... must be a last parameter list (expect ')')");
}
// TODO
LOG_ERROR("not implement");
break;
case TOKEN_IDENT:
// TODO: static arrays
flush_peek_tok(cache);
ast_node_t* id_node = new_ast_ident_node(peek_tok(cache));
// Note: this local `node` shadows the function parameter of the same name.
ast_node_t* node = new_ast_node();
node->type = NT_DECL_VAR;
node->decl_val.name = id_node;
// TODO typing sys
node->decl_val.type = NULL;
node->decl_val.expr_stmt = NULL;
node->decl_val.data = NULL;
vector_push(params->params.params, node);
id_node->syms.key.uid = parser->symtab->cur_scope->uid;
id_node->syms.key.strp_name = id_node->syms.tok.val.str;
symtab_add(parser->symtab, &id_node->syms.key, node);
break;
case TOKEN_L_PAREN: {
depth++;
break;
}
case TOKEN_R_PAREN: {
depth--;
break;
}
default:
break;
// TODO: parse types from the cache
// parse_type(parser);
// TODO type parse
// ttype = peekcachetype(cache);
// ttype = peekcachetype(cache);
// if (ttype != TOKEN_IDENT) {
// node->node_type = NT_DECL_FUNC;
// flush_peek_tok(tokbuf);
// continue;
// }
// LOG_ERROR("function expected ')' or ','\n");
}
// Every token, whatever its kind, is consumed exactly once per iteration.
pop_tok(cache);
}
}
/**
 * Consume a parenthesised parameter list and classify what follows it.
 *
 * Every token after the opening '(' up to and including the matching ')' is
 * popped from `tokbuf` and copied into `cache`, so the parameters can be
 * parsed later. The token after the list then decides the result.
 *
 * @param tokbuf main token stream, positioned at the '(' of the list
 * @param cache  destination for the copied tokens; `buf` and `cap` must be
 *               set by the caller, `size` and `end` are updated here
 * @return NT_FUNC when a '{' follows (the '{' is left in the stream);
 *         NT_DECL_FUNC when a ';' follows (the ';' is consumed).
 *         After an error -- parameter list too long for the cache, or
 *         neither '{' nor ';' after it -- the error is reported and
 *         NT_DECL_FUNC is returned so the caller does not try to parse a
 *         body from a broken parameter list.
 */
ast_type_t check_is_func_decl(tok_stream_t* tokbuf, tok_stream_t* cache) {
    expect_pop_tok(tokbuf, TOKEN_L_PAREN);
    int depth = 1;
    while (depth) {
        tok_t* tok = peek_tok(tokbuf);
        pop_tok(tokbuf);
        if (cache->size >= cache->cap - 1) {
            /* Stop copying here: one more token would run past the
             * caller's buffer. */
            LOG_ERROR("function parameter list too long");
            cache->end = cache->size;
            return NT_DECL_FUNC;
        }
        cache->buf[cache->size++] = *tok;
        switch (tok->sub_type) {
            case TOKEN_L_PAREN:
                depth++;
                break;
            case TOKEN_R_PAREN:
                depth--;
                break;
            default:
                break;
        }
    }
    cache->end = cache->size;

    switch (peek_tok_type(tokbuf)) {
        case TOKEN_SEMICOLON:
            pop_tok(tokbuf);
            return NT_DECL_FUNC;
        case TOKEN_L_BRACE:
            return NT_FUNC;
        default:
            LOG_ERROR("function define or decl need '{' or ';' but you don't got");
            /* Return a defined value instead of falling off the end. */
            return NT_DECL_FUNC;
    }
}
/*
 * Create an NT_DECL_FUNC node for a function with return type node `ret`
 * and identifier node `name`; no definition is attached yet.
 */
static ast_node_t* new_ast_node_funcdecl(ast_node_t* ret, ast_node_t* name) {
    ast_node_t* decl = new_ast_node();
    decl->decl_func.def = NULL;
    decl->decl_func.name = name;
    decl->decl_func.ret = ret;
    decl->type = NT_DECL_FUNC;
    return decl;
}
// Parse a function declaration or definition at file scope and append the
// resulting node(s) to parser->root.
//
// Flow: parse return type and name, cache the parameter tokens while
// deciding between declaration (';') and definition ('{'), reconcile with
// an earlier declaration of the same name, then -- for a definition only --
// parse the cached parameters and the body inside a new scope.
void parse_func(parser_t* parser) {
tok_stream_t* tokbuf = &(parser->tokbuf);
flush_peek_tok(tokbuf);
ast_node_t* ret_node = parse_type(parser);
ast_node_t* name_node = expect_pop_ident(tokbuf);
const char* func_name = name_node->syms.tok.val.str;
ast_node_t* decl = new_ast_node_funcdecl(ret_node, name_node);
// Temporary token cache for the parameter list, backed by a stack array.
tok_stream_t cache;
init_tokbuf(&cache, NULL, NULL);
cache.cap = FUNC_PARAM_CACHE_SIZE;
tok_t buf[FUNC_PARAM_CACHE_SIZE];
cache.buf = buf;
ast_type_t type = check_is_func_decl(&(parser->tokbuf), &cache);
name_node->syms.key.uid = parser->symtab->cur_scope->uid;
name_node->syms.key.strp_name = func_name;
ast_node_t* prev = symtab_get(parser->symtab, &name_node->syms.key);
// TODO Change something
if (prev != NULL) {
if (prev->type != NT_DECL_FUNC) {
LOG_ERROR("the symbol duplicate old is %d, new is func", prev->type);
}
// TODO check redeclare func is match
if (type == NT_FUNC) {
// TODO Free decl;
// Definition of an already declared function: reuse the earlier
// declaration node and discard the one just built.
// NOTE(review): ret_node and name_node are not freed here -- confirm ownership.
rt._free(decl);
decl = prev;
goto FUNC;
}
// Repeated declaration: nothing more to do.
return;
}
symtab_add(parser->symtab, &name_node->syms.key, decl);
vector_push(parser->root->root.children, decl);
if (type == NT_DECL_FUNC) {
return;
}
FUNC:
// `def` doubles as a marker for detecting a duplicate definition
if (decl->decl_func.def != NULL) {
LOG_ERROR("redefinition of function %s", func_name);
}
ast_node_t* node = new_ast_node();
node->type = NT_FUNC;
node->func.decl = decl;
node->func.data = NULL;
decl->decl_func.def = node;
// Parameters live in their own scope; parse_block() opens a further scope
// for the body itself.
symtab_enter_scope(parser->symtab);
parse_params(parser, &cache, decl);
node->func.body = parse_block(parser);
symtab_leave_scope(parser->symtab);
vector_push(parser->root->root.children, node);
}

View File

@ -0,0 +1,34 @@
#include "../ast.h"
#include "../parser.h"
#ifndef PROG_MAX_NODE_SIZE
#define PROG_MAX_NODE_SIZE (1024 * 4)
#endif
void parse_func(parser_t* parser);
/**
 * Parse a whole translation unit.
 *
 *   Program := (Declaration | Definition)*
 *
 * Creates the NT_ROOT node and then repeatedly tries parse_decl(); whenever
 * that yields NULL the input is handed to parse_func(), which appends its own
 * nodes to the root. Parsing stops at TOKEN_EOF.
 */
void parse_prog(parser_t* parser) {
    tok_stream_t* stream = &(parser->tokbuf);

    parser->root = new_ast_node();
    parser->root->type = NT_ROOT;
    vector_init(parser->root->root.children);

    for (;;) {
        flush_peek_tok(stream);
        if (peek_tok_type(stream) == TOKEN_EOF) {
            return;
        }

        ast_node_t* decl = parse_decl(parser);
        if (decl != NULL) {
            vector_push(parser->root->root.children, decl);
        } else {
            /* Not a plain declaration: treat it as a function. */
            parse_func(parser);
        }
    }
}

View File

@ -0,0 +1,246 @@
#include "../ast.h"
#include "../parser.h"
/**
 * Parse a single statement and return its AST node.
 *
 * The statement kind is chosen from the first token: if / switch / while /
 * do-while / for / break / continue / return / goto / empty ';' / block /
 * label / case / default. Anything else is parsed as an expression statement
 * terminated by ';'.
 *
 * @param parser parser state whose token stream is consumed
 * @return a newly allocated node whose `type` is one of the NT_STMT_* kinds
 */
ast_node_t* parse_stmt(parser_t* parser) {
    tok_stream_t* tokbuf = &parser->tokbuf;
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    ast_node_t* node = new_ast_node();
    switch (ttype) {
    case TOKEN_IF: {
        /**
         * if (exp) stmt
         * if (exp) stmt else stmt
         */
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_L_PAREN);
        node->if_stmt.cond = parse_expr(parser);
        flush_peek_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        node->if_stmt.if_stmt = parse_stmt(parser);
        ttype = peek_tok_type(tokbuf);
        if (ttype == TOKEN_ELSE) {
            pop_tok(tokbuf);
            node->if_stmt.else_stmt = parse_stmt(parser);
        } else {
            node->if_stmt.else_stmt = NULL;
        }
        node->type = NT_STMT_IF;
        break;
    }
    case TOKEN_SWITCH: {
        /**
         * switch (exp) stmt
         */
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_L_PAREN);
        node->switch_stmt.cond = parse_expr(parser);
        expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        node->switch_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_SWITCH;
        break;
    }
    case TOKEN_WHILE: {
        /**
         * while (exp) stmt
         */
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_L_PAREN);
        node->while_stmt.cond = parse_expr(parser);
        expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        node->while_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_WHILE;
        break;
    }
    case TOKEN_DO: {
        /**
         * do stmt while (exp)
         */
        pop_tok(tokbuf);
        node->do_while_stmt.body = parse_stmt(parser);
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_WHILE) {
            LOG_ERROR("expected while after do");
        }
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_L_PAREN);
        node->do_while_stmt.cond = parse_expr(parser);
        expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        node->type = NT_STMT_DOWHILE;
        break;
    }
    case TOKEN_FOR: {
        /**
         * for (init; [cond]; [iter]) stmt
         */
        // node->children.stmt.for_stmt.init
        pop_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_L_PAREN) {
            LOG_ERROR("expected ( after for");
        }
        pop_tok(tokbuf);
        // init expr or init decl_var
        // TODO need add this feature
        if (peek_decl(tokbuf)) {
            node->for_stmt.init = parse_decl_val(parser);
        } else {
            node->for_stmt.init = parse_expr(parser);
            expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        }
        // cond expr or null
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_SEMICOLON) {
            node->for_stmt.cond = parse_expr(parser);
            expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        } else {
            node->for_stmt.cond = NULL;
            pop_tok(tokbuf);
        }
        // iter expr or null
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_R_PAREN) {
            node->for_stmt.iter = parse_expr(parser);
            expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        } else {
            node->for_stmt.iter = NULL;
            pop_tok(tokbuf);
        }
        node->for_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_FOR;
        break;
    }
    case TOKEN_BREAK: {
        /**
         * break ;
         */
        // TODO check: terminates the enclosing for, while or do-while loop
        // or switch statement.
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        node->type = NT_STMT_BREAK;
        break;
    }
    case TOKEN_CONTINUE: {
        /**
         * continue ;
         */
        // TODO check: skips the rest of the enclosing for, while or
        // do-while loop body.
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        node->type = NT_STMT_CONTINUE;
        break;
    }
    case TOKEN_RETURN: {
        /**
         * return [exp] ;
         */
        // TODO: terminates the current function and returns the given value
        // to the caller.
        pop_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_SEMICOLON) {
            node->return_stmt.expr_stmt = parse_expr(parser);
            flush_peek_tok(tokbuf);
            expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        } else {
            node->return_stmt.expr_stmt = NULL;
            pop_tok(tokbuf);
        }
        node->type = NT_STMT_RETURN;
        break;
    }
    case TOKEN_GOTO: {
        /**
         * goto label ;
         */
        // TODO check label: transfers control unconditionally to the desired
        // location; used when conventional constructs cannot do so.
        pop_tok(tokbuf);
        // find symbol table
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_IDENT) {
            LOG_ERROR("expect identifier after goto");
        }
        // TODO filling label
        // The label identifier comes first and the ';' after it, the same
        // ident-then-punctuator order the label statement below uses.
        node->goto_stmt.label = expect_pop_ident(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        node->type = NT_STMT_GOTO;
        break;
    }
    case TOKEN_SEMICOLON: {
        /**
         * ;
         * empty stmt using by :
         * while () ;
         * if () ;
         * for () ;
         */
        pop_tok(tokbuf);
        node->type = NT_STMT_EMPTY;
        break;
    }
    case TOKEN_L_BRACE: {
        /**
         * stmt_block like: { (decl_var | stmt) ... }
         */
        node->block_stmt.block = parse_block(parser);
        node->type = NT_STMT_BLOCK;
        break;
    }
    case TOKEN_IDENT: {
        // TODO label goto
        // NOTE(review): this second peek presumably looks one token past the
        // identifier (the peek cursor is not flushed in between) — confirm
        // against the token stream implementation.
        if (peek_tok_type(tokbuf) != TOKEN_COLON) {
            goto EXP;
        }
        node->label_stmt.label = expect_pop_ident(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_COLON);
        node->type = NT_STMT_LABEL;
        break;
    }
    case TOKEN_CASE: {
        // TODO label switch
        pop_tok(tokbuf);
        LOG_ERROR("unimplemented switch label");
        node->label_stmt.label = parse_expr(parser);
        // TODO this expression must be a const int
        expect_pop_tok(tokbuf, TOKEN_COLON);
        node->type = NT_STMT_CASE;
        break;
    }
    case TOKEN_DEFAULT: {
        // TODO label switch default
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_COLON);
        node->type = NT_STMT_DEFAULT;
        break;
    }
    default: {
        /**
         * exp ;
         */
    EXP:
        node->expr_stmt.expr_stmt = parse_expr(parser);
        flush_peek_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_SEMICOLON) {
            LOG_ERROR("exp must end with \";\"");
        }
        pop_tok(tokbuf);
        node->type = NT_STMT_EXPR;
        break;
    }
    }
    return node;
}

View File

@ -0,0 +1,51 @@
#include "../ast.h"
#include "../parser.h"
#include "../type.h"
/**
 * Wrap an identifier token in an NT_TERM_IDENT node.
 *
 * An error is logged when the token is not an identifier; the node is still
 * built from whatever token was passed.
 *
 * @param tok token to copy into the node
 * @return new identifier node with no declaration attached yet
 */
ast_node_t* new_ast_ident_node(tok_t* tok) {
    if (tok->sub_type != TOKEN_IDENT) {
        LOG_ERROR("syntax error: want identifier but got %d", tok->sub_type);
    }

    ast_node_t* ident = new_ast_node();
    ident->syms.decl_node = NULL;
    ident->syms.tok = *tok; /* the node keeps its own copy of the token */
    ident->type = NT_TERM_IDENT;
    return ident;
}
/**
 * Take the next token as an identifier: build its node, then pop the token.
 *
 * @param tokbuf token stream to read from
 * @return identifier node created by new_ast_ident_node()
 */
ast_node_t* expect_pop_ident(tok_stream_t* tokbuf) {
    flush_peek_tok(tokbuf);
    /* The token is copied into the node before it is popped. */
    ast_node_t* ident = new_ast_ident_node(peek_tok(tokbuf));
    pop_tok(tokbuf);
    return ident;
}
/**
 * Parse a basic type specifier (void/char/short/int/long/float/double),
 * optionally followed by a single '*', and return an NT_TERM_TYPE node.
 *
 * The recognised data type is not stored in the node yet; the typing system
 * is still to be added.
 *
 * @param parser parser state whose token stream is consumed
 * @return new NT_TERM_TYPE node
 */
ast_node_t* parse_type(parser_t* parser) {
    tok_stream_t* tokbuf = &parser->tokbuf;
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    /* Initialised so the error path below never reads an indeterminate value. */
    data_type_t dtype = TYPE_VOID;
    switch (ttype) {
    case TOKEN_VOID:   dtype = TYPE_VOID;   break;
    case TOKEN_CHAR:   dtype = TYPE_CHAR;   break;
    case TOKEN_SHORT:  dtype = TYPE_SHORT;  break;
    case TOKEN_INT:    dtype = TYPE_INT;    break;
    case TOKEN_LONG:   dtype = TYPE_LONG;   break;
    case TOKEN_FLOAT:  dtype = TYPE_FLOAT;  break;
    case TOKEN_DOUBLE: dtype = TYPE_DOUBLE; break;
    default:
        LOG_ERROR("无效的类型说明符");
        break;
    }
    ast_node_t* node = new_ast_node();
    node->type = NT_TERM_TYPE;
    // TODO the typing system will consume dtype; until then mark it as
    // intentionally unused instead of doing arithmetic on it.
    (void)dtype;
    pop_tok(tokbuf);
    if (peek_tok_type(tokbuf) == TOKEN_MUL) {
        pop_tok(tokbuf);
    }
    return node;
}

View File

@ -0,0 +1,136 @@
// #include "../parser.h"
// #include "../type.h"
// enum TypeParseState {
// TPS_BASE_TYPE, // 解析基础类型 (int/char等)
// TPS_QUALIFIER, // 解析限定符 (const/volatile)
// TPS_POINTER, // 解析指针 (*)
// TPS_ARRAY, // 解析数组维度 ([n])
// TPS_FUNC_PARAMS // 解析函数参数列表
// };
// ast_node_t* parse_type(parser_t* p) {
// ast_node_t* type_root = new_ast_node();
// ast_node_t* current = type_root;
// current->type = NT_TYPE_BASE;
// enum TypeParseState state = TPS_QUALIFIER;
// int pointer_level = 0;
// while (1) {
// tok_type_t t = peektoktype(p);
// switch (state) {
// // 基础类型解析 (int, char等)
// case TPS_BASE_TYPE:
// if (is_base_type(t)) {
// // current->data.data_type = token_to_datatype(t);
// pop_tok(p);
// state = TPS_POINTER;
// } else {
// error("Expected type specifier");
// }
// break;
// // 类型限定符 (const/volatile)
// case TPS_QUALIFIER:
// if (t == TOKEN_CONST || t == TOKEN_VOLATILE) {
// ast_node_t* qual_node = new_ast_node();
// qual_node->type = NT_TYPE_QUAL;
// qual_node->data.data_type = t; // 复用data_type字段存储限定符
// current->child.decl.type = qual_node;
// current = qual_node;
// pop_tok(p);
// } else {
// state = TPS_BASE_TYPE;
// }
// break;
// // 指针解析 (*)
// case TPS_POINTER:
// if (t == TOKEN_MUL) {
// ast_node_t* ptr_node = new_ast_node();
// ptr_node->type = NT_TYPE_PTR;
// current->child.decl.type = ptr_node;
// current = ptr_node;
// pointer_level++;
// pop_tok(p);
// } else {
// state = TPS_ARRAY;
// }
// break;
// // 数组维度 ([n])
// case TPS_ARRAY:
// if (t == TOKEN_L_BRACKET) {
// pop_tok(p); // 吃掉[
// ast_node_t* arr_node = new_ast_node();
// arr_node->type = NT_TYPE_ARRAY;
// // 解析数组大小(仅语法检查)
// if (peektoktype(p) != TOKEN_R_BRACKET) {
// parse_expr(p); // 不计算实际值
// }
// expecttok(p, TOKEN_R_BRACKET);
// current->child.decl.type = arr_node;
// current = arr_node;
// } else {
// state = TPS_FUNC_PARAMS;
// }
// break;
// // 函数参数列表
// case TPS_FUNC_PARAMS:
// if (t == TOKEN_L_PAREN) {
// ast_node_t* func_node = new_ast_node();
// func_node->type = NT_TYPE_FUNC;
// current->child.decl.type = func_node;
// // 解析参数列表(仅结构,不验证类型)
// parse_param_list(p, func_node);
// current = func_node;
// } else {
// return type_root; // 类型解析结束
// }
// break;
// }
// }
// }
// // 判断是否是基础类型
// static int is_base_type(tok_type_t t) {
// return t >= TOKEN_VOID && t <= TOKEN_DOUBLE;
// }
// // // 转换token到数据类型简化版
// // static enum DataType token_to_datatype(tok_type_t t) {
// // static enum DataType map[] = {
// // [TOKEN_VOID] = DT_VOID,
// // [TOKEN_CHAR] = DT_CHAR,
// // [TOKEN_INT] = DT_INT,
// // // ...其他类型映射
// // };
// // return map[t];
// // }
// // 解析参数列表(轻量级)
// static void parse_param_list(parser_t* p, ast_node_t* func) {
// expecttok(p, TOKEN_L_PAREN);
// while (peektoktype(p) != TOKEN_R_PAREN) {
// ast_node_t* param = parse_type(p); // 递归解析类型
// // 允许可选参数名(仅语法检查)
// if (peektoktype(p) == TOKEN_IDENT) {
// pop_tok(p); // 吃掉参数名
// }
// if (peektoktype(p) == TOKEN_COMMA) {
// pop_tok(p);
// }
// }
// expecttok(p, TOKEN_R_PAREN);
// }

View File

@ -0,0 +1,20 @@
#include <lib/core.h>
#include "parser.h"
#include "type.h"
/**
 * Prepare a parser for use.
 *
 * Initialises the core library, stores the lexer and symbol table, clears the
 * AST pointers and wires the token stream to the parser's own TokenBuffer
 * array with get_valid_token as the token source.
 */
void init_parser(parser_t* parser, cc_lexer_t* lexer, symtab_t* symtab) {
    init_lib_core();

    parser->lexer = lexer;
    parser->symtab = symtab;
    parser->root = NULL;
    parser->cur_node = NULL;

    init_tokbuf(&parser->tokbuf, lexer, (tok_stream_get_func)get_valid_token);
    parser->tokbuf.buf = parser->TokenBuffer;
    parser->tokbuf.cap = sizeof(parser->TokenBuffer) / sizeof(parser->TokenBuffer[0]);
}
/**
 * Run the parser over the whole input by delegating to parse_prog().
 *
 * @param parser parser passed straight through to parse_prog()
 */
void run_parser(parser_t* parser) {
parse_prog(parser);
}

View File

@ -0,0 +1,23 @@
#ifndef __PARSER_H__
#define __PARSER_H__
#include "../lexer/lexer.h"
#include "symtab/symtab.h"
#include "ast.h"
/* Number of token slots in parser_t::TokenBuffer. */
#define PARSER_MAX_TOKEN_QUEUE 16
/* Parser state shared by all parse_* routines. */
typedef struct parser {
ast_node_t* root;      /* root of the AST being built */
ast_node_t* cur_node;  /* set to NULL by init_parser() */
cc_lexer_t* lexer;     /* token source */
symtab_t* symtab;      /* symbol table used while parsing */
tok_stream_t tokbuf;   /* token stream; init_parser() points it at TokenBuffer */
tok_t TokenBuffer[PARSER_MAX_TOKEN_QUEUE]; /* storage used by tokbuf */
int err_level;         /* NOTE(review): not written by init_parser() */
} parser_t;
/* Bind a lexer and symbol table to `parser` and set up its token stream. */
void init_parser(parser_t* parser, cc_lexer_t* lexer, symtab_t* symtab);
/* Parse the whole input; the resulting tree is left in parser->root. */
void run_parser(parser_t* parser);
#endif

View File

@ -0,0 +1,62 @@
#include "symtab.h"
/* Hash callback for symbol tables: hashes only the name stored in the key. */
static u32_t hash_func(const void* _key) {
    return rt_strhash(((const symtab_key_t*)_key)->strp_name);
}
/*
 * Key comparison callback for symbol tables.
 * Returns 0 when both keys carry the same name and 1 otherwise; the uid field
 * is not compared.
 */
static int key_cmp(const void* _key1, const void* _key2) {
    const symtab_key_t* lhs = (const symtab_key_t*)_key1;
    const symtab_key_t* rhs = (const symtab_key_t*)_key2;
    return (rt_strcmp(lhs->strp_name, rhs->strp_name) == 0) ? 0 : 1;
}
/**
 * Reset a symbol table: no current scope, scope ids start at 1, and both the
 * global and the local hash table use the name-based hash/compare callbacks.
 */
void init_symtab(symtab_t* symtab) {
    symtab->gid = 1;
    symtab->cur_scope = NULL;

    init_hashtable(&symtab->global_table);
    symtab->global_table.key_cmp = key_cmp;
    symtab->global_table.hash_func = hash_func;

    init_hashtable(&symtab->local_table);
    symtab->local_table.key_cmp = key_cmp;
    symtab->local_table.hash_func = hash_func;
}
/* Release a symbol table. Not implemented yet: currently does nothing. */
void symtab_destroy(symtab_t* symtab) {
// TODO
}
/**
 * Open a new innermost scope.
 *
 * The scope receives the next unique id, its own hash table and a link to the
 * scope that was current before the call.
 */
void symtab_enter_scope(symtab_t* symtab) {
    scope_t* inner = (scope_t*)salloc_alloc(sizeof(*inner));

    init_hashtable(&inner->table);
    inner->table.key_cmp = key_cmp;
    inner->table.hash_func = hash_func;

    inner->uid = symtab->gid++;
    inner->parent = symtab->cur_scope;
    symtab->cur_scope = inner;
}
/**
 * Close the innermost scope: its table is destroyed, the scope is freed and
 * the parent becomes current again. A scope must be open.
 */
void symtab_leave_scope(symtab_t* symtab) {
    Assert(symtab->cur_scope != NULL);

    scope_t* leaving = symtab->cur_scope;
    symtab->cur_scope = leaving->parent;

    hashtable_destory(&leaving->table);
    salloc_free(leaving);
}
/**
 * Look a key up, starting at the innermost scope and walking outwards.
 *
 * @return the first non-NULL value found, or NULL when no scope holds the key
 */
void* symtab_get(symtab_t* symtab, symtab_key_t* key) {
    scope_t* level = symtab->cur_scope;
    while (level != NULL) {
        void* found = hashtable_get(&level->table, key);
        if (found != NULL) {
            return found;
        }
        level = level->parent;
    }
    return NULL;
}
/**
 * Store `val` under `key` in the table of the current (innermost) scope and
 * return whatever hashtable_set() returns.
 * The current scope is dereferenced without a check, so a scope must have
 * been entered before this is called.
 */
void* symtab_add(symtab_t* symtab, symtab_key_t* key, void* val) {
return hashtable_set(&symtab->cur_scope->table, key, val);
}

View File

@ -0,0 +1,39 @@
#ifndef __SMCC_SYMTABL_H__
#define __SMCC_SYMTABL_H__
#include <lib/core.h>
#include <lib/utils/ds/hashtable.h>
#include <lib/utils/strpool/strpool.h>
// FIXME there may be a better way to solve this architecturally
/* Lookup key for a symbol. */
typedef struct symtab_key {
const char* strp_name; /* symbol name; the only field hashed and compared in symtab.c */
int uid;               /* id of the scope the key was created in */
} symtab_key_t;
/* One lexical scope in the scope chain. */
typedef struct scope {
int uid;               /* unique id taken from symtab_t::gid */
struct scope* parent;  /* enclosing scope, NULL for the outermost one */
hash_table_t table;    /* symbols declared directly in this scope */
} scope_t;
/* Symbol table: a chain of scopes plus two standalone tables. */
typedef struct symtab {
hash_table_t global_table; /* initialised by init_symtab() */
hash_table_t local_table;  /* initialised by init_symtab() */
scope_t* cur_scope;        /* innermost open scope, NULL when none is open */
int gid; // global id for generating unique scope id
} symtab_t;
/* Reset the table: no open scope, ids start at 1. */
void init_symtab(symtab_t* symtab);
/* Release the table (currently an empty stub in symtab.c). */
void symtab_destroy(symtab_t* symtab);
/* Open a new innermost scope. */
void symtab_enter_scope(symtab_t* symtab);
/* Close the innermost scope and free it. */
void symtab_leave_scope(symtab_t* symtab);
/* Search from the innermost scope outwards; NULL when the key is not found. */
void* symtab_get(symtab_t* symtab, symtab_key_t* key);
// WARNING key and val need you save, especially val
void* symtab_add(symtab_t* symtab, symtab_key_t* key, void* val);
#endif

View File

@ -0,0 +1,35 @@
#ifndef __TYPE_H__
#define __TYPE_H__
#include "../lexer/token.h"
/* Kinds of data types known to the front end. */
typedef enum {
// basic types
TYPE_VOID,
TYPE_CHAR,
TYPE_SHORT,
TYPE_INT,
TYPE_LONG,
TYPE_LONG_LONG,
TYPE_FLOAT,
TYPE_DOUBLE,
TYPE_LONG_DOUBLE,
// prefix
TYPE_SIGNED,
TYPE_UNSIGNED,
// not supported yet:
// TYPE_BOOL,
// TYPE_COMPLEX,
// TYPE_IMAGINARY,
// derived and user-defined types
TYPE_ENUM,
TYPE_ARRAY,
TYPE_STRUCT,
TYPE_UNION,
TYPE_FUNCTION,
TYPE_POINTER,
TYPE_ATOMIC,
TYPE_TYPEDEF,
} data_type_t;
#endif

View File

@ -0,0 +1,33 @@
# Compiler settings
CC = gcc
AR = ar
CFLAGS = -g -Wall -I../..
IR_DIR = ./ir
# Source file list
SRCS = \
middleend.c \
$(IR_DIR)/ir.c \
$(IR_DIR)/ir_ast.c \
$(IR_DIR)/ir_lib.c \
$(IR_DIR)/ir_type.c
# Derive the object file list from the sources
OBJS = $(SRCS:.c=.o)
# Final target: static library of the middle end
TARGET = libmiddleend.a
all: $(TARGET)
# Archive all objects into the static library
$(TARGET): $(OBJS)
$(AR) rcs $@ $^
# Compile each .c file into its .o
%.o: %.c
$(CC) $(CFLAGS) -c -o $@ $<
# Remove build outputs
clean:
rm -f $(OBJS) $(TARGET)
.PHONY: all clean

View File

View File

@ -0,0 +1,152 @@
// ir_core.h
#ifndef IR_CORE_H
#define IR_CORE_H
#include <lib/utils/ds/vector.h>
// Error code definitions
typedef enum {
IR_EC_SUCCESS = 0, // success
IR_EC_MEMORY_ERROR, // memory allocation failed
IR_EC_TYPE_MISMATCH, // type mismatch
IR_EC_INVALID_OPERAND, // invalid operand
IR_EC_DUPLICATE_SYMBOL, // symbol redefinition
} ir_ecode_t;
/*
 * IR type descriptor.
 *
 * The struct carries the tag `ir_type` so that the `struct ir_type *` members
 * below really refer to this type; without the tag they would name a
 * separate, never-completed struct.
 */
typedef struct ir_type {
    /* Which kind of type this is; selects the union member in use. */
    enum {
        IR_TYPE_INT32,
        IR_TYPE_PTR,
        IR_TYPE_ARRAY,
        IR_TYPE_FUNC,
        IR_TYPE_VOID,
    } tag;
    union {
        /* Element type and length of an array. */
        struct {
            struct ir_type *base;
            rt_size_t len;
        } arr;
        /* Return type and parameter types of a function. */
        struct {
            struct ir_type *ret;
            struct ir_type **params;
            rt_size_t param_cnt;
        } func;
    };
} ir_type_t;
/* Forward declaration; the full struct ir_node is defined further down. */
typedef struct ir_node ir_node_t;
/* A basic block: a label plus the list of instructions it contains. */
typedef struct ir_bblock {
const char *label;
vector_header(instrs, ir_node_t*);
// ir_arr_t used_by;
} ir_bblock_t; // basic block
/* An IR function: name, type, parameter nodes and its basic blocks. */
typedef struct {
const char *name;
ir_type_t *type;
vector_header(params, ir_node_t*);
vector_header(bblocks, ir_bblock_t*);
} ir_func_t;
/* A whole IR program: global nodes, defined functions and external functions. */
typedef struct {
vector_header(global, ir_node_t*);
vector_header(funcs, ir_func_t*);
vector_header(extern_funcs, ir_func_t*);
} ir_prog_t;
/* Kind of an IR node; selects which member of ir_node::data is in use. */
typedef enum ir_node_tag {
IR_NODE_NULL,
IR_NODE_CONST_INT, // integer constant (data.const_int)
IR_NODE_ALLOC,     // storage slot; no data member of its own
IR_NODE_LOAD,      // data.load
IR_NODE_STORE,     // data.store
IR_NODE_GET_PTR,   // data.get_ptr
IR_NODE_OP,        // unary/binary operation (data.op)
IR_NODE_BRANCH,    // conditional branch (data.branch)
IR_NODE_JUMP,      // unconditional jump (data.jump)
IR_NODE_CALL,      // function call (data.call)
IR_NODE_RET,       // return (data.ret)
} ir_node_tag_t;
/*
 * One IR value or instruction.
 * `tag` tells which member of `data` is meaningful.
 */
struct ir_node {
const ir_type_t* type;  // type of the node, may be NULL
const char* name;       // optional name, may be NULL
vector_header(used_by, ir_node_t*);  // nodes that use this node as an operand
ir_node_tag_t tag;
union {
// IR_NODE_CONST_INT
struct {
int32_t val;
} const_int;
// IR_NODE_LOAD
struct {
ir_node_t* target;
} load;
// IR_NODE_STORE
struct {
ir_node_t* target;
ir_node_t* value;
} store;
// IR_NODE_GET_PTR
struct {
ir_node_t* src_addr;
ir_node_t* offset;
} get_ptr;
// IR_NODE_OP
struct {
enum {
/// Not equal to.
IR_OP_NEQ,
/// Equal to.
IR_OP_EQ,
/// Greater than.
IR_OP_GT,
/// Less than.
IR_OP_LT,
/// Greater than or equal to.
IR_OP_GE,
/// Less than or equal to.
IR_OP_LE,
/// Addition.
IR_OP_ADD,
/// Subtraction.
IR_OP_SUB,
/// Multiplication.
IR_OP_MUL,
/// Division.
IR_OP_DIV,
/// Modulo.
IR_OP_MOD,
/// Bitwise AND.
IR_OP_AND,
/// Bitwise OR.
IR_OP_OR,
/// Bitwise XOR.
IR_OP_XOR,
/// Bitwise NOT.
IR_OP_NOT,
/// Shift left logical.
IR_OP_SHL,
/// Shift right logical.
IR_OP_SHR,
/// Shift right arithmetic.
IR_OP_SAR,
} op;
ir_node_t* lhs;
ir_node_t* rhs;
} op;
// IR_NODE_BRANCH
struct {
ir_node_t* cond;
ir_bblock_t* true_bblock;
ir_bblock_t* false_bblock;
} branch;
// IR_NODE_JUMP
struct {
ir_bblock_t* target_bblock;
} jump;
// IR_NODE_CALL
struct {
ir_func_t* callee;
vector_header(args, ir_node_t*);
} call;
// IR_NODE_RET
struct {
ir_node_t* ret_val;
} ret;
} data;
};
#endif // IR_CORE_H

View File

@ -0,0 +1,446 @@
#include "ir.h"
#include "ir_lib.h"
#include "ir_type.h"
#include "../../frontend/frontend.h"
#include "../../frontend/parser/ast.h"
// Context structure recording the state of the generation process
typedef struct {
ir_func_t* cur_func; // function currently being processed
ir_bblock_t* cur_block; // current basic block
} IRGenContext;
// File-wide generation state; gen_ir_func() saves and restores it around each function.
IRGenContext ctx;
// The program being built; gen_ir_from_ast() returns its address.
ir_prog_t prog;
void _gen_ir_from_ast(ast_node_t* node);
/*
 * Append `node` to a basic block.
 * Passing NULL for `block` means "the block currently being generated".
 */
static void emit_instr(ir_bblock_t* block, ir_node_t* node) {
    ir_bblock_t* dest = (block != NULL) ? block : ctx.cur_block;
    vector_push(dest->instrs, node);
}
/*
 * Create a conditional branch on `cond` and append it to the current block.
 * Returns the new branch node.
 */
static ir_node_t* emit_br(ir_node_t* cond, ir_bblock_t* trueb, ir_bblock_t* falseb) {
    ir_node_t* branch = new_ir_node(NULL, IR_NODE_BRANCH);

    branch->data.branch.cond = cond;
    branch->data.branch.true_bblock = trueb;
    branch->data.branch.false_bblock = falseb;

    emit_instr(NULL, branch);
    return branch;
}
static ir_node_t* gen_ir_expr(ast_node_t* node);
/*
 * Generate IR for a terminal expression node.
 *
 *  - NT_TERM_VAL:   a new constant-int node (it is not appended to any block)
 *  - NT_TERM_IDENT: the IR node stored on the identifier's declaration
 *  - NT_TERM_CALL:  a call node with every argument expression generated in
 *                   order; the call is appended to the current block
 *
 * Any other node type triggers Panic().
 */
static ir_node_t* gen_ir_term(ast_node_t* node) {
    if (node->type == NT_TERM_VAL) {
        ir_node_t* konst = new_ir_node(NULL, IR_NODE_CONST_INT);
        konst->data.const_int.val = node->syms.tok.val.i;
        return konst;
    }

    if (node->type == NT_TERM_IDENT) {
        ir_node_t* slot = node->syms.decl_node->decl_val.data;
        return slot;
    }

    if (node->type == NT_TERM_CALL) {
        ir_node_t* call = new_ir_node(NULL, IR_NODE_CALL);
        call->data.call.callee = node->call.func_decl->decl_func.def->func.data;

        int idx = 0;
        while (idx < node->call.params->params.params.size) {
            ast_node_t* arg_ast = vector_at(node->call.params->params.params, idx);
            ir_node_t* arg_ir = gen_ir_expr(arg_ast);
            vector_push(call->data.call.args, arg_ir);
            idx++;
        }

        emit_instr(NULL, call);
        return call;
    }

    Panic("gen_ir_expr: unknown node type");
    TODO();
    return NULL;
}
/*
 * Generate IR for an expression and return the node holding its value.
 *
 * Terminal nodes are delegated to gen_ir_term(). For every other node the
 * left operand is generated first, then the right one when present. A comma
 * expression yields its right operand. Arithmetic, bitwise, shift and
 * comparison operators become IR_NODE_OP instructions, `!x` becomes
 * `0 == x`, and assignment becomes IR_NODE_STORE whose value is the right
 * operand.
 *
 * Operators that are not implemented yet (~, &&, ||, ?:) log an error and
 * return NULL without appending anything to the current block.
 */
static ir_node_t* gen_ir_expr(ast_node_t* node) {
    // term node
    switch (node->type) {
    case NT_TERM_VAL:
    case NT_TERM_IDENT:
    case NT_TERM_CALL:
        return gen_ir_term(node);
    default:
        break;
    }

    ir_node_t* lhs = gen_ir_expr(node->expr.left);
    ir_node_t* rhs = node->expr.right ? gen_ir_expr(node->expr.right) : NULL;
    if (node->type == NT_COMMA) {
        return rhs;
    }

    ir_node_t* instr = NULL;
    ir_node_t* ret = NULL;

#define BINOP(operand) do { \
    instr = new_ir_node(NULL, IR_NODE_OP); \
    instr->data.op.op = operand; \
    instr->data.op.lhs = lhs; \
    instr->data.op.rhs = rhs; \
    ret = instr; \
} while (0)

    switch (node->type) {
    case NT_ADD: {
        // (expr) + (expr)
        BINOP(IR_OP_ADD); break;
    }
    case NT_SUB: {
        // (expr) - (expr)
        BINOP(IR_OP_SUB); break;
    }
    case NT_MUL: {
        // (expr) * (expr)
        BINOP(IR_OP_MUL); break;
    }
    case NT_DIV: {
        // (expr) / (expr)
        BINOP(IR_OP_DIV); break;
    }
    case NT_MOD: {
        // (expr) % (expr)
        BINOP(IR_OP_MOD); break;
    }
    case NT_AND: {
        // (expr) & (expr)
        BINOP(IR_OP_AND); break;
    }
    case NT_OR: {
        // (expr) | (expr)
        BINOP(IR_OP_OR); break;
    }
    case NT_XOR: {
        // (expr) ^ (expr)
        BINOP(IR_OP_XOR); break;
    }
    case NT_BIT_NOT: {
        // ~ (expr)
        // TODO
        // BINOP(IR_OP_NOT);
        break;
    }
    case NT_L_SH: {
        // (expr) << (expr)
        BINOP(IR_OP_SHL);
        break;
    }
    case NT_R_SH: {
        // (expr) >> (expr)
        BINOP(IR_OP_SHR); // Shift right logical.
        // TODO
        // BINOP(IR_OP_SAR); // Shift right arithmetic.
        break;
    }
    case NT_EQ: {
        // (expr) == (expr)
        BINOP(IR_OP_EQ); break;
    }
    case NT_NEQ: {
        // (expr) != (expr)
        BINOP(IR_OP_NEQ); break;
    }
    case NT_LE: {
        // (expr) <= (expr)
        BINOP(IR_OP_LE); break;
    }
    case NT_GE: {
        // (expr) >= (expr)
        BINOP(IR_OP_GE); break;
    }
    case NT_LT: {
        // (expr) < (expr)
        BINOP(IR_OP_LT); break;
    }
    case NT_GT: {
        // (expr) > (expr)
        BINOP(IR_OP_GT); break;
    }
    case NT_AND_AND: // (expr) && (expr)
        LOG_ERROR("unimpliment");
        break;
    case NT_OR_OR: // (expr) || (expr)
        LOG_ERROR("unimpliment");
        break;
    case NT_NOT: {
        // ! (expr)  is lowered to  0 == (expr)
        instr = new_ir_node(NULL, IR_NODE_OP);
        instr->data.op.op = IR_OP_EQ;
        instr->data.op.lhs = &node_zero;
        instr->data.op.rhs = lhs;
        ret = instr;
        break;
    }
    case NT_ASSIGN: {
        // (expr) = (expr)
        instr = new_ir_node(NULL, IR_NODE_STORE);
        instr->data.store.target = lhs;
        instr->data.store.value = rhs;
        ret = rhs;
        break;
    }
    // case NT_COND: // (expr) ? (expr) : (expr)
    default: {
        // TODO self error msg
        LOG_ERROR("Unsupported IR generation for AST node type %d", node->type);
        break;
    }
    }

    // Nothing was built for an unsupported operator: do not put a NULL
    // instruction into the block or into any use list.
    if (instr == NULL) {
        return ret;
    }

    // Record the users only now that the instruction actually exists.
    if (lhs != NULL) { vector_push(lhs->used_by, instr); }
    if (rhs != NULL) { vector_push(rhs->used_by, instr); }

    emit_instr(NULL, instr);
    return ret;
}
/*
 * Generate IR for a function definition.
 *
 * Adds an "entry" block to `func`, registers `func` in the program, creates
 * one ALLOC node per parameter (typed i32 for now) and then generates the
 * body. The generation context is restored on return.
 */
static void gen_ir_func(ast_node_t* node, ir_func_t* func) {
    Assert(node->type == NT_FUNC);

    ir_bblock_t* entry_bb = new_ir_bblock("entry");
    vector_push(func->bblocks, entry_bb);
    vector_push(prog.funcs, func);

    IRGenContext saved = ctx;
    ctx.cur_block = entry_bb;
    ctx.cur_func = func;

    ast_node_t* plist = node->func.decl->decl_func.params;
    int idx = 0;
    while (idx < plist->params.params.size) {
        ast_node_t* p = plist->params.params.data[idx];
        ir_node_t* slot = new_ir_node(p->decl_val.name->syms.tok.val.str, IR_NODE_ALLOC);

        // TODO Typing system
        slot->type = &type_i32;
        p->decl_val.data = slot; /* lets identifier uses find the slot */

        emit_instr(entry_bb, slot);
        vector_push(func->params, slot);
        idx++;
    }

    _gen_ir_from_ast(node->func.body);

    ctx = saved;
}
/*
 * Generate IR for the control-flow statements if / while / do-while / for.
 *
 * Three fresh basic blocks are created and appended to the current function
 * up front; each statement kind assigns them its own roles. On return
 * ctx.cur_block is the block in which code following the statement must be
 * generated.
 */
void gen_ir_jmp(ast_node_t* node) {
    ir_bblock_t* bblocks[3];
    for (int i = 0; i < (int)(sizeof(bblocks) / sizeof(bblocks[0])); i++) {
        bblocks[i] = new_ir_bblock(NULL);
        vector_push(ctx.cur_func->bblocks, bblocks[i]);
    }

#define NEW_IR_JMP(name, block) do { \
    name = new_ir_node(NULL, IR_NODE_JUMP); \
    name->data.jump.target_bblock = block; \
} while (0)

    switch (node->type) {
    case NT_STMT_IF: {
        ir_bblock_t* trueb = bblocks[0];
        trueb->label = "if_true";
        ir_bblock_t* falseb = bblocks[1];
        falseb->label = "if_false";
        ir_bblock_t* endb = bblocks[2];
        endb->label = "if_end";
        ir_node_t* jmp;

        // cond
        ir_node_t* cond = gen_ir_expr(node->if_stmt.cond);
        emit_br(cond, trueb, falseb);

        // true block
        ctx.cur_block = trueb;
        _gen_ir_from_ast(node->if_stmt.if_stmt);
        // The body may have opened further blocks (nested control flow or a
        // return); the jump to the join point belongs to the block the body
        // ended in, not necessarily to `trueb`.
        ir_bblock_t* true_tail = ctx.cur_block;

        // else block
        ir_bblock_t* joinb;
        if (node->if_stmt.else_stmt != NULL) {
            ctx.cur_block = falseb;
            _gen_ir_from_ast(node->if_stmt.else_stmt);
            joinb = endb;
            NEW_IR_JMP(jmp, joinb);
            emit_instr(NULL, jmp); // appended where the else body ended
        } else {
            // Without an else branch the false block is the join point.
            joinb = falseb;
        }

        NEW_IR_JMP(jmp, joinb);
        emit_instr(true_tail, jmp);
        ctx.cur_block = joinb;
        break;
    }
    case NT_STMT_WHILE: {
        ir_bblock_t* entryb = bblocks[0];
        ir_bblock_t* bodyb = bblocks[1];
        ir_bblock_t* endb = bblocks[2];

        ir_node_t* entry;
        NEW_IR_JMP(entry, entryb);
        emit_instr(NULL, entry);

        // Entry:
        ctx.cur_block = entryb;
        ir_node_t* cond = gen_ir_expr(node->while_stmt.cond);
        emit_br(cond, bodyb, endb);

        // Body:
        ir_node_t* jmp;
        ctx.cur_block = bodyb;
        _gen_ir_from_ast(node->while_stmt.body);
        NEW_IR_JMP(jmp, entryb);
        emit_instr(NULL, jmp);

        // End:
        ctx.cur_block = endb;
        break;
    }
    case NT_STMT_DOWHILE: {
        ir_bblock_t* entryb = bblocks[0];
        ir_bblock_t* bodyb = bblocks[1];
        ir_bblock_t* endb = bblocks[2];

        ir_node_t* entry;
        NEW_IR_JMP(entry, bodyb);
        emit_instr(NULL, entry);

        // Body:
        ctx.cur_block = bodyb;
        _gen_ir_from_ast(node->do_while_stmt.body);
        ir_node_t* jmp;
        NEW_IR_JMP(jmp, entryb);
        emit_instr(NULL, jmp);

        // Entry:
        ctx.cur_block = entryb;
        ir_node_t* cond = gen_ir_expr(node->do_while_stmt.cond);
        emit_br(cond, bodyb, endb);

        // End:
        ctx.cur_block = endb;
        break;
    }
    case NT_STMT_FOR: {
        ir_bblock_t* entryb = bblocks[0];
        ir_bblock_t* bodyb = bblocks[1];
        ir_bblock_t* endb = bblocks[2];

        if (node->for_stmt.init) {
            _gen_ir_from_ast(node->for_stmt.init);
        }
        ir_node_t* entry;
        NEW_IR_JMP(entry, entryb);
        emit_instr(NULL, entry);

        // Entry:
        ctx.cur_block = entryb;
        if (node->for_stmt.cond) {
            ir_node_t* cond = gen_ir_expr(node->for_stmt.cond);
            emit_br(cond, bodyb, endb);
        } else {
            // No condition: always enter the body. The jump has to be
            // emitted, otherwise the entry block has no terminator.
            ir_node_t* jmp;
            NEW_IR_JMP(jmp, bodyb);
            emit_instr(NULL, jmp);
        }

        // Body:
        ctx.cur_block = bodyb;
        _gen_ir_from_ast(node->for_stmt.body);
        if (node->for_stmt.iter) {
            gen_ir_expr(node->for_stmt.iter);
        }
        ir_node_t* jmp;
        NEW_IR_JMP(jmp, entryb);
        emit_instr(NULL, jmp);

        // End:
        ctx.cur_block = endb;
        break;
    }
    default:
        LOG_ERROR("ir jmp can't hit here");
    }
}
/*
 * Entry point of IR generation: lowers every child of the NT_ROOT node in
 * order and returns the file-wide program object.
 */
ir_prog_t* gen_ir_from_ast(ast_node_t* root) {
    Assert(root->type == NT_ROOT);

    int idx = 0;
    while (idx < root->root.children.size) {
        _gen_ir_from_ast(root->root.children.data[idx]);
        idx++;
    }

    return &prog;
}
/*
 * Lower one AST node (declaration or statement) to IR, dispatching on its
 * type. Results go into the file-wide `prog` and `ctx` state.
 */
void _gen_ir_from_ast(ast_node_t* node) {
switch (node->type) {
case NT_DECL_FUNC: {
// Every declared function gets an IR function object (typed i32 for now).
ir_func_t* func = new_ir_func(node->decl_func.name->syms.tok.val.str, &type_i32);
if (node->decl_func.def == NULL) {
// Declaration without a body: attach a body-less definition node so
// calls can reach the IR function through decl_func.def, and record
// the function as external.
ast_node_t* def = new_ast_node();
def->func.body = NULL;
def->func.decl = node;
node->decl_func.def = def;
vector_push(prog.extern_funcs, func);
}
node->decl_func.def->func.data = func;
break;
}
case NT_FUNC: {
// func.data was filled in when the matching NT_DECL_FUNC node was lowered.
gen_ir_func(node, node->func.data);
break;
}
case NT_STMT_RETURN: {
ir_node_t* ret = NULL;
if (node->return_stmt.expr_stmt != NULL) {
ret = gen_ir_expr(node->return_stmt.expr_stmt);
}
ir_node_t* ir = new_ir_node(NULL, IR_NODE_RET);
ir->data.ret.ret_val = ret;
emit_instr(NULL, ir);
// Anything generated after the return goes into a fresh block.
ir_bblock_t* block = new_ir_bblock(NULL);
ctx.cur_block = block;
vector_push(ctx.cur_func->bblocks, block);
break;
}
case NT_STMT_BLOCK: {
_gen_ir_from_ast(node->block_stmt.block);
break;
}
case NT_BLOCK: {
// Lower the children of the block in order.
for (int i = 0; i < node->block.children.size; i ++) {
_gen_ir_from_ast(node->block.children.data[i]);
}
break;
}
case NT_STMT_IF:
case NT_STMT_WHILE:
case NT_STMT_DOWHILE:
case NT_STMT_FOR:
gen_ir_jmp(node);
break;
case NT_DECL_VAR: {
// A variable becomes an ALLOC node appended to the current block.
ir_node_t* ir = new_ir_node(node->decl_val.name->syms.tok.val.str, IR_NODE_ALLOC);
emit_instr(NULL, ir);
// TODO Typing system
ir->type = &type_i32;
node->decl_val.data = ir;
// The initialiser, when present, is lowered as a statement.
if (node->decl_val.expr_stmt != NULL) {
_gen_ir_from_ast(node->decl_val.expr_stmt);
}
break;
}
case NT_STMT_EXPR: {
gen_ir_expr(node->expr_stmt.expr_stmt);
break;
}
case NT_STMT_EMPTY: {
break;
}
default:
// TODO: error handling
LOG_ERROR("unknown node type");
break;
}
}

View File

@ -0,0 +1,8 @@
#ifndef __IR_AST_H__
#define __IR_AST_H__
#include "ir.h"
/* Forward declaration so this header does not need the AST definitions. */
typedef struct ast_node ast_node_t;
/* Lower the AST rooted at `node` (an NT_ROOT node) to an IR program. */
ir_prog_t* gen_ir_from_ast(ast_node_t* node);
#endif // __IR_AST_H__

View File

@ -0,0 +1,76 @@
#include "ir.h"
#include "ir_lib.h"
#include "ir_type.h"
#include <stdio.h>
#include <assert.h>
/* State of an IR dump: the stream the text is written to. */
typedef struct ir_dump {
FILE* fp;
} ir_dump_t;
/*
 * Write a one-line textual description of an IR node to dump->fp.
 *
 * The line starts with "%<address of node>" followed by a mnemonic and the
 * operands of the node. Operands are printed as addresses because nodes are
 * not numbered. The node itself is only read, never modified.
 *
 * Asserts on node tags that have no dump format (e.g. IR_NODE_NULL).
 */
void dump_ir_node(ir_node_t* node, ir_dump_t* dump) {
    FILE* fp = dump->fp;

    /* %p requires a void pointer. */
    fprintf(fp, "%%%p", (void*)node);

    switch (node->tag) {
    case IR_NODE_ALLOC: {
        fprintf(fp, " = alloc");
        break;
    }
    case IR_NODE_BRANCH: {
        fprintf(fp, " = br %%%p, %p, %p",
                (void*)node->data.branch.cond,
                (void*)node->data.branch.true_bblock,
                (void*)node->data.branch.false_bblock);
        break;
    }
    case IR_NODE_CALL: {
        fprintf(fp, " = call %p(", (void*)node->data.call.callee);
        for (int i = 0; i < node->data.call.args.size; i++) {
            if (i != 0) {
                fprintf(fp, ", ");
            }
            fprintf(fp, "%%%p", (void*)vector_at(node->data.call.args, i));
        }
        fprintf(fp, ")");
        break;
    }
    case IR_NODE_CONST_INT: {
        fprintf(fp, " = const %ld", (long)node->data.const_int.val);
        break;
    }
    case IR_NODE_JUMP: {
        fprintf(fp, " = jump %p", (void*)node->data.jump.target_bblock);
        break;
    }
    case IR_NODE_LOAD: {
        fprintf(fp, " = load %%%p", (void*)node->data.load.target);
        break;
    }
    case IR_NODE_STORE: {
        fprintf(fp, " = store %%%p, %%%p",
                (void*)node->data.store.value,
                (void*)node->data.store.target);
        break;
    }
    case IR_NODE_OP: {
        fprintf(fp, " = op(%d) %%%p, %%%p",
                (int)node->data.op.op,
                (void*)node->data.op.lhs,
                (void*)node->data.op.rhs);
        break;
    }
    case IR_NODE_RET: {
        fprintf(fp, " = ret %%%p", (void*)node->data.ret.ret_val);
        break;
    }
    case IR_NODE_GET_PTR: {
        fprintf(fp, " = getptr %%%p, %%%p",
                (void*)node->data.get_ptr.src_addr,
                (void*)node->data.get_ptr.offset);
        break;
    }
    default: {
        assert(0);
    }
    }

    fputc('\n', fp);
}
/* Dump a basic block. Not implemented yet: currently does nothing. */
void dump_ir_bblock(ir_bblock_t* block) {
}
/* Dump a function. Not implemented yet: currently does nothing. */
void dump_ir_func(ir_func_t* func) {
}
/* Dump a whole program. Not implemented yet: currently does nothing. */
void dump_ir_prog(ir_prog_t* prog) {
}

View File

@ -0,0 +1,118 @@
#include "ir.h"
// FIXME using stdlib.h
#include <stdlib.h>
// NOTE(review): this counter is not referenced by any code visible in this file.
static int total_alloc = 0;
/*
 * Allocation unit shared by every IR object kind: alloc_item() hands out
 * memory of this union's size, so any of the members fits.
 */
typedef union ir_alloc_item {
ir_node_t node;
ir_bblock_t bblock;
ir_func_t func;
ir_prog_t prog;
} ir_alloc_item_t;
/*
 * Allocate storage large enough for any IR object.
 *
 * Never returns NULL: every caller in this file writes to the result
 * immediately, so running out of memory aborts the program here instead of
 * turning into a NULL dereference later.
 */
ir_alloc_item_t* alloc_item(void) {
    ir_alloc_item_t* item = malloc(sizeof *item);
    if (item == NULL) {
        abort();
    }
    return item;
}
/*
 * Release storage obtained from alloc_item(). Passing NULL is harmless.
 */
void free_item(ir_alloc_item_t* item) {
    /* A void function must not use `return` with an expression. */
    free(item);
}
/*
 * Allocate an IR node of the given kind.
 *
 * The node's type is NULL, its use list is empty and the union member
 * selected by `tag` is cleared.
 *
 * @param name optional node name, stored as-is (may be NULL)
 * @param tag  node kind; an unsupported tag terminates the program with a
 *             failure status
 * @return the new node
 */
ir_node_t* new_ir_node(const char* name, ir_node_tag_t tag) {
    ir_node_t* node = (ir_node_t*)alloc_item();
    node->name = name;
    node->type = NULL;
    node->tag = tag;
    switch (tag) {
    case IR_NODE_ALLOC: {
        node->type = NULL;
        break;
    }
    case IR_NODE_BRANCH: {
        node->data.branch.cond = NULL;
        node->data.branch.true_bblock = NULL;
        node->data.branch.false_bblock = NULL;
        break;
    }
    case IR_NODE_CALL: {
        vector_init(node->data.call.args);
        node->data.call.callee = NULL;
        break;
    }
    case IR_NODE_CONST_INT: {
        node->data.const_int.val = 0;
        break;
    }
    case IR_NODE_JUMP: {
        node->data.jump.target_bblock = NULL;
        break;
    }
    case IR_NODE_LOAD: {
        node->data.load.target = NULL;
        break;
    }
    case IR_NODE_STORE: {
        node->data.store.target = NULL;
        node->data.store.value = NULL;
        break;
    }
    case IR_NODE_OP: {
        node->data.op.op = 0;
        node->data.op.lhs = NULL;
        node->data.op.rhs = NULL;
        break;
    }
    case IR_NODE_RET: {
        node->data.ret.ret_val = NULL;
        break;
    }
    case IR_NODE_GET_PTR: {
        /* Initialise like the other kinds instead of falling into the
         * error exit below. */
        node->data.get_ptr.src_addr = NULL;
        node->data.get_ptr.offset = NULL;
        break;
    }
    default: {
        /* Unknown tag is an internal error: do not report success. */
        exit(EXIT_FAILURE);
    }
    }
    vector_init(node->used_by);
    return node;
}
/* Release an IR node. Not implemented yet: takes no argument and does nothing. */
void free_irnode() {
}
/*
 * Allocate an empty basic block.
 * `name` becomes the block label and may be NULL.
 */
ir_bblock_t* new_ir_bblock(const char* name) {
    ir_bblock_t* bb = (ir_bblock_t*)alloc_item();

    vector_init(bb->instrs);
    bb->label = name;

    return bb;
}
/* Release a basic block. Not implemented yet: takes no argument and does nothing. */
void free_irbblock() {
}
/*
 * Allocate an IR function with the given name and type; its parameter and
 * basic-block lists start out empty.
 */
ir_func_t* new_ir_func(const char* name, ir_type_t* type) {
    ir_func_t* fn = (ir_func_t*)alloc_item();

    vector_init(fn->bblocks);
    vector_init(fn->params);
    fn->type = type;
    fn->name = name;

    return fn;
}
/* Release an IR function. Not implemented yet: takes no argument and does nothing. */
void free_irfunc() {
}
/*
 * Allocate an IR program whose global, function and external-function lists
 * are all empty.
 */
ir_prog_t* new_ir_prog() {
    ir_prog_t* program = (ir_prog_t*)alloc_item();

    vector_init(program->extern_funcs);
    vector_init(program->funcs);
    vector_init(program->global);

    return program;
}
/* Release an IR program. Not implemented yet: takes no argument and does nothing. */
void free_irprog() {
}

View File

@ -0,0 +1,9 @@
#ifndef __IR_LIB_H__
#define __IR_LIB_H__
#include "ir.h"
/* Allocate an IR node of kind `tag`; `name` may be NULL. */
ir_node_t* new_ir_node(const char* name, ir_node_tag_t tag);
/* Allocate an empty basic block labelled `name` (may be NULL). */
ir_bblock_t* new_ir_bblock(const char* name);
/* Allocate an IR function with no parameters or blocks yet. */
ir_func_t* new_ir_func(const char* name, ir_type_t* type);
#endif

View File

@ -0,0 +1,12 @@
#include "ir.h"
/* Shared descriptor of the 32-bit integer type. */
ir_type_t type_i32 = {
.tag = IR_TYPE_INT32,
};
/* Shared constant-integer node holding the value 0. */
ir_node_t node_zero = {
.tag = IR_NODE_CONST_INT,
.data.const_int = {
.val = 0,
},
};

View File

@ -0,0 +1,8 @@
#ifndef __IR_TYPE_H__
#define __IR_TYPE_H__
#include "ir.h"
/* Shared descriptor of the 32-bit integer type. */
extern ir_type_t type_i32;
/* Shared constant-integer node with value 0. */
extern ir_node_t node_zero;
#endif

View File

@ -0,0 +1,5 @@
#include "middleend.h"
/*
 * Middle-end entry point: lowers the AST to IR via gen_ir_from_ast().
 * `conf` is accepted but not used yet.
 */
ir_prog_t* cc_middleend(ast_node_t* root, cc_midend_conf_t* conf) {
return gen_ir_from_ast(root);
}

View File

@ -0,0 +1,13 @@
#ifndef __SMCC_CC_MIDDLEEND_H__
#define __SMCC_CC_MIDDLEEND_H__
#include "ir/ir.h"
#include "ir/ir_ast.h"
/* Middle-end configuration; it has no fields yet. */
typedef struct cc_midend_conf {
// cc_arch_t arch;
} cc_midend_conf_t;
// TODO add some feature to cc_middleend like optimization
/* Lower the AST rooted at `root` to an IR program. */
ir_prog_t* cc_middleend(ast_node_t* root, cc_midend_conf_t* conf);
#endif // __SMCC_CC_MIDDLEEND_H__

View File

View File

@ -0,0 +1,8 @@
#ifndef __REG_ALLOC_H__
#define __REG_ALLOC_H__
/* Register allocator state; placeholder with no fields yet. */
typedef struct {
} reg_alloc_t;
#endif