init basic

This commit is contained in:
ZZY
2025-03-05 15:45:19 +08:00
commit 09299e339c
42 changed files with 5752 additions and 0 deletions

View File

@ -0,0 +1,18 @@
- ast.c 作为抽象语法树的定义
- block.c 作为块的实现主要用于处理作用域,需要符号表
- decl.c 作为声明的实现,其中主要携带变量声明,函数声明见 func.c ,需要符号表
- func.c 作为函数的实现,其中主要携带函数声明,以及函数定义,需要符号表
- expr.c 作为表达式的实现。需要符号表
- stmt.c 作为语句的实现。需要表达式类型判断合法性
- term.c 作为终结符的实现。需要表达式类型判断合法性
- program.c 作为词法分析、语义分析的入口函数,可以根据 parser 结构生成 AST
其中stmt参考cppreference
其中expr参考AI以及CParser

View File

@ -0,0 +1,173 @@
#include "ast.h"
#include "../parser.h"
/**
 * Allocate a new AST node and reset it with init_ast_node().
 *
 * @return the freshly initialised node (type NT_INIT, child slots NULL).
 *         NOTE(review): presumably xmalloc() aborts on allocation failure,
 *         so no NULL check is done here -- confirm against its definition.
 */
struct ASTNode* new_ast_node(void) {
    struct ASTNode* fresh = xmalloc(sizeof *fresh);

    init_ast_node(fresh);
    return fresh;
}
/**
 * Reset a node to its initial state: type NT_INIT and every generic
 * child slot set to NULL.
 *
 * @param node node to reset; must not be NULL.
 */
void init_ast_node(struct ASTNode* node) {
    const int slot_count =
        (int)(sizeof(node->children) / sizeof(node->children[0]));

    node->type = NT_INIT;
    for (int slot = slot_count - 1; slot >= 0; slot--) {
        node->children[slot] = NULL;
    }
}
struct ASTNode* find_ast_node(struct ASTNode* node, enum ASTType type) {
}
#include <stdio.h>
/* Emit the indentation prefix for one output line: one unit per level.
 * A zero or negative depth prints nothing. */
static void pnt_depth(int depth) {
    int remaining = depth;

    while (remaining > 0) {
        printf(" ");
        remaining--;
    }
}
void pnt_ast(struct ASTNode* node, int depth) {
if (!node) return;
pnt_depth(depth);
switch (node->type) {
case NT_ROOT:
for (int i = 0; i < node->root.child_size; i++) {
pnt_ast(node->root.children[i], depth);
}
return;
case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
case NT_SUB : printf("- \n"); break; // (expr) - (expr)
case NT_MUL : printf("* \n"); break; // (expr) * (expr)
case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
case NT_AND : printf("& \n"); break; // (expr) & (expr)
case NT_OR : printf("| \n"); break; // (expr) | (expr)
case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
case NT_EQ : printf("==\n"); break; // (expr) == (expr)
case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
case NT_LT : printf("< \n"); break; // (expr) < (expr)
case NT_GT : printf("> \n"); break; // (expr) > (expr)
case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
case NT_NOT : printf("! \n"); break; // ! (expr)
case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
// case NT_COND : // (expr) ? (expr) : (expr)
case NT_STMT_EMPTY : // ;
printf(";\n");
break;
case NT_STMT_IF : // if (cond) { ... } [else {...}]
printf("if");
pnt_ast(node->if_stmt.cond, depth+1);
pnt_ast(node->if_stmt.if_stmt, depth+1);
if (node->if_stmt.else_stmt) {
pnt_depth(depth);
printf("else");
pnt_ast(node->if_stmt.else_stmt, depth+1);
}
break;
case NT_STMT_WHILE : // while (cond) { ... }
printf("while\n");
pnt_ast(node->while_stmt.cond, depth+1);
pnt_ast(node->while_stmt.body, depth+1);
break;
case NT_STMT_DOWHILE : // do {...} while (cond)
printf("do-while\n");
pnt_ast(node->do_while_stmt.body, depth+1);
pnt_ast(node->do_while_stmt.cond, depth+1);
break;
case NT_STMT_FOR : // for (init; cond; iter) {...}
printf("for\n");
if (node->for_stmt.init)
pnt_ast(node->for_stmt.init, depth+1);
if (node->for_stmt.cond)
pnt_ast(node->for_stmt.cond, depth+1);
if (node->for_stmt.iter)
pnt_ast(node->for_stmt.iter, depth+1);
pnt_ast(node->for_stmt.body, depth+1);
break;
case NT_STMT_SWITCH : // switch (expr) { case ... }
case NT_STMT_BREAK : // break;
case NT_STMT_CONTINUE : // continue;
case NT_STMT_GOTO : // goto label;
case NT_STMT_CASE : // case const_expr:
case NT_STMT_DEFAULT : // default:
case NT_STMT_LABEL : // label:
break;
case NT_STMT_BLOCK : // { ... }
printf("{\n");
for (int i = 0; i < node->block.child_size; i++) {
pnt_ast(node->block.children[i], depth+1);
}
pnt_depth(depth);
printf("}\n");
break;
case NT_STMT_RETURN : // return expr;
printf("return");
if (node->return_stmt.expr_stmt) {
printf(" ");
pnt_ast(node->return_stmt.expr_stmt, depth+1);
} else {
printf("\n");
}
break;
case NT_STMT_EXPR : // expr;
printf("stmt\n");
pnt_ast(node->expr_stmt.expr_stmt, depth);
pnt_depth(depth);
printf(";\n");
break;
case NT_DECL_VAR : // type name; or type name = expr;
printf("decl_val\n");
break;
case NT_DECL_FUNC: // type func_name(param_list);
printf("decl func %s\n", node->func.name->syms.tok.constant.str);
break;
case NT_FUNC : // type func_name(param_list) {...}
printf("def func %s\n", node->func.name->syms.tok.constant.str);
// pnt_ast(node->child.func.params, depth);
pnt_ast(node->func.body, depth);
// pnt_ast(node->child.func.ret, depth);
break;
case NT_PARAM : // 函数形参
printf("param\n");
case NT_ARG_LIST : // 实参列表需要与NT_CALL配合
printf("arg_list\n");
case NT_TERM_CALL : // func (expr)
printf("call\n");
break;
case NT_TERM_IDENT:
printf("%s\n", node->syms.tok.constant.str);
break;
case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword
struct Token * tok = &node->syms.tok;
switch (tok->type) {
case TOKEN_CHAR_LITERAL:
printf("%c\n", tok->constant.ch);
break;
case TOKEN_INT_LITERAL:
printf("%d\n", tok->constant.i);
break;
case TOKEN_STRING_LITERAL:
printf("%s\n", tok->constant.str);
break;
default:
printf("unknown term val\n");
break;
}
default:
break;
}
// 通用子节点递归处理
if (node->type <= NT_ASSIGN) { // 表达式类统一处理子节点
if (node->expr.left) pnt_ast(node->expr.left, depth+1);
if (node->expr.right) pnt_ast(node->expr.right, depth + 1);
}
}

View File

@ -0,0 +1,191 @@
#ifndef __AST_H__
#define __AST_H__
#include "../../frontend.h"
#include "../../lexer/lexer.h"
#include "../type.h"
/*
 * Kind tag of an AST node. The declaration order matters: pnt_ast() treats
 * every value up to and including NT_ASSIGN as an expression node whose
 * operands are stored in ASTNode.expr.
 */
enum ASTType {
NT_INIT, // freshly initialised node, no kind assigned yet
NT_ROOT, // global scope in root node
NT_ADD, // (expr) + (expr)
NT_SUB, // (expr) - (expr)
NT_MUL, // (expr) * (expr)
NT_DIV, // (expr) / (expr)
NT_MOD, // (expr) % (expr)
NT_AND, // (expr) & (expr)
NT_OR, // (expr) | (expr)
NT_XOR, // (expr) ^ (expr)
NT_L_SH, // (expr) << (expr)
NT_R_SH, // (expr) >> (expr)
NT_EQ, // (expr) == (expr)
NT_NEQ, // (expr) != (expr)
NT_LE, // (expr) <= (expr)
NT_GE, // (expr) >= (expr)
NT_LT, // (expr) < (expr)
NT_GT, // (expr) > (expr)
NT_AND_AND, // (expr) && (expr)
NT_OR_OR, // (expr) || (expr)
NT_NOT, // ! (expr)
NT_BIT_NOT, // ~ (expr)
NT_COND, // (expr) ? (expr) : (expr)
NT_COMMA, // expr, expr (comma operator)
NT_ASSIGN, // (expr) = (expr)
NT_ADDRESS, // &expr (address-of)
NT_DEREF, // *expr (dereference)
NT_INDEX, // arr[index] (array subscript)
NT_MEMBER, // struct.member
NT_PTR_MEMBER,// ptr->member
NT_CAST, // (type)expr (explicit cast)
NT_SIZEOF, // sizeof(type|expr)
// NT_ALIGNOF, // _Alignof(type) (C11)
NT_STMT_EMPTY, // ;
NT_STMT_IF, // if (cond) { ... } [else {...}]
NT_STMT_WHILE, // while (cond) { ... }
NT_STMT_DOWHILE, // do {...} while (cond)
NT_STMT_FOR, // for (init; cond; iter) {...}
NT_STMT_SWITCH, // switch (expr) { case ... }
NT_STMT_BREAK, // break;
NT_STMT_CONTINUE, // continue;
NT_STMT_GOTO, // goto label;
NT_STMT_CASE, // case const_expr:
NT_STMT_DEFAULT, // default:
NT_STMT_LABEL, // label:
NT_STMT_BLOCK, // { ... }
NT_STMT_RETURN, // return expr;
NT_STMT_EXPR, // expr;
NT_BLOCK, // block node built by parse_block(); owns the child array
// NT_TYPE_BASE, // base type node
// NT_TYPE_PTR, // pointer type
// NT_TYPE_ARRAY, // array type
// NT_TYPE_FUNC, // function type
// NT_TYPE_QUAL, // qualifier node
NT_DECL_VAR, // type name; or type name = expr;
NT_DECL_FUNC, // type func_name(param_list);
NT_FUNC, // type func_name(param_list) {...}
NT_PARAM, // formal function parameter
NT_ARG_LIST, // actual argument list, used together with NT_TERM_CALL
NT_TERM_CALL, // func (expr)
NT_TERM_VAL, // literal constant (char / int / string token)
NT_TERM_IDENT, // identifier that refers to a declared symbol
NT_TERM_TYPE,
};
/*
 * One node of the abstract syntax tree.
 *
 * `type` selects which member of the anonymous union is meaningful. All
 * members overlay the generic `children` slots, which init_ast_node()
 * sets to NULL.
 */
struct ASTNode {
enum ASTType type;
union {
void *children[6]; // generic view of the payload; cleared by init_ast_node()
struct {
struct ASTNode** children; // top-level declarations, filled by parse_prog()
int child_size; // number of used entries in children
} root; // NT_ROOT
struct {
struct ASTNode** children; // array of children
int child_size; // number of used entries in children
} block; // NT_BLOCK
struct {
struct ASTNode* decl_node; // declaration found in the symbol table (NT_TERM_IDENT)
struct Token tok; // the literal or identifier token itself
} syms; // NT_TERM_VAL / NT_TERM_IDENT
struct {
struct ASTNode *arr;
int size;
} params;
struct {
const char* name; // callee name
struct ASTNode* params; // arguments; parse_call() currently stores NULL
struct ASTNode* func_decl; // callee declaration from the symbol table
} call; // NT_TERM_CALL
struct {
struct ASTNode *type;
struct ASTNode *name;
struct ASTNode *expr_stmt; // optional
void* data;
} decl_val; // NT_DECL_VAR
struct {
struct ASTNode *ret;
struct ASTNode *name;
struct ASTNode *params; // array of params
void* data;
} func_decl;
struct {
struct ASTNode *ret;
struct ASTNode *name;
struct ASTNode *params; // array of params
struct ASTNode *body; // optional
} func; // NT_FUNC and NT_DECL_FUNC (see parse_func())
struct {
struct ASTNode *left;
struct ASTNode *right;
struct ASTNode *optional; // optional
} expr; // operator nodes (NT_ADD ... NT_ASSIGN)
struct {
struct ASTNode *cond;
struct ASTNode *if_stmt;
struct ASTNode *else_stmt; // optional
} if_stmt; // NT_STMT_IF
struct {
struct ASTNode *cond;
struct ASTNode *body;
} switch_stmt; // NT_STMT_SWITCH
struct {
struct ASTNode *cond;
struct ASTNode *body;
} while_stmt; // NT_STMT_WHILE
struct {
struct ASTNode *body;
struct ASTNode *cond;
} do_while_stmt; // NT_STMT_DOWHILE
struct {
struct ASTNode *init;
struct ASTNode *cond; // optional
struct ASTNode *iter; // optional
struct ASTNode *body;
} for_stmt; // NT_STMT_FOR
struct {
struct ASTNode *expr_stmt; // optional
} return_stmt; // NT_STMT_RETURN
struct {
struct ASTNode *label;
} goto_stmt; // NT_STMT_GOTO
struct {
struct ASTNode *label;
} label_stmt; // NT_STMT_LABEL (also written by the NT_STMT_CASE path)
struct {
struct ASTNode *block; // the NT_BLOCK node returned by parse_block()
} block_stmt; // NT_STMT_BLOCK
struct {
struct ASTNode *expr_stmt;
} expr_stmt; // NT_STMT_EXPR
};
};
/* Node lifecycle and debugging helpers (ast.c). */
struct ASTNode* new_ast_node(void); // allocate + init_ast_node()
void init_ast_node(struct ASTNode* node); // type = NT_INIT, child slots = NULL
void pnt_ast(struct ASTNode* node, int depth); // dump a subtree to stdout
struct Parser;
/* Common signature of the parse entry points that return a node. */
typedef struct ASTNode* (*parse_func_t) (struct Parser*);
/* Parser entry points; each consumes tokens from `parser`. */
void parse_prog(struct Parser* parser); // whole translation unit, result in parser->root
struct ASTNode* parse_block(struct Parser* parser); // '{' (decl | stmt)* '}'
struct ASTNode* parse_stmt(struct Parser* parser); // one statement
struct ASTNode* parse_expr(struct Parser* parser); // one expression
struct ASTNode* parse_func(struct Parser* parser); // function declaration or definition
struct ASTNode* parse_decl(struct Parser* parser); // variable or function declaration
struct ASTNode* parse_ident(struct Parser* parser);
struct ASTNode* parse_type(struct Parser* parser);
int peek_decl(struct Parser* parser); // 1 if a declaration starts at the next token
struct ASTNode* parser_ident_without_pop(struct Parser* parser);
#endif

View File

@ -0,0 +1,50 @@
#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"
#ifndef BLOCK_MAX_NODE
#define BLOCK_MAX_NODE (1024)
#endif
/**
 * Parse a brace-delimited block: '{' (declaration | statement)* '}'.
 *
 * A new symbol-table scope is opened for the duration of the block.
 *
 * @param parser parser state; the next token must be '{'.
 * @return an NT_BLOCK node whose block.children holds the parsed items
 *         (at most BLOCK_MAX_NODE of them).
 */
struct ASTNode* parse_block(struct Parser* parser) {
    symtab_enter_scope(parser->symtab);

    struct ASTNode* node = new_ast_node();
    node->type = NT_BLOCK;

    flushpeektok(parser);
    if (peektoktype(parser) != TOKEN_L_BRACE) {
        error("block need '{' start");
    }
    poptok(parser);

    /* xmalloc is the allocator used for the other child arrays in this
     * parser (see parse_prog); a bare malloc result was used unchecked. */
    node->block.children = xmalloc(sizeof(struct ASTNode*) * BLOCK_MAX_NODE);
    node->block.child_size = 0;

    for (;;) {
        struct ASTNode* child;

        if (peek_decl(parser) == 1) {
            child = parse_decl(parser);
        } else {
            flushpeektok(parser);
            if (peektoktype(parser) == TOKEN_R_BRACE) {
                poptok(parser); /* consume '}' */
                break;
            }
            child = parse_stmt(parser);
        }

        /* Never write past the fixed-size child array. */
        if (node->block.child_size >= BLOCK_MAX_NODE) {
            error("block has too many statements");
            break;
        }
        node->block.children[node->block.child_size++] = child;
    }

    symtab_leave_scope(parser->symtab);
    return node;
}

View File

@ -0,0 +1,94 @@
#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"
/**
* 0 false
* 1 true
*/
/**
 * Look ahead to decide whether a declaration starts at the next token.
 *
 * Storage-class specifiers (static / extern / register / typedef) are
 * recognised but reported as not implemented.
 *
 * @param parser parser state; only the look-ahead is touched, no token
 *               is popped.
 * @return 1 if a basic type keyword was seen, 0 otherwise. On a 1 result
 *         the look-ahead is deliberately NOT flushed: parse_decl()
 *         continues peeking from where this function stopped.
 */
int peek_decl(struct Parser* parser) {
    flushpeektok(parser);
    switch (peektoktype(parser)) {
    case TOKEN_STATIC:
    case TOKEN_EXTERN:
    case TOKEN_REGISTER:
    case TOKEN_TYPEDEF:
        error("not impliment");
        break;
    default:
        flushpeektok(parser);
        break;
    }

    switch (peektoktype(parser)) {
    case TOKEN_VOID:
    case TOKEN_CHAR:
    case TOKEN_SHORT:
    case TOKEN_INT:
    case TOKEN_LONG:
    case TOKEN_FLOAT:
    case TOKEN_DOUBLE:
        return 1;
    default:
        break;
    }

    flushpeektok(parser);
    /* Callers compare the result with 0 and 1, so the "no" answer must be
     * returned explicitly instead of falling off the end. */
    return 0;
}
/*
 * Parse a variable declaration: `type name;` or `type name = expr;`.
 *
 * Builds an NT_DECL_VAR node and registers it in the symbol table under
 * the variable's name. Returns the new node.
 *
 * NOTE(review): the exact order of peek/pop calls below matters; the
 * comments describe what the visible call sequence implies -- confirm
 * against the token-stream helpers.
 */
struct ASTNode* parse_decl_val(struct Parser* parser) {
flushpeektok(parser);
// parse the type first, then look at (but do not consume) the name
enum TokenType ttype;
struct ASTNode* node;
struct ASTNode* type_node = parse_type(parser);
struct ASTNode* name_node = parser_ident_without_pop(parser);
node = new_ast_node();
node->decl_val.type = type_node;
node->decl_val.name = name_node;
node->type = NT_DECL_VAR;
// register before parsing the initializer
symtab_add_symbol(parser->symtab, name_node->syms.tok.constant.str, node);
// token following the (still unconsumed) identifier
ttype = peektoktype(parser);
if (ttype == TOKEN_ASSIGN) {
// the identifier is still in the stream, so `name = expr;` is parsed
// as an ordinary expression statement
node->decl_val.expr_stmt = parse_stmt(parser);
if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) {
error("parser_decl_val want stmt_expr");
}
} else if (ttype == TOKEN_SEMICOLON) {
// no initializer: drop the identifier, then require the ';'
poptok(parser);
expecttok(parser, TOKEN_SEMICOLON);
} else {
error("parser_decl_val syntax error");
}
return node;
}
// 类型解析入口改进
/**
 * Parse one declaration and dispatch on what follows `type name`:
 * '(' starts a function (parse_func), '=' or ';' a variable
 * (parse_decl_val).
 *
 * Only the look-ahead is used here; the chosen sub-parser re-reads the
 * tokens from the start of the declaration.
 *
 * @param parser parser state positioned at the start of a declaration.
 * @return the declaration node, or NULL when the token after the name is
 *         none of '(', '=' or ';'.
 */
struct ASTNode* parse_decl(struct Parser* parser) {
    struct ASTNode* node;

    flushpeektok(parser);
    if (peek_decl(parser) == 0) {
        error("syntax error expect decl_val TYPE");
    }
    /* peek_decl() left the look-ahead just past the type keyword. */
    if (peektoktype(parser) != TOKEN_IDENT) {
        error("syntax error expect decl_val IDENT");
    }

    switch (peektoktype(parser)) {
    case TOKEN_L_PAREN: // (
        node = parse_func(parser);
        break;
    case TOKEN_ASSIGN:
    case TOKEN_SEMICOLON:
        node = parse_decl_val(parser);
        break;
    default:
        error("syntax error expect decl_val ASSIGN or SEMICOLON");
        return NULL;
    }
    return node;
}

View File

@ -0,0 +1,409 @@
#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"
// Copy from `CParse`
/**
* Operator precedence classes
*/
/* Binding strength of operators, weakest first. parse_subexpression()
 * stops at any infix operator whose level is <= the level it was given. */
enum Precedence {
PREC_BOTTOM,
PREC_EXPRESSION, /* , left to right */
PREC_ASSIGNMENT, /* = += -= *= /= %= <<= >>= &= ^= |= right to left */
PREC_CONDITIONAL, /* ?: right to left */
PREC_LOGICAL_OR, /* || left to right */
PREC_LOGICAL_AND, /* && left to right */
PREC_OR, /* | left to right */
PREC_XOR, /* ^ left to right */
PREC_AND, /* & left to right */
PREC_EQUALITY, /* == != left to right */
PREC_RELATIONAL, /* < <= > >= left to right */
PREC_SHIFT, /* << >> left to right */
PREC_ADDITIVE, /* + - left to right */
PREC_MULTIPLICATIVE, /* * / % left to right */
PREC_CAST, /* (type) right to left */
PREC_UNARY, /* ! ~ ++ -- + - * & sizeof right to left */
PREC_POSTFIX, /* () [] -> . left to right */
PREC_PRIMARY,
PREC_TOP
};
/* How an expr_table entry is applied. INFIX_PARSER is 0, so entries that
 * are not listed in the table default to it. */
enum ParseType {
INFIX_PARSER, /* handler receives the already parsed left operand */
PREFIX_PARSER, /* handler is called with a NULL left operand */
};
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec);
/*
 * Build a binary operator node of kind `type` with the given operands.
 *
 * Returns the new node. The original fell off the end without returning
 * it, so every compound assignment in parse_assign() received an
 * indeterminate pointer.
 */
static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right,
                                 enum ASTType type) {
    struct ASTNode* node = new_ast_node();

    node->type = type;
    node->expr.left = left;
    node->expr.right = right;
    return node;
}
/*
 * Infix handler for the comma operator: builds NT_COMMA(left, right).
 *
 * Called by parse_subexpression() with the ',' as the current token.
 * Returns the new node.
 */
static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) {
    struct ASTNode* node = new_ast_node();

    /* Consume the ',' like the other infix handlers consume their
     * operator; otherwise the recursive call below sees the same comma
     * again and stops immediately without parsing an operand. */
    poptok(parser);

    node->type = NT_COMMA;
    node->expr.left = left;
    /* Same level on the right keeps `a, b, c` left-associative. */
    node->expr.right = parse_subexpression(parser, PREC_EXPRESSION);
    return node;
}
/*
 * Infix handler for `=` and the compound assignments (`+=`, `-=`, ...).
 *
 * A compound assignment `a op= b` is lowered to NT_ASSIGN(a, op(a, b));
 * note that the `left` node is then referenced from two places in the
 * tree. Returns the NT_ASSIGN node (the original had no return statement).
 *
 * NOTE(review): the right operand is parsed one level ABOVE assignment,
 * so `a = b = c` groups as `(a = b) = c` -- confirm whether
 * right-associativity is intended.
 */
static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) {
    flushpeektok(parser);
    enum TokenType ttype = peektoktype(parser);
    poptok(parser); /* consume the assignment operator */

    struct ASTNode* node = new_ast_node();
    node->type = NT_ASSIGN;
    node->expr.left = left; /* assignment target */

    enum Precedence next = PREC_ASSIGNMENT + 1;
    struct ASTNode* value = left;
    switch (ttype) {
    case TOKEN_ASSIGN:
        value = parse_subexpression(parser, next);
        break;
    case TOKEN_ASSIGN_ADD:
        value = gen_node2(left, parse_subexpression(parser, next), NT_ADD);
        break;
    case TOKEN_ASSIGN_SUB:
        value = gen_node2(left, parse_subexpression(parser, next), NT_SUB);
        break;
    case TOKEN_ASSIGN_MUL:
        value = gen_node2(left, parse_subexpression(parser, next), NT_MUL);
        break;
    case TOKEN_ASSIGN_DIV:
        value = gen_node2(left, parse_subexpression(parser, next), NT_DIV);
        break;
    case TOKEN_ASSIGN_MOD:
        value = gen_node2(left, parse_subexpression(parser, next), NT_MOD);
        break;
    case TOKEN_ASSIGN_L_SH:
        value = gen_node2(left, parse_subexpression(parser, next), NT_L_SH);
        break;
    case TOKEN_ASSIGN_R_SH:
        value = gen_node2(left, parse_subexpression(parser, next), NT_R_SH);
        break;
    case TOKEN_ASSIGN_AND:
        value = gen_node2(left, parse_subexpression(parser, next), NT_AND);
        break;
    case TOKEN_ASSIGN_OR:
        value = gen_node2(left, parse_subexpression(parser, next), NT_OR);
        break;
    case TOKEN_ASSIGN_XOR:
        value = gen_node2(left, parse_subexpression(parser, next), NT_XOR);
        break;
    default:
        /* value stays `left`, as in the original fall-back. */
        error("unsupported operator");
        break;
    }

    node->expr.right = value;
    return node;
}
static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
// saved left
node->expr.left = left;
switch (ttype) {
case TOKEN_EQ:
node->type = NT_EQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
break;
case TOKEN_NEQ:
node->type = NT_NEQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
break;
case TOKEN_LT:
node->type = NT_LT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
case TOKEN_GT:
node->type = NT_GT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
case TOKEN_LE:
node->type = NT_LE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
case TOKEN_GE:
node->type = NT_GE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
default:
error("invalid operator");
}
}
static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
node->expr.left = left;
switch (ttype) {
case TOKEN_OR_OR:
node->type = NT_OR_OR;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_OR);
break;
case TOKEN_AND_AND:
node->type = NT_AND_AND;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_AND);
break;
case TOKEN_OR:
node->type = NT_OR;
node->expr.right = parse_subexpression(parser, PREC_OR);
break;
case TOKEN_XOR:
node->type = NT_XOR;
node->expr.right = parse_subexpression(parser, PREC_XOR);
break;
case TOKEN_AND:
node->type = NT_AND;
node->expr.right = parse_subexpression(parser, PREC_AND);
break;
case TOKEN_L_SH:
node->type = NT_L_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
break;
case TOKEN_R_SH:
node->type = NT_R_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
break;
case TOKEN_ADD:
node->type = NT_ADD;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
break;
case TOKEN_SUB:
node->type = NT_SUB;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
break;
case TOKEN_MUL:
node->type = NT_MUL;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
break;
case TOKEN_DIV:
node->type = NT_DIV;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
break;
case TOKEN_MOD:
node->type = NT_MOD;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
break;
default:
break;
}
return node;
}
// 新增函数调用解析
static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) {
struct ASTNode* node = new_ast_node();
node->type = NT_TERM_CALL;
poptok(parser); // 跳过 '('
enum TokenType ttype;
// 解析参数列表
while ((ttype = peektoktype(parser)) != TOKEN_R_PAREN) {
// add_arg(node, parse_expr(parser));
if (ttype == TOKEN_COMMA) poptok(parser);
else poptok(parser);
}
poptok(parser); // 跳过 ')'
char* name = ident->syms.tok.constant.str;
void* sym = symtab_lookup_symbol(parser->symtab, name);
if (sym == NULL) {
error("function not decl %s", name);
}
node->call.name = name;
node->call.params = NULL;
node->call.func_decl = sym;
return node;
}
/*
 * Parse a parenthesised expression: '(' expr ')'.
 *
 * The incoming `left` operand is ignored; the value of the inner
 * expression is returned in its place.
 */
static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) {
    (void)left;

    flushpeektok(parser);
    expecttok(parser, TOKEN_L_PAREN);

    struct ASTNode* inner = parse_subexpression(parser, PREC_EXPRESSION);

    flushpeektok(parser);
    expecttok(parser, TOKEN_R_PAREN);
    return inner;
}
/* Handler signature: (parser, already-parsed left operand) -> new node. */
typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*);
/*
 * Operator dispatch table, indexed by token type. Entries that are not
 * listed are zero-initialised (parser == NULL), which
 * parse_subexpression() treats as "parse a primary expression".
 * NOTE(review): assumes every TokenType value is below 256 -- confirm
 * against the lexer.
 */
static struct expr_prec_table_t {
parse_expr_fun_t parser; /* handler, or NULL when none is implemented */
enum Precedence prec; /* binding strength of the operator */
enum ParseType ptype; /* infix or prefix */
} expr_table [256] = {
[TOKEN_COMMA] = {parse_comma, PREC_EXPRESSION, INFIX_PARSER},
[TOKEN_ASSIGN] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_ADD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_SUB] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_MUL] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_DIV] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_MOD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_AND] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_OR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_XOR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_OR_OR] = {parse_cal, PREC_LOGICAL_OR , INFIX_PARSER},
[TOKEN_AND_AND] = {parse_cal, PREC_LOGICAL_AND, INFIX_PARSER},
[TOKEN_OR] = {parse_cal, PREC_OR , INFIX_PARSER},
[TOKEN_XOR] = {parse_cal, PREC_XOR , INFIX_PARSER},
[TOKEN_AND] = {parse_cal, PREC_AND , INFIX_PARSER},
[TOKEN_EQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
[TOKEN_NEQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
[TOKEN_LT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_LE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_GT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_GE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_L_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
[TOKEN_R_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
[TOKEN_ADD] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
[TOKEN_SUB] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
[TOKEN_MUL] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
[TOKEN_DIV] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
[TOKEN_MOD] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
/* Prefix operators: registered, but no handler is implemented yet. */
[TOKEN_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_BIT_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_ADD_ADD] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_SUB_SUB] = {NULL, PREC_UNARY, PREFIX_PARSER},
// TODO: unary + - * & and sizeof have no entry yet
/* '(' is registered as infix; parse_paren() ignores its left operand. */
[TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
};
static struct ASTNode *parse_primary_expression(struct Parser* parser) {
flushpeektok(parser);
struct Token* tok = peektok(parser);
struct ASTNode *node = new_ast_node();
node->type = NT_TERM_VAL;
node->syms.tok = *tok;
switch (tok->type) {
case TOKEN_INT_LITERAL:
// node->data.data_type = TYPE_INT;
break;
case TOKEN_FLOAT_LITERAL:
warn("float not supported");
break;
case TOKEN_CHAR_LITERAL:
// node->data.data_type = TYPE_CHAR;
break;
case TOKEN_STRING_LITERAL:
// node->data.data_type = TYPE_POINTER;
case TOKEN_IDENT:
node = parse_ident(parser);
if (peektoktype(parser) == TOKEN_L_PAREN) {
node = parse_call(parser, node);
} else {
void *sym = symtab_lookup_symbol(parser->symtab, tok->constant.str);
if (sym == NULL) {
error("undefined symbol but use %s", tok->constant.str);
}
node->type = NT_TERM_IDENT;
node->syms.decl_node = sym;
goto END;
}
default:
return NULL;
}
poptok(parser);
END:
return node;
}
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) {
enum TokenType ttype;
struct expr_prec_table_t* work;
struct ASTNode* left;
while (1) {
flushpeektok(parser);
ttype = peektoktype(parser);
work = &expr_table[ttype];
// FIXME
if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
break;
}
if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) {
if (work->parser != NULL) {
left = work->parser(parser, NULL);
} else {
left = parse_primary_expression(parser);
}
} else if (work->ptype == INFIX_PARSER) {
if (work->parser == NULL)
break;
if (work->prec <= prec)
break;
left = work->parser(parser, left);
}
// assert(left != NULL);
}
return left;
}
struct ASTNode* parse_expr(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
switch (ttype) {
case TOKEN_NOT:
case TOKEN_AND:
case TOKEN_L_PAREN:
case TOKEN_MUL:
case TOKEN_ADD:
case TOKEN_SUB:
case TOKEN_BIT_NOT:
case TOKEN_AND_AND:
case TOKEN_CHAR_LITERAL:
case TOKEN_INT_LITERAL:
case TOKEN_STRING_LITERAL:
case TOKEN_ADD_ADD:
case TOKEN_SUB_SUB:
case TOKEN_SIZEOF:
case TOKEN_IDENT:
return parse_subexpression(parser, PREC_EXPRESSION);
default:
error("Want expr but not got %s", get_token_name(ttype));
break;
}
}

View File

@ -0,0 +1,120 @@
#include "../parser.h"
#include "../symtab/symtab.h"
#include "ast.h"
#ifndef FUNC_PARAM_CACHE_SIZE
#define FUNC_PARAM_CACHE_SIZE 32 // 合理初始值可覆盖99%常见情况
#endif
/*
 * Buffer holding the tokens of one function parameter list. It is filled
 * by check_is_func_decl() and read back by parse_params().
 */
struct FuncParamCache {
struct Token tokens[FUNC_PARAM_CACHE_SIZE]; // copied tokens, in source order
int read_pos; // index of the next token to read
int write_pos; // index of the next free slot
int depth; // parenthesis nesting depth while filling; 0 once the list is closed
};
static enum TokenType peekcachetype(struct FuncParamCache* cache) {
return cache->tokens[cache->read_pos++].type;
}
// TODO 语义分析压入符号表
static void parse_params(struct Parser* parser, struct FuncParamCache* cache, struct ASTNode* node) {
// = peekcachetype(cache);
enum TokenType ttype;
// if (ttype != TOKEN_L_PAREN) {
// error("function expected '('\n");
// }
struct ASTNode *params = new_ast_node();
node->func.params = params;
int params_size = 0;
while ((ttype = peekcachetype(cache)) != TOKEN_R_PAREN) {
switch (ttype) {
case TOKEN_COMMA:
break;
case TOKEN_ELLIPSIS:
ttype = peekcachetype(cache);
if (ttype != TOKEN_R_PAREN) {
error("... must be a last parameter list (expect ')')");
}
// TODO
error("not implement");
break;
case TOKEN_IDENT:
params->children[params_size++] = NULL;
break;
default:
// TODO 使用cache的类型解析
// parse_type(parser);
// TODO type parse
// ttype = peekcachetype(cache);
// ttype = peekcachetype(cache);
// if (ttype != TOKEN_IDENT) {
// node->node_type = NT_DECL_FUNC;
// flushpeektok(parser);
// continue;
// }
// error("function expected ')' or ','\n");
}
}
}
enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* cache) {
cache->depth = 1;
cache->read_pos = 0;
cache->write_pos = 0;
while (cache->depth) {
struct Token* tok = peektok(parser);
poptok(parser);
if (cache->write_pos >= FUNC_PARAM_CACHE_SIZE - 1) {
error("function parameter list too long");
}
cache->tokens[cache->write_pos++] = *tok;
switch (tok->type) {
case TOKEN_L_PAREN:
cache->depth++;
break;
case TOKEN_R_PAREN:
cache->depth--;
break;
}
}
switch (peektoktype(parser)) {
case TOKEN_SEMICOLON:
poptok(parser);
return NT_DECL_FUNC;
case TOKEN_L_BRACE:
return NT_FUNC;
break;
default:
error("function define or decl need '{' or ';' but you don't got");
}
}
struct ASTNode* parse_func(struct Parser* parser) {
struct ASTNode* ret_type = parse_type(parser);
struct ASTNode* func_name = parse_ident(parser);
struct ASTNode* node = new_ast_node();
node->func.ret = ret_type;
node->func.name = func_name;
flushpeektok(parser);
expecttok(parser, TOKEN_L_PAREN);
struct FuncParamCache cache;
node->type = check_is_func_decl(parser, &cache);
symtab_add_symbol(parser->symtab, func_name->syms.tok.constant.str, node);
if (node->type == NT_DECL_FUNC) {
return node;
}
symtab_enter_scope(parser->symtab);
parse_params(parser, &cache, node);
node->func.body = parse_block(parser);
symtab_leave_scope(parser->symtab);
return node;
}

View File

@ -0,0 +1,29 @@
#include "../parser.h"
#include "ast.h"
#ifndef PROG_MAX_NODE_SIZE
#define PROG_MAX_NODE_SIZE (1024 * 4)
#endif
void parse_prog(struct Parser* parser) {
/**
* Program := (Declaration | Definition)*
* same as
* Program := Declaration* Definition*
*/
int child_size = 0;
parser->root = new_ast_node();
struct ASTNode* node;
parser->root->root.children = xmalloc(sizeof(struct ASTNode*) * PROG_MAX_NODE_SIZE);
while (1) {
flushpeektok(parser);
if (peektoktype(parser) == TOKEN_EOF) {
break;
}
node = parse_decl(parser);
parser->root->root.children[child_size++] = node;
}
parser->root->type = NT_ROOT;
parser->root->root.child_size = child_size;
return;
}

View File

@ -0,0 +1,240 @@
#include "../parser.h"
#include "ast.h"
/**
 * Parse one statement and return its AST node.
 *
 * The statement kind is chosen from the first token; anything that does
 * not start with a statement keyword, ';' or '{' is parsed as an
 * expression statement.
 *
 * @param parser parser state positioned at the first token of the
 *               statement.
 * @return the statement node (the original had no return statement).
 */
struct ASTNode* parse_stmt(struct Parser* parser) {
    flushpeektok(parser);
    enum TokenType ttype = peektoktype(parser);
    struct ASTNode* node = new_ast_node();

    switch (ttype) {
    case TOKEN_IF: {
        /**
         * if (exp) stmt
         * if (exp) stmt else stmt
         */
        poptok(parser);
        expecttok(parser, TOKEN_L_PAREN);
        node->if_stmt.cond = parse_expr(parser);
        flushpeektok(parser);
        expecttok(parser, TOKEN_R_PAREN);
        node->if_stmt.if_stmt = parse_stmt(parser);
        ttype = peektoktype(parser);
        if (ttype == TOKEN_ELSE) {
            poptok(parser);
            node->if_stmt.else_stmt = parse_stmt(parser);
        } else {
            node->if_stmt.else_stmt = NULL;
        }
        node->type = NT_STMT_IF;
        break;
    }
    case TOKEN_SWITCH: {
        /**
         * switch (exp) stmt
         */
        poptok(parser);
        expecttok(parser, TOKEN_L_PAREN);
        node->switch_stmt.cond = parse_expr(parser);
        expecttok(parser, TOKEN_R_PAREN);
        node->switch_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_SWITCH;
        break;
    }
    case TOKEN_WHILE: {
        /**
         * while (exp) stmt
         */
        poptok(parser);
        expecttok(parser, TOKEN_L_PAREN);
        node->while_stmt.cond = parse_expr(parser);
        expecttok(parser, TOKEN_R_PAREN);
        node->while_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_WHILE;
        break;
    }
    case TOKEN_DO: {
        /**
         * do stmt while (exp) ;
         */
        poptok(parser);
        node->do_while_stmt.body = parse_stmt(parser);
        ttype = peektoktype(parser);
        if (ttype != TOKEN_WHILE) {
            error("expected while after do");
        }
        poptok(parser);
        expecttok(parser, TOKEN_L_PAREN);
        node->do_while_stmt.cond = parse_expr(parser);
        expecttok(parser, TOKEN_R_PAREN);
        /* The terminating ';' belongs to the do-while statement; without
         * this it was parsed as a separate empty statement. */
        expecttok(parser, TOKEN_SEMICOLON);
        node->type = NT_STMT_DOWHILE;
        break;
    }
    case TOKEN_FOR: {
        /**
         * for (init; [cond]; [iter]) stmt
         */
        poptok(parser);
        ttype = peektoktype(parser);
        if (ttype != TOKEN_L_PAREN) {
            error("expected ( after for");
        }
        poptok(parser);

        // init expr or init decl_var
        // TODO need add this feature
        node->for_stmt.init = parse_expr(parser);
        expecttok(parser, TOKEN_SEMICOLON);

        // cond expr or null
        ttype = peektoktype(parser);
        if (ttype != TOKEN_SEMICOLON) {
            node->for_stmt.cond = parse_expr(parser);
            expecttok(parser, TOKEN_SEMICOLON);
        } else {
            node->for_stmt.cond = NULL;
            poptok(parser);
        }

        // iter expr or null
        ttype = peektoktype(parser);
        if (ttype != TOKEN_R_PAREN) {
            node->for_stmt.iter = parse_expr(parser);
            expecttok(parser, TOKEN_R_PAREN);
        } else {
            node->for_stmt.iter = NULL;
            poptok(parser);
        }

        node->for_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_FOR;
        break;
    }
    case TOKEN_BREAK: {
        /**
         * break ;
         */
        // TODO check that an enclosing for / while / do-while / switch exists
        poptok(parser);
        expecttok(parser, TOKEN_SEMICOLON);
        node->type = NT_STMT_BREAK;
        break;
    }
    case TOKEN_CONTINUE: {
        /**
         * continue ;
         */
        // TODO check that an enclosing for / while / do-while loop exists
        poptok(parser);
        expecttok(parser, TOKEN_SEMICOLON);
        node->type = NT_STMT_CONTINUE;
        break;
    }
    case TOKEN_RETURN: {
        /**
         * return [exp] ;
         */
        poptok(parser);
        ttype = peektoktype(parser);
        if (ttype != TOKEN_SEMICOLON) {
            node->return_stmt.expr_stmt = parse_expr(parser);
            flushpeektok(parser);
            /* expecttok() consumes the ';'. The original popped one more
             * token afterwards and so swallowed the token that follows
             * the statement. */
            expecttok(parser, TOKEN_SEMICOLON);
        } else {
            node->return_stmt.expr_stmt = NULL;
            poptok(parser); /* consume ';' */
        }
        node->type = NT_STMT_RETURN;
        break;
    }
    case TOKEN_GOTO: {
        /**
         * goto label ;
         */
        // TODO check that the label exists
        poptok(parser);
        ttype = peektoktype(parser);
        if (ttype != TOKEN_IDENT) {
            error("expect identifier after goto");
        }
        // TODO filling label
        /* The label comes before the ';' (the original expected the ';'
         * first, which can never match `goto label;`). */
        node->goto_stmt.label = parse_ident(parser);
        expecttok(parser, TOKEN_SEMICOLON);
        node->type = NT_STMT_GOTO;
        break;
    }
    case TOKEN_SEMICOLON: {
        /**
         * ;
         * empty stmt using by :
         * while () ;
         * if () ;
         * for () ;
         */
        poptok(parser);
        node->type = NT_STMT_EMPTY;
        break;
    }
    case TOKEN_L_BRACE: {
        /**
         * stmt_block like: { (decl_var | stmt) ... }
         */
        node->block_stmt.block = parse_block(parser);
        node->type = NT_STMT_BLOCK;
        break;
    }
    case TOKEN_IDENT: {
        // TODO label goto
        /* `ident :` is a label; anything else is an expression. */
        if (peektoktype(parser) != TOKEN_COLON) {
            goto EXP;
        }
        node->label_stmt.label = parse_ident(parser);
        expecttok(parser, TOKEN_COLON);
        node->type = NT_STMT_LABEL;
        break;
    }
    case TOKEN_CASE: {
        // TODO label switch
        poptok(parser);
        error("unimplemented switch label");
        node->label_stmt.label = parse_expr(parser);
        // TODO the expression must be a constant integer
        expecttok(parser, TOKEN_COLON);
        node->type = NT_STMT_CASE;
        break;
    }
    case TOKEN_DEFAULT: {
        // TODO label switch default
        poptok(parser);
        expecttok(parser, TOKEN_COLON);
        node->type = NT_STMT_DEFAULT;
        break;
    }
    default: {
        /**
         * exp ;
         */
EXP:
        node->expr_stmt.expr_stmt = parse_expr(parser);
        flushpeektok(parser);
        ttype = peektoktype(parser);
        if (ttype != TOKEN_SEMICOLON) {
            error("exp must end with \";\"");
        }
        poptok(parser);
        node->type = NT_STMT_EXPR;
        break;
    }
    }

    return node;
}

View File

@ -0,0 +1,182 @@
#include "../parser.h"
#include "../type.h"
#include "ast.h"
// /* 状态跳转表定义 */
// typedef void (*StateHandler)(struct Parser*, struct ASTNode**);
// enum TypeParseState {
// TPS_BASE_TYPE, // 解析基础类型 (int/char等)
// TPS_QUALIFIER, // 解析限定符 (const/volatile)
// TPS_POINTER, // 解析指针 (*)
// TPS_ARRAY, // 解析数组维度 ([n])
// TPS_FUNC_PARAMS, // 解析函数参数列表
// TPS_END,
// };
// ;
// /* 状态处理函数前置声明 */
// static void handle_base_type(struct Parser*, struct ASTNode**);
// static void handle_qualifier(struct Parser*, struct ASTNode**);
// static void handle_pointer(struct Parser*, struct ASTNode**);
// static void handle_array(struct Parser*, struct ASTNode**);
// static void handle_func_params(struct Parser*, struct ASTNode**);
// static void handle_error(struct Parser*, struct ASTNode**);
// /* 状态跳转表(核心优化部分) */
// static const struct StateTransition {
// enum TokenType tok; // 触发token
// StateHandler handler; // 处理函数
// enum TypeParseState next_state; // 下一个状态
// } state_table[][8] = {
// [TPS_QUALIFIER] = {
// {TOKEN_CONST, handle_qualifier, TPS_QUALIFIER},
// {TOKEN_VOLATILE, handle_qualifier, TPS_QUALIFIER},
// {TOKEN_VOID, handle_base_type, TPS_POINTER},
// {TOKEN_CHAR, handle_base_type, TPS_POINTER},
// {TOKEN_INT, handle_base_type, TPS_POINTER},
// {TOKEN_EOF, handle_error, TPS_QUALIFIER},
// /* 其他token默认处理 */
// {0, NULL, TPS_BASE_TYPE}
// },
// [TPS_BASE_TYPE] = {
// {TOKEN_MUL, handle_pointer, TPS_POINTER},
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {TOKEN_EOF, NULL, TPS_END},
// {0, NULL, TPS_POINTER}
// },
// [TPS_POINTER] = {
// {TOKEN_MUL, handle_pointer, TPS_POINTER},
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {0, NULL, TPS_END}
// },
// [TPS_ARRAY] = {
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {0, NULL, TPS_END}
// },
// [TPS_FUNC_PARAMS] = {
// {0, NULL, TPS_END}
// }
// };
// /* 新的类型解析函数 */
// struct ASTNode* parse_type(struct Parser* p) {
// struct ASTNode* type_root = NULL;
// struct ASTNode** current = &type_root;
// enum TypeParseState state = TPS_QUALIFIER;
// while (state != TPS_END) {
// enum TokenType t = peektoktype(p);
// const struct StateTransition* trans = state_table[state];
// // 查找匹配的转换规则
// while (trans->tok != 0 && trans->tok != t) {
// trans++;
// }
// if (trans->handler) {
// trans->handler(p, current);
// } else if (trans->tok == 0) { // 默认规则
// state = trans->next_state;
// continue;
// } else {
// error("syntax error type parse error");
// }
// state = trans->next_state;
// }
// return type_root;
// }
// /* 具体状态处理函数实现 */
// static void handle_qualifier(struct Parser* p, struct ASTNode** current) {
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_QUAL;
// node->data.data_type = poptok(p).type;
// if (*current) {
// (*current)->child.decl.type = node;
// } else {
// *current = node;
// }
// }
// static void handle_base_type(struct Parser* p, struct ASTNode** current) {
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_BASE;
// node->data.data_type = poptok(p).type;
// // 链接到当前节点链的末端
// while (*current && (*current)->child.decl.type) {
// current = &(*current)->child.decl.type;
// }
// if (*current) {
// (*current)->child.decl.type = node;
// } else {
// *current = node;
// }
// }
// static void handle_pointer(struct Parser* p, struct ASTNode** current) {
// poptok(p); // 吃掉*
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_PTR;
// // 插入到当前节点之前
// node->child.decl.type = *current;
// *current = node;
// }
// /* 其他处理函数类似实现... */
struct ASTNode* parser_ident_without_pop(struct Parser* parser) {
flushpeektok(parser);
struct Token* tok = peektok(parser);
if (tok->type != TOKEN_IDENT) {
error("syntax error: want identifier but got %d", tok->type);
}
struct ASTNode* node = new_ast_node();
node->type = NT_TERM_IDENT;
node->syms.tok = *tok;
node->syms.decl_node = NULL;
return node;
}
/**
 * Parse an identifier: build its term node via parser_ident_without_pop()
 * and then consume the identifier token.
 *
 * @param parser  parser to read from
 * @return the identifier node produced by parser_ident_without_pop()
 */
struct ASTNode* parse_ident(struct Parser* parser) {
    struct ASTNode* ident = parser_ident_without_pop(parser);

    poptok(parser);
    return ident;
}
struct ASTNode* parse_type(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
enum DataType dtype;
switch(ttype) {
case TOKEN_VOID: dtype = TYPE_VOID; break;
case TOKEN_CHAR: dtype = TYPE_CHAR; break;
case TOKEN_SHORT: dtype = TYPE_SHORT; break;
case TOKEN_INT: dtype = TYPE_INT; break;
case TOKEN_LONG: dtype = TYPE_LONG; break;
case TOKEN_FLOAT: dtype = TYPE_FLOAT; break;
case TOKEN_DOUBLE: dtype = TYPE_DOUBLE; break;
default:
error("无效的类型说明符");
}
struct ASTNode* node = new_ast_node();
node->type = NT_TERM_TYPE;
// node->data.data_type = dtype;
poptok(parser);
if (peektoktype(parser) == TOKEN_MUL) {
poptok(parser);
}
return node;
}

View File

@ -0,0 +1,136 @@
#include "../parser.h"
#include "../type.h"
// States of the parse_type() state machine; each state handles one part of a
// type and hands over to another state when its token does not match.
enum TypeParseState {
TPS_BASE_TYPE, // parse the base type (int, char, ...)
TPS_QUALIFIER, // parse qualifiers (const / volatile)
TPS_POINTER, // parse pointer stars (*)
TPS_ARRAY, // parse array dimensions ([n])
TPS_FUNC_PARAMS // parse a function parameter list
};
struct ASTNode* parse_type(struct Parser* p) {
struct ASTNode* type_root = new_ast_node();
struct ASTNode* current = type_root;
current->type = NT_TYPE_BASE;
enum TypeParseState state = TPS_QUALIFIER;
int pointer_level = 0;
while (1) {
enum TokenType t = peektoktype(p);
switch (state) {
// 基础类型解析 (int, char等)
case TPS_BASE_TYPE:
if (is_base_type(t)) {
// current->data.data_type = token_to_datatype(t);
poptok(p);
state = TPS_POINTER;
} else {
error("Expected type specifier");
}
break;
// 类型限定符 (const/volatile)
case TPS_QUALIFIER:
if (t == TOKEN_CONST || t == TOKEN_VOLATILE) {
struct ASTNode* qual_node = new_ast_node();
qual_node->type = NT_TYPE_QUAL;
qual_node->data.data_type = t; // 复用data_type字段存储限定符
current->child.decl.type = qual_node;
current = qual_node;
poptok(p);
} else {
state = TPS_BASE_TYPE;
}
break;
// 指针解析 (*)
case TPS_POINTER:
if (t == TOKEN_MUL) {
struct ASTNode* ptr_node = new_ast_node();
ptr_node->type = NT_TYPE_PTR;
current->child.decl.type = ptr_node;
current = ptr_node;
pointer_level++;
poptok(p);
} else {
state = TPS_ARRAY;
}
break;
// 数组维度 ([n])
case TPS_ARRAY:
if (t == TOKEN_L_BRACKET) {
poptok(p); // 吃掉[
struct ASTNode* arr_node = new_ast_node();
arr_node->type = NT_TYPE_ARRAY;
// 解析数组大小(仅语法检查)
if (peektoktype(p) != TOKEN_R_BRACKET) {
parse_expr(p); // 不计算实际值
}
expecttok(p, TOKEN_R_BRACKET);
current->child.decl.type = arr_node;
current = arr_node;
} else {
state = TPS_FUNC_PARAMS;
}
break;
// 函数参数列表
case TPS_FUNC_PARAMS:
if (t == TOKEN_L_PAREN) {
struct ASTNode* func_node = new_ast_node();
func_node->type = NT_TYPE_FUNC;
current->child.decl.type = func_node;
// 解析参数列表(仅结构,不验证类型)
parse_param_list(p, func_node);
current = func_node;
} else {
return type_root; // 类型解析结束
}
break;
}
}
}
// Report whether a token is a base-type keyword, i.e. whether its value lies
// in the inclusive range TOKEN_VOID .. TOKEN_DOUBLE.
// NOTE(review): this relies on the base-type tokens being contiguous in
// enum TokenType — confirm against token.h.
static int is_base_type(enum TokenType t) {
    const int below_range = t < TOKEN_VOID;
    const int above_range = t > TOKEN_DOUBLE;

    return !(below_range || above_range);
}
// // 转换token到数据类型简化版
// static enum DataType token_to_datatype(enum TokenType t) {
// static enum DataType map[] = {
// [TOKEN_VOID] = DT_VOID,
// [TOKEN_CHAR] = DT_CHAR,
// [TOKEN_INT] = DT_INT,
// // ...其他类型映射
// };
// return map[t];
// }
// Parse a parenthesised, comma-separated parameter list (lightweight: syntax
// only).
//
// Grammar accepted: '(' [ type [ident] { ',' type [ident] } ] ')'.
// A comma must separate parameters and a trailing comma is rejected, because
// a type is required after every comma.
//
// The parsed parameter types are currently discarded and nothing is attached
// to `func`.
//
// peektok() advances the lookahead cursor on each call, so the cursor is
// rewound with flushpeektok() before every fresh look at the queue.
static void parse_param_list(struct Parser* p, struct ASTNode* func) {
    (void)func;  // not populated yet

    flushpeektok(p);
    expecttok(p, TOKEN_L_PAREN);

    flushpeektok(p);
    if (peektoktype(p) != TOKEN_R_PAREN) {
        for (;;) {
            (void)parse_type(p);  // recursive type parse; result unused for now

            // Optional parameter name (syntax check only).
            flushpeektok(p);
            if (peektoktype(p) == TOKEN_IDENT) {
                poptok(p);
            }

            // Continue only when a comma introduces another parameter.
            flushpeektok(p);
            if (peektoktype(p) != TOKEN_COMMA) {
                break;
            }
            poptok(p);
        }
    }

    flushpeektok(p);
    expecttok(p, TOKEN_R_PAREN);
}

View File

@ -0,0 +1,67 @@
#include "parser.h"
#include "type.h"
#include "ast/ast.h"
/**
 * Consume the front token of the queue.
 *
 * cur_idx names the slot of the most recently consumed token; the unconsumed
 * tokens follow it in the ring buffer.
 *
 * If the lookahead cursor sat exactly on the old cur_idx (for instance right
 * after flushpeektok()), it is moved along with cur_idx. Without this the
 * next peektok() would return the token that was just consumed.
 * NOTE(review): when the ring is completely full, peek_idx == cur_idx can
 * also mean "peeked a whole buffer ahead"; that case is treated the same way.
 *
 * @param parser  parser whose queue is popped
 * @return 0 on success, -1 if no token is buffered (nothing is consumed)
 */
int poptok(struct Parser* parser) {
    if (parser->size == 0) {
        return -1;
    }

    const int consumed_from = parser->cur_idx;
    parser->cur_idx = (consumed_from + 1) % PARSER_MAX_TOKEN_QUEUE;
    parser->size--;

    // Keep the lookahead cursor from pointing at a consumed slot.
    if (parser->peek_idx == consumed_from) {
        parser->peek_idx = parser->cur_idx;
    }
    return 0;
}
/**
 * Rewind the lookahead cursor to the consumption point, so the next
 * peektok() yields the slot right after cur_idx.
 *
 * @param parser  parser whose lookahead cursor is reset
 */
void flushpeektok(struct Parser* parser) {
    const int consumed_upto = parser->cur_idx;

    parser->peek_idx = consumed_upto;
}
/**
 * Advance the lookahead cursor by one slot and return the token there.
 *
 * Each call looks one token further ahead than the previous one; use
 * flushpeektok() to start again from the front of the queue. When the cursor
 * is already at the last buffered token, a new token is fetched from the
 * lexer into the ring buffer.
 *
 * A warning is emitted whenever the buffer is full, and error() is reported
 * if a fetch would be needed while it is full.
 *
 * @param parser  parser to peek into
 * @return pointer into parser->TokenBuffer for the peeked token
 */
struct Token* peektok(struct Parser* parser) {
    const int next = (parser->peek_idx + 1) % PARSER_MAX_TOKEN_QUEUE;
    const int need_fetch = (parser->peek_idx == parser->end_idx);
    struct Token* slot = &(parser->TokenBuffer[next]);

    if (parser->size >= PARSER_MAX_TOKEN_QUEUE) {
        warn("peek maybe too deep");
    }

    if (need_fetch) {
        if (parser->size == PARSER_MAX_TOKEN_QUEUE) {
            // FIXME
            error("buffer overflow");
        }
        get_valid_token(parser->lexer, slot);
        parser->size++;
        parser->end_idx = next;
    }

    parser->peek_idx = next;
    return slot;
}
enum TokenType peektoktype(struct Parser* parser) {
return peektok(parser)->type;
}
/**
 * Require the next unconsumed token to have the given type, and consume it.
 *
 * The lookahead cursor is rewound first. peektok() looks one token further
 * on every call, so without the rewind a caller that had already peeked would
 * have a later token checked while poptok() removed the front one.
 *
 * @param parser  parser to read from
 * @param type    token type that must come next
 *
 * On mismatch error() is reported and no token is consumed.
 */
void expecttok(struct Parser* parser, enum TokenType type) {
    flushpeektok(parser);
    struct Token* tok = peektok(parser);
    if (tok->type != type) {
        error("expected tok: %s, got %s", get_token_name(type), get_token_name(tok->type));
        return;
    }
    poptok(parser);
}
/**
 * Put a parser into its initial state: no AST yet, an empty token queue, and
 * the given lexer and symbol table attached.
 *
 * @param parser  parser to initialise (may be uninitialised stack storage)
 * @param lexer   token source used by peektok()
 * @param symtab  symbol table stored in the parser
 */
void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab) {
    parser->cur_node = NULL;
    parser->root = NULL;
    parser->cur_idx = 0;
    parser->peek_idx = 0;
    parser->end_idx = 0;
    parser->size = 0;
    parser->lexer = lexer;
    parser->symtab = symtab;
    // Give err_level a defined value; it was previously left indeterminate
    // for stack-allocated parsers.
    // NOTE(review): 0 is assumed to be the neutral level — confirm.
    parser->err_level = 0;
    // TODO
}
// Run the parser: delegates entirely to parse_prog() on the given,
// already-initialised parser.
void run_parser(struct Parser* parser) {
parse_prog(parser);
}

View File

@ -0,0 +1,33 @@
#ifndef __PARSER_H__
#define __PARSER_H__
#include "../frontend.h"
#include "../lexer/lexer.h"
// #include "symbol_table/symtab.h"
// #include "ast/ast.h"
#define PARSER_MAX_TOKEN_QUEUE 16
// Parser state: AST pointers, the token source, the symbol table, and a ring
// buffer of tokens that supports multi-token lookahead.
struct Parser {
// Root of the AST; NULL after init_parser().
struct ASTNode* root;
// Node currently being worked on; NULL after init_parser().
struct ASTNode* cur_node;
// Lexer that peektok() fetches new tokens from.
struct Lexer* lexer;
// Symbol table handed to init_parser().
struct SymbolTable* symtab;
// Slot of the most recently consumed token; advanced by poptok().
int cur_idx;
// Lookahead cursor; advanced by peektok(), reset to cur_idx by flushpeektok().
int peek_idx;
// Slot of the most recently fetched token.
int end_idx;
// Number of fetched tokens that have not been consumed yet.
int size;
// Ring buffer holding the fetched tokens.
struct Token TokenBuffer[PARSER_MAX_TOKEN_QUEUE];
// NOTE(review): not referenced by the parser code visible here — meaning to be confirmed.
int err_level;
};
// Reset the parser state and attach the lexer and symbol table.
void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab);
// Parse the whole program (calls parse_prog()).
void run_parser(struct Parser* parser);
// Rewind the lookahead cursor to the consumption point.
void flushpeektok(struct Parser* parser);
// Consume the front token; returns 0, or -1 if no token is buffered.
int poptok(struct Parser* parser);
// Advance the lookahead cursor by one token and return that token.
struct Token* peektok(struct Parser* parser);
// Same as peektok(), but returns only the token's type.
enum TokenType peektoktype(struct Parser* parser);
// Check that the peeked token has the given type and consume a token;
// reports an error on mismatch.
void expecttok(struct Parser* parser, enum TokenType type);
#endif

View File

@ -0,0 +1,53 @@
// hashmap.c
#include "hashmap.h"
#include <stdlib.h>
#include <string.h>
// DJB2 string hash, reduced to a bucket index in [0, HMAP_SIZE).
static unsigned long hash(const char* str) {
    unsigned long h = 5381;

    for (int c = *str++; c != 0; c = *str++) {
        h = h * 33 + c;
    }
    return h % HMAP_SIZE;
}
// Initialise a hash map by making every bucket an empty chain.
void hmap_init(HashMap* map) {
    for (int i = 0; i < HMAP_SIZE; i++) {
        map->buckets[i] = NULL;
    }
}
// Insert a key/value pair at the head of the key's bucket chain.
//
// The key string is copied; the value pointer is stored as-is and stays owned
// by the caller. An existing entry with the same key is not replaced: the new
// entry is placed in front of it, so hmap_get() returns the newest value.
//
// Running out of memory is treated as fatal: the process is aborted rather
// than dereferencing a NULL entry or storing a NULL key.
void hmap_put(HashMap* map, const char* key, void* value) {
    HashMapEntry* entry = malloc(sizeof *entry);
    char* key_copy = strdup(key);
    if (entry == NULL || key_copy == NULL) {
        free(key_copy);
        free(entry);
        abort();
    }

    unsigned long idx = hash(key);
    entry->key = key_copy;
    entry->value = value;
    entry->next = map->buckets[idx];
    map->buckets[idx] = entry;
}
// Look up a key and return the value of the first matching entry in its
// bucket chain, or NULL when the key is not present.
void* hmap_get(HashMap* map, const char* key) {
    HashMapEntry* node;

    for (node = map->buckets[hash(key)]; node != NULL; node = node->next) {
        if (strcmp(node->key, key) == 0) {
            return node->value;
        }
    }
    return NULL;
}
// Report whether the key is present in the map (1) or not (0).
//
// The bucket chain is searched directly instead of testing hmap_get() for
// NULL, so a key that was stored with a NULL value is still reported as
// present.
int hmap_contains(HashMap* map, const char* key) {
    for (HashMapEntry* entry = map->buckets[hash(key)]; entry != NULL; entry = entry->next) {
        if (strcmp(entry->key, key) == 0) {
            return 1;
        }
    }
    return 0;
}
// Free every entry and its key copy. Stored values are not freed; they remain
// owned by the caller.
//
// Each bucket is reset to NULL afterwards, so the map is left empty rather
// than pointing at freed entries, and a second hmap_destroy() is harmless.
void hmap_destroy(HashMap* map) {
    for (int i = 0; i < HMAP_SIZE; i++) {
        HashMapEntry* entry = map->buckets[i];
        while (entry) {
            HashMapEntry* next = entry->next;
            free(entry->key);
            free(entry);
            entry = next;
        }
        map->buckets[i] = NULL;
    }
}

View File

@ -0,0 +1,31 @@
#ifndef HASHMAP_H
#define HASHMAP_H
#define HMAP_SIZE 64
// One entry of a bucket's singly linked chain.
typedef struct HashMapEntry {
// Copy of the key made by hmap_put(); freed by hmap_destroy().
char* key;
// Caller-supplied value pointer; never freed by the map.
void* value;
// Next entry in the same bucket, or NULL at the end of the chain.
struct HashMapEntry* next;
} HashMapEntry;
// Fixed-size hash table with separate chaining: one chain head per bucket.
typedef struct {
HashMapEntry* buckets[HMAP_SIZE];
} HashMap;
// Initialise the hash map (all buckets empty).
void hmap_init(HashMap* map);
// Insert a key/value pair; the key is copied, the value pointer is stored as-is.
void hmap_put(HashMap* map, const char* key, void* value);
// Look up the value stored for a key; returns NULL if the key is not found.
void* hmap_get(HashMap* map, const char* key);
// Check whether a key exists; returns non-zero if it does.
int hmap_contains(HashMap* map, const char* key);
// Free the memory owned by the hash map (stored values are not freed).
void hmap_destroy(HashMap* map);
#endif

View File

@ -0,0 +1,43 @@
// scope.c
#include "scope.h"
#include <stdio.h>
#include <stdlib.h>
typedef struct Scope Scope;
// Allocate a new, empty scope whose enclosing scope is `parent` (NULL for the
// outermost scope). Both offsets start at 0.
//
// Allocation failure is fatal: an error is printed and the process exits, in
// the same way scope_insert() handles a duplicate symbol.
Scope* scope_create(Scope* parent) {
    Scope* scope = malloc(sizeof *scope);
    if (scope == NULL) {
        fprintf(stderr, "Error: out of memory while creating scope\n");
        exit(EXIT_FAILURE);
    }
    hmap_init(&scope->symbols);
    scope->parent = parent;
    scope->base_offset = 0;
    scope->cur_offset = 0;
    return scope;
}
// Destroy one scope: release its symbol map and then the scope itself.
// The parent scope and the stored symbol values are left untouched.
// Passing NULL is a no-op, mirroring free(NULL).
void scope_destroy(Scope* scope) {
    if (scope == NULL) {
        return;
    }
    hmap_destroy(&scope->symbols);
    free(scope);
}
// Add a symbol to this scope only (enclosing scopes are not consulted).
// If the name is already defined in this scope, an error is printed to stderr
// and the process exits with EXIT_FAILURE.
void scope_insert(Scope* scope, const char* name, void* symbol) {
    const int duplicate = hmap_contains(&scope->symbols, name);

    if (!duplicate) {
        hmap_put(&scope->symbols, name, symbol);
        return;
    }

    // Redefinition in the same scope is fatal.
    fprintf(stderr, "Error: Symbol '%s' already defined\n", name);
    exit(EXIT_FAILURE);
}
// Resolve a name by searching this scope and then each enclosing scope in
// turn. Returns the first non-NULL symbol found, or NULL if no scope in the
// chain provides one.
void* scope_lookup(Scope* scope, const char* name) {
    for (Scope* level = scope; level != NULL; level = level->parent) {
        void* found = hmap_get(&level->symbols, name);
        if (found != NULL) {
            return found;
        }
    }
    return NULL;
}
// Look a name up in this scope only, ignoring enclosing scopes.
// Returns the stored symbol, or NULL if the name is not found here.
void* scope_lookup_current(Scope* scope, const char* name) {
    void* local_symbol = hmap_get(&scope->symbols, name);

    return local_symbol;
}

View File

@ -0,0 +1,28 @@
#ifndef SCOPE_H
#define SCOPE_H
#include "hashmap.h"
// One lexical scope: its own symbols plus a link to the enclosing scope.
struct Scope {
HashMap symbols; // symbols declared in this scope
struct Scope* parent; // enclosing scope (NULL for the outermost scope)
// Set by symtab_enter_scope() to the parent's base_offset + cur_offset; 0 otherwise.
int base_offset;
// Starts at 0; NOTE(review): presumably the space used by this scope so far — confirm.
int cur_offset;
};
// Create a new scope; the parent scope may be NULL.
struct Scope* scope_create(struct Scope* parent);
// Destroy a scope (its parent is not destroyed).
void scope_destroy(struct Scope* scope);
// Insert a symbol into this scope; exits the process if the name is already defined here.
void scope_insert(struct Scope* scope, const char* name, void* symbol);
// Look a symbol up level by level, from this scope outwards through its parents.
void* scope_lookup(struct Scope* scope, const char* name);
// Look a symbol up in this scope only.
void* scope_lookup_current(struct Scope* scope, const char* name);
#endif

View File

@ -0,0 +1,45 @@
// symtab.c
#include "../../frontend.h"
#include "scope.h"
#include "symtab.h"
typedef struct SymbolTable SymbolTable;
typedef struct Scope Scope;
// Initialise a symbol table with a fresh global scope (no parent), which is
// also made the current scope.
void init_symtab(SymbolTable* symtab) {
    Scope* global = scope_create(NULL);

    symtab->global_scope = global;
    symtab->cur_scope = global;
}
// Tear down the symbol table.
//
// Every scope from the current one outwards is destroyed, which covers nested
// scopes that were entered but never left as well as the global scope at the
// end of the parent chain. Both pointers are cleared afterwards so the table
// does not keep references to freed scopes.
void del_symtab(SymbolTable* symtab) {
    Scope* scope = symtab->cur_scope;
    while (scope != NULL) {
        Scope* parent = scope->parent;
        scope_destroy(scope);
        scope = parent;
    }
    symtab->cur_scope = NULL;
    symtab->global_scope = NULL;
}
// Open a nested scope inside the current one and make it current.
// The new scope's base offset is the enclosing scope's base offset plus that
// scope's current offset.
void symtab_enter_scope(SymbolTable* symtab) {
    Scope* enclosing = symtab->cur_scope;
    Scope* inner = scope_create(enclosing);

    inner->base_offset = enclosing->base_offset + enclosing->cur_offset;
    symtab->cur_scope = inner;
}
// Close the current scope: its parent becomes current and the scope is
// destroyed.
//
// Leaving is refused, with an error, when there is no current scope or when
// the current scope is the global one. Destroying the global scope here would
// leave symtab->global_scope pointing at freed memory; it is released by
// del_symtab() instead.
void symtab_leave_scope(SymbolTable* symtab) {
    Scope* scope = symtab->cur_scope;
    if (scope == NULL || scope == symtab->global_scope) {
        error("cannot leave NULL scope or global scope");
        return;  // only reached if error() returns
    }
    symtab->cur_scope = scope->parent;
    scope_destroy(scope);
}
// Register a symbol in the current scope.
// A redefinition in the same scope is detected here but not yet reported; the
// insertion that follows is what rejects it (scope_insert() exits on a
// duplicate name).
void symtab_add_symbol(SymbolTable* symtab, const char* name, void* ast_node) {
    struct Scope* current = symtab->cur_scope;
    void* previous = scope_lookup_current(current, name);

    if (previous != NULL) {
        // TODO WARNING
        // return NULL;
    }
    scope_insert(current, name, ast_node);
}
// Resolve a name starting at the current scope and continuing outwards
// through the enclosing scopes. Returns the symbol, or NULL if none is found.
void* symtab_lookup_symbol(SymbolTable* symtab, const char* name) {
    struct Scope* innermost = symtab->cur_scope;

    return scope_lookup(innermost, name);
}

View File

@ -0,0 +1,18 @@
// symtab.h
#ifndef __SYMTAB_H__
#define __SYMTAB_H__
// Symbol table: a chain of nested scopes.
struct SymbolTable {
// Innermost scope currently open; lookups and insertions start here.
struct Scope* cur_scope;
// Outermost scope, created by init_symtab().
struct Scope* global_scope;
};
// Create the global scope and make it the current scope.
void init_symtab(struct SymbolTable* symtab);
// Release the symbol table.
void del_symtab(struct SymbolTable* symtab);
// Open a new scope nested in the current one and make it current.
void symtab_enter_scope(struct SymbolTable* symtab);
// Close the current scope and return to its parent.
void symtab_leave_scope(struct SymbolTable* symtab);
// Add a symbol (an AST node pointer) to the current scope.
void symtab_add_symbol(struct SymbolTable* symtab, const char* name, void* ast_node);
// Look a symbol up from the current scope outwards; returns NULL if not found.
void* symtab_lookup_symbol(struct SymbolTable* symtab, const char* name);
#endif

View File

@ -0,0 +1,4 @@
// Test input program: calls an externally provided _print_str() with a
// string literal.
extern int _print_str(const char* str);
int main(void) {
_print_str("Hello, world!\n");
}

View File

@ -0,0 +1,14 @@
// Test input program for the parser: a declaration, a function prototype and
// one mixed-precedence expression statement.
// int __print_str(char* str);
int f(void);
int main(void) {
int a;
// f();
// a = 1 + 2 * 3 + 4;
// __print_str("Hello, world!\n");
a = 3 - f() * (3 + 2) % 6;
// Test case:
// if (a) if (2) 3; else b;
// Is it correctly parsed as if (a) { if (b) c else d }, i.e. does the else
// bind to the nearest if?
}

View File

@ -0,0 +1,34 @@
#include "../parser.h"
#include "../ast/ast.h"
#include "../symtab/symtab.h"
#include <stdio.h>
// gcc -g ../parser.c ../../lexer/lexer.c ../ast/ast.c ../ast/block.c ../ast/decl.c ../ast/expr.c ../ast/func.c ../ast/program.c ../ast/stmt.c ../ast/term.c ../symtab/hashmap.c ../symtab/scope.c ../symtab/symtab.c test_parser.c -o test_parser
// gcc -g test_parser.c -L../.. -lfrontend -o test_parser
/**
 * Parser test driver: parse one source file and print the resulting AST.
 *
 * The file to parse is argv[1] when exactly one argument is given, otherwise
 * "test_file.c".
 *
 * @return 0 on success, 1 if the file cannot be opened
 */
int main(int argc, char** argv) {
    const char* file_name = "test_file.c";
    if (argc == 2) {
        file_name = argv[1];
    }

    FILE* fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror("open file failed");
        return 1;
    }
    printf("open file success\n");

    struct Lexer lexer;
    init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);

    struct SymbolTable symtab;
    init_symtab(&symtab);

    struct Parser parser;
    init_parser(&parser, &lexer, &symtab);
    parse_prog(&parser);

    printf("parse_end\n");
    pnt_ast(parser.root, 0);

    // Parsing and printing are finished, so the input stream can be released.
    fclose(fp);
    return 0;
}

View File

@ -0,0 +1,35 @@
#ifndef __TYPE_H__
#define __TYPE_H__
#include "../lexer/token.h"
// Kinds of data type known to the front end. Enumerator values are implicit
// (starting at 0), so the order of the entries is significant.
enum DataType {
// Basic types
TYPE_VOID,
TYPE_CHAR,
TYPE_SHORT,
TYPE_INT,
TYPE_LONG,
TYPE_LONG_LONG,
TYPE_FLOAT,
TYPE_DOUBLE,
TYPE_LONG_DOUBLE,
// prefix
TYPE_SIGNED,
TYPE_UNSIGNED,
// Not supported yet:
// TYPE_BOOL,
// TYPE_COMPLEX,
// TYPE_IMAGINARY,
// Derived and user-defined types
TYPE_ENUM,
TYPE_ARRAY,
TYPE_STRUCT,
TYPE_UNION,
TYPE_FUNCTION,
TYPE_POINTER,
TYPE_ATOMIC,
TYPE_TYPEDEF,
};
#endif