2025-03-05 15:45:19 +08:00

410 lines
15 KiB
C

#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"
// Copy from `CParse`
/**
* Operator precedence classes
*/
enum Precedence {
PREC_BOTTOM,
PREC_EXPRESSION, /* , left to right */
PREC_ASSIGNMENT, /* = += -= *= /= %= <<= >>= &= ^= |= right to left */
PREC_CONDITIONAL, /* ?: right to left */
PREC_LOGICAL_OR, /* || left to right */
PREC_LOGICAL_AND, /* && left to right */
PREC_OR, /* | left to right */
PREC_XOR, /* ^ left to right */
PREC_AND, /* & left to right */
PREC_EQUALITY, /* == != left to right */
PREC_RELATIONAL, /* < <= > >= left to right */
PREC_SHIFT, /* << >> left to right */
PREC_ADDITIVE, /* + - left to right */
PREC_MULTIPLICATIVE, /* * / % left to right */
PREC_CAST, /* (type) right to left */
PREC_UNARY, /* ! ~ ++ -- + - * & sizeof right to left */
PREC_POSTFIX, /* () [] -> . left to right */
PREC_PRIMARY,
PREC_TOP
};
enum ParseType {
INFIX_PARSER,
PREFIX_PARSER,
};
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec);
static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right,
enum ASTType type) {
struct ASTNode* node = new_ast_node();
node->type = type;
node->expr.left = left;
node->expr.right = right;
// switch (type) {
// case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
// case NT_SUB : printf("- \n"); break; // (expr) - (expr)
// case NT_MUL : printf("* \n"); break; // (expr) * (expr)
// case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
// case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
// case NT_AND : printf("& \n"); break; // (expr) & (expr)
// case NT_OR : printf("| \n"); break; // (expr) | (expr)
// case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
// case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
// case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
// case NT_EQ : printf("==\n"); break; // (expr) == (expr)
// case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
// case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
// case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
// case NT_LT : printf("< \n"); break; // (expr) < (expr)
// case NT_GT : printf("> \n"); break; // (expr) > (expr)
// case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
// case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
// case NT_NOT : printf("! \n"); break; // ! (expr)
// case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
// case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
// case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
// // case NT_COND : // (expr) ? (expr) : (expr)
// }
}
static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) {
struct ASTNode* node = new_ast_node();
node->type = NT_COMMA;
node->expr.left = left;
node->expr.right = parse_subexpression(parser, PREC_EXPRESSION);
}
static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
node->type = NT_ASSIGN;
// saved left
node->expr.left = left;
enum Precedence next = PREC_ASSIGNMENT + 1;
switch (ttype) {
case TOKEN_ASSIGN :
left = parse_subexpression(parser, next);
break;
case TOKEN_ASSIGN_ADD :
left = gen_node2(left, parse_subexpression(parser, next), NT_ADD);
break;
case TOKEN_ASSIGN_SUB :
left = gen_node2(left, parse_subexpression(parser, next), NT_SUB);
break;
case TOKEN_ASSIGN_MUL :
left = gen_node2(left, parse_subexpression(parser, next), NT_MUL);
break;
case TOKEN_ASSIGN_DIV :
left = gen_node2(left, parse_subexpression(parser, next), NT_DIV);
break;
case TOKEN_ASSIGN_MOD :
left = gen_node2(left, parse_subexpression(parser, next), NT_MOD);
break;
case TOKEN_ASSIGN_L_SH :
left = gen_node2(left, parse_subexpression(parser, next), NT_L_SH);
break;
case TOKEN_ASSIGN_R_SH :
left = gen_node2(left, parse_subexpression(parser, next), NT_R_SH);
break;
case TOKEN_ASSIGN_AND :
left = gen_node2(left, parse_subexpression(parser, next), NT_AND);
break;
case TOKEN_ASSIGN_OR :
left = gen_node2(left, parse_subexpression(parser, next), NT_OR);
break;
case TOKEN_ASSIGN_XOR :
left = gen_node2(left, parse_subexpression(parser, next), NT_XOR);
break;
default:
error("unsupported operator");
break;
}
node->expr.right = left;
}
static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
// saved left
node->expr.left = left;
switch (ttype) {
case TOKEN_EQ:
node->type = NT_EQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
break;
case TOKEN_NEQ:
node->type = NT_NEQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
break;
case TOKEN_LT:
node->type = NT_LT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
case TOKEN_GT:
node->type = NT_GT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
case TOKEN_LE:
node->type = NT_LE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
case TOKEN_GE:
node->type = NT_GE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
default:
error("invalid operator");
}
}
static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
node->expr.left = left;
switch (ttype) {
case TOKEN_OR_OR:
node->type = NT_OR_OR;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_OR);
break;
case TOKEN_AND_AND:
node->type = NT_AND_AND;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_AND);
break;
case TOKEN_OR:
node->type = NT_OR;
node->expr.right = parse_subexpression(parser, PREC_OR);
break;
case TOKEN_XOR:
node->type = NT_XOR;
node->expr.right = parse_subexpression(parser, PREC_XOR);
break;
case TOKEN_AND:
node->type = NT_AND;
node->expr.right = parse_subexpression(parser, PREC_AND);
break;
case TOKEN_L_SH:
node->type = NT_L_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
break;
case TOKEN_R_SH:
node->type = NT_R_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
break;
case TOKEN_ADD:
node->type = NT_ADD;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
break;
case TOKEN_SUB:
node->type = NT_SUB;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
break;
case TOKEN_MUL:
node->type = NT_MUL;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
break;
case TOKEN_DIV:
node->type = NT_DIV;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
break;
case TOKEN_MOD:
node->type = NT_MOD;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
break;
default:
break;
}
return node;
}
// 新增函数调用解析
static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) {
struct ASTNode* node = new_ast_node();
node->type = NT_TERM_CALL;
poptok(parser); // 跳过 '('
enum TokenType ttype;
// 解析参数列表
while ((ttype = peektoktype(parser)) != TOKEN_R_PAREN) {
// add_arg(node, parse_expr(parser));
if (ttype == TOKEN_COMMA) poptok(parser);
else poptok(parser);
}
poptok(parser); // 跳过 ')'
char* name = ident->syms.tok.constant.str;
void* sym = symtab_lookup_symbol(parser->symtab, name);
if (sym == NULL) {
error("function not decl %s", name);
}
node->call.name = name;
node->call.params = NULL;
node->call.func_decl = sym;
return node;
}
static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype;
expecttok(parser, TOKEN_L_PAREN);
left = parse_subexpression(parser, PREC_EXPRESSION);
flushpeektok(parser);
expecttok(parser, TOKEN_R_PAREN);
return left;
}
typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*);
static struct expr_prec_table_t {
parse_expr_fun_t parser;
enum Precedence prec;
enum ParseType ptype;
} expr_table [256] = {
[TOKEN_COMMA] = {parse_comma, PREC_EXPRESSION, INFIX_PARSER},
[TOKEN_ASSIGN] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_ADD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_SUB] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_MUL] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_DIV] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_MOD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_AND] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_OR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_XOR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_OR_OR] = {parse_cal, PREC_LOGICAL_OR , INFIX_PARSER},
[TOKEN_AND_AND] = {parse_cal, PREC_LOGICAL_AND, INFIX_PARSER},
[TOKEN_OR] = {parse_cal, PREC_OR , INFIX_PARSER},
[TOKEN_XOR] = {parse_cal, PREC_XOR , INFIX_PARSER},
[TOKEN_AND] = {parse_cal, PREC_AND , INFIX_PARSER},
[TOKEN_EQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
[TOKEN_NEQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
[TOKEN_LT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_LE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_GT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_GE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_L_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
[TOKEN_R_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
[TOKEN_ADD] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
[TOKEN_SUB] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
[TOKEN_MUL] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
[TOKEN_DIV] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
[TOKEN_MOD] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
[TOKEN_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_BIT_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_ADD_ADD] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_SUB_SUB] = {NULL, PREC_UNARY, PREFIX_PARSER},
// + - * & sizeof
[TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
};
static struct ASTNode *parse_primary_expression(struct Parser* parser) {
flushpeektok(parser);
struct Token* tok = peektok(parser);
struct ASTNode *node = new_ast_node();
node->type = NT_TERM_VAL;
node->syms.tok = *tok;
switch (tok->type) {
case TOKEN_INT_LITERAL:
// node->data.data_type = TYPE_INT;
break;
case TOKEN_FLOAT_LITERAL:
warn("float not supported");
break;
case TOKEN_CHAR_LITERAL:
// node->data.data_type = TYPE_CHAR;
break;
case TOKEN_STRING_LITERAL:
// node->data.data_type = TYPE_POINTER;
case TOKEN_IDENT:
node = parse_ident(parser);
if (peektoktype(parser) == TOKEN_L_PAREN) {
node = parse_call(parser, node);
} else {
void *sym = symtab_lookup_symbol(parser->symtab, tok->constant.str);
if (sym == NULL) {
error("undefined symbol but use %s", tok->constant.str);
}
node->type = NT_TERM_IDENT;
node->syms.decl_node = sym;
goto END;
}
default:
return NULL;
}
poptok(parser);
END:
return node;
}
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) {
enum TokenType ttype;
struct expr_prec_table_t* work;
struct ASTNode* left;
while (1) {
flushpeektok(parser);
ttype = peektoktype(parser);
work = &expr_table[ttype];
// FIXME
if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
break;
}
if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) {
if (work->parser != NULL) {
left = work->parser(parser, NULL);
} else {
left = parse_primary_expression(parser);
}
} else if (work->ptype == INFIX_PARSER) {
if (work->parser == NULL)
break;
if (work->prec <= prec)
break;
left = work->parser(parser, left);
}
// assert(left != NULL);
}
return left;
}
struct ASTNode* parse_expr(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
switch (ttype) {
case TOKEN_NOT:
case TOKEN_AND:
case TOKEN_L_PAREN:
case TOKEN_MUL:
case TOKEN_ADD:
case TOKEN_SUB:
case TOKEN_BIT_NOT:
case TOKEN_AND_AND:
case TOKEN_CHAR_LITERAL:
case TOKEN_INT_LITERAL:
case TOKEN_STRING_LITERAL:
case TOKEN_ADD_ADD:
case TOKEN_SUB_SUB:
case TOKEN_SIZEOF:
case TOKEN_IDENT:
return parse_subexpression(parser, PREC_EXPRESSION);
default:
error("Want expr but not got %s", get_token_name(ttype));
break;
}
}