/* Table-driven operator-precedence expression parser. */
#include "../parser.h"
|
|
#include "ast.h"
|
|
#include "../symtab/symtab.h"
|
|
|
|
// Copied from `CParse`
|
|
/**
 * Operator precedence classes.
 *
 * Enumerators are listed from the loosest binding (PREC_BOTTOM) to the
 * tightest (PREC_TOP), so a numerically larger value binds tighter.
 * The trailing comment on each class lists its operators and their
 * associativity in C.
 */
enum Precedence {
    PREC_BOTTOM = 0,
    PREC_EXPRESSION,     /* ,                                  left to right */
    PREC_ASSIGNMENT,     /* = += -= *= /= %= <<= >>= &= ^= |=  right to left */
    PREC_CONDITIONAL,    /* ?:                                 right to left */
    PREC_LOGICAL_OR,     /* ||                                 left to right */
    PREC_LOGICAL_AND,    /* &&                                 left to right */
    PREC_OR,             /* |                                  left to right */
    PREC_XOR,            /* ^                                  left to right */
    PREC_AND,            /* &                                  left to right */
    PREC_EQUALITY,       /* == !=                              left to right */
    PREC_RELATIONAL,     /* < <= > >=                          left to right */
    PREC_SHIFT,          /* << >>                              left to right */
    PREC_ADDITIVE,       /* + -                                left to right */
    PREC_MULTIPLICATIVE, /* * / %                              left to right */
    PREC_CAST,           /* (type)                             right to left */
    PREC_UNARY,          /* ! ~ ++ -- + - * & sizeof           right to left */
    PREC_POSTFIX,        /* () [] -> .                         left to right */
    PREC_PRIMARY,
    PREC_TOP
};
|
|
|
|
/**
 * How an operator handler registered in the expression table is invoked.
 *
 * INFIX_PARSER is deliberately the zero value: table slots that are not
 * explicitly initialized therefore read as "infix, no handler".
 */
enum ParseType {
    INFIX_PARSER = 0, /* handler receives the already parsed left operand */
    PREFIX_PARSER,    /* handler is called with a NULL left operand */
};
|
|
|
|
// Forward declaration: the operator handlers defined below call
// parse_subexpression() recursively before its definition appears.
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec);
|
|
|
|
static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right,
|
|
enum ASTType type) {
|
|
struct ASTNode* node = new_ast_node();
|
|
node->type = type;
|
|
node->expr.left = left;
|
|
node->expr.right = right;
|
|
// switch (type) {
|
|
// case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
|
|
// case NT_SUB : printf("- \n"); break; // (expr) - (expr)
|
|
// case NT_MUL : printf("* \n"); break; // (expr) * (expr)
|
|
// case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
|
|
// case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
|
|
// case NT_AND : printf("& \n"); break; // (expr) & (expr)
|
|
// case NT_OR : printf("| \n"); break; // (expr) | (expr)
|
|
// case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
|
|
// case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
|
|
// case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
|
|
// case NT_EQ : printf("==\n"); break; // (expr) == (expr)
|
|
// case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
|
|
// case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
|
|
// case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
|
|
// case NT_LT : printf("< \n"); break; // (expr) < (expr)
|
|
// case NT_GT : printf("> \n"); break; // (expr) > (expr)
|
|
// case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
|
|
// case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
|
|
// case NT_NOT : printf("! \n"); break; // ! (expr)
|
|
// case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
|
|
// case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
|
|
// case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
|
|
// // case NT_COND : // (expr) ? (expr) : (expr)
|
|
// }
|
|
}
|
|
|
|
static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) {
|
|
struct ASTNode* node = new_ast_node();
|
|
node->type = NT_COMMA;
|
|
node->expr.left = left;
|
|
node->expr.right = parse_subexpression(parser, PREC_EXPRESSION);
|
|
}
|
|
|
|
static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) {
|
|
flushpeektok(parser);
|
|
enum TokenType ttype = peektoktype(parser);
|
|
poptok(parser);
|
|
struct ASTNode* node = new_ast_node();
|
|
node->type = NT_ASSIGN;
|
|
// saved left
|
|
node->expr.left = left;
|
|
enum Precedence next = PREC_ASSIGNMENT + 1;
|
|
switch (ttype) {
|
|
case TOKEN_ASSIGN :
|
|
left = parse_subexpression(parser, next);
|
|
break;
|
|
case TOKEN_ASSIGN_ADD :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_ADD);
|
|
break;
|
|
case TOKEN_ASSIGN_SUB :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_SUB);
|
|
break;
|
|
case TOKEN_ASSIGN_MUL :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_MUL);
|
|
break;
|
|
case TOKEN_ASSIGN_DIV :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_DIV);
|
|
break;
|
|
case TOKEN_ASSIGN_MOD :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_MOD);
|
|
break;
|
|
case TOKEN_ASSIGN_L_SH :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_L_SH);
|
|
break;
|
|
case TOKEN_ASSIGN_R_SH :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_R_SH);
|
|
break;
|
|
case TOKEN_ASSIGN_AND :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_AND);
|
|
break;
|
|
case TOKEN_ASSIGN_OR :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_OR);
|
|
break;
|
|
case TOKEN_ASSIGN_XOR :
|
|
left = gen_node2(left, parse_subexpression(parser, next), NT_XOR);
|
|
break;
|
|
default:
|
|
error("unsupported operator");
|
|
break;
|
|
}
|
|
node->expr.right = left;
|
|
}
|
|
|
|
static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) {
|
|
flushpeektok(parser);
|
|
enum TokenType ttype = peektoktype(parser);
|
|
poptok(parser);
|
|
struct ASTNode* node = new_ast_node();
|
|
// saved left
|
|
node->expr.left = left;
|
|
switch (ttype) {
|
|
case TOKEN_EQ:
|
|
node->type = NT_EQ;
|
|
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
|
|
break;
|
|
case TOKEN_NEQ:
|
|
node->type = NT_NEQ;
|
|
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
|
|
break;
|
|
case TOKEN_LT:
|
|
node->type = NT_LT;
|
|
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
|
|
break;
|
|
case TOKEN_GT:
|
|
node->type = NT_GT;
|
|
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
|
|
break;
|
|
case TOKEN_LE:
|
|
node->type = NT_LE;
|
|
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
|
|
break;
|
|
case TOKEN_GE:
|
|
node->type = NT_GE;
|
|
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
|
|
break;
|
|
default:
|
|
error("invalid operator");
|
|
}
|
|
}
|
|
|
|
static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) {
|
|
flushpeektok(parser);
|
|
enum TokenType ttype = peektoktype(parser);
|
|
poptok(parser);
|
|
struct ASTNode* node = new_ast_node();
|
|
node->expr.left = left;
|
|
switch (ttype) {
|
|
case TOKEN_OR_OR:
|
|
node->type = NT_OR_OR;
|
|
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_OR);
|
|
break;
|
|
case TOKEN_AND_AND:
|
|
node->type = NT_AND_AND;
|
|
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_AND);
|
|
break;
|
|
case TOKEN_OR:
|
|
node->type = NT_OR;
|
|
node->expr.right = parse_subexpression(parser, PREC_OR);
|
|
break;
|
|
case TOKEN_XOR:
|
|
node->type = NT_XOR;
|
|
node->expr.right = parse_subexpression(parser, PREC_XOR);
|
|
break;
|
|
case TOKEN_AND:
|
|
node->type = NT_AND;
|
|
node->expr.right = parse_subexpression(parser, PREC_AND);
|
|
break;
|
|
case TOKEN_L_SH:
|
|
node->type = NT_L_SH;
|
|
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
|
|
break;
|
|
case TOKEN_R_SH:
|
|
node->type = NT_R_SH;
|
|
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
|
|
break;
|
|
case TOKEN_ADD:
|
|
node->type = NT_ADD;
|
|
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
|
|
break;
|
|
case TOKEN_SUB:
|
|
node->type = NT_SUB;
|
|
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
|
|
break;
|
|
case TOKEN_MUL:
|
|
node->type = NT_MUL;
|
|
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
|
|
break;
|
|
case TOKEN_DIV:
|
|
node->type = NT_DIV;
|
|
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
|
|
break;
|
|
case TOKEN_MOD:
|
|
node->type = NT_MOD;
|
|
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return node;
|
|
}
|
|
|
|
|
|
// 新增函数调用解析
|
|
static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) {
|
|
struct ASTNode* node = new_ast_node();
|
|
node->type = NT_TERM_CALL;
|
|
poptok(parser); // 跳过 '('
|
|
|
|
enum TokenType ttype;
|
|
// 解析参数列表
|
|
while ((ttype = peektoktype(parser)) != TOKEN_R_PAREN) {
|
|
// add_arg(node, parse_expr(parser));
|
|
if (ttype == TOKEN_COMMA) poptok(parser);
|
|
else poptok(parser);
|
|
}
|
|
poptok(parser); // 跳过 ')'
|
|
|
|
char* name = ident->syms.tok.constant.str;
|
|
void* sym = symtab_lookup_symbol(parser->symtab, name);
|
|
if (sym == NULL) {
|
|
error("function not decl %s", name);
|
|
}
|
|
node->call.name = name;
|
|
node->call.params = NULL;
|
|
node->call.func_decl = sym;
|
|
return node;
|
|
}
|
|
|
|
static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) {
|
|
flushpeektok(parser);
|
|
enum TokenType ttype;
|
|
expecttok(parser, TOKEN_L_PAREN);
|
|
left = parse_subexpression(parser, PREC_EXPRESSION);
|
|
flushpeektok(parser);
|
|
expecttok(parser, TOKEN_R_PAREN);
|
|
return left;
|
|
}
|
|
|
|
// Signature shared by the operator handlers stored in expr_table: takes the
// parser and the left operand (NULL when invoked as a prefix handler) and
// returns the resulting node.
typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*);
|
|
static struct expr_prec_table_t {
|
|
parse_expr_fun_t parser;
|
|
enum Precedence prec;
|
|
enum ParseType ptype;
|
|
} expr_table [256] = {
|
|
[TOKEN_COMMA] = {parse_comma, PREC_EXPRESSION, INFIX_PARSER},
|
|
[TOKEN_ASSIGN] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_ADD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_SUB] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_MUL] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_DIV] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_MOD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_AND] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_OR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_XOR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
|
|
[TOKEN_OR_OR] = {parse_cal, PREC_LOGICAL_OR , INFIX_PARSER},
|
|
[TOKEN_AND_AND] = {parse_cal, PREC_LOGICAL_AND, INFIX_PARSER},
|
|
[TOKEN_OR] = {parse_cal, PREC_OR , INFIX_PARSER},
|
|
[TOKEN_XOR] = {parse_cal, PREC_XOR , INFIX_PARSER},
|
|
[TOKEN_AND] = {parse_cal, PREC_AND , INFIX_PARSER},
|
|
|
|
[TOKEN_EQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
|
|
[TOKEN_NEQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
|
|
[TOKEN_LT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
|
|
[TOKEN_LE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
|
|
[TOKEN_GT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
|
|
[TOKEN_GE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
|
|
|
|
[TOKEN_L_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
|
|
[TOKEN_R_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
|
|
[TOKEN_ADD] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
|
|
[TOKEN_SUB] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
|
|
[TOKEN_MUL] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
|
|
[TOKEN_DIV] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
|
|
[TOKEN_MOD] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
|
|
|
|
[TOKEN_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
|
|
[TOKEN_BIT_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
|
|
[TOKEN_ADD_ADD] = {NULL, PREC_UNARY, PREFIX_PARSER},
|
|
[TOKEN_SUB_SUB] = {NULL, PREC_UNARY, PREFIX_PARSER},
|
|
// + - * & sizeof
|
|
|
|
[TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
|
|
};
|
|
|
|
static struct ASTNode *parse_primary_expression(struct Parser* parser) {
|
|
flushpeektok(parser);
|
|
|
|
struct Token* tok = peektok(parser);
|
|
struct ASTNode *node = new_ast_node();
|
|
node->type = NT_TERM_VAL;
|
|
node->syms.tok = *tok;
|
|
|
|
switch (tok->type) {
|
|
case TOKEN_INT_LITERAL:
|
|
// node->data.data_type = TYPE_INT;
|
|
break;
|
|
case TOKEN_FLOAT_LITERAL:
|
|
warn("float not supported");
|
|
break;
|
|
case TOKEN_CHAR_LITERAL:
|
|
// node->data.data_type = TYPE_CHAR;
|
|
break;
|
|
case TOKEN_STRING_LITERAL:
|
|
// node->data.data_type = TYPE_POINTER;
|
|
case TOKEN_IDENT:
|
|
node = parse_ident(parser);
|
|
if (peektoktype(parser) == TOKEN_L_PAREN) {
|
|
node = parse_call(parser, node);
|
|
} else {
|
|
void *sym = symtab_lookup_symbol(parser->symtab, tok->constant.str);
|
|
if (sym == NULL) {
|
|
error("undefined symbol but use %s", tok->constant.str);
|
|
}
|
|
node->type = NT_TERM_IDENT;
|
|
node->syms.decl_node = sym;
|
|
goto END;
|
|
}
|
|
default:
|
|
return NULL;
|
|
}
|
|
poptok(parser);
|
|
END:
|
|
return node;
|
|
}
|
|
|
|
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) {
|
|
enum TokenType ttype;
|
|
struct expr_prec_table_t* work;
|
|
struct ASTNode* left;
|
|
|
|
while (1) {
|
|
flushpeektok(parser);
|
|
ttype = peektoktype(parser);
|
|
work = &expr_table[ttype];
|
|
// FIXME
|
|
if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
|
|
break;
|
|
}
|
|
if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) {
|
|
if (work->parser != NULL) {
|
|
left = work->parser(parser, NULL);
|
|
} else {
|
|
left = parse_primary_expression(parser);
|
|
}
|
|
} else if (work->ptype == INFIX_PARSER) {
|
|
if (work->parser == NULL)
|
|
break;
|
|
if (work->prec <= prec)
|
|
break;
|
|
left = work->parser(parser, left);
|
|
}
|
|
// assert(left != NULL);
|
|
}
|
|
|
|
return left;
|
|
}
|
|
|
|
struct ASTNode* parse_expr(struct Parser* parser) {
|
|
flushpeektok(parser);
|
|
enum TokenType ttype = peektoktype(parser);
|
|
switch (ttype) {
|
|
case TOKEN_NOT:
|
|
case TOKEN_AND:
|
|
case TOKEN_L_PAREN:
|
|
case TOKEN_MUL:
|
|
case TOKEN_ADD:
|
|
case TOKEN_SUB:
|
|
case TOKEN_BIT_NOT:
|
|
case TOKEN_AND_AND:
|
|
case TOKEN_CHAR_LITERAL:
|
|
case TOKEN_INT_LITERAL:
|
|
case TOKEN_STRING_LITERAL:
|
|
case TOKEN_ADD_ADD:
|
|
case TOKEN_SUB_SUB:
|
|
case TOKEN_SIZEOF:
|
|
case TOKEN_IDENT:
|
|
return parse_subexpression(parser, PREC_EXPRESSION);
|
|
default:
|
|
error("Want expr but not got %s", get_token_name(ttype));
|
|
break;
|
|
}
|
|
}
|