/* Expression parser: turns a token stream into AST nodes using an operator-precedence table. */
#include "../parser.h"
|
|
#include "ast.h"
|
|
#include "../symtab/symtab.h"
|
|
|
|
// Copied from `CParse`.
|
|
/**
 * Operator precedence classes, ordered from loosest to tightest binding.
 *
 * The numeric order matters: the expression parser compares these values
 * directly, so a larger value always binds tighter than a smaller one.
 * PREC_BOTTOM and PREC_TOP are sentinels below and above every operator.
 */
enum Precedence {
    PREC_BOTTOM = 0,
    PREC_EXPRESSION,     /* ,                                  left to right */
    PREC_ASSIGNMENT,     /* = += -= *= /= %= <<= >>= &= ^= |=  right to left */
    PREC_CONDITIONAL,    /* ?:                                 right to left */
    PREC_LOGICAL_OR,     /* ||                                 left to right */
    PREC_LOGICAL_AND,    /* &&                                 left to right */
    PREC_OR,             /* |                                  left to right */
    PREC_XOR,            /* ^                                  left to right */
    PREC_AND,            /* &                                  left to right */
    PREC_EQUALITY,       /* == !=                              left to right */
    PREC_RELATIONAL,     /* < <= > >=                          left to right */
    PREC_SHIFT,          /* << >>                              left to right */
    PREC_ADDITIVE,       /* + -                                left to right */
    PREC_MULTIPLICATIVE, /* * / %                              left to right */
    PREC_CAST,           /* (type)                             right to left */
    PREC_UNARY,          /* ! ~ ++ -- + - * & sizeof           right to left */
    PREC_POSTFIX,        /* () [] -> .                         left to right */
    PREC_PRIMARY,
    PREC_TOP
};
|
|
|
|
/**
 * How a table entry's handler is invoked.
 *
 * INFIX_PARSER is deliberately the zero value: table slots that are not
 * explicitly initialized therefore read as "infix, no handler".
 */
enum ParseType {
    INFIX_PARSER = 0, /* handler receives the already-parsed left operand */
    PREFIX_PARSER     /* handler starts a new operand (no left operand)   */
};
|
|
|
|
/* Forward declaration: the operator handlers below recurse into it. */
static ast_node_t *parse_subexpression(tok_buf_t *tokbuf, symtab_t *symtab,
                                       enum Precedence prec);

/*
 * Shorthand for the recursive call. Expands to a use of the identifiers
 * `tokbuf` and `symtab`, so it only works inside functions that have
 * variables with exactly those names in scope.
 */
#define NEXT(prec) parse_subexpression(tokbuf, symtab, prec)
|
|
|
|
static ast_node_t* gen_node2(ast_node_t* left, ast_node_t* right,
|
|
ast_type_t type) {
|
|
ast_node_t* node = new_ast_node();
|
|
node->type = type;
|
|
node->expr.left = left;
|
|
node->expr.right = right;
|
|
return node;
|
|
// FIXME
|
|
|
|
// switch (type) {
|
|
// case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
|
|
// case NT_SUB : printf("- \n"); break; // (expr) - (expr)
|
|
// case NT_MUL : printf("* \n"); break; // (expr) * (expr)
|
|
// case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
|
|
// case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
|
|
// case NT_AND : printf("& \n"); break; // (expr) & (expr)
|
|
// case NT_OR : printf("| \n"); break; // (expr) | (expr)
|
|
// case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
|
|
// case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
|
|
// case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
|
|
// case NT_EQ : printf("==\n"); break; // (expr) == (expr)
|
|
// case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
|
|
// case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
|
|
// case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
|
|
// case NT_LT : printf("< \n"); break; // (expr) < (expr)
|
|
// case NT_GT : printf("> \n"); break; // (expr) > (expr)
|
|
// case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
|
|
// case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
|
|
// case NT_NOT : printf("! \n"); break; // ! (expr)
|
|
// case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
|
|
// case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
|
|
// case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
|
|
// // case NT_COND : // (expr) ? (expr) : (expr)
|
|
// }
|
|
}
|
|
|
|
/**
 * Infix handler for the comma operator: `left , right`.
 *
 * Consumes the ',' token (the other infix handlers in this file consume
 * their operator the same way; without it the recursive parse would see the
 * comma again and return without an operand), then parses the right operand
 * at PREC_EXPRESSION so that a following comma is left for the caller, which
 * yields left-to-right grouping.
 *
 * @param left  the already-parsed left operand
 * @return a new NT_COMMA node with both operands attached
 */
static ast_node_t *parse_comma(tok_buf_t *tokbuf, symtab_t *symtab, ast_node_t *left) {
    flush_peek_tok(tokbuf);
    pop_tok(tokbuf); // skip ','

    ast_node_t *node = new_ast_node();
    node->type = NT_COMMA;
    node->expr.left = left;
    node->expr.right = NEXT(PREC_EXPRESSION);
    return node;
}
|
|
|
|
/**
 * Infix handler for `=` and the compound assignment operators.
 *
 * A compound assignment `a op= b` is lowered to `a = a op b`: the result is
 * always an NT_ASSIGN node whose right child is either the parsed right-hand
 * side (plain `=`) or a binary node built by gen_node2().
 * NOTE(review): in the lowered form the same `left` node pointer is
 * referenced from both the assignment and the inner binary node; confirm
 * that later passes and any tree-freeing code tolerate the shared child.
 *
 * Assignment is right-associative, so the right-hand side is parsed at
 * PREC_EXPRESSION (one class below PREC_ASSIGNMENT). parse_subexpression()
 * stops at operators whose class is <= the requested one, so this lets a
 * following assignment be absorbed into the right-hand side
 * (`a = b = c` groups as `a = (b = c)`) while still stopping at a comma.
 *
 * @param left  the already-parsed assignment target
 * @return the NT_ASSIGN node
 */
static ast_node_t *parse_assign(tok_buf_t *tokbuf, symtab_t *symtab, ast_node_t *left) {
    flush_peek_tok(tokbuf);
    tok_type_t ttype = peek_tok_type(tokbuf);
    pop_tok(tokbuf);

    ast_node_t *node = new_ast_node();
    node->type = NT_ASSIGN;
    node->expr.left = left;

    ast_type_t binop;
    switch (ttype) {
    case TOKEN_ASSIGN:
        node->expr.right = NEXT(PREC_EXPRESSION);
        return node;
    case TOKEN_ASSIGN_ADD:  binop = NT_ADD;  break;
    case TOKEN_ASSIGN_SUB:  binop = NT_SUB;  break;
    case TOKEN_ASSIGN_MUL:  binop = NT_MUL;  break;
    case TOKEN_ASSIGN_DIV:  binop = NT_DIV;  break;
    case TOKEN_ASSIGN_MOD:  binop = NT_MOD;  break;
    case TOKEN_ASSIGN_L_SH: binop = NT_L_SH; break;
    case TOKEN_ASSIGN_R_SH: binop = NT_R_SH; break;
    case TOKEN_ASSIGN_AND:  binop = NT_AND;  break;
    case TOKEN_ASSIGN_OR:   binop = NT_OR;   break;
    case TOKEN_ASSIGN_XOR:  binop = NT_XOR;  break;
    default:
        error("unsupported operator");
        // Same fallback as before: no right-hand side is parsed and the
        // target itself is stored as the right child.
        node->expr.right = left;
        return node;
    }

    node->expr.right = gen_node2(left, NEXT(PREC_EXPRESSION), binop);
    return node;
}
|
|
|
|
/**
 * Infix handler for the equality and relational operators
 * (== != < > <= >=).
 *
 * Consumes the operator token, then parses the right operand at the
 * operator's own precedence class so that a following operator of the same
 * class is left for the caller (left-to-right grouping).
 *
 * @param left  the already-parsed left operand
 * @return a new node with the comparison kind and both operands; for an
 *         unexpected token, error() is called and the node is returned with
 *         only its left operand set
 */
static ast_node_t *parse_cmp(tok_buf_t *tokbuf, symtab_t *symtab, ast_node_t *left) {
    flush_peek_tok(tokbuf);
    tok_type_t op = peek_tok_type(tokbuf);
    pop_tok(tokbuf);

    ast_node_t *cmp = new_ast_node();
    cmp->expr.left = left;

    ast_type_t kind;
    enum Precedence level;
    switch (op) {
    case TOKEN_EQ:  kind = NT_EQ;  level = PREC_EQUALITY;   break;
    case TOKEN_NEQ: kind = NT_NEQ; level = PREC_EQUALITY;   break;
    case TOKEN_LT:  kind = NT_LT;  level = PREC_RELATIONAL; break;
    case TOKEN_GT:  kind = NT_GT;  level = PREC_RELATIONAL; break;
    case TOKEN_LE:  kind = NT_LE;  level = PREC_RELATIONAL; break;
    case TOKEN_GE:  kind = NT_GE;  level = PREC_RELATIONAL; break;
    default:
        error("invalid operator");
        return cmp;
    }

    cmp->type = kind;
    cmp->expr.right = NEXT(level);
    return cmp;
}
|
|
|
|
/**
 * Infix handler for the logical, bitwise, shift and arithmetic binary
 * operators (|| && | ^ & << >> + - * / %).
 *
 * Consumes the operator token, then parses the right operand at the
 * operator's own precedence class, which leaves a following operator of the
 * same class for the caller (left-to-right grouping).
 *
 * @param left  the already-parsed left operand
 * @return a new node with the operator kind and both operands; for any other
 *         token the node is returned with only its left operand set and no
 *         diagnostic is issued
 */
static ast_node_t *parse_cal(tok_buf_t *tokbuf, symtab_t *symtab, ast_node_t *left) {
    flush_peek_tok(tokbuf);
    tok_type_t op = peek_tok_type(tokbuf);
    pop_tok(tokbuf);

    ast_node_t *binary = new_ast_node();
    binary->expr.left = left;

    ast_type_t kind;
    enum Precedence level;
    switch (op) {
    case TOKEN_OR_OR:   kind = NT_OR_OR;   level = PREC_LOGICAL_OR;     break;
    case TOKEN_AND_AND: kind = NT_AND_AND; level = PREC_LOGICAL_AND;    break;
    case TOKEN_OR:      kind = NT_OR;      level = PREC_OR;             break;
    case TOKEN_XOR:     kind = NT_XOR;     level = PREC_XOR;            break;
    case TOKEN_AND:     kind = NT_AND;     level = PREC_AND;            break;
    case TOKEN_L_SH:    kind = NT_L_SH;    level = PREC_SHIFT;          break;
    case TOKEN_R_SH:    kind = NT_R_SH;    level = PREC_SHIFT;          break;
    case TOKEN_ADD:     kind = NT_ADD;     level = PREC_ADDITIVE;       break;
    case TOKEN_SUB:     kind = NT_SUB;     level = PREC_ADDITIVE;       break;
    case TOKEN_MUL:     kind = NT_MUL;     level = PREC_MULTIPLICATIVE; break;
    case TOKEN_DIV:     kind = NT_DIV;     level = PREC_MULTIPLICATIVE; break;
    case TOKEN_MOD:     kind = NT_MOD;     level = PREC_MULTIPLICATIVE; break;
    default:
        return binary;
    }

    binary->type = kind;
    binary->expr.right = NEXT(level);
    return binary;
}
|
|
|
|
/**
 * Parse a function call `ident ( arg, arg, ... )`.
 *
 * Called with the '(' as the next token. Each argument is parsed at
 * PREC_EXPRESSION, so a ',' ends the argument instead of being treated as
 * the comma operator. A trailing comma before ')' is accepted, as before.
 *
 * The argument loop now stops as soon as an argument is followed by anything
 * other than ','. Previously such input (for example a missing ')') made the
 * loop run forever, pushing arguments without consuming any token. The
 * closing ')' is then checked with expect_pop_tok() instead of being popped
 * blindly (presumably it reports a mismatch; confirm against its
 * definition).
 *
 * After the arguments, the callee name is looked up in the symbol table;
 * error() is called when it is missing or is not an NT_DECL_FUNC node.
 *
 * @param ident  identifier node naming the callee
 * @return a new NT_TERM_CALL node carrying the callee name, the argument
 *         vector and the looked-up declaration (NULL if not found)
 */
static ast_node_t *parse_call(tok_buf_t *tokbuf, symtab_t *symtab, ast_node_t *ident) {
    ast_node_t *node = new_ast_node();
    node->type = NT_TERM_CALL;
    node->call.name = ident;
    node->call.params = new_ast_node();
    vector_init(node->call.params->params.params);

    pop_tok(tokbuf); // skip '('

    while (1) {
        flush_peek_tok(tokbuf);
        if (peek_tok_type(tokbuf) == TOKEN_R_PAREN) {
            break;
        }

        ast_node_t *param = NEXT(PREC_EXPRESSION);
        vector_push(node->call.params->params.params, param);

        flush_peek_tok(tokbuf);
        if (peek_tok_type(tokbuf) != TOKEN_COMMA) {
            // Either ')' or malformed input; both end the argument list.
            break;
        }
        pop_tok(tokbuf); // skip ','
    }

    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_R_PAREN); // skip ')'

    const char *name = ident->syms.tok.val.str;
    ast_node_t *sym = symtab_lookup_symbol(symtab, name);
    // TODO check that the call matches the function's declaration
    if (sym == NULL || sym->type != NT_DECL_FUNC) {
        error("function not decl %s", name);
    }
    node->call.func_decl = sym;
    return node;
}
|
|
|
|
/**
 * Parse a parenthesized expression `( expr )`.
 *
 * Expects '(' as the next token, parses the enclosed expression at
 * PREC_EXPRESSION, then expects ')'.
 *
 * @param left  ignored; present only to match the handler signature
 * @return the node of the enclosed expression (no wrapper node is created)
 */
static ast_node_t *parse_paren(tok_buf_t *tokbuf, symtab_t *symtab, ast_node_t *left) {
    (void)left;

    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_L_PAREN);

    ast_node_t *inner = NEXT(PREC_EXPRESSION);

    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_R_PAREN);
    return inner;
}
|
|
|
|
typedef ast_node_t* (*parse_expr_fun_t)(tok_buf_t*, symtab_t* , ast_node_t*);
|
|
static struct expr_prec_table_t {
|
|
parse_expr_fun_t parser;
|
|
enum Precedence prec;
|
|
enum ParseType ptype;
|
|
} expr_table [256] = {
|
|
[TOKEN_COMMA] = {parse_comma, PREC_EXPRESSION, INFIX_PARSER},
|
|
[TOKEN_ASSIGN] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_ADD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_SUB] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_MUL] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_DIV] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_MOD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_AND] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_OR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
[TOKEN_ASSIGN_XOR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
|
|
|
|
[TOKEN_OR_OR] = {parse_cal, PREC_LOGICAL_OR , INFIX_PARSER},
|
|
[TOKEN_AND_AND] = {parse_cal, PREC_LOGICAL_AND, INFIX_PARSER},
|
|
[TOKEN_OR] = {parse_cal, PREC_OR , INFIX_PARSER},
|
|
[TOKEN_XOR] = {parse_cal, PREC_XOR , INFIX_PARSER},
|
|
[TOKEN_AND] = {parse_cal, PREC_AND , INFIX_PARSER},
|
|
|
|
[TOKEN_EQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
|
|
[TOKEN_NEQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
|
|
[TOKEN_LT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
|
|
[TOKEN_LE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
|
|
[TOKEN_GT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
|
|
[TOKEN_GE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
|
|
|
|
[TOKEN_L_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
|
|
[TOKEN_R_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
|
|
[TOKEN_ADD] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
|
|
[TOKEN_SUB] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
|
|
[TOKEN_MUL] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
|
|
[TOKEN_DIV] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
|
|
[TOKEN_MOD] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
|
|
|
|
[TOKEN_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
|
|
[TOKEN_BIT_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
|
|
[TOKEN_ADD_ADD] = {NULL, PREC_UNARY, PREFIX_PARSER},
|
|
[TOKEN_SUB_SUB] = {NULL, PREC_UNARY, PREFIX_PARSER},
|
|
// + - * & sizeof
|
|
|
|
[TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
|
|
};
|
|
|
|
/**
 * Parse a primary expression: a literal, an identifier, or a function call.
 *
 * - Literals (int, float, char, string) become an NT_TERM_VAL node holding a
 *   copy of the token, and the token is consumed. Float literals are
 *   accepted with a "float not supported" warning.
 * - An identifier followed by '(' is handed to parse_call(). Otherwise it is
 *   looked up in the symbol table (error() if absent) and returned as an
 *   NT_TERM_IDENT node pointing at the found declaration.
 * - Any other token: nothing is consumed and NULL is returned.
 *
 * Fixes relative to the previous version:
 * - a string literal no longer falls through into the identifier case
 *   (missing `break`);
 * - the node is allocated only on paths that return it, instead of being
 *   allocated up front and dropped on the identifier and default paths;
 * - the identifier name is read from the returned node rather than through
 *   the peeked token pointer after that token has been popped.
 *
 * @return the parsed node, or NULL if the next token cannot start a primary
 *         expression
 */
static ast_node_t *parse_primary_expression(tok_buf_t *tokbuf, symtab_t *symtab) {
    flush_peek_tok(tokbuf);
    tok_t *tok = peek_tok(tokbuf);

    switch (tok->type) {
    case TOKEN_FLOAT_LITERAL:
        warn("float not supported");
        /* fallthrough */
    case TOKEN_INT_LITERAL:
    case TOKEN_CHAR_LITERAL:
    case TOKEN_STRING_LITERAL: {
        ast_node_t *node = new_ast_node();
        node->type = NT_TERM_VAL;
        node->syms.tok = *tok; // copy before the token is popped
        pop_tok(tokbuf);
        return node;
    }

    case TOKEN_IDENT: {
        ast_node_t *node = expect_pop_ident(tokbuf);
        if (peek_tok_type(tokbuf) == TOKEN_L_PAREN) {
            return parse_call(tokbuf, symtab, node);
        }

        // parse_call() reads the name from the same field of this node.
        const char *name = node->syms.tok.val.str;
        void *sym = symtab_lookup_symbol(symtab, name);
        if (sym == NULL) {
            error("undefined symbol but use %s", name);
        }
        node->type = NT_TERM_IDENT;
        node->syms.decl_node = sym;
        return node;
    }

    default:
        return NULL;
    }
}
|
|
|
|
/**
 * Core loop of the expression parser.
 *
 * Repeatedly looks at the next token and:
 * - stops on ';' or ')' (hard-coded terminators, see FIXME);
 * - if the token has an infix handler, stops when the handler's precedence
 *   class is <= `prec`, otherwise calls it with the expression parsed so far
 *   as the left operand;
 * - if the token has a prefix handler, calls it with a NULL left operand;
 * - otherwise tries parse_primary_expression().
 *
 * Fixes relative to the previous version:
 * - `left` starts as NULL, so an immediately terminated expression returns
 *   NULL instead of an indeterminate pointer;
 * - when parse_primary_expression() returns NULL (it consumed nothing) the
 *   loop ends and whatever was parsed so far is returned; previously the
 *   same token was examined forever;
 * - token types outside the table are treated as having no handler instead
 *   of indexing past the end of expr_table.
 *
 * @param prec  only infix operators binding strictly tighter than this class
 *              are consumed
 * @return the parsed expression, or NULL if nothing could be parsed
 */
static ast_node_t *parse_subexpression(tok_buf_t *tokbuf, symtab_t *symtab, enum Precedence prec) {
    ast_node_t *left = NULL;

    while (1) {
        flush_peek_tok(tokbuf);
        tok_type_t ttype = peek_tok_type(tokbuf);

        // FIXME: expression terminators are hard-coded here.
        if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
            break;
        }

        const struct expr_prec_table_t *work = NULL;
        if ((unsigned)ttype < sizeof expr_table / sizeof expr_table[0]) {
            work = &expr_table[ttype];
        }

        if (work != NULL && work->parser != NULL) {
            if (work->ptype == PREFIX_PARSER) {
                left = work->parser(tokbuf, symtab, NULL);
                continue;
            }
            // Infix: leave operators that do not bind tighter than `prec`
            // for the caller.
            if (work->prec <= prec) {
                break;
            }
            left = work->parser(tokbuf, symtab, left);
            continue;
        }

        ast_node_t *operand = parse_primary_expression(tokbuf, symtab);
        if (operand == NULL) {
            // Nothing was consumed; looping again would never make progress.
            break;
        }
        left = operand;
    }

    return left;
}
|
|
|
|
/**
 * Entry point: parse one expression from the parser's token buffer.
 *
 * Checks that the next token is one of the token types accepted as the start
 * of an expression, then parses at PREC_EXPRESSION (so a top-level ',' is
 * not consumed). For any other token, error() is called and NULL is
 * returned; previously that path fell off the end of the function without
 * returning a value.
 *
 * @param parser  supplies the token buffer and the symbol table
 * @return the expression's AST node, or NULL on the error path
 */
ast_node_t *parse_expr(parser_t *parser) {
    tok_buf_t *tokbuf = &(parser->tokbuf);
    symtab_t *symtab = parser->symtab;

    flush_peek_tok(tokbuf);
    tok_type_t ttype = peek_tok_type(tokbuf);

    switch (ttype) {
    case TOKEN_NOT:
    case TOKEN_AND:
    case TOKEN_L_PAREN:
    case TOKEN_MUL:
    case TOKEN_ADD:
    case TOKEN_SUB:
    case TOKEN_BIT_NOT:
    case TOKEN_AND_AND:
    case TOKEN_CHAR_LITERAL:
    case TOKEN_INT_LITERAL:
    case TOKEN_STRING_LITERAL:
    case TOKEN_ADD_ADD:
    case TOKEN_SUB_SUB:
    case TOKEN_SIZEOF:
    case TOKEN_IDENT:
        return NEXT(PREC_EXPRESSION);
    default:
        error("Want expr but not got %s", get_tok_name(ttype));
        return NULL;
    }
}
|