#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"

// Copy from `CParse`
/**
 * Operator precedence classes, ordered from loosest to tightest binding.
 */
enum Precedence {
    PREC_BOTTOM,
    PREC_EXPRESSION,     /* ,                                  left to right */
    PREC_ASSIGNMENT,     /* = += -= *= /= %= <<= >>= &= ^= |=  right to left */
    PREC_CONDITIONAL,    /* ?:                                 right to left */
    PREC_LOGICAL_OR,     /* ||                                 left to right */
    PREC_LOGICAL_AND,    /* &&                                 left to right */
    PREC_OR,             /* |                                  left to right */
    PREC_XOR,            /* ^                                  left to right */
    PREC_AND,            /* &                                  left to right */
    PREC_EQUALITY,       /* == !=                              left to right */
    PREC_RELATIONAL,     /* < <= > >=                          left to right */
    PREC_SHIFT,          /* << >>                              left to right */
    PREC_ADDITIVE,       /* + -                                left to right */
    PREC_MULTIPLICATIVE, /* * / %                              left to right */
    PREC_CAST,           /* (type)                             right to left */
    PREC_UNARY,          /* ! ~ ++ -- + - * & sizeof           right to left */
    PREC_POSTFIX,        /* () [] -> .                         left to right */
    PREC_PRIMARY,
    PREC_TOP
};

/** How a token's handler in the dispatch table is meant to be invoked. */
enum ParseType {
    INFIX_PARSER,
    PREFIX_PARSER,
};

/** Number of slots in the token dispatch table. */
enum { EXPR_TABLE_SIZE = 256 };

static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec);

/**
 * Build a binary expression node.
 *
 * @param left   left operand (stored as-is, not copied)
 * @param right  right operand (stored as-is, not copied)
 * @param type   node type to assign
 * @return the newly allocated node
 */
static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right, enum ASTType type) {
    struct ASTNode* node = new_ast_node();
    node->type = type;
    node->expr.left = left;
    node->expr.right = right;
    /* The original fell off the end here; callers use the result. */
    return node;
}

/**
 * Infix handler for the comma operator: `left , right`.
 *
 * @param parser  parser state; the current token is expected to be ','
 * @param left    already-parsed left operand
 * @return an NT_COMMA node
 */
static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) {
    /* Consume the ',' like every other infix handler consumes its operator;
     * otherwise the operand parse below would see the same ',' again. */
    flushpeektok(parser);
    expecttok(parser, TOKEN_COMMA);

    struct ASTNode* node = new_ast_node();
    node->type = NT_COMMA;
    node->expr.left = left;
    node->expr.right = parse_subexpression(parser, PREC_EXPRESSION);
    return node;
}

/**
 * Infix handler for `=` and the compound assignment operators.
 *
 * A compound assignment `a op= b` is lowered to NT_ASSIGN(a, op(a, b));
 * note that the `a` node is then referenced from two places in the tree.
 *
 * @param parser  parser state; the current token is the assignment operator
 * @param left    already-parsed assignment target
 * @return an NT_ASSIGN node
 */
static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) {
    flushpeektok(parser);
    enum TokenType ttype = peektoktype(parser);
    poptok(parser);

    struct ASTNode* node = new_ast_node();
    node->type = NT_ASSIGN;
    node->expr.left = left; /* keep the target before `left` is reused below */

    /* Assignment is right-associative: the right operand is itself an
     * assignment-expression, so it must be parsed one level *below*
     * PREC_ASSIGNMENT (parse_subexpression stops at operators whose
     * precedence is <= the level passed in). */
    enum Precedence rhs_prec = PREC_ASSIGNMENT - 1;

    switch (ttype) {
    case TOKEN_ASSIGN:
        left = parse_subexpression(parser, rhs_prec);
        break;
    case TOKEN_ASSIGN_ADD:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_ADD);
        break;
    case TOKEN_ASSIGN_SUB:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_SUB);
        break;
    case TOKEN_ASSIGN_MUL:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_MUL);
        break;
    case TOKEN_ASSIGN_DIV:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_DIV);
        break;
    case TOKEN_ASSIGN_MOD:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_MOD);
        break;
    case TOKEN_ASSIGN_L_SH:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_L_SH);
        break;
    case TOKEN_ASSIGN_R_SH:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_R_SH);
        break;
    case TOKEN_ASSIGN_AND:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_AND);
        break;
    case TOKEN_ASSIGN_OR:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_OR);
        break;
    case TOKEN_ASSIGN_XOR:
        left = gen_node2(left, parse_subexpression(parser, rhs_prec), NT_XOR);
        break;
    default:
        error("unsupported operator");
        break;
    }

    node->expr.right = left;
    return node;
}

/**
 * Infix handler for equality and relational operators.
 *
 * The right operand is parsed at the operator's own precedence, which makes
 * these operators left-associative.
 *
 * @param parser  parser state; the current token is the comparison operator
 * @param left    already-parsed left operand
 * @return the comparison node
 */
static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) {
    flushpeektok(parser);
    enum TokenType ttype = peektoktype(parser);
    poptok(parser);

    struct ASTNode* node = new_ast_node();
    node->expr.left = left;

    enum Precedence rhs_prec;
    switch (ttype) {
    case TOKEN_EQ:  node->type = NT_EQ;  rhs_prec = PREC_EQUALITY;   break;
    case TOKEN_NEQ: node->type = NT_NEQ; rhs_prec = PREC_EQUALITY;   break;
    case TOKEN_LT:  node->type = NT_LT;  rhs_prec = PREC_RELATIONAL; break;
    case TOKEN_GT:  node->type = NT_GT;  rhs_prec = PREC_RELATIONAL; break;
    case TOKEN_LE:  node->type = NT_LE;  rhs_prec = PREC_RELATIONAL; break;
    case TOKEN_GE:  node->type = NT_GE;  rhs_prec = PREC_RELATIONAL; break;
    default:
        error("invalid operator");
        return node;
    }

    node->expr.right = parse_subexpression(parser, rhs_prec);
    return node;
}

/**
 * Infix handler for logical, bitwise, shift and arithmetic binary operators.
 *
 * The right operand is parsed at the operator's own precedence, which makes
 * these operators left-associative.
 *
 * @param parser  parser state; the current token is the binary operator
 * @param left    already-parsed left operand
 * @return the binary expression node
 */
static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) {
    flushpeektok(parser);
    enum TokenType ttype = peektoktype(parser);
    poptok(parser);

    struct ASTNode* node = new_ast_node();
    node->expr.left = left;

    enum Precedence rhs_prec;
    switch (ttype) {
    case TOKEN_OR_OR:   node->type = NT_OR_OR;   rhs_prec = PREC_LOGICAL_OR;     break;
    case TOKEN_AND_AND: node->type = NT_AND_AND; rhs_prec = PREC_LOGICAL_AND;    break;
    case TOKEN_OR:      node->type = NT_OR;      rhs_prec = PREC_OR;             break;
    case TOKEN_XOR:     node->type = NT_XOR;     rhs_prec = PREC_XOR;            break;
    case TOKEN_AND:     node->type = NT_AND;     rhs_prec = PREC_AND;            break;
    case TOKEN_L_SH:    node->type = NT_L_SH;    rhs_prec = PREC_SHIFT;          break;
    case TOKEN_R_SH:    node->type = NT_R_SH;    rhs_prec = PREC_SHIFT;          break;
    case TOKEN_ADD:     node->type = NT_ADD;     rhs_prec = PREC_ADDITIVE;       break;
    case TOKEN_SUB:     node->type = NT_SUB;     rhs_prec = PREC_ADDITIVE;       break;
    case TOKEN_MUL:     node->type = NT_MUL;     rhs_prec = PREC_MULTIPLICATIVE; break;
    case TOKEN_DIV:     node->type = NT_DIV;     rhs_prec = PREC_MULTIPLICATIVE; break;
    case TOKEN_MOD:     node->type = NT_MOD;     rhs_prec = PREC_MULTIPLICATIVE; break;
    default:
        /* Report instead of silently returning a node with no type set,
         * matching parse_cmp. */
        error("invalid operator");
        return node;
    }

    node->expr.right = parse_subexpression(parser, rhs_prec);
    return node;
}

/**
 * Parse a function call whose callee identifier has already been parsed.
 *
 * Arguments are NOT parsed yet: every token up to the first ')' is skipped
 * and `call.params` is left NULL, so nested parentheses inside the argument
 * list are not handled.
 *
 * @param parser  parser state; the current token is expected to be '('
 * @param ident   node for the callee; its `syms.tok.constant.str` is used as
 *                the function name
 * @return an NT_TERM_CALL node
 */
static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) {
    struct ASTNode* node = new_ast_node();
    node->type = NT_TERM_CALL;

    poptok(parser); // skip '('

    // TODO: parse the argument list, e.g. add_arg(node, parse_expr(parser));
    // for now every token before ')' is discarded.
    while (peektoktype(parser) != TOKEN_R_PAREN) {
        poptok(parser);
    }
    poptok(parser); // skip ')'

    char* name = ident->syms.tok.constant.str;
    void* sym = symtab_lookup_symbol(parser->symtab, name);
    if (sym == NULL) {
        error("function not decl %s", name);
    }

    node->call.name = name;
    node->call.params = NULL;
    node->call.func_decl = sym;
    return node;
}

/**
 * Handler for a parenthesised expression: `( expr )`.
 *
 * @param parser  parser state; the current token is expected to be '('
 * @param left    ignored; overwritten with the inner expression
 * @return the node of the inner expression
 */
static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) {
    flushpeektok(parser);
    expecttok(parser, TOKEN_L_PAREN);

    left = parse_subexpression(parser, PREC_EXPRESSION);

    flushpeektok(parser);
    expecttok(parser, TOKEN_R_PAREN);
    return left;
}

/** Signature shared by all handlers stored in the dispatch table. */
typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*);

/**
 * Token-type indexed dispatch table.
 *
 * Slots that are not listed are zero-initialised, i.e. they have a NULL
 * handler; parse_subexpression treats such tokens as the start of a primary
 * expression.
 */
static const struct expr_prec_table_t {
    parse_expr_fun_t parser;
    enum Precedence prec;
    enum ParseType ptype;
} expr_table[EXPR_TABLE_SIZE] = {
    [TOKEN_COMMA]       = {parse_comma,  PREC_EXPRESSION,     INFIX_PARSER},
    [TOKEN_ASSIGN]      = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_ADD]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_SUB]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_MUL]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_DIV]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_MOD]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_AND]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_OR]   = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_XOR]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_OR_OR]       = {parse_cal,    PREC_LOGICAL_OR,     INFIX_PARSER},
    [TOKEN_AND_AND]     = {parse_cal,    PREC_LOGICAL_AND,    INFIX_PARSER},
    [TOKEN_OR]          = {parse_cal,    PREC_OR,             INFIX_PARSER},
    [TOKEN_XOR]         = {parse_cal,    PREC_XOR,            INFIX_PARSER},
    [TOKEN_AND]         = {parse_cal,    PREC_AND,            INFIX_PARSER},
    [TOKEN_EQ]          = {parse_cmp,    PREC_EQUALITY,       INFIX_PARSER},
    [TOKEN_NEQ]         = {parse_cmp,    PREC_EQUALITY,       INFIX_PARSER},
    [TOKEN_LT]          = {parse_cmp,    PREC_RELATIONAL,     INFIX_PARSER},
    [TOKEN_LE]          = {parse_cmp,    PREC_RELATIONAL,     INFIX_PARSER},
    [TOKEN_GT]          = {parse_cmp,    PREC_RELATIONAL,     INFIX_PARSER},
    [TOKEN_GE]          = {parse_cmp,    PREC_RELATIONAL,     INFIX_PARSER},
    [TOKEN_L_SH]        = {parse_cal,    PREC_SHIFT,          INFIX_PARSER},
    [TOKEN_R_SH]        = {parse_cal,    PREC_SHIFT,          INFIX_PARSER},
    [TOKEN_ADD]         = {parse_cal,    PREC_ADDITIVE,       INFIX_PARSER},
    [TOKEN_SUB]         = {parse_cal,    PREC_ADDITIVE,       INFIX_PARSER},
    [TOKEN_MUL]         = {parse_cal,    PREC_MULTIPLICATIVE, INFIX_PARSER},
    [TOKEN_DIV]         = {parse_cal,    PREC_MULTIPLICATIVE, INFIX_PARSER},
    [TOKEN_MOD]         = {parse_cal,    PREC_MULTIPLICATIVE, INFIX_PARSER},

    /* Prefix operators are registered but have no handler yet. */
    [TOKEN_NOT]         = {NULL,         PREC_UNARY,          PREFIX_PARSER},
    [TOKEN_BIT_NOT]     = {NULL,         PREC_UNARY,          PREFIX_PARSER},
    [TOKEN_ADD_ADD]     = {NULL,         PREC_UNARY,          PREFIX_PARSER},
    [TOKEN_SUB_SUB]     = {NULL,         PREC_UNARY,          PREFIX_PARSER},
    // TODO: prefix + - * & and sizeof are not registered

    [TOKEN_L_PAREN]     = {parse_paren,  PREC_POSTFIX,        INFIX_PARSER},
};

/**
 * Parse a primary expression: a literal, an identifier, or a function call.
 *
 * @param parser  parser state
 * @return the node for the primary expression, or NULL (with no token
 *         consumed) when the current token cannot start one
 */
static struct ASTNode *parse_primary_expression(struct Parser* parser) {
    flushpeektok(parser);
    struct Token* tok = peektok(parser);
    struct ASTNode *node;

    switch (tok->type) {
    case TOKEN_FLOAT_LITERAL:
        warn("float not supported");
        /* fallthrough: still produces a value node, as before */
    case TOKEN_INT_LITERAL:
    case TOKEN_CHAR_LITERAL:
    case TOKEN_STRING_LITERAL:
        /* A string literal is now handled as a value like the other
         * literals instead of falling into the identifier case.
         * TODO: record the literal's data type on the node. */
        node = new_ast_node();
        node->type = NT_TERM_VAL;
        node->syms.tok = *tok;
        poptok(parser);
        return node;

    case TOKEN_IDENT: {
        /* Grab the name before parse_ident() runs: it presumably consumes
         * the token, after which `tok` may no longer be valid. */
        char *name = tok->constant.str;

        node = parse_ident(parser);
        if (peektoktype(parser) == TOKEN_L_PAREN) {
            /* parse_call consumes everything through ')'. */
            return parse_call(parser, node);
        }

        void *sym = symtab_lookup_symbol(parser->symtab, name);
        if (sym == NULL) {
            error("undefined symbol but use %s", name);
        }
        node->type = NT_TERM_IDENT;
        node->syms.decl_node = sym;
        return node;
    }

    default:
        /* Nothing allocated and nothing consumed. */
        return NULL;
    }
}

/**
 * Precedence-driven expression loop.
 *
 * Repeatedly looks at the current token: operand-starting tokens are parsed
 * as primary expressions, infix tokens are dispatched through expr_table as
 * long as they bind tighter than `prec`.
 *
 * @param parser  parser state
 * @param prec    precedence floor; an infix operator whose precedence is
 *                <= `prec` ends the sub-expression and is left unconsumed
 * @return the parsed sub-expression, or NULL when no operand was parsed
 */
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) {
    /* Must start as NULL: it is returned or passed on unchanged when the
     * first token is a terminator or an infix token. */
    struct ASTNode *left = NULL;

    for (;;) {
        flushpeektok(parser);
        enum TokenType ttype = peektoktype(parser);

        // FIXME: terminators are hard-coded
        if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
            break;
        }

        /* Token types outside the table simply have no entry. */
        const struct expr_prec_table_t *work = NULL;
        if ((unsigned)ttype < (unsigned)EXPR_TABLE_SIZE) {
            work = &expr_table[ttype];
        }

        if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) {
            struct ASTNode *operand;
            if (work != NULL && work->parser != NULL) {
                operand = work->parser(parser, NULL);
            } else {
                operand = parse_primary_expression(parser);
            }
            /* NULL means nothing was recognised and no token was consumed;
             * continuing would loop on the same token forever. */
            if (operand == NULL) {
                break;
            }
            left = operand;
        } else {
            /* Infix operator with a handler. */
            if (work->prec <= prec) {
                break;
            }
            left = work->parser(parser, left);
        }
    }

    return left;
}

/**
 * Parse an expression starting at the current token.
 *
 * Parsing is done with PREC_EXPRESSION as the floor, so a top-level ','
 * ends the expression rather than being parsed as the comma operator.
 *
 * @param parser  parser state
 * @return the expression tree, or NULL after reporting an error when the
 *         current token cannot start an expression
 */
struct ASTNode* parse_expr(struct Parser* parser) {
    flushpeektok(parser);
    enum TokenType ttype = peektoktype(parser);

    switch (ttype) {
    case TOKEN_NOT:
    case TOKEN_AND:
    case TOKEN_L_PAREN:
    case TOKEN_MUL:
    case TOKEN_ADD:
    case TOKEN_SUB:
    case TOKEN_BIT_NOT:
    case TOKEN_AND_AND:
    case TOKEN_CHAR_LITERAL:
    case TOKEN_INT_LITERAL:
    case TOKEN_STRING_LITERAL:
    case TOKEN_ADD_ADD:
    case TOKEN_SUB_SUB:
    case TOKEN_SIZEOF:
    case TOKEN_IDENT:
        return parse_subexpression(parser, PREC_EXPRESSION);
    default:
        error("Want expr but not got %s", get_token_name(ttype));
        break;
    }

    /* Only reached if error() returns; give callers a defined value. */
    return NULL;
}