#include "../ast.h"
#include "../parser.h"
#include "../symtab/symtab.h"

#include <stddef.h>

// Copied from `CParse`
/**
 * Operator precedence classes, listed from loosest to tightest binding.
 * parse_subexpression() compares these values numerically.
 */
enum Precedence {
    PREC_BOTTOM,
    PREC_EXPRESSION,     /* ,                                  left to right */
    PREC_ASSIGNMENT,     /* = += -= *= /= %= <<= >>= &= ^= |=  right to left */
    PREC_CONDITIONAL,    /* ?:                                 right to left */
    PREC_LOGICAL_OR,     /* ||                                 left to right */
    PREC_LOGICAL_AND,    /* &&                                 left to right */
    PREC_OR,             /* |                                  left to right */
    PREC_XOR,            /* ^                                  left to right */
    PREC_AND,            /* &                                  left to right */
    PREC_EQUALITY,       /* == !=                              left to right */
    PREC_RELATIONAL,     /* < <= > >=                          left to right */
    PREC_SHIFT,          /* << >>                              left to right */
    PREC_ADDITIVE,       /* + -                                left to right */
    PREC_MULTIPLICATIVE, /* * / %                              left to right */
    PREC_CAST,           /* (type)                             right to left */
    PREC_UNARY,          /* ! ~ ++ -- + - * & sizeof           right to left */
    PREC_POSTFIX,        /* () [] -> .                         left to right */
    PREC_PRIMARY,
    PREC_TOP
};

/**
 * How a table entry's parser is invoked. INFIX_PARSER is 0, so table slots
 * that are not explicitly initialized read as {NULL, PREC_BOTTOM, INFIX_PARSER}.
 */
enum ParseType {
    INFIX_PARSER,
    PREFIX_PARSER,
};

static ast_node_t *parse_subexpression(tok_stream_t *tokbuf, symtab_t *symtab,
                                       enum Precedence prec);

/* Shorthand used by the operator parsers; relies on locals named
 * `tokbuf` and `symtab` being in scope at the point of use. */
#define NEXT(prec) parse_subexpression(tokbuf, symtab, prec)

/**
 * Allocate a binary expression node.
 *
 * @param left   left operand, stored in expr.left
 * @param right  right operand, stored in expr.right
 * @param type   node type tag
 * @return the newly allocated node
 */
static ast_node_t *gen_node2(ast_node_t *left, ast_node_t *right, ast_type_t type)
{
    ast_node_t *node = new_ast_node();
    node->type = type;
    node->expr.left = left;
    node->expr.right = right;
    return node;
}

/**
 * Infix parser for the comma operator: builds NT_COMMA(left, right).
 *
 * The ',' token is consumed here; without that the recursive call would see
 * the same ',' again and return immediately without an operand.
 */
static ast_node_t *parse_comma(tok_stream_t *tokbuf, symtab_t *symtab, ast_node_t *left)
{
    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_COMMA);

    ast_node_t *right = NEXT(PREC_EXPRESSION);
    return gen_node2(left, right, NT_COMMA);
}

/**
 * Infix parser for `=` and the compound assignment operators.
 *
 * A compound assignment `a op= b` is lowered to NT_ASSIGN(a, op(a, b)); note
 * that the `a` node is referenced from both the assignment and the operator
 * node. On an unrecognized token an error is logged and the result is
 * NT_ASSIGN(left, left).
 */
static ast_node_t *parse_assign(tok_stream_t *tokbuf, symtab_t *symtab, ast_node_t *left)
{
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    pop_tok(tokbuf);

    /* parse_subexpression() stops at operators whose precedence is <= its
     * argument, so passing one level *below* assignment lets a following
     * assignment bind to the right-hand side: `a = b = c` is `a = (b = c)`. */
    const enum Precedence rhs_prec = PREC_ASSIGNMENT - 1;

    ast_node_t *value = left;
    switch (ttype) {
    case TOKEN_ASSIGN:      value = NEXT(rhs_prec); break;
    case TOKEN_ASSIGN_ADD:  value = gen_node2(left, NEXT(rhs_prec), NT_ADD);  break;
    case TOKEN_ASSIGN_SUB:  value = gen_node2(left, NEXT(rhs_prec), NT_SUB);  break;
    case TOKEN_ASSIGN_MUL:  value = gen_node2(left, NEXT(rhs_prec), NT_MUL);  break;
    case TOKEN_ASSIGN_DIV:  value = gen_node2(left, NEXT(rhs_prec), NT_DIV);  break;
    case TOKEN_ASSIGN_MOD:  value = gen_node2(left, NEXT(rhs_prec), NT_MOD);  break;
    case TOKEN_ASSIGN_L_SH: value = gen_node2(left, NEXT(rhs_prec), NT_L_SH); break;
    case TOKEN_ASSIGN_R_SH: value = gen_node2(left, NEXT(rhs_prec), NT_R_SH); break;
    case TOKEN_ASSIGN_AND:  value = gen_node2(left, NEXT(rhs_prec), NT_AND);  break;
    case TOKEN_ASSIGN_OR:   value = gen_node2(left, NEXT(rhs_prec), NT_OR);   break;
    case TOKEN_ASSIGN_XOR:  value = gen_node2(left, NEXT(rhs_prec), NT_XOR);  break;
    default:
        LOG_ERROR("unsupported operator");
        break;
    }

    return gen_node2(left, value, NT_ASSIGN);
}

/**
 * Infix parser for the equality and relational operators.
 *
 * The right operand is parsed at the operator's own precedence, which makes
 * the operator left-associative. On an unrecognized token an error is logged
 * and a node with only expr.left set is returned.
 */
static ast_node_t *parse_cmp(tok_stream_t *tokbuf, symtab_t *symtab, ast_node_t *left)
{
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    pop_tok(tokbuf);

    ast_node_t *node = new_ast_node();
    node->expr.left = left;

    enum Precedence rhs_prec;
    switch (ttype) {
    case TOKEN_EQ:  node->type = NT_EQ;  rhs_prec = PREC_EQUALITY;   break;
    case TOKEN_NEQ: node->type = NT_NEQ; rhs_prec = PREC_EQUALITY;   break;
    case TOKEN_LT:  node->type = NT_LT;  rhs_prec = PREC_RELATIONAL; break;
    case TOKEN_GT:  node->type = NT_GT;  rhs_prec = PREC_RELATIONAL; break;
    case TOKEN_LE:  node->type = NT_LE;  rhs_prec = PREC_RELATIONAL; break;
    case TOKEN_GE:  node->type = NT_GE;  rhs_prec = PREC_RELATIONAL; break;
    default:
        LOG_ERROR("invalid operator");
        return node;
    }

    node->expr.right = NEXT(rhs_prec);
    return node;
}

/**
 * Infix parser for the logical, bitwise, shift and arithmetic binary
 * operators.
 *
 * The right operand is parsed at the operator's own precedence, which makes
 * the operator left-associative. On an unrecognized token an error is logged
 * (matching parse_cmp) and a node with only expr.left set is returned.
 */
static ast_node_t *parse_cal(tok_stream_t *tokbuf, symtab_t *symtab, ast_node_t *left)
{
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    pop_tok(tokbuf);

    ast_node_t *node = new_ast_node();
    node->expr.left = left;

    enum Precedence rhs_prec;
    switch (ttype) {
    case TOKEN_OR_OR:   node->type = NT_OR_OR;   rhs_prec = PREC_LOGICAL_OR;     break;
    case TOKEN_AND_AND: node->type = NT_AND_AND; rhs_prec = PREC_LOGICAL_AND;    break;
    case TOKEN_OR:      node->type = NT_OR;      rhs_prec = PREC_OR;             break;
    case TOKEN_XOR:     node->type = NT_XOR;     rhs_prec = PREC_XOR;            break;
    case TOKEN_AND:     node->type = NT_AND;     rhs_prec = PREC_AND;            break;
    case TOKEN_L_SH:    node->type = NT_L_SH;    rhs_prec = PREC_SHIFT;          break;
    case TOKEN_R_SH:    node->type = NT_R_SH;    rhs_prec = PREC_SHIFT;          break;
    case TOKEN_ADD:     node->type = NT_ADD;     rhs_prec = PREC_ADDITIVE;       break;
    case TOKEN_SUB:     node->type = NT_SUB;     rhs_prec = PREC_ADDITIVE;       break;
    case TOKEN_MUL:     node->type = NT_MUL;     rhs_prec = PREC_MULTIPLICATIVE; break;
    case TOKEN_DIV:     node->type = NT_DIV;     rhs_prec = PREC_MULTIPLICATIVE; break;
    case TOKEN_MOD:     node->type = NT_MOD;     rhs_prec = PREC_MULTIPLICATIVE; break;
    default:
        LOG_ERROR("invalid operator");
        return node;
    }

    node->expr.right = NEXT(rhs_prec);
    return node;
}

/**
 * Parse a call's argument list; the current token must be the '(' that
 * follows the callee identifier.
 *
 * @param ident  identifier node of the callee; its token text is used to
 *               look the function up in the symbol table
 * @return an NT_TERM_CALL node. call.func_decl is the symbol-table result and
 *         may be NULL or a non-function node; an error is logged in that case.
 */
static ast_node_t *parse_call(tok_stream_t *tokbuf, symtab_t *symtab, ast_node_t *ident)
{
    ast_node_t *node = new_ast_node();
    node->type = NT_TERM_CALL;
    node->call.name = ident;
    node->call.params = new_ast_node();
    vector_init(node->call.params->params.params);

    pop_tok(tokbuf); // skip '('

    for (;;) {
        flush_peek_tok(tokbuf);
        cc_tktype_t ttype = peek_tok_type(tokbuf);
        if (ttype == TOKEN_R_PAREN) {
            pop_tok(tokbuf); // skip ')'
            break;
        }

        /* Arguments are parsed at PREC_EXPRESSION, so a ',' ends the
         * argument instead of being treated as the comma operator. */
        ast_node_t *param = NEXT(PREC_EXPRESSION);
        if (param == NULL) {
            /* Nothing was parsed, so looping again would make no progress. */
            LOG_ERROR("expected expression in call arguments");
            break;
        }
        vector_push(node->call.params->params.params, param);

        flush_peek_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype == TOKEN_COMMA) {
            pop_tok(tokbuf);
        } else if (ttype != TOKEN_R_PAREN) {
            /* Neither separator nor terminator: stop rather than spin. */
            LOG_ERROR("expected ',' or ')' in call arguments");
            break;
        }
    }

    const char *name = ident->syms.tok.val.str;
    ast_node_t *sym = symtab_lookup_symbol(symtab, name);
    // TODO check that the arguments match the declaration
    if (sym == NULL || sym->type != NT_DECL_FUNC) {
        LOG_ERROR("function not decl %s", name);
    }
    node->call.func_decl = sym;
    return node;
}

/**
 * Parse a parenthesized expression `( expr )` and return the inner
 * expression node; no wrapper node is created.
 *
 * Registered in expr_table as an infix parser, but the incoming `left`
 * operand is not used.
 */
static ast_node_t *parse_paren(tok_stream_t *tokbuf, symtab_t *symtab, ast_node_t *left)
{
    (void)left;

    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_L_PAREN);

    ast_node_t *inner = NEXT(PREC_EXPRESSION);

    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_R_PAREN);
    return inner;
}

typedef ast_node_t *(*parse_expr_fun_t)(tok_stream_t *, symtab_t *, ast_node_t *);

/**
 * Per-token dispatch table, indexed by token type.
 * Slots not listed here are zero-initialized (parser == NULL).
 */
static const struct expr_prec_table_t {
    parse_expr_fun_t parser;
    enum Precedence prec;
    enum ParseType ptype;
} expr_table[256] = {
    [TOKEN_COMMA]       = {parse_comma,  PREC_EXPRESSION,     INFIX_PARSER},

    [TOKEN_ASSIGN]      = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_ADD]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_SUB]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_MUL]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_DIV]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_MOD]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_AND]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_OR]   = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},
    [TOKEN_ASSIGN_XOR]  = {parse_assign, PREC_ASSIGNMENT,     INFIX_PARSER},

    [TOKEN_OR_OR]       = {parse_cal,    PREC_LOGICAL_OR,     INFIX_PARSER},
    [TOKEN_AND_AND]     = {parse_cal,    PREC_LOGICAL_AND,    INFIX_PARSER},
    [TOKEN_OR]          = {parse_cal,    PREC_OR,             INFIX_PARSER},
    [TOKEN_XOR]         = {parse_cal,    PREC_XOR,            INFIX_PARSER},
    [TOKEN_AND]         = {parse_cal,    PREC_AND,            INFIX_PARSER},

    [TOKEN_EQ]          = {parse_cmp,    PREC_EQUALITY,       INFIX_PARSER},
    [TOKEN_NEQ]         = {parse_cmp,    PREC_EQUALITY,       INFIX_PARSER},
    [TOKEN_LT]          = {parse_cmp,    PREC_RELATIONAL,     INFIX_PARSER},
    [TOKEN_LE]          = {parse_cmp,    PREC_RELATIONAL,     INFIX_PARSER},
    [TOKEN_GT]          = {parse_cmp,    PREC_RELATIONAL,     INFIX_PARSER},
    [TOKEN_GE]          = {parse_cmp,    PREC_RELATIONAL,     INFIX_PARSER},

    [TOKEN_L_SH]        = {parse_cal,    PREC_SHIFT,          INFIX_PARSER},
    [TOKEN_R_SH]        = {parse_cal,    PREC_SHIFT,          INFIX_PARSER},
    [TOKEN_ADD]         = {parse_cal,    PREC_ADDITIVE,       INFIX_PARSER},
    [TOKEN_SUB]         = {parse_cal,    PREC_ADDITIVE,       INFIX_PARSER},
    [TOKEN_MUL]         = {parse_cal,    PREC_MULTIPLICATIVE, INFIX_PARSER},
    [TOKEN_DIV]         = {parse_cal,    PREC_MULTIPLICATIVE, INFIX_PARSER},
    [TOKEN_MOD]         = {parse_cal,    PREC_MULTIPLICATIVE, INFIX_PARSER},

    /* Prefix operators are listed but have no parser yet. */
    [TOKEN_NOT]         = {NULL,         PREC_UNARY,          PREFIX_PARSER},
    [TOKEN_BIT_NOT]     = {NULL,         PREC_UNARY,          PREFIX_PARSER},
    [TOKEN_ADD_ADD]     = {NULL,         PREC_UNARY,          PREFIX_PARSER},
    [TOKEN_SUB_SUB]     = {NULL,         PREC_UNARY,          PREFIX_PARSER},
    // TODO: unary + - * & sizeof

    [TOKEN_L_PAREN]     = {parse_paren,  PREC_POSTFIX,        INFIX_PARSER},
};

/**
 * Return the table entry for a token type, or NULL when the token type does
 * not fit inside expr_table.
 */
static const struct expr_prec_table_t *lookup_expr_entry(cc_tktype_t ttype)
{
    size_t idx = (size_t)ttype;
    if (idx >= sizeof expr_table / sizeof expr_table[0]) {
        return NULL;
    }
    return &expr_table[idx];
}

/**
 * Parse a literal, an identifier reference, or a function call.
 *
 * @return the parsed node, or NULL (with no token consumed) when the current
 *         token cannot start a primary expression.
 */
static ast_node_t *parse_primary_expression(tok_stream_t *tokbuf, symtab_t *symtab)
{
    flush_peek_tok(tokbuf);
    tok_t *tok = peek_tok(tokbuf);
    if (tok == NULL) {
        return NULL;
    }

    switch (tok->sub_type) {
    case TOKEN_FLOAT_LITERAL:
        LOG_WARN("float not supported");
        /* fallthrough: still produces a value node, as for other literals */
    case TOKEN_INT_LITERAL:
    case TOKEN_CHAR_LITERAL:
    case TOKEN_STRING_LITERAL: {
        /* String literals get their own value node; they must not reach the
         * identifier path below. */
        ast_node_t *node = new_ast_node();
        node->type = NT_TERM_VAL;
        node->syms.tok = *tok; /* copy before the token is popped */
        pop_tok(tokbuf);
        return node;
    }

    case TOKEN_IDENT: {
        /* Grab the name before popping; `tok` is not touched afterwards. */
        const char *name = tok->val.str;

        ast_node_t *node = expect_pop_ident(tokbuf);
        if (node == NULL) {
            return NULL;
        }

        /* NOTE(review): unlike the other call sites there is no
         * flush_peek_tok() before this peek -- confirm that is intended. */
        cc_tktype_t ttype = peek_tok_type(tokbuf);
        if (ttype == TOKEN_L_PAREN) {
            return parse_call(tokbuf, symtab, node);
        }

        void *sym = symtab_lookup_symbol(symtab, name);
        if (sym == NULL) {
            LOG_ERROR("undefined symbol but use %s", name);
        }
        node->type = NT_TERM_IDENT;
        node->syms.decl_node = sym;
        return node;
    }

    default:
        return NULL;
    }
}

/**
 * Precedence-climbing core: parse operands and the infix operators that bind
 * tighter than `prec`.
 *
 * Parsing stops at ';' or ')', at an infix operator whose precedence is
 * <= `prec`, or at a token that cannot start an operand.
 *
 * @param prec  binding power of the enclosing operator
 * @return the expression parsed so far, or NULL if nothing was parsed
 */
static ast_node_t *parse_subexpression(tok_stream_t *tokbuf, symtab_t *symtab,
                                       enum Precedence prec)
{
    ast_node_t *left = NULL;

    for (;;) {
        flush_peek_tok(tokbuf);
        cc_tktype_t ttype = peek_tok_type(tokbuf);

        // FIXME: expression terminators are hard-coded here
        if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
            break;
        }

        const struct expr_prec_table_t *work = lookup_expr_entry(ttype);
        int has_parser = (work != NULL && work->parser != NULL);

        if (has_parser && work->ptype == INFIX_PARSER) {
            if (work->prec <= prec) {
                break;
            }
            /* `left` is NULL here when the operator starts the expression;
             * parse_paren relies on being reachable that way. */
            left = work->parser(tokbuf, symtab, left);
            continue;
        }

        /* Prefix operator with a parser, or anything without a table entry. */
        ast_node_t *operand = has_parser
            ? work->parser(tokbuf, symtab, NULL)
            : parse_primary_expression(tokbuf, symtab);
        if (operand == NULL) {
            /* No operand was produced; stop here instead of retrying the
             * same token forever. */
            break;
        }
        left = operand;
    }

    return left;
}

/**
 * Parse one expression from the parser's token stream.
 *
 * @return the root of the expression tree, or NULL if the current token
 *         cannot start an expression (an error is logged) or nothing could
 *         be parsed.
 */
ast_node_t *parse_expr(parser_t *parser)
{
    tok_stream_t *tokbuf = &(parser->tokbuf);
    symtab_t *symtab = parser->symtab;

    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);

    switch (ttype) {
    case TOKEN_NOT:
    case TOKEN_AND:
    case TOKEN_L_PAREN:
    case TOKEN_MUL:
    case TOKEN_ADD:
    case TOKEN_SUB:
    case TOKEN_BIT_NOT:
    case TOKEN_AND_AND:
    case TOKEN_CHAR_LITERAL:
    case TOKEN_INT_LITERAL:
    case TOKEN_STRING_LITERAL:
    case TOKEN_ADD_ADD:
    case TOKEN_SUB_SUB:
    case TOKEN_SIZEOF:
    case TOKEN_IDENT:
        return NEXT(PREC_EXPRESSION);
    default:
        LOG_ERROR("Want expr but not got %s", get_tok_name(ttype));
        return NULL;
    }
}