#ifndef __SCC_LEXER_TOKEN_H__ #define __SCC_LEXER_TOKEN_H__ #include #include #include struct scc_lexer_token; typedef struct scc_lexer_token scc_lexer_tok_t; typedef SCC_RING(scc_lexer_tok_t) scc_lexer_tok_ring_t; typedef SCC_VEC(scc_lexer_tok_t) scc_lexer_tok_vec_t; typedef enum scc_cstd { SCC_CSTD_C89, SCC_CSTD_C99, SCC_CEXT_SCC, } scc_cstd_t; /* clang-format off */ /// https://cppreference.cn/w/c/preprocessor #define SCC_PPKEYWORD_TABLE \ X(define , SCC_CSTD_C99, SCC_PP_TOK_DEFINE ) \ X(elif , SCC_CSTD_C99, SCC_PP_TOK_ELIF ) \ X(elifdef , SCC_CSTD_C99, SCC_PP_TOK_ELIFDEF ) \ X(elifndef , SCC_CSTD_C99, SCC_PP_TOK_ELIFNDEF ) \ X(else , SCC_CSTD_C99, SCC_PP_TOK_ELSE ) \ X(embed , SCC_CSTD_C99, SCC_PP_TOK_EMBED ) \ X(endif , SCC_CSTD_C99, SCC_PP_TOK_ENDIF ) \ X(error , SCC_CSTD_C99, SCC_PP_TOK_ERROR ) \ X(if , SCC_CSTD_C99, SCC_PP_TOK_IF ) \ X(ifdef , SCC_CEXT_SCC, SCC_PP_TOK_IFDEF ) \ X(ifndef , SCC_CSTD_C99, SCC_PP_TOK_IFNDEF ) \ X(include , SCC_CSTD_C99, SCC_PP_TOK_INCLUDE ) \ X(line , SCC_CEXT_SCC, SCC_PP_TOK_LINE ) \ X(pragma , SCC_CSTD_C99, SCC_PP_TOK_PRAGMA ) \ X(undef , SCC_CEXT_SCC, SCC_PP_TOK_UNDEF ) \ X(warning , SCC_CSTD_C99, SCC_PP_TOK_WARNING ) \ // END /* clang-format on */ /* clang-format off */ // WARNING: Using Binary Search To Fast Find Keyword // 你必须确保其中是按照字典序排列 #define SCC_CKEYWORD_TABLE \ X(asm , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ASM , SCC_CEXT_SCC) \ X(atomic , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ATOMIC , SCC_CEXT_SCC) \ X(auto , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_AUTO , SCC_CEXT_SCC) \ X(bool , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_BOOL , SCC_CEXT_SCC) \ X(break , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_BREAK , SCC_CSTD_C89) \ X(case , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CASE , SCC_CSTD_C89) \ X(char , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CHAR , SCC_CSTD_C89) \ X(complex , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_COMPLEX , SCC_CEXT_SCC) \ X(const , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CONST , SCC_CSTD_C89) \ X(continue , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CONTINUE , SCC_CSTD_C89) \ X(default , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DEFAULT , SCC_CSTD_C89) \ X(do , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DO , SCC_CSTD_C89) \ X(double , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DOUBLE , SCC_CSTD_C89) \ X(else , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ELSE , SCC_CSTD_C89) \ X(enum , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ENUM , SCC_CSTD_C89) \ X(extern , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_EXTERN , SCC_CSTD_C89) \ X(float , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_FLOAT , SCC_CSTD_C89) \ X(for , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_FOR , SCC_CSTD_C89) \ X(goto , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_GOTO , SCC_CSTD_C89) \ X(if , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_IF , SCC_CSTD_C89) \ X(inline , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_INLINE , SCC_CSTD_C99) \ X(int , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_INT , SCC_CSTD_C89) \ X(long , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_LONG , SCC_CSTD_C89) \ X(register , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_REGISTER , SCC_CSTD_C89) \ X(restrict , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_RESTRICT , SCC_CSTD_C99) \ X(return , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_RETURN , SCC_CSTD_C89) \ X(short , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SHORT , SCC_CSTD_C89) \ X(signed , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SIGNED , SCC_CSTD_C89) \ X(sizeof , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SIZEOF , SCC_CSTD_C89) \ X(static , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_STATIC , SCC_CSTD_C89) \ X(struct , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_STRUCT , SCC_CSTD_C89) \ X(switch , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SWITCH , SCC_CSTD_C89) \ X(typedef , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_TYPEDEF , SCC_CSTD_C89) \ X(union , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_UNION , SCC_CSTD_C89) \ X(unsigned , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_UNSIGNED , SCC_CSTD_C89) \ X(void , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_VOID , SCC_CSTD_C89) \ X(volatile , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_VOLATILE , SCC_CSTD_C89) \ X(while , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_WHILE , SCC_CSTD_C89) \ // KEYWORD_TABLE #define SCC_CTOK_TABLE \ X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \ X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \ X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \ X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \ X("#" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP ) \ X("##" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP_SHARP ) \ X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \ X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \ X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \ X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \ X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \ X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \ X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \ X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \ X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \ X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \ X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \ X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \ X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \ X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \ X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \ X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \ X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \ X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \ X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \ X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \ X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \ X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \ X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \ X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \ X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \ X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \ X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \ X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \ X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \ X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \ X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \ X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \ X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \ X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \ X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \ X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \ X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \ X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \ X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \ X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \ X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \ X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \ X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \ X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \ X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \ X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \ X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \ X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \ X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \ X(int , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \ X(float , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \ X(char , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \ X(string , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \ // END /* clang-format on */ typedef enum scc_tok_type { /* clang-format off */ // must first becase the unknown token must be 0 #define X(str, subtype, tok) tok, SCC_CTOK_TABLE #undef X #define X(name, type, tok) tok, SCC_PPKEYWORD_TABLE #undef X #define X(name, subtype, tok, std) tok, SCC_CKEYWORD_TABLE #undef X /* clang-format on*/ } scc_tok_type_t; typedef enum scc_tok_subtype { SCC_TOK_SUBTYPE_INVALID, // 错误占位 SCC_TOK_SUBTYPE_OPERATOR, // 操作符 SCC_TOK_SUBTYPE_IDENTIFIER, // 标识符 SCC_TOK_SUBTYPE_LITERAL, // 字面量 SCC_TOK_SUBTYPE_EMPTYSPACE, // 空白 SCC_TOK_SUBTYPE_COMMENT, // 注释 SCC_TOK_SUBTYPE_EOF // 结束标记 } scc_tok_subtype_t; /** * @brief 词法分析结果 * @warning 需要手动释放lexeme否则会出现内存泄漏 */ struct scc_lexer_token { scc_tok_type_t type; scc_cstring_t lexeme; scc_pos_t loc; }; scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type); const char *scc_get_tok_name(scc_tok_type_t type); static inline void scc_lexer_tok_drop(scc_lexer_tok_t *tok) { tok->type = SCC_TOK_UNKNOWN; tok->loc.col = 0; tok->loc.line = 0; tok->loc.name = null; tok->loc.offset = 0; scc_cstring_free(&tok->lexeme); } static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok, scc_tok_type_t type) { return tok->type == type; } // 深拷贝 token static inline scc_lexer_tok_t scc_lexer_tok_copy(const scc_lexer_tok_t *src) { scc_lexer_tok_t dst = *src; dst.lexeme = scc_cstring_copy(&src->lexeme); return dst; } // 移动 token(源 token 不再拥有 lexeme) static inline void scc_lexer_tok_move(scc_lexer_tok_t *dst, scc_lexer_tok_t *src) { *dst = *src; src->lexeme.data = null; src->lexeme.size = 0; src->lexeme.cap = 0; } #endif /* __SCC_LEXER_TOKEN_H__ */