ZZY 2b4857001c feat(frontend): 重构词法分析器
- 添加 .gitignore 文件,忽略编译器生成的二进制文件
- 重构 lexer.c 文件,改进了关键字处理和字符串处理
- 更新前端的前端、解析器和 AST 相关文件,以适应新的词法分析器
- 优化了 token 相关的定义和函数,引入了新的 token 类型
2025-03-23 12:13:16 +08:00

143 lines
7.0 KiB
C

#ifndef __SMCC_CC_TOKEN_H__
#define __SMCC_CC_TOKEN_H__
#include <lib/utils/utils.h>
// Origin of a keyword: the C standard revision (or extension) it comes from.
// These values fill the second column of every KEYWORD_TABLE row.
enum CSTD_KEYWORD {
    CSTD_C89 = 0, // keyword tagged as C89 in KEYWORD_TABLE
    CSTD_C99 = 1, // keyword tagged as C99 in KEYWORD_TABLE (inline, restrict)
    CEXT_ASM = 2, // extension keyword (asm)
};
// Keyword X-macro table.
//
// Row format: X(spelling, standard, token)
//   spelling - the keyword written as a bare identifier
//   standard - an enum CSTD_KEYWORD value naming where the keyword comes from
//   token    - the cc_tktype enumerator generated for the keyword
//
// Rows are kept in ascending strcmp() order of `spelling`: keyword lookup is
// meant to use binary search, so new rows must be inserted in sorted position.
//
// Every row, including the last, ends with a line continuation; the closing
// `// KEYWORD_TABLE` comment line is what terminates the macro definition.
//
// NOTE(review): `auto` does not appear in this table - confirm that is
// intentional before relying on it being lexed as a keyword.
#define KEYWORD_TABLE \
    X(asm,      CEXT_ASM, TOKEN_ASM)      \
    X(break,    CSTD_C89, TOKEN_BREAK)    \
    X(case,     CSTD_C89, TOKEN_CASE)     \
    X(char,     CSTD_C89, TOKEN_CHAR)     \
    X(const,    CSTD_C89, TOKEN_CONST)    \
    X(continue, CSTD_C89, TOKEN_CONTINUE) \
    X(default,  CSTD_C89, TOKEN_DEFAULT)  \
    X(do,       CSTD_C89, TOKEN_DO)       \
    X(double,   CSTD_C89, TOKEN_DOUBLE)   \
    X(else,     CSTD_C89, TOKEN_ELSE)     \
    X(enum,     CSTD_C89, TOKEN_ENUM)     \
    X(extern,   CSTD_C89, TOKEN_EXTERN)   \
    X(float,    CSTD_C89, TOKEN_FLOAT)    \
    X(for,      CSTD_C89, TOKEN_FOR)      \
    X(goto,     CSTD_C89, TOKEN_GOTO)     \
    X(if,       CSTD_C89, TOKEN_IF)       \
    X(inline,   CSTD_C99, TOKEN_INLINE)   \
    X(int,      CSTD_C89, TOKEN_INT)      \
    X(long,     CSTD_C89, TOKEN_LONG)     \
    X(register, CSTD_C89, TOKEN_REGISTER) \
    X(restrict, CSTD_C99, TOKEN_RESTRICT) \
    X(return,   CSTD_C89, TOKEN_RETURN)   \
    X(short,    CSTD_C89, TOKEN_SHORT)    \
    X(signed,   CSTD_C89, TOKEN_SIGNED)   \
    X(sizeof,   CSTD_C89, TOKEN_SIZEOF)   \
    X(static,   CSTD_C89, TOKEN_STATIC)   \
    X(struct,   CSTD_C89, TOKEN_STRUCT)   \
    X(switch,   CSTD_C89, TOKEN_SWITCH)   \
    X(typedef,  CSTD_C89, TOKEN_TYPEDEF)  \
    X(union,    CSTD_C89, TOKEN_UNION)    \
    X(unsigned, CSTD_C89, TOKEN_UNSIGNED) \
    X(void,     CSTD_C89, TOKEN_VOID)     \
    X(volatile, CSTD_C89, TOKEN_VOLATILE) \
    X(while,    CSTD_C89, TOKEN_WHILE)    \
    // KEYWORD_TABLE
// Non-keyword token X-macro table.
//
// Row format: X(str, basic, tok)
//   str   - display form of the token: a bare word for the synthetic kinds
//           (init, EOF, blank, ident, *_literal) or a string literal holding
//           the operator / comment spelling
//   basic - a TK_BASIC_* category (declared outside this header)
//   tok   - the cc_tktype enumerator generated for the row
//
// Row order fixes the numeric values of the cc_tktype enumerators, so append
// or reorder with care. As with KEYWORD_TABLE, every row ends with a line
// continuation and the closing `// END` comment line terminates the macro.
//
// NOTE(review): most operator groups list the longest spelling first, but
// "/" precedes "//", "!" precedes "!=" and "." precedes "..." - confirm no
// consumer of this table relies on longest-first ordering.
#define TOKEN_TABLE \
    X(init,           TK_BASIC_INVALID,    TOKEN_INIT)           \
    X(EOF,            TK_BASIC_EOF,        TOKEN_EOF)            \
    X(blank,          TK_BASIC_WHITESPACE, TOKEN_BLANK)          \
    X("==",           TK_BASIC_OPERATOR,   TOKEN_EQ)             \
    X("=",            TK_BASIC_OPERATOR,   TOKEN_ASSIGN)         \
    X("++",           TK_BASIC_OPERATOR,   TOKEN_ADD_ADD)        \
    X("+=",           TK_BASIC_OPERATOR,   TOKEN_ASSIGN_ADD)     \
    X("+",            TK_BASIC_OPERATOR,   TOKEN_ADD)            \
    X("--",           TK_BASIC_OPERATOR,   TOKEN_SUB_SUB)        \
    X("-=",           TK_BASIC_OPERATOR,   TOKEN_ASSIGN_SUB)     \
    X("->",           TK_BASIC_OPERATOR,   TOKEN_DEREF)          \
    X("-",            TK_BASIC_OPERATOR,   TOKEN_SUB)            \
    X("*=",           TK_BASIC_OPERATOR,   TOKEN_ASSIGN_MUL)     \
    X("*",            TK_BASIC_OPERATOR,   TOKEN_MUL)            \
    X("/=",           TK_BASIC_OPERATOR,   TOKEN_ASSIGN_DIV)     \
    X("/",            TK_BASIC_OPERATOR,   TOKEN_DIV)            \
    X("//",           TK_BASIC_COMMENT,    TOKEN_LINE_COMMENT)   \
    X("/* */",        TK_BASIC_COMMENT,    TOKEN_BLOCK_COMMENT)  \
    X("%=",           TK_BASIC_OPERATOR,   TOKEN_ASSIGN_MOD)     \
    X("%",            TK_BASIC_OPERATOR,   TOKEN_MOD)            \
    X("&&",           TK_BASIC_OPERATOR,   TOKEN_AND_AND)        \
    X("&=",           TK_BASIC_OPERATOR,   TOKEN_ASSIGN_AND)     \
    X("&",            TK_BASIC_OPERATOR,   TOKEN_AND)            \
    X("||",           TK_BASIC_OPERATOR,   TOKEN_OR_OR)          \
    X("|=",           TK_BASIC_OPERATOR,   TOKEN_ASSIGN_OR)      \
    X("|",            TK_BASIC_OPERATOR,   TOKEN_OR)             \
    X("^=",           TK_BASIC_OPERATOR,   TOKEN_ASSIGN_XOR)     \
    X("^",            TK_BASIC_OPERATOR,   TOKEN_XOR)            \
    X("<<=",          TK_BASIC_OPERATOR,   TOKEN_ASSIGN_L_SH)    \
    X("<<",           TK_BASIC_OPERATOR,   TOKEN_L_SH)           \
    X("<=",           TK_BASIC_OPERATOR,   TOKEN_LE)             \
    X("<",            TK_BASIC_OPERATOR,   TOKEN_LT)             \
    X(">>=",          TK_BASIC_OPERATOR,   TOKEN_ASSIGN_R_SH)    \
    X(">>",           TK_BASIC_OPERATOR,   TOKEN_R_SH)           \
    X(">=",           TK_BASIC_OPERATOR,   TOKEN_GE)             \
    X(">",            TK_BASIC_OPERATOR,   TOKEN_GT)             \
    X("!",            TK_BASIC_OPERATOR,   TOKEN_NOT)            \
    X("!=",           TK_BASIC_OPERATOR,   TOKEN_NEQ)            \
    X("~",            TK_BASIC_OPERATOR,   TOKEN_BIT_NOT)        \
    X("[",            TK_BASIC_OPERATOR,   TOKEN_L_BRACKET)      \
    X("]",            TK_BASIC_OPERATOR,   TOKEN_R_BRACKET)      \
    X("(",            TK_BASIC_OPERATOR,   TOKEN_L_PAREN)        \
    X(")",            TK_BASIC_OPERATOR,   TOKEN_R_PAREN)        \
    X("{",            TK_BASIC_OPERATOR,   TOKEN_L_BRACE)        \
    X("}",            TK_BASIC_OPERATOR,   TOKEN_R_BRACE)        \
    X(";",            TK_BASIC_OPERATOR,   TOKEN_SEMICOLON)      \
    X(",",            TK_BASIC_OPERATOR,   TOKEN_COMMA)          \
    X(":",            TK_BASIC_OPERATOR,   TOKEN_COLON)          \
    X(".",            TK_BASIC_OPERATOR,   TOKEN_DOT)            \
    X("...",          TK_BASIC_OPERATOR,   TOKEN_ELLIPSIS)       \
    X("?",            TK_BASIC_OPERATOR,   TOKEN_COND)           \
    X(ident,          TK_BASIC_IDENTIFIER, TOKEN_IDENT)          \
    X(int_literal,    TK_BASIC_LITERAL,    TOKEN_INT_LITERAL)    \
    X(float_literal,  TK_BASIC_LITERAL,    TOKEN_FLOAT_LITERAL)  \
    X(char_literal,   TK_BASIC_LITERAL,    TOKEN_CHAR_LITERAL)   \
    X(string_literal, TK_BASIC_LITERAL,    TOKEN_STRING_LITERAL) \
    // END
// Token type enumeration, generated from the two X-macro tables above.
typedef enum cc_tktype {
// Ordinary tokens: one enumerator per TOKEN_TABLE row, in table order.
#define X(text, category, id) id,
    TOKEN_TABLE
#undef X
// Keywords (same row layout as before): one enumerator per KEYWORD_TABLE row,
// numbered directly after the ordinary tokens.
#define X(spelling, standard, id) id,
    KEYWORD_TABLE
#undef X
} cc_tktype_t;
typedef struct tok_stream {
int cur;
int end;
int peek;
int size;
int cap;
tok_t* buf;
void* stream;
void (*gettok)(void* stream, tok_t* token);
} tok_stream_t;
typedef void(*tok_stream_get_func)(void* stream, tok_t* token);
// Token stream API. Behavioural notes below are hedged: only the signatures
// are visible in this header, so confirm details against the implementation.

// Initialise `tokbuf` with the token source `stream` and its `gettok` callback.
void init_tokbuf(tok_stream_t *tokbuf, void *stream, tok_stream_get_func gettok);

// Look at a token in the stream; presumably does not consume it.
tok_t *peek_tok(tok_stream_t *tokbuf);

// Take a token from the stream; presumably consumes it.
tok_t *pop_tok(tok_stream_t *tokbuf);

// Presumably resets or discards pending look-ahead state - TODO confirm.
void flush_peek_tok(tok_stream_t *tokbuf);

// Type of the token that peek_tok() would yield (presumably - confirm).
cc_tktype_t peek_tok_type(tok_stream_t *tokbuf);

// Pop a token that is expected to be of `type`.
// NOTE(review): the meaning of the int result (success/failure convention)
// is not visible here.
int expect_pop_tok(tok_stream_t *tokbuf, cc_tktype_t type);

// Human-readable name for a token type.
const char *get_tok_name(cc_tktype_t type);
#endif