feat(frontend): 重构词法分析器

- 添加 .gitignore 文件，忽略编译器生成的二进制文件
- 重构 lexer.c 文件，改进了关键字处理和字符串处理
- 更新前端的前端、解析器和 AST 相关文件，以适应新的词法分析器
- 优化了 token 相关的定义和函数，引入了新的 token 类型
This commit is contained in:
ZZY
2025-03-23 12:13:16 +08:00
parent 05c637e594
commit 2b4857001c
33 changed files with 532 additions and 624 deletions

View File

@@ -1,5 +1,7 @@
#ifndef __TOKEN_H__
#define __TOKEN_H__
#ifndef __SMCC_CC_TOKEN_H__
#define __SMCC_CC_TOKEN_H__
#include <lib/utils/utils.h>
enum CSTD_KEYWORD {
CSTD_C89,
@@ -46,68 +48,68 @@ enum CSTD_KEYWORD {
// KEYWORD_TABLE
// TOKEN_TABLE is an X-macro list of the non-keyword tokens.
// A consumer defines X(...) before expanding TOKEN_TABLE and #undefs it
// afterwards; the token-type enum in this header does so with an X that
// keeps only the last argument, turning every row into one enumerator.
//
// Row layout as it appears in this listing:
//   X(spelling, TOKEN_xxx)                  two-argument form
//   X(spelling, TK_BASIC_xxx, TOKEN_xxx)    three-argument form
// The first argument is either a quoted operator spelling ("==") or a bare
// descriptive word (EOF, init, ident, int_literal, ...).
//
// NOTE(review): both forms are present here and most TOKEN_xxx names occur
// once in each form. This looks like the old and the new revision of the
// table shown together; a single X definition cannot expand both arities,
// and the enum would get duplicate enumerators. Confirm only one set of
// rows is present in the real header.
//
// NOTE(review): multi-character operators are mostly listed before their
// shorter prefixes ("==" before "=", "<<=" before "<<" before "<"), but
// "/" precedes "//", "!" precedes "!=" and "." precedes "...". If any
// consumer matches spellings in table order, verify those rows.
//
// The trailing "// END" line is attached to the macro by the backslash on
// the row before it; being a comment it contributes no tokens.
#define TOKEN_TABLE \
X(EOF , TOKEN_EOF) \
X(init , TOKEN_INIT) \
X(flush , TOKEN_FLUSH) \
X("==" , TOKEN_EQ) \
X("=" , TOKEN_ASSIGN) \
X("++" , TOKEN_ADD_ADD) \
X("+=" , TOKEN_ASSIGN_ADD) \
X("+" , TOKEN_ADD) \
X("--" , TOKEN_SUB_SUB) \
X("-=" , TOKEN_ASSIGN_SUB) \
X("->" , TOKEN_DEREF) \
X("-" , TOKEN_SUB) \
X("*=" , TOKEN_ASSIGN_MUL) \
X("*" , TOKEN_MUL) \
X("/=" , TOKEN_ASSIGN_DIV) \
X("/" , TOKEN_DIV) \
X("//" , TOKEN_LINE_COMMENT) \
X("/* */" , TOKEN_BLOCK_COMMENT) \
X("%=" , TOKEN_ASSIGN_MOD) \
X("%" , TOKEN_MOD) \
X("&&" , TOKEN_AND_AND) \
X("&=" , TOKEN_ASSIGN_AND) \
X("&" , TOKEN_AND) \
X("||" , TOKEN_OR_OR) \
X("|=" , TOKEN_ASSIGN_OR) \
X("|" , TOKEN_OR) \
X("^=" , TOKEN_ASSIGN_XOR) \
X("^" , TOKEN_XOR) \
X("<<=" , TOKEN_ASSIGN_L_SH) \
X("<<" , TOKEN_L_SH) \
X("<=" , TOKEN_LE) \
X("<" , TOKEN_LT) \
X(">>=" , TOKEN_ASSIGN_R_SH) \
X(">>" , TOKEN_R_SH) \
X(">=" , TOKEN_GE) \
X(">" , TOKEN_GT) \
X("!" , TOKEN_NOT) \
X("!=" , TOKEN_NEQ) \
X("~" , TOKEN_BIT_NOT) \
X("[" , TOKEN_L_BRACKET) \
X("]" , TOKEN_R_BRACKET) \
X("(" , TOKEN_L_PAREN) \
X(")" , TOKEN_R_PAREN) \
X("{" , TOKEN_L_BRACE) \
X("}" , TOKEN_R_BRACE) \
X(";" , TOKEN_SEMICOLON) \
X("," , TOKEN_COMMA) \
X(":" , TOKEN_COLON) \
X("." , TOKEN_DOT) \
X("..." , TOKEN_ELLIPSIS) \
X("?" , TOKEN_COND) \
X(identifier , TOKEN_IDENT) \
X(int_literal , TOKEN_INT_LITERAL) \
X(float_literal , TOKEN_FLOAT_LITERAL) \
X(char_literal , TOKEN_CHAR_LITERAL) \
X(string_literal , TOKEN_STRING_LITERAL) \
X(init , TK_BASIC_INVALID, TOKEN_INIT) \
X(EOF , TK_BASIC_EOF, TOKEN_EOF) \
X(blank , TK_BASIC_WHITESPACE, TOKEN_BLANK) \
X("==" , TK_BASIC_OPERATOR, TOKEN_EQ) \
X("=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN) \
X("++" , TK_BASIC_OPERATOR, TOKEN_ADD_ADD) \
X("+=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_ADD) \
X("+" , TK_BASIC_OPERATOR, TOKEN_ADD) \
X("--" , TK_BASIC_OPERATOR, TOKEN_SUB_SUB) \
X("-=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_SUB) \
X("->" , TK_BASIC_OPERATOR, TOKEN_DEREF) \
X("-" , TK_BASIC_OPERATOR, TOKEN_SUB) \
X("*=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MUL) \
X("*" , TK_BASIC_OPERATOR, TOKEN_MUL) \
X("/=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_DIV) \
X("/" , TK_BASIC_OPERATOR, TOKEN_DIV) \
X("//" , TK_BASIC_COMMENT , TOKEN_LINE_COMMENT) \
X("/* */" , TK_BASIC_COMMENT , TOKEN_BLOCK_COMMENT) \
X("%=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MOD) \
X("%" , TK_BASIC_OPERATOR, TOKEN_MOD) \
X("&&" , TK_BASIC_OPERATOR, TOKEN_AND_AND) \
X("&=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_AND) \
X("&" , TK_BASIC_OPERATOR, TOKEN_AND) \
X("||" , TK_BASIC_OPERATOR, TOKEN_OR_OR) \
X("|=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_OR) \
X("|" , TK_BASIC_OPERATOR, TOKEN_OR) \
X("^=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_XOR) \
X("^" , TK_BASIC_OPERATOR, TOKEN_XOR) \
X("<<=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_L_SH) \
X("<<" , TK_BASIC_OPERATOR, TOKEN_L_SH) \
X("<=" , TK_BASIC_OPERATOR, TOKEN_LE) \
X("<" , TK_BASIC_OPERATOR, TOKEN_LT) \
X(">>=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_R_SH) \
X(">>" , TK_BASIC_OPERATOR, TOKEN_R_SH) \
X(">=" , TK_BASIC_OPERATOR, TOKEN_GE) \
X(">" , TK_BASIC_OPERATOR, TOKEN_GT) \
X("!" , TK_BASIC_OPERATOR, TOKEN_NOT) \
X("!=" , TK_BASIC_OPERATOR, TOKEN_NEQ) \
X("~" , TK_BASIC_OPERATOR, TOKEN_BIT_NOT) \
X("[" , TK_BASIC_OPERATOR, TOKEN_L_BRACKET) \
X("]" , TK_BASIC_OPERATOR, TOKEN_R_BRACKET) \
X("(" , TK_BASIC_OPERATOR, TOKEN_L_PAREN) \
X(")" , TK_BASIC_OPERATOR, TOKEN_R_PAREN) \
X("{" , TK_BASIC_OPERATOR, TOKEN_L_BRACE) \
X("}" , TK_BASIC_OPERATOR, TOKEN_R_BRACE) \
X(";" , TK_BASIC_OPERATOR, TOKEN_SEMICOLON) \
X("," , TK_BASIC_OPERATOR, TOKEN_COMMA) \
X(":" , TK_BASIC_OPERATOR, TOKEN_COLON) \
X("." , TK_BASIC_OPERATOR, TOKEN_DOT) \
X("..." , TK_BASIC_OPERATOR, TOKEN_ELLIPSIS) \
X("?" , TK_BASIC_OPERATOR, TOKEN_COND) \
X(ident , TK_BASIC_IDENTIFIER, TOKEN_IDENT) \
X(int_literal , TK_BASIC_LITERAL, TOKEN_INT_LITERAL) \
X(float_literal , TK_BASIC_LITERAL, TOKEN_FLOAT_LITERAL) \
X(char_literal , TK_BASIC_LITERAL, TOKEN_CHAR_LITERAL) \
X(string_literal , TK_BASIC_LITERAL, TOKEN_STRING_LITERAL) \
// END
// 定义TokenType枚举
typedef enum tok_type {
typedef enum cc_tktype {
// 处理普通token
#define X(str, tok) tok,
#define X(str, basic, tok) tok,
TOKEN_TABLE
#undef X
@@ -115,24 +117,7 @@ typedef enum tok_type {
#define X(name, std, tok) tok,
KEYWORD_TABLE
#undef X
} tok_type_t;
// Value payload carried by a token: an int field followed by an anonymous
// union, so exactly one of ch/i/f/d/ll/str is meaningful at a time and the
// members are reached directly (val.i, val.str, ...).
typedef struct tok_val {
int have; // NOTE(review): presumably non-zero when the union holds a value - confirm against the lexer
union {
char ch;
int i;
float f;
double d;
long long ll;
char* str; // NOTE(review): ownership of the pointed-to string is not visible here - confirm who frees it
};
} tok_val_t;
// A single token: its type tag paired with its value payload.
typedef struct tok {
tok_type_t type; // enumerator generated from TOKEN_TABLE / KEYWORD_TABLE
tok_val_t val; // payload; which union member applies is not encoded in this struct
} tok_t;
} cc_tktype_t;
typedef struct tok_stream {
int cur;
@@ -150,8 +135,8 @@ void init_tokbuf(tok_stream_t* tokbuf, void* stream, tok_stream_get_func gettok)
// Token-stream accessors operating on a tok_stream_t.
// Only the declarations are visible in this header; the behaviour notes
// below are inferred from the names and must be confirmed against the
// implementation.
//
// NOTE(review): peek_tok_type, expect_pop_tok and get_tok_name are declared
// twice below, once with tok_type_t and once with cc_tktype_t. This looks
// like the old and new revision shown together; both sets can only coexist
// if the two type names denote the same type.
tok_t* peek_tok(tok_stream_t* tokbuf); // presumably returns the next token without consuming it - confirm
tok_t* pop_tok(tok_stream_t* tokbuf); // presumably consumes and returns the next token - confirm
void flush_peek_tok(tok_stream_t* tokbuf); // effect on the peek state is not visible here
tok_type_t peek_tok_type(tok_stream_t* tokbuf);
int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type);
const char* get_tok_name(tok_type_t type);
cc_tktype_t peek_tok_type(tok_stream_t* tokbuf); // returns a token type for the stream; presumably that of the peeked token
int expect_pop_tok(tok_stream_t* tokbuf, cc_tktype_t type); // meaning of the int result is not visible here - confirm
const char* get_tok_name(cc_tktype_t type); // presumably the printable spelling for the given token type - confirm
#endif