- 将依赖项从libcore重命名为scc_core
- 更新头文件包含路径从<libcore.h>到<scc_core.h>
- 保持原有功能不变

refactor(lexer): 重命名libcore为scc_core并添加词法流式解析功能
- 将依赖项从libcore重命名为scc_core
- 移除不再需要的scc_lexer_token结构体定义
- 重命名struct cc_lexer为struct scc_lexer
- 添加scc_lexer_stream_t流式解析器相关定义和实现
- 新增lexer_stream.c文件实现流式token缓冲功能

refactor(lexer_log): 重命名logger变量和头文件定义
- 将头文件保护宏从__SMCC_LEXER_LOG_H__改为__SCC_LEXER_LOG_H__
- 将logger变量从__smcc_lexer_log改为__scc_lexer_log
- 更新头文件包含从<libcore.h>到<scc_core.h>

refactor(lexer_token): 重新组织token头文件结构
- 将头文件保护宏从__SMCC_CC_TOKEN_H__改为__SCC_LEXER_TOKEN_H__
- 更新头文件包含从<libcore.h>到<scc_core.h>
- 将scc_lexer_token结构体定义移至该文件

refactor(lexer): 简化token匹配代码格式
- 移除LCC相关的注释内容
- 优化括号符号的token匹配代码格式,使用clang-format控制

refactor(pprocessor): 更新依赖项名称和头文件包含
- 将libcore重命名为scc_core
- 将libutils重命名为scc_utils
- 更新头文件包含路径

refactor(runtime): 重命名libcore为scc_core并重构目录结构
- 将libcore目录重命名为scc_core
- 将libutils目录重命名为scc_utils
- 更新所有相关的头文件包含路径
- 修改cbuild.toml中的包名称
- 更新core_vec.h中的宏定义以支持标准库模式
161 lines
9.3 KiB
C
161 lines
9.3 KiB
C
#ifndef __SCC_LEXER_TOKEN_H__
|
|
#define __SCC_LEXER_TOKEN_H__
|
|
|
|
#include <scc_core.h>
|
|
|
|
/**
 * Language standard / extension tag attached to each keyword row of
 * SCC_CKEYWORD_TABLE (fourth column).
 */
typedef enum scc_cstd {
    SCC_CSTD_C89, /* keyword tagged as C89 in the keyword table */
    SCC_CSTD_C99, /* keyword tagged as C99 (e.g. inline, restrict) */
    SCC_CEXT_ASM, /* extension tag, used by the `asm` keyword row */
} scc_cstd_t;
|
|
|
|
/* clang-format off */
|
|
// WARNING: keyword lookup uses binary search over this table.
// You MUST keep the rows sorted in lexicographic order of the keyword,
// otherwise lookups will silently miss entries.
//
// Row layout: X(keyword, subtype, token enumerator, standard/extension tag)
// Every line of the macro body must end with a backslash; do not insert
// `//` comments between rows (they would swallow the rest of the macro).
#define SCC_CKEYWORD_TABLE \
    X(asm      , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ASM      , SCC_CEXT_ASM) \
    X(break    , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_BREAK    , SCC_CSTD_C89) \
    X(case     , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CASE     , SCC_CSTD_C89) \
    X(char     , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CHAR     , SCC_CSTD_C89) \
    X(const    , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONST    , SCC_CSTD_C89) \
    X(continue , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONTINUE , SCC_CSTD_C89) \
    X(default  , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DEFAULT  , SCC_CSTD_C89) \
    X(do       , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DO       , SCC_CSTD_C89) \
    X(double   , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DOUBLE   , SCC_CSTD_C89) \
    X(else     , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ELSE     , SCC_CSTD_C89) \
    X(enum     , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ENUM     , SCC_CSTD_C89) \
    X(extern   , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_EXTERN   , SCC_CSTD_C89) \
    X(float    , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FLOAT    , SCC_CSTD_C89) \
    X(for      , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FOR      , SCC_CSTD_C89) \
    X(goto     , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_GOTO     , SCC_CSTD_C89) \
    X(if       , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_IF       , SCC_CSTD_C89) \
    X(inline   , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INLINE   , SCC_CSTD_C99) \
    X(int      , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INT      , SCC_CSTD_C89) \
    X(long     , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_LONG     , SCC_CSTD_C89) \
    X(register , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_REGISTER , SCC_CSTD_C89) \
    X(restrict , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RESTRICT , SCC_CSTD_C99) \
    X(return   , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RETURN   , SCC_CSTD_C89) \
    X(short    , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SHORT    , SCC_CSTD_C89) \
    X(signed   , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIGNED   , SCC_CSTD_C89) \
    X(sizeof   , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIZEOF   , SCC_CSTD_C89) \
    X(static   , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STATIC   , SCC_CSTD_C89) \
    X(struct   , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STRUCT   , SCC_CSTD_C89) \
    X(switch   , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SWITCH   , SCC_CSTD_C89) \
    X(typedef  , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_TYPEDEF  , SCC_CSTD_C89) \
    X(union    , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNION    , SCC_CSTD_C89) \
    X(unsigned , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNSIGNED , SCC_CSTD_C89) \
    X(void     , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOID     , SCC_CSTD_C89) \
    X(volatile , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOLATILE , SCC_CSTD_C89) \
    X(while    , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_WHILE    , SCC_CSTD_C89) \
    // KEYWORD_TABLE
|
|
|
|
// Table of all non-keyword tokens.
//
// Row layout: X(spelling, subtype, token enumerator)
// The first column mixes bare words (unknown, EOF, ident, ...) with string
// literals ("==", "+", ...). Any X() definition must therefore either ignore
// that column or stringify it with #: `EOF` is a <stdio.h> macro and would be
// expanded if used as a plain token.
// Row order defines the numeric value of each enumerator in scc_tok_type.
// Every line of the macro body must end with a backslash; do not insert
// `//` comments between rows (they would swallow the rest of the macro).
#define SCC_CTOK_TABLE \
    X(unknown        , SCC_TOK_SUBTYPE_INVALID    , SCC_TOK_UNKNOWN        ) \
    X(EOF            , SCC_TOK_SUBTYPE_EOF        , SCC_TOK_EOF            ) \
    X(blank          , SCC_TOK_SUBTYPE_EMPTYSPACE , SCC_TOK_BLANK          ) \
    X("=="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_EQ             ) \
    X("="            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN         ) \
    X("++"           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ADD_ADD        ) \
    X("+="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_ADD     ) \
    X("+"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ADD            ) \
    X("--"           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_SUB_SUB        ) \
    X("-="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_SUB     ) \
    X("->"           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_DEREF          ) \
    X("-"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_SUB            ) \
    X("*="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_MUL     ) \
    X("*"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_MUL            ) \
    X("/="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_DIV     ) \
    X("/"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_DIV            ) \
    X("//"           , SCC_TOK_SUBTYPE_COMMENT    , SCC_TOK_LINE_COMMENT   ) \
    X("/* */"        , SCC_TOK_SUBTYPE_COMMENT    , SCC_TOK_BLOCK_COMMENT  ) \
    X("%="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_MOD     ) \
    X("%"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_MOD            ) \
    X("&&"           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_AND_AND        ) \
    X("&="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_AND     ) \
    X("&"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_AND            ) \
    X("||"           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_OR_OR          ) \
    X("|="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_OR      ) \
    X("|"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_OR             ) \
    X("^="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_XOR     ) \
    X("^"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_XOR            ) \
    X("<<="          , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_L_SH    ) \
    X("<<"           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_L_SH           ) \
    X("<="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_LE             ) \
    X("<"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_LT             ) \
    X(">>="          , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ASSIGN_R_SH    ) \
    X(">>"           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_R_SH           ) \
    X(">="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_GE             ) \
    X(">"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_GT             ) \
    X("!"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_NOT            ) \
    X("!="           , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_NEQ            ) \
    X("~"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_BIT_NOT        ) \
    X("["            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_L_BRACKET      ) \
    X("]"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_R_BRACKET      ) \
    X("("            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_L_PAREN        ) \
    X(")"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_R_PAREN        ) \
    X("{"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_L_BRACE        ) \
    X("}"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_R_BRACE        ) \
    X(";"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_SEMICOLON      ) \
    X(","            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_COMMA          ) \
    X(":"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_COLON          ) \
    X("."            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_DOT            ) \
    X("..."          , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_ELLIPSIS       ) \
    X("?"            , SCC_TOK_SUBTYPE_OPERATOR   , SCC_TOK_COND           ) \
    X(ident          , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_IDENT          ) \
    X(int_literal    , SCC_TOK_SUBTYPE_LITERAL    , SCC_TOK_INT_LITERAL    ) \
    X(float_literal  , SCC_TOK_SUBTYPE_LITERAL    , SCC_TOK_FLOAT_LITERAL  ) \
    X(char_literal   , SCC_TOK_SUBTYPE_LITERAL    , SCC_TOK_CHAR_LITERAL   ) \
    X(string_literal , SCC_TOK_SUBTYPE_LITERAL    , SCC_TOK_STRING_LITERAL ) \
    // END
|
|
/* clang-format on */
|
|
|
|
// 定义TokenType枚举
|
|
typedef enum scc_tok_type {
|
|
// 处理普通token
|
|
#define X(str, subtype, tok) tok,
|
|
SCC_CTOK_TABLE
|
|
#undef X
|
|
|
|
// 处理关键字(保持原有格式)
|
|
#define X(name, subtype, tok, std) tok,
|
|
SCC_CKEYWORD_TABLE
|
|
#undef X
|
|
} scc_tok_type_t;
|
|
|
|
/**
 * Coarse token category; used as the second column of SCC_CTOK_TABLE and
 * SCC_CKEYWORD_TABLE.
 */
typedef enum scc_tok_subtype {
    SCC_TOK_SUBTYPE_INVALID,    // error placeholder
    SCC_TOK_SUBTYPE_KEYWORD,    // keyword
    SCC_TOK_SUBTYPE_OPERATOR,   // operator / punctuator
    SCC_TOK_SUBTYPE_IDENTIFIER, // identifier
    SCC_TOK_SUBTYPE_LITERAL,    // literal

    SCC_TOK_SUBTYPE_EMPTYSPACE, // whitespace
    SCC_TOK_SUBTYPE_COMMENT,    // comment
    SCC_TOK_SUBTYPE_EOF         // end-of-input marker
} scc_tok_subtype_t;
|
|
|
|
/**
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably returns the subtype column recorded for `type`
 * in the token/keyword tables - confirm against the implementation.
 */
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);

/**
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably returns a printable name/spelling for `type`;
 * ownership and lifetime of the returned string are not visible here -
 * confirm against the implementation.
 */
const char *scc_get_tok_name(scc_tok_type_t type);
|
|
|
|
typedef struct scc_lexer_token {
|
|
scc_tok_type_t type;
|
|
scc_cvalue_t value;
|
|
scc_pos_t loc;
|
|
} scc_lexer_tok_t;
|
|
|
|
static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok,
|
|
scc_tok_type_t type) {
|
|
return tok->type == type;
|
|
}
|
|
|
|
static inline cbool scc_lexer_tok_expect(const scc_lexer_tok_t *tok,
|
|
scc_tok_type_t type) {
|
|
if (!scc_lexer_tok_match(tok, type)) {
|
|
LOG_ERROR("expected token %d, got %d\n", type, tok->type);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
#endif /* __SCC_LEXER_TOKEN_H__ */
|