Files
scc/libs/lexer/include/scc_lexer_token.h
zzy 4144f7841c refactor(argparse): 将null替换为nullptr以提高C++兼容性
- 在argparse库中将所有null指针常量替换为nullptr
- 更新头文件和源文件中的指针初始化和比较操作
- 修改测试文件中的相关断言检查
- 更新AST定义文件中的注释说明
2026-04-05 20:18:09 +08:00

229 lines
12 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#ifndef __SCC_LEXER_TOKEN_H__
#define __SCC_LEXER_TOKEN_H__
#include <scc_core.h>
#include <scc_pos.h>
#include <scc_core_ring.h>
struct scc_lexer_token;
typedef struct scc_lexer_token scc_lexer_tok_t;
typedef SCC_RING(scc_lexer_tok_t) scc_lexer_tok_ring_t;
typedef SCC_VEC(scc_lexer_tok_t) scc_lexer_tok_vec_t;
typedef enum scc_cstd {
SCC_CSTD_C89,
SCC_CSTD_C99,
SCC_CEXT_SCC,
} scc_cstd_t;
/* clang-format off */
/// https://cppreference.cn/w/c/preprocessor
#define SCC_PPKEYWORD_TABLE \
X(define , SCC_CSTD_C99, SCC_PP_TOK_DEFINE ) \
X(elif , SCC_CSTD_C99, SCC_PP_TOK_ELIF ) \
X(elifdef , SCC_CSTD_C99, SCC_PP_TOK_ELIFDEF ) \
X(elifndef , SCC_CSTD_C99, SCC_PP_TOK_ELIFNDEF ) \
X(else , SCC_CSTD_C99, SCC_PP_TOK_ELSE ) \
X(embed , SCC_CSTD_C99, SCC_PP_TOK_EMBED ) \
X(endif , SCC_CSTD_C99, SCC_PP_TOK_ENDIF ) \
X(error , SCC_CSTD_C99, SCC_PP_TOK_ERROR ) \
X(if , SCC_CSTD_C99, SCC_PP_TOK_IF ) \
X(ifdef , SCC_CEXT_SCC, SCC_PP_TOK_IFDEF ) \
X(ifndef , SCC_CSTD_C99, SCC_PP_TOK_IFNDEF ) \
X(include , SCC_CSTD_C99, SCC_PP_TOK_INCLUDE ) \
X(line , SCC_CEXT_SCC, SCC_PP_TOK_LINE ) \
X(pragma , SCC_CSTD_C99, SCC_PP_TOK_PRAGMA ) \
X(undef , SCC_CEXT_SCC, SCC_PP_TOK_UNDEF ) \
X(warning , SCC_CSTD_C99, SCC_PP_TOK_WARNING ) \
// END
/* clang-format on */
/* clang-format off */
// WARNING: Using Binary Search To Fast Find Keyword
// 你必须确保其中是按照字典序排列
#define SCC_CKEYWORD_TABLE \
X(asm , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ASM , SCC_CEXT_SCC) \
X(atomic , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ATOMIC , SCC_CEXT_SCC) \
X(auto , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_AUTO , SCC_CEXT_SCC) \
X(bool , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_BOOL , SCC_CEXT_SCC) \
X(break , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_BREAK , SCC_CSTD_C89) \
X(case , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CASE , SCC_CSTD_C89) \
X(char , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CHAR , SCC_CSTD_C89) \
X(complex , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_COMPLEX , SCC_CEXT_SCC) \
X(const , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CONST , SCC_CSTD_C89) \
X(continue , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CONTINUE , SCC_CSTD_C89) \
X(default , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DEFAULT , SCC_CSTD_C89) \
X(do , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DO , SCC_CSTD_C89) \
X(double , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DOUBLE , SCC_CSTD_C89) \
X(else , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ELSE , SCC_CSTD_C89) \
X(enum , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ENUM , SCC_CSTD_C89) \
X(extern , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_EXTERN , SCC_CSTD_C89) \
X(float , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_FLOAT , SCC_CSTD_C89) \
X(for , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_FOR , SCC_CSTD_C89) \
X(goto , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_GOTO , SCC_CSTD_C89) \
X(if , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_IF , SCC_CSTD_C89) \
X(inline , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_INLINE , SCC_CSTD_C99) \
X(int , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_INT , SCC_CSTD_C89) \
X(long , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_LONG , SCC_CSTD_C89) \
X(register , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_REGISTER , SCC_CSTD_C89) \
X(restrict , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_RESTRICT , SCC_CSTD_C99) \
X(return , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_RETURN , SCC_CSTD_C89) \
X(short , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SHORT , SCC_CSTD_C89) \
X(signed , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SIGNED , SCC_CSTD_C89) \
X(sizeof , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SIZEOF , SCC_CSTD_C89) \
X(static , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_STATIC , SCC_CSTD_C89) \
X(struct , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_STRUCT , SCC_CSTD_C89) \
X(switch , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SWITCH , SCC_CSTD_C89) \
X(typedef , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_TYPEDEF , SCC_CSTD_C89) \
X(union , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_UNION , SCC_CSTD_C89) \
X(unsigned , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_UNSIGNED , SCC_CSTD_C89) \
X(void , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_VOID , SCC_CSTD_C89) \
X(volatile , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_VOLATILE , SCC_CSTD_C89) \
X(while , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_WHILE , SCC_CSTD_C89) \
// KEYWORD_TABLE
#define SCC_CTOK_TABLE \
X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \
X("#" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP ) \
X("##" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP_SHARP ) \
X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \
X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \
X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \
X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \
X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \
X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \
X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \
X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \
X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \
X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \
X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \
X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \
X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \
X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \
X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \
X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \
X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \
X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \
X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \
X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \
X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \
X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \
X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \
X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \
X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \
X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \
X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \
X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \
X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \
X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \
X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \
X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \
X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \
X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \
X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \
X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \
X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \
X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \
X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \
X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \
X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \
X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \
X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \
X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \
X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \
X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \
X(int , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \
X(float , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \
X(char , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \
X(string , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \
// END
/* clang-format on */
/* clang-format off */
typedef enum scc_tok_type {
// must first becase the unknown token must be 0
#define X(str, subtype, tok) tok,
SCC_CTOK_TABLE
#undef X
#define X(name, type, tok) tok,
SCC_PPKEYWORD_TABLE
#undef X
#define X(name, subtype, tok, std) tok,
SCC_CKEYWORD_TABLE
#undef X
// FIXME hack one bit for disabled or other method
#define X(str, subtype, tok) tok,
X(sum , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_SUM )
X(disabled , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_DISABLED )
#undef X
} scc_tok_type_t;
/* clang-format on */
typedef enum scc_tok_subtype {
SCC_TOK_SUBTYPE_INVALID, // 错误占位
SCC_TOK_SUBTYPE_OPERATOR, // 操作符
SCC_TOK_SUBTYPE_IDENTIFIER, // 标识符
SCC_TOK_SUBTYPE_LITERAL, // 字面量
SCC_TOK_SUBTYPE_EMPTYSPACE, // 空白
SCC_TOK_SUBTYPE_COMMENT, // 注释
SCC_TOK_SUBTYPE_EOF // 结束标记
} scc_tok_subtype_t;
/**
* @brief 词法分析结果
* @warning 需要手动释放lexeme否则会出现内存泄漏
*/
struct scc_lexer_token {
scc_tok_type_t type;
scc_str_t lexeme;
scc_pos_t loc;
};
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);
const char *scc_get_tok_name(scc_tok_type_t type);
static inline void scc_lexer_tok_drop(scc_lexer_tok_t *tok) {
Assert(tok != nullptr);
tok->type = SCC_TOK_UNKNOWN;
tok->loc.col = 0;
tok->loc.line = 0;
tok->loc.name = nullptr;
tok->loc.offset = 0;
scc_str_drop(&tok->lexeme);
}
static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok,
scc_tok_type_t type) {
return tok->type == type;
}
// 深拷贝 token
static inline scc_lexer_tok_t scc_lexer_tok_copy(const scc_lexer_tok_t *src) {
Assert(src != nullptr);
scc_lexer_tok_t dst = *src;
dst.lexeme = scc_str_copy(&src->lexeme);
return dst;
}
// 移动 token源 token 不再拥有 lexeme
static inline void scc_lexer_tok_move(scc_lexer_tok_t *dst,
scc_lexer_tok_t *src) {
Assert(src != nullptr);
*dst = *src;
src->lexeme.data = nullptr;
src->lexeme.size = 0;
src->lexeme.cap = 0;
}
#endif /* __SCC_LEXER_TOKEN_H__ */