Files
scc/libs/lexer/include/scc_lexer_token.h
zzy 1fceeca011 feat(parser): 启用parser和ast模块并重构解析器结构
- 在cbuild.toml中启用parser和ast依赖项
- 将AST内置类型枚举重命名为SCC_AST_BUILTIN_TYPE_*前缀格式
- 修复ast_def.h中的类型字段命名,将builtin改为type
- 添加逗号操作符支持到表达式操作符枚举中
- 更新字面量表达式的lexeme字段为const char*指针和owned标志
- 重构解析器头文件结构,分离为parser.h、parser_utils.h、scc_sema.h等
- 实现新的解析器工具函数,包括预览、消费、回溯等功能
- 更新声明解析逻辑,使用新的解析器接口进行token处理
- 添加符号表语义分析功能框架
- 修复词法分析器中token移动时的空指针检查
- 统一使用scc_tree_dump_printf替代直接的scc_printf调用
2026-03-09 15:25:12 +08:00

223 lines
12 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#ifndef __SCC_LEXER_TOKEN_H__
#define __SCC_LEXER_TOKEN_H__
#include <scc_core.h>
#include <scc_pos.h>
#include <scc_core_ring.h>
struct scc_lexer_token;
typedef struct scc_lexer_token scc_lexer_tok_t;
typedef SCC_RING(scc_lexer_tok_t) scc_lexer_tok_ring_t;
typedef SCC_VEC(scc_lexer_tok_t) scc_lexer_tok_vec_t;
typedef enum scc_cstd {
SCC_CSTD_C89,
SCC_CSTD_C99,
SCC_CEXT_SCC,
} scc_cstd_t;
/* clang-format off */
/// https://cppreference.cn/w/c/preprocessor
#define SCC_PPKEYWORD_TABLE \
X(define , SCC_CSTD_C99, SCC_PP_TOK_DEFINE ) \
X(elif , SCC_CSTD_C99, SCC_PP_TOK_ELIF ) \
X(elifdef , SCC_CSTD_C99, SCC_PP_TOK_ELIFDEF ) \
X(elifndef , SCC_CSTD_C99, SCC_PP_TOK_ELIFNDEF ) \
X(else , SCC_CSTD_C99, SCC_PP_TOK_ELSE ) \
X(embed , SCC_CSTD_C99, SCC_PP_TOK_EMBED ) \
X(endif , SCC_CSTD_C99, SCC_PP_TOK_ENDIF ) \
X(error , SCC_CSTD_C99, SCC_PP_TOK_ERROR ) \
X(if , SCC_CSTD_C99, SCC_PP_TOK_IF ) \
X(ifdef , SCC_CEXT_SCC, SCC_PP_TOK_IFDEF ) \
X(ifndef , SCC_CSTD_C99, SCC_PP_TOK_IFNDEF ) \
X(include , SCC_CSTD_C99, SCC_PP_TOK_INCLUDE ) \
X(line , SCC_CEXT_SCC, SCC_PP_TOK_LINE ) \
X(pragma , SCC_CSTD_C99, SCC_PP_TOK_PRAGMA ) \
X(undef , SCC_CEXT_SCC, SCC_PP_TOK_UNDEF ) \
X(warning , SCC_CSTD_C99, SCC_PP_TOK_WARNING ) \
// END
/* clang-format on */
/* clang-format off */
// WARNING: Using Binary Search To Fast Find Keyword
// 你必须确保其中是按照字典序排列
#define SCC_CKEYWORD_TABLE \
X(asm , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ASM , SCC_CEXT_SCC) \
X(atomic , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ATOMIC , SCC_CEXT_SCC) \
X(auto , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_AUTO , SCC_CEXT_SCC) \
X(bool , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_BOOL , SCC_CEXT_SCC) \
X(break , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_BREAK , SCC_CSTD_C89) \
X(case , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CASE , SCC_CSTD_C89) \
X(char , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CHAR , SCC_CSTD_C89) \
X(complex , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_COMPLEX , SCC_CEXT_SCC) \
X(const , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CONST , SCC_CSTD_C89) \
X(continue , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CONTINUE , SCC_CSTD_C89) \
X(default , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DEFAULT , SCC_CSTD_C89) \
X(do , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DO , SCC_CSTD_C89) \
X(double , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DOUBLE , SCC_CSTD_C89) \
X(else , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ELSE , SCC_CSTD_C89) \
X(enum , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ENUM , SCC_CSTD_C89) \
X(extern , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_EXTERN , SCC_CSTD_C89) \
X(float , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_FLOAT , SCC_CSTD_C89) \
X(for , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_FOR , SCC_CSTD_C89) \
X(goto , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_GOTO , SCC_CSTD_C89) \
X(if , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_IF , SCC_CSTD_C89) \
X(inline , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_INLINE , SCC_CSTD_C99) \
X(int , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_INT , SCC_CSTD_C89) \
X(long , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_LONG , SCC_CSTD_C89) \
X(register , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_REGISTER , SCC_CSTD_C89) \
X(restrict , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_RESTRICT , SCC_CSTD_C99) \
X(return , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_RETURN , SCC_CSTD_C89) \
X(short , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SHORT , SCC_CSTD_C89) \
X(signed , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SIGNED , SCC_CSTD_C89) \
X(sizeof , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SIZEOF , SCC_CSTD_C89) \
X(static , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_STATIC , SCC_CSTD_C89) \
X(struct , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_STRUCT , SCC_CSTD_C89) \
X(switch , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SWITCH , SCC_CSTD_C89) \
X(typedef , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_TYPEDEF , SCC_CSTD_C89) \
X(union , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_UNION , SCC_CSTD_C89) \
X(unsigned , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_UNSIGNED , SCC_CSTD_C89) \
X(void , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_VOID , SCC_CSTD_C89) \
X(volatile , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_VOLATILE , SCC_CSTD_C89) \
X(while , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_WHILE , SCC_CSTD_C89) \
// KEYWORD_TABLE
#define SCC_CTOK_TABLE \
X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
X(disabled , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_DISABLED ) \
X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \
X("#" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP ) \
X("##" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP_SHARP ) \
X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \
X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \
X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \
X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \
X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \
X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \
X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \
X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \
X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \
X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \
X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \
X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \
X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \
X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \
X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \
X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \
X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \
X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \
X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \
X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \
X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \
X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \
X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \
X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \
X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \
X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \
X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \
X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \
X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \
X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \
X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \
X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \
X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \
X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \
X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \
X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \
X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \
X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \
X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \
X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \
X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \
X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \
X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \
X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \
X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \
X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \
X(int , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \
X(float , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \
X(char , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \
X(string , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \
// END
/* clang-format on */
/* clang-format off */
typedef enum scc_tok_type {
// must first becase the unknown token must be 0
#define X(str, subtype, tok) tok,
SCC_CTOK_TABLE
#undef X
#define X(name, type, tok) tok,
SCC_PPKEYWORD_TABLE
#undef X
#define X(name, subtype, tok, std) tok,
SCC_CKEYWORD_TABLE
#undef X
} scc_tok_type_t;
/* clang-format on */
typedef enum scc_tok_subtype {
SCC_TOK_SUBTYPE_INVALID, // 错误占位
SCC_TOK_SUBTYPE_OPERATOR, // 操作符
SCC_TOK_SUBTYPE_IDENTIFIER, // 标识符
SCC_TOK_SUBTYPE_LITERAL, // 字面量
SCC_TOK_SUBTYPE_EMPTYSPACE, // 空白
SCC_TOK_SUBTYPE_COMMENT, // 注释
SCC_TOK_SUBTYPE_EOF // 结束标记
} scc_tok_subtype_t;
/**
* @brief 词法分析结果
* @warning 需要手动释放lexeme否则会出现内存泄漏
*/
struct scc_lexer_token {
scc_tok_type_t type;
scc_cstring_t lexeme;
scc_pos_t loc;
};
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);
const char *scc_get_tok_name(scc_tok_type_t type);
static inline void scc_lexer_tok_drop(scc_lexer_tok_t *tok) {
tok->type = SCC_TOK_UNKNOWN;
tok->loc.col = 0;
tok->loc.line = 0;
tok->loc.name = null;
tok->loc.offset = 0;
scc_cstring_free(&tok->lexeme);
}
static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok,
scc_tok_type_t type) {
return tok->type == type;
}
// 深拷贝 token
static inline scc_lexer_tok_t scc_lexer_tok_copy(const scc_lexer_tok_t *src) {
Assert(src != null);
scc_lexer_tok_t dst = *src;
dst.lexeme = scc_cstring_copy(&src->lexeme);
return dst;
}
// 移动 token源 token 不再拥有 lexeme
static inline void scc_lexer_tok_move(scc_lexer_tok_t *dst,
scc_lexer_tok_t *src) {
Assert(src != null);
*dst = *src;
src->lexeme.data = null;
src->lexeme.size = 0;
src->lexeme.cap = 0;
}
#endif /* __SCC_LEXER_TOKEN_H__ */