Files
scc/libs/lexer/include/scc_lexer_token.h
zzy 8fcfeda14e feat(ast): 添加内置表达式支持并完善解析器功能
- 添加SCC_AST_EXPR_BUILTIN枚举值用于表示内置表达式
- 定义scc_ast_builtin_expr_type_t枚举包含va_start、va_end、va_copy、va_arg等内置函数类型
- 在AST表达式结构中添加builtin字段以支持内置表达式存储
- 实现scc_ast_decl_unsafe_val_init内联函数用于不安全的声明初始化
- 修改sizeof和alignof表达式初始化函数以支持类型或表达式参数
- 修复默认语句的字段引用错误(default_stmt而非case_stmt)
- 改进词法分析器对整数字面量后缀(U、L、LL等)的处理逻辑
- 重构解析器中的声明解析逻辑,统一使用scc_parse_declarator函数
- 完善结构体、联合体和枚举类型的声明解析,支持仅有名称的前向声明
- 优化初始化列表解析,支持复合字面量的成员访问语法
- 更新参数类型列表解析以正确处理参数声明
- 修复括号表达式的解析逻辑,区分类型转换和普通括号表达式
2026-03-13 17:39:14 +08:00

229 lines
12 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#ifndef __SCC_LEXER_TOKEN_H__
#define __SCC_LEXER_TOKEN_H__
#include <scc_core.h>
#include <scc_pos.h>
#include <scc_core_ring.h>
struct scc_lexer_token;
typedef struct scc_lexer_token scc_lexer_tok_t;
typedef SCC_RING(scc_lexer_tok_t) scc_lexer_tok_ring_t;
typedef SCC_VEC(scc_lexer_tok_t) scc_lexer_tok_vec_t;
typedef enum scc_cstd {
SCC_CSTD_C89,
SCC_CSTD_C99,
SCC_CEXT_SCC,
} scc_cstd_t;
/* clang-format off */
/// https://cppreference.cn/w/c/preprocessor
#define SCC_PPKEYWORD_TABLE \
X(define , SCC_CSTD_C99, SCC_PP_TOK_DEFINE ) \
X(elif , SCC_CSTD_C99, SCC_PP_TOK_ELIF ) \
X(elifdef , SCC_CSTD_C99, SCC_PP_TOK_ELIFDEF ) \
X(elifndef , SCC_CSTD_C99, SCC_PP_TOK_ELIFNDEF ) \
X(else , SCC_CSTD_C99, SCC_PP_TOK_ELSE ) \
X(embed , SCC_CSTD_C99, SCC_PP_TOK_EMBED ) \
X(endif , SCC_CSTD_C99, SCC_PP_TOK_ENDIF ) \
X(error , SCC_CSTD_C99, SCC_PP_TOK_ERROR ) \
X(if , SCC_CSTD_C99, SCC_PP_TOK_IF ) \
X(ifdef , SCC_CEXT_SCC, SCC_PP_TOK_IFDEF ) \
X(ifndef , SCC_CSTD_C99, SCC_PP_TOK_IFNDEF ) \
X(include , SCC_CSTD_C99, SCC_PP_TOK_INCLUDE ) \
X(line , SCC_CEXT_SCC, SCC_PP_TOK_LINE ) \
X(pragma , SCC_CSTD_C99, SCC_PP_TOK_PRAGMA ) \
X(undef , SCC_CEXT_SCC, SCC_PP_TOK_UNDEF ) \
X(warning , SCC_CSTD_C99, SCC_PP_TOK_WARNING ) \
// END
/* clang-format on */
/* clang-format off */
// WARNING: Using Binary Search To Fast Find Keyword
// 你必须确保其中是按照字典序排列
#define SCC_CKEYWORD_TABLE \
X(asm , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ASM , SCC_CEXT_SCC) \
X(atomic , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ATOMIC , SCC_CEXT_SCC) \
X(auto , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_AUTO , SCC_CEXT_SCC) \
X(bool , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_BOOL , SCC_CEXT_SCC) \
X(break , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_BREAK , SCC_CSTD_C89) \
X(case , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CASE , SCC_CSTD_C89) \
X(char , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CHAR , SCC_CSTD_C89) \
X(complex , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_COMPLEX , SCC_CEXT_SCC) \
X(const , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CONST , SCC_CSTD_C89) \
X(continue , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_CONTINUE , SCC_CSTD_C89) \
X(default , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DEFAULT , SCC_CSTD_C89) \
X(do , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DO , SCC_CSTD_C89) \
X(double , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_DOUBLE , SCC_CSTD_C89) \
X(else , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ELSE , SCC_CSTD_C89) \
X(enum , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_ENUM , SCC_CSTD_C89) \
X(extern , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_EXTERN , SCC_CSTD_C89) \
X(float , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_FLOAT , SCC_CSTD_C89) \
X(for , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_FOR , SCC_CSTD_C89) \
X(goto , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_GOTO , SCC_CSTD_C89) \
X(if , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_IF , SCC_CSTD_C89) \
X(inline , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_INLINE , SCC_CSTD_C99) \
X(int , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_INT , SCC_CSTD_C89) \
X(long , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_LONG , SCC_CSTD_C89) \
X(register , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_REGISTER , SCC_CSTD_C89) \
X(restrict , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_RESTRICT , SCC_CSTD_C99) \
X(return , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_RETURN , SCC_CSTD_C89) \
X(short , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SHORT , SCC_CSTD_C89) \
X(signed , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SIGNED , SCC_CSTD_C89) \
X(sizeof , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SIZEOF , SCC_CSTD_C89) \
X(static , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_STATIC , SCC_CSTD_C89) \
X(struct , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_STRUCT , SCC_CSTD_C89) \
X(switch , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_SWITCH , SCC_CSTD_C89) \
X(typedef , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_TYPEDEF , SCC_CSTD_C89) \
X(union , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_UNION , SCC_CSTD_C89) \
X(unsigned , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_UNSIGNED , SCC_CSTD_C89) \
X(void , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_VOID , SCC_CSTD_C89) \
X(volatile , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_VOLATILE , SCC_CSTD_C89) \
X(while , SCC_TOK_SUBTYPE_IDENTIFIER , SCC_TOK_WHILE , SCC_CSTD_C89) \
// KEYWORD_TABLE
#define SCC_CTOK_TABLE \
X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \
X("#" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP ) \
X("##" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP_SHARP ) \
X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \
X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \
X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \
X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \
X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \
X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \
X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \
X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \
X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \
X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \
X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \
X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \
X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \
X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \
X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \
X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \
X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \
X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \
X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \
X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \
X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \
X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \
X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \
X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \
X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \
X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \
X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \
X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \
X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \
X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \
X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \
X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \
X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \
X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \
X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \
X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \
X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \
X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \
X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \
X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \
X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \
X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \
X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \
X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \
X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \
X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \
X(int , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \
X(float , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \
X(char , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \
X(string , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \
// END
/* clang-format on */
/* clang-format off */
typedef enum scc_tok_type {
// must first becase the unknown token must be 0
#define X(str, subtype, tok) tok,
SCC_CTOK_TABLE
#undef X
#define X(name, type, tok) tok,
SCC_PPKEYWORD_TABLE
#undef X
#define X(name, subtype, tok, std) tok,
SCC_CKEYWORD_TABLE
#undef X
// FIXME hack one bit for disabled or other method
#define X(str, subtype, tok) tok,
X(sum , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_SUM )
X(disabled , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_DISABLED )
#undef X
} scc_tok_type_t;
/* clang-format on */
typedef enum scc_tok_subtype {
SCC_TOK_SUBTYPE_INVALID, // 错误占位
SCC_TOK_SUBTYPE_OPERATOR, // 操作符
SCC_TOK_SUBTYPE_IDENTIFIER, // 标识符
SCC_TOK_SUBTYPE_LITERAL, // 字面量
SCC_TOK_SUBTYPE_EMPTYSPACE, // 空白
SCC_TOK_SUBTYPE_COMMENT, // 注释
SCC_TOK_SUBTYPE_EOF // 结束标记
} scc_tok_subtype_t;
/**
* @brief 词法分析结果
* @warning 需要手动释放lexeme否则会出现内存泄漏
*/
struct scc_lexer_token {
scc_tok_type_t type;
scc_cstring_t lexeme;
scc_pos_t loc;
};
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);
const char *scc_get_tok_name(scc_tok_type_t type);
static inline void scc_lexer_tok_drop(scc_lexer_tok_t *tok) {
Assert(tok != null);
tok->type = SCC_TOK_UNKNOWN;
tok->loc.col = 0;
tok->loc.line = 0;
tok->loc.name = null;
tok->loc.offset = 0;
scc_cstring_free(&tok->lexeme);
}
static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok,
scc_tok_type_t type) {
return tok->type == type;
}
// 深拷贝 token
static inline scc_lexer_tok_t scc_lexer_tok_copy(const scc_lexer_tok_t *src) {
Assert(src != null);
scc_lexer_tok_t dst = *src;
dst.lexeme = scc_cstring_copy(&src->lexeme);
return dst;
}
// 移动 token源 token 不再拥有 lexeme
static inline void scc_lexer_tok_move(scc_lexer_tok_t *dst,
scc_lexer_tok_t *src) {
Assert(src != null);
*dst = *src;
src->lexeme.data = null;
src->lexeme.size = 0;
src->lexeme.cap = 0;
}
#endif /* __SCC_LEXER_TOKEN_H__ */