Files
scc/libs/lexer/include/scc_lexer_token.h
zzy 08a60e6e8a feat: 添加预处理器宏定义的字符串化和连接操作支持
- 实现了 # 和 ## 预处理器操作符的功能
- 添加了 token 深拷贝和移动函数以支持宏展开
- 修改预处理器展开逻辑以正确处理宏参数替换
- 增加了宏参数分割时对空白字符的处理

fix: 修复预处理器宏展开中的内存管理和逻辑错误

- 修正了宏展开集合的数据结构初始化方式
- 修复了函数式宏调用时括号匹配的判断逻辑
- 改进了宏参数解析过程中空白字符的处理
- 解决了 token 在宏展开过程中的所有权管理问题

chore: 为 justfile 添加文件统计命令并优化构建配置

- 新增 count-file 命令用于统计代码文件数量
- 调整了输出文件的默认命名规则
- 优化了词法分析器 token 释放时的字段重置逻辑
2026-02-19 11:20:01 +08:00

220 lines
11 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#ifndef __SCC_LEXER_TOKEN_H__
#define __SCC_LEXER_TOKEN_H__
#include <scc_core.h>
#include <scc_pos.h>
#include <scc_core_ring.h>
/* Forward declaration; the struct body is defined later in this header. */
struct scc_lexer_token;
/* Short alias used throughout for a single lexer token. */
typedef struct scc_lexer_token scc_lexer_tok_t;
/* Token containers instantiated through the project's SCC_RING / SCC_VEC
 * macros (their expansion is not visible in this header). */
typedef SCC_RING(scc_lexer_tok_t) scc_lexer_tok_ring_t;
typedef SCC_VEC(scc_lexer_tok_t) scc_lexer_tok_vec_t;
/**
 * @brief Language-standard tag stored in the keyword X-macro tables.
 *
 * Enumerator values are written out explicitly; they are the same values
 * the implicit numbering produced (declaration order starting at 0).
 */
typedef enum scc_cstd {
    SCC_CSTD_C89 = 0, // tag for C89
    SCC_CSTD_C99 = 1, // tag for C99
    SCC_CEXT_SCC = 2, // presumably an SCC-specific extension tag -- TODO confirm
} scc_cstd_t;
/* clang-format off */
/// Preprocessor directive names, see https://cppreference.cn/w/c/preprocessor
/// X-macro table: each row is X(directive name, scc_cstd_t tag, token enumerator).
/// The rows are listed in alphabetical order of the directive name.
/// The final "// END" line absorbs the last backslash continuation, so every
/// row can end with a backslash.
/// NOTE(review): the standard tags look partly like placeholders (for example
/// elifdef / elifndef / embed are C23 directives, while ifdef / line / undef
/// are tagged SCC_CEXT_SCC) -- confirm before relying on this column.
#define SCC_PPKEYWORD_TABLE \
X(define , SCC_CSTD_C99, SCC_PP_TOK_DEFINE ) \
X(elif , SCC_CSTD_C99, SCC_PP_TOK_ELIF ) \
X(elifdef , SCC_CSTD_C99, SCC_PP_TOK_ELIFDEF ) \
X(elifndef , SCC_CSTD_C99, SCC_PP_TOK_ELIFNDEF ) \
X(else , SCC_CSTD_C99, SCC_PP_TOK_ELSE ) \
X(embed , SCC_CSTD_C99, SCC_PP_TOK_EMBED ) \
X(endif , SCC_CSTD_C99, SCC_PP_TOK_ENDIF ) \
X(error , SCC_CSTD_C99, SCC_PP_TOK_ERROR ) \
X(if , SCC_CSTD_C99, SCC_PP_TOK_IF ) \
X(ifdef , SCC_CEXT_SCC, SCC_PP_TOK_IFDEF ) \
X(ifndef , SCC_CSTD_C99, SCC_PP_TOK_IFNDEF ) \
X(include , SCC_CSTD_C99, SCC_PP_TOK_INCLUDE ) \
X(line , SCC_CEXT_SCC, SCC_PP_TOK_LINE ) \
X(pragma , SCC_CSTD_C99, SCC_PP_TOK_PRAGMA ) \
X(undef , SCC_CEXT_SCC, SCC_PP_TOK_UNDEF ) \
X(warning , SCC_CSTD_C99, SCC_PP_TOK_WARNING ) \
// END
/* clang-format on */
/* clang-format off */
// WARNING: keyword lookup uses binary search over this table.
// The rows must therefore stay sorted in lexicographic (dictionary) order
// of the keyword spelling; insert new keywords at the sorted position.
// Each row is X(keyword spelling, token subtype, token enumerator, scc_cstd_t tag).
// NOTE(review): asm / atomic / auto / bool / complex are tagged SCC_CEXT_SCC,
// and atomic / bool / complex are spelled without the `_Atomic` / `_Bool` /
// `_Complex` form -- confirm this is intended.
#define SCC_CKEYWORD_TABLE \
X(asm , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ASM , SCC_CEXT_SCC) \
X(atomic , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ATOMIC , SCC_CEXT_SCC) \
X(auto , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_AUTO , SCC_CEXT_SCC) \
X(bool , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_BOOL , SCC_CEXT_SCC) \
X(break , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_BREAK , SCC_CSTD_C89) \
X(case , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CASE , SCC_CSTD_C89) \
X(char , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CHAR , SCC_CSTD_C89) \
X(complex , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_COMPLEX , SCC_CEXT_SCC) \
X(const , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONST , SCC_CSTD_C89) \
X(continue , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONTINUE , SCC_CSTD_C89) \
X(default , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DEFAULT , SCC_CSTD_C89) \
X(do , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DO , SCC_CSTD_C89) \
X(double , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DOUBLE , SCC_CSTD_C89) \
X(else , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ELSE , SCC_CSTD_C89) \
X(enum , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ENUM , SCC_CSTD_C89) \
X(extern , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_EXTERN , SCC_CSTD_C89) \
X(float , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FLOAT , SCC_CSTD_C89) \
X(for , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FOR , SCC_CSTD_C89) \
X(goto , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_GOTO , SCC_CSTD_C89) \
X(if , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_IF , SCC_CSTD_C89) \
X(inline , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INLINE , SCC_CSTD_C99) \
X(int , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INT , SCC_CSTD_C89) \
X(long , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_LONG , SCC_CSTD_C89) \
X(register , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_REGISTER , SCC_CSTD_C89) \
X(restrict , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RESTRICT , SCC_CSTD_C99) \
X(return , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RETURN , SCC_CSTD_C89) \
X(short , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SHORT , SCC_CSTD_C89) \
X(signed , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIGNED , SCC_CSTD_C89) \
X(sizeof , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIZEOF , SCC_CSTD_C89) \
X(static , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STATIC , SCC_CSTD_C89) \
X(struct , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STRUCT , SCC_CSTD_C89) \
X(switch , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SWITCH , SCC_CSTD_C89) \
X(typedef , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_TYPEDEF , SCC_CSTD_C89) \
X(union , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNION , SCC_CSTD_C89) \
X(unsigned , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNSIGNED , SCC_CSTD_C89) \
X(void , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOID , SCC_CSTD_C89) \
X(volatile , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOLATILE , SCC_CSTD_C89) \
X(while , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_WHILE , SCC_CSTD_C89) \
// KEYWORD_TABLE
// Non-keyword tokens: special markers, operators/punctuators, comments,
// identifiers and literals.
// Each row is X(name or spelling, token subtype, token enumerator).
// The first column is a bare word for some rows and a string literal for
// others; NOTE(review): how it is consumed is not visible in this header.
// SCC_TOK_UNKNOWN must remain the first row: scc_tok_type expands this table
// first, which gives that enumerator the value 0.
#define SCC_CTOK_TABLE \
X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \
X("#" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP ) \
X("##" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP_SHARP ) \
X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \
X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \
X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \
X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \
X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \
X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \
X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \
X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \
X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \
X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \
X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \
X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \
X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \
X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \
X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \
X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \
X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \
X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \
X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \
X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \
X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \
X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \
X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \
X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \
X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \
X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \
X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \
X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \
X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \
X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \
X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \
X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \
X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \
X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \
X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \
X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \
X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \
X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \
X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \
X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \
X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \
X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \
X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \
X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \
X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \
X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \
X(int , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \
X(float , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \
X(char , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \
X(string , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \
// END
/* clang-format on */
// Token type enumeration, generated by expanding the three X-macro tables.
// Each expansion redefines X to emit only the token enumerator column.
typedef enum scc_tok_type {
// SCC_CTOK_TABLE must be expanded first because the unknown token must be 0
#define X(str, subtype, tok) tok,
SCC_CTOK_TABLE
#undef X
// Preprocessor directive tokens follow the plain tokens
#define X(name, type, tok) tok,
SCC_PPKEYWORD_TABLE
#undef X
// C keyword tokens come last
#define X(name, subtype, tok, std) tok,
SCC_CKEYWORD_TABLE
#undef X
} scc_tok_type_t;
/**
 * @brief Coarse category of a token.
 *
 * Enumerator values are written out explicitly; they equal the values the
 * implicit numbering produced (declaration order starting at 0).
 */
typedef enum scc_tok_subtype {
    SCC_TOK_SUBTYPE_INVALID = 0,    // error placeholder
    SCC_TOK_SUBTYPE_KEYWORD = 1,    // keyword
    SCC_TOK_SUBTYPE_OPERATOR = 2,   // operator
    SCC_TOK_SUBTYPE_IDENTIFIER = 3, // identifier
    SCC_TOK_SUBTYPE_LITERAL = 4,    // literal
    SCC_TOK_SUBTYPE_EMPTYSPACE = 5, // whitespace
    SCC_TOK_SUBTYPE_COMMENT = 6,    // comment
    SCC_TOK_SUBTYPE_EOF = 7         // end marker
} scc_tok_subtype_t;
/**
 * @brief Result of lexical analysis: a single token.
 * @warning The lexeme must be released manually, otherwise memory is leaked.
 *          scc_lexer_tok_drop() in this header does so via scc_cstring_free().
 */
struct scc_lexer_token {
// Token kind.
scc_tok_type_t type;
// Token text; owned by the token (see the warning above).
scc_cstring_t lexeme;
// Source position of the token.
scc_pos_t loc;
};
// Returns the subtype for a token type. Not defined in this header.
// NOTE(review): presumably derived from the subtype column of the X-macro
// tables -- confirm against the implementation.
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);
// Returns a name string for a token type. Not defined in this header.
// NOTE(review): lifetime/ownership of the returned string is not visible
// here -- confirm before freeing or storing it.
const char *scc_get_tok_name(scc_tok_type_t type);
/**
 * @brief Reset a token and release its lexeme.
 *
 * Clears the name, offset, line and col fields of the location, sets the
 * type back to SCC_TOK_UNKNOWN, and then passes the lexeme to
 * scc_cstring_free().
 *
 * @param tok Token to reset; it is dereferenced unconditionally.
 */
static inline void scc_lexer_tok_drop(scc_lexer_tok_t *tok) {
    /* Wipe the bookkeeping fields; these stores are independent of each other. */
    tok->loc.name = null;
    tok->loc.offset = 0;
    tok->loc.line = 0;
    tok->loc.col = 0;
    tok->type = SCC_TOK_UNKNOWN;

    /* Release the text last, as the original implementation does. */
    scc_cstring_free(&tok->lexeme);
}
/**
 * @brief Check whether a token has the given type.
 *
 * @param tok  Token to inspect; only read, dereferenced unconditionally.
 * @param type Token type to compare against.
 * @return Result of the equality comparison between tok->type and type.
 */
static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok,
                                        scc_tok_type_t type) {
    const scc_tok_type_t actual = tok->type;

    return actual == type;
}
// 深拷贝 token
static inline scc_lexer_tok_t scc_lexer_tok_copy(const scc_lexer_tok_t *src) {
scc_lexer_tok_t dst = *src;
dst.lexeme = scc_cstring_copy(&src->lexeme);
return dst;
}
/**
 * @brief Move a token: dst receives src's contents and src gives up its lexeme.
 *
 * After the call src keeps its type and location, but its lexeme fields are
 * reset (data = null, size = 0, cap = 0) so that src no longer refers to the
 * buffer now held by dst.
 *
 * Moving a token onto itself is a no-op. Without this guard the
 * self-assignment followed by the reset would clear the only reference to
 * the lexeme buffer: the token would lose its text and the buffer would leak.
 *
 * @note Whatever dst held before the call is overwritten without being
 *       freed; drop dst first if it owns a lexeme.
 *
 * @param dst Destination token; overwritten.
 * @param src Source token; its lexeme is emptied.
 */
static inline void scc_lexer_tok_move(scc_lexer_tok_t *dst,
                                      scc_lexer_tok_t *src) {
    if (dst == src) {
        return;
    }

    *dst = *src;

    /* Detach the buffer from src so only dst refers to it. */
    src->lexeme.data = null;
    src->lexeme.size = 0;
    src->lexeme.cap = 0;
}
#endif /* __SCC_LEXER_TOKEN_H__ */