Files
scc/libs/lexer/include/lexer_token.h
zzy 681a15cb44 feat(lexer): 添加预处理器关键字支持并优化词法分析器
添加了完整的C预处理器关键字表,包括define、include、ifdef等关键字,
用于支持预处理器功能。

- 新增SCC_PPKEYWORD_TABLE宏定义所有预处理器关键字
- 在token类型枚举中包含预处理关键字
- 重构词法分析器以正确识别预处理关键字
- 添加scc_lexer_tok_drop函数用于清理token资源

refactor(lexer): 重构词法分析器内部结构

- 修复keywords数组字段名从tok到tok_type
- 优化scc_lexer_get_valid_token使用while循环替代do-while
- 修改fill_token和fill_valid_token返回类型为cbool
- 调整lexer_to_ring参数语义更清晰

fix(sstream): 修正环形缓冲区填充函数返回类型

- 将fill_func返回类型从int改为cbool以保持一致性
- 更新SCC_RING宏文档说明fill回调函数返回值含义

docs(argparse): 重命名examples目录修复路径错误

- 将libs/argparse/example重命名为libs/argparse/examples保持一致性

test(lexer): 更新测试用例适配新的流接口

- 修改测试代码中的scc_sstream_ref_ring为scc_sstream_to_ring
- 确保测试用例与新的API保持兼容

style(lexer): 更新示例程序日志级别和实现方式

- 将调试日志改为信息日志
- 使用环形缓冲区实现示例程序的token获取
2026-02-16 22:27:09 +08:00

192 lines
10 KiB
C

#ifndef __SCC_LEXER_TOKEN_H__
#define __SCC_LEXER_TOKEN_H__
#include <scc_core.h>
#include <scc_pos.h>
typedef enum scc_cstd {
SCC_CSTD_C89,
SCC_CSTD_C99,
SCC_CEXT_SCC,
} scc_cstd_t;
/* clang-format off */
/// https://cppreference.cn/w/c/preprocessor
#define SCC_PPKEYWORD_TABLE \
X(define , SCC_CSTD_C99, SCC_PP_TOK_DEFINE ) \
X(elif , SCC_CSTD_C99, SCC_PP_TOK_ELIF ) \
X(elifdef , SCC_CSTD_C99, SCC_PP_TOK_ELIFDEF ) \
X(elifndef , SCC_CSTD_C99, SCC_PP_TOK_ELIFNDEF ) \
X(else , SCC_CSTD_C99, SCC_PP_TOK_ELSE ) \
X(embed , SCC_CSTD_C99, SCC_PP_TOK_EMBED ) \
X(endif , SCC_CSTD_C99, SCC_PP_TOK_ENDIF ) \
X(error , SCC_CSTD_C99, SCC_PP_TOK_ERROR ) \
X(if , SCC_CSTD_C99, SCC_PP_TOK_IF ) \
X(ifdef , SCC_CEXT_SCC, SCC_PP_TOK_IFDEF ) \
X(ifndef , SCC_CSTD_C99, SCC_PP_TOK_IFNDEF ) \
X(include , SCC_CSTD_C99, SCC_PP_TOK_INCLUDE ) \
X(line , SCC_CEXT_SCC, SCC_PP_TOK_LINE ) \
X(pragma , SCC_CSTD_C99, SCC_PP_TOK_PRAGMA ) \
X(undef , SCC_CEXT_SCC, SCC_PP_TOK_UNDEF ) \
X(warning , SCC_CSTD_C99, SCC_PP_TOK_WARNING ) \
// END
/* clang-format on */
/* clang-format off */
// WARNING: Using Binary Search To Fast Find Keyword
// 你必须确保其中是按照字典序排列
#define SCC_CKEYWORD_TABLE \
X(asm , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ASM , SCC_CEXT_SCC) \
X(atomic , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ATOMIC , SCC_CEXT_SCC) \
X(auto , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_AUTO , SCC_CEXT_SCC) \
X(bool , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_BOOL , SCC_CEXT_SCC) \
X(break , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_BREAK , SCC_CSTD_C89) \
X(case , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CASE , SCC_CSTD_C89) \
X(char , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CHAR , SCC_CSTD_C89) \
X(complex , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_COMPLEX , SCC_CEXT_SCC) \
X(const , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONST , SCC_CSTD_C89) \
X(continue , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONTINUE , SCC_CSTD_C89) \
X(default , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DEFAULT , SCC_CSTD_C89) \
X(do , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DO , SCC_CSTD_C89) \
X(double , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DOUBLE , SCC_CSTD_C89) \
X(else , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ELSE , SCC_CSTD_C89) \
X(enum , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ENUM , SCC_CSTD_C89) \
X(extern , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_EXTERN , SCC_CSTD_C89) \
X(float , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FLOAT , SCC_CSTD_C89) \
X(for , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FOR , SCC_CSTD_C89) \
X(goto , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_GOTO , SCC_CSTD_C89) \
X(if , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_IF , SCC_CSTD_C89) \
X(inline , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INLINE , SCC_CSTD_C99) \
X(int , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INT , SCC_CSTD_C89) \
X(long , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_LONG , SCC_CSTD_C89) \
X(register , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_REGISTER , SCC_CSTD_C89) \
X(restrict , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RESTRICT , SCC_CSTD_C99) \
X(return , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RETURN , SCC_CSTD_C89) \
X(short , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SHORT , SCC_CSTD_C89) \
X(signed , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIGNED , SCC_CSTD_C89) \
X(sizeof , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIZEOF , SCC_CSTD_C89) \
X(static , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STATIC , SCC_CSTD_C89) \
X(struct , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STRUCT , SCC_CSTD_C89) \
X(switch , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SWITCH , SCC_CSTD_C89) \
X(typedef , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_TYPEDEF , SCC_CSTD_C89) \
X(union , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNION , SCC_CSTD_C89) \
X(unsigned , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNSIGNED , SCC_CSTD_C89) \
X(void , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOID , SCC_CSTD_C89) \
X(volatile , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOLATILE , SCC_CSTD_C89) \
X(while , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_WHILE , SCC_CSTD_C89) \
// KEYWORD_TABLE
#define SCC_CTOK_TABLE \
X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \
X("#" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP ) \
X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \
X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \
X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \
X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \
X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \
X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \
X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \
X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \
X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \
X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \
X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \
X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \
X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \
X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \
X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \
X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \
X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \
X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \
X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \
X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \
X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \
X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \
X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \
X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \
X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \
X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \
X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \
X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \
X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \
X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \
X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \
X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \
X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \
X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \
X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \
X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \
X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \
X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \
X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \
X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \
X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \
X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \
X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \
X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \
X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \
X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \
X(int , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \
X(float , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \
X(char , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \
X(string , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \
// END
/* clang-format on */
// 定义TokenType枚举
typedef enum scc_tok_type {
#define X(str, subtype, tok) tok,
SCC_CTOK_TABLE
#undef X
#define X(name, type, tok) tok,
SCC_PPKEYWORD_TABLE
#undef X
#define X(name, subtype, tok, std) tok,
SCC_CKEYWORD_TABLE
#undef X
} scc_tok_type_t;
typedef enum scc_tok_subtype {
SCC_TOK_SUBTYPE_INVALID, // 错误占位
SCC_TOK_SUBTYPE_KEYWORD, // 关键字
SCC_TOK_SUBTYPE_OPERATOR, // 操作符
SCC_TOK_SUBTYPE_IDENTIFIER, // 标识符
SCC_TOK_SUBTYPE_LITERAL, // 字面量
SCC_TOK_SUBTYPE_EMPTYSPACE, // 空白
SCC_TOK_SUBTYPE_COMMENT, // 注释
SCC_TOK_SUBTYPE_EOF // 结束标记
} scc_tok_subtype_t;
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);
const char *scc_get_tok_name(scc_tok_type_t type);
/**
* @brief 词法分析结果
* @warning 需要手动释放lexeme否则会出现内存泄漏
*/
typedef struct scc_lexer_token {
scc_tok_type_t type;
scc_cstring_t lexeme;
scc_pos_t loc;
} scc_lexer_tok_t;
static inline void scc_lexer_tok_drop(scc_lexer_tok_t *tok) {
scc_cstring_free(&tok->lexeme);
}
static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok,
scc_tok_type_t type) {
return tok->type == type;
}
#endif /* __SCC_LEXER_TOKEN_H__ */