修正了关键字表的注释，明确要求其必须按字典序排列以确保二分查找正确性。在词法分析过程中，修复标识符解析时对 `cstring` 的使用问题，并调整 token 类型赋值顺序，避免潜在的未定义行为。同时新增测试文件用于验证操作符、关键字及各类字面量的识别准确性，并更新测试运行器的日志级别控制参数。
141 lines
7.6 KiB
C
141 lines
7.6 KiB
C
#ifndef __SMCC_CC_TOKEN_H__
|
|
#define __SMCC_CC_TOKEN_H__
|
|
|
|
#include <libcore.h>
|
|
|
|
/*
 * Origin of a keyword: the language standard (or extension) that a
 * KEYWORD_TABLE row is tagged with in its fourth column.
 */
typedef enum ckeyword {
    CSTD_C89, /* rows tagged as C89 keywords */
    CSTD_C99, /* rows tagged as C99 keywords (e.g. inline, restrict) */
    CEXT_ASM, /* the `asm` extension keyword */
} ckeyword_t;
|
|
|
|
/* clang-format off */
/*
 * X-macro table of keywords.
 * Row layout: X(spelling, subtype, token enumerator, origin standard).
 *
 * WARNING: keyword lookup uses binary search, so the rows MUST be kept
 * in ascending lexicographic order of their spelling.
 *
 * NOTE(review): the C89 keyword `auto` is not listed here -- confirm
 * whether that omission is intentional.
 */
#define KEYWORD_TABLE \
    X(asm      , TK_BASIC_KEYWORD , TOKEN_ASM      , CEXT_ASM) \
    X(break    , TK_BASIC_KEYWORD , TOKEN_BREAK    , CSTD_C89) \
    X(case     , TK_BASIC_KEYWORD , TOKEN_CASE     , CSTD_C89) \
    X(char     , TK_BASIC_KEYWORD , TOKEN_CHAR     , CSTD_C89) \
    X(const    , TK_BASIC_KEYWORD , TOKEN_CONST    , CSTD_C89) \
    X(continue , TK_BASIC_KEYWORD , TOKEN_CONTINUE , CSTD_C89) \
    X(default  , TK_BASIC_KEYWORD , TOKEN_DEFAULT  , CSTD_C89) \
    X(do       , TK_BASIC_KEYWORD , TOKEN_DO       , CSTD_C89) \
    X(double   , TK_BASIC_KEYWORD , TOKEN_DOUBLE   , CSTD_C89) \
    X(else     , TK_BASIC_KEYWORD , TOKEN_ELSE     , CSTD_C89) \
    X(enum     , TK_BASIC_KEYWORD , TOKEN_ENUM     , CSTD_C89) \
    X(extern   , TK_BASIC_KEYWORD , TOKEN_EXTERN   , CSTD_C89) \
    X(float    , TK_BASIC_KEYWORD , TOKEN_FLOAT    , CSTD_C89) \
    X(for      , TK_BASIC_KEYWORD , TOKEN_FOR      , CSTD_C89) \
    X(goto     , TK_BASIC_KEYWORD , TOKEN_GOTO     , CSTD_C89) \
    X(if       , TK_BASIC_KEYWORD , TOKEN_IF       , CSTD_C89) \
    X(inline   , TK_BASIC_KEYWORD , TOKEN_INLINE   , CSTD_C99) \
    X(int      , TK_BASIC_KEYWORD , TOKEN_INT      , CSTD_C89) \
    X(long     , TK_BASIC_KEYWORD , TOKEN_LONG     , CSTD_C89) \
    X(register , TK_BASIC_KEYWORD , TOKEN_REGISTER , CSTD_C89) \
    X(restrict , TK_BASIC_KEYWORD , TOKEN_RESTRICT , CSTD_C99) \
    X(return   , TK_BASIC_KEYWORD , TOKEN_RETURN   , CSTD_C89) \
    X(short    , TK_BASIC_KEYWORD , TOKEN_SHORT    , CSTD_C89) \
    X(signed   , TK_BASIC_KEYWORD , TOKEN_SIGNED   , CSTD_C89) \
    X(sizeof   , TK_BASIC_KEYWORD , TOKEN_SIZEOF   , CSTD_C89) \
    X(static   , TK_BASIC_KEYWORD , TOKEN_STATIC   , CSTD_C89) \
    X(struct   , TK_BASIC_KEYWORD , TOKEN_STRUCT   , CSTD_C89) \
    X(switch   , TK_BASIC_KEYWORD , TOKEN_SWITCH   , CSTD_C89) \
    X(typedef  , TK_BASIC_KEYWORD , TOKEN_TYPEDEF  , CSTD_C89) \
    X(union    , TK_BASIC_KEYWORD , TOKEN_UNION    , CSTD_C89) \
    X(unsigned , TK_BASIC_KEYWORD , TOKEN_UNSIGNED , CSTD_C89) \
    X(void     , TK_BASIC_KEYWORD , TOKEN_VOID     , CSTD_C89) \
    X(volatile , TK_BASIC_KEYWORD , TOKEN_VOLATILE , CSTD_C89) \
    X(while    , TK_BASIC_KEYWORD , TOKEN_WHILE    , CSTD_C89) \
    // KEYWORD_TABLE
|
|
|
|
/*
 * X-macro table of the non-keyword tokens.
 * Row layout: X(display text, subtype, token enumerator).
 *
 * When expanded into an enum the row order determines the enumerator
 * values, so inserting or reordering rows renumbers the tokens.
 */
#define TOKEN_TABLE \
    X(unknown        , TK_BASIC_INVALID    , TOKEN_UNKNOWN        ) \
    X(EOF            , TK_BASIC_EOF        , TOKEN_EOF            ) \
    X(blank          , TK_BASIC_EMPTYSPACE , TOKEN_BLANK          ) \
    X("=="           , TK_BASIC_OPERATOR   , TOKEN_EQ             ) \
    X("="            , TK_BASIC_OPERATOR   , TOKEN_ASSIGN         ) \
    X("++"           , TK_BASIC_OPERATOR   , TOKEN_ADD_ADD        ) \
    X("+="           , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_ADD     ) \
    X("+"            , TK_BASIC_OPERATOR   , TOKEN_ADD            ) \
    X("--"           , TK_BASIC_OPERATOR   , TOKEN_SUB_SUB        ) \
    X("-="           , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_SUB     ) \
    X("->"           , TK_BASIC_OPERATOR   , TOKEN_DEREF          ) \
    X("-"            , TK_BASIC_OPERATOR   , TOKEN_SUB            ) \
    X("*="           , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_MUL     ) \
    X("*"            , TK_BASIC_OPERATOR   , TOKEN_MUL            ) \
    X("/="           , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_DIV     ) \
    X("/"            , TK_BASIC_OPERATOR   , TOKEN_DIV            ) \
    X("//"           , TK_BASIC_COMMENT    , TOKEN_LINE_COMMENT   ) \
    X("/* */"        , TK_BASIC_COMMENT    , TOKEN_BLOCK_COMMENT  ) \
    X("%="           , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_MOD     ) \
    X("%"            , TK_BASIC_OPERATOR   , TOKEN_MOD            ) \
    X("&&"           , TK_BASIC_OPERATOR   , TOKEN_AND_AND        ) \
    X("&="           , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_AND     ) \
    X("&"            , TK_BASIC_OPERATOR   , TOKEN_AND            ) \
    X("||"           , TK_BASIC_OPERATOR   , TOKEN_OR_OR          ) \
    X("|="           , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_OR      ) \
    X("|"            , TK_BASIC_OPERATOR   , TOKEN_OR             ) \
    X("^="           , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_XOR     ) \
    X("^"            , TK_BASIC_OPERATOR   , TOKEN_XOR            ) \
    X("<<="          , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_L_SH    ) \
    X("<<"           , TK_BASIC_OPERATOR   , TOKEN_L_SH           ) \
    X("<="           , TK_BASIC_OPERATOR   , TOKEN_LE             ) \
    X("<"            , TK_BASIC_OPERATOR   , TOKEN_LT             ) \
    X(">>="          , TK_BASIC_OPERATOR   , TOKEN_ASSIGN_R_SH    ) \
    X(">>"           , TK_BASIC_OPERATOR   , TOKEN_R_SH           ) \
    X(">="           , TK_BASIC_OPERATOR   , TOKEN_GE             ) \
    X(">"            , TK_BASIC_OPERATOR   , TOKEN_GT             ) \
    X("!"            , TK_BASIC_OPERATOR   , TOKEN_NOT            ) \
    X("!="           , TK_BASIC_OPERATOR   , TOKEN_NEQ            ) \
    X("~"            , TK_BASIC_OPERATOR   , TOKEN_BIT_NOT        ) \
    X("["            , TK_BASIC_OPERATOR   , TOKEN_L_BRACKET      ) \
    X("]"            , TK_BASIC_OPERATOR   , TOKEN_R_BRACKET      ) \
    X("("            , TK_BASIC_OPERATOR   , TOKEN_L_PAREN        ) \
    X(")"            , TK_BASIC_OPERATOR   , TOKEN_R_PAREN        ) \
    X("{"            , TK_BASIC_OPERATOR   , TOKEN_L_BRACE        ) \
    X("}"            , TK_BASIC_OPERATOR   , TOKEN_R_BRACE        ) \
    X(";"            , TK_BASIC_OPERATOR   , TOKEN_SEMICOLON      ) \
    X(","            , TK_BASIC_OPERATOR   , TOKEN_COMMA          ) \
    X(":"            , TK_BASIC_OPERATOR   , TOKEN_COLON          ) \
    X("."            , TK_BASIC_OPERATOR   , TOKEN_DOT            ) \
    X("..."          , TK_BASIC_OPERATOR   , TOKEN_ELLIPSIS       ) \
    X("?"            , TK_BASIC_OPERATOR   , TOKEN_COND           ) \
    X(ident          , TK_BASIC_IDENTIFIER , TOKEN_IDENT          ) \
    X(int_literal    , TK_BASIC_LITERAL    , TOKEN_INT_LITERAL    ) \
    X(float_literal  , TK_BASIC_LITERAL    , TOKEN_FLOAT_LITERAL  ) \
    X(char_literal   , TK_BASIC_LITERAL    , TOKEN_CHAR_LITERAL   ) \
    X(string_literal , TK_BASIC_LITERAL    , TOKEN_STRING_LITERAL ) \
    // END
/* clang-format on */
|
|
|
|
// 定义TokenType枚举
|
|
typedef enum cc_tktype {
|
|
// 处理普通token
|
|
#define X(str, subtype, tok) tok,
|
|
TOKEN_TABLE
|
|
#undef X
|
|
|
|
// 处理关键字(保持原有格式)
|
|
#define X(name, subtype, tok, std) tok,
|
|
KEYWORD_TABLE
|
|
#undef X
|
|
} token_type_t;
|
|
|
|
/*
 * Coarse category of a token; used as the subtype column of
 * TOKEN_TABLE and KEYWORD_TABLE.
 */
typedef enum token_subtype {
    TK_BASIC_INVALID,    /* error placeholder */
    TK_BASIC_KEYWORD,    /* keyword */
    TK_BASIC_OPERATOR,   /* operator / punctuator */
    TK_BASIC_IDENTIFIER, /* identifier */
    TK_BASIC_LITERAL,    /* literal */

    TK_BASIC_EMPTYSPACE, /* whitespace */
    TK_BASIC_COMMENT,    /* comment */
    TK_BASIC_EOF         /* end-of-input marker */
} token_subtype_t;
|
|
|
|
/*
 * Map a token type to its subtype (category).
 * NOTE(review): the implementation is not part of this header; presumably
 * it returns the subtype column of the matching table row -- confirm.
 */
token_subtype_t get_tok_subtype(token_type_t type);

/*
 * Return a name string for a token type.
 * NOTE(review): the implementation is not part of this header; the
 * returned pointer is presumably to static storage that the caller must
 * not free, and behaviour for out-of-range values is unknown -- confirm.
 */
const char *get_tok_name(token_type_t type);
|
|
|
|
#endif
|