feat: rename core types to scc prefix for consistency
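Rename types from the `core_*` prefix to the `scc_*` prefix across the lex_parser and stream modules to keep naming consistent within the SCC codebase. This covers function signatures and internal uses of types such as `core_probe_stream_t`, `core_pos_t`, and `cstring_t`, which become their `scc_*` counterparts (`scc_probe_stream_t`, `scc_pos_t`, and `scc_cstring_t`). The lexer's own token types, enum constants, tables, and functions are renamed to match (e.g. `TOKEN_*` to `SCC_TOK_*`, `lexer_init` to `scc_lexer_init`).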
This commit is contained in:
@@ -3,16 +3,16 @@
|
||||
* @brief C语言词法分析器核心数据结构与接口
|
||||
*/
|
||||
|
||||
#ifndef __SMCC_CC_LEXER_H__
|
||||
#define __SMCC_CC_LEXER_H__
|
||||
#ifndef __SCC_LEXER_H__
|
||||
#define __SCC_LEXER_H__
|
||||
|
||||
#include "lexer_token.h"
|
||||
#include <libcore.h>
|
||||
|
||||
typedef struct lexer_token {
|
||||
token_type_t type;
|
||||
core_cvalue_t value;
|
||||
core_pos_t loc;
|
||||
scc_tok_type_t type;
|
||||
scc_cvalue_t value;
|
||||
scc_pos_t loc;
|
||||
} lexer_tok_t;
|
||||
|
||||
/**
|
||||
@@ -21,16 +21,16 @@ typedef struct lexer_token {
|
||||
* 封装词法分析所需的状态信息和缓冲区管理
|
||||
*/
|
||||
typedef struct cc_lexer {
|
||||
core_probe_stream_t *stream;
|
||||
core_pos_t pos;
|
||||
} smcc_lexer_t;
|
||||
scc_probe_stream_t *stream;
|
||||
scc_pos_t pos;
|
||||
} scc_lexer_t;
|
||||
|
||||
/**
|
||||
* @brief 初始化词法分析器
|
||||
* @param[out] lexer 要初始化的词法分析器实例
|
||||
* @param[in] stream 输入流对象指针
|
||||
*/
|
||||
void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream);
|
||||
void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream);
|
||||
|
||||
/**
|
||||
* @brief 获取原始token
|
||||
@@ -39,7 +39,7 @@ void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream);
|
||||
*
|
||||
* 此函数会返回所有类型的token,包括空白符等无效token
|
||||
*/
|
||||
void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token);
|
||||
void scc_lexer_get_token(scc_lexer_t *lexer, lexer_tok_t *token);
|
||||
|
||||
/**
|
||||
* @brief 获取有效token
|
||||
@@ -48,6 +48,6 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token);
|
||||
*
|
||||
* 此函数会自动跳过空白符等无效token,返回对语法分析有意义的token
|
||||
*/
|
||||
void lexer_get_valid_token(smcc_lexer_t *lexer, lexer_tok_t *token);
|
||||
void scc_lexer_get_valid_token(scc_lexer_t *lexer, lexer_tok_t *token);
|
||||
|
||||
#endif
|
||||
#endif /* __SCC_LEXER_H__ */
|
||||
|
||||
@@ -3,138 +3,138 @@
|
||||
|
||||
#include <libcore.h>
|
||||
|
||||
typedef enum ckeyword {
|
||||
CSTD_C89,
|
||||
CSTD_C99,
|
||||
CEXT_ASM,
|
||||
} ckeyword_t;
|
||||
typedef enum scc_cstd {
|
||||
SCC_CSTD_C89,
|
||||
SCC_CSTD_C99,
|
||||
SCC_CEXT_ASM,
|
||||
} scc_cstd_t;
|
||||
|
||||
/* clang-format off */
|
||||
// WARNING: Using Binary Search To Fast Find Keyword
|
||||
// 你必须确保其中是按照字典序排列
|
||||
#define KEYWORD_TABLE \
|
||||
X(asm , TK_BASIC_KEYWORD , TOKEN_ASM , CEXT_ASM) \
|
||||
X(break , TK_BASIC_KEYWORD , TOKEN_BREAK , CSTD_C89) \
|
||||
X(case , TK_BASIC_KEYWORD , TOKEN_CASE , CSTD_C89) \
|
||||
X(char , TK_BASIC_KEYWORD , TOKEN_CHAR , CSTD_C89) \
|
||||
X(const , TK_BASIC_KEYWORD , TOKEN_CONST , CSTD_C89) \
|
||||
X(continue , TK_BASIC_KEYWORD , TOKEN_CONTINUE , CSTD_C89) \
|
||||
X(default , TK_BASIC_KEYWORD , TOKEN_DEFAULT , CSTD_C89) \
|
||||
X(do , TK_BASIC_KEYWORD , TOKEN_DO , CSTD_C89) \
|
||||
X(double , TK_BASIC_KEYWORD , TOKEN_DOUBLE , CSTD_C89) \
|
||||
X(else , TK_BASIC_KEYWORD , TOKEN_ELSE , CSTD_C89) \
|
||||
X(enum , TK_BASIC_KEYWORD , TOKEN_ENUM , CSTD_C89) \
|
||||
X(extern , TK_BASIC_KEYWORD , TOKEN_EXTERN , CSTD_C89) \
|
||||
X(float , TK_BASIC_KEYWORD , TOKEN_FLOAT , CSTD_C89) \
|
||||
X(for , TK_BASIC_KEYWORD , TOKEN_FOR , CSTD_C89) \
|
||||
X(goto , TK_BASIC_KEYWORD , TOKEN_GOTO , CSTD_C89) \
|
||||
X(if , TK_BASIC_KEYWORD , TOKEN_IF , CSTD_C89) \
|
||||
X(inline , TK_BASIC_KEYWORD , TOKEN_INLINE , CSTD_C99) \
|
||||
X(int , TK_BASIC_KEYWORD , TOKEN_INT , CSTD_C89) \
|
||||
X(long , TK_BASIC_KEYWORD , TOKEN_LONG , CSTD_C89) \
|
||||
X(register , TK_BASIC_KEYWORD , TOKEN_REGISTER , CSTD_C89) \
|
||||
X(restrict , TK_BASIC_KEYWORD , TOKEN_RESTRICT , CSTD_C99) \
|
||||
X(return , TK_BASIC_KEYWORD , TOKEN_RETURN , CSTD_C89) \
|
||||
X(short , TK_BASIC_KEYWORD , TOKEN_SHORT , CSTD_C89) \
|
||||
X(signed , TK_BASIC_KEYWORD , TOKEN_SIGNED , CSTD_C89) \
|
||||
X(sizeof , TK_BASIC_KEYWORD , TOKEN_SIZEOF , CSTD_C89) \
|
||||
X(static , TK_BASIC_KEYWORD , TOKEN_STATIC , CSTD_C89) \
|
||||
X(struct , TK_BASIC_KEYWORD , TOKEN_STRUCT , CSTD_C89) \
|
||||
X(switch , TK_BASIC_KEYWORD , TOKEN_SWITCH , CSTD_C89) \
|
||||
X(typedef , TK_BASIC_KEYWORD , TOKEN_TYPEDEF , CSTD_C89) \
|
||||
X(union , TK_BASIC_KEYWORD , TOKEN_UNION , CSTD_C89) \
|
||||
X(unsigned , TK_BASIC_KEYWORD , TOKEN_UNSIGNED , CSTD_C89) \
|
||||
X(void , TK_BASIC_KEYWORD , TOKEN_VOID , CSTD_C89) \
|
||||
X(volatile , TK_BASIC_KEYWORD , TOKEN_VOLATILE , CSTD_C89) \
|
||||
X(while , TK_BASIC_KEYWORD , TOKEN_WHILE , CSTD_C89) \
|
||||
#define SCC_CKEYWORD_TABLE \
|
||||
X(asm , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ASM , SCC_CEXT_ASM) \
|
||||
X(break , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_BREAK , SCC_CSTD_C89) \
|
||||
X(case , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CASE , SCC_CSTD_C89) \
|
||||
X(char , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CHAR , SCC_CSTD_C89) \
|
||||
X(const , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONST , SCC_CSTD_C89) \
|
||||
X(continue , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONTINUE , SCC_CSTD_C89) \
|
||||
X(default , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DEFAULT , SCC_CSTD_C89) \
|
||||
X(do , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DO , SCC_CSTD_C89) \
|
||||
X(double , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DOUBLE , SCC_CSTD_C89) \
|
||||
X(else , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ELSE , SCC_CSTD_C89) \
|
||||
X(enum , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ENUM , SCC_CSTD_C89) \
|
||||
X(extern , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_EXTERN , SCC_CSTD_C89) \
|
||||
X(float , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FLOAT , SCC_CSTD_C89) \
|
||||
X(for , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FOR , SCC_CSTD_C89) \
|
||||
X(goto , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_GOTO , SCC_CSTD_C89) \
|
||||
X(if , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_IF , SCC_CSTD_C89) \
|
||||
X(inline , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INLINE , SCC_CSTD_C99) \
|
||||
X(int , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INT , SCC_CSTD_C89) \
|
||||
X(long , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_LONG , SCC_CSTD_C89) \
|
||||
X(register , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_REGISTER , SCC_CSTD_C89) \
|
||||
X(restrict , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RESTRICT , SCC_CSTD_C99) \
|
||||
X(return , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RETURN , SCC_CSTD_C89) \
|
||||
X(short , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SHORT , SCC_CSTD_C89) \
|
||||
X(signed , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIGNED , SCC_CSTD_C89) \
|
||||
X(sizeof , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIZEOF , SCC_CSTD_C89) \
|
||||
X(static , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STATIC , SCC_CSTD_C89) \
|
||||
X(struct , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STRUCT , SCC_CSTD_C89) \
|
||||
X(switch , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SWITCH , SCC_CSTD_C89) \
|
||||
X(typedef , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_TYPEDEF , SCC_CSTD_C89) \
|
||||
X(union , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNION , SCC_CSTD_C89) \
|
||||
X(unsigned , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNSIGNED , SCC_CSTD_C89) \
|
||||
X(void , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOID , SCC_CSTD_C89) \
|
||||
X(volatile , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOLATILE , SCC_CSTD_C89) \
|
||||
X(while , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_WHILE , SCC_CSTD_C89) \
|
||||
// KEYWORD_TABLE
|
||||
|
||||
#define TOKEN_TABLE \
|
||||
X(unknown , TK_BASIC_INVALID, TOKEN_UNKNOWN ) \
|
||||
X(EOF , TK_BASIC_EOF, TOKEN_EOF ) \
|
||||
X(blank , TK_BASIC_EMPTYSPACE, TOKEN_BLANK ) \
|
||||
X("==" , TK_BASIC_OPERATOR, TOKEN_EQ ) \
|
||||
X("=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN ) \
|
||||
X("++" , TK_BASIC_OPERATOR, TOKEN_ADD_ADD ) \
|
||||
X("+=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_ADD ) \
|
||||
X("+" , TK_BASIC_OPERATOR, TOKEN_ADD ) \
|
||||
X("--" , TK_BASIC_OPERATOR, TOKEN_SUB_SUB ) \
|
||||
X("-=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_SUB ) \
|
||||
X("->" , TK_BASIC_OPERATOR, TOKEN_DEREF ) \
|
||||
X("-" , TK_BASIC_OPERATOR, TOKEN_SUB ) \
|
||||
X("*=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MUL ) \
|
||||
X("*" , TK_BASIC_OPERATOR, TOKEN_MUL ) \
|
||||
X("/=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_DIV ) \
|
||||
X("/" , TK_BASIC_OPERATOR, TOKEN_DIV ) \
|
||||
X("//" , TK_BASIC_COMMENT , TOKEN_LINE_COMMENT ) \
|
||||
X("/* */" , TK_BASIC_COMMENT , TOKEN_BLOCK_COMMENT ) \
|
||||
X("%=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MOD ) \
|
||||
X("%" , TK_BASIC_OPERATOR, TOKEN_MOD ) \
|
||||
X("&&" , TK_BASIC_OPERATOR, TOKEN_AND_AND ) \
|
||||
X("&=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_AND ) \
|
||||
X("&" , TK_BASIC_OPERATOR, TOKEN_AND ) \
|
||||
X("||" , TK_BASIC_OPERATOR, TOKEN_OR_OR ) \
|
||||
X("|=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_OR ) \
|
||||
X("|" , TK_BASIC_OPERATOR, TOKEN_OR ) \
|
||||
X("^=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_XOR ) \
|
||||
X("^" , TK_BASIC_OPERATOR, TOKEN_XOR ) \
|
||||
X("<<=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_L_SH ) \
|
||||
X("<<" , TK_BASIC_OPERATOR, TOKEN_L_SH ) \
|
||||
X("<=" , TK_BASIC_OPERATOR, TOKEN_LE ) \
|
||||
X("<" , TK_BASIC_OPERATOR, TOKEN_LT ) \
|
||||
X(">>=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_R_SH ) \
|
||||
X(">>" , TK_BASIC_OPERATOR, TOKEN_R_SH ) \
|
||||
X(">=" , TK_BASIC_OPERATOR, TOKEN_GE ) \
|
||||
X(">" , TK_BASIC_OPERATOR, TOKEN_GT ) \
|
||||
X("!" , TK_BASIC_OPERATOR, TOKEN_NOT ) \
|
||||
X("!=" , TK_BASIC_OPERATOR, TOKEN_NEQ ) \
|
||||
X("~" , TK_BASIC_OPERATOR, TOKEN_BIT_NOT ) \
|
||||
X("[" , TK_BASIC_OPERATOR, TOKEN_L_BRACKET ) \
|
||||
X("]" , TK_BASIC_OPERATOR, TOKEN_R_BRACKET ) \
|
||||
X("(" , TK_BASIC_OPERATOR, TOKEN_L_PAREN ) \
|
||||
X(")" , TK_BASIC_OPERATOR, TOKEN_R_PAREN ) \
|
||||
X("{" , TK_BASIC_OPERATOR, TOKEN_L_BRACE ) \
|
||||
X("}" , TK_BASIC_OPERATOR, TOKEN_R_BRACE ) \
|
||||
X(";" , TK_BASIC_OPERATOR, TOKEN_SEMICOLON ) \
|
||||
X("," , TK_BASIC_OPERATOR, TOKEN_COMMA ) \
|
||||
X(":" , TK_BASIC_OPERATOR, TOKEN_COLON ) \
|
||||
X("." , TK_BASIC_OPERATOR, TOKEN_DOT ) \
|
||||
X("..." , TK_BASIC_OPERATOR, TOKEN_ELLIPSIS ) \
|
||||
X("?" , TK_BASIC_OPERATOR, TOKEN_COND ) \
|
||||
X(ident , TK_BASIC_IDENTIFIER, TOKEN_IDENT ) \
|
||||
X(int_literal , TK_BASIC_LITERAL, TOKEN_INT_LITERAL ) \
|
||||
X(float_literal , TK_BASIC_LITERAL, TOKEN_FLOAT_LITERAL ) \
|
||||
X(char_literal , TK_BASIC_LITERAL, TOKEN_CHAR_LITERAL ) \
|
||||
X(string_literal , TK_BASIC_LITERAL, TOKEN_STRING_LITERAL ) \
|
||||
#define SCC_CTOK_TABLE \
|
||||
X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
|
||||
X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
|
||||
X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
|
||||
X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
|
||||
X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
|
||||
X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
|
||||
X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \
|
||||
X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \
|
||||
X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \
|
||||
X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \
|
||||
X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \
|
||||
X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \
|
||||
X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \
|
||||
X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \
|
||||
X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \
|
||||
X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \
|
||||
X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \
|
||||
X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \
|
||||
X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \
|
||||
X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \
|
||||
X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \
|
||||
X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \
|
||||
X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \
|
||||
X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \
|
||||
X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \
|
||||
X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \
|
||||
X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \
|
||||
X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \
|
||||
X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \
|
||||
X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \
|
||||
X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \
|
||||
X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \
|
||||
X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \
|
||||
X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \
|
||||
X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \
|
||||
X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \
|
||||
X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \
|
||||
X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \
|
||||
X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \
|
||||
X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \
|
||||
X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \
|
||||
X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \
|
||||
X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \
|
||||
X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \
|
||||
X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \
|
||||
X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \
|
||||
X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \
|
||||
X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \
|
||||
X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \
|
||||
X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \
|
||||
X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \
|
||||
X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \
|
||||
X(int_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \
|
||||
X(float_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \
|
||||
X(char_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \
|
||||
X(string_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \
|
||||
// END
|
||||
/* clang-format on */
|
||||
|
||||
// 定义TokenType枚举
|
||||
typedef enum cc_tktype {
|
||||
typedef enum scc_tok_type {
|
||||
// 处理普通token
|
||||
#define X(str, subtype, tok) tok,
|
||||
TOKEN_TABLE
|
||||
SCC_CTOK_TABLE
|
||||
#undef X
|
||||
|
||||
// 处理关键字(保持原有格式)
|
||||
#define X(name, subtype, tok, std) tok,
|
||||
KEYWORD_TABLE
|
||||
SCC_CKEYWORD_TABLE
|
||||
#undef X
|
||||
} token_type_t;
|
||||
} scc_tok_type_t;
|
||||
|
||||
typedef enum token_subtype {
|
||||
TK_BASIC_INVALID, // 错误占位
|
||||
TK_BASIC_KEYWORD, // 关键字
|
||||
TK_BASIC_OPERATOR, // 操作符
|
||||
TK_BASIC_IDENTIFIER, // 标识符
|
||||
TK_BASIC_LITERAL, // 字面量
|
||||
typedef enum scc_tok_subtype {
|
||||
SCC_TOK_SUBTYPE_INVALID, // 错误占位
|
||||
SCC_TOK_SUBTYPE_KEYWORD, // 关键字
|
||||
SCC_TOK_SUBTYPE_OPERATOR, // 操作符
|
||||
SCC_TOK_SUBTYPE_IDENTIFIER, // 标识符
|
||||
SCC_TOK_SUBTYPE_LITERAL, // 字面量
|
||||
|
||||
TK_BASIC_EMPTYSPACE, // 空白
|
||||
TK_BASIC_COMMENT, // 注释
|
||||
TK_BASIC_EOF // 结束标记
|
||||
} token_subtype_t;
|
||||
SCC_TOK_SUBTYPE_EMPTYSPACE, // 空白
|
||||
SCC_TOK_SUBTYPE_COMMENT, // 注释
|
||||
SCC_TOK_SUBTYPE_EOF // 结束标记
|
||||
} scc_tok_subtype_t;
|
||||
|
||||
token_subtype_t get_tok_subtype(token_type_t type);
|
||||
const char *get_tok_name(token_type_t type);
|
||||
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);
|
||||
const char *scc_get_tok_name(scc_tok_type_t type);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -32,11 +32,11 @@ David Hanson / drh@drhanson.net
|
||||
|
||||
static const struct {
|
||||
const char *name;
|
||||
ckeyword_t std_type;
|
||||
token_type_t tok;
|
||||
scc_cstd_t std_type;
|
||||
scc_tok_type_t tok;
|
||||
} keywords[] = {
|
||||
#define X(name, subtype, tok, std_type, ...) {#name, std_type, tok},
|
||||
KEYWORD_TABLE
|
||||
SCC_CKEYWORD_TABLE
|
||||
#undef X
|
||||
};
|
||||
|
||||
@@ -75,23 +75,23 @@ static inline int keyword_cmp(const char *name, int len) {
|
||||
return -1; // Not a keyword.
|
||||
}
|
||||
|
||||
void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream) {
|
||||
void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream) {
|
||||
lexer->stream = stream;
|
||||
lexer->pos = core_pos_init();
|
||||
lexer->pos = scc_pos_init();
|
||||
// FIXME
|
||||
lexer->pos.name = cstring_from_cstr(cstring_as_cstr(&stream->name));
|
||||
lexer->pos.name = scc_cstring_from_cstr(scc_cstring_as_cstr(&stream->name));
|
||||
}
|
||||
|
||||
#define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)
|
||||
#define set_err_token(token) ((token)->type = SCC_TOK_UNKNOWN)
|
||||
|
||||
static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
static void parse_line(scc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
token->loc = lexer->pos;
|
||||
core_probe_stream_t *stream = lexer->stream;
|
||||
core_probe_stream_reset(stream);
|
||||
int ch = core_probe_stream_next(stream);
|
||||
scc_probe_stream_t *stream = lexer->stream;
|
||||
scc_probe_stream_reset(stream);
|
||||
int ch = scc_probe_stream_next(stream);
|
||||
|
||||
usize n;
|
||||
cstring_t str = cstring_new();
|
||||
scc_cstring_t str = scc_cstring_new();
|
||||
|
||||
if (ch == core_stream_eof) {
|
||||
LEX_WARN("Unexpected EOF at begin");
|
||||
@@ -104,7 +104,7 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
const char line[] = "line";
|
||||
|
||||
for (int i = 0; i < (int)sizeof(line); i++) {
|
||||
ch = core_probe_stream_consume(stream);
|
||||
ch = scc_probe_stream_consume(stream);
|
||||
core_pos_next(&lexer->pos);
|
||||
if (ch != line[i]) {
|
||||
LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
|
||||
@@ -118,12 +118,12 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
goto SKIP_LINE;
|
||||
}
|
||||
|
||||
if (core_probe_stream_consume(stream) != ' ') {
|
||||
if (scc_probe_stream_consume(stream) != ' ') {
|
||||
lex_parse_skip_line(lexer->stream, &lexer->pos);
|
||||
token->loc.line = token->value.n;
|
||||
}
|
||||
|
||||
if (core_probe_stream_next(stream) != '"') {
|
||||
if (scc_probe_stream_next(stream) != '"') {
|
||||
LEX_ERROR("Invalid `#` line");
|
||||
goto SKIP_LINE;
|
||||
}
|
||||
@@ -135,259 +135,259 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
lex_parse_skip_line(lexer->stream, &lexer->pos);
|
||||
token->loc.line = n;
|
||||
// FIXME memory leak
|
||||
token->loc.name = cstring_from_cstr(cstring_as_cstr(&str));
|
||||
cstring_free(&str);
|
||||
token->loc.name = scc_cstring_from_cstr(scc_cstring_as_cstr(&str));
|
||||
scc_cstring_free(&str);
|
||||
return;
|
||||
SKIP_LINE:
|
||||
lex_parse_skip_line(lexer->stream, &lexer->pos);
|
||||
ERR:
|
||||
set_err_token(token);
|
||||
cstring_free(&str);
|
||||
scc_cstring_free(&str);
|
||||
}
|
||||
|
||||
// /zh/c/language/operator_arithmetic.html
|
||||
void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
void scc_lexer_get_token(scc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
token->loc = lexer->pos;
|
||||
token->type = TOKEN_UNKNOWN;
|
||||
core_probe_stream_t *stream = lexer->stream;
|
||||
token->type = SCC_TOK_UNKNOWN;
|
||||
scc_probe_stream_t *stream = lexer->stream;
|
||||
|
||||
core_probe_stream_reset(stream);
|
||||
token_type_t type = TOKEN_UNKNOWN;
|
||||
int ch = core_probe_stream_next(stream);
|
||||
scc_probe_stream_reset(stream);
|
||||
scc_tok_type_t type = SCC_TOK_UNKNOWN;
|
||||
int ch = scc_probe_stream_next(stream);
|
||||
|
||||
// once step
|
||||
switch (ch) {
|
||||
case '=':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = TOKEN_EQ;
|
||||
type = SCC_TOK_EQ;
|
||||
goto double_char;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_ASSIGN;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_ASSIGN;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '+':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '+':
|
||||
type = TOKEN_ADD_ADD;
|
||||
type = SCC_TOK_ADD_ADD;
|
||||
goto double_char;
|
||||
case '=':
|
||||
type = TOKEN_ASSIGN_ADD;
|
||||
type = SCC_TOK_ASSIGN_ADD;
|
||||
goto double_char;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_ADD;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_ADD;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '-':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '-':
|
||||
type = TOKEN_SUB_SUB;
|
||||
type = SCC_TOK_SUB_SUB;
|
||||
goto double_char;
|
||||
case '=':
|
||||
type = TOKEN_ASSIGN_SUB;
|
||||
type = SCC_TOK_ASSIGN_SUB;
|
||||
goto double_char;
|
||||
case '>':
|
||||
type = TOKEN_DEREF;
|
||||
type = SCC_TOK_DEREF;
|
||||
goto double_char;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_SUB;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_SUB;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '*':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = TOKEN_ASSIGN_MUL;
|
||||
type = SCC_TOK_ASSIGN_MUL;
|
||||
goto double_char;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_MUL;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_MUL;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '/':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = TOKEN_ASSIGN_DIV;
|
||||
type = SCC_TOK_ASSIGN_DIV;
|
||||
goto double_char;
|
||||
case '/':
|
||||
lex_parse_skip_line(lexer->stream, &lexer->pos);
|
||||
token->type = TOKEN_LINE_COMMENT;
|
||||
token->type = SCC_TOK_LINE_COMMENT;
|
||||
goto END;
|
||||
case '*':
|
||||
lex_parse_skip_block_comment(lexer->stream, &lexer->pos);
|
||||
token->type = TOKEN_BLOCK_COMMENT;
|
||||
token->type = SCC_TOK_BLOCK_COMMENT;
|
||||
goto END;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_DIV;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_DIV;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '%':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = TOKEN_ASSIGN_MOD;
|
||||
type = SCC_TOK_ASSIGN_MOD;
|
||||
goto double_char;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_MOD;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_MOD;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '&':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '&':
|
||||
type = TOKEN_AND_AND;
|
||||
type = SCC_TOK_AND_AND;
|
||||
goto double_char;
|
||||
case '=':
|
||||
type = TOKEN_ASSIGN_AND;
|
||||
type = SCC_TOK_ASSIGN_AND;
|
||||
goto double_char;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_AND;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_AND;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '|':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '|':
|
||||
type = TOKEN_OR_OR;
|
||||
type = SCC_TOK_OR_OR;
|
||||
goto double_char;
|
||||
case '=':
|
||||
type = TOKEN_ASSIGN_OR;
|
||||
type = SCC_TOK_ASSIGN_OR;
|
||||
goto double_char;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_OR;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_OR;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '^':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = TOKEN_ASSIGN_XOR;
|
||||
type = SCC_TOK_ASSIGN_XOR;
|
||||
goto double_char;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_XOR;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_XOR;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '<':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = TOKEN_LE;
|
||||
type = SCC_TOK_LE;
|
||||
goto double_char;
|
||||
case '<': {
|
||||
if (core_probe_stream_next(stream) == '=') {
|
||||
type = TOKEN_ASSIGN_L_SH;
|
||||
if (scc_probe_stream_next(stream) == '=') {
|
||||
type = SCC_TOK_ASSIGN_L_SH;
|
||||
goto triple_char;
|
||||
} else {
|
||||
type = TOKEN_L_SH;
|
||||
type = SCC_TOK_L_SH;
|
||||
goto double_char;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_LT;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_LT;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = TOKEN_GE;
|
||||
type = SCC_TOK_GE;
|
||||
goto double_char;
|
||||
case '>': {
|
||||
if (core_probe_stream_next(stream) == '=') {
|
||||
type = TOKEN_ASSIGN_R_SH;
|
||||
if (scc_probe_stream_next(stream) == '=') {
|
||||
type = SCC_TOK_ASSIGN_R_SH;
|
||||
goto triple_char;
|
||||
} else {
|
||||
type = TOKEN_R_SH;
|
||||
type = SCC_TOK_R_SH;
|
||||
goto double_char;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_GT;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_GT;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '~':
|
||||
type = TOKEN_BIT_NOT;
|
||||
type = SCC_TOK_BIT_NOT;
|
||||
break;
|
||||
case '!':
|
||||
switch (core_probe_stream_next(stream)) {
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = TOKEN_NEQ;
|
||||
type = SCC_TOK_NEQ;
|
||||
goto double_char;
|
||||
default:
|
||||
core_probe_stream_reset(stream), type = TOKEN_NOT;
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_NOT;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '[':
|
||||
type = TOKEN_L_BRACKET;
|
||||
type = SCC_TOK_L_BRACKET;
|
||||
break;
|
||||
case ']':
|
||||
type = TOKEN_R_BRACKET;
|
||||
type = SCC_TOK_R_BRACKET;
|
||||
break;
|
||||
case '(':
|
||||
type = TOKEN_L_PAREN;
|
||||
type = SCC_TOK_L_PAREN;
|
||||
break;
|
||||
case ')':
|
||||
type = TOKEN_R_PAREN;
|
||||
type = SCC_TOK_R_PAREN;
|
||||
break;
|
||||
case '{':
|
||||
type = TOKEN_L_BRACE;
|
||||
type = SCC_TOK_L_BRACE;
|
||||
break;
|
||||
case '}':
|
||||
type = TOKEN_R_BRACE;
|
||||
type = SCC_TOK_R_BRACE;
|
||||
break;
|
||||
case ';':
|
||||
type = TOKEN_SEMICOLON;
|
||||
type = SCC_TOK_SEMICOLON;
|
||||
break;
|
||||
case ',':
|
||||
type = TOKEN_COMMA;
|
||||
type = SCC_TOK_COMMA;
|
||||
break;
|
||||
case ':':
|
||||
type = TOKEN_COLON;
|
||||
type = SCC_TOK_COLON;
|
||||
break;
|
||||
case '.':
|
||||
if (core_probe_stream_next(stream) == '.' &&
|
||||
core_probe_stream_next(stream) == '.') {
|
||||
type = TOKEN_ELLIPSIS;
|
||||
if (scc_probe_stream_next(stream) == '.' &&
|
||||
scc_probe_stream_next(stream) == '.') {
|
||||
type = SCC_TOK_ELLIPSIS;
|
||||
goto triple_char;
|
||||
}
|
||||
type = TOKEN_DOT;
|
||||
type = SCC_TOK_DOT;
|
||||
break;
|
||||
case '?':
|
||||
type = TOKEN_COND;
|
||||
type = SCC_TOK_COND;
|
||||
break;
|
||||
case '\v':
|
||||
case '\f':
|
||||
case ' ':
|
||||
case '\t':
|
||||
type = TOKEN_BLANK;
|
||||
type = SCC_TOK_BLANK;
|
||||
break;
|
||||
case '\r':
|
||||
case '\n':
|
||||
lex_parse_skip_endline(lexer->stream, &lexer->pos);
|
||||
token->type = TOKEN_BLANK;
|
||||
token->type = SCC_TOK_BLANK;
|
||||
goto END;
|
||||
case '#':
|
||||
parse_line(lexer, token);
|
||||
token->type = TOKEN_BLANK;
|
||||
token->type = SCC_TOK_BLANK;
|
||||
goto END;
|
||||
case '\0':
|
||||
case core_stream_eof:
|
||||
// EOF
|
||||
type = TOKEN_EOF;
|
||||
type = SCC_TOK_EOF;
|
||||
break;
|
||||
case '\'': {
|
||||
token->loc = lexer->pos;
|
||||
token->type = TOKEN_CHAR_LITERAL;
|
||||
token->type = SCC_TOK_CHAR_LITERAL;
|
||||
int ch = lex_parse_char(lexer->stream, &lexer->pos);
|
||||
if (ch == core_stream_eof) {
|
||||
LEX_ERROR("Unexpected character literal");
|
||||
token->type = TOKEN_UNKNOWN;
|
||||
token->type = SCC_TOK_UNKNOWN;
|
||||
} else {
|
||||
token->value.ch = ch;
|
||||
}
|
||||
@@ -395,14 +395,14 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
}
|
||||
case '"': {
|
||||
token->loc = lexer->pos;
|
||||
token->type = TOKEN_STRING_LITERAL;
|
||||
cstring_t output = cstring_new();
|
||||
token->type = SCC_TOK_STRING_LITERAL;
|
||||
scc_cstring_t output = scc_cstring_new();
|
||||
if (lex_parse_string(lexer->stream, &lexer->pos, &output) == true) {
|
||||
token->value.cstr.data = cstring_as_cstr(&output);
|
||||
token->value.cstr.len = cstring_len(&output);
|
||||
token->value.cstr.data = scc_cstring_as_cstr(&output);
|
||||
token->value.cstr.len = scc_cstring_len(&output);
|
||||
} else {
|
||||
LEX_ERROR("Unexpected string literal");
|
||||
token->type = TOKEN_UNKNOWN;
|
||||
token->type = SCC_TOK_UNKNOWN;
|
||||
}
|
||||
|
||||
goto END;
|
||||
@@ -412,13 +412,13 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
/* clang-format on */
|
||||
token->loc = lexer->pos;
|
||||
token->type = TOKEN_INT_LITERAL;
|
||||
token->type = SCC_TOK_INT_LITERAL;
|
||||
usize output;
|
||||
if (lex_parse_number(lexer->stream, &lexer->pos, &output) == true) {
|
||||
token->value.n = output;
|
||||
} else {
|
||||
LEX_ERROR("Unexpected number literal");
|
||||
token->type = TOKEN_UNKNOWN;
|
||||
token->type = SCC_TOK_UNKNOWN;
|
||||
}
|
||||
goto END;
|
||||
/* clang-format off */
|
||||
@@ -431,17 +431,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
|
||||
case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
|
||||
/* clang-format on */
|
||||
cstring_t str = cstring_new();
|
||||
scc_cstring_t str = scc_cstring_new();
|
||||
cbool ret = lex_parse_identifier(lexer->stream, &lexer->pos, &str);
|
||||
Assert(ret == true);
|
||||
|
||||
int res = keyword_cmp(cstring_as_cstr(&str), cstring_len(&str));
|
||||
int res = keyword_cmp(scc_cstring_as_cstr(&str), scc_cstring_len(&str));
|
||||
if (res == -1) {
|
||||
token->value.cstr.data = (char *)cstring_as_cstr(&str);
|
||||
token->value.cstr.len = cstring_len(&str);
|
||||
type = TOKEN_IDENT;
|
||||
token->value.cstr.data = (char *)scc_cstring_as_cstr(&str);
|
||||
token->value.cstr.len = scc_cstring_len(&str);
|
||||
type = SCC_TOK_IDENT;
|
||||
} else {
|
||||
cstring_free(&str);
|
||||
scc_cstring_free(&str);
|
||||
type = keywords[res].tok;
|
||||
}
|
||||
token->type = type;
|
||||
@@ -452,29 +452,31 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
}
|
||||
goto once_char;
|
||||
triple_char:
|
||||
core_probe_stream_consume(stream);
|
||||
scc_probe_stream_consume(stream);
|
||||
core_pos_next(&lexer->pos);
|
||||
double_char:
|
||||
core_probe_stream_consume(stream);
|
||||
scc_probe_stream_consume(stream);
|
||||
core_pos_next(&lexer->pos);
|
||||
once_char:
|
||||
core_probe_stream_consume(stream);
|
||||
scc_probe_stream_consume(stream);
|
||||
core_pos_next(&lexer->pos);
|
||||
token->type = type;
|
||||
END:
|
||||
LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(token->type),
|
||||
LEX_DEBUG("get token `%s` in %s:%d:%d", scc_get_tok_name(token->type),
|
||||
token->loc.name, token->loc.line, token->loc.column);
|
||||
}
|
||||
|
||||
// lexer_get_token maybe got invalid (with parser)
|
||||
void lexer_get_valid_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
token_subtype_t type;
|
||||
// scc_lexer_get_token maybe got invalid (with parser)
|
||||
void scc_lexer_get_valid_token(scc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
scc_tok_subtype_t type;
|
||||
do {
|
||||
lexer_get_token(lexer, token);
|
||||
type = get_tok_subtype(token->type);
|
||||
AssertFmt(type != TK_BASIC_INVALID, "Invalid token: `%s` at %s:%d:%d",
|
||||
get_tok_name(token->type), token->loc.name, token->loc.line,
|
||||
token->loc.col);
|
||||
Assert(type != TK_BASIC_INVALID);
|
||||
} while (type == TK_BASIC_EMPTYSPACE || type == TK_BASIC_COMMENT);
|
||||
scc_lexer_get_token(lexer, token);
|
||||
type = scc_get_tok_subtype(token->type);
|
||||
AssertFmt(type != SCC_TOK_SUBTYPE_INVALID,
|
||||
"Invalid token: `%s` at %s:%d:%d",
|
||||
scc_get_tok_name(token->type), token->loc.name,
|
||||
token->loc.line, token->loc.col);
|
||||
Assert(type != SCC_TOK_SUBTYPE_INVALID);
|
||||
} while (type == SCC_TOK_SUBTYPE_EMPTYSPACE ||
|
||||
type == SCC_TOK_SUBTYPE_COMMENT);
|
||||
}
|
||||
|
||||
@@ -1,30 +1,30 @@
|
||||
#include <lexer_token.h>
|
||||
|
||||
// 生成字符串映射(根据需求选择#str或#name)
|
||||
static const char* token_strings[] = {
|
||||
#define X(str, subtype, tok) [tok] = #str,
|
||||
TOKEN_TABLE
|
||||
#undef X
|
||||
static const char *token_strings[] = {
|
||||
#define X(str, subtype, tok) [tok] = #str,
|
||||
SCC_CTOK_TABLE
|
||||
#undef X
|
||||
|
||||
#define X(str, subtype, tok, std) [tok] = #str,
|
||||
KEYWORD_TABLE
|
||||
#undef X
|
||||
#define X(str, subtype, tok, std) [tok] = #str,
|
||||
SCC_CKEYWORD_TABLE
|
||||
#undef X
|
||||
};
|
||||
|
||||
static token_subtype_t token_subtypes[] = {
|
||||
#define X(str, subtype, tok) [tok] = subtype,
|
||||
TOKEN_TABLE
|
||||
#undef X
|
||||
static scc_tok_subtype_t token_subtypes[] = {
|
||||
#define X(str, subtype, tok) [tok] = subtype,
|
||||
SCC_CTOK_TABLE
|
||||
#undef X
|
||||
|
||||
#define X(str, subtype, tok, std) [tok] = subtype,
|
||||
KEYWORD_TABLE
|
||||
#undef X
|
||||
#define X(str, subtype, tok, std) [tok] = subtype,
|
||||
SCC_CKEYWORD_TABLE
|
||||
#undef X
|
||||
};
|
||||
|
||||
token_subtype_t get_tok_subtype(token_type_t type) {
|
||||
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type) {
|
||||
return token_subtypes[type];
|
||||
}
|
||||
|
||||
const char* get_tok_name(token_type_t type) {
|
||||
const char *scc_get_tok_name(scc_tok_type_t type) {
|
||||
return token_strings[type];
|
||||
}
|
||||
|
||||
@@ -5,92 +5,92 @@
|
||||
|
||||
// 测试辅助函数
|
||||
static inline void test_lexer_string(const char *input,
|
||||
token_type_t expected_type) {
|
||||
smcc_lexer_t lexer;
|
||||
scc_tok_type_t expected_type) {
|
||||
scc_lexer_t lexer;
|
||||
lexer_tok_t token;
|
||||
core_mem_probe_stream_t stream;
|
||||
scc_mem_probe_stream_t stream;
|
||||
|
||||
lexer_init(&lexer, core_mem_probe_stream_init(&stream, input, strlen(input),
|
||||
false));
|
||||
lexer_get_token(&lexer, &token);
|
||||
scc_lexer_init(&lexer, scc_mem_probe_stream_init(&stream, input,
|
||||
strlen(input), false));
|
||||
scc_lexer_get_token(&lexer, &token);
|
||||
|
||||
TEST_CHECK(token.type == expected_type);
|
||||
TEST_MSG("Expected: %s", get_tok_name(expected_type));
|
||||
TEST_MSG("Got: %s", get_tok_name(token.type));
|
||||
TEST_MSG("Expected: %s", scc_get_tok_name(expected_type));
|
||||
TEST_MSG("Got: %s", scc_get_tok_name(token.type));
|
||||
}
|
||||
|
||||
// 基础运算符测试
|
||||
void test_operators() {
|
||||
TEST_CASE("Arithmetic operators");
|
||||
{
|
||||
test_lexer_string("+", TOKEN_ADD);
|
||||
test_lexer_string("++", TOKEN_ADD_ADD);
|
||||
test_lexer_string("+=", TOKEN_ASSIGN_ADD);
|
||||
test_lexer_string("-", TOKEN_SUB);
|
||||
test_lexer_string("--", TOKEN_SUB_SUB);
|
||||
test_lexer_string("-=", TOKEN_ASSIGN_SUB);
|
||||
test_lexer_string("*", TOKEN_MUL);
|
||||
test_lexer_string("*=", TOKEN_ASSIGN_MUL);
|
||||
test_lexer_string("/", TOKEN_DIV);
|
||||
test_lexer_string("/=", TOKEN_ASSIGN_DIV);
|
||||
test_lexer_string("%", TOKEN_MOD);
|
||||
test_lexer_string("%=", TOKEN_ASSIGN_MOD);
|
||||
test_lexer_string("+", SCC_TOK_ADD);
|
||||
test_lexer_string("++", SCC_TOK_ADD_ADD);
|
||||
test_lexer_string("+=", SCC_TOK_ASSIGN_ADD);
|
||||
test_lexer_string("-", SCC_TOK_SUB);
|
||||
test_lexer_string("--", SCC_TOK_SUB_SUB);
|
||||
test_lexer_string("-=", SCC_TOK_ASSIGN_SUB);
|
||||
test_lexer_string("*", SCC_TOK_MUL);
|
||||
test_lexer_string("*=", SCC_TOK_ASSIGN_MUL);
|
||||
test_lexer_string("/", SCC_TOK_DIV);
|
||||
test_lexer_string("/=", SCC_TOK_ASSIGN_DIV);
|
||||
test_lexer_string("%", SCC_TOK_MOD);
|
||||
test_lexer_string("%=", SCC_TOK_ASSIGN_MOD);
|
||||
}
|
||||
|
||||
TEST_CASE("Bitwise operators");
|
||||
{
|
||||
test_lexer_string("&", TOKEN_AND);
|
||||
test_lexer_string("&&", TOKEN_AND_AND);
|
||||
test_lexer_string("&=", TOKEN_ASSIGN_AND);
|
||||
test_lexer_string("|", TOKEN_OR);
|
||||
test_lexer_string("||", TOKEN_OR_OR);
|
||||
test_lexer_string("|=", TOKEN_ASSIGN_OR);
|
||||
test_lexer_string("^", TOKEN_XOR);
|
||||
test_lexer_string("^=", TOKEN_ASSIGN_XOR);
|
||||
test_lexer_string("~", TOKEN_BIT_NOT);
|
||||
test_lexer_string("<<", TOKEN_L_SH);
|
||||
test_lexer_string("<<=", TOKEN_ASSIGN_L_SH);
|
||||
test_lexer_string(">>", TOKEN_R_SH);
|
||||
test_lexer_string(">>=", TOKEN_ASSIGN_R_SH);
|
||||
test_lexer_string("&", SCC_TOK_AND);
|
||||
test_lexer_string("&&", SCC_TOK_AND_AND);
|
||||
test_lexer_string("&=", SCC_TOK_ASSIGN_AND);
|
||||
test_lexer_string("|", SCC_TOK_OR);
|
||||
test_lexer_string("||", SCC_TOK_OR_OR);
|
||||
test_lexer_string("|=", SCC_TOK_ASSIGN_OR);
|
||||
test_lexer_string("^", SCC_TOK_XOR);
|
||||
test_lexer_string("^=", SCC_TOK_ASSIGN_XOR);
|
||||
test_lexer_string("~", SCC_TOK_BIT_NOT);
|
||||
test_lexer_string("<<", SCC_TOK_L_SH);
|
||||
test_lexer_string("<<=", SCC_TOK_ASSIGN_L_SH);
|
||||
test_lexer_string(">>", SCC_TOK_R_SH);
|
||||
test_lexer_string(">>=", SCC_TOK_ASSIGN_R_SH);
|
||||
}
|
||||
|
||||
TEST_CASE("Comparison operators");
|
||||
{
|
||||
test_lexer_string("==", TOKEN_EQ);
|
||||
test_lexer_string("!=", TOKEN_NEQ);
|
||||
test_lexer_string("<", TOKEN_LT);
|
||||
test_lexer_string("<=", TOKEN_LE);
|
||||
test_lexer_string(">", TOKEN_GT);
|
||||
test_lexer_string(">=", TOKEN_GE);
|
||||
test_lexer_string("==", SCC_TOK_EQ);
|
||||
test_lexer_string("!=", SCC_TOK_NEQ);
|
||||
test_lexer_string("<", SCC_TOK_LT);
|
||||
test_lexer_string("<=", SCC_TOK_LE);
|
||||
test_lexer_string(">", SCC_TOK_GT);
|
||||
test_lexer_string(">=", SCC_TOK_GE);
|
||||
}
|
||||
|
||||
TEST_CASE("Special symbols");
|
||||
{
|
||||
test_lexer_string("(", TOKEN_L_PAREN);
|
||||
test_lexer_string(")", TOKEN_R_PAREN);
|
||||
test_lexer_string("[", TOKEN_L_BRACKET);
|
||||
test_lexer_string("]", TOKEN_R_BRACKET);
|
||||
test_lexer_string("{", TOKEN_L_BRACE);
|
||||
test_lexer_string("}", TOKEN_R_BRACE);
|
||||
test_lexer_string(";", TOKEN_SEMICOLON);
|
||||
test_lexer_string(",", TOKEN_COMMA);
|
||||
test_lexer_string(":", TOKEN_COLON);
|
||||
test_lexer_string(".", TOKEN_DOT);
|
||||
test_lexer_string("...", TOKEN_ELLIPSIS);
|
||||
test_lexer_string("->", TOKEN_DEREF);
|
||||
test_lexer_string("?", TOKEN_COND);
|
||||
test_lexer_string("(", SCC_TOK_L_PAREN);
|
||||
test_lexer_string(")", SCC_TOK_R_PAREN);
|
||||
test_lexer_string("[", SCC_TOK_L_BRACKET);
|
||||
test_lexer_string("]", SCC_TOK_R_BRACKET);
|
||||
test_lexer_string("{", SCC_TOK_L_BRACE);
|
||||
test_lexer_string("}", SCC_TOK_R_BRACE);
|
||||
test_lexer_string(";", SCC_TOK_SEMICOLON);
|
||||
test_lexer_string(",", SCC_TOK_COMMA);
|
||||
test_lexer_string(":", SCC_TOK_COLON);
|
||||
test_lexer_string(".", SCC_TOK_DOT);
|
||||
test_lexer_string("...", SCC_TOK_ELLIPSIS);
|
||||
test_lexer_string("->", SCC_TOK_DEREF);
|
||||
test_lexer_string("?", SCC_TOK_COND);
|
||||
}
|
||||
}
|
||||
|
||||
// 关键字测试
|
||||
void test_keywords() {
|
||||
TEST_CASE("C89 keywords");
|
||||
test_lexer_string("while", TOKEN_WHILE);
|
||||
test_lexer_string("sizeof", TOKEN_SIZEOF);
|
||||
test_lexer_string("while", SCC_TOK_WHILE);
|
||||
test_lexer_string("sizeof", SCC_TOK_SIZEOF);
|
||||
|
||||
TEST_CASE("C99 keywords");
|
||||
test_lexer_string("restrict", TOKEN_RESTRICT);
|
||||
// test_lexer_string("_Bool", TOKEN_INT); // 需确认你的类型定义
|
||||
test_lexer_string("restrict", SCC_TOK_RESTRICT);
|
||||
// test_lexer_string("_Bool", SCC_TOK_INT); // 需确认你的类型定义
|
||||
}
|
||||
|
||||
// 字面量测试
|
||||
@@ -98,55 +98,55 @@ void test_literals() {
|
||||
TEST_CASE("Integer literals");
|
||||
{
|
||||
// 十进制
|
||||
test_lexer_string("0", TOKEN_INT_LITERAL);
|
||||
test_lexer_string("123", TOKEN_INT_LITERAL);
|
||||
test_lexer_string("2147483647", TOKEN_INT_LITERAL);
|
||||
test_lexer_string("0", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("123", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("2147483647", SCC_TOK_INT_LITERAL);
|
||||
|
||||
// 十六进制
|
||||
test_lexer_string("0x0", TOKEN_INT_LITERAL);
|
||||
test_lexer_string("0x1A3F", TOKEN_INT_LITERAL);
|
||||
test_lexer_string("0XABCDEF", TOKEN_INT_LITERAL);
|
||||
test_lexer_string("0x0", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("0x1A3F", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("0XABCDEF", SCC_TOK_INT_LITERAL);
|
||||
|
||||
// 八进制
|
||||
test_lexer_string("0123", TOKEN_INT_LITERAL);
|
||||
test_lexer_string("0777", TOKEN_INT_LITERAL);
|
||||
test_lexer_string("0123", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("0777", SCC_TOK_INT_LITERAL);
|
||||
|
||||
// 边界值测试
|
||||
test_lexer_string("2147483647", TOKEN_INT_LITERAL); // INT_MAX
|
||||
test_lexer_string("4294967295", TOKEN_INT_LITERAL); // UINT_MAX
|
||||
test_lexer_string("2147483647", SCC_TOK_INT_LITERAL); // INT_MAX
|
||||
test_lexer_string("4294967295", SCC_TOK_INT_LITERAL); // UINT_MAX
|
||||
}
|
||||
|
||||
TEST_CASE("Character literals");
|
||||
{
|
||||
test_lexer_string("'a'", TOKEN_CHAR_LITERAL);
|
||||
test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL);
|
||||
test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL);
|
||||
test_lexer_string("'\\\\'", TOKEN_CHAR_LITERAL);
|
||||
test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL);
|
||||
test_lexer_string("'a'", SCC_TOK_CHAR_LITERAL);
|
||||
test_lexer_string("'\\n'", SCC_TOK_CHAR_LITERAL);
|
||||
test_lexer_string("'\\t'", SCC_TOK_CHAR_LITERAL);
|
||||
test_lexer_string("'\\\\'", SCC_TOK_CHAR_LITERAL);
|
||||
test_lexer_string("'\\0'", SCC_TOK_CHAR_LITERAL);
|
||||
}
|
||||
|
||||
TEST_CASE("String literals");
|
||||
{
|
||||
test_lexer_string("\"hello\"", TOKEN_STRING_LITERAL);
|
||||
test_lexer_string("\"multi-line\\nstring\"", TOKEN_STRING_LITERAL);
|
||||
test_lexer_string("\"escape\\\"quote\"", TOKEN_STRING_LITERAL);
|
||||
test_lexer_string("\"hello\"", SCC_TOK_STRING_LITERAL);
|
||||
test_lexer_string("\"multi-line\\nstring\"", SCC_TOK_STRING_LITERAL);
|
||||
test_lexer_string("\"escape\\\"quote\"", SCC_TOK_STRING_LITERAL);
|
||||
}
|
||||
|
||||
// TEST_CASE("Floating literals");
|
||||
// test_lexer_string("3.14e-5", TOKEN_FLOAT_LITERAL);
|
||||
// test_lexer_string("3.14e-5", SCC_TOK_FLOAT_LITERAL);
|
||||
}
|
||||
|
||||
// 边界测试
|
||||
void test_edge_cases() {
|
||||
// TEST_CASE("Long identifiers");
|
||||
// char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0};
|
||||
// memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1);
|
||||
// test_lexer_string(long_id, TOKEN_IDENT);
|
||||
// char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0};
|
||||
// memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1);
|
||||
// test_lexer_string(long_id, SCC_TOK_IDENT);
|
||||
|
||||
// TEST_CASE("Buffer boundary");
|
||||
// char boundary[LEXER_BUFFER_SIZE*2] = {0};
|
||||
// memset(boundary, '+', LEXER_BUFFER_SIZE*2-1);
|
||||
// test_lexer_string(boundary, TOKEN_ADD);
|
||||
// test_lexer_string(boundary, SCC_TOK_ADD);
|
||||
}
|
||||
|
||||
// 错误处理测试
|
||||
@@ -158,7 +158,7 @@ void test_edge_cases() {
|
||||
// init_lexer(&lexer, "test.c", NULL, test_read);
|
||||
// get_valid_token(&lexer, &token);
|
||||
|
||||
// TEST_CHECK(token.type == TOKEN_EOF); // 应触发错误处理
|
||||
// TEST_CHECK(token.type == SCC_TOK_EOF); // 应触发错误处理
|
||||
// }
|
||||
|
||||
// 测试列表
|
||||
|
||||
@@ -62,27 +62,28 @@ int main(int argc, char *argv[]) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
smcc_lexer_t lexer;
|
||||
core_mem_probe_stream_t mem_stream = {0};
|
||||
core_probe_stream_t *stream =
|
||||
core_mem_probe_stream_init(&mem_stream, buffer, fsize, false);
|
||||
scc_lexer_t lexer;
|
||||
scc_mem_probe_stream_t mem_stream = {0};
|
||||
scc_probe_stream_t *stream =
|
||||
scc_mem_probe_stream_init(&mem_stream, buffer, fsize, false);
|
||||
Assert(stream != null);
|
||||
cstring_clear(&stream->name);
|
||||
cstring_append_cstr(&stream->name, file_name, strlen(file_name));
|
||||
lexer_init(&lexer, stream);
|
||||
scc_cstring_clear(&stream->name);
|
||||
scc_cstring_append_cstr(&stream->name, file_name, strlen(file_name));
|
||||
scc_lexer_init(&lexer, stream);
|
||||
lexer_tok_t tok;
|
||||
|
||||
while (1) {
|
||||
lexer_get_valid_token(&lexer, &tok);
|
||||
if (tok.type == TOKEN_EOF) {
|
||||
scc_lexer_get_valid_token(&lexer, &tok);
|
||||
if (tok.type == SCC_TOK_EOF) {
|
||||
break;
|
||||
}
|
||||
LOG_DEBUG("token `%s` at %s:%u:%u", get_tok_name(tok.type),
|
||||
cstring_as_cstr(&tok.loc.name), tok.loc.line, tok.loc.col);
|
||||
LOG_DEBUG("token `%s` at %s:%u:%u", scc_get_tok_name(tok.type),
|
||||
scc_cstring_as_cstr(&tok.loc.name), tok.loc.line,
|
||||
tok.loc.col);
|
||||
Assert(tok.loc.offset <= fsize);
|
||||
// LOG_DEBUG("%s", tok.val.str);
|
||||
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
|
||||
// lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
|
||||
// lexer.line, lexer.index, tok.type, scc_get_tok_name(tok.type));
|
||||
}
|
||||
|
||||
free(buffer);
|
||||
|
||||
Reference in New Issue
Block a user