feat: rename core types to scc prefix for consistency

Updated type names from `core_*` to `scc_*` across the lex_parser and stream modules to keep naming consistent within the SCC codebase. This includes updating function signatures and internal uses of types such as `core_probe_stream_t`, `core_pos_t`, and `cstring_t` to their `scc_*` counterparts, along with the token enums (`TOKEN_*` to `SCC_TOK_*`) and the lexer entry points (`lexer_*` to `scc_lexer_*`).
zzy
2025-12-11 13:00:29 +08:00
parent 35c13ee30a
commit d88fa3b8d3
33 changed files with 741 additions and 745 deletions
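At a glance, the renames visible in the diffs below are:

core_probe_stream_t -> scc_probe_stream_t, core_pos_t -> scc_pos_t, core_cvalue_t -> scc_cvalue_t
cstring_t and cstring_* -> scc_cstring_t and scc_cstring_*
token_type_t -> scc_tok_type_t, token_subtype_t -> scc_tok_subtype_t
TOKEN_* -> SCC_TOK_*, TK_BASIC_* -> SCC_TOK_SUBTYPE_*
TOKEN_TABLE -> SCC_CTOK_TABLE, KEYWORD_TABLE -> SCC_CKEYWORD_TABLE, ckeyword_t / CSTD_* -> scc_cstd_t / SCC_CSTD_*
smcc_lexer_t -> scc_lexer_t, lexer_init / lexer_get_token / lexer_get_valid_token -> scc_lexer_*
get_tok_name / get_tok_subtype -> scc_get_tok_name / scc_get_tok_subtype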

View File

@@ -3,16 +3,16 @@
* @brief Core data structures and interfaces for the C lexer
*/
#ifndef __SMCC_CC_LEXER_H__
#define __SMCC_CC_LEXER_H__
#ifndef __SCC_LEXER_H__
#define __SCC_LEXER_H__
#include "lexer_token.h"
#include <libcore.h>
typedef struct lexer_token {
token_type_t type;
core_cvalue_t value;
core_pos_t loc;
scc_tok_type_t type;
scc_cvalue_t value;
scc_pos_t loc;
} lexer_tok_t;
/**
@@ -21,16 +21,16 @@ typedef struct lexer_token {
* Encapsulates the state and buffer management needed for lexical analysis
*/
typedef struct cc_lexer {
core_probe_stream_t *stream;
core_pos_t pos;
} smcc_lexer_t;
scc_probe_stream_t *stream;
scc_pos_t pos;
} scc_lexer_t;
/**
* @brief Initialize the lexer
* @param[out] lexer The lexer instance to initialize
* @param[in] stream Pointer to the input stream object
*/
void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream);
void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream);
/**
* @brief Get a raw token
@@ -39,7 +39,7 @@ void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream);
*
* This function returns tokens of every type, including invalid tokens such as whitespace
*/
void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token);
void scc_lexer_get_token(scc_lexer_t *lexer, lexer_tok_t *token);
/**
* @brief Get a valid token
@@ -48,6 +48,6 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token);
*
* This function automatically skips invalid tokens such as whitespace and returns only tokens that are meaningful to the parser
*/
void lexer_get_valid_token(smcc_lexer_t *lexer, lexer_tok_t *token);
void scc_lexer_get_valid_token(scc_lexer_t *lexer, lexer_tok_t *token);
#endif
#endif /* __SCC_LEXER_H__ */
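For orientation, a minimal driver sketch for the renamed API declared above. It is not part of the commit; it only uses calls that appear elsewhere in this diff (scc_mem_probe_stream_init, scc_lexer_init, scc_lexer_get_valid_token), and the header name lexer.h is assumed.

#include <string.h>
#include <libcore.h>
#include "lexer.h" /* assumed file name for the header above */

/* Sketch: tokenize an in-memory buffer with the renamed scc_* lexer API. */
static void dump_tokens(const char *src) {
    scc_lexer_t lexer;
    lexer_tok_t tok;
    scc_mem_probe_stream_t mem = {0};
    scc_probe_stream_t *stream =
        scc_mem_probe_stream_init(&mem, src, strlen(src), false);

    scc_lexer_init(&lexer, stream);
    do {
        /* skips whitespace and comments; asserts on invalid tokens */
        scc_lexer_get_valid_token(&lexer, &tok);
        /* tok.type, tok.loc and tok.value are ready for the parser here */
    } while (tok.type != SCC_TOK_EOF);
}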

View File

@@ -3,138 +3,138 @@
#include <libcore.h>
typedef enum ckeyword {
CSTD_C89,
CSTD_C99,
CEXT_ASM,
} ckeyword_t;
typedef enum scc_cstd {
SCC_CSTD_C89,
SCC_CSTD_C99,
SCC_CEXT_ASM,
} scc_cstd_t;
/* clang-format off */
// WARNING: keyword lookup uses binary search for speed
// You must keep the entries below in lexicographic (dictionary) order
#define KEYWORD_TABLE \
X(asm , TK_BASIC_KEYWORD , TOKEN_ASM , CEXT_ASM) \
X(break , TK_BASIC_KEYWORD , TOKEN_BREAK , CSTD_C89) \
X(case , TK_BASIC_KEYWORD , TOKEN_CASE , CSTD_C89) \
X(char , TK_BASIC_KEYWORD , TOKEN_CHAR , CSTD_C89) \
X(const , TK_BASIC_KEYWORD , TOKEN_CONST , CSTD_C89) \
X(continue , TK_BASIC_KEYWORD , TOKEN_CONTINUE , CSTD_C89) \
X(default , TK_BASIC_KEYWORD , TOKEN_DEFAULT , CSTD_C89) \
X(do , TK_BASIC_KEYWORD , TOKEN_DO , CSTD_C89) \
X(double , TK_BASIC_KEYWORD , TOKEN_DOUBLE , CSTD_C89) \
X(else , TK_BASIC_KEYWORD , TOKEN_ELSE , CSTD_C89) \
X(enum , TK_BASIC_KEYWORD , TOKEN_ENUM , CSTD_C89) \
X(extern , TK_BASIC_KEYWORD , TOKEN_EXTERN , CSTD_C89) \
X(float , TK_BASIC_KEYWORD , TOKEN_FLOAT , CSTD_C89) \
X(for , TK_BASIC_KEYWORD , TOKEN_FOR , CSTD_C89) \
X(goto , TK_BASIC_KEYWORD , TOKEN_GOTO , CSTD_C89) \
X(if , TK_BASIC_KEYWORD , TOKEN_IF , CSTD_C89) \
X(inline , TK_BASIC_KEYWORD , TOKEN_INLINE , CSTD_C99) \
X(int , TK_BASIC_KEYWORD , TOKEN_INT , CSTD_C89) \
X(long , TK_BASIC_KEYWORD , TOKEN_LONG , CSTD_C89) \
X(register , TK_BASIC_KEYWORD , TOKEN_REGISTER , CSTD_C89) \
X(restrict , TK_BASIC_KEYWORD , TOKEN_RESTRICT , CSTD_C99) \
X(return , TK_BASIC_KEYWORD , TOKEN_RETURN , CSTD_C89) \
X(short , TK_BASIC_KEYWORD , TOKEN_SHORT , CSTD_C89) \
X(signed , TK_BASIC_KEYWORD , TOKEN_SIGNED , CSTD_C89) \
X(sizeof , TK_BASIC_KEYWORD , TOKEN_SIZEOF , CSTD_C89) \
X(static , TK_BASIC_KEYWORD , TOKEN_STATIC , CSTD_C89) \
X(struct , TK_BASIC_KEYWORD , TOKEN_STRUCT , CSTD_C89) \
X(switch , TK_BASIC_KEYWORD , TOKEN_SWITCH , CSTD_C89) \
X(typedef , TK_BASIC_KEYWORD , TOKEN_TYPEDEF , CSTD_C89) \
X(union , TK_BASIC_KEYWORD , TOKEN_UNION , CSTD_C89) \
X(unsigned , TK_BASIC_KEYWORD , TOKEN_UNSIGNED , CSTD_C89) \
X(void , TK_BASIC_KEYWORD , TOKEN_VOID , CSTD_C89) \
X(volatile , TK_BASIC_KEYWORD , TOKEN_VOLATILE , CSTD_C89) \
X(while , TK_BASIC_KEYWORD , TOKEN_WHILE , CSTD_C89) \
#define SCC_CKEYWORD_TABLE \
X(asm , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ASM , SCC_CEXT_ASM) \
X(break , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_BREAK , SCC_CSTD_C89) \
X(case , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CASE , SCC_CSTD_C89) \
X(char , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CHAR , SCC_CSTD_C89) \
X(const , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONST , SCC_CSTD_C89) \
X(continue , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_CONTINUE , SCC_CSTD_C89) \
X(default , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DEFAULT , SCC_CSTD_C89) \
X(do , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DO , SCC_CSTD_C89) \
X(double , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_DOUBLE , SCC_CSTD_C89) \
X(else , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ELSE , SCC_CSTD_C89) \
X(enum , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_ENUM , SCC_CSTD_C89) \
X(extern , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_EXTERN , SCC_CSTD_C89) \
X(float , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FLOAT , SCC_CSTD_C89) \
X(for , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_FOR , SCC_CSTD_C89) \
X(goto , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_GOTO , SCC_CSTD_C89) \
X(if , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_IF , SCC_CSTD_C89) \
X(inline , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INLINE , SCC_CSTD_C99) \
X(int , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_INT , SCC_CSTD_C89) \
X(long , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_LONG , SCC_CSTD_C89) \
X(register , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_REGISTER , SCC_CSTD_C89) \
X(restrict , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RESTRICT , SCC_CSTD_C99) \
X(return , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_RETURN , SCC_CSTD_C89) \
X(short , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SHORT , SCC_CSTD_C89) \
X(signed , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIGNED , SCC_CSTD_C89) \
X(sizeof , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SIZEOF , SCC_CSTD_C89) \
X(static , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STATIC , SCC_CSTD_C89) \
X(struct , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_STRUCT , SCC_CSTD_C89) \
X(switch , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_SWITCH , SCC_CSTD_C89) \
X(typedef , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_TYPEDEF , SCC_CSTD_C89) \
X(union , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNION , SCC_CSTD_C89) \
X(unsigned , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_UNSIGNED , SCC_CSTD_C89) \
X(void , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOID , SCC_CSTD_C89) \
X(volatile , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_VOLATILE , SCC_CSTD_C89) \
X(while , SCC_TOK_SUBTYPE_KEYWORD , SCC_TOK_WHILE , SCC_CSTD_C89) \
// KEYWORD_TABLE
#define TOKEN_TABLE \
X(unknown , TK_BASIC_INVALID, TOKEN_UNKNOWN ) \
X(EOF , TK_BASIC_EOF, TOKEN_EOF ) \
X(blank , TK_BASIC_EMPTYSPACE, TOKEN_BLANK ) \
X("==" , TK_BASIC_OPERATOR, TOKEN_EQ ) \
X("=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN ) \
X("++" , TK_BASIC_OPERATOR, TOKEN_ADD_ADD ) \
X("+=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_ADD ) \
X("+" , TK_BASIC_OPERATOR, TOKEN_ADD ) \
X("--" , TK_BASIC_OPERATOR, TOKEN_SUB_SUB ) \
X("-=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_SUB ) \
X("->" , TK_BASIC_OPERATOR, TOKEN_DEREF ) \
X("-" , TK_BASIC_OPERATOR, TOKEN_SUB ) \
X("*=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MUL ) \
X("*" , TK_BASIC_OPERATOR, TOKEN_MUL ) \
X("/=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_DIV ) \
X("/" , TK_BASIC_OPERATOR, TOKEN_DIV ) \
X("//" , TK_BASIC_COMMENT , TOKEN_LINE_COMMENT ) \
X("/* */" , TK_BASIC_COMMENT , TOKEN_BLOCK_COMMENT ) \
X("%=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MOD ) \
X("%" , TK_BASIC_OPERATOR, TOKEN_MOD ) \
X("&&" , TK_BASIC_OPERATOR, TOKEN_AND_AND ) \
X("&=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_AND ) \
X("&" , TK_BASIC_OPERATOR, TOKEN_AND ) \
X("||" , TK_BASIC_OPERATOR, TOKEN_OR_OR ) \
X("|=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_OR ) \
X("|" , TK_BASIC_OPERATOR, TOKEN_OR ) \
X("^=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_XOR ) \
X("^" , TK_BASIC_OPERATOR, TOKEN_XOR ) \
X("<<=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_L_SH ) \
X("<<" , TK_BASIC_OPERATOR, TOKEN_L_SH ) \
X("<=" , TK_BASIC_OPERATOR, TOKEN_LE ) \
X("<" , TK_BASIC_OPERATOR, TOKEN_LT ) \
X(">>=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_R_SH ) \
X(">>" , TK_BASIC_OPERATOR, TOKEN_R_SH ) \
X(">=" , TK_BASIC_OPERATOR, TOKEN_GE ) \
X(">" , TK_BASIC_OPERATOR, TOKEN_GT ) \
X("!" , TK_BASIC_OPERATOR, TOKEN_NOT ) \
X("!=" , TK_BASIC_OPERATOR, TOKEN_NEQ ) \
X("~" , TK_BASIC_OPERATOR, TOKEN_BIT_NOT ) \
X("[" , TK_BASIC_OPERATOR, TOKEN_L_BRACKET ) \
X("]" , TK_BASIC_OPERATOR, TOKEN_R_BRACKET ) \
X("(" , TK_BASIC_OPERATOR, TOKEN_L_PAREN ) \
X(")" , TK_BASIC_OPERATOR, TOKEN_R_PAREN ) \
X("{" , TK_BASIC_OPERATOR, TOKEN_L_BRACE ) \
X("}" , TK_BASIC_OPERATOR, TOKEN_R_BRACE ) \
X(";" , TK_BASIC_OPERATOR, TOKEN_SEMICOLON ) \
X("," , TK_BASIC_OPERATOR, TOKEN_COMMA ) \
X(":" , TK_BASIC_OPERATOR, TOKEN_COLON ) \
X("." , TK_BASIC_OPERATOR, TOKEN_DOT ) \
X("..." , TK_BASIC_OPERATOR, TOKEN_ELLIPSIS ) \
X("?" , TK_BASIC_OPERATOR, TOKEN_COND ) \
X(ident , TK_BASIC_IDENTIFIER, TOKEN_IDENT ) \
X(int_literal , TK_BASIC_LITERAL, TOKEN_INT_LITERAL ) \
X(float_literal , TK_BASIC_LITERAL, TOKEN_FLOAT_LITERAL ) \
X(char_literal , TK_BASIC_LITERAL, TOKEN_CHAR_LITERAL ) \
X(string_literal , TK_BASIC_LITERAL, TOKEN_STRING_LITERAL ) \
#define SCC_CTOK_TABLE \
X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \
X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \
X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \
X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \
X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \
X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \
X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \
X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \
X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \
X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \
X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \
X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \
X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \
X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \
X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \
X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \
X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \
X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \
X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \
X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \
X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \
X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \
X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \
X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \
X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \
X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \
X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \
X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \
X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \
X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \
X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \
X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \
X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \
X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \
X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \
X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \
X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \
X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \
X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \
X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \
X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \
X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \
X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \
X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \
X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \
X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \
X(int_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \
X(float_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \
X(char_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \
X(string_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \
// END
/* clang-format on */
// Define the TokenType enum
typedef enum cc_tktype {
typedef enum scc_tok_type {
// Handle ordinary tokens
#define X(str, subtype, tok) tok,
TOKEN_TABLE
SCC_CTOK_TABLE
#undef X
// Handle keywords (keep the original format)
#define X(name, subtype, tok, std) tok,
KEYWORD_TABLE
SCC_CKEYWORD_TABLE
#undef X
} token_type_t;
} scc_tok_type_t;
typedef enum token_subtype {
TK_BASIC_INVALID, // error placeholder
TK_BASIC_KEYWORD, // keyword
TK_BASIC_OPERATOR, // operator
TK_BASIC_IDENTIFIER, // identifier
TK_BASIC_LITERAL, // literal
typedef enum scc_tok_subtype {
SCC_TOK_SUBTYPE_INVALID, // error placeholder
SCC_TOK_SUBTYPE_KEYWORD, // keyword
SCC_TOK_SUBTYPE_OPERATOR, // operator
SCC_TOK_SUBTYPE_IDENTIFIER, // identifier
SCC_TOK_SUBTYPE_LITERAL, // literal
TK_BASIC_EMPTYSPACE, // whitespace
TK_BASIC_COMMENT, // comment
TK_BASIC_EOF // end-of-input marker
} token_subtype_t;
SCC_TOK_SUBTYPE_EMPTYSPACE, // whitespace
SCC_TOK_SUBTYPE_COMMENT, // comment
SCC_TOK_SUBTYPE_EOF // end-of-input marker
} scc_tok_subtype_t;
token_subtype_t get_tok_subtype(token_type_t type);
const char *get_tok_name(token_type_t type);
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);
const char *scc_get_tok_name(scc_tok_type_t type);
#endif
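The warning above the keyword table matters because keyword lookup is a binary search over a table generated from SCC_CKEYWORD_TABLE. The commit only shows the signature of keyword_cmp and its -1 "not a keyword" return (next file); a plausible, purely hypothetical body, assuming the keywords[] array defined there, would look like:

#include <string.h>

/* Hypothetical sketch of keyword_cmp: binary search over the sorted keywords[]
 * table; `name` is not necessarily NUL-terminated, hence the explicit length. */
static inline int keyword_cmp(const char *name, int len) {
    int lo = 0, hi = (int)(sizeof(keywords) / sizeof(keywords[0])) - 1;
    while (lo <= hi) {
        int mid = lo + (hi - lo) / 2;
        int cmp = strncmp(name, keywords[mid].name, (size_t)len);
        if (cmp == 0 && keywords[mid].name[len] == '\0')
            return mid;   /* exact match: index into keywords[] */
        if (cmp < 0 || (cmp == 0 && keywords[mid].name[len] != '\0'))
            hi = mid - 1; /* name sorts before this entry */
        else
            lo = mid + 1; /* name sorts after this entry */
    }
    return -1;            /* not a keyword */
}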

View File

@@ -32,11 +32,11 @@ David Hanson / drh@drhanson.net
static const struct {
const char *name;
ckeyword_t std_type;
token_type_t tok;
scc_cstd_t std_type;
scc_tok_type_t tok;
} keywords[] = {
#define X(name, subtype, tok, std_type, ...) {#name, std_type, tok},
KEYWORD_TABLE
SCC_CKEYWORD_TABLE
#undef X
};
@@ -75,23 +75,23 @@ static inline int keyword_cmp(const char *name, int len) {
return -1; // Not a keyword.
}
void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream) {
void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream) {
lexer->stream = stream;
lexer->pos = core_pos_init();
lexer->pos = scc_pos_init();
// FIXME
lexer->pos.name = cstring_from_cstr(cstring_as_cstr(&stream->name));
lexer->pos.name = scc_cstring_from_cstr(scc_cstring_as_cstr(&stream->name));
}
#define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)
#define set_err_token(token) ((token)->type = SCC_TOK_UNKNOWN)
static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
static void parse_line(scc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
core_probe_stream_t *stream = lexer->stream;
core_probe_stream_reset(stream);
int ch = core_probe_stream_next(stream);
scc_probe_stream_t *stream = lexer->stream;
scc_probe_stream_reset(stream);
int ch = scc_probe_stream_next(stream);
usize n;
cstring_t str = cstring_new();
scc_cstring_t str = scc_cstring_new();
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
@@ -104,7 +104,7 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
const char line[] = "line";
for (int i = 0; i < (int)sizeof(line); i++) {
ch = core_probe_stream_consume(stream);
ch = scc_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
if (ch != line[i]) {
LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
@@ -118,12 +118,12 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
goto SKIP_LINE;
}
if (core_probe_stream_consume(stream) != ' ') {
if (scc_probe_stream_consume(stream) != ' ') {
lex_parse_skip_line(lexer->stream, &lexer->pos);
token->loc.line = token->value.n;
}
if (core_probe_stream_next(stream) != '"') {
if (scc_probe_stream_next(stream) != '"') {
LEX_ERROR("Invalid `#` line");
goto SKIP_LINE;
}
@@ -135,259 +135,259 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
lex_parse_skip_line(lexer->stream, &lexer->pos);
token->loc.line = n;
// FIXME memory leak
token->loc.name = cstring_from_cstr(cstring_as_cstr(&str));
cstring_free(&str);
token->loc.name = scc_cstring_from_cstr(scc_cstring_as_cstr(&str));
scc_cstring_free(&str);
return;
SKIP_LINE:
lex_parse_skip_line(lexer->stream, &lexer->pos);
ERR:
set_err_token(token);
cstring_free(&str);
scc_cstring_free(&str);
}
// /zh/c/language/operator_arithmetic.html
void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
void scc_lexer_get_token(scc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
token->type = TOKEN_UNKNOWN;
core_probe_stream_t *stream = lexer->stream;
token->type = SCC_TOK_UNKNOWN;
scc_probe_stream_t *stream = lexer->stream;
core_probe_stream_reset(stream);
token_type_t type = TOKEN_UNKNOWN;
int ch = core_probe_stream_next(stream);
scc_probe_stream_reset(stream);
scc_tok_type_t type = SCC_TOK_UNKNOWN;
int ch = scc_probe_stream_next(stream);
// once step
switch (ch) {
case '=':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_EQ;
type = SCC_TOK_EQ;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_ASSIGN;
scc_probe_stream_reset(stream), type = SCC_TOK_ASSIGN;
break;
}
break;
case '+':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '+':
type = TOKEN_ADD_ADD;
type = SCC_TOK_ADD_ADD;
goto double_char;
case '=':
type = TOKEN_ASSIGN_ADD;
type = SCC_TOK_ASSIGN_ADD;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_ADD;
scc_probe_stream_reset(stream), type = SCC_TOK_ADD;
break;
}
break;
case '-':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '-':
type = TOKEN_SUB_SUB;
type = SCC_TOK_SUB_SUB;
goto double_char;
case '=':
type = TOKEN_ASSIGN_SUB;
type = SCC_TOK_ASSIGN_SUB;
goto double_char;
case '>':
type = TOKEN_DEREF;
type = SCC_TOK_DEREF;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_SUB;
scc_probe_stream_reset(stream), type = SCC_TOK_SUB;
break;
}
break;
case '*':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_MUL;
type = SCC_TOK_ASSIGN_MUL;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_MUL;
scc_probe_stream_reset(stream), type = SCC_TOK_MUL;
break;
}
break;
case '/':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_DIV;
type = SCC_TOK_ASSIGN_DIV;
goto double_char;
case '/':
lex_parse_skip_line(lexer->stream, &lexer->pos);
token->type = TOKEN_LINE_COMMENT;
token->type = SCC_TOK_LINE_COMMENT;
goto END;
case '*':
lex_parse_skip_block_comment(lexer->stream, &lexer->pos);
token->type = TOKEN_BLOCK_COMMENT;
token->type = SCC_TOK_BLOCK_COMMENT;
goto END;
default:
core_probe_stream_reset(stream), type = TOKEN_DIV;
scc_probe_stream_reset(stream), type = SCC_TOK_DIV;
break;
}
break;
case '%':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_MOD;
type = SCC_TOK_ASSIGN_MOD;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_MOD;
scc_probe_stream_reset(stream), type = SCC_TOK_MOD;
break;
}
break;
case '&':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '&':
type = TOKEN_AND_AND;
type = SCC_TOK_AND_AND;
goto double_char;
case '=':
type = TOKEN_ASSIGN_AND;
type = SCC_TOK_ASSIGN_AND;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_AND;
scc_probe_stream_reset(stream), type = SCC_TOK_AND;
break;
}
break;
case '|':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '|':
type = TOKEN_OR_OR;
type = SCC_TOK_OR_OR;
goto double_char;
case '=':
type = TOKEN_ASSIGN_OR;
type = SCC_TOK_ASSIGN_OR;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_OR;
scc_probe_stream_reset(stream), type = SCC_TOK_OR;
break;
}
break;
case '^':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_XOR;
type = SCC_TOK_ASSIGN_XOR;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_XOR;
scc_probe_stream_reset(stream), type = SCC_TOK_XOR;
break;
}
break;
case '<':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_LE;
type = SCC_TOK_LE;
goto double_char;
case '<': {
if (core_probe_stream_next(stream) == '=') {
type = TOKEN_ASSIGN_L_SH;
if (scc_probe_stream_next(stream) == '=') {
type = SCC_TOK_ASSIGN_L_SH;
goto triple_char;
} else {
type = TOKEN_L_SH;
type = SCC_TOK_L_SH;
goto double_char;
}
break;
}
default:
core_probe_stream_reset(stream), type = TOKEN_LT;
scc_probe_stream_reset(stream), type = SCC_TOK_LT;
break;
}
break;
case '>':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_GE;
type = SCC_TOK_GE;
goto double_char;
case '>': {
if (core_probe_stream_next(stream) == '=') {
type = TOKEN_ASSIGN_R_SH;
if (scc_probe_stream_next(stream) == '=') {
type = SCC_TOK_ASSIGN_R_SH;
goto triple_char;
} else {
type = TOKEN_R_SH;
type = SCC_TOK_R_SH;
goto double_char;
}
break;
}
default:
core_probe_stream_reset(stream), type = TOKEN_GT;
scc_probe_stream_reset(stream), type = SCC_TOK_GT;
break;
}
break;
case '~':
type = TOKEN_BIT_NOT;
type = SCC_TOK_BIT_NOT;
break;
case '!':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_NEQ;
type = SCC_TOK_NEQ;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_NOT;
scc_probe_stream_reset(stream), type = SCC_TOK_NOT;
break;
}
break;
case '[':
type = TOKEN_L_BRACKET;
type = SCC_TOK_L_BRACKET;
break;
case ']':
type = TOKEN_R_BRACKET;
type = SCC_TOK_R_BRACKET;
break;
case '(':
type = TOKEN_L_PAREN;
type = SCC_TOK_L_PAREN;
break;
case ')':
type = TOKEN_R_PAREN;
type = SCC_TOK_R_PAREN;
break;
case '{':
type = TOKEN_L_BRACE;
type = SCC_TOK_L_BRACE;
break;
case '}':
type = TOKEN_R_BRACE;
type = SCC_TOK_R_BRACE;
break;
case ';':
type = TOKEN_SEMICOLON;
type = SCC_TOK_SEMICOLON;
break;
case ',':
type = TOKEN_COMMA;
type = SCC_TOK_COMMA;
break;
case ':':
type = TOKEN_COLON;
type = SCC_TOK_COLON;
break;
case '.':
if (core_probe_stream_next(stream) == '.' &&
core_probe_stream_next(stream) == '.') {
type = TOKEN_ELLIPSIS;
if (scc_probe_stream_next(stream) == '.' &&
scc_probe_stream_next(stream) == '.') {
type = SCC_TOK_ELLIPSIS;
goto triple_char;
}
type = TOKEN_DOT;
type = SCC_TOK_DOT;
break;
case '?':
type = TOKEN_COND;
type = SCC_TOK_COND;
break;
case '\v':
case '\f':
case ' ':
case '\t':
type = TOKEN_BLANK;
type = SCC_TOK_BLANK;
break;
case '\r':
case '\n':
lex_parse_skip_endline(lexer->stream, &lexer->pos);
token->type = TOKEN_BLANK;
token->type = SCC_TOK_BLANK;
goto END;
case '#':
parse_line(lexer, token);
token->type = TOKEN_BLANK;
token->type = SCC_TOK_BLANK;
goto END;
case '\0':
case core_stream_eof:
// EOF
type = TOKEN_EOF;
type = SCC_TOK_EOF;
break;
case '\'': {
token->loc = lexer->pos;
token->type = TOKEN_CHAR_LITERAL;
token->type = SCC_TOK_CHAR_LITERAL;
int ch = lex_parse_char(lexer->stream, &lexer->pos);
if (ch == core_stream_eof) {
LEX_ERROR("Unexpected character literal");
token->type = TOKEN_UNKNOWN;
token->type = SCC_TOK_UNKNOWN;
} else {
token->value.ch = ch;
}
@@ -395,14 +395,14 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
}
case '"': {
token->loc = lexer->pos;
token->type = TOKEN_STRING_LITERAL;
cstring_t output = cstring_new();
token->type = SCC_TOK_STRING_LITERAL;
scc_cstring_t output = scc_cstring_new();
if (lex_parse_string(lexer->stream, &lexer->pos, &output) == true) {
token->value.cstr.data = cstring_as_cstr(&output);
token->value.cstr.len = cstring_len(&output);
token->value.cstr.data = scc_cstring_as_cstr(&output);
token->value.cstr.len = scc_cstring_len(&output);
} else {
LEX_ERROR("Unexpected string literal");
token->type = TOKEN_UNKNOWN;
token->type = SCC_TOK_UNKNOWN;
}
goto END;
@@ -412,13 +412,13 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
case '5': case '6': case '7': case '8': case '9':
/* clang-format on */
token->loc = lexer->pos;
token->type = TOKEN_INT_LITERAL;
token->type = SCC_TOK_INT_LITERAL;
usize output;
if (lex_parse_number(lexer->stream, &lexer->pos, &output) == true) {
token->value.n = output;
} else {
LEX_ERROR("Unexpected number literal");
token->type = TOKEN_UNKNOWN;
token->type = SCC_TOK_UNKNOWN;
}
goto END;
/* clang-format off */
@@ -431,17 +431,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
/* clang-format on */
cstring_t str = cstring_new();
scc_cstring_t str = scc_cstring_new();
cbool ret = lex_parse_identifier(lexer->stream, &lexer->pos, &str);
Assert(ret == true);
int res = keyword_cmp(cstring_as_cstr(&str), cstring_len(&str));
int res = keyword_cmp(scc_cstring_as_cstr(&str), scc_cstring_len(&str));
if (res == -1) {
token->value.cstr.data = (char *)cstring_as_cstr(&str);
token->value.cstr.len = cstring_len(&str);
type = TOKEN_IDENT;
token->value.cstr.data = (char *)scc_cstring_as_cstr(&str);
token->value.cstr.len = scc_cstring_len(&str);
type = SCC_TOK_IDENT;
} else {
cstring_free(&str);
scc_cstring_free(&str);
type = keywords[res].tok;
}
token->type = type;
@@ -452,29 +452,31 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
}
goto once_char;
triple_char:
core_probe_stream_consume(stream);
scc_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
double_char:
core_probe_stream_consume(stream);
scc_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
once_char:
core_probe_stream_consume(stream);
scc_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
token->type = type;
END:
LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(token->type),
LEX_DEBUG("get token `%s` in %s:%d:%d", scc_get_tok_name(token->type),
token->loc.name, token->loc.line, token->loc.column);
}
// lexer_get_token may return tokens that are invalid for the parser
void lexer_get_valid_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
token_subtype_t type;
// scc_lexer_get_token may return tokens that are invalid for the parser
void scc_lexer_get_valid_token(scc_lexer_t *lexer, lexer_tok_t *token) {
scc_tok_subtype_t type;
do {
lexer_get_token(lexer, token);
type = get_tok_subtype(token->type);
AssertFmt(type != TK_BASIC_INVALID, "Invalid token: `%s` at %s:%d:%d",
get_tok_name(token->type), token->loc.name, token->loc.line,
token->loc.col);
Assert(type != TK_BASIC_INVALID);
} while (type == TK_BASIC_EMPTYSPACE || type == TK_BASIC_COMMENT);
scc_lexer_get_token(lexer, token);
type = scc_get_tok_subtype(token->type);
AssertFmt(type != SCC_TOK_SUBTYPE_INVALID,
"Invalid token: `%s` at %s:%d:%d",
scc_get_tok_name(token->type), token->loc.name,
token->loc.line, token->loc.col);
Assert(type != SCC_TOK_SUBTYPE_INVALID);
} while (type == SCC_TOK_SUBTYPE_EMPTYSPACE ||
type == SCC_TOK_SUBTYPE_COMMENT);
}

View File

@@ -1,30 +1,30 @@
#include <lexer_token.h>
// Generate the string mapping (choose #str or #name as needed)
static const char* token_strings[] = {
#define X(str, subtype, tok) [tok] = #str,
TOKEN_TABLE
#undef X
static const char *token_strings[] = {
#define X(str, subtype, tok) [tok] = #str,
SCC_CTOK_TABLE
#undef X
#define X(str, subtype, tok, std) [tok] = #str,
KEYWORD_TABLE
#undef X
#define X(str, subtype, tok, std) [tok] = #str,
SCC_CKEYWORD_TABLE
#undef X
};
static token_subtype_t token_subtypes[] = {
#define X(str, subtype, tok) [tok] = subtype,
TOKEN_TABLE
#undef X
static scc_tok_subtype_t token_subtypes[] = {
#define X(str, subtype, tok) [tok] = subtype,
SCC_CTOK_TABLE
#undef X
#define X(str, subtype, tok, std) [tok] = subtype,
KEYWORD_TABLE
#undef X
#define X(str, subtype, tok, std) [tok] = subtype,
SCC_CKEYWORD_TABLE
#undef X
};
token_subtype_t get_tok_subtype(token_type_t type) {
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type) {
return token_subtypes[type];
}
const char* get_tok_name(token_type_t type) {
const char *scc_get_tok_name(scc_tok_type_t type) {
return token_strings[type];
}
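The designated-initializer X-macros above index both tables directly by the token enum. For illustration only (hand-expanded here, not part of the commit), two representative SCC_CTOK_TABLE entries expand under #define X(str, subtype, tok) [tok] = #str, to:

[SCC_TOK_EOF] = "EOF",
[SCC_TOK_EQ]  = "\"==\"",  /* stringizing a string literal keeps (and escapes) its quotes */

This is why operator names printed via scc_get_tok_name carry surrounding quote characters, while keyword and identifier names do not.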

View File

@@ -5,92 +5,92 @@
// Test helper function
static inline void test_lexer_string(const char *input,
token_type_t expected_type) {
smcc_lexer_t lexer;
scc_tok_type_t expected_type) {
scc_lexer_t lexer;
lexer_tok_t token;
core_mem_probe_stream_t stream;
scc_mem_probe_stream_t stream;
lexer_init(&lexer, core_mem_probe_stream_init(&stream, input, strlen(input),
false));
lexer_get_token(&lexer, &token);
scc_lexer_init(&lexer, scc_mem_probe_stream_init(&stream, input,
strlen(input), false));
scc_lexer_get_token(&lexer, &token);
TEST_CHECK(token.type == expected_type);
TEST_MSG("Expected: %s", get_tok_name(expected_type));
TEST_MSG("Got: %s", get_tok_name(token.type));
TEST_MSG("Expected: %s", scc_get_tok_name(expected_type));
TEST_MSG("Got: %s", scc_get_tok_name(token.type));
}
// Basic operator tests
void test_operators() {
TEST_CASE("Arithmetic operators");
{
test_lexer_string("+", TOKEN_ADD);
test_lexer_string("++", TOKEN_ADD_ADD);
test_lexer_string("+=", TOKEN_ASSIGN_ADD);
test_lexer_string("-", TOKEN_SUB);
test_lexer_string("--", TOKEN_SUB_SUB);
test_lexer_string("-=", TOKEN_ASSIGN_SUB);
test_lexer_string("*", TOKEN_MUL);
test_lexer_string("*=", TOKEN_ASSIGN_MUL);
test_lexer_string("/", TOKEN_DIV);
test_lexer_string("/=", TOKEN_ASSIGN_DIV);
test_lexer_string("%", TOKEN_MOD);
test_lexer_string("%=", TOKEN_ASSIGN_MOD);
test_lexer_string("+", SCC_TOK_ADD);
test_lexer_string("++", SCC_TOK_ADD_ADD);
test_lexer_string("+=", SCC_TOK_ASSIGN_ADD);
test_lexer_string("-", SCC_TOK_SUB);
test_lexer_string("--", SCC_TOK_SUB_SUB);
test_lexer_string("-=", SCC_TOK_ASSIGN_SUB);
test_lexer_string("*", SCC_TOK_MUL);
test_lexer_string("*=", SCC_TOK_ASSIGN_MUL);
test_lexer_string("/", SCC_TOK_DIV);
test_lexer_string("/=", SCC_TOK_ASSIGN_DIV);
test_lexer_string("%", SCC_TOK_MOD);
test_lexer_string("%=", SCC_TOK_ASSIGN_MOD);
}
TEST_CASE("Bitwise operators");
{
test_lexer_string("&", TOKEN_AND);
test_lexer_string("&&", TOKEN_AND_AND);
test_lexer_string("&=", TOKEN_ASSIGN_AND);
test_lexer_string("|", TOKEN_OR);
test_lexer_string("||", TOKEN_OR_OR);
test_lexer_string("|=", TOKEN_ASSIGN_OR);
test_lexer_string("^", TOKEN_XOR);
test_lexer_string("^=", TOKEN_ASSIGN_XOR);
test_lexer_string("~", TOKEN_BIT_NOT);
test_lexer_string("<<", TOKEN_L_SH);
test_lexer_string("<<=", TOKEN_ASSIGN_L_SH);
test_lexer_string(">>", TOKEN_R_SH);
test_lexer_string(">>=", TOKEN_ASSIGN_R_SH);
test_lexer_string("&", SCC_TOK_AND);
test_lexer_string("&&", SCC_TOK_AND_AND);
test_lexer_string("&=", SCC_TOK_ASSIGN_AND);
test_lexer_string("|", SCC_TOK_OR);
test_lexer_string("||", SCC_TOK_OR_OR);
test_lexer_string("|=", SCC_TOK_ASSIGN_OR);
test_lexer_string("^", SCC_TOK_XOR);
test_lexer_string("^=", SCC_TOK_ASSIGN_XOR);
test_lexer_string("~", SCC_TOK_BIT_NOT);
test_lexer_string("<<", SCC_TOK_L_SH);
test_lexer_string("<<=", SCC_TOK_ASSIGN_L_SH);
test_lexer_string(">>", SCC_TOK_R_SH);
test_lexer_string(">>=", SCC_TOK_ASSIGN_R_SH);
}
TEST_CASE("Comparison operators");
{
test_lexer_string("==", TOKEN_EQ);
test_lexer_string("!=", TOKEN_NEQ);
test_lexer_string("<", TOKEN_LT);
test_lexer_string("<=", TOKEN_LE);
test_lexer_string(">", TOKEN_GT);
test_lexer_string(">=", TOKEN_GE);
test_lexer_string("==", SCC_TOK_EQ);
test_lexer_string("!=", SCC_TOK_NEQ);
test_lexer_string("<", SCC_TOK_LT);
test_lexer_string("<=", SCC_TOK_LE);
test_lexer_string(">", SCC_TOK_GT);
test_lexer_string(">=", SCC_TOK_GE);
}
TEST_CASE("Special symbols");
{
test_lexer_string("(", TOKEN_L_PAREN);
test_lexer_string(")", TOKEN_R_PAREN);
test_lexer_string("[", TOKEN_L_BRACKET);
test_lexer_string("]", TOKEN_R_BRACKET);
test_lexer_string("{", TOKEN_L_BRACE);
test_lexer_string("}", TOKEN_R_BRACE);
test_lexer_string(";", TOKEN_SEMICOLON);
test_lexer_string(",", TOKEN_COMMA);
test_lexer_string(":", TOKEN_COLON);
test_lexer_string(".", TOKEN_DOT);
test_lexer_string("...", TOKEN_ELLIPSIS);
test_lexer_string("->", TOKEN_DEREF);
test_lexer_string("?", TOKEN_COND);
test_lexer_string("(", SCC_TOK_L_PAREN);
test_lexer_string(")", SCC_TOK_R_PAREN);
test_lexer_string("[", SCC_TOK_L_BRACKET);
test_lexer_string("]", SCC_TOK_R_BRACKET);
test_lexer_string("{", SCC_TOK_L_BRACE);
test_lexer_string("}", SCC_TOK_R_BRACE);
test_lexer_string(";", SCC_TOK_SEMICOLON);
test_lexer_string(",", SCC_TOK_COMMA);
test_lexer_string(":", SCC_TOK_COLON);
test_lexer_string(".", SCC_TOK_DOT);
test_lexer_string("...", SCC_TOK_ELLIPSIS);
test_lexer_string("->", SCC_TOK_DEREF);
test_lexer_string("?", SCC_TOK_COND);
}
}
// Keyword tests
void test_keywords() {
TEST_CASE("C89 keywords");
test_lexer_string("while", TOKEN_WHILE);
test_lexer_string("sizeof", TOKEN_SIZEOF);
test_lexer_string("while", SCC_TOK_WHILE);
test_lexer_string("sizeof", SCC_TOK_SIZEOF);
TEST_CASE("C99 keywords");
test_lexer_string("restrict", TOKEN_RESTRICT);
// test_lexer_string("_Bool", TOKEN_INT); // confirm your type definitions first
test_lexer_string("restrict", SCC_TOK_RESTRICT);
// test_lexer_string("_Bool", SCC_TOK_INT); // confirm your type definitions first
}
// Literal tests
@@ -98,55 +98,55 @@ void test_literals() {
TEST_CASE("Integer literals");
{
// Decimal
test_lexer_string("0", TOKEN_INT_LITERAL);
test_lexer_string("123", TOKEN_INT_LITERAL);
test_lexer_string("2147483647", TOKEN_INT_LITERAL);
test_lexer_string("0", SCC_TOK_INT_LITERAL);
test_lexer_string("123", SCC_TOK_INT_LITERAL);
test_lexer_string("2147483647", SCC_TOK_INT_LITERAL);
// Hexadecimal
test_lexer_string("0x0", TOKEN_INT_LITERAL);
test_lexer_string("0x1A3F", TOKEN_INT_LITERAL);
test_lexer_string("0XABCDEF", TOKEN_INT_LITERAL);
test_lexer_string("0x0", SCC_TOK_INT_LITERAL);
test_lexer_string("0x1A3F", SCC_TOK_INT_LITERAL);
test_lexer_string("0XABCDEF", SCC_TOK_INT_LITERAL);
// Octal
test_lexer_string("0123", TOKEN_INT_LITERAL);
test_lexer_string("0777", TOKEN_INT_LITERAL);
test_lexer_string("0123", SCC_TOK_INT_LITERAL);
test_lexer_string("0777", SCC_TOK_INT_LITERAL);
// Boundary value tests
test_lexer_string("2147483647", TOKEN_INT_LITERAL); // INT_MAX
test_lexer_string("4294967295", TOKEN_INT_LITERAL); // UINT_MAX
test_lexer_string("2147483647", SCC_TOK_INT_LITERAL); // INT_MAX
test_lexer_string("4294967295", SCC_TOK_INT_LITERAL); // UINT_MAX
}
TEST_CASE("Character literals");
{
test_lexer_string("'a'", TOKEN_CHAR_LITERAL);
test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL);
test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL);
test_lexer_string("'\\\\'", TOKEN_CHAR_LITERAL);
test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL);
test_lexer_string("'a'", SCC_TOK_CHAR_LITERAL);
test_lexer_string("'\\n'", SCC_TOK_CHAR_LITERAL);
test_lexer_string("'\\t'", SCC_TOK_CHAR_LITERAL);
test_lexer_string("'\\\\'", SCC_TOK_CHAR_LITERAL);
test_lexer_string("'\\0'", SCC_TOK_CHAR_LITERAL);
}
TEST_CASE("String literals");
{
test_lexer_string("\"hello\"", TOKEN_STRING_LITERAL);
test_lexer_string("\"multi-line\\nstring\"", TOKEN_STRING_LITERAL);
test_lexer_string("\"escape\\\"quote\"", TOKEN_STRING_LITERAL);
test_lexer_string("\"hello\"", SCC_TOK_STRING_LITERAL);
test_lexer_string("\"multi-line\\nstring\"", SCC_TOK_STRING_LITERAL);
test_lexer_string("\"escape\\\"quote\"", SCC_TOK_STRING_LITERAL);
}
// TEST_CASE("Floating literals");
// test_lexer_string("3.14e-5", TOKEN_FLOAT_LITERAL);
// test_lexer_string("3.14e-5", SCC_TOK_FLOAT_LITERAL);
}
// Edge case tests
void test_edge_cases() {
// TEST_CASE("Long identifiers");
// char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0};
// memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1);
// test_lexer_string(long_id, TOKEN_IDENT);
// char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0};
// memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1);
// test_lexer_string(long_id, SCC_TOK_IDENT);
// TEST_CASE("Buffer boundary");
// char boundary[LEXER_BUFFER_SIZE*2] = {0};
// memset(boundary, '+', LEXER_BUFFER_SIZE*2-1);
// test_lexer_string(boundary, TOKEN_ADD);
// test_lexer_string(boundary, SCC_TOK_ADD);
}
// Error handling tests
@@ -158,7 +158,7 @@ void test_edge_cases() {
// init_lexer(&lexer, "test.c", NULL, test_read);
// get_valid_token(&lexer, &token);
// TEST_CHECK(token.type == TOKEN_EOF); // should trigger error handling
// TEST_CHECK(token.type == SCC_TOK_EOF); // should trigger error handling
// }
// Test list

View File

@@ -62,27 +62,28 @@ int main(int argc, char *argv[]) {
return 1;
}
smcc_lexer_t lexer;
core_mem_probe_stream_t mem_stream = {0};
core_probe_stream_t *stream =
core_mem_probe_stream_init(&mem_stream, buffer, fsize, false);
scc_lexer_t lexer;
scc_mem_probe_stream_t mem_stream = {0};
scc_probe_stream_t *stream =
scc_mem_probe_stream_init(&mem_stream, buffer, fsize, false);
Assert(stream != null);
cstring_clear(&stream->name);
cstring_append_cstr(&stream->name, file_name, strlen(file_name));
lexer_init(&lexer, stream);
scc_cstring_clear(&stream->name);
scc_cstring_append_cstr(&stream->name, file_name, strlen(file_name));
scc_lexer_init(&lexer, stream);
lexer_tok_t tok;
while (1) {
lexer_get_valid_token(&lexer, &tok);
if (tok.type == TOKEN_EOF) {
scc_lexer_get_valid_token(&lexer, &tok);
if (tok.type == SCC_TOK_EOF) {
break;
}
LOG_DEBUG("token `%s` at %s:%u:%u", get_tok_name(tok.type),
cstring_as_cstr(&tok.loc.name), tok.loc.line, tok.loc.col);
LOG_DEBUG("token `%s` at %s:%u:%u", scc_get_tok_name(tok.type),
scc_cstring_as_cstr(&tok.loc.name), tok.loc.line,
tok.loc.col);
Assert(tok.loc.offset <= fsize);
// LOG_DEBUG("%s", tok.val.str);
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
// lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
// lexer.line, lexer.index, tok.type, scc_get_tok_name(tok.type));
}
free(buffer);