refactor(lex_parser): 移除旧的词法解析器实现并更新依赖

移除了 libs/lex_parser 目录下的所有头文件和源文件，包括：lex_parser.h 和 lex_parser.c（核心解析功能），以及所有测试文件（test_char.c, test_identifier.c, test_number.c, test_skip_block_comment.c, test_skip_line.c, test_string.c）。同时更新了 lexer 模块的依赖配置，将 lex_parser 替换为 sstream，并更新了 lexer.h 中的相关包含头文件和数据结构定义，简化了 scc_lexer_t 结构体的字段。
2026-02-16 16:56:40 +08:00
parent 088050c903
commit 0e7dec202a
30 changed files with 1840 additions and 1979 deletions
--- a/libs/lexer/cbuild.toml
+++ b/libs/lexer/cbuild.toml
@@ -4,5 +4,5 @@ version = "0.1.0"

 dependencies = [
    { name = "scc_core", path = "../../runtime/scc_core" },
-    { name = "lex_parser", path = "../lex_parser" },
+    { name = "lex_parser", path = "../sstream" }, # NOTE(review): name is still "lex_parser" although the path now points at ../sstream — confirm the intended package name
 ]
--- a/libs/lexer/include/lexer.h
+++ b/libs/lexer/include/lexer.h
@@ -8,6 +8,7 @@

 #include "lexer_token.h"
 #include <scc_core.h>
+#include <scc_sstream.h>

 /**
 * @brief 词法分析器核心结构体
@@ -15,16 +16,11 @@
 * 封装词法分析所需的状态信息和缓冲区管理
 */
 typedef struct scc_lexer {
-    scc_probe_stream_t *stream;
-    scc_pos_t pos;
+    scc_sstream_ring_t stream_ref; // character ring the lexer reads from; scc_lexer_init stores it by value
+    int jump_macro; // set to false by scc_lexer_init; NOTE(review): its purpose is not visible here — confirm
 } scc_lexer_t;

-/**
- * @brief 初始化词法分析器
- * @param[out] lexer 要初始化的词法分析器实例
- * @param[in] stream 输入流对象指针
- */
-void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream);
+void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref); // initializes *lexer; the definition copies *stream_ref into lexer->stream_ref

 /**
 * @brief 获取原始token
--- a/libs/lexer/include/lexer_token.h
+++ b/libs/lexer/include/lexer_token.h
@@ -2,6 +2,7 @@
 #define __SCC_LEXER_TOKEN_H__

 #include <scc_core.h>
+#include <scc_pos.h>

 typedef enum scc_cstd {
    SCC_CSTD_C89,
@@ -54,64 +55,64 @@ typedef enum scc_cstd {
    // KEYWORD_TABLE

 #define SCC_CTOK_TABLE \
-    X(unknown        , SCC_TOK_SUBTYPE_INVALID,        SCC_TOK_UNKNOWN           ) \
-    X(EOF            , SCC_TOK_SUBTYPE_EOF,            SCC_TOK_EOF               ) \
-    X(blank          , SCC_TOK_SUBTYPE_EMPTYSPACE,     SCC_TOK_BLANK             ) \
-    X(endline        , SCC_TOK_SUBTYPE_EMPTYSPACE,     SCC_TOK_ENDLINE           ) \
-    X("#"            , SCC_TOK_SUBTYPE_EMPTYSPACE,     SCC_TOK_SHARP             ) \
-    X("=="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_EQ                ) \
-    X("="            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN            ) \
-    X("++"           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ADD_ADD           ) \
-    X("+="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_ADD        ) \
-    X("+"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ADD               ) \
-    X("--"           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_SUB_SUB           ) \
-    X("-="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_SUB        ) \
-    X("->"           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_DEREF             ) \
-    X("-"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_SUB               ) \
-    X("*="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_MUL        ) \
-    X("*"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_MUL               ) \
-    X("/="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_DIV        ) \
-    X("/"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_DIV               ) \
-    X("//"           , SCC_TOK_SUBTYPE_COMMENT ,       SCC_TOK_LINE_COMMENT      ) \
-    X("/* */"        , SCC_TOK_SUBTYPE_COMMENT ,       SCC_TOK_BLOCK_COMMENT     ) \
-    X("%="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_MOD        ) \
-    X("%"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_MOD               ) \
-    X("&&"           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_AND_AND           ) \
-    X("&="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_AND        ) \
-    X("&"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_AND               ) \
-    X("||"           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_OR_OR             ) \
-    X("|="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_OR         ) \
-    X("|"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_OR                ) \
-    X("^="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_XOR        ) \
-    X("^"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_XOR               ) \
-    X("<<="          , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_L_SH       ) \
-    X("<<"           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_L_SH              ) \
-    X("<="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_LE                ) \
-    X("<"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_LT                ) \
-    X(">>="          , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_R_SH       ) \
-    X(">>"           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_R_SH              ) \
-    X(">="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_GE                ) \
-    X(">"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_GT                ) \
-    X("!"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_NOT               ) \
-    X("!="           , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_NEQ               ) \
-    X("~"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_BIT_NOT           ) \
-    X("["            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_L_BRACKET         ) \
-    X("]"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_R_BRACKET         ) \
-    X("("            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_L_PAREN           ) \
-    X(")"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_R_PAREN           ) \
-    X("{"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_L_BRACE           ) \
-    X("}"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_R_BRACE           ) \
-    X(";"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_SEMICOLON         ) \
-    X(","            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_COMMA             ) \
-    X(":"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_COLON             ) \
-    X("."            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_DOT               ) \
-    X("..."          , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ELLIPSIS          ) \
-    X("?"            , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_COND              ) \
-    X(ident          , SCC_TOK_SUBTYPE_IDENTIFIER,     SCC_TOK_IDENT             ) \
-    X(int_literal    , SCC_TOK_SUBTYPE_LITERAL,        SCC_TOK_INT_LITERAL       ) \
-    X(float_literal  , SCC_TOK_SUBTYPE_LITERAL,        SCC_TOK_FLOAT_LITERAL     ) \
-    X(char_literal   , SCC_TOK_SUBTYPE_LITERAL,        SCC_TOK_CHAR_LITERAL      ) \
-    X(string_literal , SCC_TOK_SUBTYPE_LITERAL,        SCC_TOK_STRING_LITERAL    ) \
+    X(unknown   , SCC_TOK_SUBTYPE_INVALID,        SCC_TOK_UNKNOWN           ) \
+    X(EOF       , SCC_TOK_SUBTYPE_EOF,            SCC_TOK_EOF               ) \
+    X(blank     , SCC_TOK_SUBTYPE_EMPTYSPACE,     SCC_TOK_BLANK             ) \
+    X(endline   , SCC_TOK_SUBTYPE_EMPTYSPACE,     SCC_TOK_ENDLINE           ) \
+    X("#"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_SHARP             ) \
+    X("=="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_EQ                ) \
+    X("="       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN            ) \
+    X("++"      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ADD_ADD           ) \
+    X("+="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_ADD        ) \
+    X("+"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ADD               ) \
+    X("--"      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_SUB_SUB           ) \
+    X("-="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_SUB        ) \
+    X("->"      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_DEREF             ) \
+    X("-"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_SUB               ) \
+    X("*="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_MUL        ) \
+    X("*"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_MUL               ) \
+    X("/="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_DIV        ) \
+    X("/"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_DIV               ) \
+    X("//"      , SCC_TOK_SUBTYPE_COMMENT ,       SCC_TOK_LINE_COMMENT      ) \
+    X("/* */"   , SCC_TOK_SUBTYPE_COMMENT ,       SCC_TOK_BLOCK_COMMENT     ) \
+    X("%="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_MOD        ) \
+    X("%"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_MOD               ) \
+    X("&&"      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_AND_AND           ) \
+    X("&="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_AND        ) \
+    X("&"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_AND               ) \
+    X("||"      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_OR_OR             ) \
+    X("|="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_OR         ) \
+    X("|"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_OR                ) \
+    X("^="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_XOR        ) \
+    X("^"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_XOR               ) \
+    X("<<="     , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_L_SH       ) \
+    X("<<"      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_L_SH              ) \
+    X("<="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_LE                ) \
+    X("<"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_LT                ) \
+    X(">>="     , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ASSIGN_R_SH       ) \
+    X(">>"      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_R_SH              ) \
+    X(">="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_GE                ) \
+    X(">"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_GT                ) \
+    X("!"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_NOT               ) \
+    X("!="      , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_NEQ               ) \
+    X("~"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_BIT_NOT           ) \
+    X("["       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_L_BRACKET         ) \
+    X("]"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_R_BRACKET         ) \
+    X("("       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_L_PAREN           ) \
+    X(")"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_R_PAREN           ) \
+    X("{"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_L_BRACE           ) \
+    X("}"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_R_BRACE           ) \
+    X(";"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_SEMICOLON         ) \
+    X(","       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_COMMA             ) \
+    X(":"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_COLON             ) \
+    X("."       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_DOT               ) \
+    X("..."     , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_ELLIPSIS          ) \
+    X("?"       , SCC_TOK_SUBTYPE_OPERATOR,       SCC_TOK_COND              ) \
+    X(ident     , SCC_TOK_SUBTYPE_IDENTIFIER,     SCC_TOK_IDENT             ) \
+    X(int       , SCC_TOK_SUBTYPE_LITERAL,        SCC_TOK_INT_LITERAL       ) \
+    X(float     , SCC_TOK_SUBTYPE_LITERAL,        SCC_TOK_FLOAT_LITERAL     ) \
+    X(char      , SCC_TOK_SUBTYPE_LITERAL,        SCC_TOK_CHAR_LITERAL      ) \
+    X(string    , SCC_TOK_SUBTYPE_LITERAL,        SCC_TOK_STRING_LITERAL    ) \
    // END
 /* clang-format on */

@@ -145,7 +146,7 @@ const char *scc_get_tok_name(scc_tok_type_t type);

 typedef struct scc_lexer_token {
    scc_tok_type_t type;
-    scc_cvalue_t value;
+    scc_cstring_t lexeme; // token text kept as a string (no decoded value is stored in the token)
    scc_pos_t loc;
 } scc_lexer_tok_t;

--- a/libs/lexer/src/lexer.c
+++ b/libs/lexer/src/lexer.c
@@ -1,4 +1,3 @@
-#include <lex_parser.h>
 #include <lexer.h>
 #include <lexer_log.h>

@@ -13,442 +12,460 @@ static const struct {
 };

 // by using binary search to find the keyword
-static inline int keyword_cmp(const char *name, int len) {
+static int keyword_cmp(const char *name, int len) { // returns the index of the matching keywords[] entry, or -1
    int low = 0;
    int high = sizeof(keywords) / sizeof(keywords[0]) - 1;
    while (low <= high) {
        int mid = (low + high) / 2;
        const char *key = keywords[mid].name;
        int cmp = 0;
-
-        // 自定义字符串比较逻辑
        for (int i = 0; i < len; i++) {
            if (name[i] != key[i]) {
                cmp = (unsigned char)name[i] - (unsigned char)key[i];
                break;
            }
            if (name[i] == '\0')
-                break; // 遇到终止符提前结束
+                break; // both strings ended before len characters: stop comparing
        }
-
        if (cmp == 0) {
-            // 完全匹配检查（长度相同）
            if (key[len] == '\0')
                return mid;
-            cmp = -1; // 当前关键词比输入长
+            cmp = -1; // first len characters match but the keyword is longer, so the input sorts before it
        }
-
-        if (cmp < 0) {
+        if (cmp < 0) // input sorts before keywords[mid]: continue in the lower half
            high = mid - 1;
-        } else {
+        else
            low = mid + 1;
-        }
    }
-    return -1; // Not a keyword.
+    return -1; // not a keyword
 }

-void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream) {
-    lexer->stream = stream;
-    lexer->pos = scc_pos_create();
-    // FIXME
-    lexer->pos.name = scc_cstring_copy(&stream->name);
+void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref) { // sets both fields of *lexer
+    lexer->stream_ref = *stream_ref; // struct copy: the ring is stored by value, not as a pointer to the caller's object
+    lexer->jump_macro = false; // flag starts cleared
+}
+
+static inline cbool is_whitespace(int ch) { // horizontal blanks only; '\n' and '\r' are classified by is_newline
+    return ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f';
+}
+static inline cbool is_newline(int ch) { return ch == '\n' || ch == '\r'; }
+static inline cbool is_digit(int ch) { return ch >= '0' && ch <= '9'; }
+static inline cbool is_alpha(int ch) { // 'a'-'z' and 'A'-'Z' only; '_' is handled by the identifier predicates
+    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
+}
+static inline cbool is_alnum(int ch) { return is_alpha(ch) || is_digit(ch); }
+static inline cbool is_identifier_start(int ch) { // letter or underscore
+    return is_alpha(ch) || ch == '_';
+}
+static inline cbool is_identifier_part(int ch) { // letter, digit or underscore
+    return is_alnum(ch) || ch == '_';
+}
+static inline cbool is_octal_digit(int ch) { return ch >= '0' && ch <= '7'; }
+static inline cbool is_hex_digit(int ch) { // decimal digit or a-f in either case
+    return is_digit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
+}
+
+/* Peek at the next character of the lexer's ring buffer without consuming it.
+ * Hands *out and a success flag to scc_ring_peek and returns that flag;
+ * scc_lexer_get_token treats a false result on its first peek as end of input. */
+static inline cbool peek_char(scc_lexer_t *lexer, scc_sstream_char_t *out) {
+    cbool ok;
+    scc_ring_peek(lexer->stream_ref, *out, ok);
+    return ok;
+}
+
+/* Consume one character from the lexer's ring buffer and append it to lexeme.
+ * Returns false, leaving lexeme unchanged, when scc_ring_next reports failure;
+ * otherwise appends out->character to lexeme and returns true. */
+static inline cbool next_char(scc_lexer_t *lexer, scc_cstring_t *lexeme,
+                              scc_sstream_char_t *out) {
+    cbool ok;
+    scc_ring_next(lexer->stream_ref, *out, ok);
+    if (!ok)
+        return false;
+    scc_cstring_append_ch(lexeme, out->character);
+    return true;
 }

 #define set_err_token(token) ((token)->type = SCC_TOK_UNKNOWN)

-static void parse_line(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
-    token->loc = lexer->pos;
-    scc_probe_stream_t *stream = lexer->stream;
-    scc_probe_stream_reset(stream);
-    int ch = scc_probe_stream_next(stream);
-
-    usize n;
-    scc_cstring_t str = scc_cstring_create();
-
-    if (ch == scc_stream_eof) {
-        LEX_WARN("Unexpected EOF at begin");
-        goto ERR;
-    } else if (ch != '#') {
-        LEX_WARN("Unexpected character '%c' at begin", ch);
-        goto ERR;
-    }
-
-    const char line[] = "line";
-
-    for (int i = 0; i < (int)sizeof(line); i++) {
-        ch = scc_probe_stream_consume(stream);
-        scc_pos_next(&lexer->pos);
-        if (ch != line[i]) {
-            LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
-                     "it will be ignored");
-            goto SKIP_LINE;
-        }
-    }
-
-    if (scc_lex_parse_number(stream, &lexer->pos, &n) == false) {
-        LEX_ERROR("Invalid line number");
-        goto SKIP_LINE;
-    }
-
-    if (scc_probe_stream_consume(stream) != ' ') {
-        scc_lex_parse_skip_line(stream, &lexer->pos);
-        token->loc.line = token->value.u;
-    }
-
-    if (scc_probe_stream_next(stream) != '"') {
-        LEX_ERROR("Invalid `#` line");
-        goto SKIP_LINE;
-    }
-    if (scc_lex_parse_string(stream, &lexer->pos, &str) == false) {
-        LEX_ERROR("Invalid filename");
-        goto SKIP_LINE;
-    }
-
-    scc_lex_parse_skip_line(stream, &lexer->pos);
-    scc_probe_stream_sync(stream);
-    token->loc.line = n;
-    // FIXME memory leak
-    token->loc.name = scc_cstring_copy(&str);
-    scc_cstring_free(&str);
-    return;
-SKIP_LINE:
-    scc_lex_parse_skip_line(stream, &lexer->pos);
-    scc_probe_stream_sync(stream);
-ERR:
-    set_err_token(token);
-    scc_cstring_free(&str);
-}
-
-// /zh/c/language/operator_arithmetic.html
 void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
-    token->loc = lexer->pos;
-    token->type = SCC_TOK_UNKNOWN;
-    scc_probe_stream_t *stream = lexer->stream;
+    scc_sstream_char_t cur;
+    scc_cstring_t lex = scc_cstring_create(); // 临时lexeme

-    scc_probe_stream_reset(stream);
-    scc_tok_type_t type = SCC_TOK_UNKNOWN;
-    int ch = scc_probe_stream_next(stream);
+    // 尝试预览第一个字符
+    if (!peek_char(lexer, &cur)) {
+        token->type = SCC_TOK_EOF;
+        token->loc = (scc_pos_t){0, 1, 1, 0}; // 默认位置
+        token->lexeme = lex;                  // 空字符串
+        return;
+    }
+
+    // 记录起始位置
+    scc_pos_t start_loc = cur.pos;
+    int ch = cur.character;

    // once step
-    switch (ch) {
-    case '=':
-        switch (scc_probe_stream_next(stream)) {
-        case '=':
-            type = SCC_TOK_EQ;
-            goto double_char;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_ASSIGN;
-            break;
+    if (is_whitespace(ch)) {
+        // 空白符: 连续收集
+        token->type = SCC_TOK_BLANK;
+        while (peek_char(lexer, &cur) && is_whitespace(cur.character)) {
+            next_char(lexer, &lex, &cur);
        }
-        break;
-    case '+':
-        switch (scc_probe_stream_next(stream)) {
-        case '+':
-            type = SCC_TOK_ADD_ADD;
-            goto double_char;
-        case '=':
-            type = SCC_TOK_ASSIGN_ADD;
-            goto double_char;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_ADD;
-            break;
-        }
-        break;
-    case '-':
-        switch (scc_probe_stream_next(stream)) {
-        case '-':
-            type = SCC_TOK_SUB_SUB;
-            goto double_char;
-        case '=':
-            type = SCC_TOK_ASSIGN_SUB;
-            goto double_char;
-        case '>':
-            type = SCC_TOK_DEREF;
-            goto double_char;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_SUB;
-            break;
-        }
-        break;
-    case '*':
-        switch (scc_probe_stream_next(stream)) {
-        case '=':
-            type = SCC_TOK_ASSIGN_MUL;
-            goto double_char;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_MUL;
-            break;
-        }
-        break;
-    case '/':
-        switch (scc_probe_stream_next(stream)) {
-        case '=':
-            type = SCC_TOK_ASSIGN_DIV;
-            goto double_char;
-        case '/':
-            scc_probe_stream_reset(stream);
-            scc_lex_parse_skip_line(stream, &lexer->pos);
-            scc_probe_stream_sync(stream);
-            token->type = SCC_TOK_LINE_COMMENT;
-            goto END;
-        case '*':
-            scc_probe_stream_reset(stream);
-            scc_lex_parse_skip_block_comment(stream, &lexer->pos);
-            scc_probe_stream_sync(stream);
-            token->type = SCC_TOK_BLOCK_COMMENT;
-            goto END;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_DIV;
-            break;
-        }
-        break;
-    case '%':
-        switch (scc_probe_stream_next(stream)) {
-        case '=':
-            type = SCC_TOK_ASSIGN_MOD;
-            goto double_char;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_MOD;
-            break;
-        }
-        break;
-    case '&':
-        switch (scc_probe_stream_next(stream)) {
-        case '&':
-            type = SCC_TOK_AND_AND;
-            goto double_char;
-        case '=':
-            type = SCC_TOK_ASSIGN_AND;
-            goto double_char;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_AND;
-            break;
-        }
-        break;
-    case '|':
-        switch (scc_probe_stream_next(stream)) {
-        case '|':
-            type = SCC_TOK_OR_OR;
-            goto double_char;
-        case '=':
-            type = SCC_TOK_ASSIGN_OR;
-            goto double_char;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_OR;
-            break;
-        }
-        break;
-    case '^':
-        switch (scc_probe_stream_next(stream)) {
-        case '=':
-            type = SCC_TOK_ASSIGN_XOR;
-            goto double_char;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_XOR;
-            break;
-        }
-        break;
-    case '<':
-        switch (scc_probe_stream_next(stream)) {
-        case '=':
-            type = SCC_TOK_LE;
-            goto double_char;
-        case '<': {
-            if (scc_probe_stream_next(stream) == '=') {
-                type = SCC_TOK_ASSIGN_L_SH;
-                goto triple_char;
-            } else {
-                type = SCC_TOK_L_SH;
-                goto double_char;
-            }
-            break;
-        }
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_LT;
-            break;
-        }
-        break;
-    case '>':
-        switch (scc_probe_stream_next(stream)) {
-        case '=':
-            type = SCC_TOK_GE;
-            goto double_char;
-        case '>': {
-            if (scc_probe_stream_next(stream) == '=') {
-                type = SCC_TOK_ASSIGN_R_SH;
-                goto triple_char;
-            } else {
-                type = SCC_TOK_R_SH;
-                goto double_char;
-            }
-            break;
-        }
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_GT;
-            break;
-        }
-        break;
-    case '~':
-        type = SCC_TOK_BIT_NOT;
-        break;
-    case '!':
-        switch (scc_probe_stream_next(stream)) {
-        case '=':
-            type = SCC_TOK_NEQ;
-            goto double_char;
-        default:
-            scc_probe_stream_reset(stream), type = SCC_TOK_NOT;
-            break;
-        }
-        break;
-        /* clang-format off */
-    case '[': type = SCC_TOK_L_BRACKET; break;
-    case ']': type = SCC_TOK_R_BRACKET; break;
-    case '(': type = SCC_TOK_L_PAREN; break;
-    case ')': type = SCC_TOK_R_PAREN; break;
-    case '{': type = SCC_TOK_L_BRACE; break;
-    case '}': type = SCC_TOK_R_BRACE; break;
-    case ';': type = SCC_TOK_SEMICOLON; break;
-    case ',': type = SCC_TOK_COMMA; break;
-    case ':': type = SCC_TOK_COLON; break;
-        /* clang-format on */
-    case '.':
-        if (scc_probe_stream_next(stream) == '.' &&
-            scc_probe_stream_next(stream) == '.') {
-            type = SCC_TOK_ELLIPSIS;
-            goto triple_char;
-        }
-        type = SCC_TOK_DOT;
-        break;
-    case '?':
-        type = SCC_TOK_COND;
-        break;
-    case '\v':
-    case '\f':
-    case ' ':
-    case '\t':
-        type = SCC_TOK_BLANK;
-        break;
-    case '\r':
-    case '\n':
-        scc_probe_stream_back(stream);
-        scc_lex_parse_skip_endline(stream, &lexer->pos);
-        scc_probe_stream_sync(stream);
+    } else if (is_newline(ch)) {
+        // 换行符：处理 \r 或 \n，以及 \r\n 组合
        token->type = SCC_TOK_ENDLINE;
-        goto END;
-    case '#':
-        parse_line(lexer, token);
-        token->type = SCC_TOK_SHARP;
-        goto END;
-    case '\0':
-    case scc_stream_eof:
-        // EOF
-        type = SCC_TOK_EOF;
-        break;
-    case '\'': {
-        token->loc = lexer->pos;
+        next_char(lexer, &lex, &cur); // 消费第一个字符
+        if (ch == '\r') {
+            // 尝试消费后面的 \n
+            if (peek_char(lexer, &cur) && cur.character == '\n') {
+                next_char(lexer, &lex, &cur);
+            }
+        }
+    } else if (ch == '/') {
+        // 可能为注释或除号
+        scc_sstream_char_t next = {0};
+        next_char(lexer, &lex, &cur); // 消费 '/'
+        peek_char(lexer, &next);
+        if (next.character == '=') {
+            token->type = SCC_TOK_ASSIGN_DIV;
+            next_char(lexer, &lex, &cur);
+        } else if (next.character == '/') {
+            // 行注释 //
+            token->type = SCC_TOK_LINE_COMMENT;
+            next_char(lexer, &lex, &cur); // 消费 '/'
+            while (peek_char(lexer, &cur) && !is_newline(cur.character)) {
+                next_char(lexer, &lex, &cur);
+                scc_ring_consume(lexer->stream_ref);
+            }
+            // 注释结束，不包含换行符（换行符单独成token）
+        } else if (next.character == '*') {
+            // 块注释 /*
+            token->type = SCC_TOK_BLOCK_COMMENT;
+            next_char(lexer, &lex, &cur); // 消费 '*'
+            while (1) {
+                if (!next_char(lexer, &lex, &cur)) {
+                    // 文件结束，注释未闭合
+                    LOG_ERROR("Unterminated block comment");
+                    break;
+                }
+                if (cur.character == '*' && peek_char(lexer, &next) &&
+                    next.character == '/') {
+                    next_char(lexer, &lex, &cur); // 消费 '/'
+                    break;
+                }
+                scc_ring_consume(lexer->stream_ref);
+            }
+        } else {
+            // 只是除号 /
+            token->type = SCC_TOK_DIV;
+        }
+    } else if (is_identifier_start(ch)) {
+        // 标识符或关键字
+        token->type = SCC_TOK_IDENT; // 暂定
+        while (peek_char(lexer, &cur) && is_identifier_part(cur.character)) {
+            next_char(lexer, &lex, &cur);
+            scc_ring_consume(lexer->stream_ref);
+        }
+        // 检查是否为关键字
+        int idx = keyword_cmp(scc_cstring_as_cstr(&lex), scc_cstring_len(&lex));
+        if (idx != -1) {
+            token->type = keywords[idx].tok;
+        }
+    } else if (is_digit(ch)) {
+        // 数字字面量（整数/浮点）
+        token->type = SCC_TOK_INT_LITERAL; // 先假定整数
+        cbool maybe_float = false;
+        while (1) {
+            next_char(lexer, &lex, &cur); // 消费当前数字
+            if (!peek_char(lexer, &cur))
+                break;
+            ch = cur.character;
+            if (is_digit(ch) || (ch == '.' && !maybe_float)) {
+                if (ch == '.')
+                    maybe_float = true;
+                continue;
+            }
+            if (ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P') {
+                maybe_float = true;
+                // 后面可能跟符号或数字
+                continue;
+            }
+            if (ch == 'x' || ch == 'X') {
+                // 十六进制前缀，需特殊处理
+                // 这里简化：将整个序列作为整数（保留前缀）
+                continue;
+            }
+            break;
+        }
+        if (maybe_float)
+            token->type = SCC_TOK_FLOAT_LITERAL;
+    } else if (ch == '\'') {
+        // 字符字面量
        token->type = SCC_TOK_CHAR_LITERAL;
-        scc_probe_stream_reset(stream);
-        int ch = scc_lex_parse_char(stream, &lexer->pos);
-        scc_probe_stream_sync(stream);
-        if (ch == scc_stream_eof) {
-            LEX_ERROR("Unexpected character literal");
-            token->type = SCC_TOK_UNKNOWN;
-        } else {
-            token->value.ch = ch;
+        next_char(lexer, &lex, &cur); // 开头的 '
+        while (1) {
+            if (!peek_char(lexer, &cur)) {
+                LOG_ERROR("Unterminated character literal");
+                break;
+            }
+            if (cur.character == '\'') {
+                next_char(lexer, &lex, &cur); // 闭引号
+                break;
+            }
+            if (cur.character == '\\') {
+                // 转义序列：原样保存反斜杠和下一个字符
+                next_char(lexer, &lex, &cur);
+                if (!peek_char(lexer, &cur))
+                    break;
+                next_char(lexer, &lex, &cur);
+            } else {
+                next_char(lexer, &lex, &cur);
+            }
        }
-        goto END;
-    }
-    case '"': {
-        token->loc = lexer->pos;
+    } else if (ch == '"') {
+        // 字符串字面量
        token->type = SCC_TOK_STRING_LITERAL;
-        scc_cstring_t output = scc_cstring_create();
-        scc_probe_stream_reset(stream);
-        if (scc_lex_parse_string(stream, &lexer->pos, &output) == true) {
-            scc_probe_stream_sync(stream);
-            token->value.cstr.data = scc_cstring_as_cstr(&output);
-            token->value.cstr.len = scc_cstring_len(&output);
-        } else {
-            LEX_ERROR("Unexpected string literal");
+        next_char(lexer, &lex, &cur); // 开头的 "
+        while (1) {
+            if (!peek_char(lexer, &cur)) {
+                LOG_ERROR("Unterminated string literal");
+                break;
+            }
+            if (cur.character == '"') {
+                next_char(lexer, &lex, &cur); // 闭引号
+                break;
+            }
+            if (cur.character == '\\') {
+                // 转义序列
+                next_char(lexer, &lex, &cur);
+                if (!peek_char(lexer, &cur))
+                    break;
+                next_char(lexer, &lex, &cur);
+            } else {
+                next_char(lexer, &lex, &cur);
+            }
+            scc_ring_consume(lexer->stream_ref);
+        }
+    } else {
+        scc_sstream_char_t next = {0};
+        next_char(lexer, &lex, &cur);
+        peek_char(lexer, &next);
+        switch (ch) {
+        case '=':
+            switch (next.character) {
+            case '=':
+                token->type = SCC_TOK_EQ;
+                next_char(lexer, &lex, &cur);
+                break;
+            default:
+                token->type = SCC_TOK_ASSIGN;
+                break;
+            }
+            break;
+        case '+':
+            switch (next.character) {
+            case '+':
+                token->type = SCC_TOK_ADD_ADD;
+                next_char(lexer, &lex, &cur);
+                break;
+            case '=':
+                token->type = SCC_TOK_ASSIGN_ADD;
+                next_char(lexer, &lex, &cur);
+                break;
+            default:
+                token->type = SCC_TOK_ADD;
+                break;
+            }
+            break;
+        case '-':
+            switch (next.character) {
+            case '-':
+                token->type = SCC_TOK_SUB_SUB;
+                next_char(lexer, &lex, &cur);
+                break;
+            case '=':
+                token->type = SCC_TOK_ASSIGN_SUB;
+                next_char(lexer, &lex, &cur);
+                break;
+            case '>':
+                token->type = SCC_TOK_DEREF;
+                next_char(lexer, &lex, &cur);
+                break;
+            default:
+                token->type = SCC_TOK_SUB;
+                break;
+            }
+            break;
+        case '*':
+            switch (next.character) {
+            case '=':
+                token->type = SCC_TOK_ASSIGN_MUL;
+                next_char(lexer, &lex, &cur);
+                break;
+            default:
+                token->type = SCC_TOK_MUL;
+                break;
+            }
+            break;
+        case '%':
+            switch (next.character) {
+            case '=':
+                token->type = SCC_TOK_ASSIGN_MOD;
+                next_char(lexer, &lex, &cur);
+                break;
+            default:
+                token->type = SCC_TOK_MOD;
+                break;
+            }
+            break;
+        case '&':
+            switch (next.character) {
+            case '&':
+                token->type = SCC_TOK_AND_AND;
+                next_char(lexer, &lex, &cur);
+                break;
+            case '=':
+                token->type = SCC_TOK_ASSIGN_AND;
+                next_char(lexer, &lex, &cur);
+                break;
+            default:
+                token->type = SCC_TOK_AND;
+                break;
+            }
+            break;
+        case '|':
+            switch (next.character) {
+            case '|':
+                token->type = SCC_TOK_OR_OR;
+                next_char(lexer, &lex, &cur);
+                break;
+            case '=':
+                token->type = SCC_TOK_ASSIGN_OR;
+                next_char(lexer, &lex, &cur);
+                break;
+            default:
+                token->type = SCC_TOK_OR;
+                break;
+            }
+            break;
+        case '^':
+            switch (next.character) {
+            case '=':
+                token->type = SCC_TOK_ASSIGN_XOR;
+                next_char(lexer, &lex, &cur);
+                break;
+            default:
+                token->type = SCC_TOK_XOR;
+                break;
+            }
+            break;
+        case '<':
+            switch (next.character) {
+            case '=':
+                token->type = SCC_TOK_LE;
+                next_char(lexer, &lex, &cur);
+                break;
+            case '<': {
+                next_char(lexer, &lex, &cur);
+                if (peek_char(lexer, &next) && next.character == '=') {
+                    token->type = SCC_TOK_ASSIGN_L_SH;
+                    next_char(lexer, &lex, &cur);
+                } else {
+                    token->type = SCC_TOK_L_SH;
+                }
+                break;
+            }
+            default:
+                token->type = SCC_TOK_LT;
+                break;
+            }
+            break;
+        case '>':
+            switch (next.character) {
+            case '=':
+                token->type = SCC_TOK_GE;
+                next_char(lexer, &lex, &cur);
+                break;
+            case '>': {
+                next_char(lexer, &lex, &cur);
+                if (peek_char(lexer, &next) && next.character == '=') {
+                    token->type = SCC_TOK_ASSIGN_R_SH;
+                    next_char(lexer, &lex, &cur);
+                } else {
+                    token->type = SCC_TOK_R_SH;
+                }
+                break;
+            }
+            default:
+                token->type = SCC_TOK_GT;
+                break;
+            }
+            break;
+        case '~':
+            token->type = SCC_TOK_BIT_NOT;
+            break;
+        case '!':
+            switch (next.character) {
+            case '=':
+                token->type = SCC_TOK_NEQ;
+                next_char(lexer, &lex, &cur);
+                break;
+            default:
+                token->type = SCC_TOK_NOT;
+                break;
+            }
+            break;
+            /* clang-format off */
+        case '[': token->type = SCC_TOK_L_BRACKET; break;
+        case ']': token->type = SCC_TOK_R_BRACKET; break;
+        case '(': token->type = SCC_TOK_L_PAREN; break;
+        case ')': token->type = SCC_TOK_R_PAREN; break;
+        case '{': token->type = SCC_TOK_L_BRACE; break;
+        case '}': token->type = SCC_TOK_R_BRACE; break;
+        case ';': token->type = SCC_TOK_SEMICOLON; break;
+        case ',': token->type = SCC_TOK_COMMA; break;
+        case ':': token->type = SCC_TOK_COLON; break;
+            /* clang-format on */
+        case '.':
+            if (next.character == '.' && peek_char(lexer, &next) &&
+                next.character == '.') {
+                token->type = SCC_TOK_ELLIPSIS;
+                next_char(lexer, &lex, &cur);
+                next_char(lexer, &lex, &cur);
+            } else {
+                token->type = SCC_TOK_DOT;
+            }
+            break;
+        case '?':
+            token->type = SCC_TOK_COND;
+            break;
+        case '#':
+            token->type = SCC_TOK_SHARP;
+            break;
+        default:
            token->type = SCC_TOK_UNKNOWN;
+            LEX_ERROR("unsupport char in sourse code `%c`:0x%x", ch, ch);
+            break;
        }
-
-        goto END;
    }
-        /* clang-format off */
-    case '0': case '1': case '2': case '3': case '4':
-    case '5': case '6': case '7': case '8': case '9':
-        /* clang-format on */
-        token->loc = lexer->pos;
-        token->type = SCC_TOK_INT_LITERAL;
-        usize output;
-        scc_probe_stream_reset(stream);
-        if (scc_lex_parse_number(stream, &lexer->pos, &output) == true) {
-            scc_probe_stream_sync(stream);
-            token->value.u = output;
-        } else {
-            LEX_ERROR("Unexpected number literal");
-            token->type = SCC_TOK_UNKNOWN;
-        }
-        goto END;
-        /* clang-format off */
-    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
-    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
-    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
-    case 'v': case 'w': case 'x': case 'y': case 'z':
-    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
-    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
-    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
-    case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
-        /* clang-format on */
-        scc_cstring_t str = scc_cstring_create();
-        scc_probe_stream_reset(stream);
-        cbool ret = scc_lex_parse_identifier(stream, &lexer->pos, &str);
-        scc_probe_stream_sync(stream);
-        Assert(ret == true);

-        int res = keyword_cmp(scc_cstring_as_cstr(&str), scc_cstring_len(&str));
-        if (res == -1) {
-            token->value.cstr.data = (char *)scc_cstring_as_cstr(&str);
-            token->value.cstr.len = scc_cstring_len(&str);
-            type = SCC_TOK_IDENT;
-        } else {
-            scc_cstring_free(&str);
-            type = keywords[res].tok;
-        }
-        token->type = type;
-        goto END;
-    default:
-        LEX_ERROR("unsupport char in sourse code `%c`:0x%x", ch, ch);
-        break;
-    }
-    goto once_char;
-triple_char:
-    scc_probe_stream_consume(stream);
-    scc_pos_next(&lexer->pos);
-double_char:
-    scc_probe_stream_consume(stream);
-    scc_pos_next(&lexer->pos);
-once_char:
-    scc_probe_stream_consume(stream);
-    scc_pos_next(&lexer->pos);
-    token->type = type;
-END:
-    LEX_DEBUG("get token `%s` in %s:%d:%d", scc_get_tok_name(token->type),
-              token->loc.name, token->loc.line, token->loc.column);
+    // 设置token
+    scc_ring_consume(lexer->stream_ref);
+    token->type = token->type; // 上面已设
+    token->loc = start_loc;
+    token->lexeme = lex; // 转移所有权
+    LEX_DEBUG("get token `%s` (%s) at %s:%d:%d", scc_get_tok_name(token->type),
+              scc_cstring_as_cstr(&token->lexeme), token->loc.name,
+              token->loc.line, token->loc.col);
 }

 // scc_lexer_get_token maybe got invalid (with parser)
 void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
-    scc_tok_subtype_t type;
+    scc_tok_subtype_t subtype; // NOTE(review): skipped tokens' lexemes are not freed here -- confirm scc_lexer_get_token does not leak them
     do {
         scc_lexer_get_token(lexer, token);
-        type = scc_get_tok_subtype(token->type);
-        AssertFmt(type != SCC_TOK_SUBTYPE_INVALID,
+        subtype = scc_get_tok_subtype(token->type);
+        AssertFmt(subtype != SCC_TOK_SUBTYPE_INVALID,
                   "Invalid token: `%s` at %s:%d:%d",
                   scc_get_tok_name(token->type), token->loc.name,
                   token->loc.line, token->loc.col);
-        Assert(type != SCC_TOK_SUBTYPE_INVALID);
-    } while (type == SCC_TOK_SUBTYPE_EMPTYSPACE ||
-             type == SCC_TOK_SUBTYPE_COMMENT);
+    } while (subtype == SCC_TOK_SUBTYPE_EMPTYSPACE ||
+             subtype == SCC_TOK_SUBTYPE_COMMENT); // fetch again past blanks and comments
 }
--- a/libs/lexer/src/main.c
+++ b/libs/lexer/src/main.c
@@ -0,0 +1,66 @@
+#include <lexer.h>
+#include <lexer_log.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
+/*
+tok_tConstant {
+    int have;
+    union {
+        char ch;
+        int i;
+        float f;
+        double d;
+        long long ll;
+        char* str;
+    };
+};
+*/
+
+int g_num;
+int g_num_arr[3];
+/* Lex argv[1] (default: this file) and log every non-blank, non-comment
+ * token; "--debug" as argv[2] turns on all log levels. */
+int main(int argc, char *argv[]) {
+    if (argc == 3 && strcmp(argv[2], "--debug") == 0) {
+        log_set_level(NULL, LOG_LEVEL_ALL);
+    } else {
+        // FIXME: hack -- adjusts the lexer's own logger object directly
+        log_set_level(&__scc_lexer_log, LOG_LEVEL_NOTSET);
+        log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR |
+                                LOG_LEVEL_FATAL);
+    }
+
+    const char *file_name = __FILE__;
+    if (argc >= 2) { // honor argv[1] even when "--debug" follows it
+        file_name = argv[1];
+    }
+
+    scc_lexer_t lexer;
+    scc_sstream_t stream;
+    scc_sstream_init(&stream, file_name, 16);
+    scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream);
+    scc_lexer_init(&lexer, ref);
+    scc_lexer_tok_t token;
+
+    while (1) {
+        scc_lexer_get_valid_token(&lexer, &token);
+        if (token.type == SCC_TOK_EOF) {
+            scc_cstring_free(&token.lexeme);
+            break;
+        }
+        LOG_DEBUG("get token [%-8s] `%s` at %s:%d:%d",
+                  scc_get_tok_name(token.type),
+                  scc_cstring_as_cstr(&token.lexeme), token.loc.name,
+                  token.loc.line, token.loc.col);
+        // The token owns its lexeme string; release it before the next fetch.
+        scc_cstring_free(&token.lexeme);
+    }
+    scc_sstream_drop_ring(ref);
+    scc_sstream_drop(&stream);
+
+    LOG_INFO("Lexer is Ok...");
+    return 0;
+}
--- a/libs/lexer/tests/test_lexer.c
+++ b/libs/lexer/tests/test_lexer.c
@@ -0,0 +1,403 @@
+// test_lexer.c
+#include <lexer.h>
+#include <string.h>
+#include <utest/acutest.h>
+
+// Helper: release the lexeme string held by a token
+static void free_token(scc_lexer_tok_t *tok) { scc_cstring_free(&tok->lexeme); }
+
+// Single-token test macro: lex the first token of `input` and check its type
+#define TEST_TOKEN(input, expected_type)                                       \
+    do {                                                                       \
+        scc_lexer_t lexer;                                                     \
+        scc_lexer_tok_t token;                                                 \
+        scc_sstream_t stream;                                                  \
+        scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16);      \
+        scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream);               \
+        scc_lexer_init(&lexer, ref);                                           \
+        scc_lexer_get_token(&lexer, &token);                                   \
+                                                                               \
+        TEST_CHECK(token.type == expected_type);                               \
+        TEST_MSG("Input: '%s'", input);                                        \
+        TEST_MSG("Expected: %s", scc_get_tok_name(expected_type));             \
+        TEST_MSG("Got: %s", scc_get_tok_name(token.type));                     \
+                                                                               \
+        free_token(&token);                                                    \
+        scc_sstream_drop_ring(ref);                                            \
+        scc_sstream_drop(&stream);                                             \
+    } while (0)
+
+// Multi-token sequence test macro (takes the list of expected token types)
+#define TEST_SEQUENCE(input, ...)                                              \
+    do {                                                                       \
+        scc_lexer_t lexer;                                                     \
+        scc_lexer_tok_t token;                                                 \
+        scc_sstream_t stream;                                                  \
+        scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16);      \
+        scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream);               \
+        scc_lexer_init(&lexer, ref);                                           \
+                                                                               \
+        scc_tok_type_t expected[] = {__VA_ARGS__};                             \
+        size_t count = sizeof(expected) / sizeof(expected[0]);                 \
+        for (size_t i = 0; i < count; i++) {                                   \
+            scc_lexer_get_token(&lexer, &token);                               \
+            TEST_CHECK(token.type == expected[i]);                             \
+            TEST_MSG("Token %zu: input '%s'", i, input);                       \
+            TEST_MSG("Expected: %s", scc_get_tok_name(expected[i]));           \
+            TEST_MSG("Got: %s", scc_get_tok_name(token.type));                 \
+            free_token(&token);                                                \
+        }                                                                      \
+                                                                               \
+        scc_sstream_drop_ring(ref);                                            \
+        scc_sstream_drop(&stream);                                             \
+    } while (0)
+
+// ============================ Test cases ============================
+
+void test_operators() { // the first token of each input must have the listed type
+    TEST_CASE("Arithmetic operators");
+    TEST_TOKEN("+", SCC_TOK_ADD);
+    TEST_TOKEN("++", SCC_TOK_ADD_ADD);
+    TEST_TOKEN("+=", SCC_TOK_ASSIGN_ADD);
+    TEST_TOKEN("-", SCC_TOK_SUB);
+    TEST_TOKEN("--", SCC_TOK_SUB_SUB);
+    TEST_TOKEN("-=", SCC_TOK_ASSIGN_SUB);
+    TEST_TOKEN("*", SCC_TOK_MUL);
+    TEST_TOKEN("*=", SCC_TOK_ASSIGN_MUL);
+    TEST_TOKEN("/", SCC_TOK_DIV);
+    TEST_TOKEN("/=", SCC_TOK_ASSIGN_DIV);
+    TEST_TOKEN("%", SCC_TOK_MOD);
+    TEST_TOKEN("%=", SCC_TOK_ASSIGN_MOD);
+
+    TEST_CASE("Bitwise operators");
+    TEST_TOKEN("&", SCC_TOK_AND);
+    TEST_TOKEN("&&", SCC_TOK_AND_AND);
+    TEST_TOKEN("&=", SCC_TOK_ASSIGN_AND);
+    TEST_TOKEN("|", SCC_TOK_OR);
+    TEST_TOKEN("||", SCC_TOK_OR_OR);
+    TEST_TOKEN("|=", SCC_TOK_ASSIGN_OR);
+    TEST_TOKEN("^", SCC_TOK_XOR);
+    TEST_TOKEN("^=", SCC_TOK_ASSIGN_XOR);
+    TEST_TOKEN("~", SCC_TOK_BIT_NOT);
+    TEST_TOKEN("<<", SCC_TOK_L_SH);
+    TEST_TOKEN("<<=", SCC_TOK_ASSIGN_L_SH);
+    TEST_TOKEN(">>", SCC_TOK_R_SH);
+    TEST_TOKEN(">>=", SCC_TOK_ASSIGN_R_SH);
+
+    TEST_CASE("Comparison operators");
+    TEST_TOKEN("==", SCC_TOK_EQ);
+    TEST_TOKEN("!=", SCC_TOK_NEQ);
+    TEST_TOKEN("<", SCC_TOK_LT);
+    TEST_TOKEN("<=", SCC_TOK_LE);
+    TEST_TOKEN(">", SCC_TOK_GT);
+    TEST_TOKEN(">=", SCC_TOK_GE);
+
+    TEST_CASE("Special symbols");
+    TEST_TOKEN("(", SCC_TOK_L_PAREN);
+    TEST_TOKEN(")", SCC_TOK_R_PAREN);
+    TEST_TOKEN("[", SCC_TOK_L_BRACKET);
+    TEST_TOKEN("]", SCC_TOK_R_BRACKET);
+    TEST_TOKEN("{", SCC_TOK_L_BRACE);
+    TEST_TOKEN("}", SCC_TOK_R_BRACE);
+    TEST_TOKEN(";", SCC_TOK_SEMICOLON);
+    TEST_TOKEN(",", SCC_TOK_COMMA);
+    TEST_TOKEN(":", SCC_TOK_COLON);
+    TEST_TOKEN(".", SCC_TOK_DOT);
+    TEST_TOKEN("...", SCC_TOK_ELLIPSIS);
+    TEST_TOKEN("->", SCC_TOK_DEREF);
+    TEST_TOKEN("?", SCC_TOK_COND);
+}
+
+void test_keywords() {
+    TEST_CASE("C89 keywords");
+    TEST_TOKEN("while", SCC_TOK_WHILE);
+    TEST_TOKEN("sizeof", SCC_TOK_SIZEOF);
+    TEST_TOKEN("if", SCC_TOK_IF);
+    TEST_TOKEN("else", SCC_TOK_ELSE);
+    TEST_TOKEN("for", SCC_TOK_FOR);
+    TEST_TOKEN("do", SCC_TOK_DO);
+    TEST_TOKEN("switch", SCC_TOK_SWITCH);
+    TEST_TOKEN("case", SCC_TOK_CASE);
+    TEST_TOKEN("default", SCC_TOK_DEFAULT);
+    TEST_TOKEN("break", SCC_TOK_BREAK);
+    TEST_TOKEN("continue", SCC_TOK_CONTINUE);
+    TEST_TOKEN("return", SCC_TOK_RETURN);
+    TEST_TOKEN("goto", SCC_TOK_GOTO);
+    TEST_TOKEN("auto", SCC_TOK_AUTO);
+    TEST_TOKEN("register", SCC_TOK_REGISTER);
+    TEST_TOKEN("static", SCC_TOK_STATIC);
+    TEST_TOKEN("extern", SCC_TOK_EXTERN);
+    TEST_TOKEN("typedef", SCC_TOK_TYPEDEF);
+    TEST_TOKEN("const", SCC_TOK_CONST);
+    TEST_TOKEN("volatile", SCC_TOK_VOLATILE);
+    TEST_TOKEN("signed", SCC_TOK_SIGNED);
+    TEST_TOKEN("unsigned", SCC_TOK_UNSIGNED);
+    TEST_TOKEN("short", SCC_TOK_SHORT);
+    TEST_TOKEN("long", SCC_TOK_LONG);
+    TEST_TOKEN("int", SCC_TOK_INT);
+    TEST_TOKEN("char", SCC_TOK_CHAR);
+    TEST_TOKEN("float", SCC_TOK_FLOAT);
+    TEST_TOKEN("double", SCC_TOK_DOUBLE);
+    TEST_TOKEN("void", SCC_TOK_VOID);
+    TEST_TOKEN("struct", SCC_TOK_STRUCT);
+    TEST_TOKEN("union", SCC_TOK_UNION);
+    TEST_TOKEN("enum", SCC_TOK_ENUM);
+
+    TEST_CASE("C99 keywords");
+    TEST_TOKEN("inline", SCC_TOK_INLINE);
+    TEST_TOKEN("restrict", SCC_TOK_RESTRICT);
+    // _Bool, _Complex, _Imaginary can be added as needed
+
+    TEST_CASE("SCC extensions (if enabled)");
+    TEST_TOKEN("asm", SCC_TOK_ASM);
+    TEST_TOKEN("atomic", SCC_TOK_ATOMIC);
+    TEST_TOKEN("bool", SCC_TOK_BOOL);
+    TEST_TOKEN("complex", SCC_TOK_COMPLEX);
+}
+
+void test_literals() {
+    TEST_CASE("Integer literals - decimal");
+    TEST_TOKEN("0", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("123", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("2147483647", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("4294967295", SCC_TOK_INT_LITERAL);
+
+    TEST_CASE("Integer literals - hexadecimal");
+    TEST_TOKEN("0x0", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0x1A3F", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0XABCDEF", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0x123abc", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0XFF", SCC_TOK_INT_LITERAL);
+
+    TEST_CASE("Integer literals - octal");
+    TEST_TOKEN("0123", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0777", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0", SCC_TOK_INT_LITERAL); // 0 is both decimal and octal
+
+    TEST_CASE("Integer literals - binary (C23 extension)");
+    TEST_TOKEN("0b1010", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0B1100", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0b0", SCC_TOK_INT_LITERAL);
+
+    TEST_CASE("Integer literals with suffixes");
+    TEST_TOKEN("123U", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("456L", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("789UL", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0x1FFLL", SCC_TOK_INT_LITERAL);
+    TEST_TOKEN("0b1010ULL", SCC_TOK_INT_LITERAL);
+
+    TEST_CASE("Floating literals - decimal");
+    TEST_TOKEN("0.0", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("3.14", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN(".5", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("0.", SCC_TOK_FLOAT_LITERAL);
+
+    TEST_CASE("Floating literals - scientific");
+    TEST_TOKEN("1e10", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("1E-5", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("2.5e+3", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN(".1e2", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("1.e3", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("123.456e-7", SCC_TOK_FLOAT_LITERAL);
+
+    TEST_CASE("Floating literals - hexadecimal (C99)");
+    TEST_TOKEN("0x1.2p3", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("0x1p-2", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("0x0.1p10", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("0X1.2P3", SCC_TOK_FLOAT_LITERAL);
+
+    TEST_CASE("Floating literals with suffixes");
+    TEST_TOKEN("1.0f", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("2.0F", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("3.0l", SCC_TOK_FLOAT_LITERAL);
+    TEST_TOKEN("4.0L", SCC_TOK_FLOAT_LITERAL);
+
+    TEST_CASE("Character literals - simple");
+    TEST_TOKEN("'a'", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'0'", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("' '", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'\t'", SCC_TOK_CHAR_LITERAL); // a raw tab inside single quotes
+
+    TEST_CASE("Character literals - escape sequences");
+    TEST_TOKEN("'\\n'", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'\\t'", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'\\\\'", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'\\''", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'\\\"'", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'\\?'", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'\\0'", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'\\123'", SCC_TOK_CHAR_LITERAL); // octal
+    TEST_TOKEN("'\\xAB'", SCC_TOK_CHAR_LITERAL); // hexadecimal
+
+    TEST_CASE("Character literals - multi-byte (implementation defined)");
+    TEST_TOKEN("'ab'", SCC_TOK_CHAR_LITERAL);
+    TEST_TOKEN("'\\x41\\x42'", SCC_TOK_CHAR_LITERAL); // multiple escapes
+
+    TEST_CASE("String literals - basic");
+    TEST_TOKEN("\"hello\"", SCC_TOK_STRING_LITERAL);
+    TEST_TOKEN("\"\"", SCC_TOK_STRING_LITERAL);
+    TEST_TOKEN("\"a b c\"", SCC_TOK_STRING_LITERAL);
+
+    TEST_CASE("String literals - escape sequences");
+    TEST_TOKEN("\"a\\nb\\tc\"", SCC_TOK_STRING_LITERAL);
+    TEST_TOKEN("\"\\\\ \\\" \\' \\?\"", SCC_TOK_STRING_LITERAL);
+    TEST_TOKEN("\"\\123\\xAB\"", SCC_TOK_STRING_LITERAL);
+
+    TEST_CASE("String literals - wide and UTF-8 prefixes (C11)");
+    TEST_TOKEN("L\"wide\"", SCC_TOK_STRING_LITERAL);
+    TEST_TOKEN("u\"utf16\"", SCC_TOK_STRING_LITERAL);
+    TEST_TOKEN("U\"utf32\"", SCC_TOK_STRING_LITERAL);
+    TEST_TOKEN("u8\"utf8\"", SCC_TOK_STRING_LITERAL);
+}
+
+void test_whitespace() {
+    TEST_CASE("Whitespace characters");
+    TEST_TOKEN(" ", SCC_TOK_BLANK);
+    TEST_TOKEN("\t", SCC_TOK_BLANK);
+    TEST_TOKEN("\v", SCC_TOK_BLANK);
+    TEST_TOKEN("\f", SCC_TOK_BLANK);
+    TEST_TOKEN(" \t\v\f", SCC_TOK_BLANK); // consecutive blanks should form one token
+}
+
+void test_newlines() {
+    TEST_CASE("Newline characters");
+    TEST_TOKEN("\n", SCC_TOK_ENDLINE);
+    TEST_TOKEN("\r", SCC_TOK_ENDLINE);
+    TEST_TOKEN("\r\n", SCC_TOK_ENDLINE); // should be treated as a single newline
+}
+
+void test_comments() {
+    TEST_CASE("Line comments");
+    TEST_TOKEN("// single line comment", SCC_TOK_LINE_COMMENT);
+    TEST_TOKEN("// comment with // inside", SCC_TOK_LINE_COMMENT);
+    TEST_TOKEN("// comment at end", SCC_TOK_LINE_COMMENT);
+
+    TEST_CASE("Block comments");
+    TEST_TOKEN("/* simple */", SCC_TOK_BLOCK_COMMENT);
+    TEST_TOKEN("/* multi\nline */", SCC_TOK_BLOCK_COMMENT);
+    TEST_TOKEN("/**/", SCC_TOK_BLOCK_COMMENT); // empty comment
+    TEST_TOKEN("/* with * inside */", SCC_TOK_BLOCK_COMMENT);
+    TEST_TOKEN("/* nested /* not allowed in C */",
+               SCC_TOK_BLOCK_COMMENT); // comments do not nest lexically
+}
+
+void test_identifiers() {
+    TEST_CASE("Valid identifiers");
+    TEST_TOKEN("foo", SCC_TOK_IDENT);
+    TEST_TOKEN("_foo", SCC_TOK_IDENT);
+    TEST_TOKEN("foo123", SCC_TOK_IDENT);
+    TEST_TOKEN("foo_bar", SCC_TOK_IDENT);
+    TEST_TOKEN("FOO", SCC_TOK_IDENT);
+    TEST_TOKEN("_", SCC_TOK_IDENT);
+    TEST_TOKEN("__LINE__", SCC_TOK_IDENT); // preprocessor macro names are identifiers too
+
+    // Very long identifier (assumes the buffer is large enough)
+    char long_id[1024];
+    memset(long_id, 'a', sizeof(long_id) - 1);
+    long_id[sizeof(long_id) - 1] = '\0';
+    TEST_TOKEN(long_id, SCC_TOK_IDENT);
+}
+
+void test_preprocessor() {
+    TEST_CASE("Preprocessor directives - just the # token");
+    TEST_TOKEN("#", SCC_TOK_SHARP);
+    TEST_TOKEN("##", SCC_TOK_SHARP); // the first # is the token; the second # is the next
+                                     // token (verified in the sequence test)
+
+    // Multi-token sequence tests for #include etc.
+    TEST_SEQUENCE("#include <stdio.h>", SCC_TOK_SHARP, SCC_TOK_IDENT,
+                  SCC_TOK_BLANK, SCC_TOK_LT, SCC_TOK_IDENT, SCC_TOK_DOT,
+                  SCC_TOK_IDENT, SCC_TOK_GT);
+    TEST_SEQUENCE("#define FOO 123", SCC_TOK_SHARP, SCC_TOK_IDENT,
+                  SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK,
+                  SCC_TOK_INT_LITERAL);
+}
+
+void test_edge_cases() {
+    TEST_CASE("Invalid characters");
+    TEST_TOKEN("@", SCC_TOK_UNKNOWN);
+    TEST_TOKEN("`", SCC_TOK_UNKNOWN);
+    TEST_TOKEN("$", SCC_TOK_UNKNOWN); // not an identifier character in C
+
+    TEST_CASE("Empty input");
+    TEST_TOKEN("", SCC_TOK_EOF); // immediate EOF
+
+    TEST_CASE("Only whitespace");
+    TEST_TOKEN("   \t", SCC_TOK_BLANK);
+    // EOF should follow, but the single-token test only takes the first token
+
+    TEST_CASE("Numbers followed by letters (no suffix)");
+    // Lexically this should split into a number and an identifier
+    TEST_SEQUENCE("123abc", SCC_TOK_INT_LITERAL, SCC_TOK_IDENT);
+    TEST_SEQUENCE("0x123xyz", SCC_TOK_INT_LITERAL, SCC_TOK_IDENT);
+}
+
+void test_sequences() {
+    TEST_CASE("Simple expression");
+    TEST_SEQUENCE("a + b * c", SCC_TOK_IDENT, SCC_TOK_BLANK, SCC_TOK_ADD,
+                  SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK, SCC_TOK_MUL,
+                  SCC_TOK_BLANK, SCC_TOK_IDENT);
+
+    TEST_CASE("Function call");
+    TEST_SEQUENCE("func(1, 2);", SCC_TOK_IDENT, SCC_TOK_L_PAREN,
+                  SCC_TOK_INT_LITERAL, SCC_TOK_COMMA, SCC_TOK_BLANK,
+                  SCC_TOK_INT_LITERAL, SCC_TOK_R_PAREN, SCC_TOK_SEMICOLON);
+
+    TEST_CASE("Multi-character operators");
+    TEST_SEQUENCE(">>=", SCC_TOK_ASSIGN_R_SH);
+    TEST_SEQUENCE("<<=", SCC_TOK_ASSIGN_L_SH);
+    TEST_SEQUENCE("...", SCC_TOK_ELLIPSIS);
+    TEST_SEQUENCE("->", SCC_TOK_DEREF);
+    TEST_SEQUENCE("##", SCC_TOK_SHARP, SCC_TOK_SHARP); // two preprocessing tokens
+
+    TEST_CASE("Comments and whitespace interleaved");
+    TEST_SEQUENCE("/* comment */ a // line comment\n b", SCC_TOK_BLOCK_COMMENT,
+                  SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK,
+                  SCC_TOK_LINE_COMMENT, SCC_TOK_ENDLINE, SCC_TOK_BLANK,
+                  SCC_TOK_IDENT);
+
+    TEST_CASE("String literals with escapes");
+    TEST_SEQUENCE("\"hello\\nworld\"", SCC_TOK_STRING_LITERAL);
+    TEST_SEQUENCE(
+        "L\"wide\"",
+        SCC_TOK_STRING_LITERAL); // prefix as an identifier? No, the whole thing is one string literal
+
+    TEST_CASE("Character literals with escapes");
+    TEST_SEQUENCE("'\\x41'", SCC_TOK_CHAR_LITERAL);
+    TEST_SEQUENCE("'\\123'", SCC_TOK_CHAR_LITERAL);
+}
+
+void test_error_recovery() {
+    // Unterminated character literal: the lexer may keep going until newline or EOF.
+    // One assumption is that it yields SCC_TOK_CHAR_LITERAL spanning to the end,
+    // but being unterminated is an error in standard C, so we may return UNKNOWN
+    TEST_CASE("Unterminated character literal");
+    TEST_TOKEN("'a", SCC_TOK_UNKNOWN); // implementation-dependent; may be CHAR_LITERAL
+    // More reliable check: what the next token in the sequence is
+    TEST_SEQUENCE("'a b", SCC_TOK_UNKNOWN,
+                  SCC_TOK_IDENT); // assumes the first token is an error
+
+    TEST_CASE("Unterminated string literal");
+    TEST_TOKEN("\"hello", SCC_TOK_UNKNOWN); // likewise
+
+    TEST_CASE("Unterminated block comment");
+    TEST_SEQUENCE("/* comment",
+                  SCC_TOK_BLOCK_COMMENT); // runs to EOF; may still be a comment
+}
+
+// ============================ Main test list ============================
+
+TEST_LIST = {
+    {"operators", test_operators},
+    {"keywords", test_keywords},
+    {"literals", test_literals},
+    {"whitespace", test_whitespace},
+    {"newlines", test_newlines},
+    {"comments", test_comments},
+    {"identifiers", test_identifiers},
+    {"preprocessor", test_preprocessor},
+    {"edge_cases", test_edge_cases},
+    {"sequences", test_sequences},
+    {"error_recovery", test_error_recovery},
+    {NULL, NULL},
+};
--- a/libs/lexer/tests/test_parse.c
+++ b/libs/lexer/tests/test_parse.c
@@ -1,170 +0,0 @@
-// test_lexer.c
-#include <lexer.h>
-#include <string.h>
-#include <utest/acutest.h>
-
-// 测试辅助函数
-static inline void test_lexer_string(const char *input,
-                                     scc_tok_type_t expected_type) {
-    scc_lexer_t lexer;
-    scc_lexer_tok_t token;
-    scc_mem_probe_stream_t stream;
-
-    scc_lexer_init(&lexer, scc_mem_probe_stream_init(&stream, input,
-                                                     strlen(input), false));
-    scc_lexer_get_token(&lexer, &token);
-
-    TEST_CHECK(token.type == expected_type);
-    TEST_MSG("Expected: %s", scc_get_tok_name(expected_type));
-    TEST_MSG("Got: %s", scc_get_tok_name(token.type));
-}
-
-// 基础运算符测试
-void test_operators() {
-    TEST_CASE("Arithmetic operators");
-    {
-        test_lexer_string("+", SCC_TOK_ADD);
-        test_lexer_string("++", SCC_TOK_ADD_ADD);
-        test_lexer_string("+=", SCC_TOK_ASSIGN_ADD);
-        test_lexer_string("-", SCC_TOK_SUB);
-        test_lexer_string("--", SCC_TOK_SUB_SUB);
-        test_lexer_string("-=", SCC_TOK_ASSIGN_SUB);
-        test_lexer_string("*", SCC_TOK_MUL);
-        test_lexer_string("*=", SCC_TOK_ASSIGN_MUL);
-        test_lexer_string("/", SCC_TOK_DIV);
-        test_lexer_string("/=", SCC_TOK_ASSIGN_DIV);
-        test_lexer_string("%", SCC_TOK_MOD);
-        test_lexer_string("%=", SCC_TOK_ASSIGN_MOD);
-    }
-
-    TEST_CASE("Bitwise operators");
-    {
-        test_lexer_string("&", SCC_TOK_AND);
-        test_lexer_string("&&", SCC_TOK_AND_AND);
-        test_lexer_string("&=", SCC_TOK_ASSIGN_AND);
-        test_lexer_string("|", SCC_TOK_OR);
-        test_lexer_string("||", SCC_TOK_OR_OR);
-        test_lexer_string("|=", SCC_TOK_ASSIGN_OR);
-        test_lexer_string("^", SCC_TOK_XOR);
-        test_lexer_string("^=", SCC_TOK_ASSIGN_XOR);
-        test_lexer_string("~", SCC_TOK_BIT_NOT);
-        test_lexer_string("<<", SCC_TOK_L_SH);
-        test_lexer_string("<<=", SCC_TOK_ASSIGN_L_SH);
-        test_lexer_string(">>", SCC_TOK_R_SH);
-        test_lexer_string(">>=", SCC_TOK_ASSIGN_R_SH);
-    }
-
-    TEST_CASE("Comparison operators");
-    {
-        test_lexer_string("==", SCC_TOK_EQ);
-        test_lexer_string("!=", SCC_TOK_NEQ);
-        test_lexer_string("<", SCC_TOK_LT);
-        test_lexer_string("<=", SCC_TOK_LE);
-        test_lexer_string(">", SCC_TOK_GT);
-        test_lexer_string(">=", SCC_TOK_GE);
-    }
-
-    TEST_CASE("Special symbols");
-    {
-        test_lexer_string("(", SCC_TOK_L_PAREN);
-        test_lexer_string(")", SCC_TOK_R_PAREN);
-        test_lexer_string("[", SCC_TOK_L_BRACKET);
-        test_lexer_string("]", SCC_TOK_R_BRACKET);
-        test_lexer_string("{", SCC_TOK_L_BRACE);
-        test_lexer_string("}", SCC_TOK_R_BRACE);
-        test_lexer_string(";", SCC_TOK_SEMICOLON);
-        test_lexer_string(",", SCC_TOK_COMMA);
-        test_lexer_string(":", SCC_TOK_COLON);
-        test_lexer_string(".", SCC_TOK_DOT);
-        test_lexer_string("...", SCC_TOK_ELLIPSIS);
-        test_lexer_string("->", SCC_TOK_DEREF);
-        test_lexer_string("?", SCC_TOK_COND);
-    }
-}
-
-// 关键字测试
-void test_keywords() {
-    TEST_CASE("C89 keywords");
-    test_lexer_string("while", SCC_TOK_WHILE);
-    test_lexer_string("sizeof", SCC_TOK_SIZEOF);
-
-    TEST_CASE("C99 keywords");
-    test_lexer_string("restrict", SCC_TOK_RESTRICT);
-    // test_lexer_string("_Bool",  SCC_TOK_INT); // 需确认你的类型定义
-}
-
-// 字面量测试
-void test_literals() {
-    TEST_CASE("Integer literals");
-    {
-        // 十进制
-        test_lexer_string("0", SCC_TOK_INT_LITERAL);
-        test_lexer_string("123", SCC_TOK_INT_LITERAL);
-        test_lexer_string("2147483647", SCC_TOK_INT_LITERAL);
-
-        // 十六进制
-        test_lexer_string("0x0", SCC_TOK_INT_LITERAL);
-        test_lexer_string("0x1A3F", SCC_TOK_INT_LITERAL);
-        test_lexer_string("0XABCDEF", SCC_TOK_INT_LITERAL);
-
-        // 八进制
-        test_lexer_string("0123", SCC_TOK_INT_LITERAL);
-        test_lexer_string("0777", SCC_TOK_INT_LITERAL);
-
-        // 边界值测试
-        test_lexer_string("2147483647", SCC_TOK_INT_LITERAL); // INT_MAX
-        test_lexer_string("4294967295", SCC_TOK_INT_LITERAL); // UINT_MAX
-    }
-
-    TEST_CASE("Character literals");
-    {
-        test_lexer_string("'a'", SCC_TOK_CHAR_LITERAL);
-        test_lexer_string("'\\n'", SCC_TOK_CHAR_LITERAL);
-        test_lexer_string("'\\t'", SCC_TOK_CHAR_LITERAL);
-        test_lexer_string("'\\\\'", SCC_TOK_CHAR_LITERAL);
-        test_lexer_string("'\\0'", SCC_TOK_CHAR_LITERAL);
-    }
-
-    TEST_CASE("String literals");
-    {
-        test_lexer_string("\"hello\"", SCC_TOK_STRING_LITERAL);
-        test_lexer_string("\"multi-line\\nstring\"", SCC_TOK_STRING_LITERAL);
-        test_lexer_string("\"escape\\\"quote\"", SCC_TOK_STRING_LITERAL);
-    }
-
-    // TEST_CASE("Floating literals");
-    // test_lexer_string("3.14e-5",  SCC_TOK_FLOAT_LITERAL);
-}
-
-// 边界测试
-void test_edge_cases() {
-    // TEST_CASE("Long identifiers");
-    // char long_id[LEXER_MAX_ SCC_TOK_SIZE+2] = {0};
-    // memset(long_id, 'a', LEXER_MAX_ SCC_TOK_SIZE+1);
-    // test_lexer_string(long_id,  SCC_TOK_IDENT);
-
-    // TEST_CASE("Buffer boundary");
-    // char boundary[LEXER_BUFFER_SIZE*2] = {0};
-    // memset(boundary, '+', LEXER_BUFFER_SIZE*2-1);
-    // test_lexer_string(boundary,  SCC_TOK_ADD);
-}
-
-// 错误处理测试
-// void test_error_handling() {
-//     TEST_CASE("Invalid characters");
-//     cc_lexer_t lexer;
-//     tok_t token;
-
-//     init_lexer(&lexer, "test.c", NULL, test_read);
-//     get_valid_token(&lexer, &token);
-
-//     TEST_CHECK(token.type ==  SCC_TOK_EOF); // 应触发错误处理
-// }
-
-// 测试列表
-TEST_LIST = {{"operators", test_operators},
-             {"keywords", test_keywords},
-             {"literals", test_literals},
-             {"edge_cases", test_edge_cases},
-             //  {"error_handling", test_error_handling},
-             {NULL, NULL}};
--- a/libs/lexer/tests/test_run.c
+++ b/libs/lexer/tests/test_run.c
@@ -1,93 +0,0 @@
-#include <lexer.h>
-#include <lexer_log.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-/// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
-/*
-tok_tConstant {
-    int have;
-    union {
-        char ch;
-        int i;
-        float f;
-        double d;
-        long long ll;
-        char* str;
-    };
-};
-*/
-
-int g_num;
-int g_num_arr[3];
-int main(int argc, char *argv[]) {
-    // int num = 0;
-    if (argc == 3 && strcmp(argv[2], "--debug") == 0) {
-        log_set_level(NULL, LOG_LEVEL_ALL);
-    } else {
-        // FIXME it is a hack lexer_logger
-        log_set_level(&__scc_lexer_log, LOG_LEVEL_NOTSET);
-        log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR |
-                                LOG_LEVEL_FATAL);
-    }
-
-    const char *file_name = __FILE__;
-    if (argc == 2) {
-        file_name = argv[1];
-    }
-    FILE *fp = fopen(file_name, "rb");
-    if (fp == NULL) {
-        perror("open file failed");
-        return 1;
-    }
-
-    if (fseek(fp, 0, SEEK_END) != 0) {
-        perror("fseek failed");
-        return 1;
-    }
-    usize fsize = ftell(fp);
-    LOG_INFO("file size: %zu", fsize);
-    if (fseek(fp, 0, SEEK_SET)) {
-        perror("fseek failed");
-        return 1;
-    }
-
-    char *buffer = (char *)malloc(fsize);
-
-    usize read_ret = fread(buffer, 1, fsize, fp);
-    fclose(fp);
-    if (read_ret != fsize) {
-        LOG_FATAL("fread failed read_ret %u != fsize %u", read_ret, fsize);
-        free(buffer);
-        return 1;
-    }
-
-    scc_lexer_t lexer;
-    scc_mem_probe_stream_t mem_stream = {0};
-    scc_probe_stream_t *stream =
-        scc_mem_probe_stream_init(&mem_stream, buffer, fsize, false);
-    Assert(stream != null);
-    scc_cstring_clear(&stream->name);
-    scc_cstring_append_cstr(&stream->name, file_name, strlen(file_name));
-    scc_lexer_init(&lexer, stream);
-    scc_lexer_tok_t tok;
-
-    while (1) {
-        scc_lexer_get_valid_token(&lexer, &tok);
-        if (tok.type == SCC_TOK_EOF) {
-            break;
-        }
-        LOG_DEBUG("token `%s` at %s:%u:%u", scc_get_tok_name(tok.type),
-                  scc_cstring_as_cstr(&tok.loc.name), tok.loc.line,
-                  tok.loc.col);
-        Assert(tok.loc.offset <= fsize);
-        // LOG_DEBUG("%s", tok.val.str);
-        // printf("line: %d, column: %d, type: %3d, typename: %s\n",
-        //     lexer.line, lexer.index, tok.type, scc_get_tok_name(tok.type));
-    }
-
-    free(buffer);
-    LOG_INFO("Lexer is Ok...");
-    return 0;
-}