From 681a15cb44dc537a30bfc524dd1263cec18ddb88 Mon Sep 17 00:00:00 2001
From: zzy <2450266535@qq.com>
Date: Mon, 16 Feb 2026 22:27:09 +0800
Subject: [PATCH] =?UTF-8?q?feat(lexer):=20=E6=B7=BB=E5=8A=A0=E9=A2=84?=
 =?UTF-8?q?=E5=A4=84=E7=90=86=E5=99=A8=E5=85=B3=E9=94=AE=E5=AD=97=E6=94=AF?=
 =?UTF-8?q?=E6=8C=81=E5=B9=B6=E4=BC=98=E5=8C=96=E8=AF=8D=E6=B3=95=E5=88=86?=
 =?UTF-8?q?=E6=9E=90=E5=99=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

添加了完整的C预处理器关键字表，包括define、include、ifdef等关键字，
用于支持预处理器功能。

- 新增SCC_PPKEYWORD_TABLE宏定义所有预处理器关键字
- 在token类型枚举中包含预处理关键字
- 重构词法分析器以正确识别预处理关键字
- 添加scc_lexer_tok_drop函数用于清理token资源

refactor(lexer): 重构词法分析器内部结构

- 修复keywords数组字段名从tok到tok_type
- 优化scc_lexer_get_valid_token使用while循环替代do-while
- 修改fill_token和fill_valid_token返回类型为cbool
- 调整lexer_to_ring参数语义更清晰

fix(sstream): 修正环形缓冲区填充函数返回类型

- 将fill_func返回类型从int改为cbool以保持一致性
- 更新SCC_RING宏文档说明fill回调函数返回值含义

docs(argparse): 重命名examples目录修复路径错误

- 将libs/argparse/example重命名为libs/argparse/examples保持一致性

test(lexer): 更新测试用例适配新的流接口

- 修改测试代码中的scc_sstream_ref_ring为scc_sstream_to_ring
- 确保测试用例与新的API保持兼容

style(lexer): 更新示例程序日志级别和实现方式

- 将调试日志改为信息日志
- 使用环形缓冲区实现示例程序的token获取
---
 libs/argparse/{example => examples}/main.c |  0
 libs/lexer/include/lexer_token.h           | 35 ++++++++++++++++++++--
 libs/lexer/src/lexer.c                     | 32 +++++++++++++-------
 libs/lexer/src/main.c                      | 22 +++++++++-----
 libs/lexer/tests/test_lexer.c              |  4 +--
 libs/sstream/src/scc_sstream.c             |  2 +-
 runtime/scc_core/include/scc_core_ring.h   |  4 +--
 7 files changed, 73 insertions(+), 26 deletions(-)
 rename libs/argparse/{example => examples}/main.c (100%)

diff --git a/libs/argparse/example/main.c b/libs/argparse/examples/main.c
similarity index 100%
rename from libs/argparse/example/main.c
rename to libs/argparse/examples/main.c
diff --git a/libs/lexer/include/lexer_token.h b/libs/lexer/include/lexer_token.h
index 9ccfc3a..92cbd0d 100644
--- a/libs/lexer/include/lexer_token.h
+++ b/libs/lexer/include/lexer_token.h
@@ -10,6 +10,28 @@ typedef enum scc_cstd {
     SCC_CEXT_SCC,
 } scc_cstd_t;
 
+/* clang-format off */
+/// https://cppreference.cn/w/c/preprocessor
+#define SCC_PPKEYWORD_TABLE \
+    X(define    , SCC_CSTD_C99, SCC_PP_TOK_DEFINE     ) \
+    X(elif      , SCC_CSTD_C99, SCC_PP_TOK_ELIF       ) \
+    X(elifdef   , SCC_CSTD_C99, SCC_PP_TOK_ELIFDEF    ) \
+    X(elifndef  , SCC_CSTD_C99, SCC_PP_TOK_ELIFNDEF   ) \
+    X(else      , SCC_CSTD_C99, SCC_PP_TOK_ELSE       ) \
+    X(embed     , SCC_CSTD_C99, SCC_PP_TOK_EMBED      ) \
+    X(endif     , SCC_CSTD_C99, SCC_PP_TOK_ENDIF      ) \
+    X(error     , SCC_CSTD_C99, SCC_PP_TOK_ERROR      ) \
+    X(if        , SCC_CSTD_C99, SCC_PP_TOK_IF         ) \
+    X(ifdef     , SCC_CSTD_C99, SCC_PP_TOK_IFDEF      ) \
+    X(ifndef    , SCC_CSTD_C99, SCC_PP_TOK_IFNDEF     ) \
+    X(include   , SCC_CSTD_C99, SCC_PP_TOK_INCLUDE    ) \
+    X(line      , SCC_CSTD_C99, SCC_PP_TOK_LINE       ) \
+    X(pragma    , SCC_CSTD_C99, SCC_PP_TOK_PRAGMA     ) \
+    X(undef     , SCC_CSTD_C99, SCC_PP_TOK_UNDEF      ) \
+    X(warning   , SCC_CSTD_C99, SCC_PP_TOK_WARNING    ) \
+    // END
+/* clang-format on */
+
 /* clang-format off */
 // WARNING: Using Binary Search To Fast Find Keyword
 // 你必须确保其中是按照字典序排列
@@ -118,14 +140,17 @@ typedef enum scc_cstd {
 
 // 定义TokenType枚举
 typedef enum scc_tok_type {
-// 处理普通token
+
 #define X(str, subtype, tok) tok,
     SCC_CTOK_TABLE
 #undef X
 
-// 处理关键字（保持原有格式）
+#define X(name, type, tok) tok,
+        SCC_PPKEYWORD_TABLE
+#undef X
+
 #define X(name, subtype, tok, std) tok,
-        SCC_CKEYWORD_TABLE
+            SCC_CKEYWORD_TABLE
 #undef X
 } scc_tok_type_t;
 
@@ -154,6 +179,10 @@ typedef struct scc_lexer_token {
     scc_pos_t loc;
 } scc_lexer_tok_t;
 
+static inline void scc_lexer_tok_drop(scc_lexer_tok_t *tok) {
+    scc_cstring_free(&tok->lexeme);
+}
+
 static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok,
                                         scc_tok_type_t type) {
     return tok->type == type;
diff --git a/libs/lexer/src/lexer.c b/libs/lexer/src/lexer.c
index 723389e..3212195 100644
--- a/libs/lexer/src/lexer.c
+++ b/libs/lexer/src/lexer.c
@@ -5,7 +5,7 @@
 static const struct {
     const char *name;
     scc_cstd_t std_type;
-    scc_tok_type_t tok;
+    scc_tok_type_t tok_type;
 } keywords[] = {
 #define X(name, subtype, tok, std_type, ...) {#name, std_type, tok},
     SCC_CKEYWORD_TABLE
@@ -168,7 +168,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
         // 检查是否为关键字
         int idx = keyword_cmp(scc_cstring_as_cstr(&lex), scc_cstring_len(&lex));
         if (idx != -1) {
-            token->type = keywords[idx].tok;
+            token->type = keywords[idx].tok_type;
         }
     } else if (is_digit(ch)) {
         // 数字字面量（整数/浮点）
@@ -461,33 +461,43 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
 // scc_lexer_get_token maybe got invalid (with parser)
 void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
     scc_tok_subtype_t subtype;
-    do {
+    while (1) {
         scc_lexer_get_token(lexer, token);
         subtype = scc_get_tok_subtype(token->type);
         AssertFmt(subtype != SCC_TOK_SUBTYPE_INVALID,
                   "Invalid token: `%s` at %s:%d:%d",
                   scc_get_tok_name(token->type), token->loc.name,
                   token->loc.line, token->loc.col);
-    } while (subtype == SCC_TOK_SUBTYPE_EMPTYSPACE ||
-             subtype == SCC_TOK_SUBTYPE_COMMENT);
+        if (subtype != SCC_TOK_SUBTYPE_EMPTYSPACE &&
+            subtype != SCC_TOK_SUBTYPE_COMMENT) {
+            break;
+        }
+        scc_lexer_tok_drop(token); // skipped token: free its lexeme, then retry
+    }
 }
 
-static int fill_token(scc_lexer_tok_t *out, void *userdata) {
+static cbool fill_token(scc_lexer_tok_t *out, void *userdata) {
     scc_lexer_t *lexer = userdata;
     scc_lexer_get_token(lexer, out);
-    return 0;
+    if (out->type == SCC_TOK_EOF) {
+        return false;
+    }
+    return true;
 }
 
-static int fill_valid_token(scc_lexer_tok_t *out, void *userdata) {
+static cbool fill_valid_token(scc_lexer_tok_t *out, void *userdata) {
     scc_lexer_t *lexer = userdata;
     scc_lexer_get_valid_token(lexer, out);
-    return 0;
+    if (out->type == SCC_TOK_EOF) {
+        return false;
+    }
+    return true;
 }
 
 scc_lexer_tok_ring_t *scc_lexer_to_ring(scc_lexer_t *lexer, int ring_size,
-                                        cbool need_comment) {
+                                        cbool fill_all) {
     scc_ring_init(lexer->ring, ring_size,
-                  need_comment ? fill_token : fill_valid_token, lexer);
+                  fill_all ? fill_token : fill_valid_token, lexer);
     lexer->ring_ref_count++;
     return &lexer->ring;
 }
diff --git a/libs/lexer/src/main.c b/libs/lexer/src/main.c
index ed273de..60a51aa 100644
--- a/libs/lexer/src/main.c
+++ b/libs/lexer/src/main.c
@@ -40,19 +40,27 @@ int main(int argc, char *argv[]) {
     scc_lexer_t lexer;
     scc_sstream_t stream;
     scc_sstream_init(&stream, file_name, 16);
-    scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream);
+    scc_sstream_ring_t *ref = scc_sstream_to_ring(&stream);
     scc_lexer_init(&lexer, ref);
     scc_lexer_tok_t token;
 
+    scc_lexer_tok_ring_t *tok_ring = scc_lexer_to_ring(&lexer, 16, false);
+    int ok;
     while (1) {
-        scc_lexer_get_valid_token(&lexer, &token);
-        if (token.type == SCC_TOK_EOF) {
+        // scc_lexer_get_valid_token(&lexer, &token);
+        // if (token.type == SCC_TOK_EOF) {
+        //     break;
+        // }
+
+        scc_ring_next_consume(*tok_ring, token, ok);
+        if (!ok) {
             break;
         }
-        LOG_DEBUG("get token [%-8s] `%s` at %s:%d:%d",
-                  scc_get_tok_name(token.type),
-                  scc_cstring_as_cstr(&token.lexeme), token.loc.name,
-                  token.loc.line, token.loc.col);
+
+        LOG_INFO("get token [%-8s] `%s` at %s:%d:%d",
+                 scc_get_tok_name(token.type),
+                 scc_cstring_as_cstr(&token.lexeme), token.loc.name,
+                 token.loc.line, token.loc.col);
         scc_cstring_free(&token.lexeme);
     }
     scc_sstream_drop_ring(ref);
diff --git a/libs/lexer/tests/test_lexer.c b/libs/lexer/tests/test_lexer.c
index 2218576..b0ad861 100644
--- a/libs/lexer/tests/test_lexer.c
+++ b/libs/lexer/tests/test_lexer.c
@@ -13,7 +13,7 @@ static void free_token(scc_lexer_tok_t *tok) { scc_cstring_free(&tok->lexeme); }
         scc_lexer_tok_t token;                                                 \
         scc_sstream_t stream;                                                  \
         scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16);      \
-        scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream);               \
+        scc_sstream_ring_t *ref = scc_sstream_to_ring(&stream);                \
         scc_lexer_init(&lexer, ref);                                           \
         scc_lexer_get_token(&lexer, &token);                                   \
                                                                                \
@@ -34,7 +34,7 @@ static void free_token(scc_lexer_tok_t *tok) { scc_cstring_free(&tok->lexeme); }
         scc_lexer_tok_t token;                                                 \
         scc_sstream_t stream;                                                  \
         scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16);      \
-        scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream);               \
+        scc_sstream_ring_t *ref = scc_sstream_to_ring(&stream);                \
         scc_lexer_init(&lexer, ref);                                           \
                                                                                \
         scc_tok_type_t expected[] = {__VA_ARGS__};                             \
diff --git a/libs/sstream/src/scc_sstream.c b/libs/sstream/src/scc_sstream.c
index 630666e..130ca61 100644
--- a/libs/sstream/src/scc_sstream.c
+++ b/libs/sstream/src/scc_sstream.c
@@ -66,7 +66,7 @@ static int sstream_scan_at(scc_sstream_t *stream, scc_pos_t scan_pos,
 }
 
 // 环形缓冲区填充回调（通过 userdata 获取流对象）
-static int fill_func(scc_sstream_char_t *out, void *userdata) {
+static cbool fill_func(scc_sstream_char_t *out, void *userdata) {
     scc_sstream_t *stream = (scc_sstream_t *)userdata;
     if (stream->fill_pos.offset >= stream->len)
         return false; // 已到文件尾
diff --git a/runtime/scc_core/include/scc_core_ring.h b/runtime/scc_core/include/scc_core_ring.h
index 152dbf2..a2fd2c1 100644
--- a/runtime/scc_core/include/scc_core_ring.h
+++ b/runtime/scc_core/include/scc_core_ring.h
@@ -14,7 +14,7 @@
  *   - head: 已消费的逻辑索引
  *   - probe: 预览索引
  *   - tail: 已填充的逻辑末尾索引
- *   - fill: 填充回调函数 (当需要新元素时调用)
+ *   - fill: 填充回调函数 (当需要新元素时调用) 返回true表示成功
  */
 #define SCC_RING(type)                                                         \
     struct {                                                                   \
@@ -23,7 +23,7 @@
         usize head;                                                            \
         usize probe;                                                           \
         usize tail;                                                            \
-        int (*fill)(type * out, void *userdata);                               \
+        cbool (*fill)(type * out, void *userdata);                             \
         void *userdata;                                                        \
     }