refactor(lex_parser): 重命名libcore为scc_core并重构头文件包含
- 将依赖项从libcore重命名为scc_core - 更新头文件包含路径从<libcore.h>到<scc_core.h> - 保持原有功能不变 refactor(lexer): 重命名libcore为scc_core并添加词法流式解析功能 - 将依赖项从libcore重命名为scc_core - 移除不再需要的scc_lexer_token结构体定义 - 重命名struct cc_lexer为struct scc_lexer - 添加scc_lexer_stream_t流式解析器相关定义和实现 - 新增lexer_stream.c文件实现流式token缓冲功能 refactor(lexer_log): 重命名logger变量和头文件定义 - 将头文件保护宏从__SMCC_LEXER_LOG_H__改为__SCC_LEXER_LOG_H__ - 将logger变量从__smcc_lexer_log改为__scc_lexer_log - 更新头文件包含从<libcore.h>到<scc_core.h> refactor(lexer_token): 重新组织token头文件结构 - 将头文件保护宏从__SMCC_CC_TOKEN_H__改为__SCC_LEXER_TOKEN_H__ - 更新头文件包含从<libcore.h>到<scc_core.h> - 将scc_lexer_token结构体定义移至该文件 refactor(lexer): 简化token匹配代码格式 - 移除LCC相关的注释内容 - 优化括号符号的token匹配代码格式,使用clang-format控制 refactor(pprocessor): 更新依赖项名称和头文件包含 - 将libcore重命名为scc_core - 将libutils重命名为scc_utils - 更新头文件包含路径 refactor(runtime): 重命名libcore为scc_core并重构目录结构 - 将libcore目录重命名为scc_core - 将libutils目录重命名为scc_utils - 更新所有相关的头文件包含路径 - 修改cbuild.toml中的包名称 - 更新core_vec.h中的宏定义以支持标准库模式
This commit is contained in:
@@ -1,31 +1,3 @@
|
||||
/**
|
||||
* 仿照LCCompiler的词法分析部分
|
||||
*
|
||||
* 如下为LCC的README in 2025.2
|
||||
This hierarchy is the distribution for lcc version 4.2.
|
||||
|
||||
lcc version 3.x is described in the book "A Retargetable C Compiler:
|
||||
Design and Implementation" (Addison-Wesley, 1995, ISBN 0-8053-1670-1).
|
||||
There are significant differences between 3.x and 4.x, most notably in
|
||||
the intermediate code. For details, see
|
||||
https://drh.github.io/lcc/documents/interface4.pdf.
|
||||
|
||||
VERSION 4.2 IS INCOMPATIBLE WITH EARLIER VERSIONS OF LCC. DO NOT
|
||||
UNLOAD THIS DISTRIBUTION ON TOP OF A 3.X DISTRIBUTION.
|
||||
|
||||
LCC is a C89 ("ANSI C") compiler designed to be highly retargetable.
|
||||
|
||||
LOG describes the changes since the last release.
|
||||
|
||||
CPYRIGHT describes the conditions under you can use, copy, modify, and
|
||||
distribute lcc or works derived from lcc.
|
||||
|
||||
doc/install.html is an HTML file that gives a complete description of
|
||||
the distribution and installation instructions.
|
||||
|
||||
Chris Fraser / cwf@aya.yale.edu
|
||||
David Hanson / drh@drhanson.net
|
||||
*/
|
||||
#include <lex_parser.h>
|
||||
#include <lexer.h>
|
||||
#include <lexer_log.h>
|
||||
@@ -329,33 +301,17 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '[':
|
||||
type = SCC_TOK_L_BRACKET;
|
||||
break;
|
||||
case ']':
|
||||
type = SCC_TOK_R_BRACKET;
|
||||
break;
|
||||
case '(':
|
||||
type = SCC_TOK_L_PAREN;
|
||||
break;
|
||||
case ')':
|
||||
type = SCC_TOK_R_PAREN;
|
||||
break;
|
||||
case '{':
|
||||
type = SCC_TOK_L_BRACE;
|
||||
break;
|
||||
case '}':
|
||||
type = SCC_TOK_R_BRACE;
|
||||
break;
|
||||
case ';':
|
||||
type = SCC_TOK_SEMICOLON;
|
||||
break;
|
||||
case ',':
|
||||
type = SCC_TOK_COMMA;
|
||||
break;
|
||||
case ':':
|
||||
type = SCC_TOK_COLON;
|
||||
break;
|
||||
/* clang-format off */
|
||||
case '[': type = SCC_TOK_L_BRACKET; break;
|
||||
case ']': type = SCC_TOK_R_BRACKET; break;
|
||||
case '(': type = SCC_TOK_L_PAREN; break;
|
||||
case ')': type = SCC_TOK_R_PAREN; break;
|
||||
case '{': type = SCC_TOK_L_BRACE; break;
|
||||
case '}': type = SCC_TOK_R_BRACE; break;
|
||||
case ';': type = SCC_TOK_SEMICOLON; break;
|
||||
case ',': type = SCC_TOK_COMMA; break;
|
||||
case ':': type = SCC_TOK_COLON; break;
|
||||
/* clang-format on */
|
||||
case '.':
|
||||
if (scc_probe_stream_next(stream) == '.' &&
|
||||
scc_probe_stream_next(stream) == '.') {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include <lexer_log.h>
|
||||
|
||||
logger_t __smcc_lexer_log = {
|
||||
logger_t __scc_lexer_log = {
|
||||
.name = "lexer",
|
||||
.level = LOG_LEVEL_ALL,
|
||||
.handler = log_default_handler,
|
||||
|
||||
138
libs/lexer/src/lexer_stream.c
Normal file
138
libs/lexer/src/lexer_stream.c
Normal file
@@ -0,0 +1,138 @@
|
||||
#include <lexer.h>
|
||||
|
||||
/**
 * Ensure the stream's ring buffer can hold `n` more tokens beyond
 * probe_pos, growing it if necessary, then pull `n` fresh tokens from
 * the underlying lexer.
 *
 * Invariant: the live window is [curr_pos, probe_pos); both counters
 * are logical positions mapped onto the buffer with `% toks.cap`.
 * After a resize the window is re-based so curr_pos == 0.
 */
static void lexer_stream_extend(scc_lexer_stream_t *stream, usize n) {
    Assert(stream != null);
    // Check whether the ring buffer needs to grow.
    if ((stream->probe_pos - stream->curr_pos + n) >= stream->toks.cap) {
        // Grow: at least double, but never below live + n + 1 slots.
        usize new_cap = stream->toks.cap * 2;
        if (new_cap < stream->probe_pos - stream->curr_pos + n + 1) {
            new_cap = stream->probe_pos - stream->curr_pos + n + 1;
        }

        // scc_realloc(null, ...) acts as an allocation here.
        scc_lexer_tok_t *new_data =
            scc_realloc(null, new_cap * sizeof(scc_lexer_tok_t));
        if (!new_data) {
            LOG_FATAL("lexer_stream_extend: realloc failed\n");
        }

        // Copy the live tokens into the new buffer, preserving order
        // (they may wrap around the end of the old ring).
        usize data_count = stream->probe_pos - stream->curr_pos;
        for (usize i = 0; i < data_count; ++i) {
            usize old_idx = (stream->curr_pos + i) % stream->toks.cap;
            new_data[i] = stream->toks.data[old_idx];
        }

        // Release the old buffer.
        if (stream->toks.data) {
            scc_free(stream->toks.data);
        }

        // Re-base the window onto the new, linear buffer.
        stream->toks.data = new_data;
        stream->toks.cap = new_cap;
        stream->curr_pos = 0;
        stream->probe_pos = data_count;
    }

    // Pull n new tokens from the lexer into the slots after probe_pos.
    for (usize i = 0; i < n; ++i) {
        usize idx = (stream->probe_pos + i) % stream->toks.cap;
        if (stream->need_comment)
            scc_lexer_get_token(stream->lexer, &stream->toks.data[idx]);
        else
            scc_lexer_get_valid_token(stream->lexer, &stream->toks.data[idx]);
    }

    stream->probe_pos += n;
}
|
||||
|
||||
static const scc_lexer_tok_t *lexer_stream_peek(scc_lexer_stream_t *stream,
|
||||
usize n) {
|
||||
Assert(stream != null);
|
||||
|
||||
// 计算需要的前看token数量
|
||||
usize available = stream->probe_pos - stream->curr_pos;
|
||||
if (n >= available) {
|
||||
// 需要扩展缓冲区
|
||||
usize need = n - available + 1;
|
||||
lexer_stream_extend(stream, need);
|
||||
}
|
||||
|
||||
// 计算实际缓冲区中的位置
|
||||
usize idx = (stream->curr_pos + n) % stream->toks.cap;
|
||||
return &stream->toks.data[idx];
|
||||
}
|
||||
|
||||
/**
 * Consume `offset` tokens from the stream, pulling more from the lexer
 * first if the buffered window is too short.
 *
 * When most of the logical window has been consumed, the live tokens
 * are compacted to the front of the buffer so curr_pos/probe_pos stay
 * small. BUGFIX vs. previous version: compaction used to realloc the
 * ring down to exactly `data_count` slots. If data_count was 0 that
 * left toks.cap == 0, making every later `% stream->toks.cap` in
 * peek/extend a modulo-by-zero (undefined behavior), and shrinking cap
 * to the live count also threw away the amortized growth capacity.
 * We now compact in place and never change toks.cap.
 */
static void lexer_stream_advance(scc_lexer_stream_t *stream, usize offset) {
    Assert(stream != null);

    if (stream->curr_pos + offset > stream->probe_pos) {
        // Not enough buffered tokens: fetch the shortfall.
        usize need = stream->curr_pos + offset - stream->probe_pos;
        lexer_stream_extend(stream, need);
    }

    stream->curr_pos += offset;

    // Optional: compact the buffer once most of it has been consumed.
    if (stream->curr_pos > stream->toks.cap * 3 / 4) {
        usize data_count = stream->probe_pos - stream->curr_pos;

        if (data_count == 0) {
            // No live tokens: just rewind the window; cap unchanged.
            stream->curr_pos = 0;
            stream->probe_pos = 0;
            return;
        }

        // Stage the live tokens in a temporary linear buffer (they may
        // wrap around the ring, so a direct in-place move could overlap).
        scc_lexer_tok_t *temp =
            scc_realloc(null, data_count * sizeof(scc_lexer_tok_t));
        if (!temp)
            return; // compaction is best-effort; failing is harmless

        for (usize i = 0; i < data_count; ++i) {
            usize old_idx = (stream->curr_pos + i) % stream->toks.cap;
            temp[i] = stream->toks.data[old_idx];
        }

        // Copy back to the front of the EXISTING buffer: toks.cap stays
        // unchanged (and > 0), preserving `% cap` validity everywhere.
        for (usize i = 0; i < data_count; ++i) {
            stream->toks.data[i] = temp[i];
        }
        scc_free(temp);

        stream->curr_pos = 0;
        stream->probe_pos = data_count;
    }
}
|
||||
|
||||
/**
 * Release the stream's token buffer and reset every field; the stream
 * must be re-initialized via scc_lexer_to_stream() before reuse.
 *
 * NOTE(review): this assumes scc_lexer_tok_t owns no heap resources of
 * its own — the previous placeholder loop that walked the buffer did
 * nothing. If tokens ever gain owned pointers, free them here before
 * releasing the vector.
 */
static void lexer_stream_drop(scc_lexer_stream_t *stream) {
    Assert(stream != null);

    scc_vec_free(stream->toks);

    stream->lexer = null;
    stream->curr_pos = 0;
    stream->probe_pos = 0;
    stream->need_comment = false;

    // Detach the operation table so stale calls fault loudly.
    stream->peek = null;
    stream->advance = null;
    stream->drop = null;
}
|
||||
|
||||
void scc_lexer_to_stream(scc_lexer_t *lexer, scc_lexer_stream_t *stream,
|
||||
cbool need_comment) {
|
||||
Assert(lexer != null && stream != null);
|
||||
|
||||
stream->lexer = lexer;
|
||||
stream->curr_pos = 0;
|
||||
stream->probe_pos = 0;
|
||||
stream->need_comment = need_comment;
|
||||
|
||||
// 初始化循环缓冲区
|
||||
scc_vec_init(stream->toks);
|
||||
scc_vec_realloc(stream->toks, 8); // 初始容量为8
|
||||
|
||||
stream->peek = lexer_stream_peek;
|
||||
stream->advance = lexer_stream_advance;
|
||||
stream->drop = lexer_stream_drop;
|
||||
}
|
||||
Reference in New Issue
Block a user