feat(core): 重构词法分析器流接口并迁移至 core 库

将 lexer_stream 抽象为 core_stream，统一运行时核心组件的输入流模型。移除了旧的 `lexer_stream.h` 定义，并将其功能完整迁移至 `core_stream.h` 中。更新了内存流实现以适配新的 core_stream 接口，并修复部分资源释放问题。同时调整日志模块包含方式，增强模块间解耦能力。此变更影响词法分析器对输入流的操作方式，所有涉及 stream 的类型与函数均已替换为 core 前缀版本。测试用例同步更新并验证通过。
2025-11-20 14:17:03 +08:00
parent 5c24f35c87
commit 47b56d52f6
13 changed files with 161 additions and 99 deletions
--- a/libs/lexer/include/lexer.h
+++ b/libs/lexer/include/lexer.h
@@ -7,7 +7,6 @@
 #define __SMCC_CC_LEXER_H__

 #include <libcore.h>
-#include "lexer_stream.h"
 #include "lexer_token.h"

 typedef struct lexer_loc {
@@ -30,7 +29,7 @@ typedef struct lexer_token {
 * 封装词法分析所需的状态信息和缓冲区管理
 */
 typedef struct cc_lexer {
-    lexer_stream_t* stream;
+    core_stream_t* stream;
    lexer_loc_t pos;
 } smcc_lexer_t;

@@ -39,7 +38,7 @@ typedef struct cc_lexer {
 * @param[out] lexer 要初始化的词法分析器实例
 * @param[in] stream 输入流对象指针
 */
-void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream);
+void lexer_init(smcc_lexer_t* lexer, core_stream_t* stream);

 /**
 * @brief 获取原始token
--- a/libs/lexer/include/lexer_stream.h
+++ b/libs/lexer/include/lexer_stream.h
@@ -1,37 +0,0 @@
-#include <core_type.h>
-
-typedef struct lexer_stream lexer_stream_t;
-
-#define lexer_stream_eof (-1)
-
-struct lexer_stream {
-    const char* name;
-    usize name_len;
-
-    /// @brief 读取指定数量的字符到缓冲区
-    usize (*read_buf)(lexer_stream_t* stream, char* buffer, usize count);
-
-    /// @brief 获取下一个字符
-    int (*peek_char)(lexer_stream_t* stream);
-
-    /// @brief 重置字符流位置
-    void (*reset_char) (lexer_stream_t* stream);
-
-    /// @brief 读取并消费下一个字符（移动流位置）
-    int (*next_char)(lexer_stream_t* stream);
-
-    /// @brief 释放资源
-    void (*free_stream) (lexer_stream_t* steam);
-};
-
-#ifndef __SMCC_LEXER_NO_MEM_STREAM__
-typedef struct lexer_mem_stream {
-    lexer_stream_t stream;
-    const char* data;
-    usize data_length;
-    usize curr_pos;
-    usize peek_pos;
-    cbool owned;
-} lexer_mem_stream_t;
-lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* data, usize length, cbool need_copy);
-#endif
--- a/libs/lexer/src/lexer.c
+++ b/libs/lexer/src/lexer.c
@@ -72,11 +72,11 @@ static inline int keyword_cmp(const char* name, int len) {
    return -1; // Not a keyword.
 }

-void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream) {
+void lexer_init(smcc_lexer_t* lexer, core_stream_t* stream) {
    lexer->stream = stream;
    lexer->pos = (lexer_loc_t) {
-        .name = stream->name,
-        .name_len = stream->name_len,
+        .name = cstring_as_cstr(&stream->name),
+        .name_len = cstring_len(&stream->name),
        .line = 1,
        .column = 1,
        .offset = 0,
@@ -91,14 +91,14 @@ void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream) {
 #define set_err_token(token)        ((token)->type = TOKEN_UNKNOWN)

 static void skip_newline(smcc_lexer_t* lexer, lexer_tok_t* token) {
-    lexer_stream_t* stream = lexer->stream;
+    core_stream_t* stream = lexer->stream;
    token->type = TOKEN_LINE_COMMENT;

    // 循环直到遇到换行符或文件结束
    while (1) {
        int ch = stream_next_char(stream);
        
-        if (ch == lexer_stream_eof) {
+        if (ch == core_stream_eof) {
            // 到达文件末尾，直接返回
            return;
        }
@@ -114,7 +114,7 @@ static void skip_newline(smcc_lexer_t* lexer, lexer_tok_t* token) {
 }

 static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
-    lexer_stream_t* stream = lexer->stream;
+    core_stream_t* stream = lexer->stream;
    token->type = TOKEN_BLOCK_COMMENT;
    int ch;
    
@@ -131,7 +131,7 @@ static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
        ch = stream_next_char(stream);
        lexer_next_pos(lexer);

-        if (ch == lexer_stream_eof) {
+        if (ch == core_stream_eof) {
            // 未闭合的块注释
            LEX_WARN("Unterminated block comment");
            return;
@@ -183,11 +183,11 @@ static inline int got_slash(int peek) {
 static void parse_char(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
    token->type = TOKEN_CHAR_LITERAL;
-    lexer_stream_t *stream = lexer->stream;
+    core_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);

-    if (ch == lexer_stream_eof) {
+    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch != '\'') {
@@ -200,7 +200,7 @@ static void parse_char(smcc_lexer_t* lexer, lexer_tok_t* token) {
    ch = stream_next_char(stream);
    lexer_next_pos(lexer);

-    if (ch == lexer_stream_eof) {
+    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at middle");
        goto ERR;
    } else if (ch == '\\') {
@@ -229,11 +229,11 @@ ERR:
 static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
    token->type = TOKEN_STRING_LITERAL;
-    lexer_stream_t *stream = lexer->stream;
+    core_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);

-    if (ch == lexer_stream_eof) {
+    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch != '"') {
@@ -248,7 +248,7 @@ static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
    while (1) {
        ch = stream_peek_char(stream);
        
-        if (ch == lexer_stream_eof) {
+        if (ch == core_stream_eof) {
            LEX_ERROR("Unexpected EOF at string literal");
            break;
        } else if (ch == '\n') {
@@ -285,11 +285,11 @@ ERR:

 static void parse_number(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
-    lexer_stream_t *stream = lexer->stream;
+    core_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);
    int base = 0;
-    if (ch == lexer_stream_eof) {
+    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch == '0') {
@@ -325,7 +325,7 @@ static void parse_number(smcc_lexer_t* lexer, lexer_tok_t* token) {
    while (1) {
        ch = stream_peek_char(stream);

-        if (ch == lexer_stream_eof) {
+        if (ch == core_stream_eof) {
            break;
        } else if (ch >= 'a' && ch <= 'z') {
            tmp = ch - 'a' + 10;
@@ -356,11 +356,11 @@ ERR:

 static void parse_line(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
-    lexer_stream_t *stream = lexer->stream;
+    core_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);

-    if (ch == lexer_stream_eof) {
+    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch != '#') {
@@ -418,7 +418,7 @@ ERR:
 void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
    token->type = TOKEN_UNKNOWN;
-    lexer_stream_t *stream = lexer->stream;
+    core_stream_t *stream = lexer->stream;

    stream_reset_char(stream);
    token_type_t type = TOKEN_UNKNOWN;
@@ -556,7 +556,7 @@ void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
        token->type = TOKEN_BLANK;
        goto END;
    case '\0':
-    case lexer_stream_eof:
+    case core_stream_eof:
        // EOF
        type = TOKEN_EOF;
        break;
--- a/libs/lexer/tests/test_run.c
+++ b/libs/lexer/tests/test_run.c
@@ -59,11 +59,11 @@ int main(int argc, char* argv[]) {
    }

    smcc_lexer_t lexer;
-    lexer_mem_stream_t mem_stream = {0};
-    lexer_stream_t* stream = lexer_mem_stream_init(&mem_stream, buffer, fsize, false);
+    core_mem_stream_t mem_stream = {0};
+    core_stream_t* stream = core_mem_stream_init(&mem_stream, buffer, fsize, false);
    Assert(stream != null);
-    stream->name = __FILE__;
-    stream->name_len = strlen(__FILE__);
+    cstring_clear(&stream->name);
+    cstring_push_cstr(&stream->name, __FILE__, strlen(__FILE__));
    lexer_init(&lexer, stream);
    lexer_tok_t tok;

@@ -80,4 +80,6 @@ int main(int argc, char* argv[]) {
    }

    free(buffer);
+    LOG_INFO("Lexer is Ok...");
+    return 0;
 }
--- a/runtime/libcore/include/core_log.h
+++ b/runtime/libcore/include/core_log.h
@@ -0,0 +1,18 @@
+#ifndef __SMCC_CORE_LOG_H__
+#define __SMCC_CORE_LOG_H__
+// Default the log library's log_* hooks to the smcc_* runtime functions; a hook the includer already defined is left as is.
+#ifndef log_snprintf
+#define log_snprintf    smcc_snprintf
+#endif
+
+#ifndef log_printf
+#define log_printf      smcc_printf
+#endif
+
+#ifndef log_exit
+#define log_exit        smcc_exit
+#endif
+#include <log.h> // included after the defaults so the log_* hooks are already defined when log.h is processed
+
+
+#endif /* __SMCC_CORE_LOG_H__ */
--- a/runtime/libcore/include/core_str.h
+++ b/runtime/libcore/include/core_str.h
@@ -3,7 +3,7 @@

 #include "core_type.h"
 #include "core_impl.h"
-#include "log.h"
+#include "core_log.h"

 typedef struct cstring {
    char* data;
@@ -37,16 +37,16 @@ static inline cstring_t cstring_from_cstr(const char* s) {
    if (s == null) {
        return cstring_new();
    }
-    
+
    usize len = 0;
    const char* p = s;
    while (*p++) len++;
-    
+
    char* data = (char*)smcc_malloc(len + 1);
    Assert(data != null);
    smcc_memcpy(data, s, len);
    data[len] = '\0';
-    
+
    return (cstring_t) { .data = data, .len = len, .cap = len };
 }

@@ -54,7 +54,7 @@ static inline cstring_t cstring_from_cstr(const char* s) {
 * 释放字符串资源
 */
 static inline void cstring_free(cstring_t* str) {
-    if (str && str->data) {
+    if (str && str->data && str->cap != 0) {
        smcc_free(str->data);
        str->data = null;
        str->len = 0;
@@ -65,11 +65,11 @@ static inline void cstring_free(cstring_t* str) {
 /**
 * 向字符串追加内容
 */
-static inline void cstring_push_str(cstring_t* str, const char* data, usize len) {
+static inline void cstring_push_cstr(cstring_t* str, const char* data, usize len) {
    if (str == null || data == null || len == 0) {
        return;
    }
-    
+
    // 如果需要扩容
    if (str->len + len + 1 > str->cap) {
        // FIXME c string 兼容性问题 bad practice a lot of `+ 1`
@@ -81,7 +81,7 @@ static inline void cstring_push_str(cstring_t* str, const char* data, usize len)
                break;
            }
        }
-        
+
        char* new_data = str->data ? 
            (char*)smcc_realloc(str->data, new_cap) : 
            (char*)smcc_malloc(new_cap);
@@ -90,7 +90,7 @@ static inline void cstring_push_str(cstring_t* str, const char* data, usize len)
        str->data = new_data;
        str->cap = new_cap;
    }
-    
+
    smcc_memcpy(str->data + str->len, data, len);
    str->len += len;
    str->data[str->len] = '\0'; // 保证 C 字符串兼容性
@@ -100,7 +100,7 @@ static inline void cstring_push_str(cstring_t* str, const char* data, usize len)
 * 向字符串追加单个字符
 */
 static inline void cstring_push(cstring_t* str, char ch) {
-    cstring_push_str(str, &ch, 1);
+    cstring_push_cstr(str, &ch, 1);
 }

 /**
--- a/runtime/libcore/include/core_stream.h
+++ b/runtime/libcore/include/core_stream.h
@@ -0,0 +1,65 @@
+#ifndef __SMCC_CORE_STREAM_H__
+#define __SMCC_CORE_STREAM_H__
+
+#include "core_impl.h"
+#include "core_mem.h"
+#include "core_str.h"
+#include "core_macro.h"
+
+typedef struct core_stream core_stream_t;
+
+#define core_stream_eof (-1)
+
+struct core_stream {
+    cstring_t name; ///< stream name; lexer_init() reads it to fill the lexer's location info
+
+    /// @brief Read up to `count` characters into `buffer`; returns the number of characters actually read
+    usize (*read_buf)(core_stream_t* stream, char* buffer, usize count);
+
+    /// @brief Look ahead at the next character without consuming it; core_stream_eof at end of input
+    int (*peek_char)(core_stream_t* stream);
+
+    /// @brief Rewind the look-ahead position back to the current read position
+    void (*reset_char)(core_stream_t* stream);
+
+    /// @brief Read and consume the next character (advances the stream); core_stream_eof at end of input
+    int (*next_char)(core_stream_t* stream);
+
+    /// @brief Release the resources held by the stream
+    void (*free_stream)(core_stream_t* stream);
+};
+/// @brief Forward to the stream's read_buf callback and return the count it reports
+static inline usize core_stream_read_buf(core_stream_t* self, char* buffer, usize count) {
+    return (*self->read_buf)(self, buffer, count);
+}
+/// @brief Forward to the stream's peek_char callback and return its result unchanged
+static inline int core_stream_peek_char(core_stream_t* self) {
+    return (*self->peek_char)(self);
+}
+/// @brief Forward to the stream's reset_char callback
+static inline void core_stream_reset_char(core_stream_t* self) {
+    (*self->reset_char)(self);
+}
+/// @brief Forward to the stream's next_char callback and return its result unchanged
+static inline int core_stream_next_char(core_stream_t* self) {
+    return (*self->next_char)(self);
+}
+/// @brief Forward to the stream's free_stream callback
+static inline void core_stream_free_stream(core_stream_t* self) {
+    (*self->free_stream)(self);
+}
+
+#ifndef __SMCC_CORE_NO_MEM_STREAM__
+typedef struct core_mem_stream {
+    core_stream_t stream; ///< base interface; kept as the first member because the callbacks cast core_stream_t* back to this type
+    const char* data; ///< bytes served by the stream
+    usize data_length; ///< number of bytes in data
+    usize curr_pos; ///< consume position, advanced by next_char and read_buf
+    usize peek_pos; ///< look-ahead position, advanced by peek_char and set back to curr_pos by reset_char
+    cbool owned; ///< when true, free_stream releases data
+} core_mem_stream_t;
+core_stream_t* core_mem_stream_init(core_mem_stream_t* stream, const char* data, usize length, cbool need_copy); ///< returns null for a null stream/data, a zero length, or a failed allocation
+#endif
+
+
+#endif /* __SMCC_CORE_STREAM_H__ */
--- a/runtime/libcore/include/libcore.h
+++ b/runtime/libcore/include/libcore.h
@@ -16,5 +16,6 @@

 #define SMCC_ARRLEN(arr) (sizeof(arr) / sizeof(arr[0]))
 #include <core_str.h>
+#include <core_stream.h>

 #endif // __SMCC_CORE_H__
--- a/runtime/libcore/src/cfg.std_impl.c
+++ b/runtime/libcore/src/cfg.std_impl.c
@@ -1,13 +1,19 @@
 #ifdef _MSC_VER
 #define _CRT_SECURE_NO_WARNINGS
 #endif
+
 #include <core_impl.h>
+#define __SMCC_LOG_IMPORT_SRC__
+#define log_snprintf    smcc_snprintf
+#define log_printf      smcc_printf
+#define log_exit        smcc_exit
+#include <log.h>
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
 #include <string.h>

-
 /* ====== 内存管理核心接口实现 ====== */

 void* smcc_malloc(usize size) {
--- a/runtime/libcore/src/core_mem.c
+++ b/runtime/libcore/src/core_mem.c
--- a/runtime/libcore/src/stream.c
+++ b/runtime/libcore/src/stream.c
@@ -1,12 +1,10 @@
-#include <lexer_stream.h>
-#include <lexer_log.h>
-#include <libcore.h>
+#include <core_log.h>
+#include <core_stream.h>

 // 内存流的具体实现结构
-
-static usize read_buf(lexer_stream_t* _stream, char* buffer, usize count) {
+static usize read_buf(core_stream_t* _stream, char* buffer, usize count) {
    Assert(buffer != null && buffer != null);
-    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
+    core_mem_stream_t* stream = (core_mem_stream_t*)_stream;

    usize remaining = stream->data_length - stream->curr_pos;
    usize to_read = (remaining < count) ? remaining : count;
@@ -15,31 +13,31 @@ static usize read_buf(lexer_stream_t* _stream, char* buffer, usize count) {
        smcc_memcpy(buffer, stream->data + stream->curr_pos, to_read);
        stream->curr_pos += to_read;
    } else {
-        LEX_WARN("Reading past end of stream [maybe count is too large or negative?]");
+        LOG_WARN("Reading past end of stream [maybe count is too large or negative?]");
    }

    return to_read;
 }

-static int peek_char(lexer_stream_t* _stream) {
+static int peek_char(core_stream_t* _stream) {
    Assert(_stream != null);
-    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
+    core_mem_stream_t* stream = (core_mem_stream_t*)_stream;

    // 如果已经到达末尾，返回EOF
    if (stream->peek_pos >= stream->data_length) {
-        return lexer_stream_eof; // EOF
+        return core_stream_eof; // EOF
    }

    return (int)(unsigned char)stream->data[stream->peek_pos++];
 }

-static int next_char(lexer_stream_t* _stream) {
+static int next_char(core_stream_t* _stream) {
    Assert(_stream != NULL);
-    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
+    core_mem_stream_t* stream = (core_mem_stream_t*)_stream;

    // 如果已经到达末尾，返回EOF
    if (stream->curr_pos >= stream->data_length) {
-        return lexer_stream_eof; // EOF
+        return core_stream_eof; // EOF
    }
    
    unsigned char ch = stream->data[stream->curr_pos++];
@@ -49,24 +47,28 @@ static int next_char(lexer_stream_t* _stream) {
    return (int)ch;
 }

-static void reset_char(lexer_stream_t* _stream) {
+static void reset_char(core_stream_t* _stream) {
    Assert(_stream != NULL);
-    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
+    core_mem_stream_t* stream = (core_mem_stream_t*)_stream;

    stream->peek_pos = stream->curr_pos;
 }

-static void free_stream(lexer_stream_t* _stream) {
+static void free_stream(core_stream_t* _stream) {
    Assert(_stream != null);
-    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
+    core_mem_stream_t* stream = (core_mem_stream_t*)_stream;
+    // Release the stream name's buffer. cstring_free() nulls name.data after freeing, so calling it again is a no-op for the name.
+    // NOTE(review): stream->data is not cleared after smcc_free() below; a second free_stream() on an owned stream would free it twice.
+    cstring_free(&stream->stream.name);
+
    if (stream->owned) {
        smcc_free((void*)stream->data);
    }
 }

-lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* data, usize length, cbool need_copy) {
+core_stream_t* core_mem_stream_init(core_mem_stream_t* stream, const char* data, usize length, cbool need_copy) {
    if (stream == null || data == NULL || length == 0) {
-        LEX_ERROR("param error");
+        LOG_ERROR("param error");
        return null;
    }

@@ -74,7 +76,7 @@ lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* da
    if (need_copy) {
        char* buf = (char*)smcc_malloc(length);
        if (buf == null) {
-            LEX_ERROR("malloc error");
+            LOG_ERROR("malloc error");
            return null;
        }

@@ -87,9 +89,7 @@ lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* da
    stream->curr_pos = 0;
    stream->peek_pos = 0;

-    static const char name[] = "mem_stream";
-    stream->stream.name = name;
-    stream->stream.name_len = sizeof(name) - 1;
+    stream->stream.name = cstring_from_cstr("mem_stream");
    
    stream->stream.read_buf = read_buf;
    stream->stream.peek_char = peek_char;
--- a/runtime/log/include/log.c
+++ b/runtime/log/include/log.c
@@ -31,6 +31,10 @@ void log_default_handler(log_level_t level, const char* module, const char* file
    log_printf("[%s] %s:%d | %s: %s\n", 
            level_str, file, line, module, message);
 #endif
+    // for clangd warning
+    // clang-analyzer-deadcode.DeadStores
+    (void)color_code;
+    (void)level_str;
    if (level & LOG_LEVEL_FATAL) {
        log_exit(-LOG_LEVEL_FATAL);
    }
--- a/runtime/log/include/log.h
+++ b/runtime/log/include/log.h
@@ -8,7 +8,7 @@

 #include "color.h"

-#ifndef __SMCC_LOG_NO_STD_IMPL__
+#ifdef __SMCC_LOG_USE_STD_IMPL__
 #include <stdio.h>
 #include <stdlib.h>
 #define log_snprintf snprintf
@@ -192,4 +192,8 @@ void logger_destroy(logger_t* logger);
 #define FIXME(str) PanicFmt("FIXME " __LOG_STR(str)) ///< 提醒开发者修改代码（触发致命错误）
 /// @}

+#ifdef __SMCC_LOG_IMPORT_SRC__
+#include "log.c"
+#endif
+
 #endif // __SMCC_LOG_H__