feat(core): 重构词法分析器流接口并迁移至 core 库

将 lexer_stream 抽象为 core_stream,统一运行时核心组件的输入流模型。
移除了旧的 `lexer_stream.h` 定义,并将其功能完整迁移至 `core_stream.h` 中。
更新了内存流实现以适配新的 core_stream 接口,并修复部分资源释放问题。
同时调整日志模块包含方式,增强模块间解耦能力。

此变更影响词法分析器对输入流的操作方式,所有涉及 stream 的类型与函数均已替换为 core 前缀版本。
测试用例同步更新并验证通过。
This commit is contained in:
zzy
2025-11-20 14:17:03 +08:00
parent 5c24f35c87
commit 47b56d52f6
13 changed files with 161 additions and 99 deletions

View File

@@ -7,7 +7,6 @@
#define __SMCC_CC_LEXER_H__
#include <libcore.h>
#include "lexer_stream.h"
#include "lexer_token.h"
typedef struct lexer_loc {
@@ -30,7 +29,7 @@ typedef struct lexer_token {
* 封装词法分析所需的状态信息和缓冲区管理
*/
typedef struct cc_lexer {
lexer_stream_t* stream;
core_stream_t* stream;
lexer_loc_t pos;
} smcc_lexer_t;
@@ -39,7 +38,7 @@ typedef struct cc_lexer {
* @param[out] lexer 要初始化的词法分析器实例
* @param[in] stream 输入流对象指针
*/
void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream);
void lexer_init(smcc_lexer_t* lexer, core_stream_t* stream);
/**
* @brief 获取原始token

View File

@@ -1,37 +0,0 @@
#include <core_type.h>
/* Forward typedef so the callback signatures below can name the stream type. */
typedef struct lexer_stream lexer_stream_t;
/* Value used to signal end of input from the character callbacks. */
#define lexer_stream_eof (-1)
/*
 * Input stream interface consumed by the lexer: a name plus a table of
 * callbacks that a concrete stream implementation fills in.
 */
struct lexer_stream {
/* Stream name and its length in characters. */
const char* name;
usize name_len;
/// @brief Read the requested number of characters into the buffer
usize (*read_buf)(lexer_stream_t* stream, char* buffer, usize count);
/// @brief Get the next character
int (*peek_char)(lexer_stream_t* stream);
/// @brief Reset the character stream position
void (*reset_char) (lexer_stream_t* stream);
/// @brief Read and consume the next character (advances the stream position)
int (*next_char)(lexer_stream_t* stream);
/// @brief Release resources
void (*free_stream) (lexer_stream_t* steam);
};
#ifndef __SMCC_LEXER_NO_MEM_STREAM__
/*
 * Stream backed by an in-memory character buffer. The base interface is
 * embedded as the first member.
 */
typedef struct lexer_mem_stream {
lexer_stream_t stream; /* base stream interface (name + callbacks) */
const char* data; /* buffer being read */
usize data_length; /* number of characters available in data */
usize curr_pos; /* consume cursor */
usize peek_pos; /* look-ahead cursor */
cbool owned; /* NOTE(review): presumably true when the stream owns data and must free it — confirm */
} lexer_mem_stream_t;
/*
 * Initialise a caller-provided memory stream over `data` (`length` characters).
 * `need_copy` requests a private buffer for the data.
 * NOTE(review): ownership of the copy and the exact failure conditions live in
 * the implementation file — confirm there.
 */
lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* data, usize length, cbool need_copy);
#endif

View File

@@ -72,11 +72,11 @@ static inline int keyword_cmp(const char* name, int len) {
return -1; // Not a keyword.
}
void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream) {
void lexer_init(smcc_lexer_t* lexer, core_stream_t* stream) {
lexer->stream = stream;
lexer->pos = (lexer_loc_t) {
.name = stream->name,
.name_len = stream->name_len,
.name = cstring_as_cstr(&stream->name),
.name_len = cstring_len(&stream->name),
.line = 1,
.column = 1,
.offset = 0,
@@ -91,14 +91,14 @@ void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream) {
#define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)
static void skip_newline(smcc_lexer_t* lexer, lexer_tok_t* token) {
lexer_stream_t* stream = lexer->stream;
core_stream_t* stream = lexer->stream;
token->type = TOKEN_LINE_COMMENT;
// 循环直到遇到换行符或文件结束
while (1) {
int ch = stream_next_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
// 到达文件末尾,直接返回
return;
}
@@ -114,7 +114,7 @@ static void skip_newline(smcc_lexer_t* lexer, lexer_tok_t* token) {
}
static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
lexer_stream_t* stream = lexer->stream;
core_stream_t* stream = lexer->stream;
token->type = TOKEN_BLOCK_COMMENT;
int ch;
@@ -131,7 +131,7 @@ static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
ch = stream_next_char(stream);
lexer_next_pos(lexer);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
// 未闭合的块注释
LEX_WARN("Unterminated block comment");
return;
@@ -183,11 +183,11 @@ static inline int got_slash(int peek) {
static void parse_char(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
token->type = TOKEN_CHAR_LITERAL;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
int ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
goto ERR;
} else if (ch != '\'') {
@@ -200,7 +200,7 @@ static void parse_char(smcc_lexer_t* lexer, lexer_tok_t* token) {
ch = stream_next_char(stream);
lexer_next_pos(lexer);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at middle");
goto ERR;
} else if (ch == '\\') {
@@ -229,11 +229,11 @@ ERR:
static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
token->type = TOKEN_STRING_LITERAL;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
int ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
goto ERR;
} else if (ch != '"') {
@@ -248,7 +248,7 @@ static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
while (1) {
ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_ERROR("Unexpected EOF at string literal");
break;
} else if (ch == '\n') {
@@ -285,11 +285,11 @@ ERR:
static void parse_number(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
int ch = stream_peek_char(stream);
int base = 0;
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
goto ERR;
} else if (ch == '0') {
@@ -325,7 +325,7 @@ static void parse_number(smcc_lexer_t* lexer, lexer_tok_t* token) {
while (1) {
ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
break;
} else if (ch >= 'a' && ch <= 'z') {
tmp = ch - 'a' + 10;
@@ -356,11 +356,11 @@ ERR:
static void parse_line(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
int ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
goto ERR;
} else if (ch != '#') {
@@ -418,7 +418,7 @@ ERR:
void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
token->type = TOKEN_UNKNOWN;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
token_type_t type = TOKEN_UNKNOWN;
@@ -556,7 +556,7 @@ void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->type = TOKEN_BLANK;
goto END;
case '\0':
case lexer_stream_eof:
case core_stream_eof:
// EOF
type = TOKEN_EOF;
break;

View File

@@ -59,11 +59,11 @@ int main(int argc, char* argv[]) {
}
smcc_lexer_t lexer;
lexer_mem_stream_t mem_stream = {0};
lexer_stream_t* stream = lexer_mem_stream_init(&mem_stream, buffer, fsize, false);
core_mem_stream_t mem_stream = {0};
core_stream_t* stream = core_mem_stream_init(&mem_stream, buffer, fsize, false);
Assert(stream != null);
stream->name = __FILE__;
stream->name_len = strlen(__FILE__);
cstring_clear(&stream->name);
cstring_push_cstr(&stream->name, __FILE__, strlen(__FILE__));
lexer_init(&lexer, stream);
lexer_tok_t tok;
@@ -80,4 +80,6 @@ int main(int argc, char* argv[]) {
}
free(buffer);
LOG_INFO("Lexer is Ok...");
return 0;
}

View File

@@ -0,0 +1,18 @@
#ifndef __SMCC_CORE_LOG_H__
#define __SMCC_CORE_LOG_H__
/*
 * Core-side wrapper around <log.h>.
 *
 * Binds the log_snprintf / log_printf / log_exit hooks to their smcc_*
 * counterparts before <log.h> is included. Each binding is skipped when the
 * includer has already defined that macro, so it can still be overridden.
 */
#ifndef log_snprintf
#define log_snprintf smcc_snprintf
#endif
#ifndef log_printf
#define log_printf smcc_printf
#endif
#ifndef log_exit
#define log_exit smcc_exit
#endif
#include <log.h>
#endif /* __SMCC_CORE_LOG_H__ */

View File

@@ -3,7 +3,7 @@
#include "core_type.h"
#include "core_impl.h"
#include "log.h"
#include "core_log.h"
typedef struct cstring {
char* data;
@@ -37,16 +37,16 @@ static inline cstring_t cstring_from_cstr(const char* s) {
if (s == null) {
return cstring_new();
}
usize len = 0;
const char* p = s;
while (*p++) len++;
char* data = (char*)smcc_malloc(len + 1);
Assert(data != null);
smcc_memcpy(data, s, len);
data[len] = '\0';
return (cstring_t) { .data = data, .len = len, .cap = len };
}
@@ -54,7 +54,7 @@ static inline cstring_t cstring_from_cstr(const char* s) {
* 释放字符串资源
*/
static inline void cstring_free(cstring_t* str) {
if (str && str->data) {
if (str && str->data && str->cap != 0) {
smcc_free(str->data);
str->data = null;
str->len = 0;
@@ -65,11 +65,11 @@ static inline void cstring_free(cstring_t* str) {
/**
* 向字符串追加内容
*/
static inline void cstring_push_str(cstring_t* str, const char* data, usize len) {
static inline void cstring_push_cstr(cstring_t* str, const char* data, usize len) {
if (str == null || data == null || len == 0) {
return;
}
// 如果需要扩容
if (str->len + len + 1 > str->cap) {
// FIXME c string 兼容性问题 bad practice a lot of `+ 1`
@@ -81,7 +81,7 @@ static inline void cstring_push_str(cstring_t* str, const char* data, usize len)
break;
}
}
char* new_data = str->data ?
(char*)smcc_realloc(str->data, new_cap) :
(char*)smcc_malloc(new_cap);
@@ -90,7 +90,7 @@ static inline void cstring_push_str(cstring_t* str, const char* data, usize len)
str->data = new_data;
str->cap = new_cap;
}
smcc_memcpy(str->data + str->len, data, len);
str->len += len;
str->data[str->len] = '\0'; // 保证 C 字符串兼容性
@@ -100,7 +100,7 @@ static inline void cstring_push_str(cstring_t* str, const char* data, usize len)
* 向字符串追加单个字符
*/
static inline void cstring_push(cstring_t* str, char ch) {
cstring_push_str(str, &ch, 1);
cstring_push_cstr(str, &ch, 1);
}
/**

View File

@@ -0,0 +1,65 @@
#ifndef __SMCC_CORE_STREAM_H__
#define __SMCC_CORE_STREAM_H__
#include "core_impl.h"
#include "core_mem.h"
#include "core_str.h"
#include "core_macro.h"
/** Forward typedef so the callback signatures below can name the stream type. */
typedef struct core_stream core_stream_t;

/** Value the character callbacks return to signal end of input. */
#define core_stream_eof (-1)

/**
 * @brief Abstract character input stream.
 *
 * A concrete stream fills in the callback table; consumers call through the
 * core_stream_* inline wrappers. Every callback receives the stream itself as
 * its first argument.
 */
struct core_stream {
    /// @brief Name of the stream.
    cstring_t name;

    /// @brief Read the requested number of characters into the buffer.
    usize (*read_buf)(core_stream_t* stream, char* buffer, usize count);

    /// @brief Get the next character (look-ahead).
    int (*peek_char)(core_stream_t* stream);

    /// @brief Reset the character stream (look-ahead) position.
    void (*reset_char)(core_stream_t* stream);

    /// @brief Read and consume the next character (advances the stream position).
    int (*next_char)(core_stream_t* stream);

    /// @brief Release the resources held by the stream.
    void (*free_stream)(core_stream_t* stream);
};
/**
 * @brief Dispatch to the stream's read_buf callback.
 * @param self   Stream to read from; dereferenced without a null check.
 * @param buffer Destination buffer, passed through to the callback.
 * @param count  Requested character count, passed through to the callback.
 * @return The callback's return value.
 */
static inline usize core_stream_read_buf(core_stream_t* self, char* buffer, usize count) {
return self->read_buf(self, buffer, count);
}
/**
 * @brief Dispatch to the stream's peek_char callback.
 * @param self Stream to peek into; dereferenced without a null check.
 * @return The callback's return value.
 */
static inline int core_stream_peek_char(core_stream_t* self) {
return self->peek_char(self);
}
/**
 * @brief Dispatch to the stream's reset_char callback.
 * @param self Stream to reset; dereferenced without a null check.
 */
static inline void core_stream_reset_char(core_stream_t* self) {
self->reset_char(self);
}
/**
 * @brief Dispatch to the stream's next_char callback.
 * @param self Stream to read from; dereferenced without a null check.
 * @return The callback's return value.
 */
static inline int core_stream_next_char(core_stream_t* self) {
return self->next_char(self);
}
/**
 * @brief Dispatch to the stream's free_stream callback.
 * @param self Stream to release; dereferenced without a null check.
 */
static inline void core_stream_free_stream(core_stream_t* self) {
self->free_stream(self);
}
#ifndef __SMCC_CORE_NO_MEM_STREAM__
/**
 * @brief Stream backed by an in-memory character buffer.
 *
 * The base core_stream_t is embedded as the first member, so a pointer to it
 * can be cast back to core_mem_stream_t by the memory-stream callbacks.
 */
typedef struct core_mem_stream {
core_stream_t stream; ///< Base stream interface (name + callbacks).
const char* data; ///< Buffer being read.
usize data_length; ///< Number of characters available in data.
usize curr_pos; ///< Consume cursor.
usize peek_pos; ///< Look-ahead cursor; NOTE(review): presumably re-synchronised to curr_pos on reset — confirm in the implementation.
cbool owned; ///< NOTE(review): presumably true when the stream owns data and must free it — confirm.
} core_mem_stream_t;
/**
 * @brief Initialise a caller-provided memory stream over `data`.
 * @param stream    Storage for the memory stream to initialise.
 * @param data      Source characters.
 * @param length    Number of characters in data.
 * @param need_copy Requests a private buffer for the data.
 *                  NOTE(review): presumably the stream then owns and later
 *                  frees that copy — confirm in the implementation.
 * @return A core_stream_t* for the initialised stream.
 *         NOTE(review): failure conditions (and whether null is returned on
 *         bad parameters or allocation failure) are defined by the
 *         implementation — confirm there.
 */
core_stream_t* core_mem_stream_init(core_mem_stream_t* stream, const char* data, usize length, cbool need_copy);
#endif
#endif /* __SMCC_CORE_STREAM_H__ */

View File

@@ -16,5 +16,6 @@
#define SMCC_ARRLEN(arr) (sizeof(arr) / sizeof(arr[0]))
#include <core_str.h>
#include <core_stream.h>
#endif // __SMCC_CORE_H__

View File

@@ -1,13 +1,19 @@
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <core_impl.h>
#define __SMCC_LOG_IMPORT_SRC__
#define log_snprintf smcc_snprintf
#define log_printf smcc_printf
#define log_exit smcc_exit
#include <log.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
/* ====== 内存管理核心接口实现 ====== */
void* smcc_malloc(usize size) {

View File

@@ -1,12 +1,10 @@
#include <lexer_stream.h>
#include <lexer_log.h>
#include <libcore.h>
#include <core_log.h>
#include <core_stream.h>
// 内存流的具体实现结构
static usize read_buf(lexer_stream_t* _stream, char* buffer, usize count) {
static usize read_buf(core_stream_t* _stream, char* buffer, usize count) {
Assert(buffer != null && buffer != null);
lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
core_mem_stream_t* stream = (core_mem_stream_t*)_stream;
usize remaining = stream->data_length - stream->curr_pos;
usize to_read = (remaining < count) ? remaining : count;
@@ -15,31 +13,31 @@ static usize read_buf(lexer_stream_t* _stream, char* buffer, usize count) {
smcc_memcpy(buffer, stream->data + stream->curr_pos, to_read);
stream->curr_pos += to_read;
} else {
LEX_WARN("Reading past end of stream [maybe count is too large or negative?]");
LOG_WARN("Reading past end of stream [maybe count is too large or negative?]");
}
return to_read;
}
static int peek_char(lexer_stream_t* _stream) {
static int peek_char(core_stream_t* _stream) {
Assert(_stream != null);
lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
core_mem_stream_t* stream = (core_mem_stream_t*)_stream;
// 如果已经到达末尾返回EOF
if (stream->peek_pos >= stream->data_length) {
return lexer_stream_eof; // EOF
return core_stream_eof; // EOF
}
return (int)(unsigned char)stream->data[stream->peek_pos++];
}
static int next_char(lexer_stream_t* _stream) {
static int next_char(core_stream_t* _stream) {
Assert(_stream != NULL);
lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
core_mem_stream_t* stream = (core_mem_stream_t*)_stream;
// 如果已经到达末尾返回EOF
if (stream->curr_pos >= stream->data_length) {
return lexer_stream_eof; // EOF
return core_stream_eof; // EOF
}
unsigned char ch = stream->data[stream->curr_pos++];
@@ -49,24 +47,28 @@ static int next_char(lexer_stream_t* _stream) {
return (int)ch;
}
static void reset_char(lexer_stream_t* _stream) {
static void reset_char(core_stream_t* _stream) {
Assert(_stream != NULL);
lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
core_mem_stream_t* stream = (core_mem_stream_t*)_stream;
stream->peek_pos = stream->curr_pos;
}
static void free_stream(lexer_stream_t* _stream) {
static void free_stream(core_stream_t* _stream) {
Assert(_stream != null);
lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
core_mem_stream_t* stream = (core_mem_stream_t*)_stream;
// FIXME maybe double free?
cstring_free(&stream->stream.name);
if (stream->owned) {
smcc_free((void*)stream->data);
}
}
lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* data, usize length, cbool need_copy) {
core_stream_t* core_mem_stream_init(core_mem_stream_t* stream, const char* data, usize length, cbool need_copy) {
if (stream == null || data == NULL || length == 0) {
LEX_ERROR("param error");
LOG_ERROR("param error");
return null;
}
@@ -74,7 +76,7 @@ lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* da
if (need_copy) {
char* buf = (char*)smcc_malloc(length);
if (buf == null) {
LEX_ERROR("malloc error");
LOG_ERROR("malloc error");
return null;
}
@@ -87,9 +89,7 @@ lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* da
stream->curr_pos = 0;
stream->peek_pos = 0;
static const char name[] = "mem_stream";
stream->stream.name = name;
stream->stream.name_len = sizeof(name) - 1;
stream->stream.name = cstring_from_cstr("mem_stream");
stream->stream.read_buf = read_buf;
stream->stream.peek_char = peek_char;

View File

@@ -31,6 +31,10 @@ void log_default_handler(log_level_t level, const char* module, const char* file
log_printf("[%s] %s:%d | %s: %s\n",
level_str, file, line, module, message);
#endif
// for clangd warning
// clang-analyzer-deadcode.DeadStores
(void)color_code;
(void)level_str;
if (level & LOG_LEVEL_FATAL) {
log_exit(-LOG_LEVEL_FATAL);
}

View File

@@ -8,7 +8,7 @@
#include "color.h"
#ifndef __SMCC_LOG_NO_STD_IMPL__
#ifdef __SMCC_LOG_USE_STD_IMPL__
#include <stdio.h>
#include <stdlib.h>
#define log_snprintf snprintf
@@ -192,4 +192,8 @@ void logger_destroy(logger_t* logger);
#define FIXME(str) PanicFmt("FIXME " __LOG_STR(str)) ///< 提醒开发者修改代码(触发致命错误)
/// @}
#ifdef __SMCC_LOG_IMPORT_SRC__
#include "log.c"
#endif
#endif // __SMCC_LOG_H__