feat(core): 重构词法分析器流接口并迁移至 core 库

将 lexer_stream 抽象为 core_stream，统一运行时核心组件的输入流模型。
移除了旧的 `lexer_stream.h` 定义，并将其功能完整迁移至 `core_stream.h` 中。
更新了内存流实现以适配新的 core_stream 接口，并修复部分资源释放问题。
同时调整日志模块包含方式，增强模块间解耦能力。

此变更影响词法分析器对输入流的操作方式，所有涉及 stream 的类型与函数均已替换为 core 前缀版本。
测试用例同步更新并验证通过。
This commit is contained in:
zzy
2025-11-20 14:17:03 +08:00
parent 5c24f35c87
commit 47b56d52f6
13 changed files with 161 additions and 99 deletions

View File

@@ -72,11 +72,11 @@ static inline int keyword_cmp(const char* name, int len) {
return -1; // Not a keyword.
}
void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream) {
void lexer_init(smcc_lexer_t* lexer, core_stream_t* stream) {
lexer->stream = stream;
lexer->pos = (lexer_loc_t) {
.name = stream->name,
.name_len = stream->name_len,
.name = cstring_as_cstr(&stream->name),
.name_len = cstring_len(&stream->name),
.line = 1,
.column = 1,
.offset = 0,
@@ -91,14 +91,14 @@ void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream) {
#define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)
static void skip_newline(smcc_lexer_t* lexer, lexer_tok_t* token) {
lexer_stream_t* stream = lexer->stream;
core_stream_t* stream = lexer->stream;
token->type = TOKEN_LINE_COMMENT;
// 循环直到遇到换行符或文件结束
while (1) {
int ch = stream_next_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
// 到达文件末尾,直接返回
return;
}
@@ -114,7 +114,7 @@ static void skip_newline(smcc_lexer_t* lexer, lexer_tok_t* token) {
}
static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
lexer_stream_t* stream = lexer->stream;
core_stream_t* stream = lexer->stream;
token->type = TOKEN_BLOCK_COMMENT;
int ch;
@@ -131,7 +131,7 @@ static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
ch = stream_next_char(stream);
lexer_next_pos(lexer);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
// 未闭合的块注释
LEX_WARN("Unterminated block comment");
return;
@@ -183,11 +183,11 @@ static inline int got_slash(int peek) {
static void parse_char(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
token->type = TOKEN_CHAR_LITERAL;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
int ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
goto ERR;
} else if (ch != '\'') {
@@ -200,7 +200,7 @@ static void parse_char(smcc_lexer_t* lexer, lexer_tok_t* token) {
ch = stream_next_char(stream);
lexer_next_pos(lexer);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at middle");
goto ERR;
} else if (ch == '\\') {
@@ -229,11 +229,11 @@ ERR:
static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
token->type = TOKEN_STRING_LITERAL;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
int ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
goto ERR;
} else if (ch != '"') {
@@ -248,7 +248,7 @@ static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
while (1) {
ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_ERROR("Unexpected EOF at string literal");
break;
} else if (ch == '\n') {
@@ -285,11 +285,11 @@ ERR:
static void parse_number(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
int ch = stream_peek_char(stream);
int base = 0;
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
goto ERR;
} else if (ch == '0') {
@@ -325,7 +325,7 @@ static void parse_number(smcc_lexer_t* lexer, lexer_tok_t* token) {
while (1) {
ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
break;
} else if (ch >= 'a' && ch <= 'z') {
tmp = ch - 'a' + 10;
@@ -356,11 +356,11 @@ ERR:
static void parse_line(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
int ch = stream_peek_char(stream);
if (ch == lexer_stream_eof) {
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
goto ERR;
} else if (ch != '#') {
@@ -418,7 +418,7 @@ ERR:
void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->loc = lexer->pos;
token->type = TOKEN_UNKNOWN;
lexer_stream_t *stream = lexer->stream;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
token_type_t type = TOKEN_UNKNOWN;
@@ -556,7 +556,7 @@ void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
token->type = TOKEN_BLANK;
goto END;
case '\0':
case lexer_stream_eof:
case core_stream_eof:
// EOF
type = TOKEN_EOF;
break;

View File

@@ -1,101 +0,0 @@
#include <lexer_stream.h>
#include <lexer_log.h>
#include <libcore.h>
// 内存流的具体实现结构
/**
 * Copy up to `count` bytes from the current read position into `buffer`.
 *
 * Advances the read cursor (curr_pos) by the number of bytes copied and makes
 * sure the peek cursor (peek_pos) does not fall behind it, which is the same
 * adjustment next_char performs after consuming a byte.
 *
 * @param _stream Base stream pointer; it is cast to lexer_mem_stream_t.
 * @param buffer  Destination for the copied bytes.
 * @param count   Maximum number of bytes to copy.
 * @return Number of bytes actually copied; 0 when nothing was copied (a
 *         warning is logged in that case).
 */
static usize read_buf(lexer_stream_t* _stream, char* buffer, usize count) {
    // Validate both pointers: the previous check tested `buffer` twice and
    // never looked at `_stream`, which is dereferenced right below.
    Assert(_stream != null && buffer != null);
    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;

    usize remaining = stream->data_length - stream->curr_pos;
    usize to_read = (remaining < count) ? remaining : count;
    if (to_read > 0) {
        smcc_memcpy(buffer, stream->data + stream->curr_pos, to_read);
        stream->curr_pos += to_read;
        // Keep the peek cursor at or ahead of the read cursor so a following
        // peek_char does not hand back bytes that were already consumed.
        if (stream->peek_pos < stream->curr_pos) {
            stream->peek_pos = stream->curr_pos;
        }
    } else {
        LEX_WARN("Reading past end of stream [maybe count is too large or negative?]");
    }
    return to_read;
}
/**
 * Look at the byte under the peek cursor and move the peek cursor forward by
 * one. The read cursor (curr_pos) is left untouched.
 *
 * @param _stream Base stream pointer; it is cast to lexer_mem_stream_t.
 * @return The byte as a non-negative int, or lexer_stream_eof when the peek
 *         cursor is at or beyond data_length.
 */
static int peek_char(lexer_stream_t* _stream) {
    Assert(_stream != null);
    lexer_mem_stream_t* mem = (lexer_mem_stream_t*)_stream;

    if (mem->peek_pos < mem->data_length) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended.
        unsigned char byte = (unsigned char)mem->data[mem->peek_pos];
        mem->peek_pos++;
        return (int)byte;
    }

    // Peek cursor has reached the end of the data.
    return lexer_stream_eof;
}
/**
 * Consume one byte at the read cursor (curr_pos).
 *
 * After the read cursor moves, the peek cursor is pulled forward if it would
 * otherwise be behind the read cursor.
 *
 * @param _stream Base stream pointer; it is cast to lexer_mem_stream_t.
 * @return The consumed byte as a non-negative int, or lexer_stream_eof when
 *         the read cursor is at or beyond data_length.
 */
static int next_char(lexer_stream_t* _stream) {
    Assert(_stream != NULL);
    lexer_mem_stream_t* mem = (lexer_mem_stream_t*)_stream;

    // Nothing left to consume.
    if (mem->curr_pos >= mem->data_length) {
        return lexer_stream_eof;
    }

    // Go through unsigned char so bytes >= 0x80 are not sign-extended.
    int value = (int)(unsigned char)mem->data[mem->curr_pos];
    mem->curr_pos += 1;

    if (mem->curr_pos > mem->peek_pos) {
        mem->peek_pos = mem->curr_pos;
    }
    return value;
}
/**
 * Rewind the peek cursor to the read cursor, discarding any look-ahead made
 * through peek_char.
 *
 * @param _stream Base stream pointer; it is cast to lexer_mem_stream_t.
 */
static void reset_char(lexer_stream_t* _stream) {
    Assert(_stream != NULL);
    lexer_mem_stream_t* mem = (lexer_mem_stream_t*)_stream;

    mem->peek_pos = mem->curr_pos;
}
/**
 * Release the data buffer if this stream owns it.
 *
 * When the buffer is owned it is freed and the stream is reset to an empty,
 * non-owning state: a repeated call then does nothing, and peek_char /
 * next_char report lexer_stream_eof instead of reading freed memory.
 * A stream that does not own its data is left unchanged.
 *
 * @param _stream Base stream pointer; it is cast to lexer_mem_stream_t.
 */
static void free_stream(lexer_stream_t* _stream) {
    Assert(_stream != null);
    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
    if (stream->owned) {
        smcc_free((void*)stream->data);
        // Drop every reference to the released buffer so it can neither be
        // freed a second time nor read through the cursors.
        stream->data = null;
        stream->data_length = 0;
        stream->curr_pos = 0;
        stream->peek_pos = 0;
        stream->owned = 0;
    }
}
/**
 * Initialise an in-memory stream over `data`.
 *
 * @param stream    Caller-provided stream object to fill in.
 * @param data      Source bytes; must not be null.
 * @param length    Number of bytes in `data`; must be non-zero.
 * @param need_copy When true, the bytes are copied into a buffer obtained
 *                  from smcc_malloc and the stream is marked as owning it
 *                  (free_stream releases it). When false, the stream only
 *                  keeps the `data` pointer, so the caller's buffer has to
 *                  stay valid while the stream is in use.
 * @return Pointer to the embedded base stream on success; null on invalid
 *         arguments or allocation failure. On failure `stream` is not
 *         modified.
 */
lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* data, usize length, cbool need_copy) {
    if (stream == null || data == null || length == 0) {
        LEX_ERROR("param error");
        return null;
    }

    // Resolve the backing buffer first; `stream` is only written to once
    // nothing can fail anymore, so an allocation failure does not leave it
    // flagged as owning a buffer that was never assigned.
    const char* backing = data;
    if (need_copy) {
        char* buf = (char*)smcc_malloc(length);
        if (buf == null) {
            LEX_ERROR("malloc error");
            return null;
        }
        smcc_memcpy(buf, data, length);
        backing = buf;
    }

    stream->owned = need_copy;
    stream->data = backing;
    stream->data_length = length;
    stream->curr_pos = 0;
    stream->peek_pos = 0;

    // Static storage: the name pointer stays valid for the program lifetime.
    static const char name[] = "mem_stream";
    stream->stream.name = name;
    stream->stream.name_len = sizeof(name) - 1;

    stream->stream.read_buf = read_buf;
    stream->stream.peek_char = peek_char;
    stream->stream.next_char = next_char;
    stream->stream.reset_char = reset_char;
    stream->stream.free_stream = free_stream;

    // Return the embedded base object with its real type instead of going
    // through void*. The callbacks cast the base pointer straight back to
    // lexer_mem_stream_t, which relies on `stream` being the first member.
    return &stream->stream;
}