refactor(lexer): 重构词法分析器头文件结构并优化缓冲区管理
移除了旧的lexer_stream.c实现,引入新的环形缓冲区机制来替代原有的动态数组缓冲区。更新了词法分析器的核心数据结构,修改了token获取相关函数的实现以支持新的缓冲区管理方式。

BREAKING CHANGE: 移除了scc_lexer_stream_t相关的API,替换为基于环形缓冲区的新接口scc_lexer_to_ring和相关函数。

feat(lexer_token): 添加词法分析结果内存泄漏警告注释

docs: 移除预处理器模块的测试文件和相关配置
This commit is contained in:
@@ -1,100 +0,0 @@
|
||||
/**
|
||||
* @file lexer.h
|
||||
* @brief C语言词法分析器核心数据结构与接口
|
||||
*/
|
||||
|
||||
#ifndef __SCC_LEXER_H__
|
||||
#define __SCC_LEXER_H__
|
||||
|
||||
#include "lexer_token.h"
|
||||
#include <scc_core.h>
|
||||
#include <scc_sstream.h>
|
||||
|
||||
/**
|
||||
* @brief 词法分析器核心结构体
|
||||
*
|
||||
* 封装词法分析所需的状态信息和缓冲区管理
|
||||
*/
|
||||
typedef struct scc_lexer {
|
||||
scc_sstream_ring_t stream_ref;
|
||||
int jump_macro;
|
||||
} scc_lexer_t;
|
||||
|
||||
void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref);
|
||||
|
||||
/**
|
||||
* @brief 获取原始token
|
||||
* @param[in] lexer 词法分析器实例
|
||||
* @param[out] token 输出token存储位置
|
||||
*
|
||||
* 此函数会返回所有类型的token,包括空白符等无效token
|
||||
*/
|
||||
void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token);
|
||||
|
||||
/**
|
||||
* @brief 获取有效token
|
||||
* @param[in] lexer 词法分析器实例
|
||||
* @param[out] token 输出token存储位置
|
||||
*
|
||||
* 此函数会自动跳过空白符等无效token,返回对语法分析有意义的token
|
||||
*/
|
||||
void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token);
|
||||
|
||||
typedef SCC_VEC(scc_lexer_tok_t) scc_lexer_tok_vec_t;
|
||||
|
||||
typedef struct scc_lexer_stream scc_lexer_stream_t;
|
||||
struct scc_lexer_stream {
|
||||
scc_lexer_t *lexer;
|
||||
scc_lexer_tok_vec_t toks; // 循环缓冲区
|
||||
usize curr_pos; // 当前读取位置(逻辑位置)
|
||||
usize probe_pos; // 已填充位置(逻辑位置)
|
||||
cbool need_comment;
|
||||
|
||||
/// @brief 向前读取n个token
|
||||
const scc_lexer_tok_t *(*peek)(scc_lexer_stream_t *stream, usize n);
|
||||
|
||||
/// @brief 指针推进到offset
|
||||
void (*advance)(scc_lexer_stream_t *stream, usize offset);
|
||||
|
||||
/// @brief 销毁并释放资源
|
||||
void (*drop)(scc_lexer_stream_t *stream);
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief 将词法分析器转换成流式输出(自带缓冲区)
|
||||
* @param[in] lexer 已经词法分析器实例
|
||||
* @param[out] stream 输出流对象指针
|
||||
* @param[in] need_comment 输出时是否需要注释
|
||||
*/
|
||||
void scc_lexer_to_stream(scc_lexer_t *lexer, scc_lexer_stream_t *stream,
|
||||
cbool need_comment);
|
||||
|
||||
static inline const scc_lexer_tok_t *
|
||||
scc_lexer_stream_current(scc_lexer_stream_t *stream) {
|
||||
Assert(stream != null);
|
||||
return stream->peek(stream, 0);
|
||||
}
|
||||
|
||||
static inline const scc_lexer_tok_t *
|
||||
scc_lexer_stream_peek(scc_lexer_stream_t *stream, usize n) {
|
||||
Assert(stream != null);
|
||||
return stream->peek(stream, n);
|
||||
}
|
||||
|
||||
/**
 * @brief Consume one token by advancing the stream by a single position.
 * @param[in] stream token stream; must not be null
 */
static inline void scc_lexer_stream_consume(scc_lexer_stream_t *stream) {
    Assert(stream != null);
    /* Plain call: ISO C forbids `return expr;` in a function returning void,
     * even when expr itself has type void. */
    stream->advance(stream, 1);
}
|
||||
|
||||
static inline void scc_lexer_stream_advance(scc_lexer_stream_t *stream,
|
||||
usize n) {
|
||||
Assert(stream != null);
|
||||
return stream->advance(stream, n);
|
||||
}
|
||||
|
||||
/**
 * @brief Destroy the stream through its drop callback.
 * @param[in] stream token stream; must not be null
 */
static inline void scc_lexer_stream_drop(scc_lexer_stream_t *stream) {
    Assert(stream != null);
    /* Plain call: ISO C forbids `return expr;` in a function returning void,
     * even when expr itself has type void. */
    stream->drop(stream);
}
|
||||
|
||||
#endif /* __SCC_LEXER_H__ */
|
||||
@@ -144,6 +144,10 @@ typedef enum scc_tok_subtype {
|
||||
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);
|
||||
const char *scc_get_tok_name(scc_tok_type_t type);
|
||||
|
||||
/**
|
||||
* @brief 词法分析结果
|
||||
* @warning 需要手动释放lexeme否则会出现内存泄漏
|
||||
*/
|
||||
typedef struct scc_lexer_token {
|
||||
scc_tok_type_t type;
|
||||
scc_cstring_t lexeme;
|
||||
|
||||
54
libs/lexer/include/scc_lexer.h
Normal file
54
libs/lexer/include/scc_lexer.h
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
* @file scc_lexer.h
|
||||
* @brief C语言词法分析器核心数据结构与接口
|
||||
*/
|
||||
|
||||
#ifndef __SCC_LEXER_H__
|
||||
#define __SCC_LEXER_H__
|
||||
|
||||
#include "lexer_token.h"
|
||||
#include <scc_core.h>
|
||||
#include <scc_core_ring.h>
|
||||
#include <scc_sstream.h>
|
||||
|
||||
typedef SCC_RING(scc_lexer_tok_t) scc_lexer_tok_ring_t;
|
||||
typedef SCC_VEC(scc_lexer_tok_t) scc_lexer_tok_vec_t;
|
||||
/**
|
||||
* @brief 词法分析器核心结构体
|
||||
*
|
||||
* 封装词法分析所需的状态信息和缓冲区管理
|
||||
*/
|
||||
typedef struct scc_lexer {
|
||||
scc_sstream_ring_t *stream_ref;
|
||||
scc_lexer_tok_ring_t ring;
|
||||
int ring_ref_count;
|
||||
int jump_macro;
|
||||
} scc_lexer_t;
|
||||
|
||||
void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref);
|
||||
|
||||
/**
|
||||
* @brief 获取原始token
|
||||
* @param[in] lexer 词法分析器实例
|
||||
* @param[out] token 输出token存储位置
|
||||
*
|
||||
* 此函数会返回所有类型的token,包括空白符等无效token
|
||||
*/
|
||||
void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token);
|
||||
|
||||
/**
|
||||
* @brief 获取有效token
|
||||
* @param[in] lexer 词法分析器实例
|
||||
* @param[out] token 输出token存储位置
|
||||
*
|
||||
* 此函数会自动跳过空白符等无效token,返回对语法分析有意义的token
|
||||
*/
|
||||
void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token);
|
||||
|
||||
scc_lexer_tok_ring_t *scc_lexer_to_ring(scc_lexer_t *lexer, int ring_size,
|
||||
cbool need_comment);
|
||||
|
||||
void scc_lexer_drop_ring(scc_lexer_tok_ring_t *ring_ref);
|
||||
void scc_lexer_drop(scc_lexer_t *lexer);
|
||||
|
||||
#endif /* __SCC_LEXER_H__ */
|
||||
@@ -1,5 +1,6 @@
|
||||
#include <lexer.h>
|
||||
#include "scc_lexer.h"
|
||||
#include <lexer_log.h>
|
||||
#include <scc_lexer.h>
|
||||
|
||||
static const struct {
|
||||
const char *name;
|
||||
@@ -41,7 +42,8 @@ static int keyword_cmp(const char *name, int len) {
|
||||
}
|
||||
|
||||
void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref) {
|
||||
lexer->stream_ref = *stream_ref;
|
||||
lexer->stream_ref = stream_ref;
|
||||
lexer->ring_ref_count = 0;
|
||||
lexer->jump_macro = false;
|
||||
}
|
||||
|
||||
@@ -68,7 +70,7 @@ static inline cbool is_hex_digit(int ch) {
|
||||
/* 从环形缓冲区预览一个字符(带EOF检测) */
|
||||
static inline cbool peek_char(scc_lexer_t *lexer, scc_sstream_char_t *out) {
|
||||
cbool ok;
|
||||
scc_ring_peek(lexer->stream_ref, *out, ok);
|
||||
scc_ring_peek(*lexer->stream_ref, *out, ok);
|
||||
return ok;
|
||||
}
|
||||
|
||||
@@ -76,7 +78,7 @@ static inline cbool peek_char(scc_lexer_t *lexer, scc_sstream_char_t *out) {
|
||||
static inline cbool next_char(scc_lexer_t *lexer, scc_cstring_t *lexeme,
|
||||
scc_sstream_char_t *out) {
|
||||
cbool ok;
|
||||
scc_ring_next(lexer->stream_ref, *out, ok);
|
||||
scc_ring_next(*lexer->stream_ref, *out, ok);
|
||||
if (!ok)
|
||||
return false;
|
||||
scc_cstring_append_ch(lexeme, out->character);
|
||||
@@ -132,7 +134,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
|
||||
next_char(lexer, &lex, &cur); // 消费 '/'
|
||||
while (peek_char(lexer, &cur) && !is_newline(cur.character)) {
|
||||
next_char(lexer, &lex, &cur);
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
scc_ring_consume(*lexer->stream_ref);
|
||||
}
|
||||
// 注释结束,不包含换行符(换行符单独成token)
|
||||
} else if (next.character == '*') {
|
||||
@@ -150,7 +152,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
|
||||
next_char(lexer, &lex, &cur); // 消费 '/'
|
||||
break;
|
||||
}
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
scc_ring_consume(*lexer->stream_ref);
|
||||
}
|
||||
} else {
|
||||
// 只是除号 /
|
||||
@@ -161,7 +163,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
|
||||
token->type = SCC_TOK_IDENT; // 暂定
|
||||
while (peek_char(lexer, &cur) && is_identifier_part(cur.character)) {
|
||||
next_char(lexer, &lex, &cur);
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
scc_ring_consume(*lexer->stream_ref);
|
||||
}
|
||||
// 检查是否为关键字
|
||||
int idx = keyword_cmp(scc_cstring_as_cstr(&lex), scc_cstring_len(&lex));
|
||||
@@ -241,7 +243,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
|
||||
} else {
|
||||
next_char(lexer, &lex, &cur);
|
||||
}
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
scc_ring_consume(*lexer->stream_ref);
|
||||
}
|
||||
} else {
|
||||
scc_sstream_char_t next = {0};
|
||||
@@ -447,7 +449,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
|
||||
}
|
||||
|
||||
// 设置token
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
scc_ring_consume(*lexer->stream_ref);
|
||||
token->type = token->type; // 上面已设
|
||||
token->loc = start_loc;
|
||||
token->lexeme = lex; // 转移所有权
|
||||
@@ -469,3 +471,42 @@ void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
|
||||
} while (subtype == SCC_TOK_SUBTYPE_EMPTYSPACE ||
|
||||
subtype == SCC_TOK_SUBTYPE_COMMENT);
|
||||
}
|
||||
|
||||
/* Ring fill callback: produces the next raw token (whitespace and comments
 * included) from the lexer passed as userdata. Always reports 0. */
static int fill_token(scc_lexer_tok_t *out, void *userdata) {
    scc_lexer_get_token((scc_lexer_t *)userdata, out);
    return 0;
}
|
||||
|
||||
/* Ring fill callback: produces the next significant token from the lexer
 * passed as userdata. Always reports 0. */
static int fill_valid_token(scc_lexer_tok_t *out, void *userdata) {
    scc_lexer_get_valid_token((scc_lexer_t *)userdata, out);
    return 0;
}
|
||||
|
||||
scc_lexer_tok_ring_t *scc_lexer_to_ring(scc_lexer_t *lexer, int ring_size,
|
||||
cbool need_comment) {
|
||||
scc_ring_init(lexer->ring, ring_size,
|
||||
need_comment ? fill_token : fill_valid_token, lexer);
|
||||
lexer->ring_ref_count++;
|
||||
return &lexer->ring;
|
||||
}
|
||||
|
||||
/**
 * @brief Release a ring reference obtained from scc_lexer_to_ring().
 * @param[in] ring_ref ring returned by scc_lexer_to_ring(); must not be null
 *
 * The owning lexer is recovered from the ring's userdata and its reference
 * count is decremented. Dropping when the count is already zero only logs a
 * warning.
 */
void scc_lexer_drop_ring(scc_lexer_tok_ring_t *ring_ref) {
    Assert(ring_ref != null);

    scc_lexer_t *lexer = ring_ref->userdata;
    /* userdata is set to the lexer by scc_lexer_to_ring(); a null here means
     * the ring did not come from there. */
    Assert(lexer != null);

    if (lexer->ring_ref_count <= 0) {
        LOG_WARN("double drop lexer ring");
        return;
    }
    lexer->ring_ref_count--;
}
|
||||
|
||||
/**
 * @brief Destroy a lexer.
 * @param[in] lexer lexer instance; must not be null
 *
 * Reports a fatal error if ring references are still outstanding, then frees
 * the embedded token ring and drops the source character ring.
 */
void scc_lexer_drop(scc_lexer_t *lexer) {
    Assert(lexer != null);

    if (lexer->ring_ref_count) {
        /* NOTE(review): if LOG_FATAL does not terminate, the ring below is
         * freed while references are still held — confirm. */
        LOG_FATAL("drop lexer must be drop ring before ref [%d]",
                  lexer->ring_ref_count);
    }

    scc_ring_free(lexer->ring);
    /* NOTE(review): the lexer releases the character ring it was given in
     * scc_lexer_init(); callers that also drop that ring themselves would
     * drop it twice — confirm ownership. */
    scc_sstream_drop_ring(lexer->stream_ref);
}
|
||||
|
||||
@@ -1,139 +0,0 @@
|
||||
#include <lexer.h>
|
||||
|
||||
/*
 * Pull n more tokens from the lexer into the stream's circular buffer,
 * growing the buffer first when the pending tokens plus n would not fit.
 *
 * Growth allocates a new array, copies the pending tokens to its start in
 * order, and rebases curr_pos to 0 and probe_pos to the pending count.
 * On allocation failure the function logs and returns without reading any
 * token.
 */
static void lexer_stream_extend(scc_lexer_stream_t *stream, usize n) {
    Assert(stream != null);

    /* Tokens already buffered but not yet consumed. */
    usize pending = stream->probe_pos - stream->curr_pos;

    if (pending + n >= stream->toks.cap) {
        /* Double the capacity, but never below what this request needs. */
        usize grown_cap = stream->toks.cap * 2;
        usize min_cap = pending + n + 1;
        if (grown_cap < min_cap) {
            grown_cap = min_cap;
        }

        scc_lexer_tok_t *fresh =
            scc_realloc(null, grown_cap * sizeof(scc_lexer_tok_t));
        if (!fresh) {
            LOG_FATAL("lexer_stream_extend: realloc failed\n");
            return;
        }

        /* Unroll the old ring into the front of the new array. */
        for (usize k = 0; k < pending; ++k) {
            fresh[k] =
                stream->toks.data[(stream->curr_pos + k) % stream->toks.cap];
        }

        if (stream->toks.data) {
            scc_free(stream->toks.data);
        }

        stream->toks.data = fresh;
        stream->toks.cap = grown_cap;
        stream->curr_pos = 0;
        stream->probe_pos = pending;
    }

    /* Fill the next n slots; comments are kept only when requested. */
    for (usize k = 0; k < n; ++k) {
        scc_lexer_tok_t *slot =
            &stream->toks.data[(stream->probe_pos + k) % stream->toks.cap];
        if (stream->need_comment) {
            scc_lexer_get_token(stream->lexer, slot);
        } else {
            scc_lexer_get_valid_token(stream->lexer, slot);
        }
    }

    stream->probe_pos += n;
}
|
||||
|
||||
static const scc_lexer_tok_t *lexer_stream_peek(scc_lexer_stream_t *stream,
|
||||
usize n) {
|
||||
Assert(stream != null);
|
||||
|
||||
// 计算需要的前看token数量
|
||||
usize available = stream->probe_pos - stream->curr_pos;
|
||||
if (n >= available) {
|
||||
// 需要扩展缓冲区
|
||||
usize need = n - available + 1;
|
||||
lexer_stream_extend(stream, need);
|
||||
}
|
||||
|
||||
// 计算实际缓冲区中的位置
|
||||
usize idx = (stream->curr_pos + n) % stream->toks.cap;
|
||||
return &stream->toks.data[idx];
|
||||
}
|
||||
|
||||
/*
 * Move the read position forward by offset, reading more tokens first if the
 * target lies beyond what has been buffered. Afterwards, once the read
 * position has passed three quarters of the capacity, the pending tokens are
 * moved into a freshly allocated, exactly-sized buffer.
 */
static void lexer_stream_advance(scc_lexer_stream_t *stream, usize offset) {
    Assert(stream != null);

    usize target = stream->curr_pos + offset;
    if (target > stream->probe_pos) {
        /* Try to buffer the tokens that are being skipped over. */
        lexer_stream_extend(stream, target - stream->probe_pos);
    }

    /* Relative update on purpose: extending may have rebased curr_pos. */
    stream->curr_pos += offset;

    /* Optional compaction; nothing to do while below the threshold. */
    if (stream->curr_pos <= stream->toks.cap * 3 / 4) {
        return;
    }

    usize pending = stream->probe_pos - stream->curr_pos;
    scc_lexer_tok_t *packed =
        scc_realloc(null, pending * sizeof(scc_lexer_tok_t));
    if (!packed) {
        return; /* failing to compact is harmless */
    }

    /* Copy the live tokens to the front of the new buffer, in order. */
    for (usize k = 0; k < pending; ++k) {
        packed[k] =
            stream->toks.data[(stream->curr_pos + k) % stream->toks.cap];
    }

    scc_free(stream->toks.data);
    stream->toks.data = packed;
    stream->toks.cap = pending;
    stream->curr_pos = 0;
    stream->probe_pos = pending;
}
|
||||
|
||||
/*
 * Free the token buffer and reset every field of the stream, including its
 * callbacks, so that the object can no longer be used.
 *
 * NOTE(review): per-token resources are not released here; only the buffer
 * itself is freed — confirm who owns the tokens' contents.
 */
static void lexer_stream_drop(scc_lexer_stream_t *stream) {
    Assert(stream != null);

    scc_vec_free(stream->toks);

    /* Detach the callbacks. */
    stream->peek = null;
    stream->advance = null;
    stream->drop = null;

    /* Clear the remaining state. */
    stream->need_comment = false;
    stream->probe_pos = 0;
    stream->curr_pos = 0;
    stream->lexer = null;
}
|
||||
|
||||
void scc_lexer_to_stream(scc_lexer_t *lexer, scc_lexer_stream_t *stream,
|
||||
cbool need_comment) {
|
||||
Assert(lexer != null && stream != null);
|
||||
|
||||
stream->lexer = lexer;
|
||||
stream->curr_pos = 0;
|
||||
stream->probe_pos = 0;
|
||||
stream->need_comment = need_comment;
|
||||
|
||||
// 初始化循环缓冲区
|
||||
scc_vec_init(stream->toks);
|
||||
scc_vec_realloc(stream->toks, 8); // 初始容量为8
|
||||
|
||||
stream->peek = lexer_stream_peek;
|
||||
stream->advance = lexer_stream_advance;
|
||||
stream->drop = lexer_stream_drop;
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
#include <lexer.h>
|
||||
#include <lexer_log.h>
|
||||
#include <scc_lexer.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -53,10 +53,7 @@ int main(int argc, char *argv[]) {
|
||||
scc_get_tok_name(token.type),
|
||||
scc_cstring_as_cstr(&token.lexeme), token.loc.name,
|
||||
token.loc.line, token.loc.col);
|
||||
// LOG_DEBUG("%s", token.val.str);
|
||||
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
|
||||
// lexer.line, lexer.index, token.type,
|
||||
// scc_get_tok_name(token.type));
|
||||
scc_cstring_free(&token.lexeme);
|
||||
}
|
||||
scc_sstream_drop_ring(ref);
|
||||
scc_sstream_drop(&stream);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// test_lexer.c
|
||||
#include <lexer.h>
|
||||
#include <scc_lexer.h>
|
||||
#include <string.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user