refactor(lexer): 重构词法分析器头文件结构并优化缓冲区管理

移除了旧的lexer_stream.c实现,引入新的环形缓冲区机制来替代原有的
动态数组缓冲区。更新了词法分析器的核心数据结构,修改了token获取
相关函数的实现以支持新的缓冲区管理方式。

BREAKING CHANGE: 移除了scc_lexer_stream_t相关的API,替换为基于
环形缓冲区的新接口scc_lexer_to_ring和相关函数。

feat(lexer_token): 添加词法分析结果内存泄漏警告注释

docs: 移除预处理器模块的测试文件和相关配置
This commit is contained in:
zzy
2026-02-16 21:21:23 +08:00
parent 0e7dec202a
commit b4929be6b8
72 changed files with 119 additions and 2474 deletions

View File

@@ -1,100 +0,0 @@
/**
 * @file lexer.h
 * @brief Core data structures and interfaces of the C lexer.
 *
 * NOTE(review): the include-guard name __SCC_LEXER_H__ uses a reserved
 * identifier (leading double underscore); consider SCC_LEXER_H.
 */
#ifndef __SCC_LEXER_H__
#define __SCC_LEXER_H__
#include "lexer_token.h"
#include <scc_core.h>
#include <scc_sstream.h>
/**
 * @brief Core lexer state.
 *
 * Bundles the state and buffer management needed for lexical analysis.
 */
typedef struct scc_lexer {
  scc_sstream_ring_t stream_ref; /* character source, held by value */
  int jump_macro;                /* presumably toggles macro skipping — TODO confirm */
} scc_lexer_t;
/**
 * @brief Initialize a lexer over a character ring stream.
 * @param[in,out] lexer      lexer instance to initialize
 * @param[in]     stream_ref character source (copied into the lexer)
 */
void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref);
/**
 * @brief Fetch the next raw token.
 * @param[in]  lexer lexer instance
 * @param[out] token location where the token is stored
 *
 * Returns tokens of every kind, including "invalid" ones such as whitespace.
 */
void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token);
/**
 * @brief Fetch the next meaningful token.
 * @param[in]  lexer lexer instance
 * @param[out] token location where the token is stored
 *
 * Automatically skips whitespace and other tokens that carry no meaning
 * for parsing, returning only tokens useful to syntax analysis.
 */
void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token);
typedef SCC_VEC(scc_lexer_tok_t) scc_lexer_tok_vec_t;
typedef struct scc_lexer_stream scc_lexer_stream_t;
/**
 * @brief Buffered, stream-style view over a lexer (vtable-based).
 */
struct scc_lexer_stream {
  scc_lexer_t *lexer;
  scc_lexer_tok_vec_t toks; /* circular token buffer */
  usize curr_pos;           /* current read position (logical, monotonic) */
  usize probe_pos;          /* filled-up-to position (logical, monotonic) */
  cbool need_comment;
  /// @brief Look ahead n tokens without consuming.
  const scc_lexer_tok_t *(*peek)(scc_lexer_stream_t *stream, usize n);
  /// @brief Advance the read pointer by offset tokens.
  void (*advance)(scc_lexer_stream_t *stream, usize offset);
  /// @brief Destroy the stream and release its resources.
  void (*drop)(scc_lexer_stream_t *stream);
};
/**
 * @brief Turn a lexer into a buffered token stream.
 * @param[in]  lexer        an initialized lexer instance
 * @param[out] stream       stream object to set up
 * @param[in]  need_comment whether comment tokens should be emitted
 */
void scc_lexer_to_stream(scc_lexer_t *lexer, scc_lexer_stream_t *stream,
                         cbool need_comment);
/* Convenience wrappers over the stream vtable.
 * NOTE(review): `return expr;` in a void function (consume/advance/drop
 * below) is a C constraint violation, though common compilers accept it. */
static inline const scc_lexer_tok_t *
scc_lexer_stream_current(scc_lexer_stream_t *stream) {
  Assert(stream != null);
  return stream->peek(stream, 0);
}
static inline const scc_lexer_tok_t *
scc_lexer_stream_peek(scc_lexer_stream_t *stream, usize n) {
  Assert(stream != null);
  return stream->peek(stream, n);
}
static inline void scc_lexer_stream_consume(scc_lexer_stream_t *stream) {
  Assert(stream != null);
  return stream->advance(stream, 1);
}
static inline void scc_lexer_stream_advance(scc_lexer_stream_t *stream,
                                            usize n) {
  Assert(stream != null);
  return stream->advance(stream, n);
}
static inline void scc_lexer_stream_drop(scc_lexer_stream_t *stream) {
  Assert(stream != null);
  return stream->drop(stream);
}
#endif /* __SCC_LEXER_H__ */

View File

@@ -144,6 +144,10 @@ typedef enum scc_tok_subtype {
scc_tok_subtype_t scc_get_tok_subtype(scc_tok_type_t type);
const char *scc_get_tok_name(scc_tok_type_t type);
/**
* @brief 词法分析结果
* @warning 需要手动释放lexeme否则会出现内存泄漏
*/
typedef struct scc_lexer_token {
scc_tok_type_t type;
scc_cstring_t lexeme;

View File

@@ -0,0 +1,54 @@
/**
 * @file lexer.h
 * @brief Core data structures and interfaces of the C lexer.
 *
 * NOTE(review): the include-guard name __SCC_LEXER_H__ uses a reserved
 * identifier (leading double underscore); consider SCC_LEXER_H.
 */
#ifndef __SCC_LEXER_H__
#define __SCC_LEXER_H__
#include "lexer_token.h"
#include <scc_core.h>
#include <scc_core_ring.h>
#include <scc_sstream.h>
/* Ring buffer and vector specializations for tokens. */
typedef SCC_RING(scc_lexer_tok_t) scc_lexer_tok_ring_t;
typedef SCC_VEC(scc_lexer_tok_t) scc_lexer_tok_vec_t;
/**
 * @brief Core lexer state.
 *
 * Bundles the state and buffer management needed for lexical analysis.
 */
typedef struct scc_lexer {
  scc_sstream_ring_t *stream_ref; /* borrowed character source */
  scc_lexer_tok_ring_t ring;      /* token ring handed out by scc_lexer_to_ring */
  int ring_ref_count;             /* outstanding ring references */
  int jump_macro;                 /* presumably toggles macro skipping — TODO confirm */
} scc_lexer_t;
/**
 * @brief Initialize a lexer over a character ring stream.
 * @param[in,out] lexer      lexer instance to initialize
 * @param[in]     stream_ref character source (borrowed, not copied)
 */
void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref);
/**
 * @brief Fetch the next raw token.
 * @param[in]  lexer lexer instance
 * @param[out] token location where the token is stored
 *
 * Returns tokens of every kind, including "invalid" ones such as whitespace.
 */
void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token);
/**
 * @brief Fetch the next meaningful token.
 * @param[in]  lexer lexer instance
 * @param[out] token location where the token is stored
 *
 * Automatically skips whitespace and other tokens that carry no meaning
 * for parsing, returning only tokens useful to syntax analysis.
 */
void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token);
/**
 * @brief Expose the lexer as a ring-buffered token stream.
 * @param[in] lexer        initialized lexer instance (owns the ring)
 * @param[in] ring_size    capacity of the token ring
 * @param[in] need_comment if true the ring yields raw tokens (including
 *                         whitespace/comments); otherwise only valid tokens
 * @return borrowed pointer to the lexer-owned ring; release with
 *         scc_lexer_drop_ring()
 */
scc_lexer_tok_ring_t *scc_lexer_to_ring(scc_lexer_t *lexer, int ring_size,
                                        cbool need_comment);
/** @brief Release one reference obtained from scc_lexer_to_ring(). */
void scc_lexer_drop_ring(scc_lexer_tok_ring_t *ring_ref);
/** @brief Destroy the lexer; all ring references must be dropped first. */
void scc_lexer_drop(scc_lexer_t *lexer);
#endif /* __SCC_LEXER_H__ */

View File

@@ -1,5 +1,6 @@
#include <lexer.h>
#include "scc_lexer.h"
#include <lexer_log.h>
#include <scc_lexer.h>
static const struct {
const char *name;
@@ -41,7 +42,8 @@ static int keyword_cmp(const char *name, int len) {
}
void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref) {
lexer->stream_ref = *stream_ref;
lexer->stream_ref = stream_ref;
lexer->ring_ref_count = 0;
lexer->jump_macro = false;
}
@@ -68,7 +70,7 @@ static inline cbool is_hex_digit(int ch) {
/* 从环形缓冲区预览一个字符带EOF检测 */
static inline cbool peek_char(scc_lexer_t *lexer, scc_sstream_char_t *out) {
cbool ok;
scc_ring_peek(lexer->stream_ref, *out, ok);
scc_ring_peek(*lexer->stream_ref, *out, ok);
return ok;
}
@@ -76,7 +78,7 @@ static inline cbool peek_char(scc_lexer_t *lexer, scc_sstream_char_t *out) {
static inline cbool next_char(scc_lexer_t *lexer, scc_cstring_t *lexeme,
scc_sstream_char_t *out) {
cbool ok;
scc_ring_next(lexer->stream_ref, *out, ok);
scc_ring_next(*lexer->stream_ref, *out, ok);
if (!ok)
return false;
scc_cstring_append_ch(lexeme, out->character);
@@ -132,7 +134,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
next_char(lexer, &lex, &cur); // 消费 '/'
while (peek_char(lexer, &cur) && !is_newline(cur.character)) {
next_char(lexer, &lex, &cur);
scc_ring_consume(lexer->stream_ref);
scc_ring_consume(*lexer->stream_ref);
}
// 注释结束不包含换行符换行符单独成token
} else if (next.character == '*') {
@@ -150,7 +152,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
next_char(lexer, &lex, &cur); // 消费 '/'
break;
}
scc_ring_consume(lexer->stream_ref);
scc_ring_consume(*lexer->stream_ref);
}
} else {
// 只是除号 /
@@ -161,7 +163,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
token->type = SCC_TOK_IDENT; // 暂定
while (peek_char(lexer, &cur) && is_identifier_part(cur.character)) {
next_char(lexer, &lex, &cur);
scc_ring_consume(lexer->stream_ref);
scc_ring_consume(*lexer->stream_ref);
}
// 检查是否为关键字
int idx = keyword_cmp(scc_cstring_as_cstr(&lex), scc_cstring_len(&lex));
@@ -241,7 +243,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
} else {
next_char(lexer, &lex, &cur);
}
scc_ring_consume(lexer->stream_ref);
scc_ring_consume(*lexer->stream_ref);
}
} else {
scc_sstream_char_t next = {0};
@@ -447,7 +449,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
}
// 设置token
scc_ring_consume(lexer->stream_ref);
scc_ring_consume(*lexer->stream_ref);
token->type = token->type; // 上面已设
token->loc = start_loc;
token->lexeme = lex; // 转移所有权
@@ -469,3 +471,42 @@ void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
} while (subtype == SCC_TOK_SUBTYPE_EMPTYSPACE ||
subtype == SCC_TOK_SUBTYPE_COMMENT);
}
/* Ring fill callback: emit the next raw token (whitespace/comments included). */
static int fill_token(scc_lexer_tok_t *out, void *userdata) {
  scc_lexer_get_token((scc_lexer_t *)userdata, out);
  return 0; /* always reports success */
}
/* Ring fill callback: emit the next meaningful token (skips whitespace etc.). */
static int fill_valid_token(scc_lexer_tok_t *out, void *userdata) {
  scc_lexer_get_valid_token((scc_lexer_t *)userdata, out);
  return 0; /* always reports success */
}
/**
 * @brief Expose the lexer as a ring-buffered token stream.
 * @param[in] lexer        initialized lexer instance (owns the ring storage)
 * @param[in] ring_size    capacity of the token ring
 * @param[in] need_comment if true the ring yields raw tokens (including
 *                         whitespace/comments); otherwise only valid tokens
 * @return borrowed pointer to the lexer-owned ring; release with
 *         scc_lexer_drop_ring()
 *
 * NOTE(review): each call re-initializes lexer->ring while also bumping
 * ring_ref_count — confirm that acquiring a second reference (which resets
 * the shared ring) is intended.
 */
scc_lexer_tok_ring_t *scc_lexer_to_ring(scc_lexer_t *lexer, int ring_size,
                                        cbool need_comment) {
  scc_ring_init(lexer->ring, ring_size,
                need_comment ? fill_token : fill_valid_token, lexer);
  lexer->ring_ref_count++;
  return &lexer->ring;
}
/**
 * @brief Release one reference to a lexer's token ring.
 * @param[in] ring_ref ring previously returned by scc_lexer_to_ring()
 *
 * Decrements the owning lexer's reference count; the ring storage itself is
 * freed later by scc_lexer_drop(). Dropping more times than the ring was
 * acquired logs a warning instead of corrupting the count.
 */
void scc_lexer_drop_ring(scc_lexer_tok_ring_t *ring_ref) {
  /* The ring's fill userdata is the owning lexer (set in scc_lexer_to_ring). */
  scc_lexer_t *lexer = ring_ref->userdata;
  if (lexer->ring_ref_count > 0) {
    lexer->ring_ref_count--;
  } else {
    /* Fixed message: this is the lexer token ring, not the sstream ring. */
    LOG_WARN("double drop of lexer token ring");
  }
}
/**
 * @brief Destroy a lexer and release the resources it owns.
 * @param[in] lexer lexer to destroy; must not be null
 *
 * All token-ring references handed out by scc_lexer_to_ring() must have been
 * released via scc_lexer_drop_ring() first; otherwise this is a fatal error.
 * Frees the token ring storage and drops the borrowed character-stream ring.
 */
void scc_lexer_drop(scc_lexer_t *lexer) {
  Assert(lexer != null);
  if (lexer->ring_ref_count) {
    /* Fixed message: previous text was garbled and referred to "sstream". */
    LOG_FATAL("cannot drop lexer: %d token ring reference(s) still held",
              lexer->ring_ref_count);
  }
  scc_ring_free(lexer->ring);
  scc_sstream_drop_ring(lexer->stream_ref);
}

View File

@@ -1,139 +0,0 @@
#include <lexer.h>
/**
 * @brief Ensure capacity for, and then fill, n more look-ahead tokens.
 * @param[in,out] stream stream whose circular buffer is extended
 * @param[in]     n      number of additional tokens to pull from the lexer
 *
 * Grows the circular buffer (rebasing logical positions to 0) when the live
 * span plus n would not fit, then reads n fresh tokens into the tail.
 */
static void lexer_stream_extend(scc_lexer_stream_t *stream, usize n) {
  Assert(stream != null);
  /* Grow when the live span [curr_pos, probe_pos) plus n would overflow cap. */
  if ((stream->probe_pos - stream->curr_pos + n) >= stream->toks.cap) {
    /* Double the capacity, or more if doubling still isn't enough. */
    usize new_cap = stream->toks.cap * 2;
    if (new_cap < stream->probe_pos - stream->curr_pos + n + 1) {
      new_cap = stream->probe_pos - stream->curr_pos + n + 1;
    }
    /* NOTE(review): scc_realloc(null, ...) is used as a plain allocation. */
    scc_lexer_tok_t *new_data =
        scc_realloc(null, new_cap * sizeof(scc_lexer_tok_t));
    if (!new_data) {
      LOG_FATAL("lexer_stream_extend: realloc failed\n");
      return;
    }
    /* Copy live tokens into the new buffer, preserving order. */
    usize data_count = stream->probe_pos - stream->curr_pos;
    for (usize i = 0; i < data_count; ++i) {
      usize old_idx = (stream->curr_pos + i) % stream->toks.cap;
      new_data[i] = stream->toks.data[old_idx];
    }
    /* Release the old buffer. */
    if (stream->toks.data) {
      scc_free(stream->toks.data);
    }
    /* Install the new buffer and rebase logical positions onto it. */
    stream->toks.data = new_data;
    stream->toks.cap = new_cap;
    stream->curr_pos = 0;
    stream->probe_pos = data_count;
  }
  /* Pull n fresh tokens into the tail slots of the ring.
     NOTE(review): if a slot still holds a previously consumed token, its
     lexeme is overwritten without being freed — possible leak; confirm
     token-lexeme ownership (lexer_token.h warns lexemes need manual free). */
  for (usize i = 0; i < n; ++i) {
    usize idx = (stream->probe_pos + i) % stream->toks.cap;
    if (stream->need_comment)
      scc_lexer_get_token(stream->lexer, &stream->toks.data[idx]);
    else
      scc_lexer_get_valid_token(stream->lexer, &stream->toks.data[idx]);
  }
  stream->probe_pos += n;
}
/**
 * @brief Look ahead n tokens without consuming any.
 * @param[in,out] stream token stream (filled lazily from the lexer)
 * @param[in]     n      look-ahead distance (0 = current token)
 * @return borrowed pointer to the requested token inside the buffer
 */
static const scc_lexer_tok_t *lexer_stream_peek(scc_lexer_stream_t *stream,
                                                usize n) {
  Assert(stream != null);
  /* Pull more tokens if the buffer doesn't yet reach position n. */
  const usize buffered = stream->probe_pos - stream->curr_pos;
  if (buffered <= n)
    lexer_stream_extend(stream, n + 1 - buffered);
  /* Map the logical position onto the circular buffer. */
  return &stream->toks.data[(stream->curr_pos + n) % stream->toks.cap];
}
/**
 * @brief Consume offset tokens, filling from the lexer if necessary.
 * @param[in,out] stream stream to advance
 * @param[in]     offset number of tokens to consume
 *
 * After advancing, the buffer is opportunistically compacted once the
 * logical read position has moved past 3/4 of the capacity.
 */
static void lexer_stream_advance(scc_lexer_stream_t *stream, usize offset) {
  Assert(stream != null);
  if (stream->curr_pos + offset > stream->probe_pos) {
    /* Not enough buffered tokens — pull the shortfall from the lexer. */
    usize need = stream->curr_pos + offset - stream->probe_pos;
    lexer_stream_extend(stream, need);
  }
  stream->curr_pos += offset;
  /* Optional: compact once many tokens have been consumed.
     NOTE(review): this compares the logical position against the physical
     capacity — confirm that is the intended trigger. */
  if (stream->curr_pos > stream->toks.cap * 3 / 4) {
    /* Compaction: move live tokens to the front of a fresh buffer.
       NOTE(review): if data_count is 0 this allocates 0 bytes and sets cap
       to 0, making the `% cap` in peek/extend a division by zero — confirm
       this case cannot occur. scc_realloc(null, ...) acts as malloc here. */
    usize data_count = stream->probe_pos - stream->curr_pos;
    scc_lexer_tok_t *temp =
        scc_realloc(null, data_count * sizeof(scc_lexer_tok_t));
    if (!temp)
      return; /* compaction is best-effort; failure is harmless */
    for (usize i = 0; i < data_count; ++i) {
      usize old_idx = (stream->curr_pos + i) % stream->toks.cap;
      temp[i] = stream->toks.data[old_idx];
    }
    scc_free(stream->toks.data);
    stream->toks.data = temp;
    stream->toks.cap = data_count;
    stream->curr_pos = 0;
    stream->probe_pos = data_count;
  }
}
/**
 * @brief Destroy the stream and release its buffered tokens.
 * @param[in,out] stream stream to tear down; fields are reset to null/0
 *
 * lexer_token.h warns that token lexemes must be freed manually or they
 * leak; the previous implementation only had an empty placeholder loop.
 * This frees the lexeme of every token that was filled but not yet
 * consumed ([curr_pos, probe_pos)), then releases the buffer itself.
 * NOTE(review): slots holding tokens that were consumed earlier and not
 * yet overwritten may still leak their lexemes — they are indistinguishable
 * from never-filled (uninitialized) slots here; confirm upstream ownership.
 */
static void lexer_stream_drop(scc_lexer_stream_t *stream) {
  Assert(stream != null);
  /* Free lexemes of tokens produced but never handed out for consumption. */
  for (usize i = stream->curr_pos; i < stream->probe_pos; ++i) {
    scc_cstring_free(&stream->toks.data[i % stream->toks.cap].lexeme);
  }
  scc_vec_free(stream->toks);
  stream->lexer = null;
  stream->curr_pos = 0;
  stream->probe_pos = 0;
  stream->need_comment = false;
  stream->peek = null;
  stream->advance = null;
  stream->drop = null;
}
/**
 * @brief Wrap a lexer in a buffered token stream.
 * @param[in]  lexer        initialized lexer instance
 * @param[out] stream       stream object to set up
 * @param[in]  need_comment whether comment tokens should be emitted
 */
void scc_lexer_to_stream(scc_lexer_t *lexer, scc_lexer_stream_t *stream,
                         cbool need_comment) {
  Assert(lexer != null && stream != null);
  /* Plain state first... */
  stream->lexer = lexer;
  stream->need_comment = need_comment;
  stream->curr_pos = 0;
  stream->probe_pos = 0;
  /* ...then the circular buffer (initial capacity: 8 tokens)... */
  scc_vec_init(stream->toks);
  scc_vec_realloc(stream->toks, 8);
  /* ...and finally the vtable. */
  stream->peek = lexer_stream_peek;
  stream->advance = lexer_stream_advance;
  stream->drop = lexer_stream_drop;
}

View File

@@ -1,5 +1,5 @@
#include <lexer.h>
#include <lexer_log.h>
#include <scc_lexer.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@@ -53,10 +53,7 @@ int main(int argc, char *argv[]) {
scc_get_tok_name(token.type),
scc_cstring_as_cstr(&token.lexeme), token.loc.name,
token.loc.line, token.loc.col);
// LOG_DEBUG("%s", token.val.str);
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
// lexer.line, lexer.index, token.type,
// scc_get_tok_name(token.type));
scc_cstring_free(&token.lexeme);
}
scc_sstream_drop_ring(ref);
scc_sstream_drop(&stream);

View File

@@ -1,5 +1,5 @@
// test_lexer.c
#include <lexer.h>
#include <scc_lexer.h>
#include <string.h>
#include <utest/acutest.h>