From 681a15cb44dc537a30bfc524dd1263cec18ddb88 Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Mon, 16 Feb 2026 22:27:09 +0800 Subject: [PATCH] =?UTF-8?q?feat(lexer):=20=E6=B7=BB=E5=8A=A0=E9=A2=84?= =?UTF-8?q?=E5=A4=84=E7=90=86=E5=99=A8=E5=85=B3=E9=94=AE=E5=AD=97=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E5=B9=B6=E4=BC=98=E5=8C=96=E8=AF=8D=E6=B3=95=E5=88=86?= =?UTF-8?q?=E6=9E=90=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加了完整的C预处理器关键字表,包括define、include、ifdef等关键字, 用于支持预处理器功能。 - 新增SCC_PPKEYWORD_TABLE宏定义所有预处理器关键字 - 在token类型枚举中包含预处理关键字 - 重构词法分析器以正确识别预处理关键字 - 添加scc_lexer_tok_drop函数用于清理token资源 refactor(lexer): 重构词法分析器内部结构 - 修复keywords数组字段名从tok到tok_type - 优化scc_lexer_get_valid_token使用while循环替代do-while - 修改fill_token和fill_valid_token返回类型为cbool - 调整lexer_to_ring参数语义更清晰 fix(sstream): 修正环形缓冲区填充函数返回类型 - 将fill_func返回类型从int改为cbool以保持一致性 - 更新SCC_RING宏文档说明fill回调函数返回值含义 docs(argparse): 重命名examples目录修复路径错误 - 将libs/argparse/example重命名为libs/argparse/examples保持一致性 test(lexer): 更新测试用例适配新的流接口 - 修改测试代码中的scc_sstream_ref_ring为scc_sstream_to_ring - 确保测试用例与新的API保持兼容 style(lexer): 更新示例程序日志级别和实现方式 - 将调试日志改为信息日志 - 使用环形缓冲区实现示例程序的token获取 --- libs/argparse/{example => examples}/main.c | 0 libs/lexer/include/lexer_token.h | 35 ++++++++++++++++++++-- libs/lexer/src/lexer.c | 32 +++++++++++++------- libs/lexer/src/main.c | 22 +++++++++----- libs/lexer/tests/test_lexer.c | 4 +-- libs/sstream/src/scc_sstream.c | 2 +- runtime/scc_core/include/scc_core_ring.h | 4 +-- 7 files changed, 73 insertions(+), 26 deletions(-) rename libs/argparse/{example => examples}/main.c (100%) diff --git a/libs/argparse/example/main.c b/libs/argparse/examples/main.c similarity index 100% rename from libs/argparse/example/main.c rename to libs/argparse/examples/main.c diff --git a/libs/lexer/include/lexer_token.h b/libs/lexer/include/lexer_token.h index 9ccfc3a..92cbd0d 100644 --- a/libs/lexer/include/lexer_token.h +++ b/libs/lexer/include/lexer_token.h @@ -10,6 
+10,28 @@ typedef enum scc_cstd { SCC_CEXT_SCC, } scc_cstd_t; +/* clang-format off */ +/// Preprocessor directives: X(name, std, tok). TODO(review): elifdef/elifndef/embed/warning are C23, not C99. See https://cppreference.cn/w/c/preprocessor +#define SCC_PPKEYWORD_TABLE \ + X(define , SCC_CSTD_C99, SCC_PP_TOK_DEFINE ) \ + X(elif , SCC_CSTD_C99, SCC_PP_TOK_ELIF ) \ + X(elifdef , SCC_CSTD_C99, SCC_PP_TOK_ELIFDEF ) \ + X(elifndef , SCC_CSTD_C99, SCC_PP_TOK_ELIFNDEF ) \ + X(else , SCC_CSTD_C99, SCC_PP_TOK_ELSE ) \ + X(embed , SCC_CSTD_C99, SCC_PP_TOK_EMBED ) \ + X(endif , SCC_CSTD_C99, SCC_PP_TOK_ENDIF ) \ + X(error , SCC_CSTD_C99, SCC_PP_TOK_ERROR ) \ + X(if , SCC_CSTD_C99, SCC_PP_TOK_IF ) \ + X(ifdef , SCC_CSTD_C99, SCC_PP_TOK_IFDEF ) \ + X(ifndef , SCC_CSTD_C99, SCC_PP_TOK_IFNDEF ) \ + X(include , SCC_CSTD_C99, SCC_PP_TOK_INCLUDE ) \ + X(line , SCC_CSTD_C99, SCC_PP_TOK_LINE ) \ + X(pragma , SCC_CSTD_C99, SCC_PP_TOK_PRAGMA ) \ + X(undef , SCC_CSTD_C99, SCC_PP_TOK_UNDEF ) \ + X(warning , SCC_CSTD_C99, SCC_PP_TOK_WARNING ) \ + // END +/* clang-format on */ + /* clang-format off */ // WARNING: Using Binary Search To Fast Find Keyword // 你必须确保其中是按照字典序排列 @@ -118,14 +140,17 @@ typedef enum scc_cstd { // 定义TokenType枚举 typedef enum scc_tok_type { -// 处理普通token + #define X(str, subtype, tok) tok, SCC_CTOK_TABLE #undef X -// 处理关键字(保持原有格式) +#define X(name, type, tok) tok, + SCC_PPKEYWORD_TABLE +#undef X + #define X(name, subtype, tok, std) tok, - SCC_CKEYWORD_TABLE + SCC_CKEYWORD_TABLE #undef X } scc_tok_type_t; @@ -154,6 +179,10 @@ typedef struct scc_lexer_token { scc_pos_t loc; } scc_lexer_tok_t; +static inline void scc_lexer_tok_drop(scc_lexer_tok_t *tok) { + scc_cstring_free(&tok->lexeme); +} + static inline cbool scc_lexer_tok_match(const scc_lexer_tok_t *tok, scc_tok_type_t type) { return tok->type == type; diff --git a/libs/lexer/src/lexer.c b/libs/lexer/src/lexer.c index 723389e..3212195 100644 --- a/libs/lexer/src/lexer.c +++ b/libs/lexer/src/lexer.c @@ -5,7 +5,7 @@ static const struct { const char *name; scc_cstd_t std_type; - scc_tok_type_t tok; + scc_tok_type_t tok_type; } 
keywords[] = { #define X(name, subtype, tok, std_type, ...) {#name, std_type, tok}, SCC_CKEYWORD_TABLE @@ -168,7 +168,7 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) { // 检查是否为关键字 int idx = keyword_cmp(scc_cstring_as_cstr(&lex), scc_cstring_len(&lex)); if (idx != -1) { - token->type = keywords[idx].tok; + token->type = keywords[idx].tok_type; } } else if (is_digit(ch)) { // 数字字面量(整数/浮点) @@ -461,33 +461,43 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) { // scc_lexer_get_token maybe got invalid (with parser) void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) { scc_tok_subtype_t subtype; - do { + while (1) { scc_lexer_get_token(lexer, token); subtype = scc_get_tok_subtype(token->type); AssertFmt(subtype != SCC_TOK_SUBTYPE_INVALID, "Invalid token: `%s` at %s:%d:%d", scc_get_tok_name(token->type), token->loc.name, token->loc.line, token->loc.col); - } while (subtype == SCC_TOK_SUBTYPE_EMPTYSPACE || - subtype == SCC_TOK_SUBTYPE_COMMENT); + if (subtype != SCC_TOK_SUBTYPE_EMPTYSPACE && + subtype != SCC_TOK_SUBTYPE_COMMENT) { + break; /* significant token: return it to the caller */ + } + scc_lexer_tok_drop(token); /* skipped: free, then retry */ + } } -static int fill_token(scc_lexer_tok_t *out, void *userdata) { +static cbool fill_token(scc_lexer_tok_t *out, void *userdata) { scc_lexer_t *lexer = userdata; scc_lexer_get_token(lexer, out); - return 0; + if (out->type == SCC_TOK_EOF) { + return false; + } + return true; } -static int fill_valid_token(scc_lexer_tok_t *out, void *userdata) { +static cbool fill_valid_token(scc_lexer_tok_t *out, void *userdata) { scc_lexer_t *lexer = userdata; scc_lexer_get_valid_token(lexer, out); - return 0; + if (out->type == SCC_TOK_EOF) { + return false; + } + return true; } scc_lexer_tok_ring_t *scc_lexer_to_ring(scc_lexer_t *lexer, int ring_size, - cbool need_comment) { + cbool fill_all) { scc_ring_init(lexer->ring, ring_size, - need_comment ? fill_token : fill_valid_token, lexer); + fill_all ? 
fill_token : fill_valid_token, lexer); lexer->ring_ref_count++; return &lexer->ring; } diff --git a/libs/lexer/src/main.c b/libs/lexer/src/main.c index ed273de..60a51aa 100644 --- a/libs/lexer/src/main.c +++ b/libs/lexer/src/main.c @@ -40,19 +40,27 @@ int main(int argc, char *argv[]) { scc_lexer_t lexer; scc_sstream_t stream; scc_sstream_init(&stream, file_name, 16); - scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream); + scc_sstream_ring_t *ref = scc_sstream_to_ring(&stream); scc_lexer_init(&lexer, ref); scc_lexer_tok_t token; + scc_lexer_tok_ring_t *tok_ring = scc_lexer_to_ring(&lexer, 16, false); + int ok; while (1) { - scc_lexer_get_valid_token(&lexer, &token); - if (token.type == SCC_TOK_EOF) { + // scc_lexer_get_valid_token(&lexer, &token); + // if (token.type == SCC_TOK_EOF) { + // break; + // } + + scc_ring_next_consume(*tok_ring, token, ok); + if (!ok) { break; } - LOG_DEBUG("get token [%-8s] `%s` at %s:%d:%d", - scc_get_tok_name(token.type), - scc_cstring_as_cstr(&token.lexeme), token.loc.name, - token.loc.line, token.loc.col); + + LOG_INFO("get token [%-8s] `%s` at %s:%d:%d", + scc_get_tok_name(token.type), + scc_cstring_as_cstr(&token.lexeme), token.loc.name, + token.loc.line, token.loc.col); scc_cstring_free(&token.lexeme); } scc_sstream_drop_ring(ref); diff --git a/libs/lexer/tests/test_lexer.c b/libs/lexer/tests/test_lexer.c index 2218576..b0ad861 100644 --- a/libs/lexer/tests/test_lexer.c +++ b/libs/lexer/tests/test_lexer.c @@ -13,7 +13,7 @@ static void free_token(scc_lexer_tok_t *tok) { scc_cstring_free(&tok->lexeme); } scc_lexer_tok_t token; \ scc_sstream_t stream; \ scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16); \ - scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream); \ + scc_sstream_ring_t *ref = scc_sstream_to_ring(&stream); \ scc_lexer_init(&lexer, ref); \ scc_lexer_get_token(&lexer, &token); \ \ @@ -34,7 +34,7 @@ static void free_token(scc_lexer_tok_t *tok) { scc_cstring_free(&tok->lexeme); } scc_lexer_tok_t token; 
\ scc_sstream_t stream; \ scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16); \ - scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream); \ + scc_sstream_ring_t *ref = scc_sstream_to_ring(&stream); \ scc_lexer_init(&lexer, ref); \ \ scc_tok_type_t expected[] = {__VA_ARGS__}; \ diff --git a/libs/sstream/src/scc_sstream.c b/libs/sstream/src/scc_sstream.c index 630666e..130ca61 100644 --- a/libs/sstream/src/scc_sstream.c +++ b/libs/sstream/src/scc_sstream.c @@ -66,7 +66,7 @@ static int sstream_scan_at(scc_sstream_t *stream, scc_pos_t scan_pos, } // 环形缓冲区填充回调(通过 userdata 获取流对象) -static int fill_func(scc_sstream_char_t *out, void *userdata) { +static cbool fill_func(scc_sstream_char_t *out, void *userdata) { scc_sstream_t *stream = (scc_sstream_t *)userdata; if (stream->fill_pos.offset >= stream->len) return false; // 已到文件尾 diff --git a/runtime/scc_core/include/scc_core_ring.h b/runtime/scc_core/include/scc_core_ring.h index 152dbf2..a2fd2c1 100644 --- a/runtime/scc_core/include/scc_core_ring.h +++ b/runtime/scc_core/include/scc_core_ring.h @@ -14,7 +14,7 @@ * - head: 已消费的逻辑索引 * - probe: 预览索引 * - tail: 已填充的逻辑末尾索引 - * - fill: 填充回调函数 (当需要新元素时调用) + * - fill: 填充回调函数 (当需要新元素时调用) 返回true表示成功 */ #define SCC_RING(type) \ struct { \ @@ -23,7 +23,7 @@ usize head; \ usize probe; \ usize tail; \ - int (*fill)(type * out, void *userdata); \ + cbool (*fill)(type * out, void *userdata); \ void *userdata; \ }