From 0e7dec202af3a5a328b7de85eb422f61a943018c Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Mon, 16 Feb 2026 16:56:40 +0800 Subject: [PATCH] =?UTF-8?q?refactor(lex=5Fparser):=20=E7=A7=BB=E9=99=A4?= =?UTF-8?q?=E6=97=A7=E7=9A=84=E8=AF=8D=E6=B3=95=E8=A7=A3=E6=9E=90=E5=99=A8?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0=E5=B9=B6=E6=9B=B4=E6=96=B0=E4=BE=9D=E8=B5=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 移除了 libs/lex_parser 目录下的所有头文件和源文件,包括: - lex_parser.h 和 lex_parser.c 核心解析功能 - 所有测试文件(test_char.c, test_identifier.c, test_number.c, test_skip_block_comment.c, test_skip_line.c, test_string.c) 更新了 lexer 模块的依赖配置,将 lex_parser 替换为 sstream, 同时更新了 lexer.h 中的相关包含头文件和数据结构定义, 简化了 scc_lexer_t 结构体的字段。 --- libs/lex_parser/include/lex_parser.h | 32 - libs/lex_parser/src/lex_parser.c | 434 ---------- libs/lex_parser/tests/test_char.c | 64 -- libs/lex_parser/tests/test_identifier.c | 56 -- libs/lex_parser/tests/test_number.c | 135 --- .../tests/test_skip_block_comment.c | 52 -- libs/lex_parser/tests/test_skip_line.c | 51 -- libs/lex_parser/tests/test_string.c | 62 -- libs/lexer/cbuild.toml | 2 +- libs/lexer/include/lexer.h | 12 +- libs/lexer/include/lexer_token.h | 119 +-- libs/lexer/src/lexer.c | 799 +++++++++--------- libs/lexer/src/main.c | 66 ++ libs/lexer/tests/test_lexer.c | 403 +++++++++ libs/lexer/tests/test_parse.c | 170 ---- libs/lexer/tests/test_run.c | 93 -- libs/{lex_parser => sstream}/cbuild.toml | 6 +- libs/sstream/include/scc_pos.h | 32 + libs/sstream/include/scc_sstream.h | 33 + libs/sstream/src/main.c | 51 ++ libs/sstream/src/scc_sstream.c | 145 ++++ runtime/runtime_gdb.py | 140 ++- runtime/scc_core/include/scc_core.h | 2 - runtime/scc_core/include/scc_core_impl.h | 1 + runtime/scc_core/include/scc_core_pos.h | 28 - runtime/scc_core/include/scc_core_ring.h | 178 ++++ runtime/scc_core/include/scc_core_stream.h | 130 --- runtime/scc_core/src/cfg.std_impl.c | 14 + runtime/scc_core/src/stream.c | 183 ---- 
runtime/scc_core/tests/test_core_ring.c | 326 +++++++ 30 files changed, 1840 insertions(+), 1979 deletions(-) delete mode 100644 libs/lex_parser/include/lex_parser.h delete mode 100644 libs/lex_parser/src/lex_parser.c delete mode 100644 libs/lex_parser/tests/test_char.c delete mode 100644 libs/lex_parser/tests/test_identifier.c delete mode 100644 libs/lex_parser/tests/test_number.c delete mode 100644 libs/lex_parser/tests/test_skip_block_comment.c delete mode 100644 libs/lex_parser/tests/test_skip_line.c delete mode 100644 libs/lex_parser/tests/test_string.c create mode 100644 libs/lexer/src/main.c create mode 100644 libs/lexer/tests/test_lexer.c delete mode 100644 libs/lexer/tests/test_parse.c delete mode 100644 libs/lexer/tests/test_run.c rename libs/{lex_parser => sstream}/cbuild.toml (53%) create mode 100644 libs/sstream/include/scc_pos.h create mode 100644 libs/sstream/include/scc_sstream.h create mode 100644 libs/sstream/src/main.c create mode 100644 libs/sstream/src/scc_sstream.c delete mode 100644 runtime/scc_core/include/scc_core_pos.h create mode 100644 runtime/scc_core/include/scc_core_ring.h delete mode 100644 runtime/scc_core/include/scc_core_stream.h delete mode 100644 runtime/scc_core/src/stream.c create mode 100644 runtime/scc_core/tests/test_core_ring.c diff --git a/libs/lex_parser/include/lex_parser.h b/libs/lex_parser/include/lex_parser.h deleted file mode 100644 index c73f561..0000000 --- a/libs/lex_parser/include/lex_parser.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef __SCC_LEX_PARSER_H__ -#define __SCC_LEX_PARSER_H__ - -#include - -static inline cbool scc_lex_parse_is_endline(int ch) { - return ch == '\n' || ch == '\r'; -} - -static inline cbool scc_lex_parse_is_whitespace(int ch) { - return ch == ' ' || ch == '\t'; -} - -// TODO identifier check is right? 
-static inline cbool scc_lex_parse_is_identifier_prefix(int ch) { - return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_'; -} - -int scc_lex_parse_char(scc_probe_stream_t *input, scc_pos_t *pos); -cbool scc_lex_parse_string(scc_probe_stream_t *input, scc_pos_t *pos, - scc_cstring_t *output); -cbool scc_lex_parse_number(scc_probe_stream_t *input, scc_pos_t *pos, - usize *output); -cbool scc_lex_parse_identifier(scc_probe_stream_t *input, scc_pos_t *pos, - scc_cstring_t *output); -void scc_lex_parse_skip_endline(scc_probe_stream_t *input, scc_pos_t *pos); -void scc_lex_parse_skip_block_comment(scc_probe_stream_t *input, - scc_pos_t *pos); -void scc_lex_parse_skip_line(scc_probe_stream_t *input, scc_pos_t *pos); -void scc_lex_parse_skip_whitespace(scc_probe_stream_t *input, scc_pos_t *pos); - -#endif /* __SCC_LEX_PARSER_H__ */ diff --git a/libs/lex_parser/src/lex_parser.c b/libs/lex_parser/src/lex_parser.c deleted file mode 100644 index 9a985a0..0000000 --- a/libs/lex_parser/src/lex_parser.c +++ /dev/null @@ -1,434 +0,0 @@ -#include - -void scc_lex_parse_skip_endline(scc_probe_stream_t *input, scc_pos_t *pos) { - Assert(input != null && pos != null); - // scc_probe_stream_reset(input); - int ch = scc_probe_stream_peek(input); - if (ch == '\r') { - scc_probe_stream_next(input); - ch = scc_probe_stream_peek(input); - if (ch == '\n') { - scc_probe_stream_next(input); - } - scc_pos_next_line(pos); - } else if (ch == '\n') { - scc_probe_stream_next(input); - scc_pos_next_line(pos); - } else { - LOG_WARN("not a newline character"); - } -} - -/** - * @brief - * - * @param ch - * @return int - * https://cppreference.cn/w/c/language/escape - * `\'` 单引号 在 ASCII 编码中为字节 0x27 - * `\"` 双引号 在 ASCII 编码中为字节 0x22 - * `\?` 问号 在 ASCII 编码中为字节 0x3f - * `\\` 反斜杠 在 ASCII 编码中为字节 0x5c - * `\a` 响铃 在 ASCII 编码中为字节 0x07 - * `\b` 退格 在 ASCII 编码中为字节 0x08 - * `\f` 换页 - 新页 在 ASCII 编码中为字节 0x0c - * `\n` 换行 - 新行 在 ASCII 编码中为字节 0x0a - * `\r` 回车 在 ASCII 编码中为字节 0x0d - * `\t` 水平制表符 在 ASCII 
编码中为字节 0x09 - * `\v` 垂直制表符 在 ASCII 编码中为字节 0x0b - */ -static inline int got_simple_escape(int ch) { - /* clang-format off */ - #define CASE(ch) case ch: return ch; - switch (ch) { - case '\'': return '\''; - case '\"': return '\"'; - case '\?': return '\?'; - case '\\': return '\\'; - case 'a': return '\a'; - case 'b': return '\b'; - case 'f': return '\f'; - case 'n': return '\n'; - case 'r': return '\r'; - case 't': return '\t'; - case 'v': return '\v'; - default: return -1; - } - /* clang-format on */ -} - -void scc_lex_parse_skip_line(scc_probe_stream_t *input, scc_pos_t *pos) { - scc_probe_stream_t *stream = input; - Assert(stream != null && pos != null); - // scc_probe_stream_reset(stream); - while (1) { - int ch = scc_probe_stream_peek(stream); - - if (ch == scc_stream_eof) { - return; - } - - // TODO endline - if (scc_lex_parse_is_endline(ch)) { - scc_lex_parse_skip_endline(stream, pos); - return; - } else { - scc_probe_stream_next(stream); - scc_pos_next(pos); - } - } -} - -void scc_lex_parse_skip_block_comment(scc_probe_stream_t *input, - scc_pos_t *pos) { - scc_probe_stream_t *stream = input; - Assert(stream != null && pos != null); - int ch; - // scc_probe_stream_reset(stream); - ch = scc_probe_stream_next(stream); - scc_pos_next(pos); - // FIXME Assertion - Assert(ch == '/'); - ch = scc_probe_stream_next(stream); - scc_pos_next(pos); - Assert(ch == '*'); - - // all ready match `/*` - while (1) { - // scc_probe_stream_reset(stream); - ch = scc_probe_stream_peek(stream); - - if (ch == scc_stream_eof) { - LOG_WARN("Unterminated block comment"); - return; - } - - if (scc_lex_parse_is_endline(ch)) { - scc_lex_parse_skip_endline(stream, pos); - continue; - } - scc_probe_stream_next(stream); - scc_pos_next(pos); - if (ch == '*') { - ch = scc_probe_stream_peek(stream); - if (ch == '/') { - scc_probe_stream_next(stream); - scc_pos_next(pos); - return; - } - } - } -} - -void scc_lex_parse_skip_whitespace(scc_probe_stream_t *input, scc_pos_t *pos) { - 
scc_probe_stream_t *stream = input; - Assert(stream != null && pos != null); - // scc_probe_stream_reset(stream); - while (1) { - int ch = scc_probe_stream_peek(stream); - - if (!scc_lex_parse_is_whitespace(ch)) { - return; - } - - scc_probe_stream_next(stream); - scc_pos_next(pos); - } -} - -static inline cbool _lex_parse_uint(scc_probe_stream_t *input, scc_pos_t *pos, - int base, usize *output) { - Assert(input != null && pos != null); - if (input == null || pos == null) { - return false; - } - Assert(base == 2 || base == 8 || base == 10 || base == 16); - // scc_probe_stream_reset(input); - int ch, tmp; - usize n = 0; - usize offset = pos->offset; - while (1) { - ch = scc_probe_stream_peek(input); - - if (ch == scc_stream_eof) { - break; - } else if (ch >= 'a' && ch <= 'z') { - tmp = ch - 'a' + 10; - } else if (ch >= 'A' && ch <= 'Z') { - tmp = ch - 'A' + 10; - } else if (ch >= '0' && ch <= '9') { - tmp = ch - '0'; - } else { - break; - } - - if (tmp >= base) { - LOG_ERROR("Invalid digit"); - return false; - } - - scc_probe_stream_next(input); - scc_pos_next(pos); - n = n * base + tmp; - // TODO number overflow - } - if (offset == pos->offset) { - // None match any number - return false; - } - *output = n; - return true; -} - -/** - * @brief - * - * @param input - * @param pos - * @return int - * https://cppreference.cn/w/c/language/character_constant - */ -int scc_lex_parse_char(scc_probe_stream_t *input, scc_pos_t *pos) { - scc_probe_stream_t *stream = input; - Assert(stream != null && pos != null); - int ch = scc_probe_stream_next(stream); - scc_pos_next(pos); - int ret = scc_stream_eof; - - if (ch == scc_stream_eof) { - LOG_WARN("Unexpected EOF at begin"); - goto ERR; - } else if (ch != '\'') { - LOG_WARN("Unexpected character '%c' at begin", ch); - goto ERR; - } - // scc_probe_stream_next(stream); - - ch = scc_probe_stream_next(stream); - scc_pos_next(pos); - - if (ch == scc_stream_eof) { - LOG_WARN("Unexpected EOF at middle"); - goto ERR; - } else if (ch == 
'\\') { - ch = scc_probe_stream_next(stream); - scc_pos_next(pos); - if (ch == '0') { - // 数字转义序列 - // \nnn 任意八进制值 码元 nnn - // FIXME 这里如果返回 0 理论上为错误但是恰好与正确值相同 - ret = 0; - _lex_parse_uint(stream, pos, 8, (usize *)&ret); - } else if (ch == 'x') { - // TODO https://cppreference.cn/w/c/language/escape - // \xn... 任意十六进制值 码元 n... (任意数量的十六进制数字) - // 通用字符名 - TODO(); - } else if (ch == 'u' || ch == 'U') { - // \unnnn (C99 起) Unicode 值在允许范围内; - // 可能产生多个码元 码点 U+nnnn - // \Unnnnnnnn (C99 起) Unicode 值在允许范围内; - // 可能产生多个码元 码点 U+nnnnnnnn - TODO(); - } else if ((ret = got_simple_escape(ch)) == -1) { - LOG_ERROR("Invalid escape character"); - goto ERR; - } - } else { - ret = ch; - } - if ((ch = scc_probe_stream_next(stream)) != '\'') { - LOG_ERROR("Unclosed character literal '%c' at end, expect `'`", ch); - scc_pos_next(pos); - goto ERR; - } - - return ret; -ERR: - return scc_stream_eof; -} - -/** - * @brief - * - * @param input - * @param pos - * @param output - * @return cbool - * https://cppreference.cn/w/c/language/string_literal - */ -cbool scc_lex_parse_string(scc_probe_stream_t *input, scc_pos_t *pos, - scc_cstring_t *output) { - scc_probe_stream_t *stream = input; - Assert(stream != null && pos != null && output != null); - // scc_probe_stream_reset(stream); - int ch = scc_probe_stream_peek(stream); - - Assert(scc_cstring_is_empty(output)); - if (ch == scc_stream_eof) { - LOG_WARN("Unexpected EOF at begin"); - goto ERR; - } else if (ch != '"') { - LOG_WARN("Unexpected character '%c' at begin", ch); - goto ERR; - } - scc_probe_stream_next(stream); - scc_pos_next(pos); - - scc_cstring_t str = scc_cstring_from_cstr(""); - while (1) { - ch = scc_probe_stream_peek(stream); - - if (ch == scc_stream_eof) { - LOG_ERROR("Unexpected EOF at string literal"); - goto ERR; - } else if (scc_lex_parse_is_endline(ch)) { - LOG_ERROR("Unexpected newline at string literal"); - goto ERR; - } else if (ch == '\\') { - // TODO bad practice and maybe bugs here - scc_probe_stream_next(stream); - 
ch = scc_probe_stream_next(stream); - int val = got_simple_escape(ch); - if (val == -1) { - LOG_ERROR("Invalid escape character it is \\%c [%d]", ch, ch); - } else { - scc_cstring_append_ch(&str, val); - continue; - } - } else if (ch == '"') { - scc_probe_stream_next(stream); - scc_pos_next(pos); - break; - } - - scc_probe_stream_next(stream); - scc_pos_next(pos); - scc_cstring_append_ch(&str, ch); - } - - *output = str; - return true; -ERR: - scc_cstring_free(&str); - return false; -} - -/** - * @brief - * - * @param input - * @param pos - * @param output - * @return cbool - * https://cppreference.cn/w/c/language/integer_constant - */ -cbool scc_lex_parse_number(scc_probe_stream_t *input, scc_pos_t *pos, - usize *output) { - scc_probe_stream_t *stream = input; - Assert(stream != null && pos != null && output != null); - // scc_probe_stream_reset(stream); - int ch = scc_probe_stream_peek(stream); - int base = 10; // 默认十进制 - - if (ch == scc_stream_eof) { - LOG_WARN("Unexpected EOF at begin"); - goto ERR; - } - - if (ch == '0') { - // 消费 '0' - scc_probe_stream_next(stream); - scc_pos_next(pos); - - // 查看下一个字符 - ch = scc_probe_stream_peek(stream); - if (ch == 'x' || ch == 'X') { - // 十六进制 - base = 16; - scc_probe_stream_next(stream); - scc_pos_next(pos); - } else if (ch == 'b' || ch == 'B') { - // 二进制 (C23扩展) - base = 2; - scc_probe_stream_next(stream); - scc_pos_next(pos); - } else if (ch >= '0' && ch <= '7') { - // 八进制 - base = 8; - // 不消费,数字将由 _lex_parse_uint 处理 - } else if (ch == '8' || ch == '9') { - LOG_ERROR("Invalid digit '%d' in octal literal", ch); - return false; - } else { - // 只是0,十进制 - *output = 0; - return true; - } - } else if (ch >= '1' && ch <= '9') { - // 十进制,不消费,由 _lex_parse_uint 处理 - base = 10; - } else { - // 无效的数字 - return false; - } - - // 解析整数部分 - // scc_probe_stream_reset(stream); - usize n; - if (_lex_parse_uint(stream, pos, base, &n) == false) { - // 如果没有匹配任何数字,但输入是 '0',已经处理过了 - // 对于十进制数字,至少应该有一个数字 - if (base == 10) { - // 单个数字的情况,例如 "1" - 
// 我们需要消费这个数字并返回它的值 - if (ch >= '1' && ch <= '9') { - scc_probe_stream_next(stream); - scc_pos_next(pos); - *output = ch - '0'; - return true; - } - } - return false; - } - *output = n; - return true; -ERR: - return false; -} - -/** - * @brief - * - * @param input - * @param pos - * @param output - * @return cbool - * https://cppreference.cn/w/c/language/identifier - */ -cbool scc_lex_parse_identifier(scc_probe_stream_t *input, scc_pos_t *pos, - scc_cstring_t *output) { - Assert(input != null && pos != null && output != null); - Assert(scc_cstring_is_empty(output)); - scc_probe_stream_t *stream = input; - // scc_probe_stream_reset(stream); - int ch = scc_probe_stream_peek(stream); - - if (ch == scc_stream_eof) { - LOG_WARN("Unexpected EOF at begin"); - } else if (scc_lex_parse_is_identifier_prefix(ch)) { - while (1) { - scc_cstring_append_ch(output, ch); - scc_probe_stream_next(stream); - scc_pos_next(pos); - ch = scc_probe_stream_peek(stream); - if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || - (ch == '_') || (ch >= '0' && ch <= '9')) { - continue; - } - break; - } - return true; - } - return false; -} diff --git a/libs/lex_parser/tests/test_char.c b/libs/lex_parser/tests/test_char.c deleted file mode 100644 index f7fde2e..0000000 --- a/libs/lex_parser/tests/test_char.c +++ /dev/null @@ -1,64 +0,0 @@ -// test_char.c -#include -#include - -cbool check_char(const char *str, int expect, int *output) { - log_set_level(&__default_logger_root, 0); - scc_pos_t pos = scc_pos_create(); - scc_mem_probe_stream_t mem_stream; - scc_probe_stream_t *stream = - scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false); - *output = scc_lex_parse_char(stream, &pos); - cbool ret1 = *output == expect; - scc_probe_stream_reset(stream); - *output = scc_lex_parse_char(stream, &pos); - cbool ret2 = *output == expect; - return ret1 && ret2; -} - -#define CHECK_CHAR_VALID(str, expect) \ - do { \ - int _output; \ - cbool ret = check_char(str, expect, &_output); \ - 
TEST_CHECK(ret == true); \ - } while (0) - -#define CHECK_CHAR_INVALID(str) \ - do { \ - int _output; \ - check_char(str, scc_stream_eof, &_output); \ - TEST_CHECK(_output == scc_stream_eof); \ - } while (0) - -void test_simple_char(void) { - TEST_CASE("simple chars"); - CHECK_CHAR_VALID("'a'", 'a'); - CHECK_CHAR_VALID("'Z'", 'Z'); - CHECK_CHAR_VALID("'0'", '0'); - CHECK_CHAR_VALID("' '", ' '); -} - -void test_escape_char(void) { - TEST_CASE("escape chars"); - CHECK_CHAR_VALID("'\\n'", '\n'); - CHECK_CHAR_VALID("'\\t'", '\t'); - CHECK_CHAR_VALID("'\\r'", '\r'); - CHECK_CHAR_VALID("'\\\\'", '\\'); - CHECK_CHAR_VALID("'\\''", '\''); - CHECK_CHAR_VALID("'\\\"'", '\"'); -} - -void test_invalid_char(void) { - TEST_CASE("invalid chars"); - CHECK_CHAR_INVALID("'"); - CHECK_CHAR_INVALID("''"); - CHECK_CHAR_INVALID("'ab'"); - CHECK_CHAR_INVALID("'\\'"); -} - -TEST_LIST = { - {"test_simple_char", test_simple_char}, - {"test_escape_char", test_escape_char}, - {"test_invalid_char", test_invalid_char}, - {NULL, NULL}, -}; diff --git a/libs/lex_parser/tests/test_identifier.c b/libs/lex_parser/tests/test_identifier.c deleted file mode 100644 index c7c77e4..0000000 --- a/libs/lex_parser/tests/test_identifier.c +++ /dev/null @@ -1,56 +0,0 @@ -// test_identifier.c -#include -#include - -cbool check_identifier(const char *str, const char *expect, - scc_cstring_t *output) { - log_set_level(&__default_logger_root, 0); - scc_pos_t pos = scc_pos_create(); - scc_mem_probe_stream_t mem_stream; - scc_probe_stream_t *stream = - scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false); - - cbool ret = scc_lex_parse_identifier(stream, &pos, output); - if (ret && expect) { - return strcmp(output->data, expect) == 0; - } - return ret; -} - -#define CHECK_IDENTIFIER_VALID(str, expect) \ - do { \ - scc_cstring_t _output = scc_cstring_create(); \ - cbool ret = check_identifier(str, expect, &_output); \ - TEST_CHECK(ret == true); \ - TEST_CHECK(strcmp(_output.data, expect) == 0); \ - 
scc_cstring_free(&_output); \ - } while (0) - -#define CHECK_IDENTIFIER_INVALID(str) \ - do { \ - scc_cstring_t _output = scc_cstring_create(); \ - cbool ret = check_identifier(str, NULL, &_output); \ - TEST_CHECK(ret == false); \ - scc_cstring_free(&_output); \ - } while (0) - -void test_valid_identifier(void) { - TEST_CASE("valid identifiers"); - CHECK_IDENTIFIER_VALID("variable", "variable"); - CHECK_IDENTIFIER_VALID("my_var", "my_var"); - CHECK_IDENTIFIER_VALID("_private", "_private"); - CHECK_IDENTIFIER_VALID("Var123", "Var123"); - CHECK_IDENTIFIER_VALID("a", "a"); -} - -void test_invalid_identifier(void) { - TEST_CASE("invalid identifiers"); - CHECK_IDENTIFIER_INVALID(""); - CHECK_IDENTIFIER_INVALID("123var"); -} - -TEST_LIST = { - {"test_valid_identifier", test_valid_identifier}, - {"test_invalid_identifier", test_invalid_identifier}, - {NULL, NULL}, -}; \ No newline at end of file diff --git a/libs/lex_parser/tests/test_number.c b/libs/lex_parser/tests/test_number.c deleted file mode 100644 index 9bd6404..0000000 --- a/libs/lex_parser/tests/test_number.c +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include - -cbool check(const char *str, usize expect, usize *output) { - // TODO maybe have other logger - (void)(expect); - log_set_level(&__default_logger_root, 0); - scc_pos_t pos = scc_pos_create(); - scc_mem_probe_stream_t mem_stream; - scc_probe_stream_t *stream = - scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false); - return scc_lex_parse_number(stream, &pos, output); -} - -#define CHECK_VALID(str, expect) \ - do { \ - usize _output; \ - cbool ret = check(str, expect, &_output); \ - TEST_CHECK(ret == true); \ - TEST_CHECK(_output == expect); \ - TEST_MSG("Produced: %llu", _output); \ - } while (0) - -#define CHECK_INVALID(str) \ - do { \ - usize _output; \ - cbool ret = check(str, 0, &_output); \ - TEST_CHECK(ret == false); \ - } while (0) - -void test_simple_hex(void) { - TEST_CASE("lowercase hex"); - CHECK_VALID("0xff", 255); - 
CHECK_VALID("0x0", 0); - CHECK_VALID("0xa", 10); - CHECK_VALID("0xf", 15); - CHECK_VALID("0x1a", 26); - - TEST_CASE("uppercase hex"); - CHECK_VALID("0xFF", 255); - CHECK_VALID("0xA0", 160); - CHECK_VALID("0xCAFEBABE", 3405691582); - - TEST_CASE("mixed case hex"); - CHECK_VALID("0xFf", 255); - CHECK_VALID("0xCaFeBaBe", 3405691582); - - TEST_CASE("larger hex values"); - CHECK_VALID("0xff00", 65280); - CHECK_VALID("0xFFFF", 65535); - - TEST_CASE("invalid hex"); - CHECK_INVALID("0xG"); // Invalid hex digit - CHECK_INVALID("0xyz"); // Invalid prefix - CHECK_INVALID("0x"); // Incomplete hex -} - -void test_simple_oct(void) { - TEST_CASE("basic octal"); - CHECK_VALID("00", 0); - CHECK_VALID("01", 1); - CHECK_VALID("07", 7); - - TEST_CASE("multi-digit octal"); - CHECK_VALID("010", 8); - CHECK_VALID("017", 15); - CHECK_VALID("077", 63); - - TEST_CASE("larger octal values"); - CHECK_VALID("0177", 127); - CHECK_VALID("0377", 255); - CHECK_VALID("0777", 511); - - TEST_CASE("invalid octal"); - CHECK_INVALID("08"); // Invalid octal digit - CHECK_INVALID("09"); // Invalid octal digit -} - -void test_simple_dec(void) { - TEST_CASE("single digits"); - CHECK_VALID("0", 0); - CHECK_VALID("1", 1); - CHECK_VALID("9", 9); - - TEST_CASE("multi-digit decimal"); - CHECK_VALID("10", 10); - CHECK_VALID("42", 42); - CHECK_VALID("123", 123); - - TEST_CASE("larger decimal values"); - CHECK_VALID("999", 999); - CHECK_VALID("1234", 1234); - CHECK_VALID("65535", 65535); -} - -void test_simple_bin(void) { - TEST_CASE("basic binary"); - CHECK_VALID("0b0", 0); - CHECK_VALID("0b1", 1); - - TEST_CASE("multi-digit binary"); - CHECK_VALID("0b10", 2); - CHECK_VALID("0b11", 3); - CHECK_VALID("0b100", 4); - CHECK_VALID("0b1010", 10); - - TEST_CASE("larger binary values"); - CHECK_VALID("0b1111", 15); - CHECK_VALID("0b11111111", 255); - CHECK_VALID("0b10101010", 170); - - TEST_CASE("invalid binary"); - CHECK_INVALID("0b2"); // Invalid binary digit - CHECK_INVALID("0b3"); // Invalid binary digit - 
CHECK_INVALID("0b"); // Incomplete binary -} - -void test_edge_cases(void) { - TEST_CASE("empty string"); - CHECK_INVALID(""); // Empty string - - TEST_CASE("non-numeric strings"); - CHECK_INVALID("abc"); // Non-numeric - CHECK_INVALID("xyz"); // Non-numeric - - TEST_CASE("mixed invalid formats"); - CHECK_INVALID("0x1G"); // Mixed valid/invalid hex - CHECK_INVALID("0b12"); // Mixed valid/invalid binary -} - -TEST_LIST = { - {"test_simple_hex", test_simple_hex}, {"test_simple_oct", test_simple_oct}, - {"test_simple_dec", test_simple_dec}, {"test_simple_bin", test_simple_bin}, - {"test_edge_cases", test_edge_cases}, {NULL, NULL}, -}; \ No newline at end of file diff --git a/libs/lex_parser/tests/test_skip_block_comment.c b/libs/lex_parser/tests/test_skip_block_comment.c deleted file mode 100644 index 9bfc9ae..0000000 --- a/libs/lex_parser/tests/test_skip_block_comment.c +++ /dev/null @@ -1,52 +0,0 @@ -// test_skip_block_comment.c -#include -#include - -void check_skip_block_comment(const char *str, const char *expect_remaining) { - log_set_level(&__default_logger_root, 0); - scc_pos_t pos = scc_pos_create(); - scc_mem_probe_stream_t mem_stream; - scc_probe_stream_t *stream = - scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false); - - scc_lex_parse_skip_block_comment(stream, &pos); - scc_probe_stream_sync(stream); - - // Check remaining content - char buffer[256] = {0}; - int i = 0; - int ch; - while ((ch = scc_probe_stream_consume(stream)) != scc_stream_eof && - i < 255) { - buffer[i++] = (char)ch; - } - - if (expect_remaining) { - TEST_CHECK(strcmp(buffer, expect_remaining) == 0); - } -} - -void test_simple_block_comment(void) { - TEST_CASE("simple block comments"); - check_skip_block_comment("/* comment */", ""); - check_skip_block_comment("/* comment */ int x;", " int x;"); -} - -void test_multiline_block_comment(void) { - TEST_CASE("multiline block comments"); - check_skip_block_comment("/* line1\nline2 */", ""); - check_skip_block_comment("/* 
line1\nline2 */ int x;", " int x;"); -} - -void test_nested_asterisk_block_comment(void) { - TEST_CASE("nested asterisk block comments"); - check_skip_block_comment("/* *** */", ""); - check_skip_block_comment("/* *** */ int x;", " int x;"); -} - -TEST_LIST = { - {"test_simple_block_comment", test_simple_block_comment}, - {"test_multiline_block_comment", test_multiline_block_comment}, - {"test_nested_asterisk_block_comment", test_nested_asterisk_block_comment}, - {NULL, NULL}, -}; \ No newline at end of file diff --git a/libs/lex_parser/tests/test_skip_line.c b/libs/lex_parser/tests/test_skip_line.c deleted file mode 100644 index f82f0a2..0000000 --- a/libs/lex_parser/tests/test_skip_line.c +++ /dev/null @@ -1,51 +0,0 @@ -// test_skip_line.c -#include -#include - -void check_skip_line(const char *str, const char *expect_remaining) { - log_set_level(&__default_logger_root, 0); - scc_pos_t pos = scc_pos_create(); - scc_mem_probe_stream_t mem_stream; - scc_probe_stream_t *stream = - scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false); - - scc_lex_parse_skip_line(stream, &pos); - scc_probe_stream_sync(stream); - - // Check remaining content - char buffer[256] = {0}; - int i = 0; - int ch; - while ((ch = scc_probe_stream_consume(stream)) != scc_stream_eof && - i < 255) { - buffer[i++] = (char)ch; - } - - if (expect_remaining) { - TEST_CHECK(strcmp(buffer, expect_remaining) == 0); - } -} - -void test_simple_line_comment(void) { - TEST_CASE("simple line comments"); - check_skip_line("// comment\n", ""); - check_skip_line("// comment\nint x;", "int x;"); -} - -void test_crlf_line_comment(void) { - TEST_CASE("CRLF line comments"); - check_skip_line("// comment\r\n", ""); - check_skip_line("// comment\r\nint x;", "int x;"); -} - -void test_eof_line_comment(void) { - TEST_CASE("EOF line comments"); - check_skip_line("// comment", ""); -} - -TEST_LIST = { - {"test_simple_line_comment", test_simple_line_comment}, - {"test_crlf_line_comment", 
test_crlf_line_comment}, - {"test_eof_line_comment", test_eof_line_comment}, - {NULL, NULL}, -}; \ No newline at end of file diff --git a/libs/lex_parser/tests/test_string.c b/libs/lex_parser/tests/test_string.c deleted file mode 100644 index f7640e6..0000000 --- a/libs/lex_parser/tests/test_string.c +++ /dev/null @@ -1,62 +0,0 @@ -// test_string.c -#include -#include - -cbool check_string(const char *str, const char *expect, scc_cstring_t *output) { - log_set_level(&__default_logger_root, 0); - scc_pos_t pos = scc_pos_create(); - scc_mem_probe_stream_t mem_stream; - scc_probe_stream_t *stream = - scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false); - - cbool ret = scc_lex_parse_string(stream, &pos, output); - if (ret && expect) { - return strcmp(output->data, expect) == 0; - } - return ret; -} - -#define CHECK_STRING_VALID(str, expect) \ - do { \ - scc_cstring_t _output = scc_cstring_create(); \ - cbool ret = check_string(str, expect, &_output); \ - TEST_CHECK(ret == true); \ - TEST_CHECK(strcmp(_output.data, expect) == 0); \ - scc_cstring_free(&_output); \ - } while (0) - -#define CHECK_STRING_INVALID(str) \ - do { \ - scc_cstring_t _output = scc_cstring_create(); \ - cbool ret = check_string(str, NULL, &_output); \ - TEST_CHECK(ret == false); \ - scc_cstring_free(&_output); \ - } while (0) - -void test_simple_string(void) { - TEST_CASE("simple strings"); - CHECK_STRING_VALID("\"\"", ""); - CHECK_STRING_VALID("\"hello\"", "hello"); - CHECK_STRING_VALID("\"hello world\"", "hello world"); -} - -void test_escape_string(void) { - TEST_CASE("escape strings"); - CHECK_STRING_VALID("\"\\n\"", "\n"); - CHECK_STRING_VALID("\"\\t\"", "\t"); - CHECK_STRING_VALID("\"\\\"\"", "\""); - CHECK_STRING_VALID("\"Hello\\nWorld\"", "Hello\nWorld"); -} - -void test_invalid_string(void) { - TEST_CASE("invalid strings"); - CHECK_STRING_INVALID("\"unterminated"); - CHECK_STRING_INVALID("\"newline\n\""); -} - -TEST_LIST = { - {"test_simple_string", test_simple_string}, - 
{"test_escape_string", test_escape_string}, - {"test_invalid_string", test_invalid_string}, - {NULL, NULL}, -}; \ No newline at end of file diff --git a/libs/lexer/cbuild.toml b/libs/lexer/cbuild.toml index 6e546a8..cb4670e 100644 --- a/libs/lexer/cbuild.toml +++ b/libs/lexer/cbuild.toml @@ -4,5 +4,5 @@ version = "0.1.0" dependencies = [ { name = "scc_core", path = "../../runtime/scc_core" }, - { name = "lex_parser", path = "../lex_parser" }, + { name = "lex_parser", path = "../sstream" }, ] diff --git a/libs/lexer/include/lexer.h b/libs/lexer/include/lexer.h index d604eb3..251f4fb 100644 --- a/libs/lexer/include/lexer.h +++ b/libs/lexer/include/lexer.h @@ -8,6 +8,7 @@ #include "lexer_token.h" #include +#include /** * @brief 词法分析器核心结构体 @@ -15,16 +16,11 @@ * 封装词法分析所需的状态信息和缓冲区管理 */ typedef struct scc_lexer { - scc_probe_stream_t *stream; - scc_pos_t pos; + scc_sstream_ring_t stream_ref; + int jump_macro; } scc_lexer_t; -/** - * @brief 初始化词法分析器 - * @param[out] lexer 要初始化的词法分析器实例 - * @param[in] stream 输入流对象指针 - */ -void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream); +void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref); /** * @brief 获取原始token diff --git a/libs/lexer/include/lexer_token.h b/libs/lexer/include/lexer_token.h index ca09d36..f4c3c55 100644 --- a/libs/lexer/include/lexer_token.h +++ b/libs/lexer/include/lexer_token.h @@ -2,6 +2,7 @@ #define __SCC_LEXER_TOKEN_H__ #include +#include typedef enum scc_cstd { SCC_CSTD_C89, @@ -54,64 +55,64 @@ typedef enum scc_cstd { // KEYWORD_TABLE #define SCC_CTOK_TABLE \ - X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \ - X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \ - X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \ - X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \ - X("#" , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_SHARP ) \ - X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \ - X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \ - X("++" , SCC_TOK_SUBTYPE_OPERATOR, 
SCC_TOK_ADD_ADD ) \ - X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \ - X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \ - X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \ - X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \ - X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \ - X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \ - X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \ - X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \ - X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \ - X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \ - X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \ - X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \ - X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \ - X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \ - X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \ - X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \ - X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \ - X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \ - X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \ - X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \ - X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \ - X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \ - X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \ - X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \ - X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \ - X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \ - X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \ - X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \ - X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \ - X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \ - X("!" 
, SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \ - X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \ - X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \ - X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \ - X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \ - X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \ - X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \ - X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \ - X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \ - X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \ - X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \ - X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \ - X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \ - X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \ - X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \ - X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \ - X(int_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \ - X(float_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \ - X(char_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \ - X(string_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \ + X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \ + X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \ + X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \ + X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \ + X("#" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP ) \ + X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \ + X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \ + X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \ + X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \ + X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \ + X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \ + X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \ + X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \ + X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \ + X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) 
\ + X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \ + X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \ + X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \ + X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \ + X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \ + X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \ + X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \ + X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \ + X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \ + X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \ + X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \ + X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \ + X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \ + X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \ + X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \ + X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \ + X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \ + X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \ + X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \ + X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \ + X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \ + X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \ + X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \ + X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \ + X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \ + X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \ + X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \ + X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \ + X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \ + X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \ + X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \ + X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \ + X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \ + X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \ + X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \ + X("." 
, SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \ + X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \ + X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \ + X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \ + X(int , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \ + X(float , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \ + X(char , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \ + X(string , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \ // END /* clang-format on */ @@ -145,7 +146,7 @@ const char *scc_get_tok_name(scc_tok_type_t type); typedef struct scc_lexer_token { scc_tok_type_t type; - scc_cvalue_t value; + scc_cstring_t lexeme; scc_pos_t loc; } scc_lexer_tok_t; diff --git a/libs/lexer/src/lexer.c b/libs/lexer/src/lexer.c index 4ef0771..14ee507 100644 --- a/libs/lexer/src/lexer.c +++ b/libs/lexer/src/lexer.c @@ -1,4 +1,3 @@ -#include #include #include @@ -13,442 +12,460 @@ static const struct { }; // by using binary search to find the keyword -static inline int keyword_cmp(const char *name, int len) { +static int keyword_cmp(const char *name, int len) { int low = 0; int high = sizeof(keywords) / sizeof(keywords[0]) - 1; while (low <= high) { int mid = (low + high) / 2; const char *key = keywords[mid].name; int cmp = 0; - - // 自定义字符串比较逻辑 for (int i = 0; i < len; i++) { if (name[i] != key[i]) { cmp = (unsigned char)name[i] - (unsigned char)key[i]; break; } if (name[i] == '\0') - break; // 遇到终止符提前结束 + break; } - if (cmp == 0) { - // 完全匹配检查(长度相同) if (key[len] == '\0') return mid; - cmp = -1; // 当前关键词比输入长 + cmp = -1; } - - if (cmp < 0) { + if (cmp < 0) high = mid - 1; - } else { + else low = mid + 1; - } } - return -1; // Not a keyword. 
+ return -1; // 不是关键字 } -void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream) { - lexer->stream = stream; - lexer->pos = scc_pos_create(); - // FIXME - lexer->pos.name = scc_cstring_copy(&stream->name); +void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref) { + lexer->stream_ref = *stream_ref; + lexer->jump_macro = false; +} + +static inline cbool is_whitespace(int ch) { + return ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f'; +} +static inline cbool is_newline(int ch) { return ch == '\n' || ch == '\r'; } +static inline cbool is_digit(int ch) { return ch >= '0' && ch <= '9'; } +static inline cbool is_alpha(int ch) { + return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); +} +static inline cbool is_alnum(int ch) { return is_alpha(ch) || is_digit(ch); } +static inline cbool is_identifier_start(int ch) { + return is_alpha(ch) || ch == '_'; +} +static inline cbool is_identifier_part(int ch) { + return is_alnum(ch) || ch == '_'; +} +static inline cbool is_octal_digit(int ch) { return ch >= '0' && ch <= '7'; } +static inline cbool is_hex_digit(int ch) { + return is_digit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); +} + +/* 从环形缓冲区预览一个字符(带EOF检测) */ +static inline cbool peek_char(scc_lexer_t *lexer, scc_sstream_char_t *out) { + cbool ok; + scc_ring_peek(lexer->stream_ref, *out, ok); + return ok; +} + +/* 从环形缓冲区消费一个字符,并将它追加到lexeme中 */ +static inline cbool next_char(scc_lexer_t *lexer, scc_cstring_t *lexeme, + scc_sstream_char_t *out) { + cbool ok; + scc_ring_next(lexer->stream_ref, *out, ok); + if (!ok) + return false; + scc_cstring_append_ch(lexeme, out->character); + return true; } #define set_err_token(token) ((token)->type = SCC_TOK_UNKNOWN) -static void parse_line(scc_lexer_t *lexer, scc_lexer_tok_t *token) { - token->loc = lexer->pos; - scc_probe_stream_t *stream = lexer->stream; - scc_probe_stream_reset(stream); - int ch = scc_probe_stream_next(stream); - - usize n; - scc_cstring_t str = 
scc_cstring_create(); - - if (ch == scc_stream_eof) { - LEX_WARN("Unexpected EOF at begin"); - goto ERR; - } else if (ch != '#') { - LEX_WARN("Unexpected character '%c' at begin", ch); - goto ERR; - } - - const char line[] = "line"; - - for (int i = 0; i < (int)sizeof(line); i++) { - ch = scc_probe_stream_consume(stream); - scc_pos_next(&lexer->pos); - if (ch != line[i]) { - LEX_WARN("Maroc does not support in lexer rather in preprocessor, " - "it will be ignored"); - goto SKIP_LINE; - } - } - - if (scc_lex_parse_number(stream, &lexer->pos, &n) == false) { - LEX_ERROR("Invalid line number"); - goto SKIP_LINE; - } - - if (scc_probe_stream_consume(stream) != ' ') { - scc_lex_parse_skip_line(stream, &lexer->pos); - token->loc.line = token->value.u; - } - - if (scc_probe_stream_next(stream) != '"') { - LEX_ERROR("Invalid `#` line"); - goto SKIP_LINE; - } - if (scc_lex_parse_string(stream, &lexer->pos, &str) == false) { - LEX_ERROR("Invalid filename"); - goto SKIP_LINE; - } - - scc_lex_parse_skip_line(stream, &lexer->pos); - scc_probe_stream_sync(stream); - token->loc.line = n; - // FIXME memory leak - token->loc.name = scc_cstring_copy(&str); - scc_cstring_free(&str); - return; -SKIP_LINE: - scc_lex_parse_skip_line(stream, &lexer->pos); - scc_probe_stream_sync(stream); -ERR: - set_err_token(token); - scc_cstring_free(&str); -} - -// /zh/c/language/operator_arithmetic.html void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) { - token->loc = lexer->pos; - token->type = SCC_TOK_UNKNOWN; - scc_probe_stream_t *stream = lexer->stream; + scc_sstream_char_t cur; + scc_cstring_t lex = scc_cstring_create(); // 临时lexeme - scc_probe_stream_reset(stream); - scc_tok_type_t type = SCC_TOK_UNKNOWN; - int ch = scc_probe_stream_next(stream); + // 尝试预览第一个字符 + if (!peek_char(lexer, &cur)) { + token->type = SCC_TOK_EOF; + token->loc = (scc_pos_t){0, 1, 1, 0}; // 默认位置 + token->lexeme = lex; // 空字符串 + return; + } + + // 记录起始位置 + scc_pos_t start_loc = cur.pos; + int ch = 
cur.character; // once step - switch (ch) { - case '=': - switch (scc_probe_stream_next(stream)) { - case '=': - type = SCC_TOK_EQ; - goto double_char; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_ASSIGN; - break; + if (is_whitespace(ch)) { + // 空白符: 连续收集 + token->type = SCC_TOK_BLANK; + while (peek_char(lexer, &cur) && is_whitespace(cur.character)) { + next_char(lexer, &lex, &cur); } - break; - case '+': - switch (scc_probe_stream_next(stream)) { - case '+': - type = SCC_TOK_ADD_ADD; - goto double_char; - case '=': - type = SCC_TOK_ASSIGN_ADD; - goto double_char; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_ADD; - break; - } - break; - case '-': - switch (scc_probe_stream_next(stream)) { - case '-': - type = SCC_TOK_SUB_SUB; - goto double_char; - case '=': - type = SCC_TOK_ASSIGN_SUB; - goto double_char; - case '>': - type = SCC_TOK_DEREF; - goto double_char; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_SUB; - break; - } - break; - case '*': - switch (scc_probe_stream_next(stream)) { - case '=': - type = SCC_TOK_ASSIGN_MUL; - goto double_char; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_MUL; - break; - } - break; - case '/': - switch (scc_probe_stream_next(stream)) { - case '=': - type = SCC_TOK_ASSIGN_DIV; - goto double_char; - case '/': - scc_probe_stream_reset(stream); - scc_lex_parse_skip_line(stream, &lexer->pos); - scc_probe_stream_sync(stream); - token->type = SCC_TOK_LINE_COMMENT; - goto END; - case '*': - scc_probe_stream_reset(stream); - scc_lex_parse_skip_block_comment(stream, &lexer->pos); - scc_probe_stream_sync(stream); - token->type = SCC_TOK_BLOCK_COMMENT; - goto END; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_DIV; - break; - } - break; - case '%': - switch (scc_probe_stream_next(stream)) { - case '=': - type = SCC_TOK_ASSIGN_MOD; - goto double_char; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_MOD; - break; - } - break; - case '&': - switch 
(scc_probe_stream_next(stream)) { - case '&': - type = SCC_TOK_AND_AND; - goto double_char; - case '=': - type = SCC_TOK_ASSIGN_AND; - goto double_char; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_AND; - break; - } - break; - case '|': - switch (scc_probe_stream_next(stream)) { - case '|': - type = SCC_TOK_OR_OR; - goto double_char; - case '=': - type = SCC_TOK_ASSIGN_OR; - goto double_char; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_OR; - break; - } - break; - case '^': - switch (scc_probe_stream_next(stream)) { - case '=': - type = SCC_TOK_ASSIGN_XOR; - goto double_char; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_XOR; - break; - } - break; - case '<': - switch (scc_probe_stream_next(stream)) { - case '=': - type = SCC_TOK_LE; - goto double_char; - case '<': { - if (scc_probe_stream_next(stream) == '=') { - type = SCC_TOK_ASSIGN_L_SH; - goto triple_char; - } else { - type = SCC_TOK_L_SH; - goto double_char; - } - break; - } - default: - scc_probe_stream_reset(stream), type = SCC_TOK_LT; - break; - } - break; - case '>': - switch (scc_probe_stream_next(stream)) { - case '=': - type = SCC_TOK_GE; - goto double_char; - case '>': { - if (scc_probe_stream_next(stream) == '=') { - type = SCC_TOK_ASSIGN_R_SH; - goto triple_char; - } else { - type = SCC_TOK_R_SH; - goto double_char; - } - break; - } - default: - scc_probe_stream_reset(stream), type = SCC_TOK_GT; - break; - } - break; - case '~': - type = SCC_TOK_BIT_NOT; - break; - case '!': - switch (scc_probe_stream_next(stream)) { - case '=': - type = SCC_TOK_NEQ; - goto double_char; - default: - scc_probe_stream_reset(stream), type = SCC_TOK_NOT; - break; - } - break; - /* clang-format off */ - case '[': type = SCC_TOK_L_BRACKET; break; - case ']': type = SCC_TOK_R_BRACKET; break; - case '(': type = SCC_TOK_L_PAREN; break; - case ')': type = SCC_TOK_R_PAREN; break; - case '{': type = SCC_TOK_L_BRACE; break; - case '}': type = SCC_TOK_R_BRACE; break; - case ';': type = 
SCC_TOK_SEMICOLON; break; - case ',': type = SCC_TOK_COMMA; break; - case ':': type = SCC_TOK_COLON; break; - /* clang-format on */ - case '.': - if (scc_probe_stream_next(stream) == '.' && - scc_probe_stream_next(stream) == '.') { - type = SCC_TOK_ELLIPSIS; - goto triple_char; - } - type = SCC_TOK_DOT; - break; - case '?': - type = SCC_TOK_COND; - break; - case '\v': - case '\f': - case ' ': - case '\t': - type = SCC_TOK_BLANK; - break; - case '\r': - case '\n': - scc_probe_stream_back(stream); - scc_lex_parse_skip_endline(stream, &lexer->pos); - scc_probe_stream_sync(stream); + } else if (is_newline(ch)) { + // 换行符:处理 \r 或 \n,以及 \r\n 组合 token->type = SCC_TOK_ENDLINE; - goto END; - case '#': - parse_line(lexer, token); - token->type = SCC_TOK_SHARP; - goto END; - case '\0': - case scc_stream_eof: - // EOF - type = SCC_TOK_EOF; - break; - case '\'': { - token->loc = lexer->pos; + next_char(lexer, &lex, &cur); // 消费第一个字符 + if (ch == '\r') { + // 尝试消费后面的 \n + if (peek_char(lexer, &cur) && cur.character == '\n') { + next_char(lexer, &lex, &cur); + } + } + } else if (ch == '/') { + // 可能为注释或除号 + scc_sstream_char_t next = {0}; + next_char(lexer, &lex, &cur); // 消费 '/' + peek_char(lexer, &next); + if (next.character == '=') { + token->type = SCC_TOK_ASSIGN_DIV; + next_char(lexer, &lex, &cur); + } else if (next.character == '/') { + // 行注释 // + token->type = SCC_TOK_LINE_COMMENT; + next_char(lexer, &lex, &cur); // 消费 '/' + while (peek_char(lexer, &cur) && !is_newline(cur.character)) { + next_char(lexer, &lex, &cur); + scc_ring_consume(lexer->stream_ref); + } + // 注释结束,不包含换行符(换行符单独成token) + } else if (next.character == '*') { + // 块注释 /* + token->type = SCC_TOK_BLOCK_COMMENT; + next_char(lexer, &lex, &cur); // 消费 '*' + while (1) { + if (!next_char(lexer, &lex, &cur)) { + // 文件结束,注释未闭合 + LOG_ERROR("Unterminated block comment"); + break; + } + if (cur.character == '*' && peek_char(lexer, &next) && + next.character == '/') { + next_char(lexer, &lex, &cur); // 消费 '/' + break; 
+ } + scc_ring_consume(lexer->stream_ref); + } + } else { + // 只是除号 / + token->type = SCC_TOK_DIV; + } + } else if (is_identifier_start(ch)) { + // 标识符或关键字 + token->type = SCC_TOK_IDENT; // 暂定 + while (peek_char(lexer, &cur) && is_identifier_part(cur.character)) { + next_char(lexer, &lex, &cur); + scc_ring_consume(lexer->stream_ref); + } + // 检查是否为关键字 + int idx = keyword_cmp(scc_cstring_as_cstr(&lex), scc_cstring_len(&lex)); + if (idx != -1) { + token->type = keywords[idx].tok; + } + } else if (is_digit(ch)) { + // 数字字面量(整数/浮点) + token->type = SCC_TOK_INT_LITERAL; // 先假定整数 + cbool maybe_float = false; + while (1) { + next_char(lexer, &lex, &cur); // 消费当前数字 + if (!peek_char(lexer, &cur)) + break; + ch = cur.character; + if (is_digit(ch) || (ch == '.' && !maybe_float)) { + if (ch == '.') + maybe_float = true; + continue; + } + if (ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P') { + maybe_float = true; + // 后面可能跟符号或数字 + continue; + } + if (ch == 'x' || ch == 'X') { + // 十六进制前缀,需特殊处理 + // 这里简化:将整个序列作为整数(保留前缀) + continue; + } + break; + } + if (maybe_float) + token->type = SCC_TOK_FLOAT_LITERAL; + } else if (ch == '\'') { + // 字符字面量 token->type = SCC_TOK_CHAR_LITERAL; - scc_probe_stream_reset(stream); - int ch = scc_lex_parse_char(stream, &lexer->pos); - scc_probe_stream_sync(stream); - if (ch == scc_stream_eof) { - LEX_ERROR("Unexpected character literal"); - token->type = SCC_TOK_UNKNOWN; - } else { - token->value.ch = ch; + next_char(lexer, &lex, &cur); // 开头的 ' + while (1) { + if (!peek_char(lexer, &cur)) { + LOG_ERROR("Unterminated character literal"); + break; + } + if (cur.character == '\'') { + next_char(lexer, &lex, &cur); // 闭引号 + break; + } + if (cur.character == '\\') { + // 转义序列:原样保存反斜杠和下一个字符 + next_char(lexer, &lex, &cur); + if (!peek_char(lexer, &cur)) + break; + next_char(lexer, &lex, &cur); + } else { + next_char(lexer, &lex, &cur); + } } - goto END; - } - case '"': { - token->loc = lexer->pos; + } else if (ch == '"') { + // 字符串字面量 token->type = 
SCC_TOK_STRING_LITERAL; - scc_cstring_t output = scc_cstring_create(); - scc_probe_stream_reset(stream); - if (scc_lex_parse_string(stream, &lexer->pos, &output) == true) { - scc_probe_stream_sync(stream); - token->value.cstr.data = scc_cstring_as_cstr(&output); - token->value.cstr.len = scc_cstring_len(&output); - } else { - LEX_ERROR("Unexpected string literal"); + next_char(lexer, &lex, &cur); // 开头的 " + while (1) { + if (!peek_char(lexer, &cur)) { + LOG_ERROR("Unterminated string literal"); + break; + } + if (cur.character == '"') { + next_char(lexer, &lex, &cur); // 闭引号 + break; + } + if (cur.character == '\\') { + // 转义序列 + next_char(lexer, &lex, &cur); + if (!peek_char(lexer, &cur)) + break; + next_char(lexer, &lex, &cur); + } else { + next_char(lexer, &lex, &cur); + } + scc_ring_consume(lexer->stream_ref); + } + } else { + scc_sstream_char_t next = {0}; + next_char(lexer, &lex, &cur); + peek_char(lexer, &next); + switch (ch) { + case '=': + switch (next.character) { + case '=': + token->type = SCC_TOK_EQ; + next_char(lexer, &lex, &cur); + break; + default: + token->type = SCC_TOK_ASSIGN; + break; + } + break; + case '+': + switch (next.character) { + case '+': + token->type = SCC_TOK_ADD_ADD; + next_char(lexer, &lex, &cur); + break; + case '=': + token->type = SCC_TOK_ASSIGN_ADD; + next_char(lexer, &lex, &cur); + break; + default: + token->type = SCC_TOK_ADD; + break; + } + break; + case '-': + switch (next.character) { + case '-': + token->type = SCC_TOK_SUB_SUB; + next_char(lexer, &lex, &cur); + break; + case '=': + token->type = SCC_TOK_ASSIGN_SUB; + next_char(lexer, &lex, &cur); + break; + case '>': + token->type = SCC_TOK_DEREF; + next_char(lexer, &lex, &cur); + break; + default: + token->type = SCC_TOK_SUB; + break; + } + break; + case '*': + switch (next.character) { + case '=': + token->type = SCC_TOK_ASSIGN_MUL; + next_char(lexer, &lex, &cur); + break; + default: + token->type = SCC_TOK_MUL; + break; + } + break; + case '%': + switch 
(next.character) { + case '=': + token->type = SCC_TOK_ASSIGN_MOD; + next_char(lexer, &lex, &cur); + break; + default: + token->type = SCC_TOK_MOD; + break; + } + break; + case '&': + switch (next.character) { + case '&': + token->type = SCC_TOK_AND_AND; + next_char(lexer, &lex, &cur); + break; + case '=': + token->type = SCC_TOK_ASSIGN_AND; + next_char(lexer, &lex, &cur); + break; + default: + token->type = SCC_TOK_AND; + break; + } + break; + case '|': + switch (next.character) { + case '|': + token->type = SCC_TOK_OR_OR; + next_char(lexer, &lex, &cur); + break; + case '=': + token->type = SCC_TOK_ASSIGN_OR; + next_char(lexer, &lex, &cur); + break; + default: + token->type = SCC_TOK_OR; + break; + } + break; + case '^': + switch (next.character) { + case '=': + token->type = SCC_TOK_ASSIGN_XOR; + next_char(lexer, &lex, &cur); + break; + default: + token->type = SCC_TOK_XOR; + break; + } + break; + case '<': + switch (next.character) { + case '=': + token->type = SCC_TOK_LE; + next_char(lexer, &lex, &cur); + break; + case '<': { + next_char(lexer, &lex, &cur); + if (peek_char(lexer, &next) && next.character == '=') { + token->type = SCC_TOK_ASSIGN_L_SH; + next_char(lexer, &lex, &cur); + } else { + token->type = SCC_TOK_L_SH; + } + break; + } + default: + token->type = SCC_TOK_LT; + break; + } + break; + case '>': + switch (next.character) { + case '=': + token->type = SCC_TOK_GE; + next_char(lexer, &lex, &cur); + break; + case '>': { + next_char(lexer, &lex, &cur); + if (peek_char(lexer, &next) && next.character == '=') { + token->type = SCC_TOK_ASSIGN_R_SH; + next_char(lexer, &lex, &cur); + } else { + token->type = SCC_TOK_R_SH; + } + break; + } + default: + token->type = SCC_TOK_GT; + break; + } + break; + case '~': + token->type = SCC_TOK_BIT_NOT; + break; + case '!': + switch (next.character) { + case '=': + token->type = SCC_TOK_NEQ; + next_char(lexer, &lex, &cur); + break; + default: + token->type = SCC_TOK_NOT; + break; + } + break; + /* clang-format off */ 
+ case '[': token->type = SCC_TOK_L_BRACKET; break; + case ']': token->type = SCC_TOK_R_BRACKET; break; + case '(': token->type = SCC_TOK_L_PAREN; break; + case ')': token->type = SCC_TOK_R_PAREN; break; + case '{': token->type = SCC_TOK_L_BRACE; break; + case '}': token->type = SCC_TOK_R_BRACE; break; + case ';': token->type = SCC_TOK_SEMICOLON; break; + case ',': token->type = SCC_TOK_COMMA; break; + case ':': token->type = SCC_TOK_COLON; break; + /* clang-format on */ + case '.': + if (next.character == '.' && peek_char(lexer, &next) && + next.character == '.') { + token->type = SCC_TOK_ELLIPSIS; + next_char(lexer, &lex, &cur); + next_char(lexer, &lex, &cur); + } else { + token->type = SCC_TOK_DOT; + } + break; + case '?': + token->type = SCC_TOK_COND; + break; + case '#': + token->type = SCC_TOK_SHARP; + break; + default: token->type = SCC_TOK_UNKNOWN; + LEX_ERROR("unsupport char in sourse code `%c`:0x%x", ch, ch); + break; } - - goto END; } - /* clang-format off */ - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - /* clang-format on */ - token->loc = lexer->pos; - token->type = SCC_TOK_INT_LITERAL; - usize output; - scc_probe_stream_reset(stream); - if (scc_lex_parse_number(stream, &lexer->pos, &output) == true) { - scc_probe_stream_sync(stream); - token->value.u = output; - } else { - LEX_ERROR("Unexpected number literal"); - token->type = SCC_TOK_UNKNOWN; - } - goto END; - /* clang-format off */ - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': - case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': - case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': - case 'v': case 'w': case 'x': case 'y': case 'z': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': - case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': - case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': - case 'V': case 'W': case 'X': 
case 'Y': case 'Z': case '_': - /* clang-format on */ - scc_cstring_t str = scc_cstring_create(); - scc_probe_stream_reset(stream); - cbool ret = scc_lex_parse_identifier(stream, &lexer->pos, &str); - scc_probe_stream_sync(stream); - Assert(ret == true); - int res = keyword_cmp(scc_cstring_as_cstr(&str), scc_cstring_len(&str)); - if (res == -1) { - token->value.cstr.data = (char *)scc_cstring_as_cstr(&str); - token->value.cstr.len = scc_cstring_len(&str); - type = SCC_TOK_IDENT; - } else { - scc_cstring_free(&str); - type = keywords[res].tok; - } - token->type = type; - goto END; - default: - LEX_ERROR("unsupport char in sourse code `%c`:0x%x", ch, ch); - break; - } - goto once_char; -triple_char: - scc_probe_stream_consume(stream); - scc_pos_next(&lexer->pos); -double_char: - scc_probe_stream_consume(stream); - scc_pos_next(&lexer->pos); -once_char: - scc_probe_stream_consume(stream); - scc_pos_next(&lexer->pos); - token->type = type; -END: - LEX_DEBUG("get token `%s` in %s:%d:%d", scc_get_tok_name(token->type), - token->loc.name, token->loc.line, token->loc.column); + // 设置token + scc_ring_consume(lexer->stream_ref); + token->type = token->type; // 上面已设 + token->loc = start_loc; + token->lexeme = lex; // 转移所有权 + LEX_DEBUG("get token `%s` (%s) at %s:%d:%d", scc_get_tok_name(token->type), + scc_cstring_as_cstr(&token->lexeme), token->loc.name, + token->loc.line, token->loc.col); } // scc_lexer_get_token maybe got invalid (with parser) void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) { - scc_tok_subtype_t type; + scc_tok_subtype_t subtype; do { scc_lexer_get_token(lexer, token); - type = scc_get_tok_subtype(token->type); - AssertFmt(type != SCC_TOK_SUBTYPE_INVALID, + subtype = scc_get_tok_subtype(token->type); + AssertFmt(subtype != SCC_TOK_SUBTYPE_INVALID, "Invalid token: `%s` at %s:%d:%d", scc_get_tok_name(token->type), token->loc.name, token->loc.line, token->loc.col); - Assert(type != SCC_TOK_SUBTYPE_INVALID); - } while (type == 
SCC_TOK_SUBTYPE_EMPTYSPACE || - type == SCC_TOK_SUBTYPE_COMMENT); + } while (subtype == SCC_TOK_SUBTYPE_EMPTYSPACE || + subtype == SCC_TOK_SUBTYPE_COMMENT); } diff --git a/libs/lexer/src/main.c b/libs/lexer/src/main.c new file mode 100644 index 0000000..73755cd --- /dev/null +++ b/libs/lexer/src/main.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include +#include +/// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer +/* +tok_tConstant { + int have; + union { + char ch; + int i; + float f; + double d; + long long ll; + char* str; + }; +}; +*/ + +int g_num; +int g_num_arr[3]; +int main(int argc, char *argv[]) { + // int num = 0; + if (argc == 3 && strcmp(argv[2], "--debug") == 0) { + log_set_level(NULL, LOG_LEVEL_ALL); + } else { + // FIXME it is a hack lexer_logger + log_set_level(&__scc_lexer_log, LOG_LEVEL_NOTSET); + log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR | + LOG_LEVEL_FATAL); + } + + const char *file_name = __FILE__; + if (argc == 2) { + file_name = argv[1]; + } + + scc_lexer_t lexer; + scc_sstream_t stream; + scc_sstream_init(&stream, file_name, 16); + scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream); + scc_lexer_init(&lexer, ref); + scc_lexer_tok_t token; + + while (1) { + scc_lexer_get_valid_token(&lexer, &token); + if (token.type == SCC_TOK_EOF) { + break; + } + LOG_DEBUG("get token [%-8s] `%s` at %s:%d:%d", + scc_get_tok_name(token.type), + scc_cstring_as_cstr(&token.lexeme), token.loc.name, + token.loc.line, token.loc.col); + // LOG_DEBUG("%s", token.val.str); + // printf("line: %d, column: %d, type: %3d, typename: %s\n", + // lexer.line, lexer.index, token.type, + // scc_get_tok_name(token.type)); + } + scc_sstream_drop_ring(ref); + scc_sstream_drop(&stream); + + LOG_INFO("Lexer is Ok..."); + return 0; +} diff --git a/libs/lexer/tests/test_lexer.c b/libs/lexer/tests/test_lexer.c new file mode 100644 index 0000000..2cdd5f3 --- /dev/null +++ b/libs/lexer/tests/test_lexer.c @@ -0,0 +1,403 @@ +// 
test_lexer.c +#include +#include +#include + +// 辅助函数:释放 token 的 lexeme +static void free_token(scc_lexer_tok_t *tok) { scc_cstring_free(&tok->lexeme); } + +// 单 token 测试宏(检查类型) +#define TEST_TOKEN(input, expected_type) \ + do { \ + scc_lexer_t lexer; \ + scc_lexer_tok_t token; \ + scc_sstream_t stream; \ + scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16); \ + scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream); \ + scc_lexer_init(&lexer, ref); \ + scc_lexer_get_token(&lexer, &token); \ + \ + TEST_CHECK(token.type == expected_type); \ + TEST_MSG("Input: '%s'", input); \ + TEST_MSG("Expected: %s", scc_get_tok_name(expected_type)); \ + TEST_MSG("Got: %s", scc_get_tok_name(token.type)); \ + \ + free_token(&token); \ + scc_sstream_drop_ring(ref); \ + scc_sstream_drop(&stream); \ + } while (0) + +// 多 token 序列测试宏(接受类型数组) +#define TEST_SEQUENCE(input, ...) \ + do { \ + scc_lexer_t lexer; \ + scc_lexer_tok_t token; \ + scc_sstream_t stream; \ + scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16); \ + scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream); \ + scc_lexer_init(&lexer, ref); \ + \ + scc_tok_type_t expected[] = {__VA_ARGS__}; \ + size_t count = sizeof(expected) / sizeof(expected[0]); \ + for (size_t i = 0; i < count; i++) { \ + scc_lexer_get_token(&lexer, &token); \ + TEST_CHECK(token.type == expected[i]); \ + TEST_MSG("Token %zu: input '%s'", i, input); \ + TEST_MSG("Expected: %s", scc_get_tok_name(expected[i])); \ + TEST_MSG("Got: %s", scc_get_tok_name(token.type)); \ + free_token(&token); \ + } \ + \ + scc_sstream_drop_ring(ref); \ + scc_sstream_drop(&stream); \ + } while (0) + +// ============================ 测试用例 ============================ + +void test_operators() { + TEST_CASE("Arithmetic operators"); + TEST_TOKEN("+", SCC_TOK_ADD); + TEST_TOKEN("++", SCC_TOK_ADD_ADD); + TEST_TOKEN("+=", SCC_TOK_ASSIGN_ADD); + TEST_TOKEN("-", SCC_TOK_SUB); + TEST_TOKEN("--", SCC_TOK_SUB_SUB); + TEST_TOKEN("-=", SCC_TOK_ASSIGN_SUB); + 
TEST_TOKEN("*", SCC_TOK_MUL); + TEST_TOKEN("*=", SCC_TOK_ASSIGN_MUL); + TEST_TOKEN("/", SCC_TOK_DIV); + TEST_TOKEN("/=", SCC_TOK_ASSIGN_DIV); + TEST_TOKEN("%", SCC_TOK_MOD); + TEST_TOKEN("%=", SCC_TOK_ASSIGN_MOD); + + TEST_CASE("Bitwise operators"); + TEST_TOKEN("&", SCC_TOK_AND); + TEST_TOKEN("&&", SCC_TOK_AND_AND); + TEST_TOKEN("&=", SCC_TOK_ASSIGN_AND); + TEST_TOKEN("|", SCC_TOK_OR); + TEST_TOKEN("||", SCC_TOK_OR_OR); + TEST_TOKEN("|=", SCC_TOK_ASSIGN_OR); + TEST_TOKEN("^", SCC_TOK_XOR); + TEST_TOKEN("^=", SCC_TOK_ASSIGN_XOR); + TEST_TOKEN("~", SCC_TOK_BIT_NOT); + TEST_TOKEN("<<", SCC_TOK_L_SH); + TEST_TOKEN("<<=", SCC_TOK_ASSIGN_L_SH); + TEST_TOKEN(">>", SCC_TOK_R_SH); + TEST_TOKEN(">>=", SCC_TOK_ASSIGN_R_SH); + + TEST_CASE("Comparison operators"); + TEST_TOKEN("==", SCC_TOK_EQ); + TEST_TOKEN("!=", SCC_TOK_NEQ); + TEST_TOKEN("<", SCC_TOK_LT); + TEST_TOKEN("<=", SCC_TOK_LE); + TEST_TOKEN(">", SCC_TOK_GT); + TEST_TOKEN(">=", SCC_TOK_GE); + + TEST_CASE("Special symbols"); + TEST_TOKEN("(", SCC_TOK_L_PAREN); + TEST_TOKEN(")", SCC_TOK_R_PAREN); + TEST_TOKEN("[", SCC_TOK_L_BRACKET); + TEST_TOKEN("]", SCC_TOK_R_BRACKET); + TEST_TOKEN("{", SCC_TOK_L_BRACE); + TEST_TOKEN("}", SCC_TOK_R_BRACE); + TEST_TOKEN(";", SCC_TOK_SEMICOLON); + TEST_TOKEN(",", SCC_TOK_COMMA); + TEST_TOKEN(":", SCC_TOK_COLON); + TEST_TOKEN(".", SCC_TOK_DOT); + TEST_TOKEN("...", SCC_TOK_ELLIPSIS); + TEST_TOKEN("->", SCC_TOK_DEREF); + TEST_TOKEN("?", SCC_TOK_COND); +} + +void test_keywords() { + TEST_CASE("C89 keywords"); + TEST_TOKEN("while", SCC_TOK_WHILE); + TEST_TOKEN("sizeof", SCC_TOK_SIZEOF); + TEST_TOKEN("if", SCC_TOK_IF); + TEST_TOKEN("else", SCC_TOK_ELSE); + TEST_TOKEN("for", SCC_TOK_FOR); + TEST_TOKEN("do", SCC_TOK_DO); + TEST_TOKEN("switch", SCC_TOK_SWITCH); + TEST_TOKEN("case", SCC_TOK_CASE); + TEST_TOKEN("default", SCC_TOK_DEFAULT); + TEST_TOKEN("break", SCC_TOK_BREAK); + TEST_TOKEN("continue", SCC_TOK_CONTINUE); + TEST_TOKEN("return", SCC_TOK_RETURN); + TEST_TOKEN("goto", SCC_TOK_GOTO); 
+ TEST_TOKEN("auto", SCC_TOK_AUTO); + TEST_TOKEN("register", SCC_TOK_REGISTER); + TEST_TOKEN("static", SCC_TOK_STATIC); + TEST_TOKEN("extern", SCC_TOK_EXTERN); + TEST_TOKEN("typedef", SCC_TOK_TYPEDEF); + TEST_TOKEN("const", SCC_TOK_CONST); + TEST_TOKEN("volatile", SCC_TOK_VOLATILE); + TEST_TOKEN("signed", SCC_TOK_SIGNED); + TEST_TOKEN("unsigned", SCC_TOK_UNSIGNED); + TEST_TOKEN("short", SCC_TOK_SHORT); + TEST_TOKEN("long", SCC_TOK_LONG); + TEST_TOKEN("int", SCC_TOK_INT); + TEST_TOKEN("char", SCC_TOK_CHAR); + TEST_TOKEN("float", SCC_TOK_FLOAT); + TEST_TOKEN("double", SCC_TOK_DOUBLE); + TEST_TOKEN("void", SCC_TOK_VOID); + TEST_TOKEN("struct", SCC_TOK_STRUCT); + TEST_TOKEN("union", SCC_TOK_UNION); + TEST_TOKEN("enum", SCC_TOK_ENUM); + + TEST_CASE("C99 keywords"); + TEST_TOKEN("inline", SCC_TOK_INLINE); + TEST_TOKEN("restrict", SCC_TOK_RESTRICT); + // _Bool, _Complex, _Imaginary 可根据需要添加 + + TEST_CASE("SCC extensions (if enabled)"); + TEST_TOKEN("asm", SCC_TOK_ASM); + TEST_TOKEN("atomic", SCC_TOK_ATOMIC); + TEST_TOKEN("bool", SCC_TOK_BOOL); + TEST_TOKEN("complex", SCC_TOK_COMPLEX); +} + +void test_literals() { + TEST_CASE("Integer literals - decimal"); + TEST_TOKEN("0", SCC_TOK_INT_LITERAL); + TEST_TOKEN("123", SCC_TOK_INT_LITERAL); + TEST_TOKEN("2147483647", SCC_TOK_INT_LITERAL); + TEST_TOKEN("4294967295", SCC_TOK_INT_LITERAL); + + TEST_CASE("Integer literals - hexadecimal"); + TEST_TOKEN("0x0", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0x1A3F", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0XABCDEF", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0x123abc", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0XFF", SCC_TOK_INT_LITERAL); + + TEST_CASE("Integer literals - octal"); + TEST_TOKEN("0123", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0777", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0", SCC_TOK_INT_LITERAL); // 0 既是十进制也是八进制 + + TEST_CASE("Integer literals - binary (C23 extension)"); + TEST_TOKEN("0b1010", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0B1100", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0b0", SCC_TOK_INT_LITERAL); + + 
TEST_CASE("Integer literals with suffixes"); + TEST_TOKEN("123U", SCC_TOK_INT_LITERAL); + TEST_TOKEN("456L", SCC_TOK_INT_LITERAL); + TEST_TOKEN("789UL", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0x1FFLL", SCC_TOK_INT_LITERAL); + TEST_TOKEN("0b1010ULL", SCC_TOK_INT_LITERAL); + + TEST_CASE("Floating literals - decimal"); + TEST_TOKEN("0.0", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("3.14", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN(".5", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("0.", SCC_TOK_FLOAT_LITERAL); + + TEST_CASE("Floating literals - scientific"); + TEST_TOKEN("1e10", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("1E-5", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("2.5e+3", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN(".1e2", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("1.e3", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("123.456e-7", SCC_TOK_FLOAT_LITERAL); + + TEST_CASE("Floating literals - hexadecimal (C99)"); + TEST_TOKEN("0x1.2p3", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("0x1p-2", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("0x0.1p10", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("0X1.2P3", SCC_TOK_FLOAT_LITERAL); + + TEST_CASE("Floating literals with suffixes"); + TEST_TOKEN("1.0f", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("2.0F", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("3.0l", SCC_TOK_FLOAT_LITERAL); + TEST_TOKEN("4.0L", SCC_TOK_FLOAT_LITERAL); + + TEST_CASE("Character literals - simple"); + TEST_TOKEN("'a'", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'0'", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("' '", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'\t'", SCC_TOK_CHAR_LITERAL); // 制表符在单引号内 + + TEST_CASE("Character literals - escape sequences"); + TEST_TOKEN("'\\n'", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'\\t'", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'\\\\'", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'\\''", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'\\\"'", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'\\?'", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'\\0'", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'\\123'", SCC_TOK_CHAR_LITERAL); // 八进制 + TEST_TOKEN("'\\xAB'", SCC_TOK_CHAR_LITERAL); 
// 十六进制 + + TEST_CASE("Character literals - multi-byte (implementation defined)"); + TEST_TOKEN("'ab'", SCC_TOK_CHAR_LITERAL); + TEST_TOKEN("'\\x41\\x42'", SCC_TOK_CHAR_LITERAL); // 多个转义 + + TEST_CASE("String literals - basic"); + TEST_TOKEN("\"hello\"", SCC_TOK_STRING_LITERAL); + TEST_TOKEN("\"\"", SCC_TOK_STRING_LITERAL); + TEST_TOKEN("\"a b c\"", SCC_TOK_STRING_LITERAL); + + TEST_CASE("String literals - escape sequences"); + TEST_TOKEN("\"a\\nb\\tc\"", SCC_TOK_STRING_LITERAL); + TEST_TOKEN("\"\\\\ \\\" \\' \\?\"", SCC_TOK_STRING_LITERAL); + TEST_TOKEN("\"\\123\\xAB\"", SCC_TOK_STRING_LITERAL); + + TEST_CASE("String literals - wide and UTF-8 prefixes (C11)"); + TEST_TOKEN("L\"wide\"", SCC_TOK_STRING_LITERAL); + TEST_TOKEN("u\"utf16\"", SCC_TOK_STRING_LITERAL); + TEST_TOKEN("U\"utf32\"", SCC_TOK_STRING_LITERAL); + TEST_TOKEN("u8\"utf8\"", SCC_TOK_STRING_LITERAL); +} + +void test_whitespace() { + TEST_CASE("Whitespace characters"); + TEST_TOKEN(" ", SCC_TOK_BLANK); + TEST_TOKEN("\t", SCC_TOK_BLANK); + TEST_TOKEN("\v", SCC_TOK_BLANK); + TEST_TOKEN("\f", SCC_TOK_BLANK); + TEST_TOKEN(" \t\v\f", SCC_TOK_BLANK); // 连续空白应为一个 token +} + +void test_newlines() { + TEST_CASE("Newline characters"); + TEST_TOKEN("\n", SCC_TOK_ENDLINE); + TEST_TOKEN("\r", SCC_TOK_ENDLINE); + TEST_TOKEN("\r\n", SCC_TOK_ENDLINE); // 应视为单个换行符 +} + +void test_comments() { + TEST_CASE("Line comments"); + TEST_TOKEN("// single line comment", SCC_TOK_LINE_COMMENT); + TEST_TOKEN("// comment with // inside", SCC_TOK_LINE_COMMENT); + TEST_TOKEN("// comment at end", SCC_TOK_LINE_COMMENT); + + TEST_CASE("Block comments"); + TEST_TOKEN("/* simple */", SCC_TOK_BLOCK_COMMENT); + TEST_TOKEN("/* multi\nline */", SCC_TOK_BLOCK_COMMENT); + TEST_TOKEN("/**/", SCC_TOK_BLOCK_COMMENT); // 空注释 + TEST_TOKEN("/* with * inside */", SCC_TOK_BLOCK_COMMENT); + TEST_TOKEN("/* nested /* not allowed in C */", + SCC_TOK_BLOCK_COMMENT); // 词法上不会嵌套 +} + +void test_identifiers() { + TEST_CASE("Valid identifiers"); + 
TEST_TOKEN("foo", SCC_TOK_IDENT); + TEST_TOKEN("_foo", SCC_TOK_IDENT); + TEST_TOKEN("foo123", SCC_TOK_IDENT); + TEST_TOKEN("foo_bar", SCC_TOK_IDENT); + TEST_TOKEN("FOO", SCC_TOK_IDENT); + TEST_TOKEN("_", SCC_TOK_IDENT); + TEST_TOKEN("__LINE__", SCC_TOK_IDENT); // 预处理宏名也是标识符 + + // 超长标识符(假设缓冲区足够) + char long_id[1024]; + memset(long_id, 'a', sizeof(long_id) - 1); + long_id[sizeof(long_id) - 1] = '\0'; + TEST_TOKEN(long_id, SCC_TOK_IDENT); +} + +void test_preprocessor() { + TEST_CASE("Preprocessor directives - just the # token"); + TEST_TOKEN("#", SCC_TOK_SHARP); + TEST_TOKEN("##", SCC_TOK_SHARP); // 第一个 # 是 token,第二个 # 将是下一个 + // token(在序列测试中验证) + + // 多 token 序列测试 #include 等 + TEST_SEQUENCE("#include <stdio.h>", SCC_TOK_SHARP, SCC_TOK_IDENT, + SCC_TOK_BLANK, SCC_TOK_LT, SCC_TOK_IDENT, SCC_TOK_DOT, + SCC_TOK_IDENT, SCC_TOK_GT); + TEST_SEQUENCE("#define FOO 123", SCC_TOK_SHARP, SCC_TOK_IDENT, + SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK, + SCC_TOK_INT_LITERAL); +} + +void test_edge_cases() { + TEST_CASE("Invalid characters"); + TEST_TOKEN("@", SCC_TOK_UNKNOWN); + TEST_TOKEN("`", SCC_TOK_UNKNOWN); + TEST_TOKEN("$", SCC_TOK_UNKNOWN); // 在 C 中不是标识符字符 + + TEST_CASE("Empty input"); + TEST_TOKEN("", SCC_TOK_EOF); // 立即 EOF + + TEST_CASE("Only whitespace"); + TEST_TOKEN(" \t", SCC_TOK_BLANK); + // 之后应该为 EOF,但我们的单 token 测试只取第一个 token + + TEST_CASE("Numbers followed by letters (no suffix)"); + // 词法上应拆分为数字和标识符 + TEST_SEQUENCE("123abc", SCC_TOK_INT_LITERAL, SCC_TOK_IDENT); + TEST_SEQUENCE("0x123xyz", SCC_TOK_INT_LITERAL, SCC_TOK_IDENT); +} + +void test_sequences() { + TEST_CASE("Simple expression"); + TEST_SEQUENCE("a + b * c", SCC_TOK_IDENT, SCC_TOK_BLANK, SCC_TOK_ADD, + SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK, SCC_TOK_MUL, + SCC_TOK_BLANK, SCC_TOK_IDENT); + + TEST_CASE("Function call"); + TEST_SEQUENCE("func(1, 2);", SCC_TOK_IDENT, SCC_TOK_L_PAREN, + SCC_TOK_INT_LITERAL, SCC_TOK_COMMA, SCC_TOK_BLANK, + SCC_TOK_INT_LITERAL, SCC_TOK_R_PAREN, SCC_TOK_SEMICOLON); + +
TEST_CASE("Multi-character operators"); + TEST_SEQUENCE(">>=", SCC_TOK_ASSIGN_R_SH); + TEST_SEQUENCE("<<=", SCC_TOK_ASSIGN_L_SH); + TEST_SEQUENCE("...", SCC_TOK_ELLIPSIS); + TEST_SEQUENCE("->", SCC_TOK_DEREF); + TEST_SEQUENCE("##", SCC_TOK_SHARP, SCC_TOK_SHARP); // 两个预处理记号 + + TEST_CASE("Comments and whitespace interleaved"); + TEST_SEQUENCE("/* comment */ a // line comment\n b", SCC_TOK_BLOCK_COMMENT, + SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK, + SCC_TOK_LINE_COMMENT, SCC_TOK_ENDLINE, SCC_TOK_BLANK, + SCC_TOK_IDENT); + + TEST_CASE("String literals with escapes"); + TEST_SEQUENCE("\"hello\\nworld\"", SCC_TOK_STRING_LITERAL); + TEST_SEQUENCE( + "L\"wide\"", + SCC_TOK_STRING_LITERAL); // 前缀作为标识符?不,整个是字符串字面量 + + TEST_CASE("Character literals with escapes"); + TEST_SEQUENCE("'\\x41'", SCC_TOK_CHAR_LITERAL); + TEST_SEQUENCE("'\\123'", SCC_TOK_CHAR_LITERAL); +} + +void test_error_recovery() { + // 测试未闭合的字符字面量:词法分析器可能继续直到遇到换行或 EOF + // 这里假设它会产生一个 SCC_TOK_CHAR_LITERAL 但包含到结束 + // 但标准 C 中未闭合是错误,我们可能返回 UNKNOWN + TEST_CASE("Unterminated character literal"); + TEST_TOKEN("'a", SCC_TOK_UNKNOWN); // 取决于实现,可能为 CHAR_LITERAL + // 更可靠的测试:序列中下一个 token 是什么 + TEST_SEQUENCE("'a b", SCC_TOK_UNKNOWN, + SCC_TOK_IDENT); // 假设第一个 token 是错误 + + TEST_CASE("Unterminated string literal"); + TEST_TOKEN("\"hello", SCC_TOK_UNKNOWN); // 同样 + + TEST_CASE("Unterminated block comment"); + TEST_SEQUENCE("/* comment", + SCC_TOK_BLOCK_COMMENT); // 直到 EOF,可能仍为注释 +} + +// ============================ 主测试列表 ============================ + +TEST_LIST = { + {"operators", test_operators}, + {"keywords", test_keywords}, + {"literals", test_literals}, + {"whitespace", test_whitespace}, + {"newlines", test_newlines}, + {"comments", test_comments}, + {"identifiers", test_identifiers}, + {"preprocessor", test_preprocessor}, + {"edge_cases", test_edge_cases}, + {"sequences", test_sequences}, + {"error_recovery", test_error_recovery}, + {NULL, NULL}, +}; \ No newline at end of file diff --git 
a/libs/lexer/tests/test_parse.c b/libs/lexer/tests/test_parse.c deleted file mode 100644 index 3d11c83..0000000 --- a/libs/lexer/tests/test_parse.c +++ /dev/null @@ -1,170 +0,0 @@ -// test_lexer.c -#include -#include -#include - -// 测试辅助函数 -static inline void test_lexer_string(const char *input, - scc_tok_type_t expected_type) { - scc_lexer_t lexer; - scc_lexer_tok_t token; - scc_mem_probe_stream_t stream; - - scc_lexer_init(&lexer, scc_mem_probe_stream_init(&stream, input, - strlen(input), false)); - scc_lexer_get_token(&lexer, &token); - - TEST_CHECK(token.type == expected_type); - TEST_MSG("Expected: %s", scc_get_tok_name(expected_type)); - TEST_MSG("Got: %s", scc_get_tok_name(token.type)); -} - -// 基础运算符测试 -void test_operators() { - TEST_CASE("Arithmetic operators"); - { - test_lexer_string("+", SCC_TOK_ADD); - test_lexer_string("++", SCC_TOK_ADD_ADD); - test_lexer_string("+=", SCC_TOK_ASSIGN_ADD); - test_lexer_string("-", SCC_TOK_SUB); - test_lexer_string("--", SCC_TOK_SUB_SUB); - test_lexer_string("-=", SCC_TOK_ASSIGN_SUB); - test_lexer_string("*", SCC_TOK_MUL); - test_lexer_string("*=", SCC_TOK_ASSIGN_MUL); - test_lexer_string("/", SCC_TOK_DIV); - test_lexer_string("/=", SCC_TOK_ASSIGN_DIV); - test_lexer_string("%", SCC_TOK_MOD); - test_lexer_string("%=", SCC_TOK_ASSIGN_MOD); - } - - TEST_CASE("Bitwise operators"); - { - test_lexer_string("&", SCC_TOK_AND); - test_lexer_string("&&", SCC_TOK_AND_AND); - test_lexer_string("&=", SCC_TOK_ASSIGN_AND); - test_lexer_string("|", SCC_TOK_OR); - test_lexer_string("||", SCC_TOK_OR_OR); - test_lexer_string("|=", SCC_TOK_ASSIGN_OR); - test_lexer_string("^", SCC_TOK_XOR); - test_lexer_string("^=", SCC_TOK_ASSIGN_XOR); - test_lexer_string("~", SCC_TOK_BIT_NOT); - test_lexer_string("<<", SCC_TOK_L_SH); - test_lexer_string("<<=", SCC_TOK_ASSIGN_L_SH); - test_lexer_string(">>", SCC_TOK_R_SH); - test_lexer_string(">>=", SCC_TOK_ASSIGN_R_SH); - } - - TEST_CASE("Comparison operators"); - { - test_lexer_string("==", SCC_TOK_EQ); 
- test_lexer_string("!=", SCC_TOK_NEQ); - test_lexer_string("<", SCC_TOK_LT); - test_lexer_string("<=", SCC_TOK_LE); - test_lexer_string(">", SCC_TOK_GT); - test_lexer_string(">=", SCC_TOK_GE); - } - - TEST_CASE("Special symbols"); - { - test_lexer_string("(", SCC_TOK_L_PAREN); - test_lexer_string(")", SCC_TOK_R_PAREN); - test_lexer_string("[", SCC_TOK_L_BRACKET); - test_lexer_string("]", SCC_TOK_R_BRACKET); - test_lexer_string("{", SCC_TOK_L_BRACE); - test_lexer_string("}", SCC_TOK_R_BRACE); - test_lexer_string(";", SCC_TOK_SEMICOLON); - test_lexer_string(",", SCC_TOK_COMMA); - test_lexer_string(":", SCC_TOK_COLON); - test_lexer_string(".", SCC_TOK_DOT); - test_lexer_string("...", SCC_TOK_ELLIPSIS); - test_lexer_string("->", SCC_TOK_DEREF); - test_lexer_string("?", SCC_TOK_COND); - } -} - -// 关键字测试 -void test_keywords() { - TEST_CASE("C89 keywords"); - test_lexer_string("while", SCC_TOK_WHILE); - test_lexer_string("sizeof", SCC_TOK_SIZEOF); - - TEST_CASE("C99 keywords"); - test_lexer_string("restrict", SCC_TOK_RESTRICT); - // test_lexer_string("_Bool", SCC_TOK_INT); // 需确认你的类型定义 -} - -// 字面量测试 -void test_literals() { - TEST_CASE("Integer literals"); - { - // 十进制 - test_lexer_string("0", SCC_TOK_INT_LITERAL); - test_lexer_string("123", SCC_TOK_INT_LITERAL); - test_lexer_string("2147483647", SCC_TOK_INT_LITERAL); - - // 十六进制 - test_lexer_string("0x0", SCC_TOK_INT_LITERAL); - test_lexer_string("0x1A3F", SCC_TOK_INT_LITERAL); - test_lexer_string("0XABCDEF", SCC_TOK_INT_LITERAL); - - // 八进制 - test_lexer_string("0123", SCC_TOK_INT_LITERAL); - test_lexer_string("0777", SCC_TOK_INT_LITERAL); - - // 边界值测试 - test_lexer_string("2147483647", SCC_TOK_INT_LITERAL); // INT_MAX - test_lexer_string("4294967295", SCC_TOK_INT_LITERAL); // UINT_MAX - } - - TEST_CASE("Character literals"); - { - test_lexer_string("'a'", SCC_TOK_CHAR_LITERAL); - test_lexer_string("'\\n'", SCC_TOK_CHAR_LITERAL); - test_lexer_string("'\\t'", SCC_TOK_CHAR_LITERAL); - test_lexer_string("'\\\\'", 
SCC_TOK_CHAR_LITERAL); - test_lexer_string("'\\0'", SCC_TOK_CHAR_LITERAL); - } - - TEST_CASE("String literals"); - { - test_lexer_string("\"hello\"", SCC_TOK_STRING_LITERAL); - test_lexer_string("\"multi-line\\nstring\"", SCC_TOK_STRING_LITERAL); - test_lexer_string("\"escape\\\"quote\"", SCC_TOK_STRING_LITERAL); - } - - // TEST_CASE("Floating literals"); - // test_lexer_string("3.14e-5", SCC_TOK_FLOAT_LITERAL); -} - -// 边界测试 -void test_edge_cases() { - // TEST_CASE("Long identifiers"); - // char long_id[LEXER_MAX_ SCC_TOK_SIZE+2] = {0}; - // memset(long_id, 'a', LEXER_MAX_ SCC_TOK_SIZE+1); - // test_lexer_string(long_id, SCC_TOK_IDENT); - - // TEST_CASE("Buffer boundary"); - // char boundary[LEXER_BUFFER_SIZE*2] = {0}; - // memset(boundary, '+', LEXER_BUFFER_SIZE*2-1); - // test_lexer_string(boundary, SCC_TOK_ADD); -} - -// 错误处理测试 -// void test_error_handling() { -// TEST_CASE("Invalid characters"); -// cc_lexer_t lexer; -// tok_t token; - -// init_lexer(&lexer, "test.c", NULL, test_read); -// get_valid_token(&lexer, &token); - -// TEST_CHECK(token.type == SCC_TOK_EOF); // 应触发错误处理 -// } - -// 测试列表 -TEST_LIST = {{"operators", test_operators}, - {"keywords", test_keywords}, - {"literals", test_literals}, - {"edge_cases", test_edge_cases}, - // {"error_handling", test_error_handling}, - {NULL, NULL}}; diff --git a/libs/lexer/tests/test_run.c b/libs/lexer/tests/test_run.c deleted file mode 100644 index a73b8e1..0000000 --- a/libs/lexer/tests/test_run.c +++ /dev/null @@ -1,93 +0,0 @@ -#include -#include -#include -#include -#include -#include -/// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer -/* -tok_tConstant { - int have; - union { - char ch; - int i; - float f; - double d; - long long ll; - char* str; - }; -}; -*/ - -int g_num; -int g_num_arr[3]; -int main(int argc, char *argv[]) { - // int num = 0; - if (argc == 3 && strcmp(argv[2], "--debug") == 0) { - log_set_level(NULL, LOG_LEVEL_ALL); - } else { - // FIXME it is a hack lexer_logger - 
log_set_level(&__scc_lexer_log, LOG_LEVEL_NOTSET); - log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR | - LOG_LEVEL_FATAL); - } - - const char *file_name = __FILE__; - if (argc == 2) { - file_name = argv[1]; - } - FILE *fp = fopen(file_name, "rb"); - if (fp == NULL) { - perror("open file failed"); - return 1; - } - - if (fseek(fp, 0, SEEK_END) != 0) { - perror("fseek failed"); - return 1; - } - usize fsize = ftell(fp); - LOG_INFO("file size: %zu", fsize); - if (fseek(fp, 0, SEEK_SET)) { - perror("fseek failed"); - return 1; - } - - char *buffer = (char *)malloc(fsize); - - usize read_ret = fread(buffer, 1, fsize, fp); - fclose(fp); - if (read_ret != fsize) { - LOG_FATAL("fread failed read_ret %u != fsize %u", read_ret, fsize); - free(buffer); - return 1; - } - - scc_lexer_t lexer; - scc_mem_probe_stream_t mem_stream = {0}; - scc_probe_stream_t *stream = - scc_mem_probe_stream_init(&mem_stream, buffer, fsize, false); - Assert(stream != null); - scc_cstring_clear(&stream->name); - scc_cstring_append_cstr(&stream->name, file_name, strlen(file_name)); - scc_lexer_init(&lexer, stream); - scc_lexer_tok_t tok; - - while (1) { - scc_lexer_get_valid_token(&lexer, &tok); - if (tok.type == SCC_TOK_EOF) { - break; - } - LOG_DEBUG("token `%s` at %s:%u:%u", scc_get_tok_name(tok.type), - scc_cstring_as_cstr(&tok.loc.name), tok.loc.line, - tok.loc.col); - Assert(tok.loc.offset <= fsize); - // LOG_DEBUG("%s", tok.val.str); - // printf("line: %d, column: %d, type: %3d, typename: %s\n", - // lexer.line, lexer.index, tok.type, scc_get_tok_name(tok.type)); - } - - free(buffer); - LOG_INFO("Lexer is Ok..."); - return 0; -} diff --git a/libs/lex_parser/cbuild.toml b/libs/sstream/cbuild.toml similarity index 53% rename from libs/lex_parser/cbuild.toml rename to libs/sstream/cbuild.toml index 421e53a..94a9191 100644 --- a/libs/lex_parser/cbuild.toml +++ b/libs/sstream/cbuild.toml @@ -1,5 +1,9 @@ [package] -name = "scc_lex_parser" +name = "sstream" version = "0.1.0" 
+authors = [] +description = "" dependencies = [{ name = "scc_core", path = "../../runtime/scc_core" }] +# features = {} +# default_features = [] diff --git a/libs/sstream/include/scc_pos.h b/libs/sstream/include/scc_pos.h new file mode 100644 index 0000000..e113661 --- /dev/null +++ b/libs/sstream/include/scc_pos.h @@ -0,0 +1,32 @@ +#ifndef __SCC_POS_H__ +#define __SCC_POS_H__ + +#include +#include + +typedef struct scc_pos { + const char *name; + usize line; + usize col; + usize offset; +} scc_pos_t; + +static inline scc_pos_t scc_pos_create() { return (scc_pos_t){0, 1, 1, 0}; } + +static inline void scc_pos_next(scc_pos_t *pos) { + pos->offset++; + pos->col++; +} + +static inline void scc_pos_next_offset(scc_pos_t *pos, int offset) { + pos->offset += offset; + pos->col += offset; +} + +static inline void scc_pos_next_line(scc_pos_t *pos) { + pos->offset++; + pos->line++; + pos->col = 1; +} + +#endif /* __SCC_POS_H__ */ diff --git a/libs/sstream/include/scc_sstream.h b/libs/sstream/include/scc_sstream.h new file mode 100644 index 0000000..6b81c93 --- /dev/null +++ b/libs/sstream/include/scc_sstream.h @@ -0,0 +1,33 @@ +#ifndef __SCC_SSTREAM_H__ +#define __SCC_SSTREAM_H__ + +#include "scc_pos.h" +#include +#include + +typedef struct { + scc_pos_t pos; + int character; +} scc_sstream_char_t; + +typedef SCC_RING(scc_sstream_char_t) scc_sstream_ring_t; + +typedef struct { + const char *fname; + scc_pos_t pos; // 当前消费位置 (可选,可由 ring 推导) + int used; // 是否仍然在使用 + int owned_src; // 是否拥有src内存 即是否需要释放 + const char *src; // 文件内容缓冲区 (由 sstream 管理) + usize len; // 缓冲区长度 + scc_pos_t fill_pos; // 内部填充位置 + scc_sstream_ring_t ring; +} scc_sstream_t; + +int scc_sstream_init(scc_sstream_t *stream, const char *fname, int ring_size); +int scc_sstream_init_by_buffer(scc_sstream_t *stream, const char *buffer, + usize len, int owned, int ring_size); +scc_sstream_ring_t *scc_sstream_ref_ring(scc_sstream_t *stream); +void scc_sstream_drop_ring(scc_sstream_ring_t *ring); +void
scc_sstream_drop(scc_sstream_t *stream); + +#endif /* __SCC_SSTREAM_H__ */ diff --git a/libs/sstream/src/main.c b/libs/sstream/src/main.c new file mode 100644 index 0000000..c466964 --- /dev/null +++ b/libs/sstream/src/main.c @@ -0,0 +1,51 @@ +#include "scc_sstream.h" +#include + +int main(int argc, char **argv) { + const char *filename = (argc > 1) ? argv[1] : __FILE__; // 默认读取自身 + scc_sstream_t stream; + scc_sstream_ring_t *ring; + + // 初始化 + if (scc_sstream_init(&stream, filename, 16) != 0) { + fprintf(stderr, "Failed to initialize stream for %s\n", filename); + return 1; + } + ring = scc_sstream_ref_ring(&stream); + Assert(ring != null); + + printf("Reading file: %s\n", filename); + + scc_sstream_char_t elem; + cbool ok; + int char_count = 0; + int line_count = 0; + + // 循环读取所有字符 + while (1) { + scc_ring_next_consume(*ring, elem, ok); + if (!ok) + break; // 文件结束或错误 + + char_count++; + if (elem.character == '\n') + line_count++; + + // 打印前 200 个字符的位置信息(避免刷屏) + if (char_count <= 200) { + printf("char[%d]: '%c' (line %zu, col %zu)\n", char_count, + elem.character == '\n' ? 
' ' + : elem.character, // 换行符显示为空格 + elem.pos.line, elem.pos.col); + } + } + + printf("\nSummary:\n"); + printf(" Total characters: %d\n", char_count); + printf(" Total lines: %d\n", line_count); + + // 释放资源 + scc_sstream_drop_ring(ring); + scc_sstream_drop(&stream); + return 0; +} diff --git a/libs/sstream/src/scc_sstream.c b/libs/sstream/src/scc_sstream.c new file mode 100644 index 0000000..251e850 --- /dev/null +++ b/libs/sstream/src/scc_sstream.c @@ -0,0 +1,145 @@ +#include + +// 内部扫描函数:从指定位置扫描下一个有效字符 +static int sstream_scan_at(scc_sstream_t *stream, scc_pos_t scan_pos, + scc_pos_t *out_char_pos, scc_pos_t *out_next_pos) { + while (1) { + if (scan_pos.offset >= stream->len) + return -1; // EOF + + scc_pos_t start = scan_pos; + char c = stream->src[scan_pos.offset]; + + // 处理反斜杠换行 + if (c == '\\') { + usize next_off = scan_pos.offset + 1; + if (next_off < stream->len) { + char n = stream->src[next_off]; + if (n == '\n') { + // 跳过 '\' 和 '\n' + scan_pos.offset += 2; + scan_pos.line++; + scan_pos.col = 1; + continue; + } else if (n == '\r' && next_off + 1 < stream->len && + stream->src[next_off + 1] == '\n') { + // 跳过 '\' + '\r' + '\n' + scan_pos.offset += 3; + scan_pos.line++; + scan_pos.col = 1; + continue; + } + } + } + + // 处理 \r\n 转换为 \n + if (c == '\r') { + usize next_off = scan_pos.offset + 1; + if (next_off < stream->len && stream->src[next_off] == '\n') { + if (out_char_pos) + *out_char_pos = start; + // 下一个位置:偏移+2,行+1,列=1 + scan_pos.offset += 2; + scan_pos.line++; + scan_pos.col = 1; + if (out_next_pos) + *out_next_pos = scan_pos; + return '\n'; + } + } + + // 普通字符(包括单独的 '\n'、'\r' 等) + if (out_char_pos) + *out_char_pos = start; + // 计算下一个位置 + scan_pos.offset++; + if (c == '\n') { + scan_pos.line++; + scan_pos.col = 1; + } else { + scan_pos.col++; + } + if (out_next_pos) + *out_next_pos = scan_pos; + return (unsigned char)c; + } +} + +// 环形缓冲区填充回调(通过 userdata 获取流对象) +static cbool fill_func(scc_sstream_char_t *out, void *userdata) { + scc_sstream_t *stream =
(scc_sstream_t *)userdata; + if (stream->fill_pos.offset >= stream->len) + return false; // 已到文件尾 + + int ch = sstream_scan_at(stream, stream->fill_pos, &out->pos, + &(stream->fill_pos)); + if (ch == -1) + return false; + out->character = ch; + return true; +} + +int scc_sstream_init(scc_sstream_t *stream, const char *fname, int ring_size) { + Assert(stream != null && fname != null); + scc_file_t file = scc_fopen(fname, SCC_FILE_READ); + usize fsize = scc_fsize(file); + if (fsize == 0) { + LOG_WARN("file size is 0"); + scc_fclose(file); + return 0; + } + char *buffer = (char *)scc_malloc(fsize); + scc_memset(buffer, 0, fsize); + usize read_ret = scc_fread(file, buffer, fsize); + Assert(read_ret == fsize); /* read bytes assert it */ + scc_fclose(file); + + scc_sstream_init_by_buffer(stream, buffer, read_ret, 1, ring_size); + stream->fname = fname; + stream->fill_pos.name = stream->fname; + return 0; +} + +int scc_sstream_init_by_buffer(scc_sstream_t *stream, const char *buffer, + usize len, int owned, int ring_size) { + stream->fname = ""; + stream->fill_pos = scc_pos_create(); + stream->fill_pos.name = stream->fname; + stream->src = buffer; + stream->len = len; + stream->owned_src = owned; + + scc_ring_init(stream->ring, ring_size <= 0 ? 
64 : ring_size, fill_func, + stream); + stream->used = 0; + return 0; +} + +scc_sstream_ring_t *scc_sstream_ref_ring(scc_sstream_t *stream) { + Assert(stream != null); + stream->used++; + return &stream->ring; +} + +void scc_sstream_drop_ring(scc_sstream_ring_t *ring) { + Assert(ring != null && ring->userdata != null); + scc_sstream_t *stream = (scc_sstream_t *)ring->userdata; + if (stream->used > 0) { + stream->used--; + } else { + LOG_WARN("double drop sstream ring"); + } +} + +void scc_sstream_drop(scc_sstream_t *stream) { + Assert(stream != null); + if (stream->used) { + LOG_FATAL("drop sstream must be drop ring before ref [%d]", + stream->used); + } + if (stream->src && stream->owned_src) { + scc_free((void *)stream->src); + stream->src = null; + } + scc_ring_free(stream->ring); +} diff --git a/runtime/runtime_gdb.py b/runtime/runtime_gdb.py index 700ae65..3f77bb4 100644 --- a/runtime/runtime_gdb.py +++ b/runtime/runtime_gdb.py @@ -157,40 +157,128 @@ class VectorPrinter(gdb.ValuePrinter): class HashTablePrinter(gdb.ValuePrinter): - def __init__(self, val: gdb.Value): - self.val: gdb.Value = val + """打印 scc_hashtable_t 结构""" + + def __init__(self, val): + self.val = val @staticmethod - def check_type(val: gdb.Value) -> bool: - if val.type.name in ["scc_hashtable_t", "scc_hashtable"]: + def check_type(val): + # 通过类型名或关键字段检查 + type_name = val.type.name + if type_name and type_name in ("scc_hashtable_t", "scc_hashtable"): return True + try: + fields = {f.name for f in val.type.fields()} + required = {"entries", "count", "tombstone_count", "hash_func", "key_cmp"} + if required.issubset(fields): + return True + except: + pass return False + def to_string(self): + count = self.val["count"] + tombstone = self.val["tombstone_count"] + cap = self.val["entries"]["size"] # 总槽位数 + return f"hashtable(count={count}, tombstone={tombstone}, capacity={cap})" -def append_printer(): - "注册方式一:传统append方法(您之前有效的方式)self" - gdb.pretty_printers.append( - lambda val: VectorPrinter(val) 
if VectorPrinter.check_type(val) else None - ) + def display_hint(self): + return "map" + + def num_children(self): + return int(self.val["count"]) + + def children(self): + entries = self.val["entries"] + size = int(entries["size"]) + data = entries["data"] + if size == 0 or data == 0: + return + # ENTRY_ACTIVE = 1(根据枚举定义) + for i in range(size): + entry = data[i] + state = int(entry["state"]) + if state == 1: # 只输出有效条目 + yield (f"[{i}]", entry) -def register_new_printer(): - "注册方式二:新版注册方法(备用方案)" +class StrPoolPrinter(gdb.ValuePrinter): + """打印 scc_strpool_t,将键值作为字符串展示""" - def str_lookup_function(val): - if VectorPrinter.check_type(val) is False: - return None - ret = VectorPrinter(val) - # print( - # f"ret {ret}, type {val.type.name}, {[(i.name, i.type) for i in val.type.fields()]}" - # ) - return ret + def __init__(self, val): + self.val = val + self.ht = val["ht"] # 内部哈希表 - gdb.printing.register_pretty_printer(gdb.current_objfile(), str_lookup_function) - # if gdb.current_progspace() is not None: - # pts = gdb.current_progspace().pretty_printers - # print(pts, len(pts)) - # pts.append(str_lookup_function) + @staticmethod + def check_type(val): + type_name = val.type.name + if type_name and type_name == "scc_strpool_t": + return True + try: + fields = {f.name for f in val.type.fields()} + if "ht" in fields: + # 可进一步检查 ht 的类型,但非必须 + return True + except: + pass + return False + + def to_string(self): + count = self.ht["count"] + cap = self.ht["entries"]["size"] + return f"strpool(count={count}, capacity={cap})" + + def display_hint(self): + return "map" + + def num_children(self): + return int(self.ht["count"]) + + def children(self): + entries = self.ht["entries"] + size = int(entries["size"]) + data = entries["data"] + if size == 0 or data == 0: + return + const_char_ptr = gdb.lookup_type("const char").pointer() + char_ptr = gdb.lookup_type("char").pointer() + + for i in range(size): + entry = data[i] + state = int(entry["state"]) + if state == 1: # ACTIVE + 
key_val = entry["key"] + value_val = entry["value"] + + # 尝试将 void* 转为字符串 + try: + key_str = key_val.cast(const_char_ptr).string() + except: + key_str = str(key_val) # 失败则回退到地址 + + try: + value_str = value_val.cast(char_ptr).string() + except: + value_str = str(value_val) + + # 使用带引号的字符串作为名称,值直接是字符串 + yield (repr(key_str), value_str) + + +def register_pretty_printers(): + """统一的查找函数,注册所有打印机""" + + def lookup_function(val): + if VectorPrinter.check_type(val): + return VectorPrinter(val) + if HashTablePrinter.check_type(val): + return HashTablePrinter(val) + if StrPoolPrinter.check_type(val): + return StrPoolPrinter(val) + return None + + gdb.printing.register_pretty_printer(gdb.current_objfile(), lookup_function) class VectorInfoCommand(gdb.Command): @@ -216,7 +304,5 @@ class VectorInfoCommand(gdb.Command): if __name__ == "__main__": - # 双重注册保证兼容性 - # append_printer() # 保留您原来有效的方式 - register_new_printer() # 添加新版注册 + register_pretty_printers() VectorInfoCommand() diff --git a/runtime/scc_core/include/scc_core.h b/runtime/scc_core/include/scc_core.h index 59834f3..d9ae686 100644 --- a/runtime/scc_core/include/scc_core.h +++ b/runtime/scc_core/include/scc_core.h @@ -6,9 +6,7 @@ #include #include #include -#include #include -#include #include #endif // __SCC_CORE_H__ diff --git a/runtime/scc_core/include/scc_core_impl.h b/runtime/scc_core/include/scc_core_impl.h index 324ed80..f4400c8 100644 --- a/runtime/scc_core/include/scc_core_impl.h +++ b/runtime/scc_core/include/scc_core_impl.h @@ -18,6 +18,7 @@ typedef enum { scc_file_t scc_fopen(const char *path, scc_fmode_t mode); void scc_fclose(scc_file_t file); +usize scc_fsize(scc_file_t file); usize scc_fread(scc_file_t file, void *buffer, usize size); usize scc_fwrite(scc_file_t file, const void *buffer, usize size); cbool scc_fexists(const char *path); diff --git a/runtime/scc_core/include/scc_core_pos.h b/runtime/scc_core/include/scc_core_pos.h deleted file mode 100644 index a8b8b47..0000000 --- 
a/runtime/scc_core/include/scc_core_pos.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __SCC_CORE_POS_H__ -#define __SCC_CORE_POS_H__ - -#include "scc_core_str.h" -#include "scc_core_type.h" -typedef struct scc_pos { - scc_cstring_t name; - usize line; - usize col; - usize offset; -} scc_pos_t; - -static inline scc_pos_t scc_pos_create() { - return (scc_pos_t){scc_cstring_create(), 1, 1, 0}; -} - -static inline void scc_pos_next(scc_pos_t *pos) { - pos->offset++; - pos->col++; -} - -static inline void scc_pos_next_line(scc_pos_t *pos) { - pos->offset++; - pos->line++; - pos->col = 1; -} - -#endif /* __SCC_CORE_POS_H__ */ diff --git a/runtime/scc_core/include/scc_core_ring.h b/runtime/scc_core/include/scc_core_ring.h new file mode 100644 index 0000000..de651b8 --- /dev/null +++ b/runtime/scc_core/include/scc_core_ring.h @@ -0,0 +1,178 @@ +#ifndef __SCC_CORE_RING_H__ +#define __SCC_CORE_RING_H__ + +#include + +/** + * @def SCC_RING(type) + * @brief 声明环形缓冲区结构体 + * @param type 存储的元素类型 + * + * 生成包含以下字段的结构体: + * - data: 元素数组 + * - cap: 容量 + * - head: 已消费的逻辑索引 + * - probe: 预览索引 + * - tail: 已填充的逻辑末尾索引 + * - fill: 填充回调函数 (当需要新元素时调用) + */ +#define SCC_RING(type) \ + struct { \ + type *data; \ + usize cap; \ + usize head; \ + usize probe; \ + usize tail; \ + cbool (*fill)(type * out, void *userdata); \ + void *userdata; \ + } + +// ==================== 内部辅助宏 (不直接使用) ==================== + +#define scc_ring_phys(ring, idx) ((idx) % (ring).cap) + +/** + * @brief 确保 probe 位置有数据可用 (尝试填充) + * @param ring 环形缓冲区变量 + * @param ok 变量名 (如 int ok_flag) ,宏会将其设置为 true 或 false + */ +#define scc_ring_ensure(ring, ok) \ + do { \ + ok = 1; \ + if ((ring).probe < (ring).tail) \ + break; \ + /* probe == tail,需要填充新元素 */ \ + if (!(ring).fill) { \ + ok = 0; \ + break; \ + } \ + if ((ring).tail - (ring).head >= (ring).cap) { \ + ok = 0; /* 缓冲区满,无法填充 */ \ + break; \ + } \ + usize phys_tail = scc_ring_phys(ring, (ring).tail); \ + if (!(ring).fill(&(ring).data[phys_tail], (ring).userdata)) { \ + ok = 0; \ + 
break; \ + } \ + (ring).tail++; \ + } while (0) + +// ==================== 用户操作宏 ==================== + +/** + * @brief 初始化环形缓冲区 + * @param ring 环形缓冲区变量 + * @param cap 容量 + * @param fill_func 填充回调函数 (可传 NULL) + * + * 内存分配失败由 scc_malloc 内部处理 (如 LOG_FATAL) + */ +#define scc_ring_init(ring, _cap, fill_func, _userdata) \ + do { \ + (ring).data = scc_malloc((_cap) * sizeof(*(ring).data)); \ + (ring).cap = (_cap); \ + (ring).head = 0; \ + (ring).probe = 0; \ + (ring).tail = 0; \ + (ring).fill = (fill_func); \ + (ring).userdata = (_userdata); \ + } while (0) + +/** + * @brief 释放环形缓冲区内存 + * @param ring 环形缓冲区变量 + */ +#define scc_ring_free(ring) \ + do { \ + scc_free((ring).data); \ + (ring).data = NULL; \ + (ring).cap = (ring).head = (ring).probe = (ring).tail = 0; \ + } while (0) + +/** + * @brief 预览 probe 位置的元素 (不移动 probe) + * @param ring 环形缓冲区变量 + * @param val 变量名,用于接收元素值 (例如 int ch) + * @param ok 变量名,用于接收成功状态 (cbool 类型) + */ +#define scc_ring_peek(ring, val, ok) \ + do { \ + scc_ring_ensure(ring, ok); \ + if (!(ok)) \ + break; \ + if ((ring).probe >= (ring).tail) { \ + ok = 0; \ + break; \ + } \ + usize _phys = scc_ring_phys(ring, (ring).probe); \ + val = (ring).data[_phys]; \ + } while (0) + +/** + * @brief 获取 probe 位置的元素,并将 probe 前进一步 + * @param ring 环形缓冲区变量 + * @param val 变量名,用于接收元素值 (例如 int ch) + * @param ok 变量名,用于接收成功状态 (cbool 类型) + */ +#define scc_ring_next(ring, val, ok) \ + do { \ + scc_ring_ensure(ring, ok); \ + if (!(ok)) \ + break; \ + if ((ring).probe >= (ring).tail) { \ + ok = 0; \ + break; \ + } \ + usize _phys = scc_ring_phys(ring, (ring).probe); \ + val = (ring).data[_phys]; \ + (ring).probe++; \ + } while (0) + +/** + * @brief 将 probe 后退一步 (不能低于 head) + * @param ring 环形缓冲区变量 + * @param ok 变量名,用于接收成功状态 (cbool 类型) + */ +#define scc_ring_back(ring, ok) \ + do { \ + if ((ring).probe > (ring).head) { \ + (ring).probe--; \ + ok = 1; \ + } else { \ + ok = 0; \ + } \ + } while (0) + +/** + * @brief 将 probe 重置为 head + * @param ring 环形缓冲区变量 + */ +#define 
scc_ring_reset(ring) ((ring).probe = (ring).head) + +/** + * @brief 将 head 移动到 probe 位置,标记 probe 之前的元素为已消费 + * @param ring 环形缓冲区变量 + */ +#define scc_ring_consume(ring) ((ring).head = (ring).probe) + +/** + * @brief 返回 probe 到 tail 之间的元素个数 (可预览数量) + * @param ring 环形缓冲区变量 + * @return 可预览元素个数 + */ +#define scc_ring_available(ring) ((ring).tail - (ring).probe) + +/** + * @brief 获取 probe 位置的元素,并将 probe 前进一步同时标记为已消费 + * @param ring 环形缓冲区变量 + * @param val 变量名,用于接收元素值 (例如 int ch) + * @param ok 变量名,用于接收成功状态 (cbool 类型) + */ +#define scc_ring_next_consume(ring, val, ok) \ + do { \ + scc_ring_next(ring, val, ok); \ + scc_ring_consume(ring); \ + } while (0) + +#endif /* __SCC_CORE_RING_H__ */ diff --git a/runtime/scc_core/include/scc_core_stream.h b/runtime/scc_core/include/scc_core_stream.h deleted file mode 100644 index 31db64f..0000000 --- a/runtime/scc_core/include/scc_core_stream.h +++ /dev/null @@ -1,130 +0,0 @@ -#ifndef __SMCC_CORE_PROBE_STREAM_H__ -#define __SMCC_CORE_PROBE_STREAM_H__ - -#include "scc_core_impl.h" -#include "scc_core_macro.h" -#include "scc_core_mem.h" -#include "scc_core_str.h" - -struct scc_probe_stream; -typedef struct scc_probe_stream scc_probe_stream_t; - -#define scc_stream_eof (-1) - -/** - * @brief 带探针的流接口 - * - * 这个流提供了双指针机制:当前读取位置(头指针)和探针位置(尾指针)。 - * 尾指针只能向前移动,用于查看而不消费。 - * 头指针可以前进或单次后退,但不能一直后退到尾指针后面。 - */ -struct scc_probe_stream { - scc_cstring_t name; - - /// @brief 消费头指针处的字符(移动头指针) - int (*consume)(scc_probe_stream_t *stream); - - /// @brief 查看当前探针位置的字符,不移动任何指针 - int (*peek)(scc_probe_stream_t *stream); - - /// @brief 移动探针位置并返回字符 - int (*next)(scc_probe_stream_t *stream); - - /// @brief 回退一个字符(单次后退,探针位置后退一步) - cbool (*back)(scc_probe_stream_t *stream); - - /// @brief 移动头指针到探针位置 - void (*sync)(scc_probe_stream_t *stream); - - /// @brief 重置探针位置到头指针位置 - void (*reset)(scc_probe_stream_t *stream); - - /// @brief 读取指定数量的字符到缓冲区 - usize (*read_buf)(scc_probe_stream_t *stream, char *buffer, usize count); - - /// @brief 检查是否到达流末尾 - cbool 
(*is_at_end)(scc_probe_stream_t *stream); - - /// @brief 销毁流并释放资源 - void (*drop)(scc_probe_stream_t *stream); -}; - -static inline int scc_probe_stream_consume(scc_probe_stream_t *self) { - return self->consume(self); -} - -static inline int scc_probe_stream_peek(scc_probe_stream_t *self) { - return self->peek(self); -} - -static inline int scc_probe_stream_next(scc_probe_stream_t *self) { - return self->next(self); -} - -static inline void scc_probe_stream_sync(scc_probe_stream_t *self) { - self->sync(self); -} - -static inline cbool scc_probe_stream_back(scc_probe_stream_t *self) { - return self->back(self); -} - -static inline void scc_probe_stream_reset(scc_probe_stream_t *self) { - self->reset(self); -} - -static inline usize scc_probe_stream_read_buf(scc_probe_stream_t *self, - char *buffer, usize count) { - return self->read_buf(self, buffer, count); -} - -static inline cbool scc_probe_stream_is_at_end(scc_probe_stream_t *self) { - return self->is_at_end(self); -} - -static inline cbool scc_probe_stream_has_more(scc_probe_stream_t *self) { - return !self->is_at_end(self); -} - -static inline void scc_probe_stream_drop(scc_probe_stream_t *self) { - self->drop(self); -} - -#ifndef __SCC_NO_MEM_PROBE_STREAM__ -/** - * @brief 内存探针流结构 - */ -typedef struct scc_mem_probe_stream { - scc_probe_stream_t stream; - const char *data; - usize data_length; - usize curr_pos; // 当前读取位置 - usize probe_pos; // 探针位置(用于peek) - cbool owned; // 是否拥有数据(如果拥有将会自动释放) -} scc_mem_probe_stream_t; - -/** - * @brief 初始化内存探针流(由你负责scc_mem_probe_stream_t的释放) - * - * @param stream 流结构指针 - * @param data 数据指针 - * @param length 数据长度 - * @param owned 是否拥有数据(如果拥有将会自动释放) - * @return core_probe_stream_t* 成功返回流指针,失败返回NULL - */ -scc_probe_stream_t *scc_mem_probe_stream_init(scc_mem_probe_stream_t *stream, - const char *data, usize length, - cbool owned); -/** - * @brief 构造内存探针流(其中drop会自动释放内存) - * - * @param data - * @param length - * @param owned 是否拥有数据(如果拥有将会自动释放) - * @return scc_probe_stream_t* - */ 
-scc_probe_stream_t *scc_mem_probe_stream_alloc(const char *data, usize length, - cbool owned); -#endif - -#endif /* __SMCC_CORE_PROBE_STREAM_H__ */ diff --git a/runtime/scc_core/src/cfg.std_impl.c b/runtime/scc_core/src/cfg.std_impl.c index 246b676..44d37fb 100644 --- a/runtime/scc_core/src/cfg.std_impl.c +++ b/runtime/scc_core/src/cfg.std_impl.c @@ -53,6 +53,20 @@ void scc_fclose(scc_file_t file) { } } +usize scc_fsize(scc_file_t file) { + FILE *fp = (FILE *)file; + if (fseek(fp, 0, SEEK_END) != 0) { + perror("fseek failed"); + return 0; + } + usize fsize = ftell(fp); + if (fseek(fp, 0, SEEK_SET)) { + perror("fseek failed"); + return 0; + } + return fsize; +} + usize scc_fread(scc_file_t file, void *buffer, usize size) { if (!file || !buffer) return 0; diff --git a/runtime/scc_core/src/stream.c b/runtime/scc_core/src/stream.c deleted file mode 100644 index a2cd64d..0000000 --- a/runtime/scc_core/src/stream.c +++ /dev/null @@ -1,183 +0,0 @@ -#include -#include - -#ifndef __SCC_CORE_NO_MEM_PROBE_STREAM__ - -static int mem_probe_stream_consume(scc_probe_stream_t *_stream) { - Assert(_stream != null); - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - - if (stream->curr_pos >= stream->data_length) { - return scc_stream_eof; - } - - unsigned char ch = stream->data[stream->curr_pos++]; - // 如果探针位置落后于当前读取位置,则更新探针位置 - if (stream->probe_pos < stream->curr_pos) { - stream->probe_pos = stream->curr_pos; - } - return (int)ch; -} - -static int mem_probe_stream_peek(scc_probe_stream_t *_stream) { - Assert(_stream != null); - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - - if (stream->probe_pos >= stream->data_length) { - return scc_stream_eof; - } - - // 只查看而不移动探针位置 - return (int)(unsigned char)stream->data[stream->probe_pos]; -} - -static int mem_probe_stream_next(scc_probe_stream_t *_stream) { - Assert(_stream != null); - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - - if (stream->probe_pos >= 
stream->data_length) { - return scc_stream_eof; - } - - // 返回探针位置的字符,并将探针位置向前移动 - int ch = (int)(unsigned char)stream->data[stream->probe_pos]; - stream->probe_pos++; - return ch; -} - -static void mem_probe_stream_sync(scc_probe_stream_t *_stream) { - Assert(_stream != null); - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - - // 移动头指针到探针位置(消费已查看的字符) - if (stream->probe_pos > stream->curr_pos) { - stream->curr_pos = stream->probe_pos; - } -} - -static cbool mem_probe_stream_back(scc_probe_stream_t *_stream) { - Assert(_stream != null); - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - - // 只能回退一个字符 - if (stream->probe_pos == 0) - return false; - if (stream->curr_pos + 1 > stream->probe_pos) - return false; - - stream->probe_pos--; - return true; -} - -static usize mem_probe_stream_read_buf(scc_probe_stream_t *_stream, - char *buffer, usize count) { - Assert(_stream != null); - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - - if (buffer == null) { - LOG_WARN("Buffer is null"); - return 0; - } - - usize remaining = stream->data_length - stream->curr_pos; - usize to_read = (remaining < count) ? 
remaining : count; - - if (to_read > 0) { - scc_memcpy(buffer, stream->data + stream->curr_pos, to_read); - stream->curr_pos += to_read; - // 更新探针位置 - if (stream->probe_pos < stream->curr_pos) { - stream->probe_pos = stream->curr_pos; - } - } else { - LOG_WARN("Reading past end of stream [maybe count is too large or " - "negative?]"); - } - - return to_read; -} - -static void mem_probe_stream_reset(scc_probe_stream_t *_stream) { - Assert(_stream != null); - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - - // 重置探针位置到头指针位置 - stream->probe_pos = stream->curr_pos; -} - -static cbool mem_probe_stream_is_at_end(scc_probe_stream_t *_stream) { - Assert(_stream != null); - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - - return stream->curr_pos >= stream->data_length; -} - -static void mem_probe_stream_drop(scc_probe_stream_t *_stream) { - Assert(_stream != null); - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - - scc_cstring_free(&stream->stream.name); - - if (stream->owned) { - scc_free((void *)stream->data); - stream->data = null; - } -} - -scc_probe_stream_t *scc_mem_probe_stream_init(scc_mem_probe_stream_t *stream, - const char *data, usize length, - cbool owned) { - if (stream == null || data == null) { - LOG_ERROR("param error"); - return null; - } - - if (length == 0) { - LOG_WARN("input memory is empty"); - owned = false; - } - - stream->owned = owned; - stream->data = data; - stream->data_length = length; - stream->curr_pos = 0; - stream->probe_pos = 0; - - stream->stream.name = scc_cstring_from_cstr("mem_probe_stream"); - - // 设置函数指针 - stream->stream.consume = mem_probe_stream_consume; - stream->stream.peek = mem_probe_stream_peek; - stream->stream.next = mem_probe_stream_next; - stream->stream.sync = mem_probe_stream_sync; - stream->stream.back = mem_probe_stream_back; - stream->stream.read_buf = mem_probe_stream_read_buf; - stream->stream.reset = mem_probe_stream_reset; - 
stream->stream.is_at_end = mem_probe_stream_is_at_end; - stream->stream.drop = mem_probe_stream_drop; - - return (scc_probe_stream_t *)stream; -} - -static void scc_owned_mem_stream_drop(scc_probe_stream_t *_stream) { - scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream; - mem_probe_stream_drop(_stream); - scc_free(stream); -} - -scc_probe_stream_t *scc_mem_probe_stream_alloc(const char *data, usize length, - cbool owned) { - scc_mem_probe_stream_t *stream = - (scc_mem_probe_stream_t *)scc_malloc(sizeof(scc_mem_probe_stream_t)); - if (stream == null) { - return null; - } - - scc_probe_stream_t *ret = - scc_mem_probe_stream_init(stream, data, length, owned); - stream->stream.drop = scc_owned_mem_stream_drop; - Assert(ret != null); - return ret; -} - -#endif /* __SCC_CORE_NO_MEM_PROBE_STREAM__ */ diff --git a/runtime/scc_core/tests/test_core_ring.c b/runtime/scc_core/tests/test_core_ring.c new file mode 100644 index 0000000..0c633ba --- /dev/null +++ b/runtime/scc_core/tests/test_core_ring.c @@ -0,0 +1,326 @@ +#include +#include +#include +#include +#include + +// 为测试定义简单的 token 类型(包含动态字符串) +typedef struct { + int id; + char *data; +} test_token_t; + +// 定义环形缓冲区类型别名(方便使用) +typedef SCC_RING(char) char_ring_t; +typedef SCC_RING(test_token_t) token_ring_t; + +/* ------------------- 字符流测试辅助 ------------------ */ +static const char *test_chars = "abcdefghijklmnopqrstuvwxyz"; +static size_t char_index = 0; + +cbool char_fill(char *out, void *userdata) { + (void)userdata; + if (char_index < strlen(test_chars)) { + *out = test_chars[char_index++]; + return true; + } + return false; +} + +void reset_char_fill(void) { char_index = 0; } + +/* ------------------- token 流测试辅助 ------------------ */ +static int token_id = 0; + +cbool token_fill(test_token_t *out, void *userdata) { + (void)userdata; + if (token_id < 10) { // 只产生 10 个 token + out->id = token_id; + out->data = (char *)scc_malloc(20); + if (!out->data) + return false; + snprintf_(out->data, 20, 
"token%d", token_id); + token_id++; + return true; + } + return false; +} + +void reset_token_fill(void) { token_id = 0; } + +void free_token(test_token_t *tok) { + if (tok->data) { + scc_free(tok->data); + tok->data = NULL; + } +} + +/* ==================== 字符环形缓冲区测试 ==================== */ +void test_char_ring_basic(void) { + reset_char_fill(); + char_ring_t ring; + scc_ring_init(ring, 4, char_fill, 0); + char c; + cbool ok; + + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'a'); + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'b'); + + // peek + scc_ring_peek(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'c'); + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'c'); + + // back + scc_ring_back(ring, ok); + TEST_CHECK(ok == true); + scc_ring_peek(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'c'); + + // consume & reset + scc_ring_consume(ring); + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'c'); + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'd'); + + scc_ring_reset(ring); + scc_ring_peek(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'c'); + + scc_ring_back(ring, ok); + TEST_CHECK(ok == false); // 不能低于 head + + scc_ring_free(ring); +} + +void test_char_ring_full(void) { + reset_char_fill(); + char_ring_t ring; + scc_ring_init(ring, 3, char_fill, 0); + char c; + cbool ok; + + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // a + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // b + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // c + // 缓冲区满,peek 应失败 + scc_ring_peek(ring, c, ok); + TEST_CHECK(ok == false); + + scc_ring_consume(ring); // 释放已读空间 + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // d + TEST_CHECK(c == 'd'); + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // e + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // f + scc_ring_peek(ring, c, ok); + 
TEST_CHECK(ok == false); // 再次满 + + scc_ring_free(ring); +} + +void test_char_ring_eof(void) { + reset_char_fill(); + char_ring_t ring; + scc_ring_init(ring, 32, char_fill, 0); + char c; + cbool ok; + + for (int i = 0; i < 26; i++) { + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == test_chars[i]); + } + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == false); + scc_ring_peek(ring, c, ok); + TEST_CHECK(ok == false); + + scc_ring_free(ring); +} + +void test_char_ring_back_boundary(void) { + reset_char_fill(); + char_ring_t ring; + scc_ring_init(ring, 4, char_fill, 0); + char c; + cbool ok; + + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // a + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // b + + scc_ring_back(ring, ok); + TEST_CHECK(ok == true); + scc_ring_back(ring, ok); + TEST_CHECK(ok == true); + scc_ring_back(ring, ok); + TEST_CHECK(ok == false); // 已到 head + + scc_ring_peek(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'a'); + + scc_ring_free(ring); +} + +void test_char_ring_consume_reset(void) { + reset_char_fill(); + char_ring_t ring; + scc_ring_init(ring, 5, char_fill, 0); + char c; + cbool ok; + + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // a + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // b + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // c + scc_ring_back(ring, ok); + TEST_CHECK(ok == true); + scc_ring_back(ring, ok); + TEST_CHECK(ok == true); + scc_ring_back(ring, ok); + TEST_CHECK(ok == true); // 此时 probe 指向 a + + scc_ring_consume(ring); // head 移至 a + scc_ring_reset(ring); // probe 也移至 a + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == 'a'); // 应该返回 a + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // b + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); // c + + scc_ring_free(ring); +} + +void test_char_ring_wrap(void) { + reset_char_fill(); + char_ring_t ring; + scc_ring_init(ring, 3, char_fill, 0); + char c; + cbool ok; + + 
for (int i = 0; i < 26; i++) { + scc_ring_next(ring, c, ok); + TEST_CHECK(ok == true); + TEST_CHECK(c == test_chars[i]); + scc_ring_consume(ring); // 立即消费,保持缓冲区几乎为空 + } + scc_ring_peek(ring, c, ok); + TEST_CHECK(ok == false); // 无数据 + + scc_ring_free(ring); +} + +/* ==================== token 环形缓冲区测试 ==================== */ +void test_token_ring_basic(void) { + reset_token_fill(); + token_ring_t ring; + scc_ring_init(ring, 3, token_fill, 0); + test_token_t tok; + cbool ok; + + scc_ring_next(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 0); + free_token(&tok); + + scc_ring_next(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 1); + free_token(&tok); + + scc_ring_peek(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 2); // peek 不应消费 + scc_ring_next(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 2); + + scc_ring_back(ring, ok); + TEST_CHECK(ok == true); + scc_ring_peek(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 2); + scc_ring_next(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 2); + free_token(&tok); + + scc_ring_consume(ring); + // 消费剩余 token + while (1) { + scc_ring_next(ring, tok, ok); + if (!ok) + break; + free_token(&tok); + } + scc_ring_free(ring); +} + +void test_token_ring_full(void) { + reset_token_fill(); + token_ring_t ring; + scc_ring_init(ring, 2, token_fill, 0); + test_token_t tok; + cbool ok; + + scc_ring_next(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 0); + free_token(&tok); + scc_ring_next(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 1); + free_token(&tok); + scc_ring_peek(ring, tok, ok); + TEST_CHECK(ok == false); // 缓冲区满 + + scc_ring_consume(ring); + scc_ring_next(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 2); + free_token(&tok); + + scc_ring_next(ring, tok, ok); + TEST_CHECK(ok == true); + TEST_CHECK(tok.id == 3); + free_token(&tok); + scc_ring_peek(ring, tok, ok); + 
TEST_CHECK(ok == false); // 再次满 + + scc_ring_free(ring); +} + +/* ==================== 测试列表 ==================== */ +TEST_LIST = {{"test_char_ring_basic", test_char_ring_basic}, + {"test_char_ring_full", test_char_ring_full}, + {"test_char_ring_eof", test_char_ring_eof}, + {"test_char_ring_back_boundary", test_char_ring_back_boundary}, + {"test_char_ring_consume_reset", test_char_ring_consume_reset}, + {"test_char_ring_wrap", test_char_ring_wrap}, + {"test_token_ring_basic", test_token_ring_basic}, + {"test_token_ring_full", test_token_ring_full}, + {NULL, NULL}}; \ No newline at end of file