From 36bff64a91d313a956beda86324f8d2f9a013205 Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Mon, 8 Dec 2025 23:04:11 +0800 Subject: [PATCH] =?UTF-8?q?feat=20=E9=87=8D=E6=9E=84stream=E6=B5=81API?= =?UTF-8?q?=E5=B9=B6=E9=80=82=E9=85=8Dlex=5Fparse=E5=92=8Clexer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libs/lex_parser/include/lex_parser.h | 17 +- libs/lex_parser/src/lex_parser.c | 174 ++++++++++-------- libs/lex_parser/tests/test_char.c | 8 +- libs/lex_parser/tests/test_identifier.c | 6 +- libs/lex_parser/tests/test_number.c | 7 +- .../tests/test_skip_block_comment.c | 9 +- libs/lex_parser/tests/test_skip_line.c | 9 +- libs/lex_parser/tests/test_string.c | 6 +- libs/lexer/include/lexer.h | 4 +- libs/lexer/src/lexer.c | 82 ++++----- libs/lexer/tests/test_parse.c | 8 +- libs/lexer/tests/test_run.c | 8 +- runtime/libcore/include/core_str.h | 2 +- runtime/libcore/include/core_stream.h | 129 +++++++++---- runtime/libcore/include/core_vec.h | 1 + runtime/libcore/src/stream.c | 164 ++++++++++++----- runtime/log/include/log.h | 12 ++ 17 files changed, 402 insertions(+), 244 deletions(-) diff --git a/libs/lex_parser/include/lex_parser.h b/libs/lex_parser/include/lex_parser.h index 5bbebff..0844f6b 100644 --- a/libs/lex_parser/include/lex_parser.h +++ b/libs/lex_parser/include/lex_parser.h @@ -11,15 +11,16 @@ static inline cbool lex_parse_is_whitespace(int ch) { return ch == ' ' || ch == '\t'; } -int lex_parse_char(core_stream_t *input, core_pos_t *pos); -cbool lex_parse_string(core_stream_t *input, core_pos_t *pos, +int lex_parse_char(core_probe_stream_t *input, core_pos_t *pos); +cbool lex_parse_string(core_probe_stream_t *input, core_pos_t *pos, cstring_t *output); -cbool lex_parse_number(core_stream_t *input, core_pos_t *pos, usize *output); -cbool lex_parse_identifier(core_stream_t *input, core_pos_t *pos, +cbool lex_parse_number(core_probe_stream_t *input, core_pos_t *pos, + usize *output); +cbool 
lex_parse_identifier(core_probe_stream_t *input, core_pos_t *pos, cstring_t *output); -void lex_parse_skip_endline(core_stream_t *input, core_pos_t *pos); -void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos); -void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos); -void lex_parse_skip_whitespace(core_stream_t *input, core_pos_t *pos); +void lex_parse_skip_endline(core_probe_stream_t *input, core_pos_t *pos); +void lex_parse_skip_block_comment(core_probe_stream_t *input, core_pos_t *pos); +void lex_parse_skip_line(core_probe_stream_t *input, core_pos_t *pos); +void lex_parse_skip_whitespace(core_probe_stream_t *input, core_pos_t *pos); #endif /* __SMCC_LEX_PARSER_H__ */ diff --git a/libs/lex_parser/src/lex_parser.c b/libs/lex_parser/src/lex_parser.c index 6ba1c55..fb55280 100644 --- a/libs/lex_parser/src/lex_parser.c +++ b/libs/lex_parser/src/lex_parser.c @@ -1,18 +1,18 @@ #include -void lex_parse_skip_endline(core_stream_t *input, core_pos_t *pos) { +void lex_parse_skip_endline(core_probe_stream_t *input, core_pos_t *pos) { Assert(input != null && pos != null); - core_stream_reset_char(input); - int ch = core_stream_peek_char(input); + core_probe_stream_reset(input); + int ch = core_probe_stream_peek(input); if (ch == '\r') { - core_stream_next_char(input); - ch = core_stream_peek_char(input); + core_probe_stream_consume(input); + ch = core_probe_stream_peek(input); if (ch == '\n') { - core_stream_next_char(input); + core_probe_stream_consume(input); } core_pos_next_line(pos); } else if (ch == '\n') { - core_stream_next_char(input); + core_probe_stream_consume(input); core_pos_next_line(pos); } else { LOG_WARN("not a newline character"); @@ -57,12 +57,12 @@ static inline int got_simple_escape(int ch) { /* clang-format on */ } -void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos) { - core_stream_t *stream = input; +void lex_parse_skip_line(core_probe_stream_t *input, core_pos_t *pos) { + core_probe_stream_t *stream = input; 
Assert(stream != null && pos != null); - core_stream_reset_char(stream); + core_probe_stream_reset(stream); while (1) { - int ch = core_stream_peek_char(stream); + int ch = core_probe_stream_peek(stream); if (ch == core_stream_eof) { return; @@ -73,29 +73,29 @@ void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos) { lex_parse_skip_endline(stream, pos); return; } else { - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); } } } -void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) { - core_stream_t *stream = input; +void lex_parse_skip_block_comment(core_probe_stream_t *input, core_pos_t *pos) { + core_probe_stream_t *stream = input; Assert(stream != null && pos != null); int ch; - core_stream_reset_char(stream); - ch = core_stream_next_char(stream); + core_probe_stream_reset(stream); + ch = core_probe_stream_consume(stream); core_pos_next(pos); // FIXME Assertion Assert(ch == '/'); - ch = core_stream_next_char(stream); + ch = core_probe_stream_consume(stream); core_pos_next(pos); Assert(ch == '*'); // all ready match `/*` while (1) { - core_stream_reset_char(stream); - ch = core_stream_peek_char(stream); + core_probe_stream_reset(stream); + ch = core_probe_stream_peek(stream); if (ch == core_stream_eof) { LOG_WARN("Unterminated block comment"); @@ -106,12 +106,12 @@ void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) { lex_parse_skip_endline(stream, pos); continue; } - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); if (ch == '*') { - ch = core_stream_peek_char(stream); + ch = core_probe_stream_peek(stream); if (ch == '/') { - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); return; } @@ -119,35 +119,35 @@ void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) { } } -void lex_parse_skip_whitespace(core_stream_t *input, core_pos_t *pos) { - core_stream_t *stream = input; +void 
lex_parse_skip_whitespace(core_probe_stream_t *input, core_pos_t *pos) { + core_probe_stream_t *stream = input; Assert(stream != null && pos != null); - core_stream_reset_char(stream); + core_probe_stream_reset(stream); while (1) { - int ch = core_stream_peek_char(stream); + int ch = core_probe_stream_peek(stream); if (!lex_parse_is_whitespace(ch)) { return; } - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); } } -static inline cbool _lex_parse_uint(core_stream_t *input, core_pos_t *pos, +static inline cbool _lex_parse_uint(core_probe_stream_t *input, core_pos_t *pos, int base, usize *output) { Assert(input != null && pos != null); if (input == null || pos == null) { return false; } Assert(base == 2 || base == 8 || base == 10 || base == 16); - core_stream_reset_char(input); + core_probe_stream_reset(input); int ch, tmp; usize n = 0; usize offset = pos->offset; while (1) { - ch = core_stream_peek_char(input); + ch = core_probe_stream_peek(input); if (ch == core_stream_eof) { break; @@ -166,7 +166,7 @@ static inline cbool _lex_parse_uint(core_stream_t *input, core_pos_t *pos, return false; } - core_stream_next_char(input); + core_probe_stream_consume(input); core_pos_next(pos); n = n * base + tmp; // TODO number overflow @@ -187,11 +187,11 @@ static inline cbool _lex_parse_uint(core_stream_t *input, core_pos_t *pos, * @return int * https://cppreference.cn/w/c/language/character_constant */ -int lex_parse_char(core_stream_t *input, core_pos_t *pos) { - core_stream_t *stream = input; +int lex_parse_char(core_probe_stream_t *input, core_pos_t *pos) { + core_probe_stream_t *stream = input; Assert(stream != null && pos != null); - core_stream_reset_char(stream); - int ch = core_stream_peek_char(stream); + core_probe_stream_reset(stream); + int ch = core_probe_stream_peek(stream); int ret = core_stream_eof; if (ch == core_stream_eof) { @@ -201,17 +201,17 @@ int lex_parse_char(core_stream_t *input, core_pos_t *pos) { 
LOG_WARN("Unexpected character '%c' at begin", ch); goto ERR; } - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); - ch = core_stream_next_char(stream); + ch = core_probe_stream_consume(stream); core_pos_next(pos); if (ch == core_stream_eof) { LOG_WARN("Unexpected EOF at middle"); goto ERR; } else if (ch == '\\') { - ch = core_stream_next_char(stream); + ch = core_probe_stream_consume(stream); core_pos_next(pos); if (ch == '0') { // 数字转义序列 @@ -237,7 +237,7 @@ int lex_parse_char(core_stream_t *input, core_pos_t *pos) { } else { ret = ch; } - if ((ch = core_stream_next_char(stream)) != '\'') { + if ((ch = core_probe_stream_consume(stream)) != '\'') { LOG_ERROR("Unclosed character literal '%c' at end, expect `'`", ch); core_pos_next(pos); goto ERR; @@ -257,12 +257,12 @@ ERR: * @return cbool * https://cppreference.cn/w/c/language/string_literal */ -cbool lex_parse_string(core_stream_t *input, core_pos_t *pos, +cbool lex_parse_string(core_probe_stream_t *input, core_pos_t *pos, cstring_t *output) { - core_stream_t *stream = input; + core_probe_stream_t *stream = input; Assert(stream != null && pos != null && output != null); - core_stream_reset_char(stream); - int ch = core_stream_peek_char(stream); + core_probe_stream_reset(stream); + int ch = core_probe_stream_peek(stream); Assert(cstring_is_empty(output)); if (ch == core_stream_eof) { @@ -272,12 +272,12 @@ cbool lex_parse_string(core_stream_t *input, core_pos_t *pos, LOG_WARN("Unexpected character '%c' at begin", ch); goto ERR; } - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); cstring_t str = cstring_from_cstr(""); while (1) { - ch = core_stream_peek_char(stream); + ch = core_probe_stream_peek(stream); if (ch == core_stream_eof) { LOG_ERROR("Unexpected EOF at string literal"); @@ -287,8 +287,8 @@ cbool lex_parse_string(core_stream_t *input, core_pos_t *pos, goto ERR; } else if (ch == '\\') { // TODO bad practice and maybe bugs here - 
core_stream_next_char(stream); - ch = core_stream_next_char(stream); + core_probe_stream_consume(stream); + ch = core_probe_stream_consume(stream); int val = got_simple_escape(ch); if (val == -1) { LOG_ERROR("Invalid escape character it is \\%c [%d]", ch, ch); @@ -297,12 +297,12 @@ cbool lex_parse_string(core_stream_t *input, core_pos_t *pos, continue; } } else if (ch == '"') { - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); break; } - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); cstring_push(&str, ch); } @@ -323,48 +323,72 @@ ERR: * @return cbool * https://cppreference.cn/w/c/language/integer_constant */ -cbool lex_parse_number(core_stream_t *input, core_pos_t *pos, usize *output) { - core_stream_t *stream = input; +cbool lex_parse_number(core_probe_stream_t *input, core_pos_t *pos, + usize *output) { + core_probe_stream_t *stream = input; Assert(stream != null && pos != null && output != null); - core_stream_reset_char(stream); - int ch = core_stream_peek_char(stream); - int base = 0; + core_probe_stream_reset(stream); + int ch = core_probe_stream_peek(stream); + int base = 10; // 默认十进制 + if (ch == core_stream_eof) { LOG_WARN("Unexpected EOF at begin"); goto ERR; - } else if (ch == '0') { - ch = core_stream_peek_char(stream); + } + + if (ch == '0') { + // 消费 '0' + core_probe_stream_consume(stream); + core_pos_next(pos); + + // 查看下一个字符 + ch = core_probe_stream_peek(stream); if (ch == 'x' || ch == 'X') { + // 十六进制 base = 16; - core_stream_next_char(stream); - core_pos_next(pos); - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); } else if (ch == 'b' || ch == 'B') { - // FIXME C23 external integer base + // 二进制 (C23扩展) base = 2; - core_stream_next_char(stream); - core_pos_next(pos); - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); } else if (ch >= '0' && ch <= '7') { + // 八进制 base = 8; - 
core_stream_next_char(stream); - core_pos_next(pos); - } else if (ch == '9' || ch == '8') { + // 不消费,数字将由 _lex_parse_uint 处理 + } else if (ch == '8' || ch == '9') { LOG_ERROR("Invalid digit '%d' in octal literal", ch); return false; } else { - base = 10; + // 只是0,十进制 + *output = 0; + return true; } - } else { + } else if (ch >= '1' && ch <= '9') { + // 十进制,不消费,由 _lex_parse_uint 处理 base = 10; + } else { + // 无效的数字 + return false; } // 解析整数部分 - core_stream_reset_char(stream); + core_probe_stream_reset(stream); usize n; if (_lex_parse_uint(stream, pos, base, &n) == false) { + // 如果没有匹配任何数字,但输入是 '0',已经处理过了 + // 对于十进制数字,至少应该有一个数字 + if (base == 10) { + // 单个数字的情况,例如 "1" + // 我们需要消费这个数字并返回它的值 + if (ch >= '1' && ch <= '9') { + core_probe_stream_consume(stream); + core_pos_next(pos); + *output = ch - '0'; + return true; + } + } return false; } *output = n; @@ -382,13 +406,13 @@ ERR: * @return cbool * https://cppreference.cn/w/c/language/identifier */ -cbool lex_parse_identifier(core_stream_t *input, core_pos_t *pos, +cbool lex_parse_identifier(core_probe_stream_t *input, core_pos_t *pos, cstring_t *output) { Assert(input != null && pos != null && output != null); Assert(cstring_is_empty(output)); - core_stream_t *stream = input; - core_stream_reset_char(stream); - int ch = core_stream_peek_char(stream); + core_probe_stream_t *stream = input; + core_probe_stream_reset(stream); + int ch = core_probe_stream_peek(stream); if (ch == core_stream_eof) { LOG_WARN("Unexpected EOF at begin"); @@ -396,9 +420,9 @@ cbool lex_parse_identifier(core_stream_t *input, core_pos_t *pos, (ch >= 'A' && ch <= 'Z')) { while (1) { cstring_push(output, ch); - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(pos); - ch = core_stream_peek_char(stream); + ch = core_probe_stream_peek(stream); if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch == '_') || (ch >= '0' && ch <= '9')) { continue; diff --git a/libs/lex_parser/tests/test_char.c 
b/libs/lex_parser/tests/test_char.c index 56cd53c..540780f 100644 --- a/libs/lex_parser/tests/test_char.c +++ b/libs/lex_parser/tests/test_char.c @@ -5,9 +5,9 @@ cbool check_char(const char *str, int expect, int *output) { log_set_level(&__default_logger_root, 0); core_pos_t pos = core_pos_init(); - core_mem_stream_t mem_stream; - core_stream_t *stream = - core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false); + core_mem_probe_stream_t mem_stream; + core_probe_stream_t *stream = + core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false); *output = lex_parse_char(stream, &pos); return *output == expect; } @@ -57,4 +57,4 @@ TEST_LIST = { {"test_escape_char", test_escape_char}, {"test_invalid_char", test_invalid_char}, {NULL, NULL}, -}; \ No newline at end of file +}; diff --git a/libs/lex_parser/tests/test_identifier.c b/libs/lex_parser/tests/test_identifier.c index 24c2be4..65e6bea 100644 --- a/libs/lex_parser/tests/test_identifier.c +++ b/libs/lex_parser/tests/test_identifier.c @@ -5,9 +5,9 @@ cbool check_identifier(const char *str, const char *expect, cstring_t *output) { log_set_level(&__default_logger_root, 0); core_pos_t pos = core_pos_init(); - core_mem_stream_t mem_stream; - core_stream_t *stream = - core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false); + core_mem_probe_stream_t mem_stream; + core_probe_stream_t *stream = + core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false); cbool ret = lex_parse_identifier(stream, &pos, output); if (ret && expect) { diff --git a/libs/lex_parser/tests/test_number.c b/libs/lex_parser/tests/test_number.c index eda153b..9e044ac 100644 --- a/libs/lex_parser/tests/test_number.c +++ b/libs/lex_parser/tests/test_number.c @@ -4,9 +4,9 @@ cbool check(const char *str, usize expect, usize *output) { // TODO maybe have other logger log_set_level(&__default_logger_root, 0); core_pos_t pos = core_pos_init(); - core_mem_stream_t mem_stream; - core_stream_t *stream = - 
core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false); + core_mem_probe_stream_t mem_stream; + core_probe_stream_t *stream = + core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false); return lex_parse_number(stream, &pos, output); } @@ -16,6 +16,7 @@ cbool check(const char *str, usize expect, usize *output) { cbool ret = check(str, expect, &_output); \ TEST_CHECK(ret == true); \ TEST_CHECK(_output == expect); \ + TEST_MSG("Produced: %llu", _output); \ } while (0) #define CHECK_INVALID(str) \ diff --git a/libs/lex_parser/tests/test_skip_block_comment.c b/libs/lex_parser/tests/test_skip_block_comment.c index c04acbf..563e9cf 100644 --- a/libs/lex_parser/tests/test_skip_block_comment.c +++ b/libs/lex_parser/tests/test_skip_block_comment.c @@ -5,9 +5,9 @@ void check_skip_block_comment(const char *str, const char *expect_remaining) { log_set_level(&__default_logger_root, 0); core_pos_t pos = core_pos_init(); - core_mem_stream_t mem_stream; - core_stream_t *stream = - core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false); + core_mem_probe_stream_t mem_stream; + core_probe_stream_t *stream = + core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false); lex_parse_skip_block_comment(stream, &pos); @@ -15,7 +15,8 @@ void check_skip_block_comment(const char *str, const char *expect_remaining) { char buffer[256] = {0}; int i = 0; int ch; - while ((ch = core_stream_next_char(stream)) != core_stream_eof && i < 255) { + while ((ch = core_probe_stream_consume(stream)) != core_stream_eof && + i < 255) { buffer[i++] = (char)ch; } diff --git a/libs/lex_parser/tests/test_skip_line.c b/libs/lex_parser/tests/test_skip_line.c index fca4441..336e485 100644 --- a/libs/lex_parser/tests/test_skip_line.c +++ b/libs/lex_parser/tests/test_skip_line.c @@ -5,9 +5,9 @@ void check_skip_line(const char *str, const char *expect_remaining) { log_set_level(&__default_logger_root, 0); core_pos_t pos = core_pos_init(); - core_mem_stream_t mem_stream; - 
core_stream_t *stream = - core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false); + core_mem_probe_stream_t mem_stream; + core_probe_stream_t *stream = + core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false); lex_parse_skip_line(stream, &pos); @@ -15,7 +15,8 @@ void check_skip_line(const char *str, const char *expect_remaining) { char buffer[256] = {0}; int i = 0; int ch; - while ((ch = core_stream_next_char(stream)) != core_stream_eof && i < 255) { + while ((ch = core_probe_stream_consume(stream)) != core_stream_eof && + i < 255) { buffer[i++] = (char)ch; } diff --git a/libs/lex_parser/tests/test_string.c b/libs/lex_parser/tests/test_string.c index 690f6ea..930109a 100644 --- a/libs/lex_parser/tests/test_string.c +++ b/libs/lex_parser/tests/test_string.c @@ -5,9 +5,9 @@ cbool check_string(const char *str, const char *expect, cstring_t *output) { log_set_level(&__default_logger_root, 0); core_pos_t pos = core_pos_init(); - core_mem_stream_t mem_stream; - core_stream_t *stream = - core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false); + core_mem_probe_stream_t mem_stream; + core_probe_stream_t *stream = + core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false); cbool ret = lex_parse_string(stream, &pos, output); if (ret && expect) { diff --git a/libs/lexer/include/lexer.h b/libs/lexer/include/lexer.h index e36d43d..5ac864e 100644 --- a/libs/lexer/include/lexer.h +++ b/libs/lexer/include/lexer.h @@ -21,7 +21,7 @@ typedef struct lexer_token { * 封装词法分析所需的状态信息和缓冲区管理 */ typedef struct cc_lexer { - core_stream_t *stream; + core_probe_stream_t *stream; core_pos_t pos; } smcc_lexer_t; @@ -30,7 +30,7 @@ typedef struct cc_lexer { * @param[out] lexer 要初始化的词法分析器实例 * @param[in] stream 输入流对象指针 */ -void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream); +void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream); /** * @brief 获取原始token diff --git a/libs/lexer/src/lexer.c b/libs/lexer/src/lexer.c index 5748192..db3473e 
100644 --- a/libs/lexer/src/lexer.c +++ b/libs/lexer/src/lexer.c @@ -75,7 +75,7 @@ static inline int keyword_cmp(const char *name, int len) { return -1; // Not a keyword. } -void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) { +void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream) { lexer->stream = stream; lexer->pos = core_pos_init(); // FIXME @@ -86,9 +86,9 @@ void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) { static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) { token->loc = lexer->pos; - core_stream_t *stream = lexer->stream; - core_stream_reset_char(stream); - int ch = core_stream_peek_char(stream); + core_probe_stream_t *stream = lexer->stream; + core_probe_stream_reset(stream); + int ch = core_probe_stream_next(stream); usize n; cstring_t str = cstring_new(); @@ -104,7 +104,7 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) { const char line[] = "line"; for (int i = 0; i < (int)sizeof(line); i++) { - ch = core_stream_next_char(stream); + ch = core_probe_stream_consume(stream); core_pos_next(&lexer->pos); if (ch != line[i]) { LEX_WARN("Maroc does not support in lexer rather in preprocessor, " @@ -118,12 +118,12 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) { goto SKIP_LINE; } - if (core_stream_next_char(stream) != ' ') { + if (core_probe_stream_consume(stream) != ' ') { lex_parse_skip_line(lexer->stream, &lexer->pos); token->loc.line = token->value.n; } - if (core_stream_peek_char(stream) != '"') { + if (core_probe_stream_next(stream) != '"') { LEX_ERROR("Invalid `#` line"); goto SKIP_LINE; } @@ -149,26 +149,26 @@ ERR: void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { token->loc = lexer->pos; token->type = TOKEN_UNKNOWN; - core_stream_t *stream = lexer->stream; + core_probe_stream_t *stream = lexer->stream; - core_stream_reset_char(stream); + core_probe_stream_reset(stream); token_type_t type = TOKEN_UNKNOWN; - int ch = core_stream_peek_char(stream); + int ch = 
core_probe_stream_next(stream); // once step switch (ch) { case '=': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '=': type = TOKEN_EQ; goto double_char; default: - core_stream_reset_char(stream), type = TOKEN_ASSIGN; + core_probe_stream_reset(stream), type = TOKEN_ASSIGN; break; } break; case '+': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '+': type = TOKEN_ADD_ADD; goto double_char; @@ -176,12 +176,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { type = TOKEN_ASSIGN_ADD; goto double_char; default: - core_stream_reset_char(stream), type = TOKEN_ADD; + core_probe_stream_reset(stream), type = TOKEN_ADD; break; } break; case '-': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '-': type = TOKEN_SUB_SUB; goto double_char; @@ -192,22 +192,22 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { type = TOKEN_DEREF; goto double_char; default: - core_stream_reset_char(stream), type = TOKEN_SUB; + core_probe_stream_reset(stream), type = TOKEN_SUB; break; } break; case '*': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '=': type = TOKEN_ASSIGN_MUL; goto double_char; default: - core_stream_reset_char(stream), type = TOKEN_MUL; + core_probe_stream_reset(stream), type = TOKEN_MUL; break; } break; case '/': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '=': type = TOKEN_ASSIGN_DIV; goto double_char; @@ -220,22 +220,22 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { token->type = TOKEN_BLOCK_COMMENT; goto END; default: - core_stream_reset_char(stream), type = TOKEN_DIV; + core_probe_stream_reset(stream), type = TOKEN_DIV; break; } break; case '%': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '=': type = TOKEN_ASSIGN_MOD; goto double_char; default: - 
core_stream_reset_char(stream), type = TOKEN_MOD; + core_probe_stream_reset(stream), type = TOKEN_MOD; break; } break; case '&': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '&': type = TOKEN_AND_AND; goto double_char; @@ -243,12 +243,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { type = TOKEN_ASSIGN_AND; goto double_char; default: - core_stream_reset_char(stream), type = TOKEN_AND; + core_probe_stream_reset(stream), type = TOKEN_AND; break; } break; case '|': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '|': type = TOKEN_OR_OR; goto double_char; @@ -256,27 +256,27 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { type = TOKEN_ASSIGN_OR; goto double_char; default: - core_stream_reset_char(stream), type = TOKEN_OR; + core_probe_stream_reset(stream), type = TOKEN_OR; break; } break; case '^': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '=': type = TOKEN_ASSIGN_XOR; goto double_char; default: - core_stream_reset_char(stream), type = TOKEN_XOR; + core_probe_stream_reset(stream), type = TOKEN_XOR; break; } break; case '<': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '=': type = TOKEN_LE; goto double_char; case '<': { - if (core_stream_peek_char(stream) == '=') { + if (core_probe_stream_next(stream) == '=') { type = TOKEN_ASSIGN_L_SH; goto triple_char; } else { @@ -286,17 +286,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { break; } default: - core_stream_reset_char(stream), type = TOKEN_LT; + core_probe_stream_reset(stream), type = TOKEN_LT; break; } break; case '>': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '=': type = TOKEN_GE; goto double_char; case '>': { - if (core_stream_peek_char(stream) == '=') { + if (core_probe_stream_next(stream) == '=') { type = 
TOKEN_ASSIGN_R_SH; goto triple_char; } else { @@ -306,7 +306,7 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { break; } default: - core_stream_reset_char(stream), type = TOKEN_GT; + core_probe_stream_reset(stream), type = TOKEN_GT; break; } break; @@ -314,12 +314,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { type = TOKEN_BIT_NOT; break; case '!': - switch (core_stream_peek_char(stream)) { + switch (core_probe_stream_next(stream)) { case '=': type = TOKEN_NEQ; goto double_char; default: - core_stream_reset_char(stream), type = TOKEN_NOT; + core_probe_stream_reset(stream), type = TOKEN_NOT; break; } break; @@ -351,8 +351,8 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { type = TOKEN_COLON; break; case '.': - if (core_stream_peek_char(stream) == '.' && - core_stream_peek_char(stream) == '.') { + if (core_probe_stream_next(stream) == '.' && + core_probe_stream_next(stream) == '.') { type = TOKEN_ELLIPSIS; goto triple_char; } @@ -452,13 +452,13 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { } goto once_char; triple_char: - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(&lexer->pos); double_char: - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(&lexer->pos); once_char: - core_stream_next_char(stream); + core_probe_stream_consume(stream); core_pos_next(&lexer->pos); token->type = type; END: diff --git a/libs/lexer/tests/test_parse.c b/libs/lexer/tests/test_parse.c index 2e47d95..5f5bfe4 100644 --- a/libs/lexer/tests/test_parse.c +++ b/libs/lexer/tests/test_parse.c @@ -8,10 +8,10 @@ static inline void test_lexer_string(const char *input, token_type_t expected_type) { smcc_lexer_t lexer; lexer_tok_t token; - core_mem_stream_t stream; + core_mem_probe_stream_t stream; - lexer_init(&lexer, - core_mem_stream_init(&stream, input, strlen(input), false)); + lexer_init(&lexer, core_mem_probe_stream_init(&stream, input, 
strlen(input), + false)); lexer_get_token(&lexer, &token); TEST_CHECK(token.type == expected_type); @@ -167,4 +167,4 @@ TEST_LIST = {{"operators", test_operators}, {"literals", test_literals}, {"edge_cases", test_edge_cases}, // {"error_handling", test_error_handling}, - {NULL, NULL}}; \ No newline at end of file + {NULL, NULL}}; diff --git a/libs/lexer/tests/test_run.c b/libs/lexer/tests/test_run.c index 631ddbd..a2c38d8 100644 --- a/libs/lexer/tests/test_run.c +++ b/libs/lexer/tests/test_run.c @@ -63,9 +63,9 @@ int main(int argc, char *argv[]) { } smcc_lexer_t lexer; - core_mem_stream_t mem_stream = {0}; - core_stream_t *stream = - core_mem_stream_init(&mem_stream, buffer, fsize, false); + core_mem_probe_stream_t mem_stream = {0}; + core_probe_stream_t *stream = + core_mem_probe_stream_init(&mem_stream, buffer, fsize, false); Assert(stream != null); cstring_clear(&stream->name); cstring_push_cstr(&stream->name, file_name, strlen(file_name)); @@ -78,7 +78,7 @@ int main(int argc, char *argv[]) { break; } LOG_DEBUG("token `%s` at %s:%u:%u", get_tok_name(tok.type), - tok.loc.name, tok.loc.line, tok.loc.col); + cstring_as_cstr(&tok.loc.name), tok.loc.line, tok.loc.col); Assert(tok.loc.offset <= fsize); // LOG_DEBUG("%s", tok.val.str); // printf("line: %d, column: %d, type: %3d, typename: %s\n", diff --git a/runtime/libcore/include/core_str.h b/runtime/libcore/include/core_str.h index 86ec4c2..29f8eb3 100644 --- a/runtime/libcore/include/core_str.h +++ b/runtime/libcore/include/core_str.h @@ -57,7 +57,7 @@ static inline void cstring_free(cstring_t *str) { if (str == null) { return; } - if (str->data != null && str->cap != 0) { + if (str->cap != 0 && str->data != null) { smcc_free(str->data); str->data = null; } diff --git a/runtime/libcore/include/core_stream.h b/runtime/libcore/include/core_stream.h index 7806c51..be74ad6 100644 --- a/runtime/libcore/include/core_stream.h +++ b/runtime/libcore/include/core_stream.h @@ -1,67 +1,120 @@ -#ifndef __SMCC_CORE_STREAM_H__ 
-#define __SMCC_CORE_STREAM_H__ +#ifndef __SMCC_CORE_PROBE_STREAM_H__ +#define __SMCC_CORE_PROBE_STREAM_H__ #include "core_impl.h" #include "core_macro.h" #include "core_mem.h" #include "core_str.h" -struct core_stream; -typedef struct core_stream core_stream_t; +struct core_probe_stream; +typedef struct core_probe_stream core_probe_stream_t; #define core_stream_eof (-1) -struct core_stream { +/** + * @brief 带探针的流接口 + * + * 这个流提供了双指针机制:当前读取位置(头指针)和探针位置(尾指针)。 + * 尾指针只能向前移动,用于查看而不消费。 + * 头指针可以前进或单次后退,但不能一直后退到尾指针后面。 + */ +struct core_probe_stream { cstring_t name; + /// @brief 消费头指针处的字符(移动头指针) + int (*consume)(core_probe_stream_t *stream); + + /// @brief 查看当前探针位置的字符,不移动任何指针 + int (*peek)(core_probe_stream_t *stream); + + /// @brief 移动探针位置并返回字符 + int (*next)(core_probe_stream_t *stream); + + /// @brief 移动头指针到探针位置 + void (*sync)(core_probe_stream_t *stream); + + /// @brief 重置探针位置到头指针位置 + void (*reset)(core_probe_stream_t *stream); + + /// @brief 回退一个字符(单次后退,头指针后退一步) + cbool (*back)(core_probe_stream_t *stream); + /// @brief 读取指定数量的字符到缓冲区 - usize (*read_buf)(core_stream_t *stream, char *buffer, usize count); + usize (*read_buf)(core_probe_stream_t *stream, char *buffer, usize count); - /// @brief 获取下一个字符 - int (*peek_char)(core_stream_t *stream); + /// @brief 检查是否到达流末尾 + cbool (*is_at_end)(core_probe_stream_t *stream); - /// @brief 重置字符流位置 - void (*reset_char)(core_stream_t *stream); - - /// @brief 读取并消费下一个字符(移动流位置) - int (*next_char)(core_stream_t *stream); - - /// @brief 释放资源 - void (*free_stream)(core_stream_t *steam); + /// @brief 销毁流并释放资源 + void (*destroy)(core_probe_stream_t *stream); }; -static inline usize core_stream_read_buf(core_stream_t *self, char *buffer, - usize count) { +static inline int core_probe_stream_consume(core_probe_stream_t *self) { + return self->consume(self); +} + +static inline int core_probe_stream_peek(core_probe_stream_t *self) { + return self->peek(self); +} + +static inline int core_probe_stream_next(core_probe_stream_t *self) { + 
return self->next(self); +} + +static inline void core_probe_stream_sync(core_probe_stream_t *self) { + self->sync(self); +} + +static inline cbool core_probe_stream_back(core_probe_stream_t *self) { + return self->back(self); +} + +static inline void core_probe_stream_reset(core_probe_stream_t *self) { + self->reset(self); +} + +static inline usize core_probe_stream_read_buf(core_probe_stream_t *self, + char *buffer, usize count) { return self->read_buf(self, buffer, count); } -static inline int core_stream_peek_char(core_stream_t *self) { - return self->peek_char(self); +static inline cbool core_probe_stream_is_at_end(core_probe_stream_t *self) { + return self->is_at_end(self); } -static inline void core_stream_reset_char(core_stream_t *self) { - self->reset_char(self); +static inline cbool core_probe_stream_has_more(core_probe_stream_t *self) { + return !self->is_at_end(self); } -static inline int core_stream_next_char(core_stream_t *self) { - return self->next_char(self); +static inline void core_probe_stream_destroy(core_probe_stream_t *self) { + self->destroy(self); } -static inline void core_stream_free_stream(core_stream_t *self) { - self->free_stream(self); -} - -#ifndef __SMCC_CORE_NO_MEM_STREAM__ -typedef struct core_mem_stream { - core_stream_t stream; +#ifndef __SMCC_CORE_NO_MEM_PROBE_STREAM__ +/** + * @brief 内存探针流结构 + */ +typedef struct core_mem_probe_stream { + core_probe_stream_t stream; const char *data; usize data_length; - usize curr_pos; - usize peek_pos; - cbool owned; -} core_mem_stream_t; -core_stream_t *core_mem_stream_init(core_mem_stream_t *stream, const char *data, - usize length, cbool need_copy); + usize curr_pos; // 当前读取位置 + usize probe_pos; // 探针位置(用于peek) + cbool owned; // 是否拥有数据(需要释放) +} core_mem_probe_stream_t; + +/** + * @brief 初始化内存探针流 + * + * @param stream 流结构指针 + * @param data 数据指针 + * @param length 数据长度 + * @param need_copy 是否需要复制数据 + * @return core_probe_stream_t* 成功返回流指针,失败返回NULL + */ +core_probe_stream_t 
*core_mem_probe_stream_init(core_mem_probe_stream_t *stream, + const char *data, usize length, + cbool need_copy); #endif -#endif /* __SMCC_CORE_STREAM_H__ */ +#endif /* __SMCC_CORE_PROBE_STREAM_H__ */ diff --git a/runtime/libcore/include/core_vec.h b/runtime/libcore/include/core_vec.h index 7aa5844..6c17d02 100644 --- a/runtime/libcore/include/core_vec.h +++ b/runtime/libcore/include/core_vec.h @@ -70,6 +70,7 @@ (vec).cap = cap; \ (vec).data = data; \ } \ + Assert((vec).data != null); \ (vec).data[(vec).size++] = value; \ } while (0) diff --git a/runtime/libcore/src/stream.c b/runtime/libcore/src/stream.c index 0cdd2b2..70c9a66 100644 --- a/runtime/libcore/src/stream.c +++ b/runtime/libcore/src/stream.c @@ -1,10 +1,82 @@ #include #include -// 内存流的具体实现结构 -static usize read_buf(core_stream_t *_stream, char *buffer, usize count) { - Assert(buffer != null && buffer != null); - core_mem_stream_t *stream = (core_mem_stream_t *)_stream; +#ifndef __SMCC_CORE_NO_MEM_PROBE_STREAM__ + +static int mem_probe_stream_consume(core_probe_stream_t *_stream) { + Assert(_stream != null); + core_mem_probe_stream_t *stream = (core_mem_probe_stream_t *)_stream; + + if (stream->curr_pos >= stream->data_length) { + return core_stream_eof; + } + + unsigned char ch = stream->data[stream->curr_pos++]; + // If the probe has fallen behind the read position, pull it forward + if (stream->probe_pos < stream->curr_pos) { + stream->probe_pos = stream->curr_pos; + } + return (int)ch; +} + +static int mem_probe_stream_peek(core_probe_stream_t *_stream) { + Assert(_stream != null); + core_mem_probe_stream_t *stream = (core_mem_probe_stream_t *)_stream; + + if (stream->probe_pos >= stream->data_length) { + return core_stream_eof; + } + + // Look only; the probe position does not move + return (int)(unsigned char)stream->data[stream->probe_pos]; +} + +static int mem_probe_stream_next(core_probe_stream_t *_stream) { + Assert(_stream != null); + core_mem_probe_stream_t *stream = (core_mem_probe_stream_t *)_stream; + + if (stream->probe_pos >= stream->data_length) { + return
core_stream_eof; + } + + // Return the character at the probe position, then advance the probe + int ch = (int)(unsigned char)stream->data[stream->probe_pos]; + stream->probe_pos++; + return ch; +} + +static void mem_probe_stream_sync(core_probe_stream_t *_stream) { + Assert(_stream != null); + core_mem_probe_stream_t *stream = (core_mem_probe_stream_t *)_stream; + + // Move the head up to the probe position (consumes what was probed) + if (stream->probe_pos > stream->curr_pos) { + stream->curr_pos = stream->probe_pos; + } +} + +static cbool mem_probe_stream_back(core_probe_stream_t *_stream) { + Assert(_stream != null); + core_mem_probe_stream_t *stream = (core_mem_probe_stream_t *)_stream; + + // Step the head back by one; the probe never trails it, so only pos 0 fails + if (stream->curr_pos == 0) { + return false; + } + + stream->curr_pos--; + return true; +} + +static usize mem_probe_stream_read_buf(core_probe_stream_t *_stream, char *buffer, + usize count) { + Assert(_stream != null); + core_mem_probe_stream_t *stream = (core_mem_probe_stream_t *)_stream; + + if (buffer == null) { + LOG_WARN("Buffer is null"); + return 0; + } usize remaining = stream->data_length - stream->curr_pos; usize to_read = (remaining < count) ?
remaining : count; @@ -12,63 +84,48 @@ static usize read_buf(core_stream_t *_stream, char *buffer, usize count) { if (to_read > 0) { smcc_memcpy(buffer, stream->data + stream->curr_pos, to_read); stream->curr_pos += to_read; + // Keep the probe from trailing the read position + if (stream->probe_pos < stream->curr_pos) { + stream->probe_pos = stream->curr_pos; + } } else { - LOG_WARN("Reading past end of stream " - "[maybe count is too large or negative?]"); + LOG_WARN("Reading past end of stream [maybe count is too large or " + "negative?]"); } return to_read; } -static int peek_char(core_stream_t *_stream) { +static void mem_probe_stream_reset(core_probe_stream_t *_stream) { Assert(_stream != null); - core_mem_stream_t *stream = (core_mem_stream_t *)_stream; + core_mem_probe_stream_t *stream = (core_mem_probe_stream_t *)_stream; - // 如果已经到达末尾,返回EOF - if (stream->peek_pos >= stream->data_length) { - return core_stream_eof; // EOF - } - - return (int)(unsigned char)stream->data[stream->peek_pos++]; + // Move the probe back to the head (read) position + stream->probe_pos = stream->curr_pos; } -static int next_char(core_stream_t *_stream) { - Assert(_stream != NULL); - core_mem_stream_t *stream = (core_mem_stream_t *)_stream; - - // 如果已经到达末尾,返回EOF - if (stream->curr_pos >= stream->data_length) { - return core_stream_eof; // EOF - } - - unsigned char ch = stream->data[stream->curr_pos++]; - if (stream->peek_pos < stream->curr_pos) { - stream->peek_pos = stream->curr_pos; - } - return (int)ch; -} - -static void reset_char(core_stream_t *_stream) { - Assert(_stream != NULL); - core_mem_stream_t *stream = (core_mem_stream_t *)_stream; - - stream->peek_pos = stream->curr_pos; -} - -static void free_stream(core_stream_t *_stream) { +static cbool mem_probe_stream_is_at_end(core_probe_stream_t *_stream) { Assert(_stream != null); - core_mem_stream_t *stream = (core_mem_stream_t *)_stream; + core_mem_probe_stream_t *stream = (core_mem_probe_stream_t *)_stream; + + return stream->curr_pos >= stream->data_length; +} + +static void
mem_probe_stream_destroy(core_probe_stream_t *_stream) { + Assert(_stream != null); + core_mem_probe_stream_t *stream = (core_mem_probe_stream_t *)_stream; - // FIXME maybe double free? cstring_free(&stream->stream.name); if (stream->owned) { smcc_free((void *)stream->data); + stream->data = null; } } -core_stream_t *core_mem_stream_init(core_mem_stream_t *stream, const char *data, - usize length, cbool need_copy) { +core_probe_stream_t *core_mem_probe_stream_init(core_mem_probe_stream_t *stream, + const char *data, usize length, + cbool need_copy) { if (stream == null || data == null) { LOG_ERROR("param error"); return null; @@ -94,15 +151,22 @@ core_stream_t *core_mem_stream_init(core_mem_stream_t *stream, const char *data, } stream->data_length = length; stream->curr_pos = 0; - stream->peek_pos = 0; + stream->probe_pos = 0; - stream->stream.name = cstring_from_cstr("mem_stream"); + stream->stream.name = cstring_from_cstr("mem_probe_stream"); - stream->stream.read_buf = read_buf; - stream->stream.peek_char = peek_char; - stream->stream.next_char = next_char; - stream->stream.reset_char = reset_char; - stream->stream.free_stream = free_stream; + // Install the in-memory implementations in the interface's function pointers + stream->stream.consume = mem_probe_stream_consume; + stream->stream.peek = mem_probe_stream_peek; + stream->stream.next = mem_probe_stream_next; + stream->stream.sync = mem_probe_stream_sync; + stream->stream.back = mem_probe_stream_back; + stream->stream.read_buf = mem_probe_stream_read_buf; + stream->stream.reset = mem_probe_stream_reset; + stream->stream.is_at_end = mem_probe_stream_is_at_end; + stream->stream.destroy = mem_probe_stream_destroy; - return (void *)stream; + return (core_probe_stream_t *)stream; } + +#endif /* __SMCC_CORE_NO_MEM_PROBE_STREAM__ */ diff --git a/runtime/log/include/log.h b/runtime/log/include/log.h index 1efa031..80b2d6b 100644 --- a/runtime/log/include/log.h +++ b/runtime/log/include/log.h @@ -16,6 +16,16 @@ #define log_exit exit #endif +#ifdef __GNUC__ // GCC, Clang, ICC
+#define __smcc_log_unreachable() (__builtin_unreachable()) +#elif defined _MSC_VER // MSVC: __assume(0) marks unreachable code and needs no <stdbool.h> in C +#define __smcc_log_unreachable() (__assume(0)) +#elif defined __SMCC_BUILT_IN__ // The SMCC (my compiler) +#define __smcc_log_unreachable() (__smcc_builtin_unreachable()) +#else +#define __smcc_log_unreachable() +#endif + #ifndef log_snprintf #define log_snprintf(...) #warning "log_snprintf not defined" @@ -172,6 +182,8 @@ void logger_destroy(logger_t *logger); do { \ if (!(cond)) { \ LOG_FATAL(__VA_ARGS__); \ + log_exit(1); \ + __smcc_log_unreachable(); \ } \ } while (0)