feat 重构stream流API并适配lex_parse和lexer

This commit is contained in:
zzy
2025-12-08 23:04:11 +08:00
parent 1ab07a5815
commit 36bff64a91
17 changed files with 402 additions and 244 deletions

View File

@@ -11,15 +11,16 @@ static inline cbool lex_parse_is_whitespace(int ch) {
return ch == ' ' || ch == '\t';
}
int lex_parse_char(core_stream_t *input, core_pos_t *pos);
cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
int lex_parse_char(core_probe_stream_t *input, core_pos_t *pos);
cbool lex_parse_string(core_probe_stream_t *input, core_pos_t *pos,
cstring_t *output);
cbool lex_parse_number(core_stream_t *input, core_pos_t *pos, usize *output);
cbool lex_parse_identifier(core_stream_t *input, core_pos_t *pos,
cbool lex_parse_number(core_probe_stream_t *input, core_pos_t *pos,
usize *output);
cbool lex_parse_identifier(core_probe_stream_t *input, core_pos_t *pos,
cstring_t *output);
void lex_parse_skip_endline(core_stream_t *input, core_pos_t *pos);
void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos);
void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos);
void lex_parse_skip_whitespace(core_stream_t *input, core_pos_t *pos);
void lex_parse_skip_endline(core_probe_stream_t *input, core_pos_t *pos);
void lex_parse_skip_block_comment(core_probe_stream_t *input, core_pos_t *pos);
void lex_parse_skip_line(core_probe_stream_t *input, core_pos_t *pos);
void lex_parse_skip_whitespace(core_probe_stream_t *input, core_pos_t *pos);
#endif /* __SMCC_LEX_PARSER_H__ */

View File

@@ -1,18 +1,18 @@
#include <lex_parser.h>
void lex_parse_skip_endline(core_stream_t *input, core_pos_t *pos) {
void lex_parse_skip_endline(core_probe_stream_t *input, core_pos_t *pos) {
Assert(input != null && pos != null);
core_stream_reset_char(input);
int ch = core_stream_peek_char(input);
core_probe_stream_reset(input);
int ch = core_probe_stream_peek(input);
if (ch == '\r') {
core_stream_next_char(input);
ch = core_stream_peek_char(input);
core_probe_stream_consume(input);
ch = core_probe_stream_peek(input);
if (ch == '\n') {
core_stream_next_char(input);
core_probe_stream_consume(input);
}
core_pos_next_line(pos);
} else if (ch == '\n') {
core_stream_next_char(input);
core_probe_stream_consume(input);
core_pos_next_line(pos);
} else {
LOG_WARN("not a newline character");
@@ -57,12 +57,12 @@ static inline int got_simple_escape(int ch) {
/* clang-format on */
}
void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos) {
core_stream_t *stream = input;
void lex_parse_skip_line(core_probe_stream_t *input, core_pos_t *pos) {
core_probe_stream_t *stream = input;
Assert(stream != null && pos != null);
core_stream_reset_char(stream);
core_probe_stream_reset(stream);
while (1) {
int ch = core_stream_peek_char(stream);
int ch = core_probe_stream_peek(stream);
if (ch == core_stream_eof) {
return;
@@ -73,29 +73,29 @@ void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos) {
lex_parse_skip_endline(stream, pos);
return;
} else {
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
}
}
}
void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) {
core_stream_t *stream = input;
void lex_parse_skip_block_comment(core_probe_stream_t *input, core_pos_t *pos) {
core_probe_stream_t *stream = input;
Assert(stream != null && pos != null);
int ch;
core_stream_reset_char(stream);
ch = core_stream_next_char(stream);
core_probe_stream_reset(stream);
ch = core_probe_stream_consume(stream);
core_pos_next(pos);
// FIXME Assertion
Assert(ch == '/');
ch = core_stream_next_char(stream);
ch = core_probe_stream_consume(stream);
core_pos_next(pos);
Assert(ch == '*');
// already matched `/*`
while (1) {
core_stream_reset_char(stream);
ch = core_stream_peek_char(stream);
core_probe_stream_reset(stream);
ch = core_probe_stream_peek(stream);
if (ch == core_stream_eof) {
LOG_WARN("Unterminated block comment");
@@ -106,12 +106,12 @@ void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) {
lex_parse_skip_endline(stream, pos);
continue;
}
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
if (ch == '*') {
ch = core_stream_peek_char(stream);
ch = core_probe_stream_peek(stream);
if (ch == '/') {
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
return;
}
@@ -119,35 +119,35 @@ void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) {
}
}
void lex_parse_skip_whitespace(core_stream_t *input, core_pos_t *pos) {
core_stream_t *stream = input;
void lex_parse_skip_whitespace(core_probe_stream_t *input, core_pos_t *pos) {
core_probe_stream_t *stream = input;
Assert(stream != null && pos != null);
core_stream_reset_char(stream);
core_probe_stream_reset(stream);
while (1) {
int ch = core_stream_peek_char(stream);
int ch = core_probe_stream_peek(stream);
if (!lex_parse_is_whitespace(ch)) {
return;
}
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
}
}
static inline cbool _lex_parse_uint(core_stream_t *input, core_pos_t *pos,
static inline cbool _lex_parse_uint(core_probe_stream_t *input, core_pos_t *pos,
int base, usize *output) {
Assert(input != null && pos != null);
if (input == null || pos == null) {
return false;
}
Assert(base == 2 || base == 8 || base == 10 || base == 16);
core_stream_reset_char(input);
core_probe_stream_reset(input);
int ch, tmp;
usize n = 0;
usize offset = pos->offset;
while (1) {
ch = core_stream_peek_char(input);
ch = core_probe_stream_peek(input);
if (ch == core_stream_eof) {
break;
@@ -166,7 +166,7 @@ static inline cbool _lex_parse_uint(core_stream_t *input, core_pos_t *pos,
return false;
}
core_stream_next_char(input);
core_probe_stream_consume(input);
core_pos_next(pos);
n = n * base + tmp;
// TODO number overflow
@@ -187,11 +187,11 @@ static inline cbool _lex_parse_uint(core_stream_t *input, core_pos_t *pos,
* @return int
* https://cppreference.cn/w/c/language/character_constant
*/
int lex_parse_char(core_stream_t *input, core_pos_t *pos) {
core_stream_t *stream = input;
int lex_parse_char(core_probe_stream_t *input, core_pos_t *pos) {
core_probe_stream_t *stream = input;
Assert(stream != null && pos != null);
core_stream_reset_char(stream);
int ch = core_stream_peek_char(stream);
core_probe_stream_reset(stream);
int ch = core_probe_stream_peek(stream);
int ret = core_stream_eof;
if (ch == core_stream_eof) {
@@ -201,17 +201,17 @@ int lex_parse_char(core_stream_t *input, core_pos_t *pos) {
LOG_WARN("Unexpected character '%c' at begin", ch);
goto ERR;
}
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
ch = core_stream_next_char(stream);
ch = core_probe_stream_consume(stream);
core_pos_next(pos);
if (ch == core_stream_eof) {
LOG_WARN("Unexpected EOF at middle");
goto ERR;
} else if (ch == '\\') {
ch = core_stream_next_char(stream);
ch = core_probe_stream_consume(stream);
core_pos_next(pos);
if (ch == '0') {
// 数字转义序列
@@ -237,7 +237,7 @@ int lex_parse_char(core_stream_t *input, core_pos_t *pos) {
} else {
ret = ch;
}
if ((ch = core_stream_next_char(stream)) != '\'') {
if ((ch = core_probe_stream_consume(stream)) != '\'') {
LOG_ERROR("Unclosed character literal '%c' at end, expect `'`", ch);
core_pos_next(pos);
goto ERR;
@@ -257,12 +257,12 @@ ERR:
* @return cbool
* https://cppreference.cn/w/c/language/string_literal
*/
cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
cbool lex_parse_string(core_probe_stream_t *input, core_pos_t *pos,
cstring_t *output) {
core_stream_t *stream = input;
core_probe_stream_t *stream = input;
Assert(stream != null && pos != null && output != null);
core_stream_reset_char(stream);
int ch = core_stream_peek_char(stream);
core_probe_stream_reset(stream);
int ch = core_probe_stream_peek(stream);
Assert(cstring_is_empty(output));
if (ch == core_stream_eof) {
@@ -272,12 +272,12 @@ cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
LOG_WARN("Unexpected character '%c' at begin", ch);
goto ERR;
}
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
cstring_t str = cstring_from_cstr("");
while (1) {
ch = core_stream_peek_char(stream);
ch = core_probe_stream_peek(stream);
if (ch == core_stream_eof) {
LOG_ERROR("Unexpected EOF at string literal");
@@ -287,8 +287,8 @@ cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
goto ERR;
} else if (ch == '\\') {
// TODO bad practice and maybe bugs here
core_stream_next_char(stream);
ch = core_stream_next_char(stream);
core_probe_stream_consume(stream);
ch = core_probe_stream_consume(stream);
int val = got_simple_escape(ch);
if (val == -1) {
LOG_ERROR("Invalid escape character it is \\%c [%d]", ch, ch);
@@ -297,12 +297,12 @@ cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
continue;
}
} else if (ch == '"') {
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
break;
}
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
cstring_push(&str, ch);
}
@@ -323,48 +323,72 @@ ERR:
* @return cbool
* https://cppreference.cn/w/c/language/integer_constant
*/
cbool lex_parse_number(core_stream_t *input, core_pos_t *pos, usize *output) {
core_stream_t *stream = input;
cbool lex_parse_number(core_probe_stream_t *input, core_pos_t *pos,
usize *output) {
core_probe_stream_t *stream = input;
Assert(stream != null && pos != null && output != null);
core_stream_reset_char(stream);
int ch = core_stream_peek_char(stream);
int base = 0;
core_probe_stream_reset(stream);
int ch = core_probe_stream_peek(stream);
int base = 10; // 默认十进制
if (ch == core_stream_eof) {
LOG_WARN("Unexpected EOF at begin");
goto ERR;
} else if (ch == '0') {
ch = core_stream_peek_char(stream);
}
if (ch == '0') {
// 消费 '0'
core_probe_stream_consume(stream);
core_pos_next(pos);
// 查看下一个字符
ch = core_probe_stream_peek(stream);
if (ch == 'x' || ch == 'X') {
// 十六进制
base = 16;
core_stream_next_char(stream);
core_pos_next(pos);
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
} else if (ch == 'b' || ch == 'B') {
// FIXME C23 external integer base
// 二进制 (C23扩展)
base = 2;
core_stream_next_char(stream);
core_pos_next(pos);
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
} else if (ch >= '0' && ch <= '7') {
// 八进制
base = 8;
core_stream_next_char(stream);
core_pos_next(pos);
} else if (ch == '9' || ch == '8') {
// 不消费,数字将由 _lex_parse_uint 处理
} else if (ch == '8' || ch == '9') {
LOG_ERROR("Invalid digit '%d' in octal literal", ch);
return false;
} else {
base = 10;
// 只是0十进制
*output = 0;
return true;
}
} else {
} else if (ch >= '1' && ch <= '9') {
// 十进制,不消费,由 _lex_parse_uint 处理
base = 10;
} else {
// 无效的数字
return false;
}
// 解析整数部分
core_stream_reset_char(stream);
core_probe_stream_reset(stream);
usize n;
if (_lex_parse_uint(stream, pos, base, &n) == false) {
// 如果没有匹配任何数字,但输入是 '0',已经处理过了
// 对于十进制数字,至少应该有一个数字
if (base == 10) {
// 单个数字的情况,例如 "1"
// 我们需要消费这个数字并返回它的值
if (ch >= '1' && ch <= '9') {
core_probe_stream_consume(stream);
core_pos_next(pos);
*output = ch - '0';
return true;
}
}
return false;
}
*output = n;
@@ -382,13 +406,13 @@ ERR:
* @return cbool
* https://cppreference.cn/w/c/language/identifier
*/
cbool lex_parse_identifier(core_stream_t *input, core_pos_t *pos,
cbool lex_parse_identifier(core_probe_stream_t *input, core_pos_t *pos,
cstring_t *output) {
Assert(input != null && pos != null && output != null);
Assert(cstring_is_empty(output));
core_stream_t *stream = input;
core_stream_reset_char(stream);
int ch = core_stream_peek_char(stream);
core_probe_stream_t *stream = input;
core_probe_stream_reset(stream);
int ch = core_probe_stream_peek(stream);
if (ch == core_stream_eof) {
LOG_WARN("Unexpected EOF at begin");
@@ -396,9 +420,9 @@ cbool lex_parse_identifier(core_stream_t *input, core_pos_t *pos,
(ch >= 'A' && ch <= 'Z')) {
while (1) {
cstring_push(output, ch);
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(pos);
ch = core_stream_peek_char(stream);
ch = core_probe_stream_peek(stream);
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
(ch == '_') || (ch >= '0' && ch <= '9')) {
continue;

View File

@@ -5,9 +5,9 @@
cbool check_char(const char *str, int expect, int *output) {
log_set_level(&__default_logger_root, 0);
core_pos_t pos = core_pos_init();
core_mem_stream_t mem_stream;
core_stream_t *stream =
core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false);
core_mem_probe_stream_t mem_stream;
core_probe_stream_t *stream =
core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false);
*output = lex_parse_char(stream, &pos);
return *output == expect;
}
@@ -57,4 +57,4 @@ TEST_LIST = {
{"test_escape_char", test_escape_char},
{"test_invalid_char", test_invalid_char},
{NULL, NULL},
};
};

View File

@@ -5,9 +5,9 @@
cbool check_identifier(const char *str, const char *expect, cstring_t *output) {
log_set_level(&__default_logger_root, 0);
core_pos_t pos = core_pos_init();
core_mem_stream_t mem_stream;
core_stream_t *stream =
core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false);
core_mem_probe_stream_t mem_stream;
core_probe_stream_t *stream =
core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false);
cbool ret = lex_parse_identifier(stream, &pos, output);
if (ret && expect) {

View File

@@ -4,9 +4,9 @@ cbool check(const char *str, usize expect, usize *output) {
// TODO maybe have other logger
log_set_level(&__default_logger_root, 0);
core_pos_t pos = core_pos_init();
core_mem_stream_t mem_stream;
core_stream_t *stream =
core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false);
core_mem_probe_stream_t mem_stream;
core_probe_stream_t *stream =
core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false);
return lex_parse_number(stream, &pos, output);
}
@@ -16,6 +16,7 @@ cbool check(const char *str, usize expect, usize *output) {
cbool ret = check(str, expect, &_output); \
TEST_CHECK(ret == true); \
TEST_CHECK(_output == expect); \
TEST_MSG("Produced: %llu", _output); \
} while (0)
#define CHECK_INVALID(str) \

View File

@@ -5,9 +5,9 @@
void check_skip_block_comment(const char *str, const char *expect_remaining) {
log_set_level(&__default_logger_root, 0);
core_pos_t pos = core_pos_init();
core_mem_stream_t mem_stream;
core_stream_t *stream =
core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false);
core_mem_probe_stream_t mem_stream;
core_probe_stream_t *stream =
core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false);
lex_parse_skip_block_comment(stream, &pos);
@@ -15,7 +15,8 @@ void check_skip_block_comment(const char *str, const char *expect_remaining) {
char buffer[256] = {0};
int i = 0;
int ch;
while ((ch = core_stream_next_char(stream)) != core_stream_eof && i < 255) {
while ((ch = core_probe_stream_consume(stream)) != core_stream_eof &&
i < 255) {
buffer[i++] = (char)ch;
}

View File

@@ -5,9 +5,9 @@
void check_skip_line(const char *str, const char *expect_remaining) {
log_set_level(&__default_logger_root, 0);
core_pos_t pos = core_pos_init();
core_mem_stream_t mem_stream;
core_stream_t *stream =
core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false);
core_mem_probe_stream_t mem_stream;
core_probe_stream_t *stream =
core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false);
lex_parse_skip_line(stream, &pos);
@@ -15,7 +15,8 @@ void check_skip_line(const char *str, const char *expect_remaining) {
char buffer[256] = {0};
int i = 0;
int ch;
while ((ch = core_stream_next_char(stream)) != core_stream_eof && i < 255) {
while ((ch = core_probe_stream_consume(stream)) != core_stream_eof &&
i < 255) {
buffer[i++] = (char)ch;
}

View File

@@ -5,9 +5,9 @@
cbool check_string(const char *str, const char *expect, cstring_t *output) {
log_set_level(&__default_logger_root, 0);
core_pos_t pos = core_pos_init();
core_mem_stream_t mem_stream;
core_stream_t *stream =
core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false);
core_mem_probe_stream_t mem_stream;
core_probe_stream_t *stream =
core_mem_probe_stream_init(&mem_stream, str, smcc_strlen(str), false);
cbool ret = lex_parse_string(stream, &pos, output);
if (ret && expect) {