feat: refactor the stream API and adapt lex_parse and the lexer

This commit is contained in:
zzy
2025-12-08 23:04:11 +08:00
parent 1ab07a5815
commit 36bff64a91
17 changed files with 402 additions and 244 deletions

View File

@@ -21,7 +21,7 @@ typedef struct lexer_token {
* Encapsulates the state and buffer management needed for lexical analysis
*/
typedef struct cc_lexer {
core_stream_t *stream;
core_probe_stream_t *stream;
core_pos_t pos;
} smcc_lexer_t;
@@ -30,7 +30,7 @@ typedef struct cc_lexer {
* @param[out] lexer The lexer instance to initialize
* @param[in] stream Pointer to the input stream object
*/
void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream);
void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream);
/**
* @brief Get a raw token

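For orientation, the interface this header now depends on: judging from the call sites changed in this commit, the probe stream separates lookahead from actual input consumption. A minimal sketch of the assumed semantics (names taken from the diff; the real declarations live in the core stream headers and may differ):

    /* Assumed interface, inferred from how it is used in lexer.c below. */
    typedef struct core_probe_stream core_probe_stream_t;

    int  core_probe_stream_next(core_probe_stream_t *s);    /* probe: return the next character, advancing only a lookahead cursor */
    int  core_probe_stream_consume(core_probe_stream_t *s); /* commit: consume one character from the underlying stream */
    void core_probe_stream_reset(core_probe_stream_t *s);   /* roll the lookahead cursor back to the last committed position */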
View File

@@ -75,7 +75,7 @@ static inline int keyword_cmp(const char *name, int len) {
return -1; // Not a keyword.
}
void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) {
void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream) {
lexer->stream = stream;
lexer->pos = core_pos_init();
// FIXME
@@ -86,9 +86,9 @@ void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) {
static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
core_stream_t *stream = lexer->stream;
core_stream_reset_char(stream);
int ch = core_stream_peek_char(stream);
core_probe_stream_t *stream = lexer->stream;
core_probe_stream_reset(stream);
int ch = core_probe_stream_next(stream);
usize n;
cstring_t str = cstring_new();
@@ -104,7 +104,7 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
const char line[] = "line";
for (int i = 0; i < (int)sizeof(line); i++) {
ch = core_stream_next_char(stream);
ch = core_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
if (ch != line[i]) {
LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
@@ -118,12 +118,12 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
goto SKIP_LINE;
}
if (core_stream_next_char(stream) != ' ') {
if (core_probe_stream_consume(stream) != ' ') {
lex_parse_skip_line(lexer->stream, &lexer->pos);
token->loc.line = token->value.n;
}
if (core_stream_peek_char(stream) != '"') {
if (core_probe_stream_next(stream) != '"') {
LEX_ERROR("Invalid `#` line");
goto SKIP_LINE;
}
@@ -149,26 +149,26 @@ ERR:
void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
token->type = TOKEN_UNKNOWN;
core_stream_t *stream = lexer->stream;
core_probe_stream_t *stream = lexer->stream;
core_stream_reset_char(stream);
core_probe_stream_reset(stream);
token_type_t type = TOKEN_UNKNOWN;
int ch = core_stream_peek_char(stream);
int ch = core_probe_stream_next(stream);
// once step
switch (ch) {
case '=':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_EQ;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_ASSIGN;
core_probe_stream_reset(stream), type = TOKEN_ASSIGN;
break;
}
break;
case '+':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '+':
type = TOKEN_ADD_ADD;
goto double_char;
@@ -176,12 +176,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_ASSIGN_ADD;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_ADD;
core_probe_stream_reset(stream), type = TOKEN_ADD;
break;
}
break;
case '-':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '-':
type = TOKEN_SUB_SUB;
goto double_char;
@@ -192,22 +192,22 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_DEREF;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_SUB;
core_probe_stream_reset(stream), type = TOKEN_SUB;
break;
}
break;
case '*':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_MUL;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_MUL;
core_probe_stream_reset(stream), type = TOKEN_MUL;
break;
}
break;
case '/':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_DIV;
goto double_char;
@@ -220,22 +220,22 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->type = TOKEN_BLOCK_COMMENT;
goto END;
default:
core_stream_reset_char(stream), type = TOKEN_DIV;
core_probe_stream_reset(stream), type = TOKEN_DIV;
break;
}
break;
case '%':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_MOD;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_MOD;
core_probe_stream_reset(stream), type = TOKEN_MOD;
break;
}
break;
case '&':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '&':
type = TOKEN_AND_AND;
goto double_char;
@@ -243,12 +243,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_ASSIGN_AND;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_AND;
core_probe_stream_reset(stream), type = TOKEN_AND;
break;
}
break;
case '|':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '|':
type = TOKEN_OR_OR;
goto double_char;
@@ -256,27 +256,27 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_ASSIGN_OR;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_OR;
core_probe_stream_reset(stream), type = TOKEN_OR;
break;
}
break;
case '^':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_XOR;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_XOR;
core_probe_stream_reset(stream), type = TOKEN_XOR;
break;
}
break;
case '<':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_LE;
goto double_char;
case '<': {
if (core_stream_peek_char(stream) == '=') {
if (core_probe_stream_next(stream) == '=') {
type = TOKEN_ASSIGN_L_SH;
goto triple_char;
} else {
@@ -286,17 +286,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
break;
}
default:
core_stream_reset_char(stream), type = TOKEN_LT;
core_probe_stream_reset(stream), type = TOKEN_LT;
break;
}
break;
case '>':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_GE;
goto double_char;
case '>': {
if (core_stream_peek_char(stream) == '=') {
if (core_probe_stream_next(stream) == '=') {
type = TOKEN_ASSIGN_R_SH;
goto triple_char;
} else {
@@ -306,7 +306,7 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
break;
}
default:
core_stream_reset_char(stream), type = TOKEN_GT;
core_probe_stream_reset(stream), type = TOKEN_GT;
break;
}
break;
@@ -314,12 +314,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_BIT_NOT;
break;
case '!':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_NEQ;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_NOT;
core_probe_stream_reset(stream), type = TOKEN_NOT;
break;
}
break;
@@ -351,8 +351,8 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_COLON;
break;
case '.':
if (core_stream_peek_char(stream) == '.' &&
core_stream_peek_char(stream) == '.') {
if (core_probe_stream_next(stream) == '.' &&
core_probe_stream_next(stream) == '.') {
type = TOKEN_ELLIPSIS;
goto triple_char;
}
@@ -452,13 +452,13 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
}
goto once_char;
triple_char:
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
double_char:
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
once_char:
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
token->type = type;
END:

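All operator cases above follow the same probe-then-commit pattern: probe ahead with core_probe_stream_next, roll back with core_probe_stream_reset when the longer operator does not match, and commit characters only at the triple_char/double_char/once_char labels via core_probe_stream_consume. A condensed sketch of that pattern for `+`, under the assumed probe semantics noted earlier:

    ch = core_probe_stream_next(stream);              /* probe the '+' */
    switch (core_probe_stream_next(stream)) {         /* probe one more character */
    case '+': type = TOKEN_ADD_ADD;    goto double_char;   /* "++" */
    case '=': type = TOKEN_ASSIGN_ADD; goto double_char;   /* "+=" */
    default:
        core_probe_stream_reset(stream);              /* discard all probes */
        type = TOKEN_ADD;                             /* plain "+", handled by once_char */
        break;
    }
    /* double_char / once_char then call core_probe_stream_consume exactly
     * twice / once and advance lexer->pos, so the stream position only moves
     * for characters that actually belong to the emitted token. */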
View File

@@ -8,10 +8,10 @@ static inline void test_lexer_string(const char *input,
token_type_t expected_type) {
smcc_lexer_t lexer;
lexer_tok_t token;
core_mem_stream_t stream;
core_mem_probe_stream_t stream;
lexer_init(&lexer,
core_mem_stream_init(&stream, input, strlen(input), false));
lexer_init(&lexer, core_mem_probe_stream_init(&stream, input, strlen(input),
false));
lexer_get_token(&lexer, &token);
TEST_CHECK(token.type == expected_type);
@@ -167,4 +167,4 @@ TEST_LIST = {{"operators", test_operators},
{"literals", test_literals},
{"edge_cases", test_edge_cases},
// {"error_handling", test_error_handling},
{NULL, NULL}};
{NULL, NULL}};

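Reassembled from the hunk above, the updated test helper in full (no new behaviour, only the probe-stream types; core_mem_probe_stream_init is assumed to return the embedded core_probe_stream_t*, as the driver below also relies on):

    static inline void test_lexer_string(const char *input,
                                         token_type_t expected_type) {
        smcc_lexer_t lexer;
        lexer_tok_t token;
        core_mem_probe_stream_t stream;

        lexer_init(&lexer, core_mem_probe_stream_init(&stream, input,
                                                      strlen(input), false));
        lexer_get_token(&lexer, &token);
        TEST_CHECK(token.type == expected_type);
    }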
View File

@@ -63,9 +63,9 @@ int main(int argc, char *argv[]) {
}
smcc_lexer_t lexer;
core_mem_stream_t mem_stream = {0};
core_stream_t *stream =
core_mem_stream_init(&mem_stream, buffer, fsize, false);
core_mem_probe_stream_t mem_stream = {0};
core_probe_stream_t *stream =
core_mem_probe_stream_init(&mem_stream, buffer, fsize, false);
Assert(stream != null);
cstring_clear(&stream->name);
cstring_push_cstr(&stream->name, file_name, strlen(file_name));
@@ -78,7 +78,7 @@ int main(int argc, char *argv[]) {
break;
}
LOG_DEBUG("token `%s` at %s:%u:%u", get_tok_name(tok.type),
tok.loc.name, tok.loc.line, tok.loc.col);
cstring_as_cstr(&tok.loc.name), tok.loc.line, tok.loc.col);
Assert(tok.loc.offset <= fsize);
// LOG_DEBUG("%s", tok.val.str);
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
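Taken together, the command-line driver now wraps the file buffer in a memory-backed probe stream, names the stream after the input file for diagnostics, and walks tokens until end of input. A hedged sketch of that loop; the end-of-input token name (TOKEN_EOF) is an assumption, everything else is taken from the hunks above:

    core_mem_probe_stream_t mem_stream = {0};
    core_probe_stream_t *stream =
        core_mem_probe_stream_init(&mem_stream, buffer, fsize, false);
    Assert(stream != null);                      /* Assert/null: project macros */

    cstring_clear(&stream->name);                /* label diagnostics with the file name */
    cstring_push_cstr(&stream->name, file_name, strlen(file_name));

    smcc_lexer_t lexer;
    lexer_init(&lexer, stream);

    lexer_tok_t tok;
    for (;;) {
        lexer_get_token(&lexer, &tok);
        if (tok.type == TOKEN_EOF)               /* assumed end-of-input token */
            break;
        LOG_DEBUG("token `%s` at %s:%u:%u", get_tok_name(tok.type),
                  cstring_as_cstr(&tok.loc.name), tok.loc.line, tok.loc.col);
        Assert(tok.loc.offset <= fsize);
    }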