feat: 重构 stream 流 API 并适配 lex_parse 和 lexer

2025-12-08 23:04:11 +08:00
parent 1ab07a5815
commit 36bff64a91
17 changed files with 402 additions and 244 deletions
--- a/libs/lexer/src/lexer.c
+++ b/libs/lexer/src/lexer.c
@@ -75,7 +75,7 @@ static inline int keyword_cmp(const char *name, int len) {
    return -1; // Not a keyword.
 }

-void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) {
+void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream) {
    lexer->stream = stream;
    lexer->pos = core_pos_init();
    // FIXME
@@ -86,9 +86,9 @@ void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) {

 static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
    token->loc = lexer->pos;
-    core_stream_t *stream = lexer->stream;
-    core_stream_reset_char(stream);
-    int ch = core_stream_peek_char(stream);
+    core_probe_stream_t *stream = lexer->stream;
+    core_probe_stream_reset(stream);
+    int ch = core_probe_stream_next(stream);

    usize n;
    cstring_t str = cstring_new();
@@ -104,7 +104,7 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
    const char line[] = "line";

    for (int i = 0; i < (int)sizeof(line); i++) {
-        ch = core_stream_next_char(stream);
+        ch = core_probe_stream_consume(stream);
        core_pos_next(&lexer->pos);
        if (ch != line[i]) {
            LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
@@ -118,12 +118,12 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
        goto SKIP_LINE;
    }

-    if (core_stream_next_char(stream) != ' ') {
+    if (core_probe_stream_consume(stream) != ' ') {
        lex_parse_skip_line(lexer->stream, &lexer->pos);
        token->loc.line = token->value.n;
    }

-    if (core_stream_peek_char(stream) != '"') {
+    if (core_probe_stream_next(stream) != '"') {
        LEX_ERROR("Invalid `#` line");
        goto SKIP_LINE;
    }
@@ -149,26 +149,26 @@ ERR:
 void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
    token->loc = lexer->pos;
    token->type = TOKEN_UNKNOWN;
-    core_stream_t *stream = lexer->stream;
+    core_probe_stream_t *stream = lexer->stream;

-    core_stream_reset_char(stream);
+    core_probe_stream_reset(stream);
    token_type_t type = TOKEN_UNKNOWN;
-    int ch = core_stream_peek_char(stream);
+    int ch = core_probe_stream_next(stream);

    // once step
    switch (ch) {
    case '=':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '=':
            type = TOKEN_EQ;
            goto double_char;
        default:
-            core_stream_reset_char(stream), type = TOKEN_ASSIGN;
+            core_probe_stream_reset(stream), type = TOKEN_ASSIGN;
            break;
        }
        break;
    case '+':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '+':
            type = TOKEN_ADD_ADD;
            goto double_char;
@@ -176,12 +176,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            type = TOKEN_ASSIGN_ADD;
            goto double_char;
        default:
-            core_stream_reset_char(stream), type = TOKEN_ADD;
+            core_probe_stream_reset(stream), type = TOKEN_ADD;
            break;
        }
        break;
    case '-':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '-':
            type = TOKEN_SUB_SUB;
            goto double_char;
@@ -192,22 +192,22 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            type = TOKEN_DEREF;
            goto double_char;
        default:
-            core_stream_reset_char(stream), type = TOKEN_SUB;
+            core_probe_stream_reset(stream), type = TOKEN_SUB;
            break;
        }
        break;
    case '*':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '=':
            type = TOKEN_ASSIGN_MUL;
            goto double_char;
        default:
-            core_stream_reset_char(stream), type = TOKEN_MUL;
+            core_probe_stream_reset(stream), type = TOKEN_MUL;
            break;
        }
        break;
    case '/':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '=':
            type = TOKEN_ASSIGN_DIV;
            goto double_char;
@@ -220,22 +220,22 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            token->type = TOKEN_BLOCK_COMMENT;
            goto END;
        default:
-            core_stream_reset_char(stream), type = TOKEN_DIV;
+            core_probe_stream_reset(stream), type = TOKEN_DIV;
            break;
        }
        break;
    case '%':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '=':
            type = TOKEN_ASSIGN_MOD;
            goto double_char;
        default:
-            core_stream_reset_char(stream), type = TOKEN_MOD;
+            core_probe_stream_reset(stream), type = TOKEN_MOD;
            break;
        }
        break;
    case '&':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '&':
            type = TOKEN_AND_AND;
            goto double_char;
@@ -243,12 +243,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            type = TOKEN_ASSIGN_AND;
            goto double_char;
        default:
-            core_stream_reset_char(stream), type = TOKEN_AND;
+            core_probe_stream_reset(stream), type = TOKEN_AND;
            break;
        }
        break;
    case '|':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '|':
            type = TOKEN_OR_OR;
            goto double_char;
@@ -256,27 +256,27 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            type = TOKEN_ASSIGN_OR;
            goto double_char;
        default:
-            core_stream_reset_char(stream), type = TOKEN_OR;
+            core_probe_stream_reset(stream), type = TOKEN_OR;
            break;
        }
        break;
    case '^':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '=':
            type = TOKEN_ASSIGN_XOR;
            goto double_char;
        default:
-            core_stream_reset_char(stream), type = TOKEN_XOR;
+            core_probe_stream_reset(stream), type = TOKEN_XOR;
            break;
        }
        break;
    case '<':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '=':
            type = TOKEN_LE;
            goto double_char;
        case '<': {
-            if (core_stream_peek_char(stream) == '=') {
+            if (core_probe_stream_next(stream) == '=') {
                type = TOKEN_ASSIGN_L_SH;
                goto triple_char;
            } else {
@@ -286,17 +286,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            break;
        }
        default:
-            core_stream_reset_char(stream), type = TOKEN_LT;
+            core_probe_stream_reset(stream), type = TOKEN_LT;
            break;
        }
        break;
    case '>':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '=':
            type = TOKEN_GE;
            goto double_char;
        case '>': {
-            if (core_stream_peek_char(stream) == '=') {
+            if (core_probe_stream_next(stream) == '=') {
                type = TOKEN_ASSIGN_R_SH;
                goto triple_char;
            } else {
@@ -306,7 +306,7 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            break;
        }
        default:
-            core_stream_reset_char(stream), type = TOKEN_GT;
+            core_probe_stream_reset(stream), type = TOKEN_GT;
            break;
        }
        break;
@@ -314,12 +314,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
        type = TOKEN_BIT_NOT;
        break;
    case '!':
-        switch (core_stream_peek_char(stream)) {
+        switch (core_probe_stream_next(stream)) {
        case '=':
            type = TOKEN_NEQ;
            goto double_char;
        default:
-            core_stream_reset_char(stream), type = TOKEN_NOT;
+            core_probe_stream_reset(stream), type = TOKEN_NOT;
            break;
        }
        break;
@@ -351,8 +351,8 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
        type = TOKEN_COLON;
        break;
    case '.':
-        if (core_stream_peek_char(stream) == '.' &&
-            core_stream_peek_char(stream) == '.') {
+        if (core_probe_stream_next(stream) == '.' &&
+            core_probe_stream_next(stream) == '.') {
            type = TOKEN_ELLIPSIS;
            goto triple_char;
        }
@@ -452,13 +452,13 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
    }
    goto once_char;
 triple_char:
-    core_stream_next_char(stream);
+    core_probe_stream_consume(stream);
    core_pos_next(&lexer->pos);
 double_char:
-    core_stream_next_char(stream);
+    core_probe_stream_consume(stream);
    core_pos_next(&lexer->pos);
 once_char:
-    core_stream_next_char(stream);
+    core_probe_stream_consume(stream);
    core_pos_next(&lexer->pos);
    token->type = type;
 END: