refactor: 重构前端代码并添加日志功能

- 重命名和重构了多个文件，包括 lexer、parser 和 AST 相关代码
- 添加了日志功能，使用 LOG_* 宏替代原有的 error 和 warn 函数
- 优化了错误处理和内存分配方式
- 调整了代码结构，提高了模块化和可读性
2025-03-19 12:22:55 +08:00
parent 172d72b0a0
commit 05c637e594
76 changed files with 1479 additions and 310 deletions
--- a/ccompiler/frontend/lexer/lexer.c
+++ b/ccompiler/frontend/lexer/lexer.c
@ -26,8 +26,8 @@ the distribution and installation instructions.
 Chris Fraser / cwf@aya.yale.edu
 David Hanson / drh@drhanson.net
 */
-#define FRONTEND_IMPLEMENTATION
-#include "../frontend.h"
+#include <lib/core.h>
+#include "lexer_log.h"
 #include "token.h"
 #include "lexer.h"

@ -74,8 +74,9 @@ static inline int keyword_cmp(const char* name, int len) {
    return -1; // Not a keyword.
 }

-void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
-{
+void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread) {
+    init_lib_core();
+
    lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer);
    lexer->index = 1;
    lexer->line = 1;
@ -96,10 +97,10 @@ static void flush_buffer(lexer_t* lexer) {
    lexer->cur_ptr = (unsigned char*)lexer->buffer;

    int read_size = LEXER_BUFFER_SIZE - num;
-    // TODO size_t to int maybe lose precision
+    // TODO rt_size_t to int maybe lose precision
    int got_size = lexer->sread(lexer->buffer + num, read_size, 1, read_size, lexer->stream);
    if (got_size < 0) {
-        error("lexer read error");
+        LEX_ERROR("lexer read error");
    } else if (got_size < read_size) {
        lexer->end_ptr += got_size;
        lexer->end_ptr[0] = '\0'; // EOF
@ -107,7 +108,7 @@ static void flush_buffer(lexer_t* lexer) {
    } else if (got_size == read_size) {
        lexer->end_ptr += got_size;
    } else {
-        error("lexer read error imposible got_size > read_size maybe overflow?");
+        LEX_ERROR("lexer read error imposible got_size > read_size maybe overflow?");
    }
 }

@ -153,8 +154,10 @@ static char got_slash(unsigned char* peek) {
        case 'r': return '\r';
        case 't': return '\t';
        case 'v': return '\v';
-        default: error("Unknown escape character"); 
+        default: break;
    }
+    LEX_ERROR("Unknown escape character");
+    return -1;
 }

 static void parse_char_literal(lexer_t* lexer, tok_t* token) {
@ -168,7 +171,7 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) {
        val = *peek++;
    }

-    if (*peek++ != '\'') error("Unclosed character literal");
+    if (*peek++ != '\'') LEX_ERROR("Unclosed character literal");
    token->val.ch = val;
    lexer->cur_ptr = peek;
    token->val.have = 1;
@ -178,7 +181,7 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) {
 static void parse_string_literal(lexer_t* lexer, tok_t* token) {
    unsigned char* peek = lexer->cur_ptr + 1;
    // TODO string literal size check
-    char* dest = token->val.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
+    char* dest = token->val.str = rt._malloc(LEXER_MAX_TOKEN_SIZE + 1);
    int len = 0;

    while (*peek != '"') {
@ -189,7 +192,7 @@ static void parse_string_literal(lexer_t* lexer, tok_t* token) {
            *peek = got_slash(peek);
        }
        
-        if (len >= LEXER_MAX_TOKEN_SIZE) error("String too long");
+        if (len >= LEXER_MAX_TOKEN_SIZE) LEX_ERROR("String too long");
        dest[len++] = *peek++;
    }
    dest[len] = '\0';
@ -431,7 +434,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
        lexer->line++;
        tok = TOKEN_FLUSH; break;
    case '#':
-        warn("TODO: #define\n");
+        LEX_WARN("Marroc does not support in lexer rather in preprocessor, it will be ignored");
        goto_newline(lexer);
        tok = TOKEN_FLUSH;
        goto END;
@ -458,14 +461,14 @@ void get_token(lexer_t* lexer, tok_t* token) {
    case '_':
        // TOKEN_IDENT
        if ((*peek == 'L' && *peek == '\'') || (*peek == 'L' && *peek == '"')) {
-            error("unsupport wide-character char literal by `L` format");
+            LEX_ERROR("unsupport wide-character char literal by `L` format");
        }
        while (1) {
            if (peek == lexer->end_ptr) {
-                error("unsupport outof 64 length identifier");
+                LEX_ERROR("unsupport outof 64 length identifier");
            }
            if ((*peek >= 'a' && *peek <= 'z') || (*peek >= 'A' && *peek <= 'Z') ||
-            (*peek == '_') || (*peek >= '0' && *peek <= '9')) {
+                    (*peek == '_') || (*peek >= '0' && *peek <= '9')) {
                peek++;
                continue;
            }
@ -475,7 +478,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
        int res = keyword_cmp((const char*)lexer->cur_ptr, peek - (lexer->cur_ptr));
        if (res == -1) {
            int strlen = peek - lexer->cur_ptr;
-            unsigned char* str = xmalloc(strlen + 1);
+            unsigned char* str = rt._malloc(strlen + 1);
            constant.have = 1;
            constant.str = (char*)str;
            for (int i = 0; i < strlen; i++) {
@ -489,7 +492,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
            tok = keywords[res].tok; break;
        }
    default:
-        error("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
+        LEX_ERROR("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
        break;
    }

@ -497,6 +500,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
 END:
    token->val = constant;
    token->type = tok;
+    LEX_DEBUG("get token `%s` (ch: %c, int: %d)", get_tok_name(token->type), token->val.ch, token->val.i);
 }

 // get_token maybe got invalid (with parser)
@ -507,4 +511,3 @@ void get_valid_token(lexer_t* lexer, tok_t* token) {
        type = token->type;
    } while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT);
 }
-
--- a/ccompiler/frontend/lexer/lexer.h
+++ b/ccompiler/frontend/lexer/lexer.h
@ -1,6 +1,7 @@
-#ifndef __LEXER_H__
-#define __LEXER_H__
+#ifndef __SMCC_LEXER_H__
+#define __SMCC_LEXER_H__

+#include <lib/core.h>
 #include "token.h"
 #ifndef LEXER_MAX_TOKEN_SIZE 
 #define LEXER_MAX_TOKEN_SIZE 63
--- a/ccompiler/frontend/lexer/lexer_log.h
+++ b/ccompiler/frontend/lexer/lexer_log.h
@ -0,0 +1,13 @@
+#ifndef __SMCC_LEXER_LOG_H__
+#define __SMCC_LEXER_LOG_H__
+
+#include <lib/rt/rt.h>
+
+#define LEX_NOTSET( fmt, ...)     LOG_NOTSET("LEXER: " fmt, ##__VA_ARGS__)
+#define LEX_DEBUG(  fmt, ...)      LOG_DEBUG("LEXER: " fmt, ##__VA_ARGS__) 
+#define LEX_INFO(   fmt, ...)       LOG_INFO("LEXER: " fmt, ##__VA_ARGS__)  
+#define LEX_WARN(   fmt, ...)       LOG_WARN("LEXER: " fmt, ##__VA_ARGS__)  
+#define LEX_ERROR(  fmt, ...)      LOG_ERROR("LEXER: " fmt, ##__VA_ARGS__) 
+#define LEX_FATAL(  fmt, ...)      LOG_FATAL("LEXER: " fmt, ##__VA_ARGS__) 
+
+#endif // __SMCC_LEXER_LOG_H__
--- a/ccompiler/frontend/lexer/tests/Makefile
+++ b/ccompiler/frontend/lexer/tests/Makefile
@ -1,6 +1,7 @@
 CC = gcc
-CFLAGS = -g -Wall
+CFLAGS = -g -Wall -I../../../..
 SRC = ../lexer.c ../token.c
+LIB = -L../../../../lib -lcore

 all = test_all

@ -8,10 +9,10 @@ test_all: test
 	./test

 run:
-	$(CC) $(CFLAGS) $(SRC) run.c -o run
+	$(CC) $(CFLAGS) $(SRC) run.c $(LIB) -o run

 test:
-	$(CC) $(CFLAGS) $(SRC) -o test test.c
+	$(CC) $(CFLAGS) $(SRC) $(LIB) -o test test.c

 clean:
 	rm -f test run
--- a/ccompiler/frontend/lexer/tests/run.c
+++ b/ccompiler/frontend/lexer/tests/run.c
@ -1,5 +1,6 @@
 #include "../lexer.h"
 #include <stdio.h>
+#include <string.h>
 // gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
 /*
 tok_tConstant {
@ -18,9 +19,14 @@ tok_tConstant {
 int g_num;
 int g_num_arr[3];
 int main(int argc, char* argv[]) {
-    int num = 0;
+    // int num = 0;
+    // You Must Be Call
+    init_lib_core();
+    if (argc == 3 && strcmp(argv[2], "-nodebug") == 0) {
+        log_set_level(NULL, LOG_LEVEL_ALL & ~LOG_LEVEL_DEBUG);
+    }

-    const char* file_name = "test_lexer.c";
+    const char* file_name = "run.c";
    if (argc == 2) {
        file_name = argv[1];
    }
@ -32,7 +38,7 @@ int main(int argc, char* argv[]) {
    printf("open file success\n");

    lexer_t lexer;
-    init_lexer(&lexer, "test_lexter.c", fp, (lexer_sread_fn)fread_s);
+    init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);
    tok_t tok;

    while (1) {
@ -40,7 +46,7 @@ int main(int argc, char* argv[]) {
        if (tok.type == TOKEN_EOF) {
            break;
        }
-        printf("line: %d, column: %d, type: %3d, typename: %s\n",
-            lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
+        // printf("line: %d, column: %d, type: %3d, typename: %s\n",
+        //     lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
    }
 }
--- a/ccompiler/frontend/lexer/token.c
+++ b/ccompiler/frontend/lexer/token.c
@ -1,12 +1,12 @@
-#define FRONTEND_IMPLEMENTATION
-#include "../frontend.h"
+#include <lib/core.h>
+#include "lexer_log.h"
 #include "token.h"

 #define ROUND_IDX(idx) ((idx) % tokbuf->cap)

-tok_t* pop_tok(tok_buf_t* tokbuf) {
+tok_t* pop_tok(tok_stream_t* tokbuf) {
    if (tokbuf->size == 0) {
-        error("no token to pop");
+        LEX_ERROR("no token to pop");
        return NULL;
    }
    int idx = tokbuf->cur;
@ -15,11 +15,11 @@ tok_t* pop_tok(tok_buf_t* tokbuf) {
    return tokbuf->buf + idx;
 }

-void flush_peek_tok(tok_buf_t* tokbuf) {
+void flush_peek_tok(tok_stream_t* tokbuf) {
    tokbuf->peek = tokbuf->cur;
 }

-void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) {
+void init_tokbuf(tok_stream_t *tokbuf, void *stream, tok_stream_get_func gettok) {
    tokbuf->cur = 0;
    tokbuf->end = 0;
    tokbuf->peek = 0;
@ -30,18 +30,19 @@ void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) {
    tokbuf->cap = 0;
 }

-tok_t *peek_tok(tok_buf_t *tokbuf) {
+tok_t *peek_tok(tok_stream_t *tokbuf) {
+    Assert(tokbuf->size <= tokbuf->cap);
    int idx = tokbuf->peek;
    tokbuf->peek = ROUND_IDX(idx + 1);
-    if (tokbuf->size >= tokbuf->cap) {
-        error("peek too deep, outof array size");
-    }
+
    if (idx == tokbuf->end) {
        if (tokbuf->size == tokbuf->cap) {
-            error("peek_tok buffer overflow");
+            LEX_ERROR("peek_tok buffer overflow");
+            return NULL;
        }
        if (tokbuf->gettok == NULL) {
-            error("peek_tok can not got tok");
+            LEX_ERROR("peek_tok can not got tok");
+            return NULL;
        }
        tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx]));
        tokbuf->size++;
@ -51,15 +52,16 @@ tok_t *peek_tok(tok_buf_t *tokbuf) {
    return &(tokbuf->buf[idx]);
 }

-tok_type_t peek_tok_type(tok_buf_t* tokbuf) {
+tok_type_t peek_tok_type(tok_stream_t* tokbuf) {
    return peek_tok(tokbuf)->type;
 }

-int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type) {
+int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type) {
    flush_peek_tok(tokbuf);
    tok_t* tok = peek_tok(tokbuf);
    if (tok->type != type) {
-        error("expected tok: %s, got %s", get_tok_name(type), get_tok_name(tok->type));
+        LEX_ERROR("expected tok `%s` but got `%s`", get_tok_name(type), get_tok_name(tok->type));
+        return 0;
    } else {
        pop_tok(tokbuf);
    }
--- a/ccompiler/frontend/lexer/token.h
+++ b/ccompiler/frontend/lexer/token.h
@ -51,10 +51,10 @@ enum CSTD_KEYWORD {
    X(flush          , TOKEN_FLUSH)                         \
    X("=="           , TOKEN_EQ)                            \
    X("="            , TOKEN_ASSIGN)                        \
-    X("++"           , TOKEN_ADD_ADD)                           \
+    X("++"           , TOKEN_ADD_ADD)                       \
    X("+="           , TOKEN_ASSIGN_ADD)                    \
    X("+"            , TOKEN_ADD)                           \
-    X("--"           , TOKEN_SUB_SUB)                           \
+    X("--"           , TOKEN_SUB_SUB)                       \
    X("-="           , TOKEN_ASSIGN_SUB)                    \
    X("->"           , TOKEN_DEREF)                         \
    X("-"            , TOKEN_SUB)                           \
@ -134,7 +134,7 @@ typedef struct tok {
    tok_val_t val;
 } tok_t;

-typedef struct tok_buf {
+typedef struct tok_stream {
    int cur;
    int end;
    int peek;
@ -143,15 +143,15 @@ typedef struct tok_buf {
    tok_t* buf;
    void* stream;
    void (*gettok)(void* stream, tok_t* token);
-} tok_buf_t;
+} tok_stream_t;

-typedef void(*get_tokbuf_func)(void* stream, tok_t* token);
-void init_tokbuf(tok_buf_t* tokbuf, void* stream, get_tokbuf_func gettok);
-tok_t* peek_tok(tok_buf_t* tokbuf);
-tok_t* pop_tok(tok_buf_t* tokbuf);
-void flush_peek_tok(tok_buf_t* tokbuf);
-tok_type_t peek_tok_type(tok_buf_t* tokbuf);
-int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type);
+typedef void(*tok_stream_get_func)(void* stream, tok_t* token);
+void init_tokbuf(tok_stream_t* tokbuf, void* stream, tok_stream_get_func gettok);
+tok_t* peek_tok(tok_stream_t* tokbuf);
+tok_t* pop_tok(tok_stream_t* tokbuf);
+void flush_peek_tok(tok_stream_t* tokbuf);
+tok_type_t peek_tok_type(tok_stream_t* tokbuf);
+int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type);
 const char* get_tok_name(tok_type_t type);

-#endif
+#endif