feat: add function call support and rewrite code

2025-03-07 12:29:53 +08:00
parent 09299e339c
commit 95bf44eb3f
37 changed files with 3369 additions and 1063 deletions
--- a/ccompiler/frontend/lexer/lexer.c
+++ b/ccompiler/frontend/lexer/lexer.c
@@ -26,13 +26,15 @@ the distribution and installation instructions.
 Chris Fraser / cwf@aya.yale.edu
 David Hanson / drh@drhanson.net
 */
+#define FRONTEND_IMPLEMENTATION
 #include "../frontend.h"
+#include "token.h"
 #include "lexer.h"

 static const struct {
    const char* name;
    enum CSTD_KEYWORD std_type;
-    enum TokenType tok;
+    tok_type_t tok;
 } keywords[] = {
    #define X(name, std_type, tok, ...) { #name, std_type, tok },
    KEYWORD_TABLE
@@ -72,7 +74,7 @@ static inline int keyword_cmp(const char* name, int len) {
    return -1; // Not a keyword.
 }

-void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
+void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
 {
    lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer);
    lexer->index = 1;
@@ -86,12 +88,12 @@ void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_
    }
 }

-static void flush_buffer(struct Lexer* lexer) {
+static void flush_buffer(lexer_t* lexer) {
    int num = lexer->end_ptr - lexer->cur_ptr;
    for (int i = 0; i < num; i++) {
        lexer->buffer[i] = lexer->cur_ptr[i];
    }
-    lexer->cur_ptr = lexer->buffer;
+    lexer->cur_ptr = (unsigned char*)lexer->buffer;

    int read_size = LEXER_BUFFER_SIZE - num;
    // TODO size_t to int maybe lose precision
@@ -109,7 +111,7 @@ static void flush_buffer(struct Lexer* lexer) {
    }
 }

-static void goto_newline(struct Lexer* lexer) {
+static void goto_newline(lexer_t* lexer) {
    do {
        if (lexer->cur_ptr == lexer->end_ptr) {
            flush_buffer(lexer);
@@ -119,7 +121,7 @@ static void goto_newline(struct Lexer* lexer) {
    } while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0');
 }

-static void goto_block_comment(struct Lexer* lexer) {
+static void goto_block_comment(lexer_t* lexer) {
    while (1) {
        if (lexer->end_ptr - lexer->cur_ptr < 2) {
            flush_buffer(lexer);
@@ -155,7 +157,7 @@ static char got_slash(unsigned char* peek) {
    }
 }

-static void parse_char_literal(struct Lexer* lexer, struct Token* token) {
+static void parse_char_literal(lexer_t* lexer, tok_t* token) {
    char val = 0;
    unsigned char* peek = lexer->cur_ptr + 1;
    if (*peek == '\\') {
@@ -166,16 +168,16 @@ static void parse_char_literal(struct Lexer* lexer, struct Token* token) {
    }

    if (*peek != '\'') error("Unclosed character literal");
-    token->constant.ch = val;
+    token->val.ch = val;
    lexer->cur_ptr = peek + 1;
-    token->constant.have = 1;
+    token->val.have = 1;
    token->type = TOKEN_CHAR_LITERAL;
 }

-static void parse_string_literal(struct Lexer* lexer, struct Token* token) {
+static void parse_string_literal(lexer_t* lexer, tok_t* token) {
    unsigned char* peek = lexer->cur_ptr + 1;
    // TODO string literal size check
-    char* dest = token->constant.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
+    char* dest = token->val.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
    int len = 0;

    while (*peek != '"') {
@@ -191,12 +193,12 @@ static void parse_string_literal(struct Lexer* lexer, struct Token* token) {
    }
    dest[len] = '\0';
    lexer->cur_ptr = peek + 1;
-    token->constant.have = 1;
+    token->val.have = 1;
    token->type = TOKEN_STRING_LITERAL;
 }

 // FIXME it write by AI maybe error
-static void parse_number(struct Lexer* lexer, struct Token* token) {
+static void parse_number(lexer_t* lexer, tok_t* token) {
    unsigned char* peek = lexer->cur_ptr;
    int base = 10;
    int is_float = 0;
@@ -255,12 +257,12 @@ static void parse_number(struct Lexer* lexer, struct Token* token) {
    if ((*peek == 'e' || *peek == 'E') && base == 10) {
        is_float = 1;
        peek++;
-        int exp_sign = 1;
+        // int exp_sign = 1;
        int exponent = 0;

        if (*peek == '+') peek++;
        else if (*peek == '-') {
-            exp_sign = -1;
+            // exp_sign = -1;
            peek++;
        }

@@ -273,19 +275,19 @@ static void parse_number(struct Lexer* lexer, struct Token* token) {

    // 存储结果
    lexer->cur_ptr = peek;
-    token->constant.have = 1;
+    token->val.have = 1;
    if (is_float) {
-        token->constant.d = float_val;
+        token->val.d = float_val;
        token->type = TOKEN_FLOAT_LITERAL;
    } else {
-        token->constant.ll = int_val;
+        token->val.ll = int_val;
        token->type = TOKEN_INT_LITERAL;
    }
 }

 #define GOT_ONE_TOKEN_BUF_SIZE 64
 // /zh/c/language/operator_arithmetic.html
-void get_token(struct Lexer* lexer, struct Token* token) {
+void get_token(lexer_t* lexer, tok_t* token) {
    // 需要保证缓冲区始终可读
    if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) {
        flush_buffer(lexer);
@@ -305,8 +307,8 @@ void get_token(struct Lexer* lexer, struct Token* token) {
        token->type = TOKEN_FLUSH;
    }
    
-    enum TokenType tok = TOKEN_INIT;
-    struct TokenConstant constant;
+    tok_type_t tok = TOKEN_INIT;
+    tok_val_t constant;
    constant.have = 0;
    
    // once step
@@ -392,7 +394,7 @@ void get_token(struct Lexer* lexer, struct Token* token) {
        switch (*peek++) {
            case '=': tok = TOKEN_NEQ; break;
            default: peek--, tok = TOKEN_NOT; break;
-        }
+        } break;
    case '[':
        tok = TOKEN_L_BRACKET; break;
    case ']':
@@ -454,7 +456,7 @@ void get_token(struct Lexer* lexer, struct Token* token) {
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z':
    case '_':
        // TOKEN_IDENT
-        if (*peek == 'L' && *peek == '\'' || *peek == 'L' && *peek == '"') {
+        if ((*peek == 'L' && *peek == '\'') || (*peek == 'L' && *peek == '"')) {
            error("unsupport wide-character char literal by `L` format");
        }
        while (1) {
@@ -469,18 +471,18 @@ void get_token(struct Lexer* lexer, struct Token* token) {
            break;
        }
    
-        int res = keyword_cmp(lexer->cur_ptr, peek - (lexer->cur_ptr));
+        int res = keyword_cmp((const char*)lexer->cur_ptr, peek - (lexer->cur_ptr));
        if (res == -1) {
            int strlen = peek - lexer->cur_ptr;
            unsigned char* str = xmalloc(strlen + 1);
            constant.have = 1;
-            constant.str = str;
+            constant.str = (char*)str;
            for (int i = 0; i < strlen; i++) {
                str[i] = lexer->cur_ptr[i];
            }
            str[strlen] = '\0';
            constant.have = 1;
-            constant.str = str;
+            constant.str = (char*)str;
            tok = TOKEN_IDENT; break;
        } else {
            tok = keywords[res].tok; break;
@@ -492,32 +494,16 @@ void get_token(struct Lexer* lexer, struct Token* token) {

    lexer->cur_ptr = peek;
 END:
-    token->constant = constant;
+    token->val = constant;
    token->type = tok;
 }

 // get_token maybe got invalid (with parser)
-void get_valid_token(struct Lexer* lexer, struct Token* token) {
-    enum TokenType type;
+void get_valid_token(lexer_t* lexer, tok_t* token) {
+    tok_type_t type;
    do {
        get_token(lexer, token);
        type = token->type;
    } while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT);
 }

-// 生成字符串映射（根据需求选择#str或#name）
-static const char* token_strings[] = {
-    // 普通token使用#str
-    #define X(str, tok) [tok] = #str,
-    TOKEN_TABLE
-    #undef X
-    
-    // 关键字使用#name
-    #define X(name, std, tok) [tok] = #name,
-    KEYWORD_TABLE
-    #undef X
-};
-
-const char* get_token_name(enum TokenType type) {
-    return token_strings[type];
-}