This commit is contained in:
ZZY
2025-04-01 00:13:21 +08:00
parent 2b4857001c
commit 74f43a1ab7
79 changed files with 2271 additions and 2861 deletions

View File

@ -74,7 +74,7 @@ static inline int keyword_cmp(const char* name, int len) {
return -1; // Not a keyword.
}
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread, strpool_t* strpool) {
void init_lexer(cc_lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread, strpool_t* strpool) {
lexer->strpool = strpool;
lexer->cur_ptr = lexer->end_ptr = (char*)&(lexer->buffer);
lexer->loc.fname = strpool_intern(lexer->strpool, file_name);
@ -87,7 +87,7 @@ void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread
rt_memset(lexer->buffer, 0, sizeof(lexer->buffer));
}
static void flush_buffer(lexer_t* lexer) {
static void flush_buffer(cc_lexer_t* lexer) {
int num = lexer->end_ptr - lexer->cur_ptr;
for (int i = 0; i < num; i++) {
lexer->buffer[i] = lexer->cur_ptr[i];
@ -96,7 +96,7 @@ static void flush_buffer(lexer_t* lexer) {
int read_size = LEXER_BUFFER_SIZE - num;
// TODO rt_size_t to int maybe lose precision
int got_size = lexer->sread(lexer->buffer + num, read_size, 1, read_size, lexer->stream);
int got_size = lexer->sread(lexer->buffer + num, 1, read_size, lexer->stream);
if (got_size < 0) {
LEX_ERROR("lexer read error");
} else if (got_size < read_size) {
@ -110,7 +110,7 @@ static void flush_buffer(lexer_t* lexer) {
}
}
static void goto_newline(lexer_t* lexer) {
static void goto_newline(cc_lexer_t* lexer) {
do {
if (lexer->cur_ptr == lexer->end_ptr) {
flush_buffer(lexer);
@ -120,7 +120,7 @@ static void goto_newline(lexer_t* lexer) {
} while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0');
}
static void goto_block_comment(lexer_t* lexer) {
static void goto_block_comment(cc_lexer_t* lexer) {
while (1) {
if (lexer->end_ptr - lexer->cur_ptr < 2) {
flush_buffer(lexer);
@ -159,7 +159,7 @@ static char got_slash(char* peek) {
return -1;
}
static void parse_char_literal(lexer_t* lexer, tok_t* token) {
static void parse_char_literal(cc_lexer_t* lexer, tok_t* token) {
char val = 0;
char* peek = lexer->cur_ptr + 1;
if (*peek == '\\') {
@ -175,7 +175,7 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) {
token->val.ch = val;
}
static void parse_string_literal(lexer_t* lexer, tok_t* token) {
static void parse_string_literal(cc_lexer_t* lexer, tok_t* token) {
char* peek = lexer->cur_ptr + 1;
// TODO string literal size check
static char dest[LEXER_MAX_TOKEN_SIZE + 1];
@ -200,7 +200,7 @@ static void parse_string_literal(lexer_t* lexer, tok_t* token) {
}
// FIXME it write by AI maybe error
static void parse_number(lexer_t* lexer, tok_t* token) {
static void parse_number(cc_lexer_t* lexer, tok_t* token) {
char* peek = lexer->cur_ptr;
int base = 10;
int is_float = 0;
@ -290,7 +290,7 @@ static void parse_number(lexer_t* lexer, tok_t* token) {
#define GOT_ONE_TOKEN_BUF_SIZE 64
// /zh/c/language/operator_arithmetic.html
void get_token(lexer_t* lexer, tok_t* token) {
void get_token(cc_lexer_t* lexer, tok_t* token) {
// 需要保证缓冲区始终可读
if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) {
flush_buffer(lexer);
@ -515,7 +515,7 @@ static const tok_basic_type_t tok_type_map[] = {
}
// get_token maybe got invalid (with parser)
void get_valid_token(lexer_t* lexer, tok_t* token) {
void get_valid_token(cc_lexer_t* lexer, tok_t* token) {
tok_basic_type_t type;
do {
get_token(lexer, token);

View File

@ -10,10 +10,9 @@
#define LEXER_BUFFER_SIZE 4095
#endif
typedef int (*lexer_sread_fn)(void *dst_buf, int dst_size,
int elem_size, int count, void *stream);
typedef int (*lexer_sread_fn)(void *dst_buf, int elem_size, int count, void *stream);
typedef struct lexer {
typedef struct cc_lexer {
loc_t loc;
char* cur_ptr; // 当前扫描的字符,但是还没有开始扫描
@ -24,15 +23,15 @@ typedef struct lexer {
void* stream;
strpool_t* strpool;
} lexer_t;
} cc_lexer_t;
void init_lexer(lexer_t* lexer, const char* file_name, void* stream,
void init_lexer(cc_lexer_t* lexer, const char* file_name, void* stream,
lexer_sread_fn sread, strpool_t* strpool);
// pure token getter it will included empty token like TOKEN_BLANK
void get_token(lexer_t* lexer, tok_t* token);
void get_token(cc_lexer_t* lexer, tok_t* token);
// get_token maybe got invalid (with parser as TOKEN_BLANK)
void get_valid_token(lexer_t* lexer, tok_t* token);
void get_valid_token(cc_lexer_t* lexer, tok_t* token);
#endif

View File

@ -1,18 +0,0 @@
# Build and run the lexer unit tests (acutest) plus the ad-hoc `run` driver.
CC = gcc
CFLAGS = -g -Wall -I../../../.. -DLEX_LOG_LEVEL=4
SRC = ../lexer.c ../token.c
LIB = -L../../../../lib -lcore

# Fix: `all = test_all` only defined a make variable; it was clearly meant
# to be the default target. `run`/`test` stay non-phony: they are real
# output binaries.
.PHONY: all test_all clean
all: test_all

test_all: test
	./test

run:
	$(CC) $(CFLAGS) $(SRC) run.c $(LIB) -o run

test:
	$(CC) $(CFLAGS) $(SRC) $(LIB) -o test test.c

clean:
	rm -f test run

View File

@ -1,56 +0,0 @@
#include "../lexer.h"
#include <stdio.h>
#include <string.h>
// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
/*
tok_tConstant {
int have;
union {
char ch;
int i;
float f;
double d;
long long ll;
char* str;
};
};
*/
// NOTE(review): these globals are never used by main(); presumably they exist
// so this file, when lexed as its own test input, contains global
// declarations — confirm before removing.
int g_num;
int g_num_arr[3];
// Ad-hoc lexer driver: tokenizes a C source file and logs every token's
// type and location. Usage: run [file] [-nodebug]; defaults to "run.c".
// Returns 0 on success, 1 if the input file cannot be opened.
int main(int argc, char* argv[]) {
    // int num = 0;
    // The core runtime library must be initialized before any lexer call.
    init_lib_core();
    if (argc == 3 && strcmp(argv[2], "-nodebug") == 0) {
        // Keep every log level except DEBUG when -nodebug is passed.
        log_set_level(NULL, LOG_LEVEL_ALL & ~LOG_LEVEL_DEBUG);
    }
    const char* file_name = "run.c";
    if (argc == 2) {
        file_name = argv[1];
    }
    FILE* fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror("open file failed");
        return 1;
    }
    printf("open file success\n");
    lexer_t lexer;
    strpool_t strpool;
    init_strpool(&strpool);
    // NOTE(review): casting fread_s through lexer_sread_fn assumes the two
    // signatures match exactly — calling through a mismatched function
    // pointer type is undefined behavior; confirm against the current
    // lexer_sread_fn typedef.
    init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s, &strpool);
    tok_t tok;
    while (1) {
        get_valid_token(&lexer, &tok);
        if (tok.sub_type == TOKEN_EOF) {
            break;
        }
        LOG_DEBUG("tk type `%s` in %s:%d:%d", get_tok_name(tok.sub_type), tok.loc.fname, tok.loc.line, tok.loc.col);
        // LOG_DEBUG("%s", tok.val.str);
        // printf("line: %d, column: %d, type: %3d, typename: %s\n",
        //        lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
    }
    // Fix: the input file was never closed (resource leak on the happy path).
    fclose(fp);
    return 0;
}

View File

@ -1,172 +0,0 @@
// test_lexer.c
#include <lib/acutest.h>
#include "../lexer.h"
#include <string.h>
// Fake stream reader used in place of fread: `stream` is a NUL-terminated
// C string standing in for a file. Returns the number of bytes copied into
// dst_buf (including the terminating NUL so the lexer sees end-of-input),
// or 0 for a NULL stream.
// Fix: the original copied min(dst_size, elem_size*count) bytes regardless
// of the input string's actual length, reading past the end of short
// literals like "+". Bound the copy by strlen(stream)+1.
int test_read(void *dst_buf, int dst_size, int elem_size, int count, void *stream) {
    if (stream == NULL) {
        return 0;
    }
    int want = elem_size * count;
    int size = dst_size > want ? want : dst_size;
    int avail = (int)strlen((const char *)stream) + 1; // data actually present
    if (size > avail) {
        size = avail;
    }
    memcpy(dst_buf, stream, size);
    return size;
}
// Test helper
// Lex `input` as a miniature source buffer and check that the first valid
// token the lexer produces has the expected type.
static inline void test_lexer_string(const char* input, cc_tktype_t expected_type) {
    lexer_t lex;
    tok_t tk;
    init_lexer(&lex, "test.c", (void*)input, test_read);
    get_valid_token(&lex, &tk);
    TEST_CHECK(tk.type == expected_type);
    TEST_MSG("Expected: %s", get_tok_name(expected_type));
    TEST_MSG("Got: %s", get_tok_name(tk.type));
}
// Basic operator tests
void test_operators() {
TEST_CASE("Arithmetic operators"); {
test_lexer_string("+", TOKEN_ADD);
test_lexer_string("++", TOKEN_ADD_ADD);
test_lexer_string("+=", TOKEN_ASSIGN_ADD);
test_lexer_string("-", TOKEN_SUB);
test_lexer_string("--", TOKEN_SUB_SUB);
test_lexer_string("-=", TOKEN_ASSIGN_SUB);
test_lexer_string("*", TOKEN_MUL);
test_lexer_string("*=", TOKEN_ASSIGN_MUL);
test_lexer_string("/", TOKEN_DIV);
test_lexer_string("/=", TOKEN_ASSIGN_DIV);
test_lexer_string("%", TOKEN_MOD);
test_lexer_string("%=", TOKEN_ASSIGN_MOD);
}
TEST_CASE("Bitwise operators"); {
test_lexer_string("&", TOKEN_AND);
test_lexer_string("&&", TOKEN_AND_AND);
test_lexer_string("&=", TOKEN_ASSIGN_AND);
test_lexer_string("|", TOKEN_OR);
test_lexer_string("||", TOKEN_OR_OR);
test_lexer_string("|=", TOKEN_ASSIGN_OR);
test_lexer_string("^", TOKEN_XOR);
test_lexer_string("^=", TOKEN_ASSIGN_XOR);
test_lexer_string("~", TOKEN_BIT_NOT);
test_lexer_string("<<", TOKEN_L_SH);
test_lexer_string("<<=", TOKEN_ASSIGN_L_SH);
test_lexer_string(">>", TOKEN_R_SH);
test_lexer_string(">>=", TOKEN_ASSIGN_R_SH);
}
TEST_CASE("Comparison operators"); {
test_lexer_string("==", TOKEN_EQ);
test_lexer_string("!=", TOKEN_NEQ);
test_lexer_string("<", TOKEN_LT);
test_lexer_string("<=", TOKEN_LE);
test_lexer_string(">", TOKEN_GT);
test_lexer_string(">=", TOKEN_GE);
}
TEST_CASE("Special symbols"); {
test_lexer_string("(", TOKEN_L_PAREN);
test_lexer_string(")", TOKEN_R_PAREN);
test_lexer_string("[", TOKEN_L_BRACKET);
test_lexer_string("]", TOKEN_R_BRACKET);
test_lexer_string("{", TOKEN_L_BRACE);
test_lexer_string("}", TOKEN_R_BRACE);
test_lexer_string(";", TOKEN_SEMICOLON);
test_lexer_string(",", TOKEN_COMMA);
test_lexer_string(":", TOKEN_COLON);
test_lexer_string(".", TOKEN_DOT);
test_lexer_string("...", TOKEN_ELLIPSIS);
test_lexer_string("->", TOKEN_DEREF);
test_lexer_string("?", TOKEN_COND);
}
}
// Keyword tests
void test_keywords() {
TEST_CASE("C89 keywords");
test_lexer_string("while", TOKEN_WHILE);
test_lexer_string("sizeof", TOKEN_SIZEOF);
// TEST_CASE("C99 keywords");
// test_lexer_string("restrict", TOKEN_RESTRICT);
// test_lexer_string("_Bool", TOKEN_INT); // 需确认你的类型定义
}
// Literal tests
// Literal scanning: integer, character and string forms, table-driven.
void test_literals() {
    static const char *const ints[] = { "0", "123" };
    static const char *const chars[] = {
        "'a'", "'\\n'", "'\\t'", "'\\\\'", "'\\0'",
    };
    static const char *const strs[] = {
        "\"hello\"",
        "\"multi-line\\nstring\"",
        "\"escape\\\"quote\"",
    };
    TEST_CASE("Integer literals"); {
        for (size_t i = 0; i < sizeof ints / sizeof ints[0]; i++)
            test_lexer_string(ints[i], TOKEN_INT_LITERAL);
        // Not enabled yet: hex (0x1A3F, 0XABCDEF), octal (0123, 0777) and
        // boundary values (2147483647 / INT_MAX, 4294967295 / UINT_MAX).
    }
    TEST_CASE("Character literals"); {
        for (size_t i = 0; i < sizeof chars / sizeof chars[0]; i++)
            test_lexer_string(chars[i], TOKEN_CHAR_LITERAL);
    }
    TEST_CASE("String literals"); {
        for (size_t i = 0; i < sizeof strs / sizeof strs[0]; i++)
            test_lexer_string(strs[i], TOKEN_STRING_LITERAL);
    }
    // Not enabled yet: floating literals, e.g. 3.14e-5 -> TOKEN_FLOAT_LITERAL.
}
// Boundary tests
// Boundary-condition tests — all currently disabled; kept as sketches of
// the cases the lexer should eventually handle.
void test_edge_cases() {
    // TEST_CASE("Long identifiers");
    // An identifier one byte longer than LEXER_MAX_TOKEN_SIZE should still
    // lex as TOKEN_IDENT (or fail gracefully).
    // char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0};
    // memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1);
    // test_lexer_string(long_id, TOKEN_IDENT);
    // TEST_CASE("Buffer boundary");
    // Input longer than LEXER_BUFFER_SIZE should survive a buffer refill.
    // char boundary[LEXER_BUFFER_SIZE*2] = {0};
    // memset(boundary, '+', LEXER_BUFFER_SIZE*2-1);
    // test_lexer_string(boundary, TOKEN_ADD);
}
// Error-handling tests
// A NULL stream makes test_read return 0 bytes, so the lexer should
// immediately produce TOKEN_EOF instead of crashing.
void test_error_handling() {
    TEST_CASE("Invalid characters");
    lexer_t lex;
    tok_t tk;
    init_lexer(&lex, "test.c", NULL, test_read);
    get_valid_token(&lex, &tk);
    TEST_CHECK(tk.type == TOKEN_EOF); // empty input must yield EOF
}
// Test list
// acutest registry: maps each test name (selectable from the command line)
// to its entry point; the {NULL, NULL} sentinel terminates the list.
TEST_LIST = {
    {"operators", test_operators},
    {"keywords", test_keywords},
    {"literals", test_literals},
    {"edge_cases", test_edge_cases},
    {"error_handling", test_error_handling},
    {NULL, NULL}
};