diff --git a/libs/lexer/include/lexer_token.h b/libs/lexer/include/lexer_token.h index 14349b4..cdcbb47 100644 --- a/libs/lexer/include/lexer_token.h +++ b/libs/lexer/include/lexer_token.h @@ -9,8 +9,9 @@ typedef enum ckeyword { CEXT_ASM, } ckeyword_t; -// Using Binary Search To Fast Find Keyword /* clang-format off */ +// WARNING: Using Binary Search To Fast Find Keyword +// 你必须确保其中是按照字典序排列 #define KEYWORD_TABLE \ X(asm , TK_BASIC_KEYWORD , TOKEN_ASM , CEXT_ASM) \ X(break , TK_BASIC_KEYWORD , TOKEN_BREAK , CSTD_C89) \ diff --git a/libs/lexer/src/lexer.c b/libs/lexer/src/lexer.c index a27f587..7169e41 100644 --- a/libs/lexer/src/lexer.c +++ b/libs/lexer/src/lexer.c @@ -693,6 +693,8 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { // LEX_ERROR("unsupport wide-character char literal by `L` format"); // } cstring_t str = cstring_new(); + cstring_push(&str, stream_next_char(stream)); + lexer_next_pos(lexer); while (1) { ch = stream_peek_char(stream); if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || @@ -705,16 +707,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) { break; } - int res = keyword_cmp((const char *)str.data, str.size - 1); + int res = keyword_cmp(cstring_as_cstr(&str), cstring_len(&str)); if (res == -1) { token->value.cstr.data = (char *)cstring_as_cstr(&str); token->value.cstr.len = cstring_len(&str); type = TOKEN_IDENT; - break; } else { + cstring_free(&str); type = keywords[res].tok; - break; } + token->type = type; + goto END; default: LEX_ERROR("unsupport char in sourse code `%c`", ch); break; diff --git a/libs/lexer/tests/test_number.c b/libs/lexer/tests/test_number.c deleted file mode 100644 index afd7957..0000000 --- a/libs/lexer/tests/test_number.c +++ /dev/null @@ -1,2 +0,0 @@ - -int main() {} \ No newline at end of file diff --git a/libs/lexer/tests/test_parse.c b/libs/lexer/tests/test_parse.c new file mode 100644 index 0000000..2e47d95 --- /dev/null +++ b/libs/lexer/tests/test_parse.c @@ -0,0 
+// test_lexer.c
+#include "acutest.h"
+#include "lexer.h"
+#include <string.h>
+
TOKEN_L_PAREN); + test_lexer_string(")", TOKEN_R_PAREN); + test_lexer_string("[", TOKEN_L_BRACKET); + test_lexer_string("]", TOKEN_R_BRACKET); + test_lexer_string("{", TOKEN_L_BRACE); + test_lexer_string("}", TOKEN_R_BRACE); + test_lexer_string(";", TOKEN_SEMICOLON); + test_lexer_string(",", TOKEN_COMMA); + test_lexer_string(":", TOKEN_COLON); + test_lexer_string(".", TOKEN_DOT); + test_lexer_string("...", TOKEN_ELLIPSIS); + test_lexer_string("->", TOKEN_DEREF); + test_lexer_string("?", TOKEN_COND); + } +} + +// 关键字测试 +void test_keywords() { + TEST_CASE("C89 keywords"); + test_lexer_string("while", TOKEN_WHILE); + test_lexer_string("sizeof", TOKEN_SIZEOF); + + TEST_CASE("C99 keywords"); + test_lexer_string("restrict", TOKEN_RESTRICT); + // test_lexer_string("_Bool", TOKEN_INT); // 需确认你的类型定义 +} + +// 字面量测试 +void test_literals() { + TEST_CASE("Integer literals"); + { + // 十进制 + test_lexer_string("0", TOKEN_INT_LITERAL); + test_lexer_string("123", TOKEN_INT_LITERAL); + test_lexer_string("2147483647", TOKEN_INT_LITERAL); + + // 十六进制 + test_lexer_string("0x0", TOKEN_INT_LITERAL); + test_lexer_string("0x1A3F", TOKEN_INT_LITERAL); + test_lexer_string("0XABCDEF", TOKEN_INT_LITERAL); + + // 八进制 + test_lexer_string("0123", TOKEN_INT_LITERAL); + test_lexer_string("0777", TOKEN_INT_LITERAL); + + // 边界值测试 + test_lexer_string("2147483647", TOKEN_INT_LITERAL); // INT_MAX + test_lexer_string("4294967295", TOKEN_INT_LITERAL); // UINT_MAX + } + + TEST_CASE("Character literals"); + { + test_lexer_string("'a'", TOKEN_CHAR_LITERAL); + test_lexer_string("'\\n'", TOKEN_CHAR_LITERAL); + test_lexer_string("'\\t'", TOKEN_CHAR_LITERAL); + test_lexer_string("'\\\\'", TOKEN_CHAR_LITERAL); + test_lexer_string("'\\0'", TOKEN_CHAR_LITERAL); + } + + TEST_CASE("String literals"); + { + test_lexer_string("\"hello\"", TOKEN_STRING_LITERAL); + test_lexer_string("\"multi-line\\nstring\"", TOKEN_STRING_LITERAL); + test_lexer_string("\"escape\\\"quote\"", TOKEN_STRING_LITERAL); + } + + // 
TEST_CASE("Floating literals"); + // test_lexer_string("3.14e-5", TOKEN_FLOAT_LITERAL); +} + +// 边界测试 +void test_edge_cases() { + // TEST_CASE("Long identifiers"); + // char long_id[LEXER_MAX_TOKEN_SIZE+2] = {0}; + // memset(long_id, 'a', LEXER_MAX_TOKEN_SIZE+1); + // test_lexer_string(long_id, TOKEN_IDENT); + + // TEST_CASE("Buffer boundary"); + // char boundary[LEXER_BUFFER_SIZE*2] = {0}; + // memset(boundary, '+', LEXER_BUFFER_SIZE*2-1); + // test_lexer_string(boundary, TOKEN_ADD); +} + +// 错误处理测试 +// void test_error_handling() { +// TEST_CASE("Invalid characters"); +// cc_lexer_t lexer; +// tok_t token; + +// init_lexer(&lexer, "test.c", NULL, test_read); +// get_valid_token(&lexer, &token); + +// TEST_CHECK(token.type == TOKEN_EOF); // 应触发错误处理 +// } + +// 测试列表 +TEST_LIST = {{"operators", test_operators}, + {"keywords", test_keywords}, + {"literals", test_literals}, + {"edge_cases", test_edge_cases}, + // {"error_handling", test_error_handling}, + {NULL, NULL}}; \ No newline at end of file diff --git a/libs/lexer/tests/test_run.c b/libs/lexer/tests/test_run.c index 92dbb99..3492004 100644 --- a/libs/lexer/tests/test_run.c +++ b/libs/lexer/tests/test_run.c @@ -22,7 +22,9 @@ int g_num; int g_num_arr[3]; int main(int argc, char *argv[]) { // int num = 0; - if (argc == 3 && strcmp(argv[2], "-nodebug") == 0) { + if (argc == 3 && strcmp(argv[2], "--debug") == 0) { + log_set_level(NULL, LOG_LEVEL_ALL); + } else { log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR); }