feat(lex_parser): 初始化词法解析器模块

新增词法解析器库 `smcc_lex_parser`,包含基础的词法规则解析功能:
- 支持字符、字符串、数字、标识符的解析
- 支持跳过注释、空白符、行尾等辅助函数
- 提供对应的单元测试用例,覆盖各类合法与非法输入情况

该模块依赖 `libcore`,并被 `smcc_lex` 模块引用以支持更上层的词法分析逻辑。
This commit is contained in:
zzy
2025-11-23 22:53:46 +08:00
parent 67af0c6bf2
commit 871d031ceb
18 changed files with 996 additions and 392 deletions

View File

@@ -0,0 +1,60 @@
// test_char.c
#include <lex_parser.h>
#include <utest/acutest.h>
/*
 * Run lex_parse_char() over `str` and compare its result with `expect`.
 *
 * The raw parser result is always stored in `*output`, so callers can inspect
 * it independently of the returned comparison.
 *
 * Returns whether the parsed value equals `expect`.
 */
cbool check_char(const char *str, int expect, int *output) {
    /* NOTE(review): level 0 presumably keeps the parser's diagnostics quiet
     * while invalid inputs are exercised — confirm against the logger API. */
    log_set_level(&__default_logger_root, 0);

    core_pos_t cursor = core_pos_init();
    core_mem_stream_t backing;
    core_stream_t *input =
        core_mem_stream_init(&backing, str, smcc_strlen(str), false);

    int parsed = lex_parse_char(input, &cursor);
    *output = parsed;
    return parsed == expect;
}
/*
 * Assert that parsing `str` as a character literal yields `expect`.
 * The check fails when check_char() reports a mismatch.
 */
#define CHECK_CHAR_VALID(str, expect)                                          \
    do {                                                                       \
        int parsed_value_;                                                     \
        cbool ret = check_char((str), (expect), &parsed_value_);               \
        TEST_CHECK(ret == true);                                               \
    } while (0)
/*
 * Assert that `str` is rejected as a character literal, i.e. that the parsed
 * value handed back through check_char() equals core_stream_eof.
 */
#define CHECK_CHAR_INVALID(str)                                                \
    do {                                                                       \
        int _output;                                                           \
        /* Only the stored value matters; the comparison result is unused. */  \
        (void)check_char((str), core_stream_eof, &_output);                    \
        TEST_CHECK(_output == core_stream_eof);                                \
    } while (0)
// Unescaped single-character literals: an upper- and lower-case letter, a
// digit and a space must each parse to that same character value.
void test_simple_char(void) {
TEST_CASE("simple chars");
CHECK_CHAR_VALID("'a'", 'a');
CHECK_CHAR_VALID("'Z'", 'Z');
CHECK_CHAR_VALID("'0'", '0');
CHECK_CHAR_VALID("' '", ' ');
}
// Escape sequences inside character literals. The backslashes are doubled in
// the C source, so the parser receives e.g. the four characters '\n' (quote,
// backslash, n, quote) and is expected to produce the single escaped value.
void test_escape_char(void) {
TEST_CASE("escape chars");
CHECK_CHAR_VALID("'\\n'", '\n');
CHECK_CHAR_VALID("'\\t'", '\t');
CHECK_CHAR_VALID("'\\r'", '\r');
CHECK_CHAR_VALID("'\\\\'", '\\');
CHECK_CHAR_VALID("'\\''", '\'');
CHECK_CHAR_VALID("'\\\"'", '\"');
}
// Malformed character literals; each is expected to make the parser return
// core_stream_eof.
void test_invalid_char(void) {
TEST_CASE("invalid chars");
// Opening quote only.
CHECK_CHAR_INVALID("'");
// Nothing between the quotes.
CHECK_CHAR_INVALID("''");
// Two characters between the quotes.
CHECK_CHAR_INVALID("'ab'");
// Parser input is three characters: quote, backslash, quote.
CHECK_CHAR_INVALID("'\\'");
}
// Test registry for the character-literal tests: name/function pairs,
// closed by a {NULL, NULL} entry.
TEST_LIST = {
{"test_simple_char", test_simple_char},
{"test_escape_char", test_escape_char},
{"test_invalid_char", test_invalid_char},
{NULL, NULL},
};

View File

@@ -0,0 +1,55 @@
// test_identifier.c
#include <lex_parser.h>
#include <utest/acutest.h>
/*
 * Run lex_parse_identifier() over `str`, collecting the result in `output`.
 *
 * When parsing succeeds and `expect` is non-NULL, the return value says
 * whether the parsed text equals `expect`. Otherwise the parser's own return
 * value is passed through unchanged.
 */
cbool check_identifier(const char *str, const char *expect, cstring_t *output) {
    /* NOTE(review): level 0 presumably keeps the parser's diagnostics quiet
     * while invalid inputs are exercised — confirm against the logger API. */
    log_set_level(&__default_logger_root, 0);

    core_pos_t cursor = core_pos_init();
    core_mem_stream_t backing;
    core_stream_t *input =
        core_mem_stream_init(&backing, str, smcc_strlen(str), false);

    cbool parsed = lex_parse_identifier(input, &cursor, output);
    if (!parsed || !expect) {
        /* Nothing to compare: report the parser's verdict as-is. */
        return parsed;
    }
    return strcmp(output->data, expect) == 0;
}
/*
 * Assert that `str` parses as an identifier whose text equals `expect`.
 *
 * The text comparison runs only after a successful parse: when the parse
 * fails, the state of `_output.data` is not known here (it may be NULL), so
 * it must not be handed to strcmp(). The failed `ret == true` check already
 * reports the problem in that case.
 *
 * The scratch string is released on every path.
 */
#define CHECK_IDENTIFIER_VALID(str, expect)                                    \
    do {                                                                       \
        cstring_t _output = cstring_new();                                     \
        cbool ret = check_identifier((str), (expect), &_output);               \
        TEST_CHECK(ret == true);                                               \
        if (ret) {                                                             \
            TEST_CHECK(strcmp(_output.data, (expect)) == 0);                   \
        }                                                                      \
        cstring_free(&_output);                                                \
    } while (0)
/*
 * Assert that `str` is rejected as an identifier. No expected text is
 * supplied, so check_identifier() hands back the parser's own result.
 */
#define CHECK_IDENTIFIER_INVALID(str)                                          \
    do {                                                                       \
        cstring_t scratch_ = cstring_new();                                    \
        cbool ret = check_identifier((str), NULL, &scratch_);                  \
        TEST_CHECK(ret == false);                                              \
        cstring_free(&scratch_);                                               \
    } while (0)
// Well-formed identifiers: plain letters, embedded underscore, leading
// underscore, trailing digits and a single letter. Each must parse
// successfully and come back unchanged.
void test_valid_identifier(void) {
TEST_CASE("valid identifiers");
CHECK_IDENTIFIER_VALID("variable", "variable");
CHECK_IDENTIFIER_VALID("my_var", "my_var");
CHECK_IDENTIFIER_VALID("_private", "_private");
CHECK_IDENTIFIER_VALID("Var123", "Var123");
CHECK_IDENTIFIER_VALID("a", "a");
}
// Inputs the identifier parser is expected to reject.
void test_invalid_identifier(void) {
TEST_CASE("invalid identifiers");
// Empty input.
CHECK_IDENTIFIER_INVALID("");
// Starts with a digit.
CHECK_IDENTIFIER_INVALID("123var");
}
// Test registry for the identifier tests: name/function pairs, closed by a
// {NULL, NULL} entry.
TEST_LIST = {
{"test_valid_identifier", test_valid_identifier},
{"test_invalid_identifier", test_invalid_identifier},
{NULL, NULL},
};

View File

@@ -0,0 +1,132 @@
#include <lex_parser.h>
#include <utest/acutest.h>
/*
 * Run lex_parse_number() over `str`.
 *
 * The parser's return value is passed straight back; any parsed value is
 * delivered through `output`. `expect` is not consulted here — callers
 * compare `*output` themselves — and is kept only so the signature stays
 * parallel to the CHECK_* macros that call it.
 */
cbool check(const char *str, usize expect, usize *output) {
    (void)expect; /* intentionally unused; silences -Wunused-parameter */

    // TODO maybe have other logger
    log_set_level(&__default_logger_root, 0);
    core_pos_t pos = core_pos_init();
    core_mem_stream_t mem_stream;
    core_stream_t *stream =
        core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false);
    return lex_parse_number(stream, &pos, output);
}
/*
 * Assert that `str` parses as a number equal to `expect`.
 *
 * `_output` starts at 0 and is compared only after a successful parse, so a
 * parser that fails without writing its out-parameter can never cause an
 * indeterminate value to be read. `expect` is cast to usize so the comparison
 * is unsigned on both sides.
 */
#define CHECK_VALID(str, expect)                                               \
    do {                                                                       \
        usize _output = 0;                                                     \
        cbool ret = check((str), (expect), &_output);                          \
        TEST_CHECK(ret == true);                                               \
        if (ret) {                                                             \
            TEST_CHECK(_output == (usize)(expect));                            \
        }                                                                      \
    } while (0)
/*
 * Assert that `str` is rejected by the number parser. The out-parameter is
 * required by check() but its contents are never examined.
 */
#define CHECK_INVALID(str)                                                     \
    do {                                                                       \
        usize ignored_value_;                                                  \
        cbool ret = check((str), 0, &ignored_value_);                          \
        TEST_CHECK(ret == false);                                              \
    } while (0)
// Hexadecimal literals ("0x" prefix): lower-, upper- and mixed-case digits,
// multi-byte values, and malformed inputs that must be rejected.
void test_simple_hex(void) {
TEST_CASE("lowercase hex");
CHECK_VALID("0xff", 255);
CHECK_VALID("0x0", 0);
CHECK_VALID("0xa", 10);
CHECK_VALID("0xf", 15);
CHECK_VALID("0x1a", 26);
TEST_CASE("uppercase hex");
CHECK_VALID("0xFF", 255);
CHECK_VALID("0xA0", 160);
CHECK_VALID("0xCAFEBABE", 3405691582);
TEST_CASE("mixed case hex");
CHECK_VALID("0xFf", 255);
CHECK_VALID("0xCaFeBaBe", 3405691582);
TEST_CASE("larger hex values");
CHECK_VALID("0xff00", 65280);
CHECK_VALID("0xFFFF", 65535);
TEST_CASE("invalid hex");
CHECK_INVALID("0xG"); // Invalid hex digit
CHECK_INVALID("0xyz"); // Valid "0x" prefix followed only by non-hex characters
CHECK_INVALID("0x"); // Incomplete hex
}
// Octal literals (leading "0"): one to four digits, plus inputs whose digit
// after the leading zero is outside 0-7 and must be rejected.
void test_simple_oct(void) {
TEST_CASE("basic octal");
CHECK_VALID("00", 0);
CHECK_VALID("01", 1);
CHECK_VALID("07", 7);
TEST_CASE("multi-digit octal");
CHECK_VALID("010", 8);
CHECK_VALID("017", 15);
CHECK_VALID("077", 63);
TEST_CASE("larger octal values");
CHECK_VALID("0177", 127);
CHECK_VALID("0377", 255);
CHECK_VALID("0777", 511);
TEST_CASE("invalid octal");
CHECK_INVALID("08"); // Invalid octal digit
CHECK_INVALID("09"); // Invalid octal digit
}
// Decimal literals from a single digit up to 65535. Only accepted inputs are
// covered here; this function has no rejection cases.
void test_simple_dec(void) {
TEST_CASE("single digits");
CHECK_VALID("0", 0);
CHECK_VALID("1", 1);
CHECK_VALID("9", 9);
TEST_CASE("multi-digit decimal");
CHECK_VALID("10", 10);
CHECK_VALID("42", 42);
CHECK_VALID("123", 123);
TEST_CASE("larger decimal values");
CHECK_VALID("999", 999);
CHECK_VALID("1234", 1234);
CHECK_VALID("65535", 65535);
}
// Binary literals ("0b" prefix): one to eight digits, plus inputs with a
// non-binary digit or no digits at all, which must be rejected.
void test_simple_bin(void) {
TEST_CASE("basic binary");
CHECK_VALID("0b0", 0);
CHECK_VALID("0b1", 1);
TEST_CASE("multi-digit binary");
CHECK_VALID("0b10", 2);
CHECK_VALID("0b11", 3);
CHECK_VALID("0b100", 4);
CHECK_VALID("0b1010", 10);
TEST_CASE("larger binary values");
CHECK_VALID("0b1111", 15);
CHECK_VALID("0b11111111", 255);
CHECK_VALID("0b10101010", 170);
TEST_CASE("invalid binary");
CHECK_INVALID("0b2"); // Invalid binary digit
CHECK_INVALID("0b3"); // Invalid binary digit
CHECK_INVALID("0b"); // Incomplete binary
}
// Inputs that are not numbers at all, and literals that start validly but
// contain a digit illegal for their base. All must be rejected.
void test_edge_cases(void) {
TEST_CASE("empty string");
CHECK_INVALID(""); // Empty string
TEST_CASE("non-numeric strings");
CHECK_INVALID("abc"); // Non-numeric
CHECK_INVALID("xyz"); // Non-numeric
TEST_CASE("mixed invalid formats");
CHECK_INVALID("0x1G"); // Mixed valid/invalid hex
CHECK_INVALID("0b12"); // Mixed valid/invalid binary
}
// Test registry for the number tests: name/function pairs, closed by a
// {NULL, NULL} entry.
TEST_LIST = {
{"test_simple_hex", test_simple_hex}, {"test_simple_oct", test_simple_oct},
{"test_simple_dec", test_simple_dec}, {"test_simple_bin", test_simple_bin},
{"test_edge_cases", test_edge_cases}, {NULL, NULL},
};

View File

@@ -0,0 +1,50 @@
// test_skip_block_comment.c
#include <lex_parser.h>
#include <utest/acutest.h>
/*
 * Run lex_parse_skip_block_comment() over `str`, then drain whatever is left
 * in the stream and compare it with `expect_remaining`.
 *
 * Passing NULL for `expect_remaining` skips the comparison.
 *
 * At most sizeof(buffer) - 1 characters are collected; the buffer is
 * zero-initialised, so the collected text is always NUL-terminated.
 */
void check_skip_block_comment(const char *str, const char *expect_remaining) {
    log_set_level(&__default_logger_root, 0);
    core_pos_t pos = core_pos_init();
    core_mem_stream_t mem_stream;
    core_stream_t *stream =
        core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false);
    lex_parse_skip_block_comment(stream, &pos);

    // Check remaining content
    char buffer[256] = {0};
    const int capacity = (int)sizeof(buffer) - 1;
    int i = 0;
    /* Test capacity before reading so no character is pulled from the stream
     * and then dropped once the buffer is full. */
    while (i < capacity) {
        int ch = core_stream_next_char(stream);
        if (ch == core_stream_eof) {
            break;
        }
        buffer[i++] = (char)ch;
    }
    if (expect_remaining) {
        TEST_CHECK(strcmp(buffer, expect_remaining) == 0);
    }
}
// A single-line block comment, alone and followed by code. Everything after
// the closing "*/" (including the leading space) must remain in the stream.
void test_simple_block_comment(void) {
TEST_CASE("simple block comments");
check_skip_block_comment("/* comment */", "");
check_skip_block_comment("/* comment */ int x;", " int x;");
}
// A block comment containing a newline must be skipped as one unit, leaving
// only the text after "*/".
void test_multiline_block_comment(void) {
TEST_CASE("multiline block comments");
check_skip_block_comment("/* line1\nline2 */", "");
check_skip_block_comment("/* line1\nline2 */ int x;", " int x;");
}
// Asterisks inside the comment body that are not followed by '/' must not
// end the comment early.
void test_nested_asterisk_block_comment(void) {
TEST_CASE("nested asterisk block comments");
check_skip_block_comment("/* *** */", "");
check_skip_block_comment("/* *** */ int x;", " int x;");
}
// Test registry for the block-comment tests: name/function pairs, closed by
// a {NULL, NULL} entry.
TEST_LIST = {
{"test_simple_block_comment", test_simple_block_comment},
{"test_multiline_block_comment", test_multiline_block_comment},
{"test_nested_asterisk_block_comment", test_nested_asterisk_block_comment},
{NULL, NULL},
};

View File

@@ -0,0 +1,49 @@
// test_skip_line.c
#include <lex_parser.h>
#include <utest/acutest.h>
/*
 * Run lex_parse_skip_line() over `str`, then drain whatever is left in the
 * stream and compare it with `expect_remaining`.
 *
 * Passing NULL for `expect_remaining` skips the comparison.
 *
 * At most sizeof(buffer) - 1 characters are collected; the buffer is
 * zero-initialised, so the collected text is always NUL-terminated.
 */
void check_skip_line(const char *str, const char *expect_remaining) {
    log_set_level(&__default_logger_root, 0);
    core_pos_t pos = core_pos_init();
    core_mem_stream_t mem_stream;
    core_stream_t *stream =
        core_mem_stream_init(&mem_stream, str, smcc_strlen(str), false);
    lex_parse_skip_line(stream, &pos);

    // Check remaining content
    char buffer[256] = {0};
    const int capacity = (int)sizeof(buffer) - 1;
    int i = 0;
    /* Test capacity before reading so no character is pulled from the stream
     * and then dropped once the buffer is full. */
    while (i < capacity) {
        int ch = core_stream_next_char(stream);
        if (ch == core_stream_eof) {
            break;
        }
        buffer[i++] = (char)ch;
    }
    if (expect_remaining) {
        TEST_CHECK(strcmp(buffer, expect_remaining) == 0);
    }
}
// A "//" comment ending in "\n": the comment and its newline must both be
// consumed, leaving only the following line.
void test_simple_line_comment(void) {
TEST_CASE("simple line comments");
check_skip_line("// comment\n", "");
check_skip_line("// comment\nint x;", "int x;");
}
// Same as the "\n" case but with a "\r\n" line ending: both characters of
// the terminator must be consumed.
void test_crlf_line_comment(void) {
TEST_CASE("CRLF line comments");
check_skip_line("// comment\r\n", "");
check_skip_line("// comment\r\nint x;", "int x;");
}
// A comment that runs to end of input with no line terminator: nothing may
// be left in the stream.
void test_eof_line_comment(void) {
TEST_CASE("EOF line comments");
check_skip_line("// comment", "");
}
// Test registry for the skip-line tests: name/function pairs, closed by a
// {NULL, NULL} entry.
TEST_LIST = {
{"test_simple_line_comment", test_simple_line_comment},
{"test_crlf_line_comment", test_crlf_line_comment},
{"test_eof_line_comment", test_eof_line_comment},
{NULL, NULL},
};

View File

@@ -0,0 +1,62 @@
// test_string.c
#include <lex_parser.h>
#include <utest/acutest.h>
/*
 * Run lex_parse_string() over `str`, collecting the result in `output`.
 *
 * When parsing succeeds and `expect` is non-NULL, the return value says
 * whether the parsed text equals `expect`. Otherwise the parser's own return
 * value is passed through unchanged.
 */
cbool check_string(const char *str, const char *expect, cstring_t *output) {
    /* NOTE(review): level 0 presumably keeps the parser's diagnostics quiet
     * while invalid inputs are exercised — confirm against the logger API. */
    log_set_level(&__default_logger_root, 0);

    core_pos_t cursor = core_pos_init();
    core_mem_stream_t backing;
    core_stream_t *input =
        core_mem_stream_init(&backing, str, smcc_strlen(str), false);

    cbool parsed = lex_parse_string(input, &cursor, output);
    if (!parsed || !expect) {
        /* Nothing to compare: report the parser's verdict as-is. */
        return parsed;
    }
    return strcmp(output->data, expect) == 0;
}
/*
 * Assert that `str` parses as a string literal whose contents equal `expect`.
 *
 * The text comparison runs only after a successful parse: when the parse
 * fails, the state of `_output.data` is not known here (it may be NULL), so
 * it must not be handed to strcmp(). The failed `ret == true` check already
 * reports the problem in that case.
 *
 * The scratch string is released on every path.
 */
#define CHECK_STRING_VALID(str, expect)                                        \
    do {                                                                       \
        cstring_t _output = cstring_new();                                     \
        cbool ret = check_string((str), (expect), &_output);                   \
        TEST_CHECK(ret == true);                                               \
        if (ret) {                                                             \
            TEST_CHECK(strcmp(_output.data, (expect)) == 0);                   \
        }                                                                      \
        cstring_free(&_output);                                                \
    } while (0)
/*
 * Assert that `str` is rejected as a string literal. No expected text is
 * supplied, so check_string() hands back the parser's own result.
 */
#define CHECK_STRING_INVALID(str)                                              \
    do {                                                                       \
        cstring_t scratch_ = cstring_new();                                    \
        cbool ret = check_string((str), NULL, &scratch_);                      \
        TEST_CHECK(ret == false);                                              \
        cstring_free(&scratch_);                                               \
    } while (0)
// String literals without escapes: empty, a single word, and text with a
// space. The expected value is the literal's contents without the quotes.
void test_simple_string(void) {
TEST_CASE("simple strings");
CHECK_STRING_VALID("\"\"", "");
CHECK_STRING_VALID("\"hello\"", "hello");
CHECK_STRING_VALID("\"hello world\"", "hello world");
}
// Escape sequences inside string literals. The parser receives a backslash
// followed by n, t or a double quote and is expected to emit the
// corresponding single character, both alone and in the middle of text.
void test_escape_string(void) {
TEST_CASE("escape strings");
CHECK_STRING_VALID("\"\\n\"", "\n");
CHECK_STRING_VALID("\"\\t\"", "\t");
CHECK_STRING_VALID("\"\\\"\"", "\"");
CHECK_STRING_VALID("\"Hello\\nWorld\"", "Hello\nWorld");
}
// Malformed string literals the parser is expected to reject.
void test_invalid_string(void) {
TEST_CASE("invalid strings");
// No closing quote before end of input.
CHECK_STRING_INVALID("\"unterminated");
// A raw (unescaped) newline character inside the literal.
CHECK_STRING_INVALID("\"newline\n\"");
}
// Test registry for the string-literal tests: name/function pairs, closed by
// a {NULL, NULL} entry.
TEST_LIST = {
{"test_simple_string", test_simple_string},
{"test_escape_string", test_escape_string},
{"test_invalid_string", test_invalid_string},
{NULL, NULL},
};