feat(lex_parser): 初始化词法解析器模块

新增词法解析器库 `smcc_lex_parser`，包含基础的词法规则解析功能： - 支持字符、字符串、数字、标识符的解析 - 支持跳过注释、空白符、行尾等辅助函数 - 提供对应的单元测试用例，覆盖各类合法与非法输入情况。该模块依赖 `libcore`，并被 `smcc_lex` 模块引用，以支持更上层的词法分析逻辑。
2025-11-23 22:53:46 +08:00
parent 67af0c6bf2
commit 871d031ceb
18 changed files with 996 additions and 392 deletions
--- a/libs/lex_parser/cbuild.toml
+++ b/libs/lex_parser/cbuild.toml
@@ -0,0 +1,5 @@
+[package]
+name = "smcc_lex_parser"
+version = "0.1.0"
+
+dependencies = [{ name = "libcore", path = "../../runtime/libcore" }]
--- a/libs/lex_parser/include/lex_parser.h
+++ b/libs/lex_parser/include/lex_parser.h
@@ -0,0 +1,17 @@
+#ifndef __SMCC_LEX_PARSER_H__
+#define __SMCC_LEX_PARSER_H__
+
+#include <libcore.h>
+
+/**
+ * @brief Parse a character constant; the stream must be at the opening `'`.
+ * @return the value of the character, or core_stream_eof on malformed input
+ */
+int lex_parse_char(core_stream_t *input, core_pos_t *pos);
+/**
+ * @brief Parse a string literal; the stream must be at the opening `"`.
+ *
+ * `output` must be empty on entry. On success the decoded text is stored in
+ * `*output` and true is returned; on failure false is returned.
+ */
+cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
+                       cstring_t *output);
+/**
+ * @brief Parse an unsigned integer constant and store its value in `*output`.
+ *
+ * Accepts decimal digits, a `0x`/`0X` hexadecimal prefix, a `0b`/`0B` binary
+ * prefix and a leading `0` for octal. Returns false when no valid number is
+ * found.
+ */
+cbool lex_parse_number(core_stream_t *input, core_pos_t *pos, usize *output);
+/**
+ * @brief Parse an identifier (`_` or an ASCII letter, followed by `_`, ASCII
+ *        letters or decimal digits) and append its characters to `output`.
+ *
+ * `output` must be empty on entry. Returns false when the input does not
+ * start with an identifier character.
+ */
+cbool lex_parse_identifier(core_stream_t *input, core_pos_t *pos,
+                           cstring_t *output);
+/**
+ * @brief Consume one line terminator (CR LF, a lone CR, or LF) and move `pos`
+ *        to the next line; anything else only produces a warning.
+ */
+void lex_parse_skip_endline(core_stream_t *input, core_pos_t *pos);
+/**
+ * @brief Skip a block comment. The stream must be positioned at the opening
+ *        slash-star delimiter; reading stops after the closing delimiter or at
+ *        end of input.
+ */
+void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos);
+/**
+ * @brief Skip the rest of the current line, including its line terminator,
+ *        or everything up to end of input if no terminator follows.
+ */
+void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos);
+/**
+ * @brief Skip whitespace starting at the current stream position.
+ *
+ * NOTE(review): see the implementation for exactly which characters are
+ * consumed.
+ */
+void lex_parse_skip_whitespace(core_stream_t *input, core_pos_t *pos);
+
+#endif /* __SMCC_LEX_PARSER_H__ */
--- a/libs/lex_parser/src/lex_parser.c
+++ b/libs/lex_parser/src/lex_parser.c
@@ -0,0 +1,406 @@
+#include <lex_parser.h>
+
+/* True when `ch` is a line feed or a carriage return. */
+static inline cbool is_next_line(int ch) {
+    switch (ch) {
+    case '\n':
+    case '\r':
+        return true;
+    default:
+        return false;
+    }
+}
+
+/*
+ * Consume a single line terminator (CR LF, a lone CR, or LF) at the current
+ * stream position and move `pos` to the next line. If the next character is
+ * not a line terminator nothing is consumed and a warning is logged.
+ */
+void lex_parse_skip_endline(core_stream_t *input, core_pos_t *pos) {
+    core_stream_reset_char(input);
+    const int first = core_stream_peek_char(input);
+
+    if (first != '\r' && first != '\n') {
+        LOG_WARN("not a newline character");
+        return;
+    }
+
+    core_stream_next_char(input);
+    if (first == '\r' && core_stream_peek_char(input) == '\n') {
+        /* CR LF counts as one line break */
+        core_stream_next_char(input);
+    }
+    core_pos_next_line(pos);
+}
+
+/**
+ * @brief Map the character that follows a backslash to the value of the
+ *        corresponding simple escape sequence.
+ *
+ * @param ch character read after the backslash
+ * @return int value of the escape sequence, or -1 if `ch` does not name a
+ *         simple escape sequence
+ * https://cppreference.cn/w/c/language/escape
+ * `\'`  single quote              byte 0x27 in ASCII
+ * `\"`  double quote              byte 0x22 in ASCII
+ * `\?`  question mark             byte 0x3f in ASCII
+ * `\\`  backslash                 byte 0x5c in ASCII
+ * `\a`  audible bell              byte 0x07 in ASCII
+ * `\b`  backspace                 byte 0x08 in ASCII
+ * `\f`  form feed - new page      byte 0x0c in ASCII
+ * `\n`  line feed - new line      byte 0x0a in ASCII
+ * `\r`  carriage return           byte 0x0d in ASCII
+ * `\t`  horizontal tab            byte 0x09 in ASCII
+ * `\v`  vertical tab              byte 0x0b in ASCII
+ */
+static inline int got_simple_escape(int ch) {
+    /* clang-format off */
+    switch (ch) {
+    case '\'':  return '\'';
+    case '\"':  return '\"';
+    case '\?':  return '\?';
+    case '\\':  return '\\';
+    case 'a':   return '\a';
+    case 'b':   return '\b';
+    case 'f':   return '\f';
+    case 'n':   return '\n';
+    case 'r':   return '\r';
+    case 't':   return '\t';
+    case 'v':   return '\v';
+    default:    return -1;
+    }
+    /* clang-format on */
+}
+
+/*
+ * Skip the rest of the current line. Characters are consumed one at a time
+ * (advancing `pos` for each) until a line terminator or end of input is
+ * peeked; a line terminator is then consumed via lex_parse_skip_endline.
+ */
+void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos) {
+    core_stream_t *stream = input;
+    Assert(stream != null);
+    core_stream_reset_char(stream);
+
+    int cur = core_stream_peek_char(stream);
+    while (cur != core_stream_eof && !is_next_line(cur)) {
+        core_stream_next_char(stream);
+        core_pos_next(pos);
+        cur = core_stream_peek_char(stream);
+    }
+
+    // TODO endline
+    if (cur != core_stream_eof) {
+        lex_parse_skip_endline(stream, pos);
+    }
+}
+
+/*
+ * Skip a block comment. The two characters of the opening delimiter are
+ * consumed first (and asserted), then input is consumed up to and including
+ * the closing star-slash. Line terminators inside the comment go through
+ * lex_parse_skip_endline so that `pos` keeps counting lines. Reaching end of
+ * input first logs a warning and returns.
+ */
+void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) {
+    int ch;
+
+    core_stream_reset_char(input);
+    ch = core_stream_next_char(input);
+    core_pos_next(pos);
+    // FIXME Assertion
+    Assert(ch == '/');
+    ch = core_stream_next_char(input);
+    core_pos_next(pos);
+    Assert(ch == '*');
+
+    // the opening delimiter has been matched; look for the closing one
+    for (;;) {
+        core_stream_reset_char(input);
+        ch = core_stream_peek_char(input);
+
+        if (ch == core_stream_eof) {
+            LOG_WARN("Unterminated block comment");
+            break;
+        }
+        if (is_next_line(ch)) {
+            lex_parse_skip_endline(input, pos);
+            continue;
+        }
+
+        core_stream_next_char(input);
+        core_pos_next(pos);
+        if (ch != '*') {
+            continue;
+        }
+        if (core_stream_peek_char(input) != '/') {
+            continue;
+        }
+        core_stream_next_char(input);
+        core_pos_next(pos);
+        break;
+    }
+}
+
+/*
+ * Skip horizontal whitespace (space, tab, vertical tab, form feed) starting
+ * at the current stream position, advancing `pos` once per consumed
+ * character. Stops, without consuming, at the first other character or at end
+ * of input. Line terminators are deliberately left in the stream so that
+ * lex_parse_skip_endline can account for the line change.
+ */
+void lex_parse_skip_whitespace(core_stream_t *input, core_pos_t *pos) {
+    core_stream_t *stream = input;
+    Assert(stream != null);
+    while (1) {
+        // re-sync the peek cursor before every look-ahead
+        core_stream_reset_char(stream);
+        int ch = core_stream_peek_char(stream);
+
+        if (ch != ' ' && ch != '\t' && ch != '\v' && ch != '\f') {
+            // also covers core_stream_eof
+            return;
+        }
+
+        core_stream_next_char(stream);
+        core_pos_next(pos);
+    }
+}
+
+/**
+ * @brief Parse a run of digits in the given base into an unsigned integer.
+ *
+ * Reading stops at end of input or at the first character that is neither an
+ * ASCII letter nor a decimal digit. A letter or digit whose value is not
+ * below `base` is an error and is left unconsumed, as is a digit that would
+ * make the value exceed the range of usize.
+ *
+ * @param input stream to read from
+ * @param pos source position, advanced once per consumed digit
+ * @param base numeric base: 2, 8, 10 or 16
+ * @param output receives the parsed value; written only on success
+ * @return cbool true if at least one digit was consumed and the value fits
+ */
+static inline cbool _lex_parse_uint(core_stream_t *input, core_pos_t *pos,
+                                    int base, usize *output) {
+    Assert(input != null && pos != null);
+    if (input == null || pos == null) {
+        return false;
+    }
+    Assert(base == 2 || base == 8 || base == 10 || base == 16);
+    core_stream_reset_char(input);
+    int ch, tmp;
+    usize n = 0;
+    // assumes usize is an unsigned type, so this is its maximum — TODO confirm
+    const usize max_value = (usize)-1;
+    // remembered to detect whether any digit was consumed at all
+    usize offset = pos->offset;
+    while (1) {
+        ch = core_stream_peek_char(input);
+
+        if (ch == core_stream_eof) {
+            break;
+        } else if (ch >= 'a' && ch <= 'z') {
+            tmp = ch - 'a' + 10;
+        } else if (ch >= 'A' && ch <= 'Z') {
+            tmp = ch - 'A' + 10;
+        } else if (ch >= '0' && ch <= '9') {
+            tmp = ch - '0';
+        } else {
+            break;
+        }
+
+        if (tmp >= base) {
+            LOG_ERROR("Invalid digit");
+            return false;
+        }
+
+        // n * base + tmp would wrap around: report it instead of returning a
+        // silently truncated value
+        if (n > (max_value - (usize)tmp) / (usize)base) {
+            LOG_ERROR("Integer literal overflow");
+            return false;
+        }
+
+        core_stream_next_char(input);
+        core_pos_next(pos);
+        n = n * (usize)base + (usize)tmp;
+    }
+    if (offset == pos->offset) {
+        // no digit was matched
+        return false;
+    }
+    *output = n;
+    return true;
+}
+
+/**
+ * @brief Parse a character constant such as 'a' or an escaped character.
+ *
+ * The stream must be positioned at the opening single quote. Simple escape
+ * sequences and octal escapes that begin with a backslash followed by 0 are
+ * decoded; hexadecimal and universal-character-name escapes are not
+ * implemented yet.
+ *
+ * @param input stream to read from
+ * @param pos source position, advanced for every consumed character
+ * @return int value of the character, or core_stream_eof on malformed input
+ * https://cppreference.cn/w/c/language/character_constant
+ */
+int lex_parse_char(core_stream_t *input, core_pos_t *pos) {
+    core_stream_t *stream = input;
+    core_stream_reset_char(stream);
+    int ch = core_stream_peek_char(stream);
+    int ret = core_stream_eof;
+
+    if (ch == core_stream_eof) {
+        LOG_WARN("Unexpected EOF at begin");
+        goto ERR;
+    } else if (ch != '\'') {
+        LOG_WARN("Unexpected character '%c' at begin", ch);
+        goto ERR;
+    }
+    core_stream_next_char(stream);
+    core_pos_next(pos);
+
+    ch = core_stream_next_char(stream);
+    core_pos_next(pos);
+
+    if (ch == core_stream_eof) {
+        LOG_WARN("Unexpected EOF at middle");
+        goto ERR;
+    } else if (ch == '\\') {
+        ch = core_stream_next_char(stream);
+        core_pos_next(pos);
+        if (ch == '0') {
+            // Numeric escape sequence: backslash followed by octal digits
+            // gives the code unit with that octal value.
+            // Parse into a real usize: writing through a cast pointer to
+            // `ret` would store more bytes than an int holds.
+            // FIXME when no further octal digits follow, _lex_parse_uint
+            // reports failure, but the 0 kept in ret happens to be the
+            // correct value.
+            usize octal = 0;
+            ret = 0;
+            if (_lex_parse_uint(stream, pos, 8, &octal)) {
+                ret = (int)octal;
+            }
+        } else if (ch == 'x') {
+            // TODO https://cppreference.cn/w/c/language/escape
+            // Hexadecimal escape: arbitrary hex value, code unit n...
+            // (any number of hexadecimal digits)
+            TODO();
+        } else if (ch == 'u' || ch == 'U') {
+            // Universal character names (since C99):
+            // u + 4 hex digits is code point U+nnnn,
+            // U + 8 hex digits is code point U+nnnnnnnn;
+            // the value must be in the allowed range and may produce
+            // several code units.
+            TODO();
+        } else if ((ret = got_simple_escape(ch)) == -1) {
+            LOG_ERROR("Invalid escape character");
+            goto ERR;
+        }
+    } else {
+        ret = ch;
+    }
+
+    // The closing quote is consumed like any other character, so the position
+    // advances whether or not it turns out to match.
+    ch = core_stream_next_char(stream);
+    core_pos_next(pos);
+    if (ch != '\'') {
+        LOG_ERROR("Unclosed character literal '%c' at end, expect `'`", ch);
+        goto ERR;
+    }
+
+    return ret;
+ERR:
+    return core_stream_eof;
+}
+
+/**
+ * @brief Parse a string literal and return its decoded contents.
+ *
+ * The stream must be positioned at the opening double quote. Simple escape
+ * sequences are decoded. An unknown escape is logged as an error and the
+ * character after the backslash is kept as-is. A raw line terminator or end
+ * of input before the closing quote is an error.
+ *
+ * @param input stream to read from
+ * @param pos source position, advanced for every consumed character
+ * @param output must be empty on entry; receives the decoded string on
+ *               success and is left untouched on failure
+ * @return cbool true on success, false on malformed input
+ * https://cppreference.cn/w/c/language/string_literal
+ */
+cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
+                       cstring_t *output) {
+    core_stream_t *stream = input;
+    core_stream_reset_char(stream);
+    int ch = core_stream_peek_char(stream);
+
+    Assert(cstring_is_empty(output));
+    // No buffer exists yet, so these early failures return directly instead
+    // of going through the ERR cleanup.
+    if (ch == core_stream_eof) {
+        LOG_WARN("Unexpected EOF at begin");
+        return false;
+    } else if (ch != '"') {
+        LOG_WARN("Unexpected character '%c' at begin", ch);
+        return false;
+    }
+    core_stream_next_char(stream);
+    core_pos_next(pos);
+
+    cstring_t str = cstring_from_cstr("");
+    while (1) {
+        // re-sync the peek cursor before every look-ahead
+        core_stream_reset_char(stream);
+        ch = core_stream_peek_char(stream);
+
+        if (ch == core_stream_eof) {
+            LOG_ERROR("Unexpected EOF at string literal");
+            goto ERR;
+        } else if (is_next_line(ch)) {
+            LOG_ERROR("Unexpected newline at string literal");
+            goto ERR;
+        }
+
+        // consume the character that was just peeked
+        core_stream_next_char(stream);
+        core_pos_next(pos);
+
+        if (ch == '"') {
+            break;
+        }
+
+        if (ch == '\\') {
+            // the escape occupies one more character of input
+            ch = core_stream_next_char(stream);
+            if (ch == core_stream_eof) {
+                LOG_ERROR("Unexpected EOF at string literal");
+                goto ERR;
+            }
+            core_pos_next(pos);
+
+            int val = got_simple_escape(ch);
+            if (val == -1) {
+                // best effort: report it and keep the character itself
+                LOG_ERROR("Invalid escape character it is \\%c [%d]", ch, ch);
+            } else {
+                ch = val;
+            }
+        }
+
+        cstring_push(&str, ch);
+    }
+
+    *output = str;
+    return true;
+ERR:
+    cstring_free(&str);
+    return false;
+}
+
+/**
+ * @brief Parse an unsigned integer constant.
+ *
+ * The base is chosen from the prefix: `0x`/`0X` is hexadecimal, `0b`/`0B` is
+ * binary, a leading `0` followed by an octal digit is octal, anything else is
+ * decimal. The digits themselves are parsed by _lex_parse_uint.
+ *
+ * @param input stream to read from
+ * @param pos source position, advanced for every consumed character
+ * @param output receives the value; written only on success
+ * @return cbool true on success, false if no valid number could be parsed
+ * https://cppreference.cn/w/c/language/integer_constant
+ */
+cbool lex_parse_number(core_stream_t *input, core_pos_t *pos, usize *output) {
+    core_stream_t *stream = input;
+    core_stream_reset_char(stream);
+    int ch = core_stream_peek_char(stream);
+    int base = 10;
+    if (ch == core_stream_eof) {
+        LOG_WARN("Unexpected EOF at begin");
+        return false;
+    }
+    if (ch == '0') {
+        // presumably a second peek yields the character after the '0' —
+        // confirm against the core_stream peek semantics
+        ch = core_stream_peek_char(stream);
+        if (ch == 'x' || ch == 'X') {
+            base = 16;
+            // consume both prefix characters
+            core_stream_next_char(stream);
+            core_pos_next(pos);
+            core_stream_next_char(stream);
+            core_pos_next(pos);
+        } else if (ch == 'b' || ch == 'B') {
+            // FIXME C23 external integer base
+            base = 2;
+            core_stream_next_char(stream);
+            core_pos_next(pos);
+            core_stream_next_char(stream);
+            core_pos_next(pos);
+        } else if (ch >= '0' && ch <= '7') {
+            base = 8;
+            // consume only the leading '0'; the octal digits follow
+            core_stream_next_char(stream);
+            core_pos_next(pos);
+        } else if (ch == '9' || ch == '8') {
+            LOG_ERROR("Invalid digit '%c' in octal literal", ch);
+            return false;
+        }
+        // otherwise: a lone 0, parsed below as decimal
+    }
+
+    // parse the integer digits
+    core_stream_reset_char(stream);
+    usize n;
+    if (_lex_parse_uint(stream, pos, base, &n) == false) {
+        return false;
+    }
+    *output = n;
+    return true;
+}
+
+/**
+ * @brief Parse an identifier and append its characters to `output`.
+ *
+ * An identifier starts with `_` or an ASCII letter and continues with `_`,
+ * ASCII letters or decimal digits.
+ *
+ * @param input stream to read from
+ * @param pos source position, advanced once per consumed character
+ * @param output must be empty on entry; receives the identifier text
+ * @return cbool true if an identifier was read, false at end of input or when
+ *         the next character cannot start an identifier
+ * https://cppreference.cn/w/c/language/identifier
+ */
+cbool lex_parse_identifier(core_stream_t *input, core_pos_t *pos,
+                           cstring_t *output) {
+    Assert(cstring_is_empty(output));
+    core_stream_reset_char(input);
+    int cur = core_stream_peek_char(input);
+
+    if (cur == core_stream_eof) {
+        LOG_WARN("Unexpected EOF at begin");
+        return false;
+    }
+
+    const cbool starts_identifier = cur == '_' || (cur >= 'a' && cur <= 'z') ||
+                                    (cur >= 'A' && cur <= 'Z');
+    if (!starts_identifier) {
+        return false;
+    }
+
+    // `cur` always holds a peeked, not yet consumed identifier character here
+    do {
+        cstring_push(output, cur);
+        core_stream_next_char(input);
+        core_pos_next(pos);
+        cur = core_stream_peek_char(input);
+    } while ((cur >= 'a' && cur <= 'z') || (cur >= 'A' && cur <= 'Z') ||
+             (cur == '_') || (cur >= '0' && cur <= '9'));
+
+    return true;
+}
--- a/libs/lex_parser/tests/test_char.c
+++ b/libs/lex_parser/tests/test_char.c
@@ -0,0 +1,60 @@
+// test_char.c
+#include <lex_parser.h>
+#include <utest/acutest.h>
+
+/*
+ * Run lex_parse_char over the in-memory text `str`, store the raw result in
+ * `*output` and report whether it equals `expect`.
+ */
+cbool check_char(const char *str, int expect, int *output) {
+    core_mem_stream_t backing;
+
+    log_set_level(&__default_logger_root, 0);
+    core_pos_t where = core_pos_init();
+    core_stream_t *src =
+        core_mem_stream_init(&backing, str, smcc_strlen(str), false);
+
+    const int parsed = lex_parse_char(src, &where);
+    *output = parsed;
+    return parsed == expect;
+}
+
+/* Expect lex_parse_char to turn the source text `str` into the value `expect`. */
+#define CHECK_CHAR_VALID(str, expect)                                          \
+    do {                                                                       \
+        int _output;                                                           \
+        cbool ret = check_char(str, expect, &_output);                         \
+        TEST_CHECK(ret == true);                                               \
+    } while (0)
+
+/* Expect lex_parse_char to reject `str`, i.e. to return core_stream_eof. */
+#define CHECK_CHAR_INVALID(str)                                                \
+    do {                                                                       \
+        int _output;                                                           \
+        check_char(str, core_stream_eof, &_output);                            \
+        TEST_CHECK(_output == core_stream_eof);                                \
+    } while (0)
+
+/* Unescaped characters evaluate to themselves. */
+void test_simple_char(void) {
+    TEST_CASE("simple chars");
+    CHECK_CHAR_VALID("'a'", 'a');
+    CHECK_CHAR_VALID("'Z'", 'Z');
+    CHECK_CHAR_VALID("'0'", '0');
+    CHECK_CHAR_VALID("' '", ' ');
+}
+
+/* Simple escape sequences evaluate to the character they denote. */
+void test_escape_char(void) {
+    TEST_CASE("escape chars");
+    CHECK_CHAR_VALID("'\\n'", '\n');
+    CHECK_CHAR_VALID("'\\t'", '\t');
+    CHECK_CHAR_VALID("'\\r'", '\r');
+    CHECK_CHAR_VALID("'\\\\'", '\\');
+    CHECK_CHAR_VALID("'\\''", '\'');
+    CHECK_CHAR_VALID("'\\\"'", '\"');
+}
+
+/* Malformed character constants are rejected. */
+void test_invalid_char(void) {
+    TEST_CASE("invalid chars");
+    CHECK_CHAR_INVALID("'");    // opening quote only
+    CHECK_CHAR_INVALID("''");   // no closing quote after the second quote
+    CHECK_CHAR_INVALID("'ab'"); // more than one character
+    CHECK_CHAR_INVALID("'\\'"); // escaped quote, then no closing quote
+}
+
+/* acutest test table; the {NULL, NULL} entry terminates the list. */
+TEST_LIST = {
+    {"test_simple_char", test_simple_char},
+    {"test_escape_char", test_escape_char},
+    {"test_invalid_char", test_invalid_char},
+    {NULL, NULL},
+};
--- a/libs/lex_parser/tests/test_identifier.c
+++ b/libs/lex_parser/tests/test_identifier.c
@@ -0,0 +1,55 @@
+// test_identifier.c
+#include <lex_parser.h>
+#include <utest/acutest.h>
+
+/*
+ * Run lex_parse_identifier over the in-memory text `str`, collecting the
+ * result in `output`. Returns false if parsing fails; otherwise returns
+ * whether the parsed text equals `expect` (or true when `expect` is NULL).
+ */
+cbool check_identifier(const char *str, const char *expect, cstring_t *output) {
+    core_mem_stream_t backing;
+
+    log_set_level(&__default_logger_root, 0);
+    core_pos_t where = core_pos_init();
+    core_stream_t *src =
+        core_mem_stream_init(&backing, str, smcc_strlen(str), false);
+
+    const cbool parsed = lex_parse_identifier(src, &where, output);
+    if (!parsed || expect == NULL) {
+        return parsed;
+    }
+    return strcmp(output->data, expect) == 0;
+}
+
+/* Expect `str` to parse as an identifier whose text equals `expect`. */
+#define CHECK_IDENTIFIER_VALID(str, expect)                                    \
+    do {                                                                       \
+        cstring_t _output = cstring_new();                                     \
+        cbool ret = check_identifier(str, expect, &_output);                   \
+        TEST_CHECK(ret == true);                                               \
+        TEST_CHECK(strcmp(_output.data, expect) == 0);                         \
+        cstring_free(&_output);                                                \
+    } while (0)
+
+/* Expect lex_parse_identifier to reject `str`. */
+#define CHECK_IDENTIFIER_INVALID(str)                                          \
+    do {                                                                       \
+        cstring_t _output = cstring_new();                                     \
+        cbool ret = check_identifier(str, NULL, &_output);                     \
+        TEST_CHECK(ret == false);                                              \
+        cstring_free(&_output);                                                \
+    } while (0)
+
+/* Letters, digits after the first character, and underscores are accepted. */
+void test_valid_identifier(void) {
+    TEST_CASE("valid identifiers");
+    CHECK_IDENTIFIER_VALID("variable", "variable");
+    CHECK_IDENTIFIER_VALID("my_var", "my_var");
+    CHECK_IDENTIFIER_VALID("_private", "_private");
+    CHECK_IDENTIFIER_VALID("Var123", "Var123");
+    CHECK_IDENTIFIER_VALID("a", "a");
+}
+
+/* Empty input and input starting with a digit are rejected. */
+void test_invalid_identifier(void) {
+    TEST_CASE("invalid identifiers");
+    CHECK_IDENTIFIER_INVALID("");
+    CHECK_IDENTIFIER_INVALID("123var");
+}
+
+/* acutest test table; the {NULL, NULL} entry terminates the list. */
+TEST_LIST = {
+    {"test_valid_identifier", test_valid_identifier},
+    {"test_invalid_identifier", test_invalid_identifier},
+    {NULL, NULL},
+};
--- a/libs/lex_parser/tests/test_number.c
+++ b/libs/lex_parser/tests/test_number.c
@@ -0,0 +1,132 @@
+#include <lex_parser.h>
+#include <utest/acutest.h>
+/*
+ * Run lex_parse_number over the in-memory text `str` and return its result;
+ * the parsed value, if any, is stored in `*output`. `expect` is not used
+ * here: callers compare the value themselves.
+ */
+cbool check(const char *str, usize expect, usize *output) {
+    core_mem_stream_t backing;
+    (void)expect;
+
+    // TODO maybe have other logger
+    log_set_level(&__default_logger_root, 0);
+    core_pos_t where = core_pos_init();
+    core_stream_t *src =
+        core_mem_stream_init(&backing, str, smcc_strlen(str), false);
+
+    const cbool parsed = lex_parse_number(src, &where, output);
+    return parsed;
+}
+
+/*
+ * Expect `str` to parse as a number with the value `expect`.
+ * `_output` starts at 0 because a failed parse leaves it unwritten and the
+ * second check still reads it.
+ */
+#define CHECK_VALID(str, expect)                                               \
+    do {                                                                       \
+        usize _output = 0;                                                     \
+        cbool ret = check(str, expect, &_output);                              \
+        TEST_CHECK(ret == true);                                               \
+        TEST_CHECK(_output == expect);                                         \
+    } while (0)
+
+/* Expect lex_parse_number to reject `str`. */
+#define CHECK_INVALID(str)                                                     \
+    do {                                                                       \
+        usize _output = 0;                                                     \
+        cbool ret = check(str, 0, &_output);                                   \
+        TEST_CHECK(ret == false);                                              \
+    } while (0)
+
+/* Hexadecimal literals: 0x prefix followed by digits in either letter case. */
+void test_simple_hex(void) {
+    TEST_CASE("lowercase hex");
+    CHECK_VALID("0xff", 255);
+    CHECK_VALID("0x0", 0);
+    CHECK_VALID("0xa", 10);
+    CHECK_VALID("0xf", 15);
+    CHECK_VALID("0x1a", 26);
+
+    TEST_CASE("uppercase hex");
+    CHECK_VALID("0xFF", 255);
+    CHECK_VALID("0xA0", 160);
+    CHECK_VALID("0xCAFEBABE", 3405691582);
+
+    TEST_CASE("mixed case hex");
+    CHECK_VALID("0xFf", 255);
+    CHECK_VALID("0xCaFeBaBe", 3405691582);
+
+    TEST_CASE("larger hex values");
+    CHECK_VALID("0xff00", 65280);
+    CHECK_VALID("0xFFFF", 65535);
+
+    TEST_CASE("invalid hex");
+    CHECK_INVALID("0xG");  // Invalid hex digit
+    CHECK_INVALID("0xyz"); // Invalid prefix
+    CHECK_INVALID("0x");   // Incomplete hex
+}
+
+/* Octal literals: a leading 0 followed by digits 0-7. */
+void test_simple_oct(void) {
+    TEST_CASE("basic octal");
+    CHECK_VALID("00", 0);
+    CHECK_VALID("01", 1);
+    CHECK_VALID("07", 7);
+
+    TEST_CASE("multi-digit octal");
+    CHECK_VALID("010", 8);
+    CHECK_VALID("017", 15);
+    CHECK_VALID("077", 63);
+
+    TEST_CASE("larger octal values");
+    CHECK_VALID("0177", 127);
+    CHECK_VALID("0377", 255);
+    CHECK_VALID("0777", 511);
+
+    TEST_CASE("invalid octal");
+    CHECK_INVALID("08"); // Invalid octal digit
+    CHECK_INVALID("09"); // Invalid octal digit
+}
+
+/* Decimal literals, including a lone 0. */
+void test_simple_dec(void) {
+    TEST_CASE("single digits");
+    CHECK_VALID("0", 0);
+    CHECK_VALID("1", 1);
+    CHECK_VALID("9", 9);
+
+    TEST_CASE("multi-digit decimal");
+    CHECK_VALID("10", 10);
+    CHECK_VALID("42", 42);
+    CHECK_VALID("123", 123);
+
+    TEST_CASE("larger decimal values");
+    CHECK_VALID("999", 999);
+    CHECK_VALID("1234", 1234);
+    CHECK_VALID("65535", 65535);
+}
+
+/* Binary literals: 0b prefix followed by digits 0 and 1. */
+void test_simple_bin(void) {
+    TEST_CASE("basic binary");
+    CHECK_VALID("0b0", 0);
+    CHECK_VALID("0b1", 1);
+
+    TEST_CASE("multi-digit binary");
+    CHECK_VALID("0b10", 2);
+    CHECK_VALID("0b11", 3);
+    CHECK_VALID("0b100", 4);
+    CHECK_VALID("0b1010", 10);
+
+    TEST_CASE("larger binary values");
+    CHECK_VALID("0b1111", 15);
+    CHECK_VALID("0b11111111", 255);
+    CHECK_VALID("0b10101010", 170);
+
+    TEST_CASE("invalid binary");
+    CHECK_INVALID("0b2"); // Invalid binary digit
+    CHECK_INVALID("0b3"); // Invalid binary digit
+    CHECK_INVALID("0b");  // Incomplete binary
+}
+
+/* Inputs that must be rejected regardless of base. */
+void test_edge_cases(void) {
+    TEST_CASE("empty string");
+    CHECK_INVALID(""); // Empty string
+
+    TEST_CASE("non-numeric strings");
+    CHECK_INVALID("abc"); // Non-numeric
+    CHECK_INVALID("xyz"); // Non-numeric
+
+    TEST_CASE("mixed invalid formats");
+    CHECK_INVALID("0x1G"); // Mixed valid/invalid hex
+    CHECK_INVALID("0b12"); // Mixed valid/invalid binary
+}
+
+/* acutest test table; the {NULL, NULL} entry terminates the list. */
+TEST_LIST = {
+    {"test_simple_hex", test_simple_hex}, {"test_simple_oct", test_simple_oct},
+    {"test_simple_dec", test_simple_dec}, {"test_simple_bin", test_simple_bin},
+    {"test_edge_cases", test_edge_cases}, {NULL, NULL},
+};
--- a/libs/lex_parser/tests/test_skip_block_comment.c
+++ b/libs/lex_parser/tests/test_skip_block_comment.c
@@ -0,0 +1,50 @@
+// test_skip_block_comment.c
+#include <lex_parser.h>
+#include <utest/acutest.h>
+
+/*
+ * Run lex_parse_skip_block_comment over the in-memory text `str`, then read
+ * what is left in the stream (at most 255 characters) and, unless
+ * `expect_remaining` is NULL, check that it equals `expect_remaining`.
+ */
+void check_skip_block_comment(const char *str, const char *expect_remaining) {
+    core_mem_stream_t backing;
+
+    log_set_level(&__default_logger_root, 0);
+    core_pos_t where = core_pos_init();
+    core_stream_t *src =
+        core_mem_stream_init(&backing, str, smcc_strlen(str), false);
+
+    lex_parse_skip_block_comment(src, &where);
+
+    // Collect the remaining content; the buffer is zero-filled, so the
+    // collected text is always NUL-terminated.
+    char buffer[256] = {0};
+    int used = 0;
+    for (;;) {
+        const int c = core_stream_next_char(src);
+        if (c == core_stream_eof || used >= 255) {
+            break;
+        }
+        buffer[used++] = (char)c;
+    }
+
+    if (expect_remaining == NULL) {
+        return;
+    }
+    TEST_CHECK(strcmp(buffer, expect_remaining) == 0);
+}
+
+/* A single-line block comment is skipped; text after it stays in the stream. */
+void test_simple_block_comment(void) {
+    TEST_CASE("simple block comments");
+    check_skip_block_comment("/* comment */", "");
+    check_skip_block_comment("/* comment */ int x;", " int x;");
+}
+
+/* A block comment containing a line feed is skipped as a whole. */
+void test_multiline_block_comment(void) {
+    TEST_CASE("multiline block comments");
+    check_skip_block_comment("/* line1\nline2 */", "");
+    check_skip_block_comment("/* line1\nline2 */ int x;", " int x;");
+}
+
+/* Runs of asterisks inside the comment do not end it early. */
+void test_nested_asterisk_block_comment(void) {
+    TEST_CASE("nested asterisk block comments");
+    check_skip_block_comment("/* *** */", "");
+    check_skip_block_comment("/* *** */ int x;", " int x;");
+}
+
+/* acutest test table; the {NULL, NULL} entry terminates the list. */
+TEST_LIST = {
+    {"test_simple_block_comment", test_simple_block_comment},
+    {"test_multiline_block_comment", test_multiline_block_comment},
+    {"test_nested_asterisk_block_comment", test_nested_asterisk_block_comment},
+    {NULL, NULL},
+};
--- a/libs/lex_parser/tests/test_skip_line.c
+++ b/libs/lex_parser/tests/test_skip_line.c
@@ -0,0 +1,49 @@
+// test_skip_line.c
+#include <lex_parser.h>
+#include <utest/acutest.h>
+
+/*
+ * Run lex_parse_skip_line over the in-memory text `str`, then read what is
+ * left in the stream (at most 255 characters) and, unless `expect_remaining`
+ * is NULL, check that it equals `expect_remaining`.
+ */
+void check_skip_line(const char *str, const char *expect_remaining) {
+    core_mem_stream_t backing;
+
+    log_set_level(&__default_logger_root, 0);
+    core_pos_t where = core_pos_init();
+    core_stream_t *src =
+        core_mem_stream_init(&backing, str, smcc_strlen(str), false);
+
+    lex_parse_skip_line(src, &where);
+
+    // Collect the remaining content; the buffer is zero-filled, so the
+    // collected text is always NUL-terminated.
+    char buffer[256] = {0};
+    int used = 0;
+    for (;;) {
+        const int c = core_stream_next_char(src);
+        if (c == core_stream_eof || used >= 255) {
+            break;
+        }
+        buffer[used++] = (char)c;
+    }
+
+    if (expect_remaining == NULL) {
+        return;
+    }
+    TEST_CHECK(strcmp(buffer, expect_remaining) == 0);
+}
+
+/* A line ending in LF is skipped together with the LF. */
+void test_simple_line_comment(void) {
+    TEST_CASE("simple line comments");
+    check_skip_line("// comment\n", "");
+    check_skip_line("// comment\nint x;", "int x;");
+}
+
+/* A line ending in CR LF is skipped together with both terminator bytes. */
+void test_crlf_line_comment(void) {
+    TEST_CASE("CRLF line comments");
+    check_skip_line("// comment\r\n", "");
+    check_skip_line("// comment\r\nint x;", "int x;");
+}
+
+/* A last line without a terminator is skipped up to end of input. */
+void test_eof_line_comment(void) {
+    TEST_CASE("EOF line comments");
+    check_skip_line("// comment", "");
+}
+
+/* acutest test table; the {NULL, NULL} entry terminates the list. */
+TEST_LIST = {
+    {"test_simple_line_comment", test_simple_line_comment},
+    {"test_crlf_line_comment", test_crlf_line_comment},
+    {"test_eof_line_comment", test_eof_line_comment},
+    {NULL, NULL},
+};
--- a/libs/lex_parser/tests/test_string.c
+++ b/libs/lex_parser/tests/test_string.c
@@ -0,0 +1,62 @@
+// test_string.c
+#include <lex_parser.h>
+#include <utest/acutest.h>
+
+/*
+ * Run lex_parse_string over the in-memory text `str`, collecting the result
+ * in `output`. Returns false if parsing fails; otherwise returns whether the
+ * decoded text equals `expect` (or true when `expect` is NULL).
+ */
+cbool check_string(const char *str, const char *expect, cstring_t *output) {
+    core_mem_stream_t backing;
+
+    log_set_level(&__default_logger_root, 0);
+    core_pos_t where = core_pos_init();
+    core_stream_t *src =
+        core_mem_stream_init(&backing, str, smcc_strlen(str), false);
+
+    const cbool parsed = lex_parse_string(src, &where, output);
+    if (!parsed || expect == NULL) {
+        return parsed;
+    }
+    return strcmp(output->data, expect) == 0;
+}
+
+/* Expect the literal `str` to parse and decode to exactly `expect`. */
+#define CHECK_STRING_VALID(str, expect)                                        \
+    do {                                                                       \
+        cstring_t _output = cstring_new();                                     \
+        cbool ret = check_string(str, expect, &_output);                       \
+        TEST_CHECK(ret == true);                                               \
+        TEST_CHECK(strcmp(_output.data, expect) == 0);                         \
+        cstring_free(&_output);                                                \
+    } while (0)
+
+/* Expect lex_parse_string to reject `str`. */
+#define CHECK_STRING_INVALID(str)                                              \
+    do {                                                                       \
+        cstring_t _output = cstring_new();                                     \
+        cbool ret = check_string(str, NULL, &_output);                         \
+        TEST_CHECK(ret == false);                                              \
+        cstring_free(&_output);                                                \
+    } while (0)
+
+/* Literals without escapes decode to the text between the quotes. */
+void test_simple_string(void) {
+    TEST_CASE("simple strings");
+    CHECK_STRING_VALID("\"\"", "");
+    CHECK_STRING_VALID("\"hello\"", "hello");
+    CHECK_STRING_VALID("\"hello world\"", "hello world");
+}
+
+/* Simple escape sequences inside a literal are decoded. */
+void test_escape_string(void) {
+    TEST_CASE("escape strings");
+    CHECK_STRING_VALID("\"\\n\"", "\n");
+    CHECK_STRING_VALID("\"\\t\"", "\t");
+    CHECK_STRING_VALID("\"\\\"\"", "\"");
+    CHECK_STRING_VALID("\"Hello\\nWorld\"", "Hello\nWorld");
+}
+
+/* A missing closing quote and a raw line feed inside a literal are rejected. */
+void test_invalid_string(void) {
+    TEST_CASE("invalid strings");
+    CHECK_STRING_INVALID("\"unterminated");
+    CHECK_STRING_INVALID("\"newline\n\"");
+}
+
+/* acutest test table; the {NULL, NULL} entry terminates the list. */
+TEST_LIST = {
+    {"test_simple_string", test_simple_string},
+    {"test_escape_string", test_escape_string},
+    {"test_invalid_string", test_invalid_string},
+    {NULL, NULL},
+};
--- a/libs/lexer/cbuild.toml
+++ b/libs/lexer/cbuild.toml
@@ -2,4 +2,7 @@
 name = "smcc_lex"
 version = "0.1.0"

-dependencies = [{ name = "libcore", path = "../../runtime/libcore" }]
+dependencies = [
+    { name = "libcore", path = "../../runtime/libcore" },
+    { name = "smcc_lex_parser", path = "../lex_parser" },
+]
--- a/libs/lexer/include/lexer.h
+++ b/libs/lexer/include/lexer.h
@@ -9,18 +9,10 @@
 #include "lexer_token.h"
 #include <libcore.h>

-typedef struct lexer_loc {
-    const char *name;
-    usize name_len;
-    usize line;
-    usize column;
-    usize offset;
-} lexer_loc_t;
-
 typedef struct lexer_token {
    token_type_t type;
    core_cvalue_t value;
-    lexer_loc_t loc;
+    core_pos_t loc;
 } lexer_tok_t;

 /**
@@ -30,7 +22,7 @@ typedef struct lexer_token {
 */
 typedef struct cc_lexer {
    core_stream_t *stream;
-    lexer_loc_t pos;
+    core_pos_t pos;
 } smcc_lexer_t;

 /**
--- a/libs/lexer/src/lexer.c
+++ b/libs/lexer/src/lexer.c
@@ -26,6 +26,7 @@ the distribution and installation instructions.
 Chris Fraser / cwf@aya.yale.edu
 David Hanson / drh@drhanson.net
 */
+#include <lex_parser.h>
 #include <lexer.h>
 #include <lexer_log.h>

@@ -76,303 +77,21 @@ static inline int keyword_cmp(const char *name, int len) {

 void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) {
    lexer->stream = stream;
-    lexer->pos = (lexer_loc_t){
-        .name = cstring_as_cstr(&stream->name),
-        .name_len = cstring_len(&stream->name),
-        .line = 1,
-        .column = 1,
-        .offset = 0,
-    };
+    lexer->pos = core_pos_init();
+    // FIXME(review): confirm whether pos.name copies or borrows stream->name
+    lexer->pos.name = cstring_from_cstr(cstring_as_cstr(&stream->name));
 }

-#define stream_reset_char(stream) ((stream)->reset_char(stream))
-#define stream_next_char(stream) ((stream)->next_char(stream))
-#define stream_peek_char(stream) ((stream)->peek_char(stream))
-#define lexer_next_pos(lexer) ((lexer)->pos.column++, (lexer)->pos.offset++)
-#define lexer_next_line(lexer) ((lexer)->pos.line++, (lexer)->pos.column = 1)
 #define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)

-static void skip_newline(smcc_lexer_t *lexer, lexer_tok_t *token) {
-    core_stream_t *stream = lexer->stream;
-    token->type = TOKEN_LINE_COMMENT;
-
-    // 循环直到遇到换行符或文件结束
-    while (1) {
-        int ch = stream_next_char(stream);
-
-        if (ch == core_stream_eof) {
-            // 到达文件末尾，直接返回
-            return;
-        }
-
-        // 更新位置信息
-        lexer_next_pos(lexer);
-        if (ch == '\n') {
-            // 遇到换行符，增加行号并重置列号
-            lexer_next_line(lexer);
-            return;
-        }
-    }
-}
-
-static void skip_block_comment(smcc_lexer_t *lexer, lexer_tok_t *token) {
-    core_stream_t *stream = lexer->stream;
-    token->type = TOKEN_BLOCK_COMMENT;
-    int ch;
-
-    stream_reset_char(stream);
-    ch = stream_next_char(stream);
-    lexer_next_pos(lexer);
-    // FIXME Assertion
-    Assert(ch == '/');
-    ch = stream_next_char(stream);
-    lexer_next_pos(lexer);
-    Assert(ch == '*');
-    // 我们已经识别了 "/*"，现在需要找到 "*/"
-    while (1) {
-        ch = stream_next_char(stream);
-        lexer_next_pos(lexer);
-
-        if (ch == core_stream_eof) {
-            // 未闭合的块注释
-            LEX_WARN("Unterminated block comment");
-            return;
-        }
-
-        // LEX_ERROR("%c", ch);
-
-        // 更新位置信息
-        if (ch == '\n') {
-            lexer_next_line(lexer);
-        } else if (ch == '*') {
-            // 查看下一个字符是否是 '/'
-            int next_ch = stream_peek_char(stream);
-
-            if (next_ch == '/') {
-                // 消费 '/' 字符
-                stream_next_char(stream);
-
-                // 更新位置信息
-                lexer_next_pos(lexer);
-
-                // 成功找到注释结束标记
-                return;
-            }
-        }
-    }
-}
-
-// TODO escape character not enough
-static inline int got_slash(int peek) {
-    switch (peek) {
-    case '\\':
-        return '\\';
-    case '\'':
-        return '\'';
-    case '\"':
-        return '\"';
-    case '\?':
-        return '\?';
-    case '0':
-        return '\0';
-
-    case 'b':
-        return '\b';
-    case 'f':
-        return '\f';
-    case 'n':
-        return '\n';
-    case 'r':
-        return '\r';
-    case 't':
-        return '\t';
-    case 'v':
-        return '\v';
-    default:
-        break;
-    }
-    return -1;
-}
-
-static void parse_char(smcc_lexer_t *lexer, lexer_tok_t *token) {
-    token->loc = lexer->pos;
-    token->type = TOKEN_CHAR_LITERAL;
-    core_stream_t *stream = lexer->stream;
-    stream_reset_char(stream);
-    int ch = stream_peek_char(stream);
-
-    if (ch == core_stream_eof) {
-        LEX_WARN("Unexpected EOF at begin");
-        goto ERR;
-    } else if (ch != '\'') {
-        LEX_WARN("Unexpected character '%c' at begin", ch);
-        goto ERR;
-    }
-    stream_next_char(stream);
-    lexer_next_pos(lexer);
-
-    ch = stream_next_char(stream);
-    lexer_next_pos(lexer);
-
-    if (ch == core_stream_eof) {
-        LEX_WARN("Unexpected EOF at middle");
-        goto ERR;
-    } else if (ch == '\\') {
-        ch = stream_next_char(stream);
-        lexer_next_pos(lexer);
-        if ((ch = got_slash(ch)) == -1) {
-            LEX_ERROR("Invalid escape character");
-            // TODO 特殊情况处理
-            goto ERR;
-        }
-        token->value.ch = ch;
-    } else {
-        token->value.ch = ch;
-    }
-    if ((ch = stream_next_char(stream)) != '\'') {
-        LEX_ERROR("Unclosed character literal '%c' at end, expect `'`", ch);
-        lexer_next_pos(lexer);
-        goto ERR;
-    }
-
-    return;
-ERR:
-    set_err_token(token);
-}
-
-static void parse_string(smcc_lexer_t *lexer, lexer_tok_t *token) {
-    token->loc = lexer->pos;
-    token->type = TOKEN_STRING_LITERAL;
-    core_stream_t *stream = lexer->stream;
-    stream_reset_char(stream);
-    int ch = stream_peek_char(stream);
-
-    if (ch == core_stream_eof) {
-        LEX_WARN("Unexpected EOF at begin");
-        goto ERR;
-    } else if (ch != '"') {
-        LEX_WARN("Unexpected character '%c' at begin", ch);
-        goto ERR;
-    }
-    stream_next_char(stream);
-    lexer_next_pos(lexer);
-
-    int base = 0;
-    cstring_t str = cstring_new();
-    while (1) {
-        ch = stream_peek_char(stream);
-
-        if (ch == core_stream_eof) {
-            LEX_ERROR("Unexpected EOF at string literal");
-            break;
-        } else if (ch == '\n') {
-            LEX_ERROR("Unexpected newline at string literal");
-            break;
-        } else if (ch == '\\') {
-            // TODO bad practice and maybe bugs here
-            stream_next_char(stream);
-            ch = stream_next_char(stream);
-            int val = got_slash(ch);
-            if (val == -1) {
-                LEX_ERROR("Invalid escape character it is \\%c [%d]", ch, ch);
-            } else {
-                cstring_push(&str, val);
-                continue;
-            }
-        } else if (ch == '"') {
-            stream_next_char(stream);
-            lexer_next_pos(lexer);
-            break;
-        }
-
-        stream_next_char(stream);
-        lexer_next_pos(lexer);
-        cstring_push(&str, ch);
-    }
-
-    token->value.cstr.data = (char *)cstring_as_cstr(&str);
-    token->value.cstr.len = cstring_len(&str);
-    return;
-ERR:
-    set_err_token(token);
-}
-
-static void parse_number(smcc_lexer_t *lexer, lexer_tok_t *token) {
-    token->loc = lexer->pos;
-    core_stream_t *stream = lexer->stream;
-    stream_reset_char(stream);
-    int ch = stream_peek_char(stream);
-    int base = 0;
-    if (ch == core_stream_eof) {
-        LEX_WARN("Unexpected EOF at begin");
-        goto ERR;
-    } else if (ch == '0') {
-        ch = stream_peek_char(stream);
-        if (ch == 'x' || ch == 'X') {
-            base = 16;
-            stream_next_char(stream);
-            lexer_next_pos(lexer);
-            stream_next_char(stream);
-            lexer_next_pos(lexer);
-        } else if (ch == 'b' || ch == 'B') {
-            // FIXME C23 external integer base
-            base = 2;
-            stream_next_char(stream);
-            lexer_next_pos(lexer);
-            stream_next_char(stream);
-            lexer_next_pos(lexer);
-        } else if (ch >= '0' && ch <= '7') {
-            base = 8;
-            stream_next_char(stream);
-            lexer_next_pos(lexer);
-        } else {
-            base = 10;
-        }
-    } else {
-        base = 10;
-    }
-
-    // 解析整数部分
-    stream_reset_char(stream);
-    int tmp = 0;
-    token->value.n = 0;
-    while (1) {
-        ch = stream_peek_char(stream);
-
-        if (ch == core_stream_eof) {
-            break;
-        } else if (ch >= 'a' && ch <= 'z') {
-            tmp = ch - 'a' + 10;
-        } else if (ch >= 'A' && ch <= 'Z') {
-            tmp = ch - 'A' + 10;
-        } else if (ch >= '0' && ch <= '9') {
-            tmp = ch - '0';
-        } else {
-            break;
-        }
-
-        if (tmp >= base) {
-            LOG_ERROR("Invalid digit");
-            break;
-        }
-
-        stream_next_char(stream);
-        lexer_next_pos(lexer);
-        token->value.n = token->value.n * base + tmp;
-        // TODO number overflow
-    }
-
-    token->type = TOKEN_INT_LITERAL;
-    return;
-ERR:
-    set_err_token(token);
-}
-
 static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
    token->loc = lexer->pos;
    core_stream_t *stream = lexer->stream;
-    stream_reset_char(stream);
-    int ch = stream_peek_char(stream);
+    core_stream_reset_char(stream);
+    int ch = core_stream_peek_char(stream);
+
+    usize n;
+    cstring_t str = cstring_new();

    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
@@ -384,9 +103,9 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {

    const char line[] = "line";

-    for (int i = 0; i < sizeof(line); i++) {
-        ch = stream_next_char(stream);
-        lexer_next_pos(lexer);
+    for (int i = 0; i < (int)sizeof(line); i++) {
+        ch = core_stream_next_char(stream);
+        core_pos_next(&lexer->pos);
        if (ch != line[i]) {
            LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
                     "it will be ignored");
@@ -394,38 +113,36 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
        }
    }

-    parse_number(lexer, token);
-    if (token->type != TOKEN_INT_LITERAL) {
+    if (lex_parse_number(lexer->stream, &lexer->pos, &n) == false) {
        LEX_ERROR("Invalid line number");
        goto SKIP_LINE;
    }

-    if (stream_next_char(stream) != ' ') {
-        skip_newline(lexer, token);
-        token->loc.line = token->value.n;
+    if (core_stream_next_char(stream) != ' ') {
+        lex_parse_skip_line(lexer->stream, &lexer->pos);
+        token->loc.line = n; // lex_parse_number() stored the value in n; token->value.n is not written here
    }

-    if (stream_peek_char(stream) != '"') {
+    if (core_stream_peek_char(stream) != '"') {
        LEX_ERROR("Invalid `#` line");
        goto SKIP_LINE;
    }
-    parse_string(lexer, token);
-    if (token->type != TOKEN_STRING_LITERAL) {
+    if (lex_parse_string(lexer->stream, &lexer->pos, &str) == false) {
        LEX_ERROR("Invalid filename");
        goto SKIP_LINE;
    }

-    skip_newline(lexer, token);
-    token->loc.line = token->value.n;
+    lex_parse_skip_line(lexer->stream, &lexer->pos);
+    token->loc.line = n;
    // FIXME memory leak
-    token->loc.name = cstring_as_cstr((const cstring_t *)&token->value.cstr);
-    token->loc.name_len = cstring_len((const cstring_t *)&token->value.cstr);
-
+    token->loc.name = cstring_from_cstr(cstring_as_cstr(&str));
+    cstring_free(&str);
    return;
 SKIP_LINE:
-    skip_newline(lexer, token);
+    lex_parse_skip_line(lexer->stream, &lexer->pos);
 ERR:
    set_err_token(token);
+    cstring_free(&str);
 }

 // /zh/c/language/operator_arithmetic.html
@@ -434,24 +151,24 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
    token->type = TOKEN_UNKNOWN;
    core_stream_t *stream = lexer->stream;

-    stream_reset_char(stream);
+    core_stream_reset_char(stream);
    token_type_t type = TOKEN_UNKNOWN;
-    int ch = stream_peek_char(stream);
+    int ch = core_stream_peek_char(stream);

    // once step
    switch (ch) {
    case '=':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '=':
            type = TOKEN_EQ;
            goto double_char;
        default:
-            stream_reset_char(stream), type = TOKEN_ASSIGN;
+            core_stream_reset_char(stream), type = TOKEN_ASSIGN;
            break;
        }
        break;
    case '+':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '+':
            type = TOKEN_ADD_ADD;
            goto double_char;
@@ -459,12 +176,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            type = TOKEN_ASSIGN_ADD;
            goto double_char;
        default:
-            stream_reset_char(stream), type = TOKEN_ADD;
+            core_stream_reset_char(stream), type = TOKEN_ADD;
            break;
        }
        break;
    case '-':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '-':
            type = TOKEN_SUB_SUB;
            goto double_char;
@@ -475,48 +192,50 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            type = TOKEN_DEREF;
            goto double_char;
        default:
-            stream_reset_char(stream), type = TOKEN_SUB;
+            core_stream_reset_char(stream), type = TOKEN_SUB;
            break;
        }
        break;
    case '*':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '=':
            type = TOKEN_ASSIGN_MUL;
            goto double_char;
        default:
-            stream_reset_char(stream), type = TOKEN_MUL;
+            core_stream_reset_char(stream), type = TOKEN_MUL;
            break;
        }
        break;
    case '/':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '=':
            type = TOKEN_ASSIGN_DIV;
            goto double_char;
        case '/':
-            skip_newline(lexer, token);
+            lex_parse_skip_line(lexer->stream, &lexer->pos);
+            token->type = TOKEN_LINE_COMMENT;
            goto END;
        case '*':
-            skip_block_comment(lexer, token);
+            lex_parse_skip_block_comment(lexer->stream, &lexer->pos);
+            token->type = TOKEN_BLOCK_COMMENT;
            goto END;
        default:
-            stream_reset_char(stream), type = TOKEN_DIV;
+            core_stream_reset_char(stream), type = TOKEN_DIV;
            break;
        }
        break;
    case '%':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '=':
            type = TOKEN_ASSIGN_MOD;
            goto double_char;
        default:
-            stream_reset_char(stream), type = TOKEN_MOD;
+            core_stream_reset_char(stream), type = TOKEN_MOD;
            break;
        }
        break;
    case '&':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '&':
            type = TOKEN_AND_AND;
            goto double_char;
@@ -524,12 +243,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            type = TOKEN_ASSIGN_AND;
            goto double_char;
        default:
-            stream_reset_char(stream), type = TOKEN_AND;
+            core_stream_reset_char(stream), type = TOKEN_AND;
            break;
        }
        break;
    case '|':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '|':
            type = TOKEN_OR_OR;
            goto double_char;
@@ -537,27 +256,27 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            type = TOKEN_ASSIGN_OR;
            goto double_char;
        default:
-            stream_reset_char(stream), type = TOKEN_OR;
+            core_stream_reset_char(stream), type = TOKEN_OR;
            break;
        }
        break;
    case '^':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '=':
            type = TOKEN_ASSIGN_XOR;
            goto double_char;
        default:
-            stream_reset_char(stream), type = TOKEN_XOR;
+            core_stream_reset_char(stream), type = TOKEN_XOR;
            break;
        }
        break;
    case '<':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '=':
            type = TOKEN_LE;
            goto double_char;
        case '<': {
-            if (stream_peek_char(stream) == '=') {
+            if (core_stream_peek_char(stream) == '=') {
                type = TOKEN_ASSIGN_L_SH;
                goto triple_char;
            } else {
@@ -567,17 +286,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            break;
        }
        default:
-            stream_reset_char(stream), type = TOKEN_LT;
+            core_stream_reset_char(stream), type = TOKEN_LT;
            break;
        }
        break;
    case '>':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '=':
            type = TOKEN_GE;
            goto double_char;
        case '>': {
-            if (stream_peek_char(stream) == '=') {
+            if (core_stream_peek_char(stream) == '=') {
                type = TOKEN_ASSIGN_R_SH;
                goto triple_char;
            } else {
@@ -587,7 +306,7 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            break;
        }
        default:
-            stream_reset_char(stream), type = TOKEN_GT;
+            core_stream_reset_char(stream), type = TOKEN_GT;
            break;
        }
        break;
@@ -595,12 +314,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
        type = TOKEN_BIT_NOT;
        break;
    case '!':
-        switch (stream_peek_char(stream)) {
+        switch (core_stream_peek_char(stream)) {
        case '=':
            type = TOKEN_NEQ;
            goto double_char;
        default:
-            stream_reset_char(stream), type = TOKEN_NOT;
+            core_stream_reset_char(stream), type = TOKEN_NOT;
            break;
        }
        break;
@@ -632,8 +351,8 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
        type = TOKEN_COLON;
        break;
    case '.':
-        if (stream_peek_char(stream) == '.' &&
-            stream_peek_char(stream) == '.') {
+        if (core_stream_peek_char(stream) == '.' &&
+            core_stream_peek_char(stream) == '.') {
            type = TOKEN_ELLIPSIS;
            goto triple_char;
        }
@@ -643,17 +362,14 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
        type = TOKEN_COND;
        break;
    case '\v':
-    case '\r':
    case '\f':
    case ' ':
    case '\t':
        type = TOKEN_BLANK;
        break;
+    case '\r':
    case '\n':
-        // you need to flush a newline or blank
-        stream_next_char(stream);
-        lexer_next_line(lexer);
-        // FIXME some error
+        lex_parse_skip_endline(lexer->stream, &lexer->pos);
        token->type = TOKEN_BLANK;
        goto END;
    case '#':
@@ -665,17 +381,45 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
        // EOF
        type = TOKEN_EOF;
        break;
-    case '\'':
-        parse_char(lexer, token);
+    case '\'': {
+        token->loc = lexer->pos;
+        token->type = TOKEN_CHAR_LITERAL;
+        int ch = lex_parse_char(lexer->stream, &lexer->pos);
+        if (ch == core_stream_eof) {
+            LEX_ERROR("Unexpected character literal");
+            token->type = TOKEN_UNKNOWN;
+        } else {
+            token->value.ch = ch;
+        }
        goto END;
-    case '"':
-        parse_string(lexer, token);
+    }
+    case '"': {
+        token->loc = lexer->pos;
+        token->type = TOKEN_STRING_LITERAL;
+        cstring_t output = cstring_new();
+        if (lex_parse_string(lexer->stream, &lexer->pos, &output) == true) {
+            token->value.cstr.data = cstring_as_cstr(&output);
+            token->value.cstr.len = cstring_len(&output);
+        } else {
+            LEX_ERROR("Unexpected string literal");
+            token->type = TOKEN_UNKNOWN;
+        }
+
        goto END;
+    }
        /* clang-format off */
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        /* clang-format on */
-        parse_number(lexer, token);
+        token->loc = lexer->pos;
+        token->type = TOKEN_INT_LITERAL;
+        usize output;
+        if (lex_parse_number(lexer->stream, &lexer->pos, &output) == true) {
+            token->value.n = output;
+        } else {
+            LEX_ERROR("Unexpected number literal");
+            token->type = TOKEN_UNKNOWN;
+        }
        goto END;
        /* clang-format off */
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
@@ -687,25 +431,9 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
        /* clang-format on */
-        // TOKEN_IDENT
-        // TODO
-        // if ((ch == 'L' && ch == '\'') || (ch == 'L' && ch == '"')) {
-        //     LEX_ERROR("unsupport wide-character char literal by `L` format");
-        // }
        cstring_t str = cstring_new();
-        cstring_push(&str, stream_next_char(stream));
-        lexer_next_pos(lexer);
-        while (1) {
-            ch = stream_peek_char(stream);
-            if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
-                (ch == '_') || (ch >= '0' && ch <= '9')) {
-                stream_next_char(stream);
-                lexer_next_pos(lexer);
-                cstring_push(&str, ch);
-                continue;
-            }
-            break;
-        }
+        cbool ret = lex_parse_identifier(lexer->stream, &lexer->pos, &str);
+        Assert(ret == true);

        int res = keyword_cmp(cstring_as_cstr(&str), cstring_len(&str));
        if (res == -1) {
@@ -724,14 +452,14 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
    }
    goto once_char;
 triple_char:
-    stream_next_char(stream);
-    lexer_next_pos(lexer);
+    core_stream_next_char(stream);
+    core_pos_next(&lexer->pos);
 double_char:
-    stream_next_char(stream);
-    lexer_next_pos(lexer);
+    core_stream_next_char(stream);
+    core_pos_next(&lexer->pos);
 once_char:
-    stream_next_char(stream);
-    lexer_next_pos(lexer);
+    core_stream_next_char(stream);
+    core_pos_next(&lexer->pos);
    token->type = type;
 END:
    LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(token->type),
@@ -746,6 +474,7 @@ void lexer_get_valid_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
        type = get_tok_subtype(token->type);
        AssertFmt(type != TK_BASIC_INVALID, "Invalid token: `%s` at %s:%d:%d",
                  get_tok_name(token->type), token->loc.name, token->loc.line,
-                  token->loc.column);
+                  token->loc.col);
+        Assert(type != TK_BASIC_INVALID);
    } while (type == TK_BASIC_EMPTYSPACE || type == TK_BASIC_COMMENT);
 }
--- a/libs/lexer/tests/test_run.c
+++ b/libs/lexer/tests/test_run.c
@@ -40,7 +40,6 @@ int main(int argc, char *argv[]) {
        perror("open file failed");
        return 1;
    }
-    printf("open file success\n");

    if (fseek(fp, 0, SEEK_END) != 0) {
        perror("fseek failed");
@@ -79,7 +78,7 @@ int main(int argc, char *argv[]) {
            break;
        }
        LOG_DEBUG("token `%s` at %s:%u:%u", get_tok_name(tok.type),
-                  tok.loc.name, tok.loc.line, tok.loc.column);
+                  cstring_as_cstr(&tok.loc.name), tok.loc.line, tok.loc.col); // %s needs a char pointer
        Assert(tok.loc.offset <= fsize);
        // LOG_DEBUG("%s", tok.val.str);
        // printf("line: %d, column: %d, type: %3d, typename: %s\n",
--- a/runtime/libcore/include/core_pos.h
+++ b/runtime/libcore/include/core_pos.h
@@ -0,0 +1,46 @@
+#ifndef __SMCC_CORE_POS_H__
+#define __SMCC_CORE_POS_H__
+
+#include "core_str.h"
+#include "core_type.h"
+
+/**
+ * @brief Position of a character inside a named input.
+ */
+typedef struct {
+    cstring_t name; /**< Name of the input (the lexer sets it from the stream name). */
+    usize line;     /**< Line number; core_pos_init() starts it at 1. */
+    usize col;      /**< Column number; starts at 1 and is reset to 1 on a new line. */
+    usize offset;   /**< Starts at 0; incremented once by each advance helper call. */
+} core_pos_t;
+
+/**
+ * @brief Build the initial position: name from cstring_new(), line 1, col 1, offset 0.
+ * @return The initialised position, by value.
+ */
+static inline core_pos_t core_pos_init(void) {
+    return (core_pos_t){.name = cstring_new(), .line = 1, .col = 1, .offset = 0};
+}
+
+/**
+ * @brief Advance by one character on the current line (offset and col each grow by 1).
+ * @param pos Position to update; dereferenced without a null check.
+ */
+static inline void core_pos_next(core_pos_t *pos) {
+    pos->offset++;
+    pos->col++;
+}
+
+/**
+ * @brief Move to the start of the next line.
+ *
+ * Increments offset by one, increments line and resets col to 1.
+ * @param pos Position to update; dereferenced without a null check.
+ */
+static inline void core_pos_next_line(core_pos_t *pos) {
+    pos->offset++;
+    pos->line++;
+    pos->col = 1;
+}
+
+#endif /* __SMCC_CORE_POS_H__ */
--- a/runtime/libcore/include/core_str.h
+++ b/runtime/libcore/include/core_str.h
@@ -54,12 +54,15 @@ static inline cstring_t cstring_from_cstr(const char *s) {
 * @param str 要被释放的字符串指针
 */
 static inline void cstring_free(cstring_t *str) {
-    if (str && str->data && str->cap != 0) {
+    if (str == null) { // nothing to do for a null handle
+        return;
+    }
+    if (str->data != null && str->cap != 0) { // only a buffer with non-zero capacity is released
        smcc_free(str->data);
        str->data = null;
-        str->size = 0;
-        str->cap = 0;
    }
+    str->size = 0; // size and cap are cleared even when no buffer was released
+    str->cap = 0;  // note: data itself is only nulled inside the branch above
 }

 /**
--- a/runtime/libcore/include/libcore.h
+++ b/runtime/libcore/include/libcore.h
@@ -21,6 +21,7 @@
 #define SMCC_STR(str) _SMCC_STR(str)

 #define SMCC_ARRLEN(arr) (sizeof(arr) / sizeof(arr[0]))
+#include <core_pos.h>
 #include <core_str.h>
 #include <core_stream.h>
 #include <core_vec.h>
--- a/runtime/libcore/src/stream.c
+++ b/runtime/libcore/src/stream.c
@@ -69,11 +69,16 @@ static void free_stream(core_stream_t *_stream) {

 core_stream_t *core_mem_stream_init(core_mem_stream_t *stream, const char *data,
                                    usize length, cbool need_copy) {
-    if (stream == null || data == NULL || length == 0) {
+    if (stream == null || data == null) {
        LOG_ERROR("param error");
        return null;
    }

+    if (length == 0) {
+        LOG_WARN("input memory is empty");
+        need_copy = false;
+    }
+
    stream->owned = need_copy;
    if (need_copy) {
        char *buf = (char *)smcc_malloc(length);
--- a/runtime/log/include/log.c
+++ b/runtime/log/include/log.c
@@ -79,7 +79,11 @@ void init_logger(logger_t *logger, const char *name) {
    log_set_level(logger, LOG_LEVEL_ALL);
 }

-logger_t *log_get(const char *name) { return &__default_logger_root; }
+logger_t *log_get(const char *name) {
+    // TODO: `name` is ignored for now; the cast only silences -Wunused-parameter
+    (void)name;
+    return &__default_logger_root;
+}

 void log_set_level(logger_t *logger, int level) {
    if (logger)
@@ -95,4 +99,8 @@ void log_set_handler(logger_t *logger, log_handler handler) {
        __default_logger_root.handler = handler;
 }

-void logger_destroy(logger_t *logger) { return; }
+void logger_destroy(logger_t *logger) {
+    // TODO: currently does nothing; the cast only silences -Wunused-parameter
+    (void)logger;
+    return;
+}