refactor(lex_parser): 移除旧的词法解析器实现并更新依赖
移除了 libs/lex_parser 目录下的所有头文件和源文件,包括: - lex_parser.h 和 lex_parser.c 核心解析功能 - 所有测试文件(test_char.c, test_identifier.c, test_number.c, test_skip_block_comment.c, test_skip_line.c, test_string.c) 更新了 lexer 模块的依赖配置,将 lex_parser 替换为 sstream, 同时更新了 lexer.h 中的相关包含头文件和数据结构定义, 简化了 scc_lexer_t 结构体的字段。
This commit is contained in:
@@ -1,32 +0,0 @@
|
||||
#ifndef __SCC_LEX_PARSER_H__
|
||||
#define __SCC_LEX_PARSER_H__
|
||||
|
||||
#include <scc_core.h>
|
||||
|
||||
static inline cbool scc_lex_parse_is_endline(int ch) {
|
||||
return ch == '\n' || ch == '\r';
|
||||
}
|
||||
|
||||
static inline cbool scc_lex_parse_is_whitespace(int ch) {
|
||||
return ch == ' ' || ch == '\t';
|
||||
}
|
||||
|
||||
// TODO identifier check is right?
|
||||
static inline cbool scc_lex_parse_is_identifier_prefix(int ch) {
|
||||
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
|
||||
}
|
||||
|
||||
int scc_lex_parse_char(scc_probe_stream_t *input, scc_pos_t *pos);
|
||||
cbool scc_lex_parse_string(scc_probe_stream_t *input, scc_pos_t *pos,
|
||||
scc_cstring_t *output);
|
||||
cbool scc_lex_parse_number(scc_probe_stream_t *input, scc_pos_t *pos,
|
||||
usize *output);
|
||||
cbool scc_lex_parse_identifier(scc_probe_stream_t *input, scc_pos_t *pos,
|
||||
scc_cstring_t *output);
|
||||
void scc_lex_parse_skip_endline(scc_probe_stream_t *input, scc_pos_t *pos);
|
||||
void scc_lex_parse_skip_block_comment(scc_probe_stream_t *input,
|
||||
scc_pos_t *pos);
|
||||
void scc_lex_parse_skip_line(scc_probe_stream_t *input, scc_pos_t *pos);
|
||||
void scc_lex_parse_skip_whitespace(scc_probe_stream_t *input, scc_pos_t *pos);
|
||||
|
||||
#endif /* __SCC_LEX_PARSER_H__ */
|
||||
@@ -1,434 +0,0 @@
|
||||
#include <lex_parser.h>
|
||||
|
||||
/**
 * @brief Consume one line terminator and move the position to the next line.
 *
 * "\r", "\r\n" and "\n" are each treated as a single line break. If the
 * next character is not a line terminator, nothing is consumed and a
 * warning is logged.
 *
 * @param input stream to read from
 * @param pos   position updated via scc_pos_next_line()
 */
void scc_lex_parse_skip_endline(scc_probe_stream_t *input, scc_pos_t *pos) {
    Assert(input != null && pos != null);

    const int first = scc_probe_stream_peek(input);
    if (first != '\r' && first != '\n') {
        LOG_WARN("not a newline character");
        return;
    }

    scc_probe_stream_next(input);
    if (first == '\r' && scc_probe_stream_peek(input) == '\n') {
        // CRLF: the LF belongs to the same line break
        scc_probe_stream_next(input);
    }
    scc_pos_next_line(pos);
}
|
||||
|
||||
/**
 * @brief Map the character that follows a backslash to the value of the
 *        corresponding simple escape sequence.
 *
 * @param ch character following the backslash
 * @return the value of the escape sequence, or -1 if ch does not name a
 *         simple escape sequence
 *
 * Reference: https://cppreference.cn/w/c/language/escape
 *
 *   \'  single quote      ASCII 0x27
 *   \"  double quote      ASCII 0x22
 *   \?  question mark     ASCII 0x3f
 *   \\  backslash         ASCII 0x5c
 *   \a  audible bell      ASCII 0x07
 *   \b  backspace         ASCII 0x08
 *   \f  form feed         ASCII 0x0c
 *   \n  line feed         ASCII 0x0a
 *   \r  carriage return   ASCII 0x0d
 *   \t  horizontal tab    ASCII 0x09
 *   \v  vertical tab      ASCII 0x0b
 */
static inline int got_simple_escape(int ch) {
    /* clang-format off */
    switch (ch) {
    case '\'': return '\'';
    case '\"': return '\"';
    case '\?': return '\?';
    case '\\': return '\\';
    case 'a':  return '\a';
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';
    case 'v':  return '\v';
    default:   return -1;
    }
    /* clang-format on */
}
|
||||
|
||||
/**
 * @brief Consume characters up to and including the next line terminator.
 *
 * Stops at end of input without consuming anything further. Ordinary
 * characters advance pos by one; the line terminator is handled by
 * scc_lex_parse_skip_endline().
 *
 * @param input stream to read from
 * @param pos   position to keep in sync with the stream
 */
void scc_lex_parse_skip_line(scc_probe_stream_t *input, scc_pos_t *pos) {
    scc_probe_stream_t *stream = input;
    Assert(stream != null && pos != null);

    for (;;) {
        const int cur = scc_probe_stream_peek(stream);

        if (cur == scc_stream_eof) {
            break;
        }
        // TODO endline
        if (scc_lex_parse_is_endline(cur)) {
            scc_lex_parse_skip_endline(stream, pos);
            break;
        }

        scc_probe_stream_next(stream);
        scc_pos_next(pos);
    }
}
|
||||
|
||||
void scc_lex_parse_skip_block_comment(scc_probe_stream_t *input,
|
||||
scc_pos_t *pos) {
|
||||
scc_probe_stream_t *stream = input;
|
||||
Assert(stream != null && pos != null);
|
||||
int ch;
|
||||
// scc_probe_stream_reset(stream);
|
||||
ch = scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
// FIXME Assertion
|
||||
Assert(ch == '/');
|
||||
ch = scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
Assert(ch == '*');
|
||||
|
||||
// all ready match `/*`
|
||||
while (1) {
|
||||
// scc_probe_stream_reset(stream);
|
||||
ch = scc_probe_stream_peek(stream);
|
||||
|
||||
if (ch == scc_stream_eof) {
|
||||
LOG_WARN("Unterminated block comment");
|
||||
return;
|
||||
}
|
||||
|
||||
if (scc_lex_parse_is_endline(ch)) {
|
||||
scc_lex_parse_skip_endline(stream, pos);
|
||||
continue;
|
||||
}
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
if (ch == '*') {
|
||||
ch = scc_probe_stream_peek(stream);
|
||||
if (ch == '/') {
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Consume a run of spaces and horizontal tabs.
 *
 * Stops at the first character that scc_lex_parse_is_whitespace()
 * rejects; that character is left in the stream.
 *
 * @param input stream to read from
 * @param pos   position advanced once per consumed character
 */
void scc_lex_parse_skip_whitespace(scc_probe_stream_t *input, scc_pos_t *pos) {
    scc_probe_stream_t *stream = input;
    Assert(stream != null && pos != null);

    while (scc_lex_parse_is_whitespace(scc_probe_stream_peek(stream))) {
        scc_probe_stream_next(stream);
        scc_pos_next(pos);
    }
}
|
||||
|
||||
/**
 * @brief Parse a run of digits in the given base into an unsigned value.
 *
 * Reads characters from [0-9a-zA-Z] and stops at end of input or at the
 * first character outside that set. A letter or digit whose value is not
 * below base is an error.
 *
 * @param input  stream to read from
 * @param pos    position advanced once per consumed digit
 * @param base   numeric base; must be 2, 8, 10 or 16
 * @param output receives the parsed value; written only on success
 * @return true if at least one digit was parsed and the value fits,
 *         false on an invalid digit, on overflow, or if nothing matched
 */
static inline cbool _lex_parse_uint(scc_probe_stream_t *input, scc_pos_t *pos,
                                    int base, usize *output) {
    Assert(input != null && pos != null && output != null);
    if (input == null || pos == null) {
        return false;
    }
    Assert(base == 2 || base == 8 || base == 10 || base == 16);

    // NOTE(review): assumes usize is an unsigned type, so (usize)-1 is its
    // largest value - confirm against scc_core.
    const usize limit = (usize)-1;
    const usize start_offset = pos->offset;
    usize value = 0;

    while (1) {
        int digit;
        const int ch = scc_probe_stream_peek(input);

        if (ch == scc_stream_eof) {
            break;
        } else if (ch >= 'a' && ch <= 'z') {
            digit = ch - 'a' + 10;
        } else if (ch >= 'A' && ch <= 'Z') {
            digit = ch - 'A' + 10;
        } else if (ch >= '0' && ch <= '9') {
            digit = ch - '0';
        } else {
            break;
        }

        if (digit >= base) {
            LOG_ERROR("Invalid digit");
            return false;
        }

        // Reject the digit if value * base + digit would wrap around.
        if (value > (limit - (usize)digit) / (usize)base) {
            LOG_ERROR("Integer literal is too large");
            return false;
        }

        scc_probe_stream_next(input);
        scc_pos_next(pos);
        value = value * (usize)base + (usize)digit;
    }

    if (start_offset == pos->offset) {
        // No digit was consumed.
        return false;
    }

    *output = value;
    return true;
}
|
||||
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @param input
|
||||
* @param pos
|
||||
* @return int
|
||||
* https://cppreference.cn/w/c/language/character_constant
|
||||
*/
|
||||
int scc_lex_parse_char(scc_probe_stream_t *input, scc_pos_t *pos) {
|
||||
scc_probe_stream_t *stream = input;
|
||||
Assert(stream != null && pos != null);
|
||||
int ch = scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
int ret = scc_stream_eof;
|
||||
|
||||
if (ch == scc_stream_eof) {
|
||||
LOG_WARN("Unexpected EOF at begin");
|
||||
goto ERR;
|
||||
} else if (ch != '\'') {
|
||||
LOG_WARN("Unexpected character '%c' at begin", ch);
|
||||
goto ERR;
|
||||
}
|
||||
// scc_probe_stream_next(stream);
|
||||
|
||||
ch = scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
|
||||
if (ch == scc_stream_eof) {
|
||||
LOG_WARN("Unexpected EOF at middle");
|
||||
goto ERR;
|
||||
} else if (ch == '\\') {
|
||||
ch = scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
if (ch == '0') {
|
||||
// 数字转义序列
|
||||
// \nnn 任意八进制值 码元 nnn
|
||||
// FIXME 这里如果返回 0 理论上为错误但是恰好与正确值相同
|
||||
ret = 0;
|
||||
_lex_parse_uint(stream, pos, 8, (usize *)&ret);
|
||||
} else if (ch == 'x') {
|
||||
// TODO https://cppreference.cn/w/c/language/escape
|
||||
// \xn... 任意十六进制值 码元 n... (任意数量的十六进制数字)
|
||||
// 通用字符名
|
||||
TODO();
|
||||
} else if (ch == 'u' || ch == 'U') {
|
||||
// \unnnn (C99 起) Unicode 值在允许范围内;
|
||||
// 可能产生多个码元 码点 U+nnnn
|
||||
// \Unnnnnnnn (C99 起) Unicode 值在允许范围内;
|
||||
// 可能产生多个码元 码点 U+nnnnnnnn
|
||||
TODO();
|
||||
} else if ((ret = got_simple_escape(ch)) == -1) {
|
||||
LOG_ERROR("Invalid escape character");
|
||||
goto ERR;
|
||||
}
|
||||
} else {
|
||||
ret = ch;
|
||||
}
|
||||
if ((ch = scc_probe_stream_next(stream)) != '\'') {
|
||||
LOG_ERROR("Unclosed character literal '%c' at end, expect `'`", ch);
|
||||
scc_pos_next(pos);
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
return ret;
|
||||
ERR:
|
||||
return scc_stream_eof;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @param input
|
||||
* @param pos
|
||||
* @param output
|
||||
* @return cbool
|
||||
* https://cppreference.cn/w/c/language/string_literal
|
||||
*/
|
||||
cbool scc_lex_parse_string(scc_probe_stream_t *input, scc_pos_t *pos,
|
||||
scc_cstring_t *output) {
|
||||
scc_probe_stream_t *stream = input;
|
||||
Assert(stream != null && pos != null && output != null);
|
||||
// scc_probe_stream_reset(stream);
|
||||
int ch = scc_probe_stream_peek(stream);
|
||||
|
||||
Assert(scc_cstring_is_empty(output));
|
||||
if (ch == scc_stream_eof) {
|
||||
LOG_WARN("Unexpected EOF at begin");
|
||||
goto ERR;
|
||||
} else if (ch != '"') {
|
||||
LOG_WARN("Unexpected character '%c' at begin", ch);
|
||||
goto ERR;
|
||||
}
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
|
||||
scc_cstring_t str = scc_cstring_from_cstr("");
|
||||
while (1) {
|
||||
ch = scc_probe_stream_peek(stream);
|
||||
|
||||
if (ch == scc_stream_eof) {
|
||||
LOG_ERROR("Unexpected EOF at string literal");
|
||||
goto ERR;
|
||||
} else if (scc_lex_parse_is_endline(ch)) {
|
||||
LOG_ERROR("Unexpected newline at string literal");
|
||||
goto ERR;
|
||||
} else if (ch == '\\') {
|
||||
// TODO bad practice and maybe bugs here
|
||||
scc_probe_stream_next(stream);
|
||||
ch = scc_probe_stream_next(stream);
|
||||
int val = got_simple_escape(ch);
|
||||
if (val == -1) {
|
||||
LOG_ERROR("Invalid escape character it is \\%c [%d]", ch, ch);
|
||||
} else {
|
||||
scc_cstring_append_ch(&str, val);
|
||||
continue;
|
||||
}
|
||||
} else if (ch == '"') {
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
break;
|
||||
}
|
||||
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
scc_cstring_append_ch(&str, ch);
|
||||
}
|
||||
|
||||
*output = str;
|
||||
return true;
|
||||
ERR:
|
||||
scc_cstring_free(&str);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @param input
|
||||
* @param pos
|
||||
* @param output
|
||||
* @return cbool
|
||||
* https://cppreference.cn/w/c/language/integer_constant
|
||||
*/
|
||||
cbool scc_lex_parse_number(scc_probe_stream_t *input, scc_pos_t *pos,
|
||||
usize *output) {
|
||||
scc_probe_stream_t *stream = input;
|
||||
Assert(stream != null && pos != null && output != null);
|
||||
// scc_probe_stream_reset(stream);
|
||||
int ch = scc_probe_stream_peek(stream);
|
||||
int base = 10; // 默认十进制
|
||||
|
||||
if (ch == scc_stream_eof) {
|
||||
LOG_WARN("Unexpected EOF at begin");
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
if (ch == '0') {
|
||||
// 消费 '0'
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
|
||||
// 查看下一个字符
|
||||
ch = scc_probe_stream_peek(stream);
|
||||
if (ch == 'x' || ch == 'X') {
|
||||
// 十六进制
|
||||
base = 16;
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
} else if (ch == 'b' || ch == 'B') {
|
||||
// 二进制 (C23扩展)
|
||||
base = 2;
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
} else if (ch >= '0' && ch <= '7') {
|
||||
// 八进制
|
||||
base = 8;
|
||||
// 不消费,数字将由 _lex_parse_uint 处理
|
||||
} else if (ch == '8' || ch == '9') {
|
||||
LOG_ERROR("Invalid digit '%d' in octal literal", ch);
|
||||
return false;
|
||||
} else {
|
||||
// 只是0,十进制
|
||||
*output = 0;
|
||||
return true;
|
||||
}
|
||||
} else if (ch >= '1' && ch <= '9') {
|
||||
// 十进制,不消费,由 _lex_parse_uint 处理
|
||||
base = 10;
|
||||
} else {
|
||||
// 无效的数字
|
||||
return false;
|
||||
}
|
||||
|
||||
// 解析整数部分
|
||||
// scc_probe_stream_reset(stream);
|
||||
usize n;
|
||||
if (_lex_parse_uint(stream, pos, base, &n) == false) {
|
||||
// 如果没有匹配任何数字,但输入是 '0',已经处理过了
|
||||
// 对于十进制数字,至少应该有一个数字
|
||||
if (base == 10) {
|
||||
// 单个数字的情况,例如 "1"
|
||||
// 我们需要消费这个数字并返回它的值
|
||||
if (ch >= '1' && ch <= '9') {
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
*output = ch - '0';
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
*output = n;
|
||||
return true;
|
||||
ERR:
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief
|
||||
*
|
||||
* @param input
|
||||
* @param pos
|
||||
* @param output
|
||||
* @return cbool
|
||||
* https://cppreference.cn/w/c/language/identifier
|
||||
*/
|
||||
cbool scc_lex_parse_identifier(scc_probe_stream_t *input, scc_pos_t *pos,
|
||||
scc_cstring_t *output) {
|
||||
Assert(input != null && pos != null && output != null);
|
||||
Assert(scc_cstring_is_empty(output));
|
||||
scc_probe_stream_t *stream = input;
|
||||
// scc_probe_stream_reset(stream);
|
||||
int ch = scc_probe_stream_peek(stream);
|
||||
|
||||
if (ch == scc_stream_eof) {
|
||||
LOG_WARN("Unexpected EOF at begin");
|
||||
} else if (scc_lex_parse_is_identifier_prefix(ch)) {
|
||||
while (1) {
|
||||
scc_cstring_append_ch(output, ch);
|
||||
scc_probe_stream_next(stream);
|
||||
scc_pos_next(pos);
|
||||
ch = scc_probe_stream_peek(stream);
|
||||
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
|
||||
(ch == '_') || (ch >= '0' && ch <= '9')) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
// test_char.c
|
||||
#include <lex_parser.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
cbool check_char(const char *str, int expect, int *output) {
|
||||
log_set_level(&__default_logger_root, 0);
|
||||
scc_pos_t pos = scc_pos_create();
|
||||
scc_mem_probe_stream_t mem_stream;
|
||||
scc_probe_stream_t *stream =
|
||||
scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false);
|
||||
*output = scc_lex_parse_char(stream, &pos);
|
||||
cbool ret1 = *output == expect;
|
||||
scc_probe_stream_reset(stream);
|
||||
*output = scc_lex_parse_char(stream, &pos);
|
||||
cbool ret2 = *output == expect;
|
||||
return ret1 && ret2;
|
||||
}
|
||||
|
||||
/* Check that `str` parses to the character value `expect`. */
#define CHECK_CHAR_VALID(str, expect)                                          \
    do {                                                                       \
        int _output;                                                           \
        cbool ret = check_char(str, expect, &_output);                         \
        TEST_CHECK(ret == true);                                               \
    } while (0)
|
||||
|
||||
/* Check that parsing `str` fails, i.e. yields scc_stream_eof. */
#define CHECK_CHAR_INVALID(str)                                                \
    do {                                                                       \
        int _output;                                                           \
        check_char(str, scc_stream_eof, &_output);                             \
        TEST_CHECK(_output == scc_stream_eof);                                 \
    } while (0)
|
||||
|
||||
/* Plain, unescaped characters: letters, a digit and a space. */
void test_simple_char(void) {
    TEST_CASE("simple chars");

    CHECK_CHAR_VALID("'a'", 'a');
    CHECK_CHAR_VALID("'Z'", 'Z');
    CHECK_CHAR_VALID("'0'", '0');
    CHECK_CHAR_VALID("' '", ' ');
}
|
||||
|
||||
/* Simple escape sequences inside character literals. */
void test_escape_char(void) {
    TEST_CASE("escape chars");

    /* control characters */
    CHECK_CHAR_VALID("'\\n'", '\n');
    CHECK_CHAR_VALID("'\\t'", '\t');
    CHECK_CHAR_VALID("'\\r'", '\r');

    /* escaped punctuation */
    CHECK_CHAR_VALID("'\\\\'", '\\');
    CHECK_CHAR_VALID("'\\''", '\'');
    CHECK_CHAR_VALID("'\\\"'", '\"');
}
|
||||
|
||||
/* Malformed literals: unterminated, empty, too long, dangling escape. */
void test_invalid_char(void) {
    TEST_CASE("invalid chars");

    CHECK_CHAR_INVALID("'");
    CHECK_CHAR_INVALID("''");
    CHECK_CHAR_INVALID("'ab'");
    CHECK_CHAR_INVALID("'\\'");
}
|
||||
|
||||
TEST_LIST = {
|
||||
{"test_simple_char", test_simple_char},
|
||||
{"test_escape_char", test_escape_char},
|
||||
{"test_invalid_char", test_invalid_char},
|
||||
{NULL, NULL},
|
||||
};
|
||||
@@ -1,56 +0,0 @@
|
||||
// test_identifier.c
|
||||
#include <lex_parser.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
cbool check_identifier(const char *str, const char *expect,
|
||||
scc_cstring_t *output) {
|
||||
log_set_level(&__default_logger_root, 0);
|
||||
scc_pos_t pos = scc_pos_create();
|
||||
scc_mem_probe_stream_t mem_stream;
|
||||
scc_probe_stream_t *stream =
|
||||
scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false);
|
||||
|
||||
cbool ret = scc_lex_parse_identifier(stream, &pos, output);
|
||||
if (ret && expect) {
|
||||
return strcmp(output->data, expect) == 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Check that `str` parses as an identifier whose text equals `expect`. */
#define CHECK_IDENTIFIER_VALID(str, expect)                                    \
    do {                                                                       \
        scc_cstring_t _output = scc_cstring_create();                          \
        cbool ret = check_identifier(str, expect, &_output);                   \
        TEST_CHECK(ret == true);                                               \
        TEST_CHECK(strcmp(_output.data, expect) == 0);                         \
        scc_cstring_free(&_output);                                            \
    } while (0)
|
||||
|
||||
/* Check that `str` is rejected by the identifier parser. */
#define CHECK_IDENTIFIER_INVALID(str)                                          \
    do {                                                                       \
        scc_cstring_t _output = scc_cstring_create();                          \
        cbool ret = check_identifier(str, NULL, &_output);                     \
        TEST_CHECK(ret == false);                                              \
        scc_cstring_free(&_output);                                            \
    } while (0)
|
||||
|
||||
/* Identifiers with letters, underscores, trailing digits; one-letter name. */
void test_valid_identifier(void) {
    TEST_CASE("valid identifiers");

    CHECK_IDENTIFIER_VALID("variable", "variable");
    CHECK_IDENTIFIER_VALID("my_var", "my_var");
    CHECK_IDENTIFIER_VALID("_private", "_private");
    CHECK_IDENTIFIER_VALID("Var123", "Var123");
    CHECK_IDENTIFIER_VALID("a", "a");
}
|
||||
|
||||
/* Empty input and a leading digit must both be rejected. */
void test_invalid_identifier(void) {
    TEST_CASE("invalid identifiers");

    CHECK_IDENTIFIER_INVALID("");
    CHECK_IDENTIFIER_INVALID("123var");
}
|
||||
|
||||
TEST_LIST = {
|
||||
{"test_valid_identifier", test_valid_identifier},
|
||||
{"test_invalid_identifier", test_invalid_identifier},
|
||||
{NULL, NULL},
|
||||
};
|
||||
@@ -1,135 +0,0 @@
|
||||
#include <lex_parser.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
/**
 * @brief Run scc_lex_parse_number() over str.
 *
 * @param str    source text to parse
 * @param expect unused here; callers compare the value themselves
 * @param output receives the parsed value when parsing succeeds
 * @return the result of scc_lex_parse_number()
 */
cbool check(const char *str, usize expect, usize *output) {
    // TODO maybe have other logger
    (void)(expect);
    log_set_level(&__default_logger_root, 0);

    scc_mem_probe_stream_t mem_stream;
    scc_pos_t pos = scc_pos_create();
    scc_probe_stream_t *stream =
        scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false);

    const cbool parsed = scc_lex_parse_number(stream, &pos, output);
    return parsed;
}
|
||||
|
||||
/*
 * Check that `str` parses successfully to the value `expect`.
 *
 * `_output` starts at 0 so the comparison and the message never read an
 * indeterminate value when parsing fails. The cast makes the argument
 * match the %llu conversion regardless of how usize is defined.
 */
#define CHECK_VALID(str, expect)                                               \
    do {                                                                       \
        usize _output = 0;                                                     \
        cbool ret = check(str, expect, &_output);                              \
        TEST_CHECK(ret == true);                                               \
        TEST_CHECK(_output == expect);                                         \
        TEST_MSG("Produced: %llu", (unsigned long long)_output);               \
    } while (0)
|
||||
|
||||
/* Check that `str` is rejected by the number parser. */
#define CHECK_INVALID(str)                                                     \
    do {                                                                       \
        usize _output;                                                         \
        cbool ret = check(str, 0, &_output);                                   \
        TEST_CHECK(ret == false);                                              \
    } while (0)
|
||||
|
||||
/* Hexadecimal literals in lower, upper and mixed case, plus malformed ones. */
void test_simple_hex(void) {
    TEST_CASE("lowercase hex");
    CHECK_VALID("0xff", 255);
    CHECK_VALID("0x0", 0);
    CHECK_VALID("0xa", 10);
    CHECK_VALID("0xf", 15);
    CHECK_VALID("0x1a", 26);

    TEST_CASE("uppercase hex");
    CHECK_VALID("0xFF", 255);
    CHECK_VALID("0xA0", 160);
    CHECK_VALID("0xCAFEBABE", 3405691582);

    TEST_CASE("mixed case hex");
    CHECK_VALID("0xFf", 255);
    CHECK_VALID("0xCaFeBaBe", 3405691582);

    TEST_CASE("larger hex values");
    CHECK_VALID("0xff00", 65280);
    CHECK_VALID("0xFFFF", 65535);

    TEST_CASE("invalid hex");
    CHECK_INVALID("0xG");  /* 'G' is not a hex digit */
    CHECK_INVALID("0xyz"); /* no hex digit after the prefix */
    CHECK_INVALID("0x");   /* prefix with no digits */
}
|
||||
|
||||
/* Octal literals (leading 0) and digits that octal does not allow. */
void test_simple_oct(void) {
    TEST_CASE("basic octal");
    CHECK_VALID("00", 0);
    CHECK_VALID("01", 1);
    CHECK_VALID("07", 7);

    TEST_CASE("multi-digit octal");
    CHECK_VALID("010", 8);
    CHECK_VALID("017", 15);
    CHECK_VALID("077", 63);

    TEST_CASE("larger octal values");
    CHECK_VALID("0177", 127);
    CHECK_VALID("0377", 255);
    CHECK_VALID("0777", 511);

    TEST_CASE("invalid octal");
    CHECK_INVALID("08"); /* 8 is not an octal digit */
    CHECK_INVALID("09"); /* 9 is not an octal digit */
}
|
||||
|
||||
/* Decimal literals from a single digit up to five digits. */
void test_simple_dec(void) {
    TEST_CASE("single digits");
    CHECK_VALID("0", 0);
    CHECK_VALID("1", 1);
    CHECK_VALID("9", 9);

    TEST_CASE("multi-digit decimal");
    CHECK_VALID("10", 10);
    CHECK_VALID("42", 42);
    CHECK_VALID("123", 123);

    TEST_CASE("larger decimal values");
    CHECK_VALID("999", 999);
    CHECK_VALID("1234", 1234);
    CHECK_VALID("65535", 65535);
}
|
||||
|
||||
/* Binary literals (0b prefix) and malformed binary input. */
void test_simple_bin(void) {
    TEST_CASE("basic binary");
    CHECK_VALID("0b0", 0);
    CHECK_VALID("0b1", 1);

    TEST_CASE("multi-digit binary");
    CHECK_VALID("0b10", 2);
    CHECK_VALID("0b11", 3);
    CHECK_VALID("0b100", 4);
    CHECK_VALID("0b1010", 10);

    TEST_CASE("larger binary values");
    CHECK_VALID("0b1111", 15);
    CHECK_VALID("0b11111111", 255);
    CHECK_VALID("0b10101010", 170);

    TEST_CASE("invalid binary");
    CHECK_INVALID("0b2"); /* 2 is not a binary digit */
    CHECK_INVALID("0b3"); /* 3 is not a binary digit */
    CHECK_INVALID("0b");  /* prefix with no digits */
}
|
||||
|
||||
/* Inputs that are not numbers, or that mix valid and invalid digits. */
void test_edge_cases(void) {
    TEST_CASE("empty string");
    CHECK_INVALID(""); /* nothing to parse */

    TEST_CASE("non-numeric strings");
    CHECK_INVALID("abc"); /* does not start with a digit */
    CHECK_INVALID("xyz"); /* does not start with a digit */

    TEST_CASE("mixed invalid formats");
    CHECK_INVALID("0x1G"); /* valid hex digit followed by an invalid one */
    CHECK_INVALID("0b12"); /* valid binary digit followed by an invalid one */
}
|
||||
|
||||
TEST_LIST = {
|
||||
{"test_simple_hex", test_simple_hex}, {"test_simple_oct", test_simple_oct},
|
||||
{"test_simple_dec", test_simple_dec}, {"test_simple_bin", test_simple_bin},
|
||||
{"test_edge_cases", test_edge_cases}, {NULL, NULL},
|
||||
};
|
||||
@@ -1,52 +0,0 @@
|
||||
// test_skip_block_comment.c
|
||||
#include <lex_parser.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
void check_skip_block_comment(const char *str, const char *expect_remaining) {
|
||||
log_set_level(&__default_logger_root, 0);
|
||||
scc_pos_t pos = scc_pos_create();
|
||||
scc_mem_probe_stream_t mem_stream;
|
||||
scc_probe_stream_t *stream =
|
||||
scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false);
|
||||
|
||||
scc_lex_parse_skip_block_comment(stream, &pos);
|
||||
scc_probe_stream_sync(stream);
|
||||
|
||||
// Check remaining content
|
||||
char buffer[256] = {0};
|
||||
int i = 0;
|
||||
int ch;
|
||||
while ((ch = scc_probe_stream_consume(stream)) != scc_stream_eof &&
|
||||
i < 255) {
|
||||
buffer[i++] = (char)ch;
|
||||
}
|
||||
|
||||
if (expect_remaining) {
|
||||
TEST_CHECK(strcmp(buffer, expect_remaining) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* One-line comment, alone and followed by code. */
void test_simple_block_comment(void) {
    TEST_CASE("simple block comments");

    check_skip_block_comment("/* comment */", "");
    check_skip_block_comment("/* comment */ int x;", " int x;");
}
|
||||
|
||||
/* Comment spanning a line break, alone and followed by code. */
void test_multiline_block_comment(void) {
    TEST_CASE("multiline block comments");

    check_skip_block_comment("/* line1\nline2 */", "");
    check_skip_block_comment("/* line1\nline2 */ int x;", " int x;");
}
|
||||
|
||||
/* Asterisks inside the comment must not end it early. */
void test_nested_asterisk_block_comment(void) {
    TEST_CASE("nested asterisk block comments");

    check_skip_block_comment("/* *** */", "");
    check_skip_block_comment("/* *** */ int x;", " int x;");
}
|
||||
|
||||
TEST_LIST = {
|
||||
{"test_simple_block_comment", test_simple_block_comment},
|
||||
{"test_multiline_block_comment", test_multiline_block_comment},
|
||||
{"test_nested_asterisk_block_comment", test_nested_asterisk_block_comment},
|
||||
{NULL, NULL},
|
||||
};
|
||||
@@ -1,51 +0,0 @@
|
||||
// test_skip_line.c
|
||||
#include <lex_parser.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
void check_skip_line(const char *str, const char *expect_remaining) {
|
||||
log_set_level(&__default_logger_root, 0);
|
||||
scc_pos_t pos = scc_pos_create();
|
||||
scc_mem_probe_stream_t mem_stream;
|
||||
scc_probe_stream_t *stream =
|
||||
scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false);
|
||||
|
||||
scc_lex_parse_skip_line(stream, &pos);
|
||||
scc_probe_stream_sync(stream);
|
||||
|
||||
// Check remaining content
|
||||
char buffer[256] = {0};
|
||||
int i = 0;
|
||||
int ch;
|
||||
while ((ch = scc_probe_stream_consume(stream)) != scc_stream_eof &&
|
||||
i < 255) {
|
||||
buffer[i++] = (char)ch;
|
||||
}
|
||||
|
||||
if (expect_remaining) {
|
||||
TEST_CHECK(strcmp(buffer, expect_remaining) == 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Line comment ended by LF, alone and followed by code. */
void test_simple_line_comment(void) {
    TEST_CASE("simple line comments");

    check_skip_line("// comment\n", "");
    check_skip_line("// comment\nint x;", "int x;");
}
|
||||
|
||||
/* Line comment ended by CRLF, alone and followed by code. */
void test_crlf_line_comment(void) {
    TEST_CASE("CRLF line comments");

    check_skip_line("// comment\r\n", "");
    check_skip_line("// comment\r\nint x;", "int x;");
}
|
||||
|
||||
/* Line comment that runs to end of input with no line terminator. */
void test_eof_line_comment(void) {
    TEST_CASE("EOF line comments");

    check_skip_line("// comment", "");
}
|
||||
|
||||
TEST_LIST = {
|
||||
{"test_simple_line_comment", test_simple_line_comment},
|
||||
{"test_crlf_line_comment", test_crlf_line_comment},
|
||||
{"test_eof_line_comment", test_eof_line_comment},
|
||||
{NULL, NULL},
|
||||
};
|
||||
@@ -1,62 +0,0 @@
|
||||
// test_string.c
|
||||
#include <lex_parser.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
/**
 * @brief Parse str as a string literal and optionally compare the result.
 *
 * @param str    source text to parse
 * @param expect expected string contents, or NULL to skip the comparison
 * @param output receives the parsed contents on success
 * @return the parser's result when it fails or expect is NULL; otherwise
 *         whether the parsed contents equal expect
 */
cbool check_string(const char *str, const char *expect, scc_cstring_t *output) {
    log_set_level(&__default_logger_root, 0);

    scc_mem_probe_stream_t mem_stream;
    scc_pos_t pos = scc_pos_create();
    scc_probe_stream_t *stream =
        scc_mem_probe_stream_init(&mem_stream, str, scc_strlen(str), false);

    const cbool parsed = scc_lex_parse_string(stream, &pos, output);
    if (!parsed || !expect) {
        return parsed;
    }
    return strcmp(output->data, expect) == 0;
}
|
||||
|
||||
/* Check that `str` parses as a string literal whose contents equal `expect`. */
#define CHECK_STRING_VALID(str, expect)                                        \
    do {                                                                       \
        scc_cstring_t _output = scc_cstring_create();                          \
        cbool ret = check_string(str, expect, &_output);                       \
        TEST_CHECK(ret == true);                                               \
        TEST_CHECK(strcmp(_output.data, expect) == 0);                         \
        scc_cstring_free(&_output);                                            \
    } while (0)
|
||||
|
||||
/* Check that `str` is rejected by the string-literal parser. */
#define CHECK_STRING_INVALID(str)                                              \
    do {                                                                       \
        scc_cstring_t _output = scc_cstring_create();                          \
        cbool ret = check_string(str, NULL, &_output);                         \
        TEST_CHECK(ret == false);                                              \
        scc_cstring_free(&_output);                                            \
    } while (0)
|
||||
|
||||
/* Literals without escapes: empty, one word, words with a space. */
void test_simple_string(void) {
    TEST_CASE("simple strings");

    CHECK_STRING_VALID("\"\"", "");
    CHECK_STRING_VALID("\"hello\"", "hello");
    CHECK_STRING_VALID("\"hello world\"", "hello world");
}
|
||||
|
||||
/* Simple escape sequences, alone and embedded in text. */
void test_escape_string(void) {
    TEST_CASE("escape strings");

    CHECK_STRING_VALID("\"\\n\"", "\n");
    CHECK_STRING_VALID("\"\\t\"", "\t");
    CHECK_STRING_VALID("\"\\\"\"", "\"");
    CHECK_STRING_VALID("\"Hello\\nWorld\"", "Hello\nWorld");
}
|
||||
|
||||
/* A missing closing quote and a raw newline inside the literal. */
void test_invalid_string(void) {
    TEST_CASE("invalid strings");

    CHECK_STRING_INVALID("\"unterminated");
    CHECK_STRING_INVALID("\"newline\n\"");
}
|
||||
|
||||
TEST_LIST = {
|
||||
{"test_simple_string", test_simple_string},
|
||||
{"test_escape_string", test_escape_string},
|
||||
{"test_invalid_string", test_invalid_string},
|
||||
{NULL, NULL},
|
||||
};
|
||||
@@ -4,5 +4,5 @@ version = "0.1.0"
|
||||
|
||||
dependencies = [
|
||||
{ name = "scc_core", path = "../../runtime/scc_core" },
|
||||
{ name = "lex_parser", path = "../lex_parser" },
|
||||
{ name = "lex_parser", path = "../sstream" },
|
||||
]
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include "lexer_token.h"
|
||||
#include <scc_core.h>
|
||||
#include <scc_sstream.h>
|
||||
|
||||
/**
|
||||
* @brief 词法分析器核心结构体
|
||||
@@ -15,16 +16,11 @@
|
||||
* 封装词法分析所需的状态信息和缓冲区管理
|
||||
*/
|
||||
typedef struct scc_lexer {
|
||||
scc_probe_stream_t *stream;
|
||||
scc_pos_t pos;
|
||||
scc_sstream_ring_t stream_ref;
|
||||
int jump_macro;
|
||||
} scc_lexer_t;
|
||||
|
||||
/**
|
||||
* @brief 初始化词法分析器
|
||||
* @param[out] lexer 要初始化的词法分析器实例
|
||||
* @param[in] stream 输入流对象指针
|
||||
*/
|
||||
void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream);
|
||||
void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref);
|
||||
|
||||
/**
|
||||
* @brief 获取原始token
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#define __SCC_LEXER_TOKEN_H__
|
||||
|
||||
#include <scc_core.h>
|
||||
#include <scc_pos.h>
|
||||
|
||||
typedef enum scc_cstd {
|
||||
SCC_CSTD_C89,
|
||||
@@ -54,64 +55,64 @@ typedef enum scc_cstd {
|
||||
// KEYWORD_TABLE
|
||||
|
||||
#define SCC_CTOK_TABLE \
|
||||
X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
|
||||
X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
|
||||
X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
|
||||
X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \
|
||||
X("#" , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_SHARP ) \
|
||||
X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
|
||||
X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
|
||||
X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
|
||||
X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \
|
||||
X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \
|
||||
X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \
|
||||
X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \
|
||||
X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \
|
||||
X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \
|
||||
X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \
|
||||
X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \
|
||||
X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \
|
||||
X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \
|
||||
X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \
|
||||
X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \
|
||||
X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \
|
||||
X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \
|
||||
X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \
|
||||
X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \
|
||||
X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \
|
||||
X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \
|
||||
X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \
|
||||
X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \
|
||||
X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \
|
||||
X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \
|
||||
X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \
|
||||
X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \
|
||||
X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \
|
||||
X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \
|
||||
X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \
|
||||
X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \
|
||||
X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \
|
||||
X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \
|
||||
X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \
|
||||
X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \
|
||||
X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \
|
||||
X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \
|
||||
X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \
|
||||
X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \
|
||||
X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \
|
||||
X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \
|
||||
X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \
|
||||
X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \
|
||||
X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \
|
||||
X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \
|
||||
X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \
|
||||
X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \
|
||||
X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \
|
||||
X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \
|
||||
X(int_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \
|
||||
X(float_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \
|
||||
X(char_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \
|
||||
X(string_literal , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \
|
||||
X(unknown , SCC_TOK_SUBTYPE_INVALID, SCC_TOK_UNKNOWN ) \
|
||||
X(EOF , SCC_TOK_SUBTYPE_EOF, SCC_TOK_EOF ) \
|
||||
X(blank , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_BLANK ) \
|
||||
X(endline , SCC_TOK_SUBTYPE_EMPTYSPACE, SCC_TOK_ENDLINE ) \
|
||||
X("#" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SHARP ) \
|
||||
X("==" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_EQ ) \
|
||||
X("=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN ) \
|
||||
X("++" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD_ADD ) \
|
||||
X("+=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_ADD ) \
|
||||
X("+" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ADD ) \
|
||||
X("--" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB_SUB ) \
|
||||
X("-=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_SUB ) \
|
||||
X("->" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DEREF ) \
|
||||
X("-" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SUB ) \
|
||||
X("*=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MUL ) \
|
||||
X("*" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MUL ) \
|
||||
X("/=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_DIV ) \
|
||||
X("/" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DIV ) \
|
||||
X("//" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_LINE_COMMENT ) \
|
||||
X("/* */" , SCC_TOK_SUBTYPE_COMMENT , SCC_TOK_BLOCK_COMMENT ) \
|
||||
X("%=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_MOD ) \
|
||||
X("%" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_MOD ) \
|
||||
X("&&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND_AND ) \
|
||||
X("&=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_AND ) \
|
||||
X("&" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_AND ) \
|
||||
X("||" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR_OR ) \
|
||||
X("|=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_OR ) \
|
||||
X("|" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_OR ) \
|
||||
X("^=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_XOR ) \
|
||||
X("^" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_XOR ) \
|
||||
X("<<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_L_SH ) \
|
||||
X("<<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_SH ) \
|
||||
X("<=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LE ) \
|
||||
X("<" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_LT ) \
|
||||
X(">>=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ASSIGN_R_SH ) \
|
||||
X(">>" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_SH ) \
|
||||
X(">=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GE ) \
|
||||
X(">" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_GT ) \
|
||||
X("!" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NOT ) \
|
||||
X("!=" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_NEQ ) \
|
||||
X("~" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_BIT_NOT ) \
|
||||
X("[" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACKET ) \
|
||||
X("]" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACKET ) \
|
||||
X("(" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_PAREN ) \
|
||||
X(")" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_PAREN ) \
|
||||
X("{" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_L_BRACE ) \
|
||||
X("}" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_R_BRACE ) \
|
||||
X(";" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_SEMICOLON ) \
|
||||
X("," , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COMMA ) \
|
||||
X(":" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COLON ) \
|
||||
X("." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_DOT ) \
|
||||
X("..." , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_ELLIPSIS ) \
|
||||
X("?" , SCC_TOK_SUBTYPE_OPERATOR, SCC_TOK_COND ) \
|
||||
X(ident , SCC_TOK_SUBTYPE_IDENTIFIER, SCC_TOK_IDENT ) \
|
||||
X(int , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_INT_LITERAL ) \
|
||||
X(float , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_FLOAT_LITERAL ) \
|
||||
X(char , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_CHAR_LITERAL ) \
|
||||
X(string , SCC_TOK_SUBTYPE_LITERAL, SCC_TOK_STRING_LITERAL ) \
|
||||
// END
|
||||
/* clang-format on */
|
||||
|
||||
@@ -145,7 +146,7 @@ const char *scc_get_tok_name(scc_tok_type_t type);
|
||||
|
||||
typedef struct scc_lexer_token {
|
||||
scc_tok_type_t type;
|
||||
scc_cvalue_t value;
|
||||
scc_cstring_t lexeme;
|
||||
scc_pos_t loc;
|
||||
} scc_lexer_tok_t;
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
#include <lex_parser.h>
|
||||
#include <lexer.h>
|
||||
#include <lexer_log.h>
|
||||
|
||||
@@ -13,442 +12,460 @@ static const struct {
|
||||
};
|
||||
|
||||
// by using binary search to find the keyword
|
||||
static inline int keyword_cmp(const char *name, int len) {
|
||||
static int keyword_cmp(const char *name, int len) {
|
||||
int low = 0;
|
||||
int high = sizeof(keywords) / sizeof(keywords[0]) - 1;
|
||||
while (low <= high) {
|
||||
int mid = (low + high) / 2;
|
||||
const char *key = keywords[mid].name;
|
||||
int cmp = 0;
|
||||
|
||||
// 自定义字符串比较逻辑
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (name[i] != key[i]) {
|
||||
cmp = (unsigned char)name[i] - (unsigned char)key[i];
|
||||
break;
|
||||
}
|
||||
if (name[i] == '\0')
|
||||
break; // 遇到终止符提前结束
|
||||
break;
|
||||
}
|
||||
|
||||
if (cmp == 0) {
|
||||
// 完全匹配检查(长度相同)
|
||||
if (key[len] == '\0')
|
||||
return mid;
|
||||
cmp = -1; // 当前关键词比输入长
|
||||
cmp = -1;
|
||||
}
|
||||
|
||||
if (cmp < 0) {
|
||||
if (cmp < 0)
|
||||
high = mid - 1;
|
||||
} else {
|
||||
else
|
||||
low = mid + 1;
|
||||
}
|
||||
}
|
||||
return -1; // Not a keyword.
|
||||
return -1; // 不是关键字
|
||||
}
|
||||
|
||||
void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream) {
|
||||
lexer->stream = stream;
|
||||
lexer->pos = scc_pos_create();
|
||||
// FIXME
|
||||
lexer->pos.name = scc_cstring_copy(&stream->name);
|
||||
void scc_lexer_init(scc_lexer_t *lexer, scc_sstream_ring_t *stream_ref) {
|
||||
lexer->stream_ref = *stream_ref;
|
||||
lexer->jump_macro = false;
|
||||
}
|
||||
|
||||
static inline cbool is_whitespace(int ch) {
|
||||
return ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f';
|
||||
}
|
||||
/* True when ch is a line-feed or a carriage-return character. */
static inline cbool is_newline(int ch) {
    return !(ch != '\n' && ch != '\r');
}
|
||||
static inline cbool is_digit(int ch) { return ch >= '0' && ch <= '9'; }
|
||||
static inline cbool is_alpha(int ch) {
|
||||
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
|
||||
}
|
||||
static inline cbool is_alnum(int ch) { return is_alpha(ch) || is_digit(ch); }
|
||||
static inline cbool is_identifier_start(int ch) {
|
||||
return is_alpha(ch) || ch == '_';
|
||||
}
|
||||
static inline cbool is_identifier_part(int ch) {
|
||||
return is_alnum(ch) || ch == '_';
|
||||
}
|
||||
static inline cbool is_octal_digit(int ch) { return ch >= '0' && ch <= '7'; }
|
||||
static inline cbool is_hex_digit(int ch) {
|
||||
return is_digit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
|
||||
}
|
||||
|
||||
/* 从环形缓冲区预览一个字符(带EOF检测) */
|
||||
static inline cbool peek_char(scc_lexer_t *lexer, scc_sstream_char_t *out) {
|
||||
cbool ok;
|
||||
scc_ring_peek(lexer->stream_ref, *out, ok);
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* 从环形缓冲区消费一个字符,并将它追加到lexeme中 */
|
||||
static inline cbool next_char(scc_lexer_t *lexer, scc_cstring_t *lexeme,
|
||||
scc_sstream_char_t *out) {
|
||||
cbool ok;
|
||||
scc_ring_next(lexer->stream_ref, *out, ok);
|
||||
if (!ok)
|
||||
return false;
|
||||
scc_cstring_append_ch(lexeme, out->character);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Mark a token as erroneous by setting its type to SCC_TOK_UNKNOWN. */
#define set_err_token(token) ((token)->type = SCC_TOK_UNKNOWN)
|
||||
|
||||
/* Handle a line that starts with '#'.
 *
 * Tries to read a `#line <number> "<file>"`-shaped directive from the lexer's
 * probe stream. On success the rest of the line is skipped and token->loc
 * receives the parsed line number and a copy of the file name. On failure a
 * message is logged, the rest of the line is skipped where the SKIP_LINE path
 * is taken, and the token is marked via set_err_token.
 *
 * lexer: supplies the stream and the running position (advanced here).
 * token: receives the location on success, or the error type on failure. */
static void parse_line(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
|
||||
token->loc = lexer->pos;
|
||||
scc_probe_stream_t *stream = lexer->stream;
|
||||
scc_probe_stream_reset(stream);
|
||||
int ch = scc_probe_stream_next(stream);
|
||||
|
||||
usize n;
|
||||
scc_cstring_t str = scc_cstring_create();
|
||||
|
||||
/* The first character must be '#'; anything else is an error. */
if (ch == scc_stream_eof) {
|
||||
LEX_WARN("Unexpected EOF at begin");
|
||||
goto ERR;
|
||||
} else if (ch != '#') {
|
||||
LEX_WARN("Unexpected character '%c' at begin", ch);
|
||||
goto ERR;
|
||||
}
|
||||
|
||||
const char line[] = "line";
|
||||
|
||||
/* Match the directive keyword character by character.
 * NOTE(review): sizeof(line) is 5 (it counts the terminating NUL), so the
 * final pass compares the consumed character against '\0' -- this looks like
 * an off-by-one; confirm whether sizeof(line) - 1 was intended. */
for (int i = 0; i < (int)sizeof(line); i++) {
|
||||
ch = scc_probe_stream_consume(stream);
|
||||
scc_pos_next(&lexer->pos);
|
||||
if (ch != line[i]) {
|
||||
LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
|
||||
"it will be ignored");
|
||||
goto SKIP_LINE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Line number that follows the keyword. */
if (scc_lex_parse_number(stream, &lexer->pos, &n) == false) {
|
||||
LEX_ERROR("Invalid line number");
|
||||
goto SKIP_LINE;
|
||||
}
|
||||
|
||||
if (scc_probe_stream_consume(stream) != ' ') {
|
||||
scc_lex_parse_skip_line(stream, &lexer->pos);
|
||||
/* NOTE(review): token->value.u is never assigned in this function, while
 * the parsed number lives in `n`; execution also continues past this block
 * after the line was skipped -- confirm the intent. */
token->loc.line = token->value.u;
|
||||
}
|
||||
|
||||
/* Optional file name: must open with a double quote. */
if (scc_probe_stream_next(stream) != '"') {
|
||||
LEX_ERROR("Invalid `#` line");
|
||||
goto SKIP_LINE;
|
||||
}
|
||||
if (scc_lex_parse_string(stream, &lexer->pos, &str) == false) {
|
||||
LEX_ERROR("Invalid filename");
|
||||
goto SKIP_LINE;
|
||||
}
|
||||
|
||||
/* Success: drop the remainder of the line and record the new location. */
scc_lex_parse_skip_line(stream, &lexer->pos);
|
||||
scc_probe_stream_sync(stream);
|
||||
token->loc.line = n;
|
||||
// FIXME memory leak
|
||||
token->loc.name = scc_cstring_copy(&str);
|
||||
scc_cstring_free(&str);
|
||||
return;
|
||||
/* Failure after the '#': discard the rest of the line, then fall into ERR. */
SKIP_LINE:
|
||||
scc_lex_parse_skip_line(stream, &lexer->pos);
|
||||
scc_probe_stream_sync(stream);
|
||||
/* Common error exit: flag the token and release the scratch string. */
ERR:
|
||||
set_err_token(token);
|
||||
scc_cstring_free(&str);
|
||||
}
|
||||
|
||||
// /zh/c/language/operator_arithmetic.html
|
||||
void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
|
||||
token->loc = lexer->pos;
|
||||
token->type = SCC_TOK_UNKNOWN;
|
||||
scc_probe_stream_t *stream = lexer->stream;
|
||||
scc_sstream_char_t cur;
|
||||
scc_cstring_t lex = scc_cstring_create(); // 临时lexeme
|
||||
|
||||
scc_probe_stream_reset(stream);
|
||||
scc_tok_type_t type = SCC_TOK_UNKNOWN;
|
||||
int ch = scc_probe_stream_next(stream);
|
||||
// 尝试预览第一个字符
|
||||
if (!peek_char(lexer, &cur)) {
|
||||
token->type = SCC_TOK_EOF;
|
||||
token->loc = (scc_pos_t){0, 1, 1, 0}; // 默认位置
|
||||
token->lexeme = lex; // 空字符串
|
||||
return;
|
||||
}
|
||||
|
||||
// 记录起始位置
|
||||
scc_pos_t start_loc = cur.pos;
|
||||
int ch = cur.character;
|
||||
|
||||
// once step
|
||||
switch (ch) {
|
||||
case '=':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = SCC_TOK_EQ;
|
||||
goto double_char;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_ASSIGN;
|
||||
break;
|
||||
if (is_whitespace(ch)) {
|
||||
// 空白符: 连续收集
|
||||
token->type = SCC_TOK_BLANK;
|
||||
while (peek_char(lexer, &cur) && is_whitespace(cur.character)) {
|
||||
next_char(lexer, &lex, &cur);
|
||||
}
|
||||
break;
|
||||
case '+':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '+':
|
||||
type = SCC_TOK_ADD_ADD;
|
||||
goto double_char;
|
||||
case '=':
|
||||
type = SCC_TOK_ASSIGN_ADD;
|
||||
goto double_char;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_ADD;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '-':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '-':
|
||||
type = SCC_TOK_SUB_SUB;
|
||||
goto double_char;
|
||||
case '=':
|
||||
type = SCC_TOK_ASSIGN_SUB;
|
||||
goto double_char;
|
||||
case '>':
|
||||
type = SCC_TOK_DEREF;
|
||||
goto double_char;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_SUB;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '*':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = SCC_TOK_ASSIGN_MUL;
|
||||
goto double_char;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_MUL;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '/':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = SCC_TOK_ASSIGN_DIV;
|
||||
goto double_char;
|
||||
case '/':
|
||||
scc_probe_stream_reset(stream);
|
||||
scc_lex_parse_skip_line(stream, &lexer->pos);
|
||||
scc_probe_stream_sync(stream);
|
||||
token->type = SCC_TOK_LINE_COMMENT;
|
||||
goto END;
|
||||
case '*':
|
||||
scc_probe_stream_reset(stream);
|
||||
scc_lex_parse_skip_block_comment(stream, &lexer->pos);
|
||||
scc_probe_stream_sync(stream);
|
||||
token->type = SCC_TOK_BLOCK_COMMENT;
|
||||
goto END;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_DIV;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '%':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = SCC_TOK_ASSIGN_MOD;
|
||||
goto double_char;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_MOD;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '&':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '&':
|
||||
type = SCC_TOK_AND_AND;
|
||||
goto double_char;
|
||||
case '=':
|
||||
type = SCC_TOK_ASSIGN_AND;
|
||||
goto double_char;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_AND;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '|':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '|':
|
||||
type = SCC_TOK_OR_OR;
|
||||
goto double_char;
|
||||
case '=':
|
||||
type = SCC_TOK_ASSIGN_OR;
|
||||
goto double_char;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_OR;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '^':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = SCC_TOK_ASSIGN_XOR;
|
||||
goto double_char;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_XOR;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '<':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = SCC_TOK_LE;
|
||||
goto double_char;
|
||||
case '<': {
|
||||
if (scc_probe_stream_next(stream) == '=') {
|
||||
type = SCC_TOK_ASSIGN_L_SH;
|
||||
goto triple_char;
|
||||
} else {
|
||||
type = SCC_TOK_L_SH;
|
||||
goto double_char;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_LT;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = SCC_TOK_GE;
|
||||
goto double_char;
|
||||
case '>': {
|
||||
if (scc_probe_stream_next(stream) == '=') {
|
||||
type = SCC_TOK_ASSIGN_R_SH;
|
||||
goto triple_char;
|
||||
} else {
|
||||
type = SCC_TOK_R_SH;
|
||||
goto double_char;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_GT;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '~':
|
||||
type = SCC_TOK_BIT_NOT;
|
||||
break;
|
||||
case '!':
|
||||
switch (scc_probe_stream_next(stream)) {
|
||||
case '=':
|
||||
type = SCC_TOK_NEQ;
|
||||
goto double_char;
|
||||
default:
|
||||
scc_probe_stream_reset(stream), type = SCC_TOK_NOT;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
/* clang-format off */
|
||||
case '[': type = SCC_TOK_L_BRACKET; break;
|
||||
case ']': type = SCC_TOK_R_BRACKET; break;
|
||||
case '(': type = SCC_TOK_L_PAREN; break;
|
||||
case ')': type = SCC_TOK_R_PAREN; break;
|
||||
case '{': type = SCC_TOK_L_BRACE; break;
|
||||
case '}': type = SCC_TOK_R_BRACE; break;
|
||||
case ';': type = SCC_TOK_SEMICOLON; break;
|
||||
case ',': type = SCC_TOK_COMMA; break;
|
||||
case ':': type = SCC_TOK_COLON; break;
|
||||
/* clang-format on */
|
||||
case '.':
|
||||
if (scc_probe_stream_next(stream) == '.' &&
|
||||
scc_probe_stream_next(stream) == '.') {
|
||||
type = SCC_TOK_ELLIPSIS;
|
||||
goto triple_char;
|
||||
}
|
||||
type = SCC_TOK_DOT;
|
||||
break;
|
||||
case '?':
|
||||
type = SCC_TOK_COND;
|
||||
break;
|
||||
case '\v':
|
||||
case '\f':
|
||||
case ' ':
|
||||
case '\t':
|
||||
type = SCC_TOK_BLANK;
|
||||
break;
|
||||
case '\r':
|
||||
case '\n':
|
||||
scc_probe_stream_back(stream);
|
||||
scc_lex_parse_skip_endline(stream, &lexer->pos);
|
||||
scc_probe_stream_sync(stream);
|
||||
} else if (is_newline(ch)) {
|
||||
// 换行符:处理 \r 或 \n,以及 \r\n 组合
|
||||
token->type = SCC_TOK_ENDLINE;
|
||||
goto END;
|
||||
case '#':
|
||||
parse_line(lexer, token);
|
||||
token->type = SCC_TOK_SHARP;
|
||||
goto END;
|
||||
case '\0':
|
||||
case scc_stream_eof:
|
||||
// EOF
|
||||
type = SCC_TOK_EOF;
|
||||
break;
|
||||
case '\'': {
|
||||
token->loc = lexer->pos;
|
||||
next_char(lexer, &lex, &cur); // 消费第一个字符
|
||||
if (ch == '\r') {
|
||||
// 尝试消费后面的 \n
|
||||
if (peek_char(lexer, &cur) && cur.character == '\n') {
|
||||
next_char(lexer, &lex, &cur);
|
||||
}
|
||||
}
|
||||
} else if (ch == '/') {
|
||||
// 可能为注释或除号
|
||||
scc_sstream_char_t next = {0};
|
||||
next_char(lexer, &lex, &cur); // 消费 '/'
|
||||
peek_char(lexer, &next);
|
||||
if (next.character == '=') {
|
||||
token->type = SCC_TOK_ASSIGN_DIV;
|
||||
next_char(lexer, &lex, &cur);
|
||||
} else if (next.character == '/') {
|
||||
// 行注释 //
|
||||
token->type = SCC_TOK_LINE_COMMENT;
|
||||
next_char(lexer, &lex, &cur); // 消费 '/'
|
||||
while (peek_char(lexer, &cur) && !is_newline(cur.character)) {
|
||||
next_char(lexer, &lex, &cur);
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
}
|
||||
// 注释结束,不包含换行符(换行符单独成token)
|
||||
} else if (next.character == '*') {
|
||||
// 块注释 /*
|
||||
token->type = SCC_TOK_BLOCK_COMMENT;
|
||||
next_char(lexer, &lex, &cur); // 消费 '*'
|
||||
while (1) {
|
||||
if (!next_char(lexer, &lex, &cur)) {
|
||||
// 文件结束,注释未闭合
|
||||
LOG_ERROR("Unterminated block comment");
|
||||
break;
|
||||
}
|
||||
if (cur.character == '*' && peek_char(lexer, &next) &&
|
||||
next.character == '/') {
|
||||
next_char(lexer, &lex, &cur); // 消费 '/'
|
||||
break;
|
||||
}
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
}
|
||||
} else {
|
||||
// 只是除号 /
|
||||
token->type = SCC_TOK_DIV;
|
||||
}
|
||||
} else if (is_identifier_start(ch)) {
|
||||
// 标识符或关键字
|
||||
token->type = SCC_TOK_IDENT; // 暂定
|
||||
while (peek_char(lexer, &cur) && is_identifier_part(cur.character)) {
|
||||
next_char(lexer, &lex, &cur);
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
}
|
||||
// 检查是否为关键字
|
||||
int idx = keyword_cmp(scc_cstring_as_cstr(&lex), scc_cstring_len(&lex));
|
||||
if (idx != -1) {
|
||||
token->type = keywords[idx].tok;
|
||||
}
|
||||
} else if (is_digit(ch)) {
|
||||
// 数字字面量(整数/浮点)
|
||||
token->type = SCC_TOK_INT_LITERAL; // 先假定整数
|
||||
cbool maybe_float = false;
|
||||
while (1) {
|
||||
next_char(lexer, &lex, &cur); // 消费当前数字
|
||||
if (!peek_char(lexer, &cur))
|
||||
break;
|
||||
ch = cur.character;
|
||||
if (is_digit(ch) || (ch == '.' && !maybe_float)) {
|
||||
if (ch == '.')
|
||||
maybe_float = true;
|
||||
continue;
|
||||
}
|
||||
if (ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P') {
|
||||
maybe_float = true;
|
||||
// 后面可能跟符号或数字
|
||||
continue;
|
||||
}
|
||||
if (ch == 'x' || ch == 'X') {
|
||||
// 十六进制前缀,需特殊处理
|
||||
// 这里简化:将整个序列作为整数(保留前缀)
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (maybe_float)
|
||||
token->type = SCC_TOK_FLOAT_LITERAL;
|
||||
} else if (ch == '\'') {
|
||||
// 字符字面量
|
||||
token->type = SCC_TOK_CHAR_LITERAL;
|
||||
scc_probe_stream_reset(stream);
|
||||
int ch = scc_lex_parse_char(stream, &lexer->pos);
|
||||
scc_probe_stream_sync(stream);
|
||||
if (ch == scc_stream_eof) {
|
||||
LEX_ERROR("Unexpected character literal");
|
||||
token->type = SCC_TOK_UNKNOWN;
|
||||
} else {
|
||||
token->value.ch = ch;
|
||||
next_char(lexer, &lex, &cur); // 开头的 '
|
||||
while (1) {
|
||||
if (!peek_char(lexer, &cur)) {
|
||||
LOG_ERROR("Unterminated character literal");
|
||||
break;
|
||||
}
|
||||
if (cur.character == '\'') {
|
||||
next_char(lexer, &lex, &cur); // 闭引号
|
||||
break;
|
||||
}
|
||||
if (cur.character == '\\') {
|
||||
// 转义序列:原样保存反斜杠和下一个字符
|
||||
next_char(lexer, &lex, &cur);
|
||||
if (!peek_char(lexer, &cur))
|
||||
break;
|
||||
next_char(lexer, &lex, &cur);
|
||||
} else {
|
||||
next_char(lexer, &lex, &cur);
|
||||
}
|
||||
}
|
||||
goto END;
|
||||
}
|
||||
case '"': {
|
||||
token->loc = lexer->pos;
|
||||
} else if (ch == '"') {
|
||||
// 字符串字面量
|
||||
token->type = SCC_TOK_STRING_LITERAL;
|
||||
scc_cstring_t output = scc_cstring_create();
|
||||
scc_probe_stream_reset(stream);
|
||||
if (scc_lex_parse_string(stream, &lexer->pos, &output) == true) {
|
||||
scc_probe_stream_sync(stream);
|
||||
token->value.cstr.data = scc_cstring_as_cstr(&output);
|
||||
token->value.cstr.len = scc_cstring_len(&output);
|
||||
} else {
|
||||
LEX_ERROR("Unexpected string literal");
|
||||
next_char(lexer, &lex, &cur); // 开头的 "
|
||||
while (1) {
|
||||
if (!peek_char(lexer, &cur)) {
|
||||
LOG_ERROR("Unterminated string literal");
|
||||
break;
|
||||
}
|
||||
if (cur.character == '"') {
|
||||
next_char(lexer, &lex, &cur); // 闭引号
|
||||
break;
|
||||
}
|
||||
if (cur.character == '\\') {
|
||||
// 转义序列
|
||||
next_char(lexer, &lex, &cur);
|
||||
if (!peek_char(lexer, &cur))
|
||||
break;
|
||||
next_char(lexer, &lex, &cur);
|
||||
} else {
|
||||
next_char(lexer, &lex, &cur);
|
||||
}
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
}
|
||||
} else {
|
||||
scc_sstream_char_t next = {0};
|
||||
next_char(lexer, &lex, &cur);
|
||||
peek_char(lexer, &next);
|
||||
switch (ch) {
|
||||
case '=':
|
||||
switch (next.character) {
|
||||
case '=':
|
||||
token->type = SCC_TOK_EQ;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_ASSIGN;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '+':
|
||||
switch (next.character) {
|
||||
case '+':
|
||||
token->type = SCC_TOK_ADD_ADD;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
case '=':
|
||||
token->type = SCC_TOK_ASSIGN_ADD;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_ADD;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '-':
|
||||
switch (next.character) {
|
||||
case '-':
|
||||
token->type = SCC_TOK_SUB_SUB;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
case '=':
|
||||
token->type = SCC_TOK_ASSIGN_SUB;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
case '>':
|
||||
token->type = SCC_TOK_DEREF;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_SUB;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '*':
|
||||
switch (next.character) {
|
||||
case '=':
|
||||
token->type = SCC_TOK_ASSIGN_MUL;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_MUL;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '%':
|
||||
switch (next.character) {
|
||||
case '=':
|
||||
token->type = SCC_TOK_ASSIGN_MOD;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_MOD;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '&':
|
||||
switch (next.character) {
|
||||
case '&':
|
||||
token->type = SCC_TOK_AND_AND;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
case '=':
|
||||
token->type = SCC_TOK_ASSIGN_AND;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_AND;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '|':
|
||||
switch (next.character) {
|
||||
case '|':
|
||||
token->type = SCC_TOK_OR_OR;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
case '=':
|
||||
token->type = SCC_TOK_ASSIGN_OR;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_OR;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '^':
|
||||
switch (next.character) {
|
||||
case '=':
|
||||
token->type = SCC_TOK_ASSIGN_XOR;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_XOR;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '<':
|
||||
switch (next.character) {
|
||||
case '=':
|
||||
token->type = SCC_TOK_LE;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
case '<': {
|
||||
next_char(lexer, &lex, &cur);
|
||||
if (peek_char(lexer, &next) && next.character == '=') {
|
||||
token->type = SCC_TOK_ASSIGN_L_SH;
|
||||
next_char(lexer, &lex, &cur);
|
||||
} else {
|
||||
token->type = SCC_TOK_L_SH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
token->type = SCC_TOK_LT;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
switch (next.character) {
|
||||
case '=':
|
||||
token->type = SCC_TOK_GE;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
case '>': {
|
||||
next_char(lexer, &lex, &cur);
|
||||
if (peek_char(lexer, &next) && next.character == '=') {
|
||||
token->type = SCC_TOK_ASSIGN_R_SH;
|
||||
next_char(lexer, &lex, &cur);
|
||||
} else {
|
||||
token->type = SCC_TOK_R_SH;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
token->type = SCC_TOK_GT;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case '~':
|
||||
token->type = SCC_TOK_BIT_NOT;
|
||||
break;
|
||||
case '!':
|
||||
switch (next.character) {
|
||||
case '=':
|
||||
token->type = SCC_TOK_NEQ;
|
||||
next_char(lexer, &lex, &cur);
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_NOT;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
/* clang-format off */
|
||||
case '[': token->type = SCC_TOK_L_BRACKET; break;
|
||||
case ']': token->type = SCC_TOK_R_BRACKET; break;
|
||||
case '(': token->type = SCC_TOK_L_PAREN; break;
|
||||
case ')': token->type = SCC_TOK_R_PAREN; break;
|
||||
case '{': token->type = SCC_TOK_L_BRACE; break;
|
||||
case '}': token->type = SCC_TOK_R_BRACE; break;
|
||||
case ';': token->type = SCC_TOK_SEMICOLON; break;
|
||||
case ',': token->type = SCC_TOK_COMMA; break;
|
||||
case ':': token->type = SCC_TOK_COLON; break;
|
||||
/* clang-format on */
|
||||
case '.':
|
||||
if (next.character == '.' && peek_char(lexer, &next) &&
|
||||
next.character == '.') {
|
||||
token->type = SCC_TOK_ELLIPSIS;
|
||||
next_char(lexer, &lex, &cur);
|
||||
next_char(lexer, &lex, &cur);
|
||||
} else {
|
||||
token->type = SCC_TOK_DOT;
|
||||
}
|
||||
break;
|
||||
case '?':
|
||||
token->type = SCC_TOK_COND;
|
||||
break;
|
||||
case '#':
|
||||
token->type = SCC_TOK_SHARP;
|
||||
break;
|
||||
default:
|
||||
token->type = SCC_TOK_UNKNOWN;
|
||||
LEX_ERROR("unsupport char in sourse code `%c`:0x%x", ch, ch);
|
||||
break;
|
||||
}
|
||||
|
||||
goto END;
|
||||
}
|
||||
/* clang-format off */
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
/* clang-format on */
|
||||
token->loc = lexer->pos;
|
||||
token->type = SCC_TOK_INT_LITERAL;
|
||||
usize output;
|
||||
scc_probe_stream_reset(stream);
|
||||
if (scc_lex_parse_number(stream, &lexer->pos, &output) == true) {
|
||||
scc_probe_stream_sync(stream);
|
||||
token->value.u = output;
|
||||
} else {
|
||||
LEX_ERROR("Unexpected number literal");
|
||||
token->type = SCC_TOK_UNKNOWN;
|
||||
}
|
||||
goto END;
|
||||
/* clang-format off */
|
||||
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
|
||||
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
|
||||
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
|
||||
case 'v': case 'w': case 'x': case 'y': case 'z':
|
||||
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
|
||||
case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
|
||||
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
|
||||
case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
|
||||
/* clang-format on */
|
||||
scc_cstring_t str = scc_cstring_create();
|
||||
scc_probe_stream_reset(stream);
|
||||
cbool ret = scc_lex_parse_identifier(stream, &lexer->pos, &str);
|
||||
scc_probe_stream_sync(stream);
|
||||
Assert(ret == true);
|
||||
|
||||
int res = keyword_cmp(scc_cstring_as_cstr(&str), scc_cstring_len(&str));
|
||||
if (res == -1) {
|
||||
token->value.cstr.data = (char *)scc_cstring_as_cstr(&str);
|
||||
token->value.cstr.len = scc_cstring_len(&str);
|
||||
type = SCC_TOK_IDENT;
|
||||
} else {
|
||||
scc_cstring_free(&str);
|
||||
type = keywords[res].tok;
|
||||
}
|
||||
token->type = type;
|
||||
goto END;
|
||||
default:
|
||||
LEX_ERROR("unsupport char in sourse code `%c`:0x%x", ch, ch);
|
||||
break;
|
||||
}
|
||||
goto once_char;
|
||||
triple_char:
|
||||
scc_probe_stream_consume(stream);
|
||||
scc_pos_next(&lexer->pos);
|
||||
double_char:
|
||||
scc_probe_stream_consume(stream);
|
||||
scc_pos_next(&lexer->pos);
|
||||
once_char:
|
||||
scc_probe_stream_consume(stream);
|
||||
scc_pos_next(&lexer->pos);
|
||||
token->type = type;
|
||||
END:
|
||||
LEX_DEBUG("get token `%s` in %s:%d:%d", scc_get_tok_name(token->type),
|
||||
token->loc.name, token->loc.line, token->loc.column);
|
||||
// 设置token
|
||||
scc_ring_consume(lexer->stream_ref);
|
||||
token->type = token->type; // 上面已设
|
||||
token->loc = start_loc;
|
||||
token->lexeme = lex; // 转移所有权
|
||||
LEX_DEBUG("get token `%s` (%s) at %s:%d:%d", scc_get_tok_name(token->type),
|
||||
scc_cstring_as_cstr(&token->lexeme), token->loc.name,
|
||||
token->loc.line, token->loc.col);
|
||||
}
|
||||
|
||||
// scc_lexer_get_token maybe got invalid (with parser)
|
||||
/**
 * Fetch the next token that is neither whitespace nor a comment.
 *
 * Repeatedly calls scc_lexer_get_token() and discards tokens whose subtype is
 * SCC_TOK_SUBTYPE_EMPTYSPACE or SCC_TOK_SUBTYPE_COMMENT. Asserts if a token
 * with SCC_TOK_SUBTYPE_INVALID is produced.
 *
 * @param lexer lexer to pull tokens from
 * @param token receives the first non-skipped token
 */
void scc_lexer_get_valid_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
    scc_tok_subtype_t subtype;
    for (;;) {
        scc_lexer_get_token(lexer, token);
        subtype = scc_get_tok_subtype(token->type);
        AssertFmt(subtype != SCC_TOK_SUBTYPE_INVALID,
                  "Invalid token: `%s` at %s:%d:%d",
                  scc_get_tok_name(token->type), token->loc.name,
                  token->loc.line, token->loc.col);
        if (subtype != SCC_TOK_SUBTYPE_EMPTYSPACE &&
            subtype != SCC_TOK_SUBTYPE_COMMENT) {
            return;
        }
        // scc_lexer_get_token hands ownership of the lexeme to the token, so
        // a skipped token's lexeme is released before it is overwritten.
        // NOTE(review): assumes get_token never frees the incoming lexeme
        // itself (callers pass uninitialized tokens) -- confirm.
        scc_cstring_free(&token->lexeme);
    }
}
|
||||
|
||||
66
libs/lexer/src/main.c
Normal file
66
libs/lexer/src/main.c
Normal file
@@ -0,0 +1,66 @@
|
||||
#include <lexer.h>
|
||||
#include <lexer_log.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
/// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
|
||||
/*
|
||||
tok_tConstant {
|
||||
int have;
|
||||
union {
|
||||
char ch;
|
||||
int i;
|
||||
float f;
|
||||
double d;
|
||||
long long ll;
|
||||
char* str;
|
||||
};
|
||||
};
|
||||
*/
|
||||
|
||||
int g_num;
|
||||
int g_num_arr[3];
|
||||
int main(int argc, char *argv[]) {
|
||||
// int num = 0;
|
||||
if (argc == 3 && strcmp(argv[2], "--debug") == 0) {
|
||||
log_set_level(NULL, LOG_LEVEL_ALL);
|
||||
} else {
|
||||
// FIXME it is a hack lexer_logger
|
||||
log_set_level(&__scc_lexer_log, LOG_LEVEL_NOTSET);
|
||||
log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR |
|
||||
LOG_LEVEL_FATAL);
|
||||
}
|
||||
|
||||
const char *file_name = __FILE__;
|
||||
if (argc == 2) {
|
||||
file_name = argv[1];
|
||||
}
|
||||
|
||||
scc_lexer_t lexer;
|
||||
scc_sstream_t stream;
|
||||
scc_sstream_init(&stream, file_name, 16);
|
||||
scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream);
|
||||
scc_lexer_init(&lexer, ref);
|
||||
scc_lexer_tok_t token;
|
||||
|
||||
while (1) {
|
||||
scc_lexer_get_valid_token(&lexer, &token);
|
||||
if (token.type == SCC_TOK_EOF) {
|
||||
break;
|
||||
}
|
||||
LOG_DEBUG("get token [%-8s] `%s` at %s:%d:%d",
|
||||
scc_get_tok_name(token.type),
|
||||
scc_cstring_as_cstr(&token.lexeme), token.loc.name,
|
||||
token.loc.line, token.loc.col);
|
||||
// LOG_DEBUG("%s", token.val.str);
|
||||
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
|
||||
// lexer.line, lexer.index, token.type,
|
||||
// scc_get_tok_name(token.type));
|
||||
}
|
||||
scc_sstream_drop_ring(ref);
|
||||
scc_sstream_drop(&stream);
|
||||
|
||||
LOG_INFO("Lexer is Ok...");
|
||||
return 0;
|
||||
}
|
||||
403
libs/lexer/tests/test_lexer.c
Normal file
403
libs/lexer/tests/test_lexer.c
Normal file
@@ -0,0 +1,403 @@
|
||||
// test_lexer.c
|
||||
#include <lexer.h>
|
||||
#include <string.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
// 辅助函数:释放 token 的 lexeme
|
||||
static void free_token(scc_lexer_tok_t *tok) { scc_cstring_free(&tok->lexeme); }
|
||||
|
||||
// 单 token 测试宏(检查类型)
|
||||
#define TEST_TOKEN(input, expected_type) \
|
||||
do { \
|
||||
scc_lexer_t lexer; \
|
||||
scc_lexer_tok_t token; \
|
||||
scc_sstream_t stream; \
|
||||
scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16); \
|
||||
scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream); \
|
||||
scc_lexer_init(&lexer, ref); \
|
||||
scc_lexer_get_token(&lexer, &token); \
|
||||
\
|
||||
TEST_CHECK(token.type == expected_type); \
|
||||
TEST_MSG("Input: '%s'", input); \
|
||||
TEST_MSG("Expected: %s", scc_get_tok_name(expected_type)); \
|
||||
TEST_MSG("Got: %s", scc_get_tok_name(token.type)); \
|
||||
\
|
||||
free_token(&token); \
|
||||
scc_sstream_drop_ring(ref); \
|
||||
scc_sstream_drop(&stream); \
|
||||
} while (0)
|
||||
|
||||
// 多 token 序列测试宏(接受类型数组)
|
||||
#define TEST_SEQUENCE(input, ...) \
|
||||
do { \
|
||||
scc_lexer_t lexer; \
|
||||
scc_lexer_tok_t token; \
|
||||
scc_sstream_t stream; \
|
||||
scc_sstream_init_by_buffer(&stream, input, strlen(input), 0, 16); \
|
||||
scc_sstream_ring_t *ref = scc_sstream_ref_ring(&stream); \
|
||||
scc_lexer_init(&lexer, ref); \
|
||||
\
|
||||
scc_tok_type_t expected[] = {__VA_ARGS__}; \
|
||||
size_t count = sizeof(expected) / sizeof(expected[0]); \
|
||||
for (size_t i = 0; i < count; i++) { \
|
||||
scc_lexer_get_token(&lexer, &token); \
|
||||
TEST_CHECK(token.type == expected[i]); \
|
||||
TEST_MSG("Token %zu: input '%s'", i, input); \
|
||||
TEST_MSG("Expected: %s", scc_get_tok_name(expected[i])); \
|
||||
TEST_MSG("Got: %s", scc_get_tok_name(token.type)); \
|
||||
free_token(&token); \
|
||||
} \
|
||||
\
|
||||
scc_sstream_drop_ring(ref); \
|
||||
scc_sstream_drop(&stream); \
|
||||
} while (0)
|
||||
|
||||
// ============================ 测试用例 ============================
|
||||
|
||||
void test_operators() {
|
||||
TEST_CASE("Arithmetic operators");
|
||||
TEST_TOKEN("+", SCC_TOK_ADD);
|
||||
TEST_TOKEN("++", SCC_TOK_ADD_ADD);
|
||||
TEST_TOKEN("+=", SCC_TOK_ASSIGN_ADD);
|
||||
TEST_TOKEN("-", SCC_TOK_SUB);
|
||||
TEST_TOKEN("--", SCC_TOK_SUB_SUB);
|
||||
TEST_TOKEN("-=", SCC_TOK_ASSIGN_SUB);
|
||||
TEST_TOKEN("*", SCC_TOK_MUL);
|
||||
TEST_TOKEN("*=", SCC_TOK_ASSIGN_MUL);
|
||||
TEST_TOKEN("/", SCC_TOK_DIV);
|
||||
TEST_TOKEN("/=", SCC_TOK_ASSIGN_DIV);
|
||||
TEST_TOKEN("%", SCC_TOK_MOD);
|
||||
TEST_TOKEN("%=", SCC_TOK_ASSIGN_MOD);
|
||||
|
||||
TEST_CASE("Bitwise operators");
|
||||
TEST_TOKEN("&", SCC_TOK_AND);
|
||||
TEST_TOKEN("&&", SCC_TOK_AND_AND);
|
||||
TEST_TOKEN("&=", SCC_TOK_ASSIGN_AND);
|
||||
TEST_TOKEN("|", SCC_TOK_OR);
|
||||
TEST_TOKEN("||", SCC_TOK_OR_OR);
|
||||
TEST_TOKEN("|=", SCC_TOK_ASSIGN_OR);
|
||||
TEST_TOKEN("^", SCC_TOK_XOR);
|
||||
TEST_TOKEN("^=", SCC_TOK_ASSIGN_XOR);
|
||||
TEST_TOKEN("~", SCC_TOK_BIT_NOT);
|
||||
TEST_TOKEN("<<", SCC_TOK_L_SH);
|
||||
TEST_TOKEN("<<=", SCC_TOK_ASSIGN_L_SH);
|
||||
TEST_TOKEN(">>", SCC_TOK_R_SH);
|
||||
TEST_TOKEN(">>=", SCC_TOK_ASSIGN_R_SH);
|
||||
|
||||
TEST_CASE("Comparison operators");
|
||||
TEST_TOKEN("==", SCC_TOK_EQ);
|
||||
TEST_TOKEN("!=", SCC_TOK_NEQ);
|
||||
TEST_TOKEN("<", SCC_TOK_LT);
|
||||
TEST_TOKEN("<=", SCC_TOK_LE);
|
||||
TEST_TOKEN(">", SCC_TOK_GT);
|
||||
TEST_TOKEN(">=", SCC_TOK_GE);
|
||||
|
||||
TEST_CASE("Special symbols");
|
||||
TEST_TOKEN("(", SCC_TOK_L_PAREN);
|
||||
TEST_TOKEN(")", SCC_TOK_R_PAREN);
|
||||
TEST_TOKEN("[", SCC_TOK_L_BRACKET);
|
||||
TEST_TOKEN("]", SCC_TOK_R_BRACKET);
|
||||
TEST_TOKEN("{", SCC_TOK_L_BRACE);
|
||||
TEST_TOKEN("}", SCC_TOK_R_BRACE);
|
||||
TEST_TOKEN(";", SCC_TOK_SEMICOLON);
|
||||
TEST_TOKEN(",", SCC_TOK_COMMA);
|
||||
TEST_TOKEN(":", SCC_TOK_COLON);
|
||||
TEST_TOKEN(".", SCC_TOK_DOT);
|
||||
TEST_TOKEN("...", SCC_TOK_ELLIPSIS);
|
||||
TEST_TOKEN("->", SCC_TOK_DEREF);
|
||||
TEST_TOKEN("?", SCC_TOK_COND);
|
||||
}
|
||||
|
||||
void test_keywords() {
|
||||
TEST_CASE("C89 keywords");
|
||||
TEST_TOKEN("while", SCC_TOK_WHILE);
|
||||
TEST_TOKEN("sizeof", SCC_TOK_SIZEOF);
|
||||
TEST_TOKEN("if", SCC_TOK_IF);
|
||||
TEST_TOKEN("else", SCC_TOK_ELSE);
|
||||
TEST_TOKEN("for", SCC_TOK_FOR);
|
||||
TEST_TOKEN("do", SCC_TOK_DO);
|
||||
TEST_TOKEN("switch", SCC_TOK_SWITCH);
|
||||
TEST_TOKEN("case", SCC_TOK_CASE);
|
||||
TEST_TOKEN("default", SCC_TOK_DEFAULT);
|
||||
TEST_TOKEN("break", SCC_TOK_BREAK);
|
||||
TEST_TOKEN("continue", SCC_TOK_CONTINUE);
|
||||
TEST_TOKEN("return", SCC_TOK_RETURN);
|
||||
TEST_TOKEN("goto", SCC_TOK_GOTO);
|
||||
TEST_TOKEN("auto", SCC_TOK_AUTO);
|
||||
TEST_TOKEN("register", SCC_TOK_REGISTER);
|
||||
TEST_TOKEN("static", SCC_TOK_STATIC);
|
||||
TEST_TOKEN("extern", SCC_TOK_EXTERN);
|
||||
TEST_TOKEN("typedef", SCC_TOK_TYPEDEF);
|
||||
TEST_TOKEN("const", SCC_TOK_CONST);
|
||||
TEST_TOKEN("volatile", SCC_TOK_VOLATILE);
|
||||
TEST_TOKEN("signed", SCC_TOK_SIGNED);
|
||||
TEST_TOKEN("unsigned", SCC_TOK_UNSIGNED);
|
||||
TEST_TOKEN("short", SCC_TOK_SHORT);
|
||||
TEST_TOKEN("long", SCC_TOK_LONG);
|
||||
TEST_TOKEN("int", SCC_TOK_INT);
|
||||
TEST_TOKEN("char", SCC_TOK_CHAR);
|
||||
TEST_TOKEN("float", SCC_TOK_FLOAT);
|
||||
TEST_TOKEN("double", SCC_TOK_DOUBLE);
|
||||
TEST_TOKEN("void", SCC_TOK_VOID);
|
||||
TEST_TOKEN("struct", SCC_TOK_STRUCT);
|
||||
TEST_TOKEN("union", SCC_TOK_UNION);
|
||||
TEST_TOKEN("enum", SCC_TOK_ENUM);
|
||||
|
||||
TEST_CASE("C99 keywords");
|
||||
TEST_TOKEN("inline", SCC_TOK_INLINE);
|
||||
TEST_TOKEN("restrict", SCC_TOK_RESTRICT);
|
||||
// _Bool, _Complex, _Imaginary 可根据需要添加
|
||||
|
||||
TEST_CASE("SCC extensions (if enabled)");
|
||||
TEST_TOKEN("asm", SCC_TOK_ASM);
|
||||
TEST_TOKEN("atomic", SCC_TOK_ATOMIC);
|
||||
TEST_TOKEN("bool", SCC_TOK_BOOL);
|
||||
TEST_TOKEN("complex", SCC_TOK_COMPLEX);
|
||||
}
|
||||
|
||||
void test_literals() {
|
||||
TEST_CASE("Integer literals - decimal");
|
||||
TEST_TOKEN("0", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("123", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("2147483647", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("4294967295", SCC_TOK_INT_LITERAL);
|
||||
|
||||
TEST_CASE("Integer literals - hexadecimal");
|
||||
TEST_TOKEN("0x0", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0x1A3F", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0XABCDEF", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0x123abc", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0XFF", SCC_TOK_INT_LITERAL);
|
||||
|
||||
TEST_CASE("Integer literals - octal");
|
||||
TEST_TOKEN("0123", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0777", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0", SCC_TOK_INT_LITERAL); // 0 既是十进制也是八进制
|
||||
|
||||
TEST_CASE("Integer literals - binary (C23 extension)");
|
||||
TEST_TOKEN("0b1010", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0B1100", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0b0", SCC_TOK_INT_LITERAL);
|
||||
|
||||
TEST_CASE("Integer literals with suffixes");
|
||||
TEST_TOKEN("123U", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("456L", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("789UL", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0x1FFLL", SCC_TOK_INT_LITERAL);
|
||||
TEST_TOKEN("0b1010ULL", SCC_TOK_INT_LITERAL);
|
||||
|
||||
TEST_CASE("Floating literals - decimal");
|
||||
TEST_TOKEN("0.0", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("3.14", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN(".5", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("0.", SCC_TOK_FLOAT_LITERAL);
|
||||
|
||||
TEST_CASE("Floating literals - scientific");
|
||||
TEST_TOKEN("1e10", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("1E-5", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("2.5e+3", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN(".1e2", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("1.e3", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("123.456e-7", SCC_TOK_FLOAT_LITERAL);
|
||||
|
||||
TEST_CASE("Floating literals - hexadecimal (C99)");
|
||||
TEST_TOKEN("0x1.2p3", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("0x1p-2", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("0x0.1p10", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("0X1.2P3", SCC_TOK_FLOAT_LITERAL);
|
||||
|
||||
TEST_CASE("Floating literals with suffixes");
|
||||
TEST_TOKEN("1.0f", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("2.0F", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("3.0l", SCC_TOK_FLOAT_LITERAL);
|
||||
TEST_TOKEN("4.0L", SCC_TOK_FLOAT_LITERAL);
|
||||
|
||||
TEST_CASE("Character literals - simple");
|
||||
TEST_TOKEN("'a'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'0'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("' '", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'\t'", SCC_TOK_CHAR_LITERAL); // 制表符在单引号内
|
||||
|
||||
TEST_CASE("Character literals - escape sequences");
|
||||
TEST_TOKEN("'\\n'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'\\t'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'\\\\'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'\\''", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'\\\"'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'\\?'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'\\0'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'\\123'", SCC_TOK_CHAR_LITERAL); // 八进制
|
||||
TEST_TOKEN("'\\xAB'", SCC_TOK_CHAR_LITERAL); // 十六进制
|
||||
|
||||
TEST_CASE("Character literals - multi-byte (implementation defined)");
|
||||
TEST_TOKEN("'ab'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_TOKEN("'\\x41\\x42'", SCC_TOK_CHAR_LITERAL); // 多个转义
|
||||
|
||||
TEST_CASE("String literals - basic");
|
||||
TEST_TOKEN("\"hello\"", SCC_TOK_STRING_LITERAL);
|
||||
TEST_TOKEN("\"\"", SCC_TOK_STRING_LITERAL);
|
||||
TEST_TOKEN("\"a b c\"", SCC_TOK_STRING_LITERAL);
|
||||
|
||||
TEST_CASE("String literals - escape sequences");
|
||||
TEST_TOKEN("\"a\\nb\\tc\"", SCC_TOK_STRING_LITERAL);
|
||||
TEST_TOKEN("\"\\\\ \\\" \\' \\?\"", SCC_TOK_STRING_LITERAL);
|
||||
TEST_TOKEN("\"\\123\\xAB\"", SCC_TOK_STRING_LITERAL);
|
||||
|
||||
TEST_CASE("String literals - wide and UTF-8 prefixes (C11)");
|
||||
TEST_TOKEN("L\"wide\"", SCC_TOK_STRING_LITERAL);
|
||||
TEST_TOKEN("u\"utf16\"", SCC_TOK_STRING_LITERAL);
|
||||
TEST_TOKEN("U\"utf32\"", SCC_TOK_STRING_LITERAL);
|
||||
TEST_TOKEN("u8\"utf8\"", SCC_TOK_STRING_LITERAL);
|
||||
}
|
||||
|
||||
// Checks that each horizontal whitespace character, and a run of them, lexes
// to SCC_TOK_BLANK as the first token.
void test_whitespace() {
|
||||
TEST_CASE("Whitespace characters");
|
||||
TEST_TOKEN(" ", SCC_TOK_BLANK);
|
||||
TEST_TOKEN("\t", SCC_TOK_BLANK);
|
||||
TEST_TOKEN("\v", SCC_TOK_BLANK);
|
||||
TEST_TOKEN("\f", SCC_TOK_BLANK);
|
||||
TEST_TOKEN(" \t\v\f", SCC_TOK_BLANK); // a run of whitespace should be a single token
|
||||
}
|
||||
|
||||
// Checks that "\n", "\r" and "\r\n" each lex to SCC_TOK_ENDLINE as the first
// token.
void test_newlines() {
|
||||
TEST_CASE("Newline characters");
|
||||
TEST_TOKEN("\n", SCC_TOK_ENDLINE);
|
||||
TEST_TOKEN("\r", SCC_TOK_ENDLINE);
|
||||
TEST_TOKEN("\r\n", SCC_TOK_ENDLINE); // should be treated as one line ending
|
||||
}
|
||||
|
||||
void test_comments() {
|
||||
TEST_CASE("Line comments");
|
||||
TEST_TOKEN("// single line comment", SCC_TOK_LINE_COMMENT);
|
||||
TEST_TOKEN("// comment with // inside", SCC_TOK_LINE_COMMENT);
|
||||
TEST_TOKEN("// comment at end", SCC_TOK_LINE_COMMENT);
|
||||
|
||||
TEST_CASE("Block comments");
|
||||
TEST_TOKEN("/* simple */", SCC_TOK_BLOCK_COMMENT);
|
||||
TEST_TOKEN("/* multi\nline */", SCC_TOK_BLOCK_COMMENT);
|
||||
TEST_TOKEN("/**/", SCC_TOK_BLOCK_COMMENT); // 空注释
|
||||
TEST_TOKEN("/* with * inside */", SCC_TOK_BLOCK_COMMENT);
|
||||
TEST_TOKEN("/* nested /* not allowed in C */",
|
||||
SCC_TOK_BLOCK_COMMENT); // 词法上不会嵌套
|
||||
}
|
||||
|
||||
void test_identifiers() {
|
||||
TEST_CASE("Valid identifiers");
|
||||
TEST_TOKEN("foo", SCC_TOK_IDENT);
|
||||
TEST_TOKEN("_foo", SCC_TOK_IDENT);
|
||||
TEST_TOKEN("foo123", SCC_TOK_IDENT);
|
||||
TEST_TOKEN("foo_bar", SCC_TOK_IDENT);
|
||||
TEST_TOKEN("FOO", SCC_TOK_IDENT);
|
||||
TEST_TOKEN("_", SCC_TOK_IDENT);
|
||||
TEST_TOKEN("__LINE__", SCC_TOK_IDENT); // 预处理宏名也是标识符
|
||||
|
||||
// 超长标识符(假设缓冲区足够)
|
||||
char long_id[1024];
|
||||
memset(long_id, 'a', sizeof(long_id) - 1);
|
||||
long_id[sizeof(long_id) - 1] = '\0';
|
||||
TEST_TOKEN(long_id, SCC_TOK_IDENT);
|
||||
}
|
||||
|
||||
// Checks how preprocessor-looking input is tokenized: '#' is its own token and
// directive lines come out as ordinary identifier / blank / literal tokens.
void test_preprocessor() {
|
||||
TEST_CASE("Preprocessor directives - just the # token");
|
||||
TEST_TOKEN("#", SCC_TOK_SHARP);
|
||||
TEST_TOKEN("##", SCC_TOK_SHARP); // the first # is the token; the second # will be the next
|
||||
// token (verified in the sequence tests)
|
||||
|
||||
// Multi-token sequence tests for #include and friends
|
||||
TEST_SEQUENCE("#include <stdio.h>", SCC_TOK_SHARP, SCC_TOK_IDENT,
|
||||
SCC_TOK_BLANK, SCC_TOK_LT, SCC_TOK_IDENT, SCC_TOK_DOT,
|
||||
SCC_TOK_IDENT, SCC_TOK_GT);
|
||||
TEST_SEQUENCE("#define FOO 123", SCC_TOK_SHARP, SCC_TOK_IDENT,
|
||||
SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK,
|
||||
SCC_TOK_INT_LITERAL);
|
||||
}
|
||||
|
||||
// Edge cases: characters outside the C source set, empty input,
// whitespace-only input, and digits immediately followed by letters.
void test_edge_cases() {
|
||||
TEST_CASE("Invalid characters");
|
||||
TEST_TOKEN("@", SCC_TOK_UNKNOWN);
|
||||
TEST_TOKEN("`", SCC_TOK_UNKNOWN);
|
||||
TEST_TOKEN("$", SCC_TOK_UNKNOWN); // not an identifier character in C
|
||||
|
||||
TEST_CASE("Empty input");
|
||||
TEST_TOKEN("", SCC_TOK_EOF); // EOF immediately
|
||||
|
||||
TEST_CASE("Only whitespace");
|
||||
TEST_TOKEN(" \t", SCC_TOK_BLANK);
|
||||
// EOF should follow, but the single-token test only takes the first token
|
||||
|
||||
TEST_CASE("Numbers followed by letters (no suffix)");
|
||||
// Lexically this should split into a number and an identifier
|
||||
TEST_SEQUENCE("123abc", SCC_TOK_INT_LITERAL, SCC_TOK_IDENT);
|
||||
TEST_SEQUENCE("0x123xyz", SCC_TOK_INT_LITERAL, SCC_TOK_IDENT);
|
||||
}
|
||||
|
||||
void test_sequences() {
|
||||
TEST_CASE("Simple expression");
|
||||
TEST_SEQUENCE("a + b * c", SCC_TOK_IDENT, SCC_TOK_BLANK, SCC_TOK_ADD,
|
||||
SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK, SCC_TOK_MUL,
|
||||
SCC_TOK_BLANK, SCC_TOK_IDENT);
|
||||
|
||||
TEST_CASE("Function call");
|
||||
TEST_SEQUENCE("func(1, 2);", SCC_TOK_IDENT, SCC_TOK_L_PAREN,
|
||||
SCC_TOK_INT_LITERAL, SCC_TOK_COMMA, SCC_TOK_BLANK,
|
||||
SCC_TOK_INT_LITERAL, SCC_TOK_R_PAREN, SCC_TOK_SEMICOLON);
|
||||
|
||||
TEST_CASE("Multi-character operators");
|
||||
TEST_SEQUENCE(">>=", SCC_TOK_ASSIGN_R_SH);
|
||||
TEST_SEQUENCE("<<=", SCC_TOK_ASSIGN_L_SH);
|
||||
TEST_SEQUENCE("...", SCC_TOK_ELLIPSIS);
|
||||
TEST_SEQUENCE("->", SCC_TOK_DEREF);
|
||||
TEST_SEQUENCE("##", SCC_TOK_SHARP, SCC_TOK_SHARP); // 两个预处理记号
|
||||
|
||||
TEST_CASE("Comments and whitespace interleaved");
|
||||
TEST_SEQUENCE("/* comment */ a // line comment\n b", SCC_TOK_BLOCK_COMMENT,
|
||||
SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK,
|
||||
SCC_TOK_LINE_COMMENT, SCC_TOK_ENDLINE, SCC_TOK_BLANK,
|
||||
SCC_TOK_IDENT);
|
||||
|
||||
TEST_CASE("String literals with escapes");
|
||||
TEST_SEQUENCE("\"hello\\nworld\"", SCC_TOK_STRING_LITERAL);
|
||||
TEST_SEQUENCE(
|
||||
"L\"wide\"",
|
||||
SCC_TOK_STRING_LITERAL); // 前缀作为标识符?不,整个是字符串字面量
|
||||
|
||||
TEST_CASE("Character literals with escapes");
|
||||
TEST_SEQUENCE("'\\x41'", SCC_TOK_CHAR_LITERAL);
|
||||
TEST_SEQUENCE("'\\123'", SCC_TOK_CHAR_LITERAL);
|
||||
}
|
||||
|
||||
// Exercises unterminated literals and comments. The expected token types here
// encode assumptions about the lexer's recovery strategy (see the inline notes).
void test_error_recovery() {
|
||||
// Unterminated character literal: the lexer may keep going until a newline or EOF
|
||||
// One option is that it yields SCC_TOK_CHAR_LITERAL spanning to the end
|
||||
// But an unterminated literal is an error in standard C, so UNKNOWN may be returned
|
||||
TEST_CASE("Unterminated character literal");
|
||||
TEST_TOKEN("'a", SCC_TOK_UNKNOWN); // implementation dependent, could be CHAR_LITERAL
|
||||
// A more reliable check: what the next token in the sequence is
|
||||
// NOTE(review): if the error token stops right after `'a`, the next token
|
||||
// would be the blank before `b` rather than IDENT -- confirm against the lexer.
|
||||
TEST_SEQUENCE("'a b", SCC_TOK_UNKNOWN,
|
||||
SCC_TOK_IDENT); // assumes the first token is the error
|
||||
|
||||
TEST_CASE("Unterminated string literal");
|
||||
TEST_TOKEN("\"hello", SCC_TOK_UNKNOWN); // same as above
|
||||
|
||||
TEST_CASE("Unterminated block comment");
|
||||
TEST_SEQUENCE("/* comment",
|
||||
SCC_TOK_BLOCK_COMMENT); // runs to EOF, may still count as a comment
|
||||
}
|
||||
|
||||
// ============================ 主测试列表 ============================
|
||||
|
||||
TEST_LIST = {
|
||||
{"operators", test_operators},
|
||||
{"keywords", test_keywords},
|
||||
{"literals", test_literals},
|
||||
{"whitespace", test_whitespace},
|
||||
{"newlines", test_newlines},
|
||||
{"comments", test_comments},
|
||||
{"identifiers", test_identifiers},
|
||||
{"preprocessor", test_preprocessor},
|
||||
{"edge_cases", test_edge_cases},
|
||||
{"sequences", test_sequences},
|
||||
{"error_recovery", test_error_recovery},
|
||||
{NULL, NULL},
|
||||
};
|
||||
@@ -1,170 +0,0 @@
|
||||
// test_lexer.c
|
||||
#include <lexer.h>
|
||||
#include <string.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
// 测试辅助函数
|
||||
static inline void test_lexer_string(const char *input,
|
||||
scc_tok_type_t expected_type) {
|
||||
scc_lexer_t lexer;
|
||||
scc_lexer_tok_t token;
|
||||
scc_mem_probe_stream_t stream;
|
||||
|
||||
scc_lexer_init(&lexer, scc_mem_probe_stream_init(&stream, input,
|
||||
strlen(input), false));
|
||||
scc_lexer_get_token(&lexer, &token);
|
||||
|
||||
TEST_CHECK(token.type == expected_type);
|
||||
TEST_MSG("Expected: %s", scc_get_tok_name(expected_type));
|
||||
TEST_MSG("Got: %s", scc_get_tok_name(token.type));
|
||||
}
|
||||
|
||||
// 基础运算符测试
|
||||
void test_operators() {
|
||||
TEST_CASE("Arithmetic operators");
|
||||
{
|
||||
test_lexer_string("+", SCC_TOK_ADD);
|
||||
test_lexer_string("++", SCC_TOK_ADD_ADD);
|
||||
test_lexer_string("+=", SCC_TOK_ASSIGN_ADD);
|
||||
test_lexer_string("-", SCC_TOK_SUB);
|
||||
test_lexer_string("--", SCC_TOK_SUB_SUB);
|
||||
test_lexer_string("-=", SCC_TOK_ASSIGN_SUB);
|
||||
test_lexer_string("*", SCC_TOK_MUL);
|
||||
test_lexer_string("*=", SCC_TOK_ASSIGN_MUL);
|
||||
test_lexer_string("/", SCC_TOK_DIV);
|
||||
test_lexer_string("/=", SCC_TOK_ASSIGN_DIV);
|
||||
test_lexer_string("%", SCC_TOK_MOD);
|
||||
test_lexer_string("%=", SCC_TOK_ASSIGN_MOD);
|
||||
}
|
||||
|
||||
TEST_CASE("Bitwise operators");
|
||||
{
|
||||
test_lexer_string("&", SCC_TOK_AND);
|
||||
test_lexer_string("&&", SCC_TOK_AND_AND);
|
||||
test_lexer_string("&=", SCC_TOK_ASSIGN_AND);
|
||||
test_lexer_string("|", SCC_TOK_OR);
|
||||
test_lexer_string("||", SCC_TOK_OR_OR);
|
||||
test_lexer_string("|=", SCC_TOK_ASSIGN_OR);
|
||||
test_lexer_string("^", SCC_TOK_XOR);
|
||||
test_lexer_string("^=", SCC_TOK_ASSIGN_XOR);
|
||||
test_lexer_string("~", SCC_TOK_BIT_NOT);
|
||||
test_lexer_string("<<", SCC_TOK_L_SH);
|
||||
test_lexer_string("<<=", SCC_TOK_ASSIGN_L_SH);
|
||||
test_lexer_string(">>", SCC_TOK_R_SH);
|
||||
test_lexer_string(">>=", SCC_TOK_ASSIGN_R_SH);
|
||||
}
|
||||
|
||||
TEST_CASE("Comparison operators");
|
||||
{
|
||||
test_lexer_string("==", SCC_TOK_EQ);
|
||||
test_lexer_string("!=", SCC_TOK_NEQ);
|
||||
test_lexer_string("<", SCC_TOK_LT);
|
||||
test_lexer_string("<=", SCC_TOK_LE);
|
||||
test_lexer_string(">", SCC_TOK_GT);
|
||||
test_lexer_string(">=", SCC_TOK_GE);
|
||||
}
|
||||
|
||||
TEST_CASE("Special symbols");
|
||||
{
|
||||
test_lexer_string("(", SCC_TOK_L_PAREN);
|
||||
test_lexer_string(")", SCC_TOK_R_PAREN);
|
||||
test_lexer_string("[", SCC_TOK_L_BRACKET);
|
||||
test_lexer_string("]", SCC_TOK_R_BRACKET);
|
||||
test_lexer_string("{", SCC_TOK_L_BRACE);
|
||||
test_lexer_string("}", SCC_TOK_R_BRACE);
|
||||
test_lexer_string(";", SCC_TOK_SEMICOLON);
|
||||
test_lexer_string(",", SCC_TOK_COMMA);
|
||||
test_lexer_string(":", SCC_TOK_COLON);
|
||||
test_lexer_string(".", SCC_TOK_DOT);
|
||||
test_lexer_string("...", SCC_TOK_ELLIPSIS);
|
||||
test_lexer_string("->", SCC_TOK_DEREF);
|
||||
test_lexer_string("?", SCC_TOK_COND);
|
||||
}
|
||||
}
|
||||
|
||||
// 关键字测试
|
||||
void test_keywords() {
|
||||
TEST_CASE("C89 keywords");
|
||||
test_lexer_string("while", SCC_TOK_WHILE);
|
||||
test_lexer_string("sizeof", SCC_TOK_SIZEOF);
|
||||
|
||||
TEST_CASE("C99 keywords");
|
||||
test_lexer_string("restrict", SCC_TOK_RESTRICT);
|
||||
// test_lexer_string("_Bool", SCC_TOK_INT); // 需确认你的类型定义
|
||||
}
|
||||
|
||||
// 字面量测试
|
||||
void test_literals() {
|
||||
TEST_CASE("Integer literals");
|
||||
{
|
||||
// 十进制
|
||||
test_lexer_string("0", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("123", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("2147483647", SCC_TOK_INT_LITERAL);
|
||||
|
||||
// 十六进制
|
||||
test_lexer_string("0x0", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("0x1A3F", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("0XABCDEF", SCC_TOK_INT_LITERAL);
|
||||
|
||||
// 八进制
|
||||
test_lexer_string("0123", SCC_TOK_INT_LITERAL);
|
||||
test_lexer_string("0777", SCC_TOK_INT_LITERAL);
|
||||
|
||||
// 边界值测试
|
||||
test_lexer_string("2147483647", SCC_TOK_INT_LITERAL); // INT_MAX
|
||||
test_lexer_string("4294967295", SCC_TOK_INT_LITERAL); // UINT_MAX
|
||||
}
|
||||
|
||||
TEST_CASE("Character literals");
|
||||
{
|
||||
test_lexer_string("'a'", SCC_TOK_CHAR_LITERAL);
|
||||
test_lexer_string("'\\n'", SCC_TOK_CHAR_LITERAL);
|
||||
test_lexer_string("'\\t'", SCC_TOK_CHAR_LITERAL);
|
||||
test_lexer_string("'\\\\'", SCC_TOK_CHAR_LITERAL);
|
||||
test_lexer_string("'\\0'", SCC_TOK_CHAR_LITERAL);
|
||||
}
|
||||
|
||||
TEST_CASE("String literals");
|
||||
{
|
||||
test_lexer_string("\"hello\"", SCC_TOK_STRING_LITERAL);
|
||||
test_lexer_string("\"multi-line\\nstring\"", SCC_TOK_STRING_LITERAL);
|
||||
test_lexer_string("\"escape\\\"quote\"", SCC_TOK_STRING_LITERAL);
|
||||
}
|
||||
|
||||
// TEST_CASE("Floating literals");
|
||||
// test_lexer_string("3.14e-5", SCC_TOK_FLOAT_LITERAL);
|
||||
}
|
||||
|
||||
// 边界测试
|
||||
void test_edge_cases() {
|
||||
// TEST_CASE("Long identifiers");
|
||||
// char long_id[LEXER_MAX_ SCC_TOK_SIZE+2] = {0};
|
||||
// memset(long_id, 'a', LEXER_MAX_ SCC_TOK_SIZE+1);
|
||||
// test_lexer_string(long_id, SCC_TOK_IDENT);
|
||||
|
||||
// TEST_CASE("Buffer boundary");
|
||||
// char boundary[LEXER_BUFFER_SIZE*2] = {0};
|
||||
// memset(boundary, '+', LEXER_BUFFER_SIZE*2-1);
|
||||
// test_lexer_string(boundary, SCC_TOK_ADD);
|
||||
}
|
||||
|
||||
// 错误处理测试
|
||||
// void test_error_handling() {
|
||||
// TEST_CASE("Invalid characters");
|
||||
// cc_lexer_t lexer;
|
||||
// tok_t token;
|
||||
|
||||
// init_lexer(&lexer, "test.c", NULL, test_read);
|
||||
// get_valid_token(&lexer, &token);
|
||||
|
||||
// TEST_CHECK(token.type == SCC_TOK_EOF); // 应触发错误处理
|
||||
// }
|
||||
|
||||
// 测试列表
|
||||
TEST_LIST = {{"operators", test_operators},
|
||||
{"keywords", test_keywords},
|
||||
{"literals", test_literals},
|
||||
{"edge_cases", test_edge_cases},
|
||||
// {"error_handling", test_error_handling},
|
||||
{NULL, NULL}};
|
||||
@@ -1,93 +0,0 @@
|
||||
#include <lexer.h>
|
||||
#include <lexer_log.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
/// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
|
||||
/*
|
||||
tok_tConstant {
|
||||
int have;
|
||||
union {
|
||||
char ch;
|
||||
int i;
|
||||
float f;
|
||||
double d;
|
||||
long long ll;
|
||||
char* str;
|
||||
};
|
||||
};
|
||||
*/
|
||||
|
||||
int g_num;
|
||||
int g_num_arr[3];
|
||||
int main(int argc, char *argv[]) {
|
||||
// int num = 0;
|
||||
if (argc == 3 && strcmp(argv[2], "--debug") == 0) {
|
||||
log_set_level(NULL, LOG_LEVEL_ALL);
|
||||
} else {
|
||||
// FIXME it is a hack lexer_logger
|
||||
log_set_level(&__scc_lexer_log, LOG_LEVEL_NOTSET);
|
||||
log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR |
|
||||
LOG_LEVEL_FATAL);
|
||||
}
|
||||
|
||||
const char *file_name = __FILE__;
|
||||
if (argc == 2) {
|
||||
file_name = argv[1];
|
||||
}
|
||||
FILE *fp = fopen(file_name, "rb");
|
||||
if (fp == NULL) {
|
||||
perror("open file failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (fseek(fp, 0, SEEK_END) != 0) {
|
||||
perror("fseek failed");
|
||||
return 1;
|
||||
}
|
||||
usize fsize = ftell(fp);
|
||||
LOG_INFO("file size: %zu", fsize);
|
||||
if (fseek(fp, 0, SEEK_SET)) {
|
||||
perror("fseek failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *buffer = (char *)malloc(fsize);
|
||||
|
||||
usize read_ret = fread(buffer, 1, fsize, fp);
|
||||
fclose(fp);
|
||||
if (read_ret != fsize) {
|
||||
LOG_FATAL("fread failed read_ret %u != fsize %u", read_ret, fsize);
|
||||
free(buffer);
|
||||
return 1;
|
||||
}
|
||||
|
||||
scc_lexer_t lexer;
|
||||
scc_mem_probe_stream_t mem_stream = {0};
|
||||
scc_probe_stream_t *stream =
|
||||
scc_mem_probe_stream_init(&mem_stream, buffer, fsize, false);
|
||||
Assert(stream != null);
|
||||
scc_cstring_clear(&stream->name);
|
||||
scc_cstring_append_cstr(&stream->name, file_name, strlen(file_name));
|
||||
scc_lexer_init(&lexer, stream);
|
||||
scc_lexer_tok_t tok;
|
||||
|
||||
while (1) {
|
||||
scc_lexer_get_valid_token(&lexer, &tok);
|
||||
if (tok.type == SCC_TOK_EOF) {
|
||||
break;
|
||||
}
|
||||
LOG_DEBUG("token `%s` at %s:%u:%u", scc_get_tok_name(tok.type),
|
||||
scc_cstring_as_cstr(&tok.loc.name), tok.loc.line,
|
||||
tok.loc.col);
|
||||
Assert(tok.loc.offset <= fsize);
|
||||
// LOG_DEBUG("%s", tok.val.str);
|
||||
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
|
||||
// lexer.line, lexer.index, tok.type, scc_get_tok_name(tok.type));
|
||||
}
|
||||
|
||||
free(buffer);
|
||||
LOG_INFO("Lexer is Ok...");
|
||||
return 0;
|
||||
}
|
||||
@@ -1,5 +1,9 @@
|
||||
[package]
|
||||
name = "scc_lex_parser"
|
||||
name = "sstream"
|
||||
version = "0.1.0"
|
||||
authors = []
|
||||
description = ""
|
||||
|
||||
dependencies = [{ name = "scc_core", path = "../../runtime/scc_core" }]
|
||||
# features = {}
|
||||
# default_features = []
|
||||
32
libs/sstream/include/scc_pos.h
Normal file
32
libs/sstream/include/scc_pos.h
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef __SCC_POS_H__
|
||||
#define __SCC_POS_H__
|
||||
|
||||
#include <scc_core_str.h>
|
||||
#include <scc_core_type.h>
|
||||
|
||||
typedef struct scc_pos {
|
||||
const char *name;
|
||||
usize line;
|
||||
usize col;
|
||||
usize offset;
|
||||
} scc_pos_t;
|
||||
|
||||
static inline scc_pos_t scc_pos_create() { return (scc_pos_t){0, 1, 1, 0}; }
|
||||
|
||||
// Step one character forward on the current line (column and offset both +1).
static inline void scc_pos_next(scc_pos_t *pos) {
    pos->col += 1;
    pos->offset += 1;
}
|
||||
|
||||
/**
 * Advance `pos` by `offset` characters on the current line.
 *
 * Both the byte offset and the column move by `offset`; the line number is
 * left unchanged.
 *
 * @param pos    position to update in place
 * @param offset number of characters to advance by
 */
static inline void scc_pos_next_offset(scc_pos_t *pos, int offset) {
    pos->offset += offset;
    // The column has to move with the offset, mirroring scc_pos_next(); the
    // original added `offset` to `pos->offset` twice and never touched `col`.
    pos->col += offset;
}
|
||||
|
||||
// Consume a line break: offset +1, line +1, column back to 1.
static inline void scc_pos_next_line(scc_pos_t *pos) {
    pos->line += 1;
    pos->col = 1;
    pos->offset += 1;
}
|
||||
|
||||
#endif /* __SCC_POS_H__ */
|
||||
33
libs/sstream/include/scc_sstream.h
Normal file
33
libs/sstream/include/scc_sstream.h
Normal file
@@ -0,0 +1,33 @@
|
||||
#ifndef __SCC_SSTREAM_H__
|
||||
#define __SCC_SSTREAM_H__
|
||||
|
||||
#include "scc_pos.h"
|
||||
#include <scc_core.h>
|
||||
#include <scc_core_ring.h>
|
||||
|
||||
typedef struct {
|
||||
scc_pos_t pos;
|
||||
int character;
|
||||
} scc_sstream_char_t;
|
||||
|
||||
typedef SCC_RING(scc_sstream_char_t) scc_sstream_ring_t;
|
||||
|
||||
// Source stream state: the backing buffer plus the ring of positioned
// characters (scc_sstream_char_t) exposed to consumers.
typedef struct {
|
||||
const char *fname; // presumably the source file name -- TODO confirm
|
||||
scc_pos_t pos; // current consume position (optional, derivable from the ring)
|
||||
int used; // whether the stream is still in use
|
||||
int owned_src; // whether src is owned by the stream, i.e. must be freed
|
||||
const char *src; // file content buffer (managed by sstream)
|
||||
usize len; // length of the buffer
|
||||
scc_pos_t fill_pos; // internal fill position
|
||||
scc_sstream_ring_t ring;
|
||||
} scc_sstream_t;
|
||||
|
||||
int scc_sstream_init(scc_sstream_t *stream, const char *fname, int ring_size);
|
||||
int scc_sstream_init_by_buffer(scc_sstream_t *stream, const char *buffer,
|
||||
usize len, int owned, int ring_size);
|
||||
scc_sstream_ring_t *scc_sstream_ref_ring(scc_sstream_t *stream);
|
||||
void scc_sstream_drop_ring(scc_sstream_ring_t *ring);
|
||||
void scc_sstream_drop(scc_sstream_t *stream);
|
||||
|
||||
#endif /* __SCC_SSTREAM_H__ */
|
||||
51
libs/sstream/src/main.c
Normal file
51
libs/sstream/src/main.c
Normal file
@@ -0,0 +1,51 @@
|
||||
#include "scc_sstream.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
const char *filename = (argc > 1) ? argv[1] : __FILE__; // 默认读取自身
|
||||
scc_sstream_t stream;
|
||||
scc_sstream_ring_t *ring;
|
||||
|
||||
// 初始化
|
||||
if (scc_sstream_init(&stream, filename, 16) != 0) {
|
||||
fprintf(stderr, "Failed to initialize stream for %s\n", filename);
|
||||
return 1;
|
||||
}
|
||||
ring = scc_sstream_ref_ring(&stream);
|
||||
Assert(ring != null);
|
||||
|
||||
printf("Reading file: %s\n", filename);
|
||||
|
||||
scc_sstream_char_t elem;
|
||||
cbool ok;
|
||||
int char_count = 0;
|
||||
int line_count = 0;
|
||||
|
||||
// 循环读取所有字符
|
||||
while (1) {
|
||||
scc_ring_next_consume(*ring, elem, ok);
|
||||
if (!ok)
|
||||
break; // 文件结束或错误
|
||||
|
||||
char_count++;
|
||||
if (elem.character == '\n')
|
||||
line_count++;
|
||||
|
||||
// 打印前 200 个字符的位置信息(避免刷屏)
|
||||
if (char_count <= 200) {
|
||||
printf("char[%d]: '%c' (line %zu, col %zu)\n", char_count,
|
||||
elem.character == '\n' ? ' '
|
||||
: elem.character, // 换行符显示为空格
|
||||
elem.pos.line, elem.pos.col);
|
||||
}
|
||||
}
|
||||
|
||||
printf("\nSummary:\n");
|
||||
printf(" Total characters: %d\n", char_count);
|
||||
printf(" Total lines: %d\n", line_count);
|
||||
|
||||
// 释放资源
|
||||
scc_sstream_drop_ring(ring);
|
||||
scc_sstream_drop(&stream);
|
||||
return 0;
|
||||
}
|
||||
145
libs/sstream/src/scc_sstream.c
Normal file
145
libs/sstream/src/scc_sstream.c
Normal file
@@ -0,0 +1,145 @@
|
||||
#include <scc_sstream.h>
|
||||
|
||||
// 内部扫描函数:从指定位置扫描下一个有效字符
|
||||
static int sstream_scan_at(scc_sstream_t *stream, scc_pos_t scan_pos,
|
||||
scc_pos_t *out_char_pos, scc_pos_t *out_next_pos) {
|
||||
while (1) {
|
||||
if (scan_pos.offset >= stream->len)
|
||||
return -1; // EOF
|
||||
|
||||
scc_pos_t start = scan_pos;
|
||||
char c = stream->src[scan_pos.offset];
|
||||
|
||||
// 处理反斜杠换行
|
||||
if (c == '\\') {
|
||||
usize next_off = scan_pos.offset + 1;
|
||||
if (next_off < stream->len) {
|
||||
char n = stream->src[next_off];
|
||||
if (n == '\n') {
|
||||
// 跳过 '\' 和 '\n'
|
||||
scan_pos.offset += 2;
|
||||
scan_pos.line++;
|
||||
scan_pos.col = 1;
|
||||
continue;
|
||||
} else if (n == '\r' && next_off + 1 < stream->len &&
|
||||
stream->src[next_off + 1] == '\n') {
|
||||
// 跳过 '\' + '\r' + '\n'
|
||||
scan_pos.offset += 3;
|
||||
scan_pos.line++;
|
||||
scan_pos.col = 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 处理 \r\n 转换为 \n
|
||||
if (c == '\r') {
|
||||
usize next_off = scan_pos.offset + 1;
|
||||
if (next_off < stream->len && stream->src[next_off] == '\n') {
|
||||
if (out_char_pos)
|
||||
*out_char_pos = start;
|
||||
// 下一个位置:偏移+2,行+1,列=1
|
||||
scan_pos.offset += 2;
|
||||
scan_pos.line++;
|
||||
scan_pos.col = 1;
|
||||
if (out_next_pos)
|
||||
*out_next_pos = scan_pos;
|
||||
return '\n';
|
||||
}
|
||||
}
|
||||
|
||||
// 普通字符(包括单独的 '\n'、'\r' 等)
|
||||
if (out_char_pos)
|
||||
*out_char_pos = start;
|
||||
// 计算下一个位置
|
||||
scan_pos.offset++;
|
||||
if (c == '\n') {
|
||||
scan_pos.line++;
|
||||
scan_pos.col = 1;
|
||||
} else {
|
||||
scan_pos.col++;
|
||||
}
|
||||
if (out_next_pos)
|
||||
*out_next_pos = scan_pos;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
// 环形缓冲区填充回调(通过 userdata 获取流对象)
|
||||
static cbool fill_func(scc_sstream_char_t *out, void *userdata) {
|
||||
scc_sstream_t *stream = (scc_sstream_t *)userdata;
|
||||
if (stream->fill_pos.offset >= stream->len)
|
||||
return false; // 已到文件尾
|
||||
|
||||
int ch = sstream_scan_at(stream, stream->fill_pos, &out->pos,
|
||||
&(stream->fill_pos));
|
||||
if (ch == -1)
|
||||
return false;
|
||||
out->character = ch;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Initialise a stream from the contents of a file.
 *
 * stream     stream object to initialise
 * fname      path of the file to read; the pointer is stored in the stream,
 *            so it must stay valid for as long as the stream is used
 * ring_size  capacity of the look-ahead ring (<= 0 selects the default)
 *
 * Returns 0 on success and non-zero on failure (file cannot be opened,
 * allocation failure, or short read). An empty file is not an error: the
 * stream is initialised with zero length so that it reports end of input
 * on the first read.
 */
int scc_sstream_init(scc_sstream_t *stream, const char *fname, int ring_size) {
    Assert(stream != null && fname != null);

    /* NOTE(review): assumes scc_fopen yields a null handle on failure. */
    scc_file_t file = scc_fopen(fname, SCC_FILE_READ);
    if (!file) {
        LOG_ERROR("failed to open file %s", fname);
        return 1;
    }

    const char *src = "";
    usize len = 0;
    int owned = 0;

    usize fsize = scc_fsize(file);
    if (fsize == 0) {
        LOG_WARN("file size is 0");
    } else {
        char *buffer = (char *)scc_malloc(fsize);
        if (buffer == null) {
            scc_fclose(file);
            return 1;
        }
        usize read_ret = scc_fread(file, buffer, fsize);
        if (read_ret != fsize) {
            /* A partial read would silently truncate the source text. */
            LOG_ERROR("short read on file %s", fname);
            scc_free(buffer);
            scc_fclose(file);
            return 1;
        }
        src = buffer;
        len = read_ret;
        owned = 1;
    }
    scc_fclose(file);

    int ret = scc_sstream_init_by_buffer(stream, src, len, owned, ring_size);
    if (ret != 0) {
        if (owned)
            scc_free((void *)src);
        return ret;
    }
    stream->fname = fname;
    stream->fill_pos.name = stream->fname;
    return 0;
}
|
||||
|
||||
int scc_sstream_init_by_buffer(scc_sstream_t *stream, const char *buffer,
|
||||
usize len, int owned, int ring_size) {
|
||||
stream->fname = "<buffer>";
|
||||
stream->fill_pos = scc_pos_create();
|
||||
stream->fill_pos.name = stream->fname;
|
||||
stream->src = buffer;
|
||||
stream->len = len;
|
||||
stream->owned_src = owned;
|
||||
|
||||
scc_ring_init(stream->ring, ring_size <= 0 ? 64 : ring_size, fill_func,
|
||||
stream);
|
||||
stream->used = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Borrow the stream's ring buffer and bump the reference counter.
 * Each call should be paired with one scc_sstream_drop_ring.
 */
scc_sstream_ring_t *scc_sstream_ref_ring(scc_sstream_t *stream) {
    Assert(stream != null);
    stream->used++;
    return &stream->ring;
}
|
||||
|
||||
/*
 * Give back a ring reference obtained from scc_sstream_ref_ring.
 * The owning stream is recovered from the ring's userdata pointer; dropping
 * more often than referencing only logs a warning.
 */
void scc_sstream_drop_ring(scc_sstream_ring_t *ring) {
    Assert(ring != null && ring->userdata != null);
    scc_sstream_t *stream = (scc_sstream_t *)ring->userdata;
    if (stream->used > 0) {
        stream->used--;
    } else {
        LOG_WARN("double drop sstream ring");
    }
}
|
||||
|
||||
/*
 * Release everything the stream owns: the source buffer (only when it was
 * handed over as owned) and the ring storage. Outstanding ring references
 * are reported through LOG_FATAL.
 */
void scc_sstream_drop(scc_sstream_t *stream) {
    Assert(stream != null);
    if (stream->used) {
        LOG_FATAL("drop sstream must be drop ring before ref [%d]",
                  stream->used);
    }
    if (stream->src && stream->owned_src) {
        scc_free((void *)stream->src);
        stream->src = null;
    }
    scc_ring_free(stream->ring);
}
|
||||
@@ -157,40 +157,128 @@ class VectorPrinter(gdb.ValuePrinter):
|
||||
|
||||
|
||||
class HashTablePrinter(gdb.ValuePrinter):
    """Pretty-printer for ``scc_hashtable_t``; active entries are shown as children."""

    # Entry state that marks a live slot (ENTRY_ACTIVE = 1 in the C enum).
    _ENTRY_ACTIVE = 1

    def __init__(self, val):
        self.val = val

    @staticmethod
    def check_type(val):
        """Return True if ``val`` looks like a hash table.

        Matches either by type name or, for anonymous/typedef'd structs, by
        the presence of the characteristic fields.
        """
        type_name = val.type.name
        if type_name and type_name in ("scc_hashtable_t", "scc_hashtable"):
            return True
        try:
            fields = {f.name for f in val.type.fields()}
            required = {"entries", "count", "tombstone_count", "hash_func", "key_cmp"}
            if required.issubset(fields):
                return True
        except Exception:
            # Non-struct types have no fields(); treat them as "not a table".
            pass
        return False

    def to_string(self):
        count = self.val["count"]
        tombstone = self.val["tombstone_count"]
        cap = self.val["entries"]["size"]  # total number of slots
        return f"hashtable(count={count}, tombstone={tombstone}, capacity={cap})"

    def display_hint(self):
        return "map"

    def num_children(self):
        return int(self.val["count"])

    def children(self):
        """Yield ``("[slot]", entry)`` for every active slot."""
        entries = self.val["entries"]
        size = int(entries["size"])
        data = entries["data"]
        if size == 0 or data == 0:
            return
        for i in range(size):
            entry = data[i]
            if int(entry["state"]) == self._ENTRY_ACTIVE:
                yield (f"[{i}]", entry)


def append_printer():
    """Legacy registration: append a VectorPrinter lookup to gdb.pretty_printers."""
    gdb.pretty_printers.append(
        lambda val: VectorPrinter(val) if VectorPrinter.check_type(val) else None
    )
|
||||
|
||||
|
||||
def register_new_printer():
    """Legacy registration: register a VectorPrinter-only lookup function."""

    def str_lookup_function(val):
        if VectorPrinter.check_type(val) is False:
            return None
        return VectorPrinter(val)

    gdb.printing.register_pretty_printer(gdb.current_objfile(), str_lookup_function)


class StrPoolPrinter(gdb.ValuePrinter):
    """Pretty-printer for ``scc_strpool_t``; keys and values are shown as strings."""

    def __init__(self, val):
        self.val = val
        self.ht = val["ht"]  # underlying hash table

    @staticmethod
    def check_type(val):
        """Return True if ``val`` is a string pool (by type name or an ``ht`` field)."""
        type_name = val.type.name
        if type_name and type_name == "scc_strpool_t":
            return True
        try:
            fields = {f.name for f in val.type.fields()}
            if "ht" in fields:
                # The type of ``ht`` could be checked as well; not required.
                return True
        except Exception:
            # Non-struct types have no fields(); treat them as "not a pool".
            pass
        return False

    def to_string(self):
        count = self.ht["count"]
        cap = self.ht["entries"]["size"]
        return f"strpool(count={count}, capacity={cap})"

    def display_hint(self):
        return "map"

    def num_children(self):
        return int(self.ht["count"])

    def children(self):
        """Yield ``(repr(key), value)`` string pairs for every active slot."""
        entries = self.ht["entries"]
        size = int(entries["size"])
        data = entries["data"]
        if size == 0 or data == 0:
            return
        const_char_ptr = gdb.lookup_type("const char").pointer()
        char_ptr = gdb.lookup_type("char").pointer()

        for i in range(size):
            entry = data[i]
            state = int(entry["state"])
            if state == 1:  # ACTIVE
                key_val = entry["key"]
                value_val = entry["value"]

                # Try to read the void* as a C string; fall back to the address.
                try:
                    key_str = key_val.cast(const_char_ptr).string()
                except Exception:
                    key_str = str(key_val)

                try:
                    value_str = value_val.cast(char_ptr).string()
                except Exception:
                    value_str = str(value_val)

                # Quoted key as the child name, plain string as the value.
                yield (repr(key_str), value_str)
|
||||
|
||||
|
||||
def register_pretty_printers():
    """Register one lookup function that dispatches to every printer."""

    def lookup_function(val):
        # First matching printer wins; order matters for overlapping checks.
        if VectorPrinter.check_type(val):
            return VectorPrinter(val)
        if HashTablePrinter.check_type(val):
            return HashTablePrinter(val)
        if StrPoolPrinter.check_type(val):
            return StrPoolPrinter(val)
        return None

    gdb.printing.register_pretty_printer(gdb.current_objfile(), lookup_function)
|
||||
|
||||
|
||||
class VectorInfoCommand(gdb.Command):
|
||||
@@ -216,7 +304,5 @@ class VectorInfoCommand(gdb.Command):
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # A single lookup function covers every printer, so register only once.
    register_pretty_printers()
    VectorInfoCommand()
|
||||
|
||||
@@ -6,9 +6,7 @@
|
||||
#include <scc_core_impl.h>
|
||||
#include <scc_core_macro.h>
|
||||
#include <scc_core_mem.h>
|
||||
#include <scc_core_pos.h>
|
||||
#include <scc_core_str.h>
|
||||
#include <scc_core_stream.h>
|
||||
#include <scc_core_vec.h>
|
||||
|
||||
#endif // __SCC_CORE_H__
|
||||
|
||||
@@ -18,6 +18,7 @@ typedef enum {
|
||||
|
||||
scc_file_t scc_fopen(const char *path, scc_fmode_t mode);
|
||||
void scc_fclose(scc_file_t file);
|
||||
usize scc_fsize(scc_file_t file);
|
||||
usize scc_fread(scc_file_t file, void *buffer, usize size);
|
||||
usize scc_fwrite(scc_file_t file, const void *buffer, usize size);
|
||||
cbool scc_fexists(const char *path);
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
#ifndef __SCC_CORE_POS_H__
|
||||
#define __SCC_CORE_POS_H__
|
||||
|
||||
#include "scc_core_str.h"
|
||||
#include "scc_core_type.h"
|
||||
typedef struct scc_pos {
|
||||
scc_cstring_t name;
|
||||
usize line;
|
||||
usize col;
|
||||
usize offset;
|
||||
} scc_pos_t;
|
||||
|
||||
static inline scc_pos_t scc_pos_create() {
|
||||
return (scc_pos_t){scc_cstring_create(), 1, 1, 0};
|
||||
}
|
||||
|
||||
static inline void scc_pos_next(scc_pos_t *pos) {
|
||||
pos->offset++;
|
||||
pos->col++;
|
||||
}
|
||||
|
||||
static inline void scc_pos_next_line(scc_pos_t *pos) {
|
||||
pos->offset++;
|
||||
pos->line++;
|
||||
pos->col = 1;
|
||||
}
|
||||
|
||||
#endif /* __SCC_CORE_POS_H__ */
|
||||
178
runtime/scc_core/include/scc_core_ring.h
Normal file
178
runtime/scc_core/include/scc_core_ring.h
Normal file
@@ -0,0 +1,178 @@
|
||||
#ifndef __SCC_CORE_RING_H__
|
||||
#define __SCC_CORE_RING_H__
|
||||
|
||||
#include <scc_core.h>
|
||||
|
||||
/**
 * @def SCC_RING(type)
 * @brief Declare a ring-buffer struct for elements of @p type.
 * @param type element type stored in the ring
 *
 * The generated struct has these fields:
 * - data:     element array
 * - cap:      capacity
 * - head:     logical index of the first unconsumed element
 * - probe:    look-ahead index
 * - tail:     logical index one past the last filled element
 * - fill:     callback invoked when a new element is needed
 * - userdata: opaque pointer passed to the fill callback
 */
#define SCC_RING(type)                                                         \
    struct {                                                                   \
        type *data;                                                            \
        usize cap;                                                             \
        usize head;                                                            \
        usize probe;                                                           \
        usize tail;                                                            \
        cbool (*fill)(type * out, void *userdata);                             \
        void *userdata;                                                        \
    }
|
||||
|
||||
// ==================== Internal helper macros (not for direct use) ====================

/* Map a logical index to a physical slot in the data array. */
#define scc_ring_phys(ring, idx) ((idx) % (ring).cap)
|
||||
|
||||
/**
 * @brief Make sure an element is available at the probe position, filling
 *        one new element if necessary.
 * @param ring ring-buffer variable
 * @param ok   variable (e.g. int ok_flag) set to true or false by the macro
 *
 * Fails when there is no fill callback, when every slot already holds an
 * unconsumed element, or when the fill callback reports no more data.
 */
#define scc_ring_ensure(ring, ok)                                              \
    do {                                                                       \
        (ok) = 1;                                                              \
        if ((ring).probe < (ring).tail)                                        \
            break;                                                             \
        /* probe == tail: a new element has to be produced */                  \
        if (!(ring).fill) {                                                    \
            (ok) = 0;                                                          \
            break;                                                             \
        }                                                                      \
        if ((ring).tail - (ring).head >= (ring).cap) {                         \
            (ok) = 0; /* buffer full, nothing can be filled */                 \
            break;                                                             \
        }                                                                      \
        usize phys_tail = scc_ring_phys(ring, (ring).tail);                    \
        if (!(ring).fill(&(ring).data[phys_tail], (ring).userdata)) {          \
            (ok) = 0;                                                          \
            break;                                                             \
        }                                                                      \
        (ring).tail++;                                                         \
    } while (0)
|
||||
|
||||
// ==================== User-facing macros ====================

/**
 * @brief Initialise a ring buffer.
 * @param ring      ring-buffer variable
 * @param _cap      capacity (evaluated exactly once)
 * @param fill_func fill callback (may be NULL)
 * @param _userdata opaque pointer handed to the fill callback
 *
 * Allocation failure is left to scc_malloc (e.g. LOG_FATAL).
 */
#define scc_ring_init(ring, _cap, fill_func, _userdata)                        \
    do {                                                                       \
        (ring).cap = (_cap);                                                   \
        (ring).data = scc_malloc((ring).cap * sizeof(*(ring).data));           \
        (ring).head = 0;                                                       \
        (ring).probe = 0;                                                      \
        (ring).tail = 0;                                                       \
        (ring).fill = (fill_func);                                             \
        (ring).userdata = (_userdata);                                         \
    } while (0)
|
||||
|
||||
/**
 * @brief Release the ring buffer's storage and reset its indices.
 * @param ring ring-buffer variable
 */
#define scc_ring_free(ring)                                                    \
    do {                                                                       \
        scc_free((ring).data);                                                 \
        (ring).data = NULL;                                                    \
        (ring).cap = (ring).head = (ring).probe = (ring).tail = 0;             \
    } while (0)
|
||||
|
||||
/**
 * @brief Read the element at the probe position without moving probe.
 * @param ring ring-buffer variable
 * @param val  variable that receives the element (e.g. int ch)
 * @param ok   variable that receives the success flag (cbool)
 *
 * @p val is left untouched when @p ok ends up false.
 */
#define scc_ring_peek(ring, val, ok)                                           \
    do {                                                                       \
        scc_ring_ensure(ring, ok);                                             \
        if (!(ok))                                                             \
            break;                                                             \
        if ((ring).probe >= (ring).tail) {                                     \
            (ok) = 0;                                                          \
            break;                                                             \
        }                                                                      \
        usize _phys = scc_ring_phys(ring, (ring).probe);                       \
        val = (ring).data[_phys];                                              \
    } while (0)
|
||||
|
||||
/**
 * @brief Read the element at the probe position and advance probe by one.
 * @param ring ring-buffer variable
 * @param val  variable that receives the element (e.g. int ch)
 * @param ok   variable that receives the success flag (cbool)
 *
 * On failure neither @p val nor probe is modified.
 */
#define scc_ring_next(ring, val, ok)                                           \
    do {                                                                       \
        scc_ring_ensure(ring, ok);                                             \
        if (!(ok))                                                             \
            break;                                                             \
        if ((ring).probe >= (ring).tail) {                                     \
            (ok) = 0;                                                          \
            break;                                                             \
        }                                                                      \
        usize _phys = scc_ring_phys(ring, (ring).probe);                       \
        val = (ring).data[_phys];                                              \
        (ring).probe++;                                                        \
    } while (0)
|
||||
|
||||
/**
 * @brief Move probe back by one element (never below head).
 * @param ring ring-buffer variable
 * @param ok   variable that receives the success flag (cbool)
 */
#define scc_ring_back(ring, ok)                                                \
    do {                                                                       \
        if ((ring).probe > (ring).head) {                                      \
            (ring).probe--;                                                    \
            (ok) = 1;                                                          \
        } else {                                                               \
            (ok) = 0;                                                          \
        }                                                                      \
    } while (0)
|
||||
|
||||
/**
 * @brief Reset probe to head, discarding any look-ahead progress.
 * @param ring ring-buffer variable
 */
#define scc_ring_reset(ring) ((ring).probe = (ring).head)
|
||||
|
||||
/**
 * @brief Move head up to probe, marking everything before probe as consumed.
 * @param ring ring-buffer variable
 */
#define scc_ring_consume(ring) ((ring).head = (ring).probe)
|
||||
|
||||
/**
 * @brief Number of already-filled elements between probe and tail.
 * @param ring ring-buffer variable
 * @return count of elements that can be read without calling fill
 */
#define scc_ring_available(ring) ((ring).tail - (ring).probe)
|
||||
|
||||
/**
 * @brief Read the element at probe, advance probe, and mark it consumed.
 * @param ring ring-buffer variable
 * @param val  variable that receives the element (e.g. int ch)
 * @param ok   variable that receives the success flag (cbool)
 *
 * head is synchronised with probe even when the read fails.
 */
#define scc_ring_next_consume(ring, val, ok)                                   \
    do {                                                                       \
        scc_ring_next(ring, val, ok);                                          \
        scc_ring_consume(ring);                                                \
    } while (0)
|
||||
|
||||
#endif /* __SCC_CORE_RING_H__ */
|
||||
@@ -1,130 +0,0 @@
|
||||
#ifndef __SMCC_CORE_PROBE_STREAM_H__
|
||||
#define __SMCC_CORE_PROBE_STREAM_H__
|
||||
|
||||
#include "scc_core_impl.h"
|
||||
#include "scc_core_macro.h"
|
||||
#include "scc_core_mem.h"
|
||||
#include "scc_core_str.h"
|
||||
|
||||
struct scc_probe_stream;
|
||||
typedef struct scc_probe_stream scc_probe_stream_t;
|
||||
|
||||
#define scc_stream_eof (-1)
|
||||
|
||||
/**
|
||||
* @brief 带探针的流接口
|
||||
*
|
||||
* 这个流提供了双指针机制:当前读取位置(头指针)和探针位置(尾指针)。
|
||||
* 尾指针只能向前移动,用于查看而不消费。
|
||||
* 头指针可以前进或单次后退,但不能一直后退到尾指针后面。
|
||||
*/
|
||||
struct scc_probe_stream {
|
||||
scc_cstring_t name;
|
||||
|
||||
/// @brief 消费头指针处的字符(移动头指针)
|
||||
int (*consume)(scc_probe_stream_t *stream);
|
||||
|
||||
/// @brief 查看当前探针位置的字符,不移动任何指针
|
||||
int (*peek)(scc_probe_stream_t *stream);
|
||||
|
||||
/// @brief 移动探针位置并返回字符
|
||||
int (*next)(scc_probe_stream_t *stream);
|
||||
|
||||
/// @brief 回退一个字符(单次后退,探针位置后退一步)
|
||||
cbool (*back)(scc_probe_stream_t *stream);
|
||||
|
||||
/// @brief 移动头指针到探针位置
|
||||
void (*sync)(scc_probe_stream_t *stream);
|
||||
|
||||
/// @brief 重置探针位置到头指针位置
|
||||
void (*reset)(scc_probe_stream_t *stream);
|
||||
|
||||
/// @brief 读取指定数量的字符到缓冲区
|
||||
usize (*read_buf)(scc_probe_stream_t *stream, char *buffer, usize count);
|
||||
|
||||
/// @brief 检查是否到达流末尾
|
||||
cbool (*is_at_end)(scc_probe_stream_t *stream);
|
||||
|
||||
/// @brief 销毁流并释放资源
|
||||
void (*drop)(scc_probe_stream_t *stream);
|
||||
};
|
||||
|
||||
static inline int scc_probe_stream_consume(scc_probe_stream_t *self) {
|
||||
return self->consume(self);
|
||||
}
|
||||
|
||||
static inline int scc_probe_stream_peek(scc_probe_stream_t *self) {
|
||||
return self->peek(self);
|
||||
}
|
||||
|
||||
static inline int scc_probe_stream_next(scc_probe_stream_t *self) {
|
||||
return self->next(self);
|
||||
}
|
||||
|
||||
static inline void scc_probe_stream_sync(scc_probe_stream_t *self) {
|
||||
self->sync(self);
|
||||
}
|
||||
|
||||
static inline cbool scc_probe_stream_back(scc_probe_stream_t *self) {
|
||||
return self->back(self);
|
||||
}
|
||||
|
||||
static inline void scc_probe_stream_reset(scc_probe_stream_t *self) {
|
||||
self->reset(self);
|
||||
}
|
||||
|
||||
static inline usize scc_probe_stream_read_buf(scc_probe_stream_t *self,
|
||||
char *buffer, usize count) {
|
||||
return self->read_buf(self, buffer, count);
|
||||
}
|
||||
|
||||
static inline cbool scc_probe_stream_is_at_end(scc_probe_stream_t *self) {
|
||||
return self->is_at_end(self);
|
||||
}
|
||||
|
||||
static inline cbool scc_probe_stream_has_more(scc_probe_stream_t *self) {
|
||||
return !self->is_at_end(self);
|
||||
}
|
||||
|
||||
static inline void scc_probe_stream_drop(scc_probe_stream_t *self) {
|
||||
self->drop(self);
|
||||
}
|
||||
|
||||
#ifndef __SCC_NO_MEM_PROBE_STREAM__
|
||||
/**
|
||||
* @brief 内存探针流结构
|
||||
*/
|
||||
typedef struct scc_mem_probe_stream {
|
||||
scc_probe_stream_t stream;
|
||||
const char *data;
|
||||
usize data_length;
|
||||
usize curr_pos; // 当前读取位置
|
||||
usize probe_pos; // 探针位置(用于peek)
|
||||
cbool owned; // 是否拥有数据(如果拥有将会自动释放)
|
||||
} scc_mem_probe_stream_t;
|
||||
|
||||
/**
|
||||
* @brief 初始化内存探针流(由你负责scc_mem_probe_stream_t的释放)
|
||||
*
|
||||
* @param stream 流结构指针
|
||||
* @param data 数据指针
|
||||
* @param length 数据长度
|
||||
* @param owned 是否拥有数据(如果拥有将会自动释放)
|
||||
* @return core_probe_stream_t* 成功返回流指针,失败返回NULL
|
||||
*/
|
||||
scc_probe_stream_t *scc_mem_probe_stream_init(scc_mem_probe_stream_t *stream,
|
||||
const char *data, usize length,
|
||||
cbool owned);
|
||||
/**
|
||||
* @brief 构造内存探针流(其中drop会自动释放内存)
|
||||
*
|
||||
* @param data
|
||||
* @param length
|
||||
* @param owned 是否拥有数据(如果拥有将会自动释放)
|
||||
* @return scc_probe_stream_t*
|
||||
*/
|
||||
scc_probe_stream_t *scc_mem_probe_stream_alloc(const char *data, usize length,
|
||||
cbool owned);
|
||||
#endif
|
||||
|
||||
#endif /* __SMCC_CORE_PROBE_STREAM_H__ */
|
||||
@@ -53,6 +53,20 @@ void scc_fclose(scc_file_t file) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Return the size of an open file in bytes and rewind it to the start.
 *
 * Returns 0 when the handle is null or when seeking/telling fails, so a
 * failure cannot be told apart from an empty file by the return value alone.
 */
usize scc_fsize(scc_file_t file) {
    FILE *fp = (FILE *)file;
    if (!fp)
        return 0;
    if (fseek(fp, 0, SEEK_END) != 0) {
        perror("fseek failed");
        return 0;
    }
    /* ftell reports failure as -1L; do not let that wrap to a huge size. */
    long end = ftell(fp);
    if (end < 0) {
        perror("ftell failed");
        (void)fseek(fp, 0, SEEK_SET); /* best-effort rewind */
        return 0;
    }
    if (fseek(fp, 0, SEEK_SET)) {
        perror("fseek failed");
        return 0;
    }
    return (usize)end;
}
|
||||
|
||||
usize scc_fread(scc_file_t file, void *buffer, usize size) {
|
||||
if (!file || !buffer)
|
||||
return 0;
|
||||
|
||||
@@ -1,183 +0,0 @@
|
||||
#include <scc_core_log.h>
|
||||
#include <scc_core_stream.h>
|
||||
|
||||
#ifndef __SCC_CORE_NO_MEM_PROBE_STREAM__
|
||||
|
||||
static int mem_probe_stream_consume(scc_probe_stream_t *_stream) {
|
||||
Assert(_stream != null);
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
|
||||
if (stream->curr_pos >= stream->data_length) {
|
||||
return scc_stream_eof;
|
||||
}
|
||||
|
||||
unsigned char ch = stream->data[stream->curr_pos++];
|
||||
// 如果探针位置落后于当前读取位置,则更新探针位置
|
||||
if (stream->probe_pos < stream->curr_pos) {
|
||||
stream->probe_pos = stream->curr_pos;
|
||||
}
|
||||
return (int)ch;
|
||||
}
|
||||
|
||||
static int mem_probe_stream_peek(scc_probe_stream_t *_stream) {
|
||||
Assert(_stream != null);
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
|
||||
if (stream->probe_pos >= stream->data_length) {
|
||||
return scc_stream_eof;
|
||||
}
|
||||
|
||||
// 只查看而不移动探针位置
|
||||
return (int)(unsigned char)stream->data[stream->probe_pos];
|
||||
}
|
||||
|
||||
static int mem_probe_stream_next(scc_probe_stream_t *_stream) {
|
||||
Assert(_stream != null);
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
|
||||
if (stream->probe_pos >= stream->data_length) {
|
||||
return scc_stream_eof;
|
||||
}
|
||||
|
||||
// 返回探针位置的字符,并将探针位置向前移动
|
||||
int ch = (int)(unsigned char)stream->data[stream->probe_pos];
|
||||
stream->probe_pos++;
|
||||
return ch;
|
||||
}
|
||||
|
||||
static void mem_probe_stream_sync(scc_probe_stream_t *_stream) {
|
||||
Assert(_stream != null);
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
|
||||
// 移动头指针到探针位置(消费已查看的字符)
|
||||
if (stream->probe_pos > stream->curr_pos) {
|
||||
stream->curr_pos = stream->probe_pos;
|
||||
}
|
||||
}
|
||||
|
||||
static cbool mem_probe_stream_back(scc_probe_stream_t *_stream) {
|
||||
Assert(_stream != null);
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
|
||||
// 只能回退一个字符
|
||||
if (stream->probe_pos == 0)
|
||||
return false;
|
||||
if (stream->curr_pos + 1 > stream->probe_pos)
|
||||
return false;
|
||||
|
||||
stream->probe_pos--;
|
||||
return true;
|
||||
}
|
||||
|
||||
static usize mem_probe_stream_read_buf(scc_probe_stream_t *_stream,
|
||||
char *buffer, usize count) {
|
||||
Assert(_stream != null);
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
|
||||
if (buffer == null) {
|
||||
LOG_WARN("Buffer is null");
|
||||
return 0;
|
||||
}
|
||||
|
||||
usize remaining = stream->data_length - stream->curr_pos;
|
||||
usize to_read = (remaining < count) ? remaining : count;
|
||||
|
||||
if (to_read > 0) {
|
||||
scc_memcpy(buffer, stream->data + stream->curr_pos, to_read);
|
||||
stream->curr_pos += to_read;
|
||||
// 更新探针位置
|
||||
if (stream->probe_pos < stream->curr_pos) {
|
||||
stream->probe_pos = stream->curr_pos;
|
||||
}
|
||||
} else {
|
||||
LOG_WARN("Reading past end of stream [maybe count is too large or "
|
||||
"negative?]");
|
||||
}
|
||||
|
||||
return to_read;
|
||||
}
|
||||
|
||||
static void mem_probe_stream_reset(scc_probe_stream_t *_stream) {
|
||||
Assert(_stream != null);
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
|
||||
// 重置探针位置到头指针位置
|
||||
stream->probe_pos = stream->curr_pos;
|
||||
}
|
||||
|
||||
static cbool mem_probe_stream_is_at_end(scc_probe_stream_t *_stream) {
|
||||
Assert(_stream != null);
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
|
||||
return stream->curr_pos >= stream->data_length;
|
||||
}
|
||||
|
||||
static void mem_probe_stream_drop(scc_probe_stream_t *_stream) {
|
||||
Assert(_stream != null);
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
|
||||
scc_cstring_free(&stream->stream.name);
|
||||
|
||||
if (stream->owned) {
|
||||
scc_free((void *)stream->data);
|
||||
stream->data = null;
|
||||
}
|
||||
}
|
||||
|
||||
scc_probe_stream_t *scc_mem_probe_stream_init(scc_mem_probe_stream_t *stream,
|
||||
const char *data, usize length,
|
||||
cbool owned) {
|
||||
if (stream == null || data == null) {
|
||||
LOG_ERROR("param error");
|
||||
return null;
|
||||
}
|
||||
|
||||
if (length == 0) {
|
||||
LOG_WARN("input memory is empty");
|
||||
owned = false;
|
||||
}
|
||||
|
||||
stream->owned = owned;
|
||||
stream->data = data;
|
||||
stream->data_length = length;
|
||||
stream->curr_pos = 0;
|
||||
stream->probe_pos = 0;
|
||||
|
||||
stream->stream.name = scc_cstring_from_cstr("mem_probe_stream");
|
||||
|
||||
// 设置函数指针
|
||||
stream->stream.consume = mem_probe_stream_consume;
|
||||
stream->stream.peek = mem_probe_stream_peek;
|
||||
stream->stream.next = mem_probe_stream_next;
|
||||
stream->stream.sync = mem_probe_stream_sync;
|
||||
stream->stream.back = mem_probe_stream_back;
|
||||
stream->stream.read_buf = mem_probe_stream_read_buf;
|
||||
stream->stream.reset = mem_probe_stream_reset;
|
||||
stream->stream.is_at_end = mem_probe_stream_is_at_end;
|
||||
stream->stream.drop = mem_probe_stream_drop;
|
||||
|
||||
return (scc_probe_stream_t *)stream;
|
||||
}
|
||||
|
||||
static void scc_owned_mem_stream_drop(scc_probe_stream_t *_stream) {
|
||||
scc_mem_probe_stream_t *stream = (scc_mem_probe_stream_t *)_stream;
|
||||
mem_probe_stream_drop(_stream);
|
||||
scc_free(stream);
|
||||
}
|
||||
|
||||
scc_probe_stream_t *scc_mem_probe_stream_alloc(const char *data, usize length,
|
||||
cbool owned) {
|
||||
scc_mem_probe_stream_t *stream =
|
||||
(scc_mem_probe_stream_t *)scc_malloc(sizeof(scc_mem_probe_stream_t));
|
||||
if (stream == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
scc_probe_stream_t *ret =
|
||||
scc_mem_probe_stream_init(stream, data, length, owned);
|
||||
stream->stream.drop = scc_owned_mem_stream_drop;
|
||||
Assert(ret != null);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* __SCC_CORE_NO_MEM_PROBE_STREAM__ */
|
||||
326
runtime/scc_core/tests/test_core_ring.c
Normal file
326
runtime/scc_core/tests/test_core_ring.c
Normal file
@@ -0,0 +1,326 @@
|
||||
#include <scc_core.h>
|
||||
#include <scc_core_ring.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <utest/acutest.h>
|
||||
|
||||
// 为测试定义简单的 token 类型(包含动态字符串)
|
||||
typedef struct {
|
||||
int id;
|
||||
char *data;
|
||||
} test_token_t;
|
||||
|
||||
// 定义环形缓冲区类型别名(方便使用)
|
||||
typedef SCC_RING(char) char_ring_t;
|
||||
typedef SCC_RING(test_token_t) token_ring_t;
|
||||
|
||||
/* ------------------- 字符流测试辅助 ------------------ */
|
||||
static const char *test_chars = "abcdefghijklmnopqrstuvwxyz";
|
||||
static size_t char_index = 0;
|
||||
|
||||
cbool char_fill(char *out, void *userdata) {
|
||||
(void)userdata;
|
||||
if (char_index < strlen(test_chars)) {
|
||||
*out = test_chars[char_index++];
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Restart the character source from the first letter. */
void reset_char_fill(void) { char_index = 0; }
|
||||
|
||||
/* ------------------- token 流测试辅助 ------------------ */
|
||||
static int token_id = 0;
|
||||
|
||||
cbool token_fill(test_token_t *out, void *userdata) {
|
||||
(void)userdata;
|
||||
if (token_id < 10) { // 只产生 10 个 token
|
||||
out->id = token_id;
|
||||
out->data = (char *)scc_malloc(20);
|
||||
if (!out->data)
|
||||
return false;
|
||||
snprintf_(out->data, 20, "token%d", token_id);
|
||||
token_id++;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Restart the token source from id 0. */
void reset_token_fill(void) { token_id = 0; }
|
||||
|
||||
/* Release a token's string and clear the pointer so a second call is harmless. */
void free_token(test_token_t *tok) {
    if (tok->data) {
        scc_free(tok->data);
        tok->data = NULL;
    }
}
|
||||
|
||||
/* ==================== 字符环形缓冲区测试 ==================== */
|
||||
void test_char_ring_basic(void) {
|
||||
reset_char_fill();
|
||||
char_ring_t ring;
|
||||
scc_ring_init(ring, 4, char_fill, 0);
|
||||
char c;
|
||||
cbool ok;
|
||||
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'a');
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'b');
|
||||
|
||||
// peek
|
||||
scc_ring_peek(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'c');
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'c');
|
||||
|
||||
// back
|
||||
scc_ring_back(ring, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
scc_ring_peek(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'c');
|
||||
|
||||
// consume & reset
|
||||
scc_ring_consume(ring);
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'c');
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'd');
|
||||
|
||||
scc_ring_reset(ring);
|
||||
scc_ring_peek(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'c');
|
||||
|
||||
scc_ring_back(ring, ok);
|
||||
TEST_CHECK(ok == false); // 不能低于 head
|
||||
|
||||
scc_ring_free(ring);
|
||||
}
|
||||
|
||||
void test_char_ring_full(void) {
|
||||
reset_char_fill();
|
||||
char_ring_t ring;
|
||||
scc_ring_init(ring, 3, char_fill, 0);
|
||||
char c;
|
||||
cbool ok;
|
||||
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // a
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // b
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // c
|
||||
// 缓冲区满,peek 应失败
|
||||
scc_ring_peek(ring, c, ok);
|
||||
TEST_CHECK(ok == false);
|
||||
|
||||
scc_ring_consume(ring); // 释放已读空间
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // d
|
||||
TEST_CHECK(c == 'd');
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // e
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // f
|
||||
scc_ring_peek(ring, c, ok);
|
||||
TEST_CHECK(ok == false); // 再次满
|
||||
|
||||
scc_ring_free(ring);
|
||||
}
|
||||
|
||||
void test_char_ring_eof(void) {
|
||||
reset_char_fill();
|
||||
char_ring_t ring;
|
||||
scc_ring_init(ring, 32, char_fill, 0);
|
||||
char c;
|
||||
cbool ok;
|
||||
|
||||
for (int i = 0; i < 26; i++) {
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == test_chars[i]);
|
||||
}
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == false);
|
||||
scc_ring_peek(ring, c, ok);
|
||||
TEST_CHECK(ok == false);
|
||||
|
||||
scc_ring_free(ring);
|
||||
}
|
||||
|
||||
void test_char_ring_back_boundary(void) {
|
||||
reset_char_fill();
|
||||
char_ring_t ring;
|
||||
scc_ring_init(ring, 4, char_fill, 0);
|
||||
char c;
|
||||
cbool ok;
|
||||
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // a
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // b
|
||||
|
||||
scc_ring_back(ring, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
scc_ring_back(ring, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
scc_ring_back(ring, ok);
|
||||
TEST_CHECK(ok == false); // 已到 head
|
||||
|
||||
scc_ring_peek(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'a');
|
||||
|
||||
scc_ring_free(ring);
|
||||
}
|
||||
|
||||
void test_char_ring_consume_reset(void) {
|
||||
reset_char_fill();
|
||||
char_ring_t ring;
|
||||
scc_ring_init(ring, 5, char_fill, 0);
|
||||
char c;
|
||||
cbool ok;
|
||||
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // a
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // b
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // c
|
||||
scc_ring_back(ring, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
scc_ring_back(ring, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
scc_ring_back(ring, ok);
|
||||
TEST_CHECK(ok == true); // 此时 probe 指向 a
|
||||
|
||||
scc_ring_consume(ring); // head 移至 a
|
||||
scc_ring_reset(ring); // probe 也移至 a
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == 'a'); // 应该返回 a
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // b
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true); // c
|
||||
|
||||
scc_ring_free(ring);
|
||||
}
|
||||
|
||||
void test_char_ring_wrap(void) {
|
||||
reset_char_fill();
|
||||
char_ring_t ring;
|
||||
scc_ring_init(ring, 3, char_fill, 0);
|
||||
char c;
|
||||
cbool ok;
|
||||
|
||||
for (int i = 0; i < 26; i++) {
|
||||
scc_ring_next(ring, c, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(c == test_chars[i]);
|
||||
scc_ring_consume(ring); // 立即消费,保持缓冲区几乎为空
|
||||
}
|
||||
scc_ring_peek(ring, c, ok);
|
||||
TEST_CHECK(ok == false); // 无数据
|
||||
|
||||
scc_ring_free(ring);
|
||||
}
|
||||
|
||||
/* ==================== token ring buffer tests ==================== */
|
||||
void test_token_ring_basic(void) {
|
||||
reset_token_fill();
|
||||
token_ring_t ring;
|
||||
scc_ring_init(ring, 3, token_fill, 0);
|
||||
test_token_t tok;
|
||||
cbool ok;
|
||||
|
||||
scc_ring_next(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 0);
|
||||
free_token(&tok);
|
||||
|
||||
scc_ring_next(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 1);
|
||||
free_token(&tok);
|
||||
|
||||
scc_ring_peek(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 2); // peek 不应消费
|
||||
scc_ring_next(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 2);
|
||||
|
||||
scc_ring_back(ring, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
scc_ring_peek(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 2);
|
||||
scc_ring_next(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 2);
|
||||
free_token(&tok);
|
||||
|
||||
scc_ring_consume(ring);
|
||||
// 消费剩余 token
|
||||
while (1) {
|
||||
scc_ring_next(ring, tok, ok);
|
||||
if (!ok)
|
||||
break;
|
||||
free_token(&tok);
|
||||
}
|
||||
scc_ring_free(ring);
|
||||
}
|
||||
|
||||
void test_token_ring_full(void) {
|
||||
reset_token_fill();
|
||||
token_ring_t ring;
|
||||
scc_ring_init(ring, 2, token_fill, 0);
|
||||
test_token_t tok;
|
||||
cbool ok;
|
||||
|
||||
scc_ring_next(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 0);
|
||||
free_token(&tok);
|
||||
scc_ring_next(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 1);
|
||||
free_token(&tok);
|
||||
scc_ring_peek(ring, tok, ok);
|
||||
TEST_CHECK(ok == false); // 缓冲区满
|
||||
|
||||
scc_ring_consume(ring);
|
||||
scc_ring_next(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 2);
|
||||
free_token(&tok);
|
||||
|
||||
scc_ring_next(ring, tok, ok);
|
||||
TEST_CHECK(ok == true);
|
||||
TEST_CHECK(tok.id == 3);
|
||||
free_token(&tok);
|
||||
scc_ring_peek(ring, tok, ok);
|
||||
TEST_CHECK(ok == false); // 再次满
|
||||
|
||||
scc_ring_free(ring);
|
||||
}
|
||||
|
||||
/* ==================== test list ==================== */
|
||||
TEST_LIST = {{"test_char_ring_basic", test_char_ring_basic},
|
||||
{"test_char_ring_full", test_char_ring_full},
|
||||
{"test_char_ring_eof", test_char_ring_eof},
|
||||
{"test_char_ring_back_boundary", test_char_ring_back_boundary},
|
||||
{"test_char_ring_consume_reset", test_char_ring_consume_reset},
|
||||
{"test_char_ring_wrap", test_char_ring_wrap},
|
||||
{"test_token_ring_basic", test_token_ring_basic},
|
||||
{"test_token_ring_full", test_token_ring_full},
|
||||
{NULL, NULL}};
|
||||
Reference in New Issue
Block a user