Files
scc/libs/lexer/tests/test_lexer.c
zzy 27a87d17ab feat(lexer): 改进预处理器token测试用例并修复##符号处理
- 将"##" token从SCC_TOK_SHARP修正为SCC_TOK_SHARP_SHARP
- 添加更多预处理器指令测试用例,包括宏定义、错误和警告指令
- 修正序列测试中的##符号处理

fix(pproc): 完善预处理器指令处理逻辑

- 实现#error和#warning指令的具体处理逻辑
- 添加对字符串字面量的错误和警告消息输出
- 优化未处理指令的错误处理流程

fix(pproc): 修复词法分析器流处理边界条件

- 在scc_pproc.c中添加对token获取失败的检查
- 防止在流结束时出现未处理的边界情况
2026-02-19 12:14:56 +08:00

414 lines
17 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// test_lexer.c
#include <scc_lexer.h>
#include <string.h>
#include <utest/acutest.h>
// Helper: hands the token's lexeme to scc_cstring_free.
static void free_token(scc_lexer_tok_t *tok) {
    scc_cstring_free(&(tok->lexeme));
}
// Single-token test macro: lexes `input` and checks that the FIRST token the
// lexer returns has type `expected_type`.
//
// Each expansion sets up its own stream, ring and lexer over the input buffer,
// fetches exactly one token, runs the check, then frees the token's lexeme and
// drops the ring and the stream. The TEST_MSG lines attach the input text and
// the expected / actual token names to that check.
//
// Every use of a macro parameter is parenthesized so an argument that is a
// compound expression (e.g. a conditional) cannot change the meaning of the
// comparison. NOTE: both parameters are still expanded more than once, so pass
// side-effect-free expressions only.
#define TEST_TOKEN(input, expected_type) \
    do { \
        scc_lexer_t lexer; \
        scc_lexer_tok_t token; \
        scc_sstream_t stream; \
        scc_sstream_init_by_buffer(&stream, (input), strlen((input)), 0, 16); \
        scc_sstream_ring_t *ref = scc_sstream_to_ring(&stream); \
        scc_lexer_init(&lexer, ref); \
        scc_lexer_get_token(&lexer, &token); \
        \
        TEST_CHECK(token.type == (expected_type)); \
        TEST_MSG("Input: '%s'", (input)); \
        TEST_MSG("Expected: %s", scc_get_tok_name((expected_type))); \
        TEST_MSG("Got: %s", scc_get_tok_name(token.type)); \
        \
        free_token(&token); \
        scc_sstream_drop_ring(ref); \
        scc_sstream_drop(&stream); \
    } while (0)
// Multi-token sequence test macro: lexes `input` and checks the type of each
// token, in order, against the list of token types given as the variadic
// arguments. One token is fetched and freed per expected entry.
#define TEST_SEQUENCE(input, ...) \
    do { \
        scc_sstream_t src_stream; \
        scc_lexer_t seq_lexer; \
        scc_lexer_tok_t token; \
        scc_sstream_init_by_buffer(&src_stream, input, strlen(input), 0, 16); \
        scc_sstream_ring_t *ring = scc_sstream_to_ring(&src_stream); \
        scc_lexer_init(&seq_lexer, ring); \
        \
        scc_tok_type_t expected[] = {__VA_ARGS__}; \
        const size_t total = sizeof expected / sizeof expected[0]; \
        size_t i = 0; \
        while (i < total) { \
            scc_lexer_get_token(&seq_lexer, &token); \
            TEST_CHECK(token.type == expected[i]); \
            TEST_MSG("Token %zu: input '%s'", i, input); \
            TEST_MSG("Expected: %s", scc_get_tok_name(expected[i])); \
            TEST_MSG("Got: %s", scc_get_tok_name(token.type)); \
            free_token(&token); \
            ++i; \
        } \
        \
        scc_sstream_drop_ring(ring); \
        scc_sstream_drop(&src_stream); \
    } while (0)
// ============================ Test cases ============================
// Operators and punctuators: each input must lex to exactly the expected
// token type as its first token (arithmetic, bitwise/logical, comparison and
// structural symbols).
void test_operators(void) {
    TEST_CASE("Arithmetic operators");
    TEST_TOKEN("+", SCC_TOK_ADD);
    TEST_TOKEN("++", SCC_TOK_ADD_ADD);
    TEST_TOKEN("+=", SCC_TOK_ASSIGN_ADD);
    TEST_TOKEN("-", SCC_TOK_SUB);
    TEST_TOKEN("--", SCC_TOK_SUB_SUB);
    TEST_TOKEN("-=", SCC_TOK_ASSIGN_SUB);
    TEST_TOKEN("*", SCC_TOK_MUL);
    TEST_TOKEN("*=", SCC_TOK_ASSIGN_MUL);
    TEST_TOKEN("/", SCC_TOK_DIV);
    TEST_TOKEN("/=", SCC_TOK_ASSIGN_DIV);
    TEST_TOKEN("%", SCC_TOK_MOD);
    TEST_TOKEN("%=", SCC_TOK_ASSIGN_MOD);

    TEST_CASE("Bitwise operators");
    TEST_TOKEN("&", SCC_TOK_AND);
    TEST_TOKEN("&&", SCC_TOK_AND_AND);
    TEST_TOKEN("&=", SCC_TOK_ASSIGN_AND);
    TEST_TOKEN("|", SCC_TOK_OR);
    TEST_TOKEN("||", SCC_TOK_OR_OR);
    TEST_TOKEN("|=", SCC_TOK_ASSIGN_OR);
    TEST_TOKEN("^", SCC_TOK_XOR);
    TEST_TOKEN("^=", SCC_TOK_ASSIGN_XOR);
    TEST_TOKEN("~", SCC_TOK_BIT_NOT);
    TEST_TOKEN("<<", SCC_TOK_L_SH);
    TEST_TOKEN("<<=", SCC_TOK_ASSIGN_L_SH);
    TEST_TOKEN(">>", SCC_TOK_R_SH);
    TEST_TOKEN(">>=", SCC_TOK_ASSIGN_R_SH);

    TEST_CASE("Comparison operators");
    TEST_TOKEN("==", SCC_TOK_EQ);
    TEST_TOKEN("!=", SCC_TOK_NEQ);
    TEST_TOKEN("<", SCC_TOK_LT);
    TEST_TOKEN("<=", SCC_TOK_LE);
    TEST_TOKEN(">", SCC_TOK_GT);
    TEST_TOKEN(">=", SCC_TOK_GE);

    TEST_CASE("Special symbols");
    TEST_TOKEN("(", SCC_TOK_L_PAREN);
    TEST_TOKEN(")", SCC_TOK_R_PAREN);
    TEST_TOKEN("[", SCC_TOK_L_BRACKET);
    TEST_TOKEN("]", SCC_TOK_R_BRACKET);
    TEST_TOKEN("{", SCC_TOK_L_BRACE);
    TEST_TOKEN("}", SCC_TOK_R_BRACE);
    TEST_TOKEN(";", SCC_TOK_SEMICOLON);
    TEST_TOKEN(",", SCC_TOK_COMMA);
    TEST_TOKEN(":", SCC_TOK_COLON);
    TEST_TOKEN(".", SCC_TOK_DOT);
    TEST_TOKEN("...", SCC_TOK_ELLIPSIS);
    TEST_TOKEN("->", SCC_TOK_DEREF);
    TEST_TOKEN("?", SCC_TOK_COND);
}
// Keywords: each reserved word must lex to its dedicated keyword token type
// rather than to a generic identifier.
void test_keywords(void) {
    TEST_CASE("C89 keywords");
    TEST_TOKEN("while", SCC_TOK_WHILE);
    TEST_TOKEN("sizeof", SCC_TOK_SIZEOF);
    TEST_TOKEN("if", SCC_TOK_IF);
    TEST_TOKEN("else", SCC_TOK_ELSE);
    TEST_TOKEN("for", SCC_TOK_FOR);
    TEST_TOKEN("do", SCC_TOK_DO);
    TEST_TOKEN("switch", SCC_TOK_SWITCH);
    TEST_TOKEN("case", SCC_TOK_CASE);
    TEST_TOKEN("default", SCC_TOK_DEFAULT);
    TEST_TOKEN("break", SCC_TOK_BREAK);
    TEST_TOKEN("continue", SCC_TOK_CONTINUE);
    TEST_TOKEN("return", SCC_TOK_RETURN);
    TEST_TOKEN("goto", SCC_TOK_GOTO);
    TEST_TOKEN("auto", SCC_TOK_AUTO);
    TEST_TOKEN("register", SCC_TOK_REGISTER);
    TEST_TOKEN("static", SCC_TOK_STATIC);
    TEST_TOKEN("extern", SCC_TOK_EXTERN);
    TEST_TOKEN("typedef", SCC_TOK_TYPEDEF);
    TEST_TOKEN("const", SCC_TOK_CONST);
    TEST_TOKEN("volatile", SCC_TOK_VOLATILE);
    TEST_TOKEN("signed", SCC_TOK_SIGNED);
    TEST_TOKEN("unsigned", SCC_TOK_UNSIGNED);
    TEST_TOKEN("short", SCC_TOK_SHORT);
    TEST_TOKEN("long", SCC_TOK_LONG);
    TEST_TOKEN("int", SCC_TOK_INT);
    TEST_TOKEN("char", SCC_TOK_CHAR);
    TEST_TOKEN("float", SCC_TOK_FLOAT);
    TEST_TOKEN("double", SCC_TOK_DOUBLE);
    TEST_TOKEN("void", SCC_TOK_VOID);
    TEST_TOKEN("struct", SCC_TOK_STRUCT);
    TEST_TOKEN("union", SCC_TOK_UNION);
    TEST_TOKEN("enum", SCC_TOK_ENUM);

    TEST_CASE("C99 keywords");
    TEST_TOKEN("inline", SCC_TOK_INLINE);
    TEST_TOKEN("restrict", SCC_TOK_RESTRICT);
    // _Bool, _Complex, _Imaginary can be added here when needed.

    TEST_CASE("SCC extensions (if enabled)");
    TEST_TOKEN("asm", SCC_TOK_ASM);
    TEST_TOKEN("atomic", SCC_TOK_ATOMIC);
    TEST_TOKEN("bool", SCC_TOK_BOOL);
    TEST_TOKEN("complex", SCC_TOK_COMPLEX);
}
// Literals: integer (decimal / hex / octal / binary, with suffixes), floating
// (decimal / scientific / hex-float, with suffixes), character and string
// literals must each lex to a single token of the matching literal type.
void test_literals(void) {
    TEST_CASE("Integer literals - decimal");
    TEST_TOKEN("0", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("123", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("2147483647", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("4294967295", SCC_TOK_INT_LITERAL);

    TEST_CASE("Integer literals - hexadecimal");
    TEST_TOKEN("0x0", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0x1A3F", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0XABCDEF", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0x123abc", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0XFF", SCC_TOK_INT_LITERAL);

    TEST_CASE("Integer literals - octal");
    TEST_TOKEN("0123", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0777", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0", SCC_TOK_INT_LITERAL); // 0 is both a decimal and an octal literal

    TEST_CASE("Integer literals - binary (C23 extension)");
    TEST_TOKEN("0b1010", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0B1100", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0b0", SCC_TOK_INT_LITERAL);

    TEST_CASE("Integer literals with suffixes");
    TEST_TOKEN("123U", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("456L", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("789UL", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0x1FFLL", SCC_TOK_INT_LITERAL);
    TEST_TOKEN("0b1010ULL", SCC_TOK_INT_LITERAL);

    TEST_CASE("Floating literals - decimal");
    TEST_TOKEN("0.0", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("3.14", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN(".5", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("0.", SCC_TOK_FLOAT_LITERAL);

    TEST_CASE("Floating literals - scientific");
    TEST_TOKEN("1e10", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("1E-5", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("2.5e+3", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN(".1e2", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("1.e3", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("123.456e-7", SCC_TOK_FLOAT_LITERAL);

    TEST_CASE("Floating literals - hexadecimal (C99)");
    TEST_TOKEN("0x1.2p3", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("0x1p-2", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("0x0.1p10", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("0X1.2P3", SCC_TOK_FLOAT_LITERAL);

    TEST_CASE("Floating literals with suffixes");
    TEST_TOKEN("1.0f", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("2.0F", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("3.0l", SCC_TOK_FLOAT_LITERAL);
    TEST_TOKEN("4.0L", SCC_TOK_FLOAT_LITERAL);

    TEST_CASE("Character literals - simple");
    TEST_TOKEN("'a'", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'0'", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("' '", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'\t'", SCC_TOK_CHAR_LITERAL); // a raw tab character between the quotes

    TEST_CASE("Character literals - escape sequences");
    TEST_TOKEN("'\\n'", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'\\t'", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'\\\\'", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'\\''", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'\\\"'", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'\\?'", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'\\0'", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'\\123'", SCC_TOK_CHAR_LITERAL); // octal escape
    TEST_TOKEN("'\\xAB'", SCC_TOK_CHAR_LITERAL); // hexadecimal escape

    TEST_CASE("Character literals - multi-byte (implementation defined)");
    TEST_TOKEN("'ab'", SCC_TOK_CHAR_LITERAL);
    TEST_TOKEN("'\\x41\\x42'", SCC_TOK_CHAR_LITERAL); // several escapes in one literal

    TEST_CASE("String literals - basic");
    TEST_TOKEN("\"hello\"", SCC_TOK_STRING_LITERAL);
    TEST_TOKEN("\"\"", SCC_TOK_STRING_LITERAL);
    TEST_TOKEN("\"a b c\"", SCC_TOK_STRING_LITERAL);

    TEST_CASE("String literals - escape sequences");
    TEST_TOKEN("\"a\\nb\\tc\"", SCC_TOK_STRING_LITERAL);
    TEST_TOKEN("\"\\\\ \\\" \\' \\?\"", SCC_TOK_STRING_LITERAL);
    TEST_TOKEN("\"\\123\\xAB\"", SCC_TOK_STRING_LITERAL);

    TEST_CASE("String literals - wide and UTF-8 prefixes (C11)");
    TEST_TOKEN("L\"wide\"", SCC_TOK_STRING_LITERAL);
    TEST_TOKEN("u\"utf16\"", SCC_TOK_STRING_LITERAL);
    TEST_TOKEN("U\"utf32\"", SCC_TOK_STRING_LITERAL);
    TEST_TOKEN("u8\"utf8\"", SCC_TOK_STRING_LITERAL);
}
// Horizontal whitespace: space, tab, vertical tab and form feed must each lex
// to SCC_TOK_BLANK.
void test_whitespace(void) {
    TEST_CASE("Whitespace characters");
    TEST_TOKEN(" ", SCC_TOK_BLANK);
    TEST_TOKEN("\t", SCC_TOK_BLANK);
    TEST_TOKEN("\v", SCC_TOK_BLANK);
    TEST_TOKEN("\f", SCC_TOK_BLANK);
    TEST_TOKEN(" \t\v\f", SCC_TOK_BLANK); // a run of blanks should be a single token
}
// Line terminators: LF, CR and CRLF must each lex to SCC_TOK_ENDLINE.
void test_newlines(void) {
    TEST_CASE("Newline characters");
    TEST_TOKEN("\n", SCC_TOK_ENDLINE);
    TEST_TOKEN("\r", SCC_TOK_ENDLINE);
    TEST_TOKEN("\r\n", SCC_TOK_ENDLINE); // should be treated as one line break
}
// Comments: line comments and block comments are returned as tokens of their
// own (SCC_TOK_LINE_COMMENT / SCC_TOK_BLOCK_COMMENT).
void test_comments(void) {
    TEST_CASE("Line comments");
    TEST_TOKEN("// single line comment", SCC_TOK_LINE_COMMENT);
    TEST_TOKEN("// comment with // inside", SCC_TOK_LINE_COMMENT);
    TEST_TOKEN("// comment at end", SCC_TOK_LINE_COMMENT);

    TEST_CASE("Block comments");
    TEST_TOKEN("/* simple */", SCC_TOK_BLOCK_COMMENT);
    TEST_TOKEN("/* multi\nline */", SCC_TOK_BLOCK_COMMENT);
    TEST_TOKEN("/**/", SCC_TOK_BLOCK_COMMENT); // empty comment
    TEST_TOKEN("/* with * inside */", SCC_TOK_BLOCK_COMMENT);
    TEST_TOKEN("/* nested /* not allowed in C */",
               SCC_TOK_BLOCK_COMMENT); // block comments do not nest lexically
}
// Identifiers: plain names, names with underscores/digits, and a 1023-character
// name must all lex to SCC_TOK_IDENT.
void test_identifiers(void) {
    TEST_CASE("Valid identifiers");
    TEST_TOKEN("foo", SCC_TOK_IDENT);
    TEST_TOKEN("_foo", SCC_TOK_IDENT);
    TEST_TOKEN("foo123", SCC_TOK_IDENT);
    TEST_TOKEN("foo_bar", SCC_TOK_IDENT);
    TEST_TOKEN("FOO", SCC_TOK_IDENT);
    TEST_TOKEN("_", SCC_TOK_IDENT);
    TEST_TOKEN("__LINE__", SCC_TOK_IDENT); // predefined macro names are identifiers too

    // Very long identifier (assumes the lexer's buffering can hold it):
    // fill all but the last byte with 'a', then NUL-terminate.
    char long_id[1024];
    memset(long_id, 'a', sizeof(long_id) - 1);
    long_id[sizeof(long_id) - 1] = '\0';
    TEST_TOKEN(long_id, SCC_TOK_IDENT);
}
// Preprocessor-related tokens: '#' and '##' on their own, plus the raw token
// sequences produced for common directive lines. The lexer is expected to
// return the directive name as an ordinary identifier and to keep blanks and
// line ends as tokens.
void test_preprocessor(void) {
    TEST_CASE("Preprocessor directives - just the # token");
    TEST_TOKEN("#", SCC_TOK_SHARP);
    TEST_TOKEN("##", SCC_TOK_SHARP_SHARP);

    // Multi-token sequence tests for #include and other directives.
    TEST_SEQUENCE("#include <stdio.h>", SCC_TOK_SHARP, SCC_TOK_IDENT,
                  SCC_TOK_BLANK, SCC_TOK_LT, SCC_TOK_IDENT, SCC_TOK_DOT,
                  SCC_TOK_IDENT, SCC_TOK_GT);
    TEST_SEQUENCE("#define FOO 123", SCC_TOK_SHARP, SCC_TOK_IDENT,
                  SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK,
                  SCC_TOK_INT_LITERAL);
    TEST_SEQUENCE("#define FOO(x) x + 1", SCC_TOK_SHARP, SCC_TOK_IDENT,
                  SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_L_PAREN, SCC_TOK_IDENT,
                  SCC_TOK_R_PAREN, SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK,
                  SCC_TOK_ADD, SCC_TOK_BLANK, SCC_TOK_INT_LITERAL);
    TEST_SEQUENCE("#undef FOO", SCC_TOK_SHARP, SCC_TOK_IDENT, SCC_TOK_BLANK,
                  SCC_TOK_IDENT);
    TEST_SEQUENCE("#error \"This is an error\"", SCC_TOK_SHARP, SCC_TOK_IDENT,
                  SCC_TOK_BLANK, SCC_TOK_STRING_LITERAL);
    TEST_SEQUENCE("#warning \"This is an warning\"\n", SCC_TOK_SHARP,
                  SCC_TOK_IDENT, SCC_TOK_BLANK, SCC_TOK_STRING_LITERAL,
                  SCC_TOK_ENDLINE);
}
// Edge cases: characters outside the C source alphabet, empty input,
// whitespace-only input, and numbers immediately followed by letters.
void test_edge_cases(void) {
    TEST_CASE("Invalid characters");
    TEST_TOKEN("@", SCC_TOK_UNKNOWN);
    TEST_TOKEN("`", SCC_TOK_UNKNOWN);
    TEST_TOKEN("$", SCC_TOK_UNKNOWN); // not an identifier character in C

    TEST_CASE("Empty input");
    TEST_TOKEN("", SCC_TOK_EOF); // EOF right away

    TEST_CASE("Only whitespace");
    TEST_TOKEN(" \t", SCC_TOK_BLANK);
    // EOF should follow, but the single-token test only looks at the first
    // token.

    TEST_CASE("Numbers followed by letters (no suffix)");
    // Lexically these should split into a number and an identifier.
    TEST_SEQUENCE("123abc", SCC_TOK_INT_LITERAL, SCC_TOK_IDENT);
    TEST_SEQUENCE("0x123xyz", SCC_TOK_INT_LITERAL, SCC_TOK_IDENT);
}
// Token sequences: whole snippets are lexed and every token type is checked in
// order, including the blank, comment and end-of-line tokens in between.
void test_sequences(void) {
    TEST_CASE("Simple expression");
    TEST_SEQUENCE("a + b * c", SCC_TOK_IDENT, SCC_TOK_BLANK, SCC_TOK_ADD,
                  SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK, SCC_TOK_MUL,
                  SCC_TOK_BLANK, SCC_TOK_IDENT);

    TEST_CASE("Function call");
    TEST_SEQUENCE("func(1, 2);", SCC_TOK_IDENT, SCC_TOK_L_PAREN,
                  SCC_TOK_INT_LITERAL, SCC_TOK_COMMA, SCC_TOK_BLANK,
                  SCC_TOK_INT_LITERAL, SCC_TOK_R_PAREN, SCC_TOK_SEMICOLON);

    TEST_CASE("Multi-character operators");
    TEST_SEQUENCE(">>=", SCC_TOK_ASSIGN_R_SH);
    TEST_SEQUENCE("<<=", SCC_TOK_ASSIGN_L_SH);
    TEST_SEQUENCE("...", SCC_TOK_ELLIPSIS);
    TEST_SEQUENCE("->", SCC_TOK_DEREF);
    // "##" is expected as ONE SCC_TOK_SHARP_SHARP token, not two '#' tokens.
    TEST_SEQUENCE("##", SCC_TOK_SHARP_SHARP);

    TEST_CASE("Comments and whitespace interleaved");
    TEST_SEQUENCE("/* comment */ a // line comment\n b", SCC_TOK_BLOCK_COMMENT,
                  SCC_TOK_BLANK, SCC_TOK_IDENT, SCC_TOK_BLANK,
                  SCC_TOK_LINE_COMMENT, SCC_TOK_ENDLINE, SCC_TOK_BLANK,
                  SCC_TOK_IDENT);

    TEST_CASE("String literals with escapes");
    TEST_SEQUENCE("\"hello\\nworld\"", SCC_TOK_STRING_LITERAL);
    // The L prefix is not a separate identifier: the whole thing is expected
    // as one string-literal token.
    TEST_SEQUENCE("L\"wide\"", SCC_TOK_STRING_LITERAL);

    TEST_CASE("Character literals with escapes");
    TEST_SEQUENCE("'\\x41'", SCC_TOK_CHAR_LITERAL);
    TEST_SEQUENCE("'\\123'", SCC_TOK_CHAR_LITERAL);
}
// Error recovery: placeholder. This function currently performs no checks;
// the cases below are disabled because the expected token type for
// unterminated input is still undecided.
void test_error_recovery(void) {
    // Unterminated character literal: the lexer might keep scanning until a
    // newline or EOF. One option is to return SCC_TOK_CHAR_LITERAL covering
    // everything up to the end; since an unterminated literal is an error in
    // standard C, another option is to return UNKNOWN.
    // TEST_CASE("Unterminated character literal");
    // TEST_TOKEN("'a", SCC_TOK_UNKNOWN); // implementation-dependent, may be CHAR_LITERAL
    // // A more reliable test: what is the next token in the sequence?
    // TEST_SEQUENCE("'a b", SCC_TOK_UNKNOWN,
    //               SCC_TOK_IDENT); // assumes the first token is the error
    // TEST_CASE("Unterminated string literal");
    // TEST_TOKEN("\"hello", SCC_TOK_UNKNOWN); // same consideration as above
    // TEST_CASE("Unterminated block comment");
    // TEST_SEQUENCE("/* comment",
    //               SCC_TOK_BLOCK_COMMENT); // runs to EOF, may still count as a comment
}
// ============================ Main test list ============================
// Registration table of {test name, test function} pairs, declared through
// the TEST_LIST macro from <utest/acutest.h>. The final {NULL, NULL} entry
// marks the end of the table.
TEST_LIST = {
{"operators", test_operators},
{"keywords", test_keywords},
{"literals", test_literals},
{"whitespace", test_whitespace},
{"newlines", test_newlines},
{"comments", test_comments},
{"identifiers", test_identifiers},
{"preprocessor", test_preprocessor},
{"edge_cases", test_edge_cases},
{"sequences", test_sequences},
{"error_recovery", test_error_recovery},
{NULL, NULL},
};