fix(lexer): fix keyword comparison and string handling in the lexer

Correct the comment on the keyword table to state explicitly that it must be kept in
lexicographic order so that the binary search remains valid. In the lexing path, fix how
`cstring` is used when parsing identifiers and reorder the token type assignment to avoid
potential undefined behavior. Also add a test file that verifies recognition of operators,
keywords, and the various literal kinds, and update the test runner's log-level control
parameter.
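
The "keep the keyword table sorted" requirement is easiest to see next to the lookup it enables. The sketch below is illustrative rather than the project's code: the table contents, struct layout, and token names are assumptions; only the `keyword_cmp(str, len)` → index-or-`-1` contract and the `keywords[res].tok` access mirror what the diff shows.

```c
#include <string.h>

/* Hypothetical token codes standing in for the project's real enum. */
typedef enum { TOK_KW_IF, TOK_KW_INT, TOK_KW_RETURN, TOK_KW_WHILE } tok_kind_t;

/* The table MUST stay lexicographically sorted: binary search discards half
 * of the table on every step, which is only valid if entries are ordered. */
static const struct { const char *name; tok_kind_t tok; } keywords[] = {
    { "if",     TOK_KW_IF     },
    { "int",    TOK_KW_INT    },
    { "return", TOK_KW_RETURN },
    { "while",  TOK_KW_WHILE  },
};

/* Returns the keyword's index, or -1 if `str` is an ordinary identifier.
 * `len` is the identifier length without any terminator. */
static int keyword_cmp(const char *str, size_t len) {
    int lo = 0, hi = (int)(sizeof keywords / sizeof keywords[0]) - 1;
    while (lo <= hi) {
        int mid = lo + (hi - lo) / 2;
        const char *kw = keywords[mid].name;
        int c = strncmp(str, kw, len);
        if (c == 0 && kw[len] != '\0') c = -1;  /* str is a proper prefix of kw */
        if (c == 0)     return mid;
        else if (c < 0) hi = mid - 1;
        else            lo = mid + 1;
    }
    return -1;
}
```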
zzy
2025-11-20 22:49:22 +08:00
parent f29fd92fdf
commit 164bab0f13
5 changed files with 181 additions and 7 deletions


@@ -693,6 +693,8 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
        // LEX_ERROR("unsupport wide-character char literal by `L` format");
        // }
        cstring_t str = cstring_new();
        cstring_push(&str, stream_next_char(stream));
        lexer_next_pos(lexer);
        while (1) {
            ch = stream_peek_char(stream);
            if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
@@ -705,16 +707,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
            break;
        }
        int res = keyword_cmp((const char *)str.data, str.size - 1);
        int res = keyword_cmp(cstring_as_cstr(&str), cstring_len(&str));
        if (res == -1) {
            token->value.cstr.data = (char *)cstring_as_cstr(&str);
            token->value.cstr.len = cstring_len(&str);
            type = TOKEN_IDENT;
            break;
        } else {
            cstring_free(&str);
            type = keywords[res].tok;
            break;
        }
        token->type = type;
        goto END;
    default:
        LEX_ERROR("unsupport char in sourse code `%c`", ch);
        break;
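
To make the `cstring` part of the fix concrete, here is a minimal sketch of what such a string type and its accessors could look like. The field layout and the size convention are assumptions for illustration only, not the project's actual implementation; the point is that `cstring_as_cstr`/`cstring_len` centralize the length convention instead of leaving `str.size - 1` arithmetic at every call site.

```c
#include <stddef.h>

/* Hypothetical stand-in for the project's cstring type: a growable,
 * NUL-terminated byte buffer.  The real smcc definition may differ. */
typedef struct {
    char  *data;  /* heap buffer, kept NUL-terminated                      */
    size_t size;  /* bytes used INCLUDING the trailing NUL (assumed here)  */
    size_t cap;   /* allocated capacity                                    */
} cstring_t;

/* Whichever convention `size` actually follows in smcc (with or without
 * the trailing NUL), accessors keep that knowledge in one place, so call
 * sites no longer hand-roll adjustments:
 *
 *   old: keyword_cmp((const char *)str.data, str.size - 1);
 *   new: keyword_cmp(cstring_as_cstr(&str), cstring_len(&str));
 */
static const char *cstring_as_cstr(const cstring_t *s) { return s->data; }
static size_t cstring_len(const cstring_t *s) { return s->size - 1; }
```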