fix(lexer): 修复词法分析器中的关键字比较与字符串处理逻辑
修正了关键字表的注释,明确要求其必须按字典序排列,以确保二分查找的正确性。在词法分析过程中,修复了标识符解析时对 `cstring` 的使用问题,并调整了 token 类型的赋值顺序,避免潜在的未定义行为。同时新增测试文件,用于验证操作符、关键字及各类字面量的识别准确性,并更新了测试运行器的日志级别控制参数。
This commit is contained in:
@@ -693,6 +693,8 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
// LEX_ERROR("unsupport wide-character char literal by `L` format");
|
||||
// }
|
||||
cstring_t str = cstring_new();
|
||||
cstring_push(&str, stream_next_char(stream));
|
||||
lexer_next_pos(lexer);
|
||||
while (1) {
|
||||
ch = stream_peek_char(stream);
|
||||
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
|
||||
@@ -705,16 +707,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
||||
break;
|
||||
}
|
||||
|
||||
int res = keyword_cmp((const char *)str.data, str.size - 1);
|
||||
int res = keyword_cmp(cstring_as_cstr(&str), cstring_len(&str));
|
||||
if (res == -1) {
|
||||
token->value.cstr.data = (char *)cstring_as_cstr(&str);
|
||||
token->value.cstr.len = cstring_len(&str);
|
||||
type = TOKEN_IDENT;
|
||||
break;
|
||||
} else {
|
||||
cstring_free(&str);
|
||||
type = keywords[res].tok;
|
||||
break;
|
||||
}
|
||||
token->type = type;
|
||||
goto END;
|
||||
default:
|
||||
LEX_ERROR("unsupport char in sourse code `%c`", ch);
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user