feat(lex_parser, pprocessor): replace consume with next and remove stream resets

- Replace `scc_probe_stream_consume` with `scc_probe_stream_next` for consistent stream advancement
- Remove redundant `scc_probe_stream_reset` calls before peeking, as `next` and `peek` handle state
- Rename `scc_cstring_new` to `scc_cstring_create` and `scc_pos_init` to `scc_pos_create` for naming consistency
- Change `scc_pp_macro_get` parameter to `const scc_cstring_t*` for better const-correctness
- Improve code clarity and maintain proper stream position tracking
This commit is contained in:
zzy
2025-12-28 10:49:29 +08:00
parent 07f5d9331b
commit 09f4ac8de0
20 changed files with 445 additions and 262 deletions

View File

@@ -77,7 +77,7 @@ static inline int keyword_cmp(const char *name, int len) {
void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream) {
lexer->stream = stream;
lexer->pos = scc_pos_init();
lexer->pos = scc_pos_create();
// FIXME
lexer->pos.name = scc_cstring_copy(&stream->name);
}
@@ -91,7 +91,7 @@ static void parse_line(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
int ch = scc_probe_stream_next(stream);
usize n;
scc_cstring_t str = scc_cstring_new();
scc_cstring_t str = scc_cstring_create();
if (ch == scc_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
@@ -113,13 +113,13 @@ static void parse_line(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
}
}
if (scc_lex_parse_number(lexer->stream, &lexer->pos, &n) == false) {
if (scc_lex_parse_number(stream, &lexer->pos, &n) == false) {
LEX_ERROR("Invalid line number");
goto SKIP_LINE;
}
if (scc_probe_stream_consume(stream) != ' ') {
scc_lex_parse_skip_line(lexer->stream, &lexer->pos);
scc_lex_parse_skip_line(stream, &lexer->pos);
token->loc.line = token->value.n;
}
@@ -127,19 +127,21 @@ static void parse_line(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
LEX_ERROR("Invalid `#` line");
goto SKIP_LINE;
}
if (scc_lex_parse_string(lexer->stream, &lexer->pos, &str) == false) {
if (scc_lex_parse_string(stream, &lexer->pos, &str) == false) {
LEX_ERROR("Invalid filename");
goto SKIP_LINE;
}
scc_lex_parse_skip_line(lexer->stream, &lexer->pos);
scc_lex_parse_skip_line(stream, &lexer->pos);
scc_probe_stream_sync(stream);
token->loc.line = n;
// FIXME memory leak
token->loc.name = scc_cstring_copy(&str);
scc_cstring_free(&str);
return;
SKIP_LINE:
scc_lex_parse_skip_line(lexer->stream, &lexer->pos);
scc_lex_parse_skip_line(stream, &lexer->pos);
scc_probe_stream_sync(stream);
ERR:
set_err_token(token);
scc_cstring_free(&str);
@@ -212,11 +214,15 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
type = SCC_TOK_ASSIGN_DIV;
goto double_char;
case '/':
scc_lex_parse_skip_line(lexer->stream, &lexer->pos);
scc_probe_stream_reset(stream);
scc_lex_parse_skip_line(stream, &lexer->pos);
scc_probe_stream_sync(stream);
token->type = SCC_TOK_LINE_COMMENT;
goto END;
case '*':
scc_lex_parse_skip_block_comment(lexer->stream, &lexer->pos);
scc_probe_stream_reset(stream);
scc_lex_parse_skip_block_comment(stream, &lexer->pos);
scc_probe_stream_sync(stream);
token->type = SCC_TOK_BLOCK_COMMENT;
goto END;
default:
@@ -369,7 +375,8 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
break;
case '\r':
case '\n':
scc_lex_parse_skip_endline(lexer->stream, &lexer->pos);
scc_lex_parse_skip_endline(stream, &lexer->pos);
scc_probe_stream_sync(stream);
token->type = SCC_TOK_BLANK;
goto END;
case '#':
@@ -384,7 +391,9 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
case '\'': {
token->loc = lexer->pos;
token->type = SCC_TOK_CHAR_LITERAL;
int ch = scc_lex_parse_char(lexer->stream, &lexer->pos);
scc_probe_stream_reset(stream);
int ch = scc_lex_parse_char(stream, &lexer->pos);
scc_probe_stream_sync(stream);
if (ch == scc_stream_eof) {
LEX_ERROR("Unexpected character literal");
token->type = SCC_TOK_UNKNOWN;
@@ -396,8 +405,10 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
case '"': {
token->loc = lexer->pos;
token->type = SCC_TOK_STRING_LITERAL;
scc_cstring_t output = scc_cstring_new();
if (scc_lex_parse_string(lexer->stream, &lexer->pos, &output) == true) {
scc_cstring_t output = scc_cstring_create();
scc_probe_stream_reset(stream);
if (scc_lex_parse_string(stream, &lexer->pos, &output) == true) {
scc_probe_stream_sync(stream);
token->value.cstr.data = scc_cstring_as_cstr(&output);
token->value.cstr.len = scc_cstring_len(&output);
} else {
@@ -414,7 +425,9 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
token->loc = lexer->pos;
token->type = SCC_TOK_INT_LITERAL;
usize output;
if (scc_lex_parse_number(lexer->stream, &lexer->pos, &output) == true) {
scc_probe_stream_reset(stream);
if (scc_lex_parse_number(stream, &lexer->pos, &output) == true) {
scc_probe_stream_sync(stream);
token->value.n = output;
} else {
LEX_ERROR("Unexpected number literal");
@@ -431,8 +444,10 @@ void scc_lexer_get_token(scc_lexer_t *lexer, scc_lexer_tok_t *token) {
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
/* clang-format on */
scc_cstring_t str = scc_cstring_new();
cbool ret = scc_lex_parse_identifier(lexer->stream, &lexer->pos, &str);
scc_cstring_t str = scc_cstring_create();
scc_probe_stream_reset(stream);
cbool ret = scc_lex_parse_identifier(stream, &lexer->pos, &str);
scc_probe_stream_sync(stream);
Assert(ret == true);
int res = keyword_cmp(scc_cstring_as_cstr(&str), scc_cstring_len(&str));

View File

@@ -28,7 +28,8 @@ int main(int argc, char *argv[]) {
} else {
// FIXME it is a hack lexer_logger
log_set_level(&__smcc_lexer_log, LOG_LEVEL_NOTSET);
log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR);
log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR |
LOG_LEVEL_FATAL);
}
const char *file_name = __FILE__;