feat 重构stream流API并适配lex_parse和lexer

This commit is contained in:
zzy
2025-12-08 23:04:11 +08:00
parent 1ab07a5815
commit 36bff64a91
17 changed files with 402 additions and 244 deletions

View File

@@ -75,7 +75,7 @@ static inline int keyword_cmp(const char *name, int len) {
return -1; // Not a keyword.
}
void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) {
void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream) {
lexer->stream = stream;
lexer->pos = core_pos_init();
// FIXME
@@ -86,9 +86,9 @@ void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) {
static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
core_stream_t *stream = lexer->stream;
core_stream_reset_char(stream);
int ch = core_stream_peek_char(stream);
core_probe_stream_t *stream = lexer->stream;
core_probe_stream_reset(stream);
int ch = core_probe_stream_next(stream);
usize n;
cstring_t str = cstring_new();
@@ -104,7 +104,7 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
const char line[] = "line";
for (int i = 0; i < (int)sizeof(line); i++) {
ch = core_stream_next_char(stream);
ch = core_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
if (ch != line[i]) {
LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
@@ -118,12 +118,12 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
goto SKIP_LINE;
}
if (core_stream_next_char(stream) != ' ') {
if (core_probe_stream_consume(stream) != ' ') {
lex_parse_skip_line(lexer->stream, &lexer->pos);
token->loc.line = token->value.n;
}
if (core_stream_peek_char(stream) != '"') {
if (core_probe_stream_next(stream) != '"') {
LEX_ERROR("Invalid `#` line");
goto SKIP_LINE;
}
@@ -149,26 +149,26 @@ ERR:
void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
token->type = TOKEN_UNKNOWN;
core_stream_t *stream = lexer->stream;
core_probe_stream_t *stream = lexer->stream;
core_stream_reset_char(stream);
core_probe_stream_reset(stream);
token_type_t type = TOKEN_UNKNOWN;
int ch = core_stream_peek_char(stream);
int ch = core_probe_stream_next(stream);
// once step
switch (ch) {
case '=':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_EQ;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_ASSIGN;
core_probe_stream_reset(stream), type = TOKEN_ASSIGN;
break;
}
break;
case '+':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '+':
type = TOKEN_ADD_ADD;
goto double_char;
@@ -176,12 +176,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_ASSIGN_ADD;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_ADD;
core_probe_stream_reset(stream), type = TOKEN_ADD;
break;
}
break;
case '-':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '-':
type = TOKEN_SUB_SUB;
goto double_char;
@@ -192,22 +192,22 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_DEREF;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_SUB;
core_probe_stream_reset(stream), type = TOKEN_SUB;
break;
}
break;
case '*':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_MUL;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_MUL;
core_probe_stream_reset(stream), type = TOKEN_MUL;
break;
}
break;
case '/':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_DIV;
goto double_char;
@@ -220,22 +220,22 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->type = TOKEN_BLOCK_COMMENT;
goto END;
default:
core_stream_reset_char(stream), type = TOKEN_DIV;
core_probe_stream_reset(stream), type = TOKEN_DIV;
break;
}
break;
case '%':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_MOD;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_MOD;
core_probe_stream_reset(stream), type = TOKEN_MOD;
break;
}
break;
case '&':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '&':
type = TOKEN_AND_AND;
goto double_char;
@@ -243,12 +243,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_ASSIGN_AND;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_AND;
core_probe_stream_reset(stream), type = TOKEN_AND;
break;
}
break;
case '|':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '|':
type = TOKEN_OR_OR;
goto double_char;
@@ -256,27 +256,27 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_ASSIGN_OR;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_OR;
core_probe_stream_reset(stream), type = TOKEN_OR;
break;
}
break;
case '^':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_XOR;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_XOR;
core_probe_stream_reset(stream), type = TOKEN_XOR;
break;
}
break;
case '<':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_LE;
goto double_char;
case '<': {
if (core_stream_peek_char(stream) == '=') {
if (core_probe_stream_next(stream) == '=') {
type = TOKEN_ASSIGN_L_SH;
goto triple_char;
} else {
@@ -286,17 +286,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
break;
}
default:
core_stream_reset_char(stream), type = TOKEN_LT;
core_probe_stream_reset(stream), type = TOKEN_LT;
break;
}
break;
case '>':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_GE;
goto double_char;
case '>': {
if (core_stream_peek_char(stream) == '=') {
if (core_probe_stream_next(stream) == '=') {
type = TOKEN_ASSIGN_R_SH;
goto triple_char;
} else {
@@ -306,7 +306,7 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
break;
}
default:
core_stream_reset_char(stream), type = TOKEN_GT;
core_probe_stream_reset(stream), type = TOKEN_GT;
break;
}
break;
@@ -314,12 +314,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_BIT_NOT;
break;
case '!':
switch (core_stream_peek_char(stream)) {
switch (core_probe_stream_next(stream)) {
case '=':
type = TOKEN_NEQ;
goto double_char;
default:
core_stream_reset_char(stream), type = TOKEN_NOT;
core_probe_stream_reset(stream), type = TOKEN_NOT;
break;
}
break;
@@ -351,8 +351,8 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
type = TOKEN_COLON;
break;
case '.':
if (core_stream_peek_char(stream) == '.' &&
core_stream_peek_char(stream) == '.') {
if (core_probe_stream_next(stream) == '.' &&
core_probe_stream_next(stream) == '.') {
type = TOKEN_ELLIPSIS;
goto triple_char;
}
@@ -452,13 +452,13 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
}
goto once_char;
triple_char:
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
double_char:
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
once_char:
core_stream_next_char(stream);
core_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
token->type = type;
END: