feat: rename core types to scc prefix for consistency

Renamed types from `core_*` to `scc_*` across the lex_parser and stream modules to keep naming consistent within the SCC codebase. This includes updating function signatures and all internal uses of types such as `core_probe_stream_t`, `core_pos_t`, and `cstring_t` to their `scc_*` counterparts.
This commit is contained in:
zzy
2025-12-11 13:00:29 +08:00
parent 35c13ee30a
commit d88fa3b8d3
33 changed files with 741 additions and 745 deletions

View File

@@ -32,11 +32,11 @@ David Hanson / drh@drhanson.net
static const struct {
const char *name;
ckeyword_t std_type;
token_type_t tok;
scc_cstd_t std_type;
scc_tok_type_t tok;
} keywords[] = {
#define X(name, subtype, tok, std_type, ...) {#name, std_type, tok},
KEYWORD_TABLE
SCC_CKEYWORD_TABLE
#undef X
};
@@ -75,23 +75,23 @@ static inline int keyword_cmp(const char *name, int len) {
return -1; // Not a keyword.
}
void lexer_init(smcc_lexer_t *lexer, core_probe_stream_t *stream) {
void scc_lexer_init(scc_lexer_t *lexer, scc_probe_stream_t *stream) {
lexer->stream = stream;
lexer->pos = core_pos_init();
lexer->pos = scc_pos_init();
// FIXME
lexer->pos.name = cstring_from_cstr(cstring_as_cstr(&stream->name));
lexer->pos.name = scc_cstring_from_cstr(scc_cstring_as_cstr(&stream->name));
}
#define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)
#define set_err_token(token) ((token)->type = SCC_TOK_UNKNOWN)
static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
static void parse_line(scc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
core_probe_stream_t *stream = lexer->stream;
core_probe_stream_reset(stream);
int ch = core_probe_stream_next(stream);
scc_probe_stream_t *stream = lexer->stream;
scc_probe_stream_reset(stream);
int ch = scc_probe_stream_next(stream);
usize n;
cstring_t str = cstring_new();
scc_cstring_t str = scc_cstring_new();
if (ch == core_stream_eof) {
LEX_WARN("Unexpected EOF at begin");
@@ -104,7 +104,7 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
const char line[] = "line";
for (int i = 0; i < (int)sizeof(line); i++) {
ch = core_probe_stream_consume(stream);
ch = scc_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
if (ch != line[i]) {
LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
@@ -118,12 +118,12 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
goto SKIP_LINE;
}
if (core_probe_stream_consume(stream) != ' ') {
if (scc_probe_stream_consume(stream) != ' ') {
lex_parse_skip_line(lexer->stream, &lexer->pos);
token->loc.line = token->value.n;
}
if (core_probe_stream_next(stream) != '"') {
if (scc_probe_stream_next(stream) != '"') {
LEX_ERROR("Invalid `#` line");
goto SKIP_LINE;
}
@@ -135,259 +135,259 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
lex_parse_skip_line(lexer->stream, &lexer->pos);
token->loc.line = n;
// FIXME memory leak
token->loc.name = cstring_from_cstr(cstring_as_cstr(&str));
cstring_free(&str);
token->loc.name = scc_cstring_from_cstr(scc_cstring_as_cstr(&str));
scc_cstring_free(&str);
return;
SKIP_LINE:
lex_parse_skip_line(lexer->stream, &lexer->pos);
ERR:
set_err_token(token);
cstring_free(&str);
scc_cstring_free(&str);
}
// /zh/c/language/operator_arithmetic.html
void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
void scc_lexer_get_token(scc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
token->type = TOKEN_UNKNOWN;
core_probe_stream_t *stream = lexer->stream;
token->type = SCC_TOK_UNKNOWN;
scc_probe_stream_t *stream = lexer->stream;
core_probe_stream_reset(stream);
token_type_t type = TOKEN_UNKNOWN;
int ch = core_probe_stream_next(stream);
scc_probe_stream_reset(stream);
scc_tok_type_t type = SCC_TOK_UNKNOWN;
int ch = scc_probe_stream_next(stream);
// once step
switch (ch) {
case '=':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_EQ;
type = SCC_TOK_EQ;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_ASSIGN;
scc_probe_stream_reset(stream), type = SCC_TOK_ASSIGN;
break;
}
break;
case '+':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '+':
type = TOKEN_ADD_ADD;
type = SCC_TOK_ADD_ADD;
goto double_char;
case '=':
type = TOKEN_ASSIGN_ADD;
type = SCC_TOK_ASSIGN_ADD;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_ADD;
scc_probe_stream_reset(stream), type = SCC_TOK_ADD;
break;
}
break;
case '-':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '-':
type = TOKEN_SUB_SUB;
type = SCC_TOK_SUB_SUB;
goto double_char;
case '=':
type = TOKEN_ASSIGN_SUB;
type = SCC_TOK_ASSIGN_SUB;
goto double_char;
case '>':
type = TOKEN_DEREF;
type = SCC_TOK_DEREF;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_SUB;
scc_probe_stream_reset(stream), type = SCC_TOK_SUB;
break;
}
break;
case '*':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_MUL;
type = SCC_TOK_ASSIGN_MUL;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_MUL;
scc_probe_stream_reset(stream), type = SCC_TOK_MUL;
break;
}
break;
case '/':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_DIV;
type = SCC_TOK_ASSIGN_DIV;
goto double_char;
case '/':
lex_parse_skip_line(lexer->stream, &lexer->pos);
token->type = TOKEN_LINE_COMMENT;
token->type = SCC_TOK_LINE_COMMENT;
goto END;
case '*':
lex_parse_skip_block_comment(lexer->stream, &lexer->pos);
token->type = TOKEN_BLOCK_COMMENT;
token->type = SCC_TOK_BLOCK_COMMENT;
goto END;
default:
core_probe_stream_reset(stream), type = TOKEN_DIV;
scc_probe_stream_reset(stream), type = SCC_TOK_DIV;
break;
}
break;
case '%':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_MOD;
type = SCC_TOK_ASSIGN_MOD;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_MOD;
scc_probe_stream_reset(stream), type = SCC_TOK_MOD;
break;
}
break;
case '&':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '&':
type = TOKEN_AND_AND;
type = SCC_TOK_AND_AND;
goto double_char;
case '=':
type = TOKEN_ASSIGN_AND;
type = SCC_TOK_ASSIGN_AND;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_AND;
scc_probe_stream_reset(stream), type = SCC_TOK_AND;
break;
}
break;
case '|':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '|':
type = TOKEN_OR_OR;
type = SCC_TOK_OR_OR;
goto double_char;
case '=':
type = TOKEN_ASSIGN_OR;
type = SCC_TOK_ASSIGN_OR;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_OR;
scc_probe_stream_reset(stream), type = SCC_TOK_OR;
break;
}
break;
case '^':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_ASSIGN_XOR;
type = SCC_TOK_ASSIGN_XOR;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_XOR;
scc_probe_stream_reset(stream), type = SCC_TOK_XOR;
break;
}
break;
case '<':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_LE;
type = SCC_TOK_LE;
goto double_char;
case '<': {
if (core_probe_stream_next(stream) == '=') {
type = TOKEN_ASSIGN_L_SH;
if (scc_probe_stream_next(stream) == '=') {
type = SCC_TOK_ASSIGN_L_SH;
goto triple_char;
} else {
type = TOKEN_L_SH;
type = SCC_TOK_L_SH;
goto double_char;
}
break;
}
default:
core_probe_stream_reset(stream), type = TOKEN_LT;
scc_probe_stream_reset(stream), type = SCC_TOK_LT;
break;
}
break;
case '>':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_GE;
type = SCC_TOK_GE;
goto double_char;
case '>': {
if (core_probe_stream_next(stream) == '=') {
type = TOKEN_ASSIGN_R_SH;
if (scc_probe_stream_next(stream) == '=') {
type = SCC_TOK_ASSIGN_R_SH;
goto triple_char;
} else {
type = TOKEN_R_SH;
type = SCC_TOK_R_SH;
goto double_char;
}
break;
}
default:
core_probe_stream_reset(stream), type = TOKEN_GT;
scc_probe_stream_reset(stream), type = SCC_TOK_GT;
break;
}
break;
case '~':
type = TOKEN_BIT_NOT;
type = SCC_TOK_BIT_NOT;
break;
case '!':
switch (core_probe_stream_next(stream)) {
switch (scc_probe_stream_next(stream)) {
case '=':
type = TOKEN_NEQ;
type = SCC_TOK_NEQ;
goto double_char;
default:
core_probe_stream_reset(stream), type = TOKEN_NOT;
scc_probe_stream_reset(stream), type = SCC_TOK_NOT;
break;
}
break;
case '[':
type = TOKEN_L_BRACKET;
type = SCC_TOK_L_BRACKET;
break;
case ']':
type = TOKEN_R_BRACKET;
type = SCC_TOK_R_BRACKET;
break;
case '(':
type = TOKEN_L_PAREN;
type = SCC_TOK_L_PAREN;
break;
case ')':
type = TOKEN_R_PAREN;
type = SCC_TOK_R_PAREN;
break;
case '{':
type = TOKEN_L_BRACE;
type = SCC_TOK_L_BRACE;
break;
case '}':
type = TOKEN_R_BRACE;
type = SCC_TOK_R_BRACE;
break;
case ';':
type = TOKEN_SEMICOLON;
type = SCC_TOK_SEMICOLON;
break;
case ',':
type = TOKEN_COMMA;
type = SCC_TOK_COMMA;
break;
case ':':
type = TOKEN_COLON;
type = SCC_TOK_COLON;
break;
case '.':
if (core_probe_stream_next(stream) == '.' &&
core_probe_stream_next(stream) == '.') {
type = TOKEN_ELLIPSIS;
if (scc_probe_stream_next(stream) == '.' &&
scc_probe_stream_next(stream) == '.') {
type = SCC_TOK_ELLIPSIS;
goto triple_char;
}
type = TOKEN_DOT;
type = SCC_TOK_DOT;
break;
case '?':
type = TOKEN_COND;
type = SCC_TOK_COND;
break;
case '\v':
case '\f':
case ' ':
case '\t':
type = TOKEN_BLANK;
type = SCC_TOK_BLANK;
break;
case '\r':
case '\n':
lex_parse_skip_endline(lexer->stream, &lexer->pos);
token->type = TOKEN_BLANK;
token->type = SCC_TOK_BLANK;
goto END;
case '#':
parse_line(lexer, token);
token->type = TOKEN_BLANK;
token->type = SCC_TOK_BLANK;
goto END;
case '\0':
case core_stream_eof:
// EOF
type = TOKEN_EOF;
type = SCC_TOK_EOF;
break;
case '\'': {
token->loc = lexer->pos;
token->type = TOKEN_CHAR_LITERAL;
token->type = SCC_TOK_CHAR_LITERAL;
int ch = lex_parse_char(lexer->stream, &lexer->pos);
if (ch == core_stream_eof) {
LEX_ERROR("Unexpected character literal");
token->type = TOKEN_UNKNOWN;
token->type = SCC_TOK_UNKNOWN;
} else {
token->value.ch = ch;
}
@@ -395,14 +395,14 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
}
case '"': {
token->loc = lexer->pos;
token->type = TOKEN_STRING_LITERAL;
cstring_t output = cstring_new();
token->type = SCC_TOK_STRING_LITERAL;
scc_cstring_t output = scc_cstring_new();
if (lex_parse_string(lexer->stream, &lexer->pos, &output) == true) {
token->value.cstr.data = cstring_as_cstr(&output);
token->value.cstr.len = cstring_len(&output);
token->value.cstr.data = scc_cstring_as_cstr(&output);
token->value.cstr.len = scc_cstring_len(&output);
} else {
LEX_ERROR("Unexpected string literal");
token->type = TOKEN_UNKNOWN;
token->type = SCC_TOK_UNKNOWN;
}
goto END;
@@ -412,13 +412,13 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
case '5': case '6': case '7': case '8': case '9':
/* clang-format on */
token->loc = lexer->pos;
token->type = TOKEN_INT_LITERAL;
token->type = SCC_TOK_INT_LITERAL;
usize output;
if (lex_parse_number(lexer->stream, &lexer->pos, &output) == true) {
token->value.n = output;
} else {
LEX_ERROR("Unexpected number literal");
token->type = TOKEN_UNKNOWN;
token->type = SCC_TOK_UNKNOWN;
}
goto END;
/* clang-format off */
@@ -431,17 +431,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
/* clang-format on */
cstring_t str = cstring_new();
scc_cstring_t str = scc_cstring_new();
cbool ret = lex_parse_identifier(lexer->stream, &lexer->pos, &str);
Assert(ret == true);
int res = keyword_cmp(cstring_as_cstr(&str), cstring_len(&str));
int res = keyword_cmp(scc_cstring_as_cstr(&str), scc_cstring_len(&str));
if (res == -1) {
token->value.cstr.data = (char *)cstring_as_cstr(&str);
token->value.cstr.len = cstring_len(&str);
type = TOKEN_IDENT;
token->value.cstr.data = (char *)scc_cstring_as_cstr(&str);
token->value.cstr.len = scc_cstring_len(&str);
type = SCC_TOK_IDENT;
} else {
cstring_free(&str);
scc_cstring_free(&str);
type = keywords[res].tok;
}
token->type = type;
@@ -452,29 +452,31 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
}
goto once_char;
triple_char:
core_probe_stream_consume(stream);
scc_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
double_char:
core_probe_stream_consume(stream);
scc_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
once_char:
core_probe_stream_consume(stream);
scc_probe_stream_consume(stream);
core_pos_next(&lexer->pos);
token->type = type;
END:
LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(token->type),
LEX_DEBUG("get token `%s` in %s:%d:%d", scc_get_tok_name(token->type),
token->loc.name, token->loc.line, token->loc.column);
}
// lexer_get_token maybe got invalid (with parser)
void lexer_get_valid_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
token_subtype_t type;
// scc_lexer_get_token maybe got invalid (with parser)
void scc_lexer_get_valid_token(scc_lexer_t *lexer, lexer_tok_t *token) {
scc_tok_subtype_t type;
do {
lexer_get_token(lexer, token);
type = get_tok_subtype(token->type);
AssertFmt(type != TK_BASIC_INVALID, "Invalid token: `%s` at %s:%d:%d",
get_tok_name(token->type), token->loc.name, token->loc.line,
token->loc.col);
Assert(type != TK_BASIC_INVALID);
} while (type == TK_BASIC_EMPTYSPACE || type == TK_BASIC_COMMENT);
scc_lexer_get_token(lexer, token);
type = scc_get_tok_subtype(token->type);
AssertFmt(type != SCC_TOK_SUBTYPE_INVALID,
"Invalid token: `%s` at %s:%d:%d",
scc_get_tok_name(token->type), token->loc.name,
token->loc.line, token->loc.col);
Assert(type != SCC_TOK_SUBTYPE_INVALID);
} while (type == SCC_TOK_SUBTYPE_EMPTYSPACE ||
type == SCC_TOK_SUBTYPE_COMMENT);
}