|
|
|
|
@@ -26,6 +26,7 @@ the distribution and installation instructions.
|
|
|
|
|
Chris Fraser / cwf@aya.yale.edu
|
|
|
|
|
David Hanson / drh@drhanson.net
|
|
|
|
|
*/
|
|
|
|
|
#include <lex_parser.h>
|
|
|
|
|
#include <lexer.h>
|
|
|
|
|
#include <lexer_log.h>
|
|
|
|
|
|
|
|
|
|
@@ -76,303 +77,21 @@ static inline int keyword_cmp(const char *name, int len) {
|
|
|
|
|
|
|
|
|
|
/**
 * Prepare a lexer to read tokens from `stream`.
 *
 * Stores the stream pointer, sets the current position to the value returned
 * by core_pos_init(), and then fills in the position's name from the
 * stream's name.
 *
 * @param lexer  lexer object to initialise
 * @param stream input stream the lexer will read from
 */
void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) {
    lexer->stream = stream;

    /* core_pos_init() provides the complete starting position; only the
     * name has to be filled in afterwards, so no other store to
     * lexer->pos is needed. */
    lexer->pos = core_pos_init();
    // FIXME
    // NOTE(review): presumably the FIXME is about who owns the string
    // created by cstring_from_cstr() -- confirm.
    lexer->pos.name = cstring_from_cstr(cstring_as_cstr(&stream->name));
}
|
|
|
|
|
|
|
|
|
|
// Call the stream's `reset_char` callback on the stream itself.
// The argument is expanded twice, so pass a side-effect-free expression.
// NOTE(review): presumably this rewinds the look-ahead used by
// stream_peek_char -- confirm against the core_stream_t definition.
#define stream_reset_char(stream) ((stream)->reset_char(stream))
|
|
|
|
|
// Call the stream's `next_char` callback and yield its result
// (compared against core_stream_eof by the callers in this file).
#define stream_next_char(stream) ((stream)->next_char(stream))
|
|
|
|
|
// Call the stream's `peek_char` callback and yield its result.
// NOTE(review): callers peek repeatedly to look further ahead, so each call
// appears to advance a look-ahead cursor -- confirm.
#define stream_peek_char(stream) ((stream)->peek_char(stream))
|
|
|
|
|
// Advance the lexer position by one character: column and offset are both
// incremented. Expands to a comma expression, so it can be used wherever an
// expression is allowed.
#define lexer_next_pos(lexer) ((lexer)->pos.column++, (lexer)->pos.offset++)
|
|
|
|
|
// Move the lexer position to the start of the next line: the line number is
// incremented and the column is set back to 1. The offset is not touched.
#define lexer_next_line(lexer) ((lexer)->pos.line++, (lexer)->pos.column = 1)
|
|
|
|
|
// Mark a token as invalid by setting its type to TOKEN_UNKNOWN.
#define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)
|
|
|
|
|
|
|
|
|
|
/**
 * Consume characters up to and including the next newline.
 *
 * The token is tagged as TOKEN_LINE_COMMENT. Every consumed character
 * advances the lexer position; a consumed newline additionally moves the
 * position to the start of the next line. Reaching end of input stops the
 * scan without advancing the position.
 *
 * @param lexer lexer whose stream and position are advanced
 * @param token token whose type is set to TOKEN_LINE_COMMENT
 */
static void skip_newline(smcc_lexer_t *lexer, lexer_tok_t *token) {
    core_stream_t *in = lexer->stream;
    int c;

    token->type = TOKEN_LINE_COMMENT;

    /* Read one character per iteration until a newline or end of input. */
    for (c = stream_next_char(in); c != core_stream_eof;
         c = stream_next_char(in)) {
        /* Account for the character that was just consumed. */
        lexer_next_pos(lexer);
        if (c != '\n') {
            continue;
        }
        /* Newline: bump the line number and reset the column, then stop. */
        lexer_next_line(lexer);
        break;
    }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Consume a block comment, from its opening marker through its closing
 * marker.
 *
 * The token is tagged as TOKEN_BLOCK_COMMENT. The first two characters are
 * consumed and asserted to be '/' and '*'. Characters are then consumed until
 * a '*' immediately followed by '/' is found. If the input ends first, a
 * warning is logged and the function returns.
 *
 * The position is advanced only for characters that were actually consumed,
 * so an unterminated comment no longer moves the position past the end of
 * input.
 *
 * @param lexer lexer whose stream and position are advanced
 * @param token token whose type is set to TOKEN_BLOCK_COMMENT
 */
static void skip_block_comment(smcc_lexer_t *lexer, lexer_tok_t *token) {
    core_stream_t *stream = lexer->stream;
    int ch;

    token->type = TOKEN_BLOCK_COMMENT;

    stream_reset_char(stream);

    /* Consume the two-character opener. */
    ch = stream_next_char(stream);
    lexer_next_pos(lexer);
    // FIXME Assertion
    Assert(ch == '/');
    ch = stream_next_char(stream);
    lexer_next_pos(lexer);
    Assert(ch == '*');

    /* The opener has been recognised; now look for the closing marker. */
    for (;;) {
        ch = stream_next_char(stream);

        if (ch == core_stream_eof) {
            /* Unclosed block comment: nothing was consumed, so the
             * position is left untouched. */
            LEX_WARN("Unterminated block comment");
            return;
        }

        /* Account for the character that was just consumed. */
        lexer_next_pos(lexer);

        if (ch == '\n') {
            lexer_next_line(lexer);
            continue;
        }

        /* A '*' ends the comment only when the next character is '/'. */
        if (ch == '*' && stream_peek_char(stream) == '/') {
            /* Consume the '/' and account for it. */
            stream_next_char(stream);
            lexer_next_pos(lexer);
            return;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
// TODO escape character not enough
|
|
|
|
|
/**
 * Translate the character that follows a backslash into the character the
 * escape sequence denotes.
 *
 * Supported escapes: \\ \' \" \? \0 \a \b \f \n \r \t \v.
 * Octal (other than \0), hexadecimal and universal-character escapes are
 * not handled here.
 *
 * @param peek character immediately following the backslash
 * @return the value of the escape sequence, or -1 if `peek` does not form a
 *         supported escape
 */
static inline int got_slash(int peek) {
    switch (peek) {
    /* Characters that stand for themselves. */
    case '\\':
        return '\\';
    case '\'':
        return '\'';
    case '\"':
        return '\"';
    case '\?':
        return '\?';

    /* NUL. */
    case '0':
        return '\0';

    /* Control characters. */
    case 'a':
        return '\a';
    case 'b':
        return '\b';
    case 'f':
        return '\f';
    case 'n':
        return '\n';
    case 'r':
        return '\r';
    case 't':
        return '\t';
    case 'v':
        return '\v';

    default:
        break;
    }
    return -1;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parse a character literal such as 'a' or '\n'.
 *
 * The token location is set to the current lexer position and the type to
 * TOKEN_CHAR_LITERAL. On success the decoded character is stored in
 * token->value.ch. On any error a diagnostic is logged and the token type is
 * set to TOKEN_UNKNOWN.
 *
 * The position is advanced once for every character actually consumed,
 * including the closing quote, and is not advanced when a read hits end of
 * input.
 *
 * @param lexer lexer whose stream and position are advanced
 * @param token token receiving the location, type and character value
 */
static void parse_char(smcc_lexer_t *lexer, lexer_tok_t *token) {
    token->loc = lexer->pos;
    token->type = TOKEN_CHAR_LITERAL;
    core_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);

    /* The literal must start with a single quote. */
    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch != '\'') {
        LEX_WARN("Unexpected character '%c' at begin", ch);
        goto ERR;
    }
    stream_next_char(stream);
    lexer_next_pos(lexer);

    /* The character itself, or the backslash introducing an escape. */
    ch = stream_next_char(stream);
    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at middle");
        goto ERR;
    }
    lexer_next_pos(lexer);

    if (ch == '\\') {
        ch = stream_next_char(stream);
        if (ch != core_stream_eof) {
            lexer_next_pos(lexer);
        }
        if ((ch = got_slash(ch)) == -1) {
            LEX_ERROR("Invalid escape character");
            // TODO handle special cases
            goto ERR;
        }
    }
    token->value.ch = ch;

    /* Closing quote: a consumed character always advances the position,
     * whether or not it turns out to be the quote. */
    ch = stream_next_char(stream);
    if (ch != core_stream_eof) {
        lexer_next_pos(lexer);
    }
    if (ch != '\'') {
        LEX_ERROR("Unclosed character literal '%c' at end, expect `'`", ch);
        goto ERR;
    }

    return;
ERR:
    set_err_token(token);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parse a double-quoted string literal.
 *
 * The token location is set to the current lexer position and the type to
 * TOKEN_STRING_LITERAL. The decoded contents (escape sequences translated by
 * got_slash) are accumulated in a cstring, and its data pointer and length
 * are stored in token->value.cstr.
 *
 * If the input does not start with '"', a warning is logged and the token
 * type is set to TOKEN_UNKNOWN. If end of input or a raw newline is reached
 * before the closing quote, an error is logged and the characters collected
 * so far are returned as the literal. An unsupported escape is reported and
 * skipped.
 *
 * The position is advanced for every consumed character, including both
 * characters of an escape sequence.
 *
 * @param lexer lexer whose stream and position are advanced
 * @param token token receiving the location, type and string value
 */
static void parse_string(smcc_lexer_t *lexer, lexer_tok_t *token) {
    token->loc = lexer->pos;
    token->type = TOKEN_STRING_LITERAL;
    core_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);

    /* The literal must start with a double quote. */
    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch != '"') {
        LEX_WARN("Unexpected character '%c' at begin", ch);
        goto ERR;
    }
    stream_next_char(stream);
    lexer_next_pos(lexer);

    cstring_t str = cstring_new();
    while (1) {
        ch = stream_peek_char(stream);

        if (ch == core_stream_eof) {
            LEX_ERROR("Unexpected EOF at string literal");
            break;
        } else if (ch == '\n') {
            LEX_ERROR("Unexpected newline at string literal");
            break;
        } else if (ch == '"') {
            /* Closing quote: consume it and stop. */
            stream_next_char(stream);
            lexer_next_pos(lexer);
            break;
        } else if (ch == '\\') {
            /* Consume the backslash, then the character it escapes. */
            stream_next_char(stream);
            lexer_next_pos(lexer);
            ch = stream_next_char(stream);
            if (ch == core_stream_eof) {
                LEX_ERROR("Unexpected EOF at string literal");
                break;
            }
            lexer_next_pos(lexer);
            if (ch == '\n') {
                /* Keep line tracking correct for a backslash-newline. */
                lexer_next_line(lexer);
            }

            int val = got_slash(ch);
            if (val == -1) {
                LEX_ERROR("Invalid escape character it is \\%c [%d]", ch, ch);
            } else {
                cstring_push(&str, val);
            }
            /* Both escape characters are already consumed; never fall
             * through to the ordinary-character path, which would consume
             * and drop one more character. */
            continue;
        }

        /* Ordinary character. */
        stream_next_char(stream);
        lexer_next_pos(lexer);
        cstring_push(&str, ch);
    }

    token->value.cstr.data = (char *)cstring_as_cstr(&str);
    token->value.cstr.len = cstring_len(&str);
    return;
ERR:
    set_err_token(token);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parse an integer literal in base 2, 8, 10 or 16.
 *
 * The token location is set to the current lexer position. The base is
 * chosen from the prefix: "0x"/"0X" selects 16, "0b"/"0B" selects 2, a '0'
 * followed by an octal digit selects 8, anything else selects 10. Digits are
 * accumulated into token->value.n until a character that is not a digit or
 * letter is seen, or a digit that is not valid in the chosen base is met (an
 * error is logged in that case and the value parsed so far is kept).
 *
 * On success the token type is TOKEN_INT_LITERAL. If the input is at end of
 * file, or a "0x"/"0b" prefix is not followed by at least one valid digit,
 * the token type is set to TOKEN_UNKNOWN.
 *
 * @param lexer lexer whose stream and position are advanced
 * @param token token receiving the location, type and numeric value
 */
static void parse_number(smcc_lexer_t *lexer, lexer_tok_t *token) {
    token->loc = lexer->pos;
    core_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);
    int base = 10;
    int has_prefix = 0; /* set when a two-character 0x / 0b prefix was read */
    int ndigits = 0;    /* digits accepted after the prefix */

    if (ch == core_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    }

    if (ch == '0') {
        /* Look one character further to classify the prefix. */
        ch = stream_peek_char(stream);
        if (ch == 'x' || ch == 'X') {
            base = 16;
            has_prefix = 1;
            stream_next_char(stream);
            lexer_next_pos(lexer);
            stream_next_char(stream);
            lexer_next_pos(lexer);
        } else if (ch == 'b' || ch == 'B') {
            // FIXME C23 external integer base
            base = 2;
            has_prefix = 1;
            stream_next_char(stream);
            lexer_next_pos(lexer);
            stream_next_char(stream);
            lexer_next_pos(lexer);
        } else if (ch >= '0' && ch <= '7') {
            /* Only the leading '0' is consumed; the digit is parsed below. */
            base = 8;
            stream_next_char(stream);
            lexer_next_pos(lexer);
        }
    }

    /* Parse the integer part. */
    stream_reset_char(stream);
    token->value.n = 0;
    while (1) {
        int digit;

        ch = stream_peek_char(stream);
        if (ch >= 'a' && ch <= 'z') {
            digit = ch - 'a' + 10;
        } else if (ch >= 'A' && ch <= 'Z') {
            digit = ch - 'A' + 10;
        } else if (ch >= '0' && ch <= '9') {
            digit = ch - '0';
        } else {
            /* End of input or any non-alphanumeric character ends the
             * literal. */
            break;
        }

        if (digit >= base) {
            LEX_ERROR("Invalid digit");
            break;
        }

        stream_next_char(stream);
        lexer_next_pos(lexer);
        token->value.n = token->value.n * base + digit;
        ndigits++;
        // TODO number overflow
    }

    /* "0x" or "0b" on its own is not a number. */
    if (has_prefix && ndigits == 0) {
        LEX_ERROR("Missing digits after integer base prefix");
        goto ERR;
    }

    token->type = TOKEN_INT_LITERAL;
    return;
ERR:
    set_err_token(token);
}
|
|
|
|
|
|
|
|
|
|
static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
token->loc = lexer->pos;
|
|
|
|
|
core_stream_t *stream = lexer->stream;
|
|
|
|
|
stream_reset_char(stream);
|
|
|
|
|
int ch = stream_peek_char(stream);
|
|
|
|
|
core_stream_reset_char(stream);
|
|
|
|
|
int ch = core_stream_peek_char(stream);
|
|
|
|
|
|
|
|
|
|
usize n;
|
|
|
|
|
cstring_t str = cstring_new();
|
|
|
|
|
|
|
|
|
|
if (ch == core_stream_eof) {
|
|
|
|
|
LEX_WARN("Unexpected EOF at begin");
|
|
|
|
|
@@ -384,9 +103,9 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
|
|
|
|
|
const char line[] = "line";
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < sizeof(line); i++) {
|
|
|
|
|
ch = stream_next_char(stream);
|
|
|
|
|
lexer_next_pos(lexer);
|
|
|
|
|
for (int i = 0; i < (int)sizeof(line); i++) {
|
|
|
|
|
ch = core_stream_next_char(stream);
|
|
|
|
|
core_pos_next(&lexer->pos);
|
|
|
|
|
if (ch != line[i]) {
|
|
|
|
|
LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
|
|
|
|
|
"it will be ignored");
|
|
|
|
|
@@ -394,38 +113,36 @@ static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
parse_number(lexer, token);
|
|
|
|
|
if (token->type != TOKEN_INT_LITERAL) {
|
|
|
|
|
if (lex_parse_number(lexer->stream, &lexer->pos, &n) == false) {
|
|
|
|
|
LEX_ERROR("Invalid line number");
|
|
|
|
|
goto SKIP_LINE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (stream_next_char(stream) != ' ') {
|
|
|
|
|
skip_newline(lexer, token);
|
|
|
|
|
if (core_stream_next_char(stream) != ' ') {
|
|
|
|
|
lex_parse_skip_line(lexer->stream, &lexer->pos);
|
|
|
|
|
token->loc.line = token->value.n;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (stream_peek_char(stream) != '"') {
|
|
|
|
|
if (core_stream_peek_char(stream) != '"') {
|
|
|
|
|
LEX_ERROR("Invalid `#` line");
|
|
|
|
|
goto SKIP_LINE;
|
|
|
|
|
}
|
|
|
|
|
parse_string(lexer, token);
|
|
|
|
|
if (token->type != TOKEN_STRING_LITERAL) {
|
|
|
|
|
if (lex_parse_string(lexer->stream, &lexer->pos, &str) == false) {
|
|
|
|
|
LEX_ERROR("Invalid filename");
|
|
|
|
|
goto SKIP_LINE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
skip_newline(lexer, token);
|
|
|
|
|
token->loc.line = token->value.n;
|
|
|
|
|
lex_parse_skip_line(lexer->stream, &lexer->pos);
|
|
|
|
|
token->loc.line = n;
|
|
|
|
|
// FIXME memory leak
|
|
|
|
|
token->loc.name = cstring_as_cstr((const cstring_t *)&token->value.cstr);
|
|
|
|
|
token->loc.name_len = cstring_len((const cstring_t *)&token->value.cstr);
|
|
|
|
|
|
|
|
|
|
token->loc.name = cstring_from_cstr(cstring_as_cstr(&str));
|
|
|
|
|
cstring_free(&str);
|
|
|
|
|
return;
|
|
|
|
|
SKIP_LINE:
|
|
|
|
|
skip_newline(lexer, token);
|
|
|
|
|
lex_parse_skip_line(lexer->stream, &lexer->pos);
|
|
|
|
|
ERR:
|
|
|
|
|
set_err_token(token);
|
|
|
|
|
cstring_free(&str);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// /zh/c/language/operator_arithmetic.html
|
|
|
|
|
@@ -434,24 +151,24 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
token->type = TOKEN_UNKNOWN;
|
|
|
|
|
core_stream_t *stream = lexer->stream;
|
|
|
|
|
|
|
|
|
|
stream_reset_char(stream);
|
|
|
|
|
core_stream_reset_char(stream);
|
|
|
|
|
token_type_t type = TOKEN_UNKNOWN;
|
|
|
|
|
int ch = stream_peek_char(stream);
|
|
|
|
|
int ch = core_stream_peek_char(stream);
|
|
|
|
|
|
|
|
|
|
// once step
|
|
|
|
|
switch (ch) {
|
|
|
|
|
case '=':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '=':
|
|
|
|
|
type = TOKEN_EQ;
|
|
|
|
|
goto double_char;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_ASSIGN;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_ASSIGN;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '+':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '+':
|
|
|
|
|
type = TOKEN_ADD_ADD;
|
|
|
|
|
goto double_char;
|
|
|
|
|
@@ -459,12 +176,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
type = TOKEN_ASSIGN_ADD;
|
|
|
|
|
goto double_char;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_ADD;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_ADD;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '-':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '-':
|
|
|
|
|
type = TOKEN_SUB_SUB;
|
|
|
|
|
goto double_char;
|
|
|
|
|
@@ -475,48 +192,50 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
type = TOKEN_DEREF;
|
|
|
|
|
goto double_char;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_SUB;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_SUB;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '*':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '=':
|
|
|
|
|
type = TOKEN_ASSIGN_MUL;
|
|
|
|
|
goto double_char;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_MUL;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_MUL;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '/':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '=':
|
|
|
|
|
type = TOKEN_ASSIGN_DIV;
|
|
|
|
|
goto double_char;
|
|
|
|
|
case '/':
|
|
|
|
|
skip_newline(lexer, token);
|
|
|
|
|
lex_parse_skip_line(lexer->stream, &lexer->pos);
|
|
|
|
|
token->type = TOKEN_LINE_COMMENT;
|
|
|
|
|
goto END;
|
|
|
|
|
case '*':
|
|
|
|
|
skip_block_comment(lexer, token);
|
|
|
|
|
lex_parse_skip_block_comment(lexer->stream, &lexer->pos);
|
|
|
|
|
token->type = TOKEN_BLOCK_COMMENT;
|
|
|
|
|
goto END;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_DIV;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_DIV;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '%':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '=':
|
|
|
|
|
type = TOKEN_ASSIGN_MOD;
|
|
|
|
|
goto double_char;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_MOD;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_MOD;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '&':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '&':
|
|
|
|
|
type = TOKEN_AND_AND;
|
|
|
|
|
goto double_char;
|
|
|
|
|
@@ -524,12 +243,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
type = TOKEN_ASSIGN_AND;
|
|
|
|
|
goto double_char;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_AND;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_AND;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '|':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '|':
|
|
|
|
|
type = TOKEN_OR_OR;
|
|
|
|
|
goto double_char;
|
|
|
|
|
@@ -537,27 +256,27 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
type = TOKEN_ASSIGN_OR;
|
|
|
|
|
goto double_char;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_OR;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_OR;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '^':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '=':
|
|
|
|
|
type = TOKEN_ASSIGN_XOR;
|
|
|
|
|
goto double_char;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_XOR;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_XOR;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '<':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '=':
|
|
|
|
|
type = TOKEN_LE;
|
|
|
|
|
goto double_char;
|
|
|
|
|
case '<': {
|
|
|
|
|
if (stream_peek_char(stream) == '=') {
|
|
|
|
|
if (core_stream_peek_char(stream) == '=') {
|
|
|
|
|
type = TOKEN_ASSIGN_L_SH;
|
|
|
|
|
goto triple_char;
|
|
|
|
|
} else {
|
|
|
|
|
@@ -567,17 +286,17 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_LT;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_LT;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
case '>':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '=':
|
|
|
|
|
type = TOKEN_GE;
|
|
|
|
|
goto double_char;
|
|
|
|
|
case '>': {
|
|
|
|
|
if (stream_peek_char(stream) == '=') {
|
|
|
|
|
if (core_stream_peek_char(stream) == '=') {
|
|
|
|
|
type = TOKEN_ASSIGN_R_SH;
|
|
|
|
|
goto triple_char;
|
|
|
|
|
} else {
|
|
|
|
|
@@ -587,7 +306,7 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_GT;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_GT;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
@@ -595,12 +314,12 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
type = TOKEN_BIT_NOT;
|
|
|
|
|
break;
|
|
|
|
|
case '!':
|
|
|
|
|
switch (stream_peek_char(stream)) {
|
|
|
|
|
switch (core_stream_peek_char(stream)) {
|
|
|
|
|
case '=':
|
|
|
|
|
type = TOKEN_NEQ;
|
|
|
|
|
goto double_char;
|
|
|
|
|
default:
|
|
|
|
|
stream_reset_char(stream), type = TOKEN_NOT;
|
|
|
|
|
core_stream_reset_char(stream), type = TOKEN_NOT;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
@@ -632,8 +351,8 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
type = TOKEN_COLON;
|
|
|
|
|
break;
|
|
|
|
|
case '.':
|
|
|
|
|
if (stream_peek_char(stream) == '.' &&
|
|
|
|
|
stream_peek_char(stream) == '.') {
|
|
|
|
|
if (core_stream_peek_char(stream) == '.' &&
|
|
|
|
|
core_stream_peek_char(stream) == '.') {
|
|
|
|
|
type = TOKEN_ELLIPSIS;
|
|
|
|
|
goto triple_char;
|
|
|
|
|
}
|
|
|
|
|
@@ -643,17 +362,14 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
type = TOKEN_COND;
|
|
|
|
|
break;
|
|
|
|
|
case '\v':
|
|
|
|
|
case '\r':
|
|
|
|
|
case '\f':
|
|
|
|
|
case ' ':
|
|
|
|
|
case '\t':
|
|
|
|
|
type = TOKEN_BLANK;
|
|
|
|
|
break;
|
|
|
|
|
case '\r':
|
|
|
|
|
case '\n':
|
|
|
|
|
// you need to flush a newline or blank
|
|
|
|
|
stream_next_char(stream);
|
|
|
|
|
lexer_next_line(lexer);
|
|
|
|
|
// FIXME some error
|
|
|
|
|
lex_parse_skip_endline(lexer->stream, &lexer->pos);
|
|
|
|
|
token->type = TOKEN_BLANK;
|
|
|
|
|
goto END;
|
|
|
|
|
case '#':
|
|
|
|
|
@@ -665,17 +381,45 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
// EOF
|
|
|
|
|
type = TOKEN_EOF;
|
|
|
|
|
break;
|
|
|
|
|
case '\'':
|
|
|
|
|
parse_char(lexer, token);
|
|
|
|
|
case '\'': {
|
|
|
|
|
token->loc = lexer->pos;
|
|
|
|
|
token->type = TOKEN_CHAR_LITERAL;
|
|
|
|
|
int ch = lex_parse_char(lexer->stream, &lexer->pos);
|
|
|
|
|
if (ch == core_stream_eof) {
|
|
|
|
|
LEX_ERROR("Unexpected character literal");
|
|
|
|
|
token->type = TOKEN_UNKNOWN;
|
|
|
|
|
} else {
|
|
|
|
|
token->value.ch = ch;
|
|
|
|
|
}
|
|
|
|
|
goto END;
|
|
|
|
|
case '"':
|
|
|
|
|
parse_string(lexer, token);
|
|
|
|
|
}
|
|
|
|
|
case '"': {
|
|
|
|
|
token->loc = lexer->pos;
|
|
|
|
|
token->type = TOKEN_STRING_LITERAL;
|
|
|
|
|
cstring_t output = cstring_new();
|
|
|
|
|
if (lex_parse_string(lexer->stream, &lexer->pos, &output) == true) {
|
|
|
|
|
token->value.cstr.data = cstring_as_cstr(&output);
|
|
|
|
|
token->value.cstr.len = cstring_len(&output);
|
|
|
|
|
} else {
|
|
|
|
|
LEX_ERROR("Unexpected string literal");
|
|
|
|
|
token->type = TOKEN_UNKNOWN;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
goto END;
|
|
|
|
|
}
|
|
|
|
|
/* clang-format off */
|
|
|
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
|
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
|
|
|
/* clang-format on */
|
|
|
|
|
parse_number(lexer, token);
|
|
|
|
|
token->loc = lexer->pos;
|
|
|
|
|
token->type = TOKEN_INT_LITERAL;
|
|
|
|
|
usize output;
|
|
|
|
|
if (lex_parse_number(lexer->stream, &lexer->pos, &output) == true) {
|
|
|
|
|
token->value.n = output;
|
|
|
|
|
} else {
|
|
|
|
|
LEX_ERROR("Unexpected number literal");
|
|
|
|
|
token->type = TOKEN_UNKNOWN;
|
|
|
|
|
}
|
|
|
|
|
goto END;
|
|
|
|
|
/* clang-format off */
|
|
|
|
|
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
|
|
|
|
|
@@ -687,25 +431,9 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
|
|
|
|
|
case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
|
|
|
|
|
/* clang-format on */
|
|
|
|
|
// TOKEN_IDENT
|
|
|
|
|
// TODO
|
|
|
|
|
// if ((ch == 'L' && ch == '\'') || (ch == 'L' && ch == '"')) {
|
|
|
|
|
// LEX_ERROR("unsupport wide-character char literal by `L` format");
|
|
|
|
|
// }
|
|
|
|
|
cstring_t str = cstring_new();
|
|
|
|
|
cstring_push(&str, stream_next_char(stream));
|
|
|
|
|
lexer_next_pos(lexer);
|
|
|
|
|
while (1) {
|
|
|
|
|
ch = stream_peek_char(stream);
|
|
|
|
|
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
|
|
|
|
|
(ch == '_') || (ch >= '0' && ch <= '9')) {
|
|
|
|
|
stream_next_char(stream);
|
|
|
|
|
lexer_next_pos(lexer);
|
|
|
|
|
cstring_push(&str, ch);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
cbool ret = lex_parse_identifier(lexer->stream, &lexer->pos, &str);
|
|
|
|
|
Assert(ret == true);
|
|
|
|
|
|
|
|
|
|
int res = keyword_cmp(cstring_as_cstr(&str), cstring_len(&str));
|
|
|
|
|
if (res == -1) {
|
|
|
|
|
@@ -724,14 +452,14 @@ void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
}
|
|
|
|
|
goto once_char;
|
|
|
|
|
triple_char:
|
|
|
|
|
stream_next_char(stream);
|
|
|
|
|
lexer_next_pos(lexer);
|
|
|
|
|
core_stream_next_char(stream);
|
|
|
|
|
core_pos_next(&lexer->pos);
|
|
|
|
|
double_char:
|
|
|
|
|
stream_next_char(stream);
|
|
|
|
|
lexer_next_pos(lexer);
|
|
|
|
|
core_stream_next_char(stream);
|
|
|
|
|
core_pos_next(&lexer->pos);
|
|
|
|
|
once_char:
|
|
|
|
|
stream_next_char(stream);
|
|
|
|
|
lexer_next_pos(lexer);
|
|
|
|
|
core_stream_next_char(stream);
|
|
|
|
|
core_pos_next(&lexer->pos);
|
|
|
|
|
token->type = type;
|
|
|
|
|
END:
|
|
|
|
|
LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(token->type),
|
|
|
|
|
@@ -746,6 +474,7 @@ void lexer_get_valid_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
|
|
|
|
|
type = get_tok_subtype(token->type);
|
|
|
|
|
AssertFmt(type != TK_BASIC_INVALID, "Invalid token: `%s` at %s:%d:%d",
|
|
|
|
|
get_tok_name(token->type), token->loc.name, token->loc.line,
|
|
|
|
|
token->loc.column);
|
|
|
|
|
token->loc.col);
|
|
|
|
|
Assert(type != TK_BASIC_INVALID);
|
|
|
|
|
} while (type == TK_BASIC_EMPTYSPACE || type == TK_BASIC_COMMENT);
|
|
|
|
|
}
|
|
|
|
|
|