Format code using clang-format

This commit is contained in:
zzy
2025-11-20 17:55:08 +08:00
parent 9762cf8a2b
commit d1fafa830d
27 changed files with 1047 additions and 766 deletions

View File

@@ -6,8 +6,8 @@
#ifndef __SMCC_CC_LEXER_H__
#define __SMCC_CC_LEXER_H__
#include <libcore.h>
#include "lexer_token.h"
#include <libcore.h>
typedef struct lexer_loc {
const char *name;
@@ -25,11 +25,11 @@ typedef struct lexer_token {
/**
* @brief 词法分析器核心结构体
*
*
* 封装词法分析所需的状态信息和缓冲区管理
*/
typedef struct cc_lexer {
core_stream_t* stream;
core_stream_t *stream;
lexer_loc_t pos;
} smcc_lexer_t;
@@ -38,24 +38,24 @@ typedef struct cc_lexer {
* @param[out] lexer 要初始化的词法分析器实例
* @param[in] stream 输入流对象指针
*/
void lexer_init(smcc_lexer_t* lexer, core_stream_t* stream);
void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream);
/**
* @brief 获取原始token
* @param[in] lexer 词法分析器实例
* @param[out] token 输出token存储位置
*
*
* 此函数会返回所有类型的token包括空白符等无效token
*/
void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token);
void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token);
/**
* @brief 获取有效token
* @param[in] lexer 词法分析器实例
* @param[out] token 输出token存储位置
*
*
* 此函数会自动跳过空白符等无效token返回对语法分析有意义的token
*/
void lexer_get_valid_token(smcc_lexer_t* lexer, lexer_tok_t* token);
void lexer_get_valid_token(smcc_lexer_t *lexer, lexer_tok_t *token);
#endif

View File

@@ -8,39 +8,39 @@
#endif
#if LEX_LOG_LEVEL <= 1
#define LEX_NOTSET( fmt, ...) MLOG_NOTSET(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#define LEX_NOTSET(fmt, ...) MLOG_NOTSET(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_NOTSET( fmt, ...)
#define LEX_NOTSET(fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 2
#define LEX_DEBUG( fmt, ...) MLOG_DEBUG(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#define LEX_DEBUG(fmt, ...) MLOG_DEBUG(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_DEBUG( fmt, ...)
#define LEX_DEBUG(fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 3
#define LEX_INFO( fmt, ...) MLOG_INFO(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#define LEX_INFO(fmt, ...) MLOG_INFO(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_INFO( fmt, ...)
#define LEX_INFO(fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 4
#define LEX_WARN( fmt, ...) MLOG_WARN(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#define LEX_WARN(fmt, ...) MLOG_WARN(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_WARN( fmt, ...)
#define LEX_WARN(fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 5
#define LEX_ERROR( fmt, ...) MLOG_ERROR(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#define LEX_ERROR(fmt, ...) MLOG_ERROR(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_ERROR( fmt, ...)
#define LEX_ERROR(fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 6
#define LEX_FATAL( fmt, ...) MLOG_FATAL(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#define LEX_FATAL(fmt, ...) MLOG_FATAL(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_FATAL( fmt, ...)
#define LEX_FATAL(fmt, ...)
#endif
extern logger_t __smcc_lexer_log;

View File

@@ -10,6 +10,7 @@ typedef enum ckeyword {
} ckeyword_t;
// Using Binary Search To Fast Find Keyword
/* clang-format off */
#define KEYWORD_TABLE \
X(asm , TK_BASIC_KEYWORD , TOKEN_ASM , CEXT_ASM) \
X(break , TK_BASIC_KEYWORD , TOKEN_BREAK , CSTD_C89) \
@@ -105,33 +106,34 @@ typedef enum ckeyword {
X(char_literal , TK_BASIC_LITERAL, TOKEN_CHAR_LITERAL ) \
X(string_literal , TK_BASIC_LITERAL, TOKEN_STRING_LITERAL ) \
// END
/* clang-format on */
// 定义TokenType枚举
typedef enum cc_tktype {
// 处理普通token
#define X(str, subtype, tok) tok,
// 处理普通token
#define X(str, subtype, tok) tok,
TOKEN_TABLE
#undef X
// 处理关键字(保持原有格式)
#define X(name, subtype, tok, std) tok,
KEYWORD_TABLE
#undef X
#undef X
// 处理关键字(保持原有格式)
#define X(name, subtype, tok, std) tok,
KEYWORD_TABLE
#undef X
} token_type_t;
typedef enum token_subtype {
TK_BASIC_INVALID, // 错误占位
TK_BASIC_KEYWORD, // 关键字
TK_BASIC_OPERATOR, // 操作符
TK_BASIC_IDENTIFIER, // 标识符
TK_BASIC_LITERAL, // 字面量
TK_BASIC_INVALID, // 错误占位
TK_BASIC_KEYWORD, // 关键字
TK_BASIC_OPERATOR, // 操作符
TK_BASIC_IDENTIFIER, // 标识符
TK_BASIC_LITERAL, // 字面量
TK_BASIC_EMPTYSPACE, // 空白
TK_BASIC_COMMENT, // 注释
TK_BASIC_EOF // 结束标记
TK_BASIC_EMPTYSPACE, // 空白
TK_BASIC_COMMENT, // 注释
TK_BASIC_EOF // 结束标记
} token_subtype_t;
token_subtype_t get_tok_subtype(token_type_t type);
const char* get_tok_name(token_type_t type);
const char *get_tok_name(token_type_t type);
#endif

View File

@@ -1,6 +1,6 @@
/**
* 仿照LCCompiler的词法分析部分
*
*
* 如下为LCC的README in 2025.2
This hierarchy is the distribution for lcc version 4.2.
@@ -26,43 +26,45 @@ the distribution and installation instructions.
Chris Fraser / cwf@aya.yale.edu
David Hanson / drh@drhanson.net
*/
#include <lexer_log.h>
#include <lexer.h>
#include <lexer_log.h>
static const struct {
const char* name;
const char *name;
ckeyword_t std_type;
token_type_t tok;
} keywords[] = {
#define X(name, subtype, tok, std_type,...) { #name, std_type, tok },
#define X(name, subtype, tok, std_type, ...) {#name, std_type, tok},
KEYWORD_TABLE
#undef X
#undef X
};
// by using binary search to find the keyword
static inline int keyword_cmp(const char* name, int len) {
static inline int keyword_cmp(const char *name, int len) {
int low = 0;
int high = sizeof(keywords) / sizeof(keywords[0]) - 1;
while (low <= high) {
int mid = (low + high) / 2;
const char *key = keywords[mid].name;
int cmp = 0;
// 自定义字符串比较逻辑
for (int i = 0; i < len; i++) {
if (name[i] != key[i]) {
cmp = (unsigned char)name[i] - (unsigned char)key[i];
break;
}
if (name[i] == '\0') break; // 遇到终止符提前结束
if (name[i] == '\0')
break; // 遇到终止符提前结束
}
if (cmp == 0) {
// 完全匹配检查(长度相同)
if (key[len] == '\0') return mid;
if (key[len] == '\0')
return mid;
cmp = -1; // 当前关键词比输入长
}
if (cmp < 0) {
high = mid - 1;
} else {
@@ -72,9 +74,9 @@ static inline int keyword_cmp(const char* name, int len) {
return -1; // Not a keyword.
}
void lexer_init(smcc_lexer_t* lexer, core_stream_t* stream) {
void lexer_init(smcc_lexer_t *lexer, core_stream_t *stream) {
lexer->stream = stream;
lexer->pos = (lexer_loc_t) {
lexer->pos = (lexer_loc_t){
.name = cstring_as_cstr(&stream->name),
.name_len = cstring_len(&stream->name),
.line = 1,
@@ -83,26 +85,26 @@ void lexer_init(smcc_lexer_t* lexer, core_stream_t* stream) {
};
}
#define stream_reset_char(stream) ((stream)->reset_char(stream))
#define stream_next_char(stream) ((stream)->next_char(stream))
#define stream_peek_char(stream) ((stream)->peek_char(stream))
#define lexer_next_pos(lexer) ((lexer)->pos.column ++, (lexer)->pos.offset ++)
#define lexer_next_line(lexer) ((lexer)->pos.line ++, (lexer)->pos.column = 1)
#define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)
#define stream_reset_char(stream) ((stream)->reset_char(stream))
#define stream_next_char(stream) ((stream)->next_char(stream))
#define stream_peek_char(stream) ((stream)->peek_char(stream))
#define lexer_next_pos(lexer) ((lexer)->pos.column++, (lexer)->pos.offset++)
#define lexer_next_line(lexer) ((lexer)->pos.line++, (lexer)->pos.column = 1)
#define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)
static void skip_newline(smcc_lexer_t* lexer, lexer_tok_t* token) {
core_stream_t* stream = lexer->stream;
static void skip_newline(smcc_lexer_t *lexer, lexer_tok_t *token) {
core_stream_t *stream = lexer->stream;
token->type = TOKEN_LINE_COMMENT;
// 循环直到遇到换行符或文件结束
while (1) {
int ch = stream_next_char(stream);
if (ch == core_stream_eof) {
// 到达文件末尾,直接返回
return;
}
// 更新位置信息
lexer_next_pos(lexer);
if (ch == '\n') {
@@ -113,19 +115,19 @@ static void skip_newline(smcc_lexer_t* lexer, lexer_tok_t* token) {
}
}
static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
core_stream_t* stream = lexer->stream;
static void skip_block_comment(smcc_lexer_t *lexer, lexer_tok_t *token) {
core_stream_t *stream = lexer->stream;
token->type = TOKEN_BLOCK_COMMENT;
int ch;
stream_reset_char(stream);
ch = stream_next_char(stream);
lexer_next_pos(lexer);
// FIXME Assertion
Assert (ch == '/');
Assert(ch == '/');
ch = stream_next_char(stream);
lexer_next_pos(lexer);
Assert (ch == '*');
Assert(ch == '*');
// 我们已经识别了 "/*",现在需要找到 "*/"
while (1) {
ch = stream_next_char(stream);
@@ -136,7 +138,7 @@ static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
LEX_WARN("Unterminated block comment");
return;
}
// LEX_ERROR("%c", ch);
// 更新位置信息
@@ -149,10 +151,10 @@ static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
if (next_ch == '/') {
// 消费 '/' 字符
stream_next_char(stream);
// 更新位置信息
lexer_next_pos(lexer);
// 成功找到注释结束标记
return;
}
@@ -163,24 +165,36 @@ static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
// TODO escape character not enough
static inline int got_slash(int peek) {
switch (peek) {
case '\\': return '\\';
case '\'': return '\'';
case '\"': return '\"';
case '\?': return '\?';
case '0': return '\0';
case '\\':
return '\\';
case '\'':
return '\'';
case '\"':
return '\"';
case '\?':
return '\?';
case '0':
return '\0';
case 'b': return '\b';
case 'f': return '\f';
case 'n': return '\n';
case 'r': return '\r';
case 't': return '\t';
case 'v': return '\v';
default: break;
case 'b':
return '\b';
case 'f':
return '\f';
case 'n':
return '\n';
case 'r':
return '\r';
case 't':
return '\t';
case 'v':
return '\v';
default:
break;
}
return -1;
}
static void parse_char(smcc_lexer_t* lexer, lexer_tok_t* token) {
static void parse_char(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
token->type = TOKEN_CHAR_LITERAL;
core_stream_t *stream = lexer->stream;
@@ -226,7 +240,7 @@ ERR:
set_err_token(token);
}
static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
static void parse_string(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
token->type = TOKEN_STRING_LITERAL;
core_stream_t *stream = lexer->stream;
@@ -242,12 +256,12 @@ static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
}
stream_next_char(stream);
lexer_next_pos(lexer);
int base = 0;
cstring_t str = cstring_new();
while (1) {
ch = stream_peek_char(stream);
if (ch == core_stream_eof) {
LEX_ERROR("Unexpected EOF at string literal");
break;
@@ -276,14 +290,14 @@ static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
cstring_push(&str, ch);
}
token->value.cstr.data = (char*)cstring_as_cstr(&str);
token->value.cstr.data = (char *)cstring_as_cstr(&str);
token->value.cstr.len = cstring_len(&str);
return;
ERR:
set_err_token(token);
}
static void parse_number(smcc_lexer_t* lexer, lexer_tok_t* token) {
static void parse_number(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
@@ -354,7 +368,7 @@ ERR:
set_err_token(token);
}
static void parse_line(smcc_lexer_t* lexer, lexer_tok_t* token) {
static void parse_line(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
core_stream_t *stream = lexer->stream;
stream_reset_char(stream);
@@ -374,7 +388,8 @@ static void parse_line(smcc_lexer_t* lexer, lexer_tok_t* token) {
ch = stream_next_char(stream);
lexer_next_pos(lexer);
if (ch != line[i]) {
LEX_WARN("Maroc does not support in lexer rather in preprocessor, it will be ignored");
LEX_WARN("Maroc does not support in lexer rather in preprocessor, "
"it will be ignored");
goto SKIP_LINE;
}
}
@@ -414,7 +429,7 @@ ERR:
}
// /zh/c/language/operator_arithmetic.html
void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
void lexer_get_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
token->loc = lexer->pos;
token->type = TOKEN_UNKNOWN;
core_stream_t *stream = lexer->stream;
@@ -427,122 +442,213 @@ void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
switch (ch) {
case '=':
switch (stream_peek_char(stream)) {
case '=': type = TOKEN_EQ; goto double_char;
default: stream_reset_char(stream), type = TOKEN_ASSIGN; break;
} break;
case '=':
type = TOKEN_EQ;
goto double_char;
default:
stream_reset_char(stream), type = TOKEN_ASSIGN;
break;
}
break;
case '+':
switch (stream_peek_char(stream)) {
case '+': type = TOKEN_ADD_ADD; goto double_char;
case '=': type = TOKEN_ASSIGN_ADD; goto double_char;
default: stream_reset_char(stream), type = TOKEN_ADD; break;
} break;
case '+':
type = TOKEN_ADD_ADD;
goto double_char;
case '=':
type = TOKEN_ASSIGN_ADD;
goto double_char;
default:
stream_reset_char(stream), type = TOKEN_ADD;
break;
}
break;
case '-':
switch (stream_peek_char(stream)) {
case '-': type = TOKEN_SUB_SUB; goto double_char;
case '=': type = TOKEN_ASSIGN_SUB; goto double_char;
case '>': type = TOKEN_DEREF; goto double_char;
default: stream_reset_char(stream), type = TOKEN_SUB; break;
} break;
case '-':
type = TOKEN_SUB_SUB;
goto double_char;
case '=':
type = TOKEN_ASSIGN_SUB;
goto double_char;
case '>':
type = TOKEN_DEREF;
goto double_char;
default:
stream_reset_char(stream), type = TOKEN_SUB;
break;
}
break;
case '*':
switch (stream_peek_char(stream)) {
case '=': type = TOKEN_ASSIGN_MUL; goto double_char;
default: stream_reset_char(stream), type = TOKEN_MUL; break;
} break;
case '=':
type = TOKEN_ASSIGN_MUL;
goto double_char;
default:
stream_reset_char(stream), type = TOKEN_MUL;
break;
}
break;
case '/':
switch (stream_peek_char(stream)) {
case '=': type = TOKEN_ASSIGN_DIV; goto double_char;
case '/': skip_newline(lexer, token); goto END;
case '*': skip_block_comment(lexer, token); goto END;
default: stream_reset_char(stream), type = TOKEN_DIV; break;
} break;
case '=':
type = TOKEN_ASSIGN_DIV;
goto double_char;
case '/':
skip_newline(lexer, token);
goto END;
case '*':
skip_block_comment(lexer, token);
goto END;
default:
stream_reset_char(stream), type = TOKEN_DIV;
break;
}
break;
case '%':
switch (stream_peek_char(stream)) {
case '=': type = TOKEN_ASSIGN_MOD; goto double_char;
default: stream_reset_char(stream), type = TOKEN_MOD; break;
} break;
case '=':
type = TOKEN_ASSIGN_MOD;
goto double_char;
default:
stream_reset_char(stream), type = TOKEN_MOD;
break;
}
break;
case '&':
switch (stream_peek_char(stream)) {
case '&': type = TOKEN_AND_AND; goto double_char;
case '=': type = TOKEN_ASSIGN_AND; goto double_char;
default: stream_reset_char(stream), type = TOKEN_AND; break;
} break;
case '&':
type = TOKEN_AND_AND;
goto double_char;
case '=':
type = TOKEN_ASSIGN_AND;
goto double_char;
default:
stream_reset_char(stream), type = TOKEN_AND;
break;
}
break;
case '|':
switch (stream_peek_char(stream)) {
case '|': type = TOKEN_OR_OR; goto double_char;
case '=': type = TOKEN_ASSIGN_OR; goto double_char;
default: stream_reset_char(stream), type = TOKEN_OR; break;
} break;
case '|':
type = TOKEN_OR_OR;
goto double_char;
case '=':
type = TOKEN_ASSIGN_OR;
goto double_char;
default:
stream_reset_char(stream), type = TOKEN_OR;
break;
}
break;
case '^':
switch (stream_peek_char(stream)) {
case '=': type = TOKEN_ASSIGN_XOR; goto double_char;
default: stream_reset_char(stream), type = TOKEN_XOR; break;
} break;
case '=':
type = TOKEN_ASSIGN_XOR;
goto double_char;
default:
stream_reset_char(stream), type = TOKEN_XOR;
break;
}
break;
case '<':
switch (stream_peek_char(stream)) {
case '=': type = TOKEN_LE; goto double_char;
case '<': {
if (stream_peek_char(stream) == '=') {
type = TOKEN_ASSIGN_L_SH;
goto triple_char;
} else {
type = TOKEN_L_SH;
goto double_char;
}
break;
case '=':
type = TOKEN_LE;
goto double_char;
case '<': {
if (stream_peek_char(stream) == '=') {
type = TOKEN_ASSIGN_L_SH;
goto triple_char;
} else {
type = TOKEN_L_SH;
goto double_char;
}
default: stream_reset_char(stream), type = TOKEN_LT; break;
} break;
break;
}
default:
stream_reset_char(stream), type = TOKEN_LT;
break;
}
break;
case '>':
switch (stream_peek_char(stream)) {
case '=': type = TOKEN_GE; goto double_char;
case '>': {
if (stream_peek_char(stream) == '=') {
type = TOKEN_ASSIGN_R_SH;
goto triple_char;
} else {
type = TOKEN_R_SH;
goto double_char;
}
break;
case '=':
type = TOKEN_GE;
goto double_char;
case '>': {
if (stream_peek_char(stream) == '=') {
type = TOKEN_ASSIGN_R_SH;
goto triple_char;
} else {
type = TOKEN_R_SH;
goto double_char;
}
default: stream_reset_char(stream), type = TOKEN_GT; break;
} break;
break;
}
default:
stream_reset_char(stream), type = TOKEN_GT;
break;
}
break;
case '~':
type = TOKEN_BIT_NOT; break;
type = TOKEN_BIT_NOT;
break;
case '!':
switch (stream_peek_char(stream)) {
case '=': type = TOKEN_NEQ; goto double_char;
default: stream_reset_char(stream), type = TOKEN_NOT; break;
} break;
case '=':
type = TOKEN_NEQ;
goto double_char;
default:
stream_reset_char(stream), type = TOKEN_NOT;
break;
}
break;
case '[':
type = TOKEN_L_BRACKET; break;
type = TOKEN_L_BRACKET;
break;
case ']':
type = TOKEN_R_BRACKET; break;
type = TOKEN_R_BRACKET;
break;
case '(':
type = TOKEN_L_PAREN; break;
type = TOKEN_L_PAREN;
break;
case ')':
type = TOKEN_R_PAREN; break;
type = TOKEN_R_PAREN;
break;
case '{':
type = TOKEN_L_BRACE; break;
type = TOKEN_L_BRACE;
break;
case '}':
type = TOKEN_R_BRACE; break;
type = TOKEN_R_BRACE;
break;
case ';':
type = TOKEN_SEMICOLON; break;
type = TOKEN_SEMICOLON;
break;
case ',':
type = TOKEN_COMMA; break;
type = TOKEN_COMMA;
break;
case ':':
type = TOKEN_COLON; break;
type = TOKEN_COLON;
break;
case '.':
if (stream_peek_char(stream) == '.' && stream_peek_char(stream) == '.') {
if (stream_peek_char(stream) == '.' &&
stream_peek_char(stream) == '.') {
type = TOKEN_ELLIPSIS;
goto triple_char;
}
type = TOKEN_DOT; break;
type = TOKEN_DOT;
break;
case '?':
type = TOKEN_COND; break;
case '\v': case '\r': case '\f':
case ' ': case '\t':
type = TOKEN_BLANK; break;
type = TOKEN_COND;
break;
case '\v':
case '\r':
case '\f':
case ' ':
case '\t':
type = TOKEN_BLANK;
break;
case '\n':
// you need to flush a newline or blank
stream_next_char(stream);
@@ -565,19 +671,22 @@ void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
case '"':
parse_string(lexer, token);
goto END;
/* clang-format off */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
/* clang-format on */
parse_number(lexer, token);
goto END;
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z':
case '_':
/* clang-format off */
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
case 'v': case 'w': case 'x': case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_':
/* clang-format on */
// TOKEN_IDENT
// TODO
// if ((ch == 'L' && ch == '\'') || (ch == 'L' && ch == '"')) {
@@ -596,13 +705,15 @@ void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
break;
}
int res = keyword_cmp((const char*)str.data, str.len);
int res = keyword_cmp((const char *)str.data, str.size - 1);
if (res == -1) {
token->value.cstr.data = (char*)cstring_as_cstr(&str);
token->value.cstr.data = (char *)cstring_as_cstr(&str);
token->value.cstr.len = cstring_len(&str);
type = TOKEN_IDENT; break;
type = TOKEN_IDENT;
break;
} else {
type = keywords[res].tok; break;
type = keywords[res].tok;
break;
}
default:
LEX_ERROR("unsupport char in sourse code `%c`", ch);
@@ -621,16 +732,17 @@ once_char:
token->type = type;
END:
LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(token->type),
token->loc.name, token->loc.line, token->loc.column);
token->loc.name, token->loc.line, token->loc.column);
}
// lexer_get_token maybe got invalid (with parser)
void lexer_get_valid_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
void lexer_get_valid_token(smcc_lexer_t *lexer, lexer_tok_t *token) {
token_subtype_t type;
do {
lexer_get_token(lexer, token);
type = get_tok_subtype(token->type);
AssertFmt(type != TK_BASIC_INVALID, "Invalid token: `%s` at %s:%d:%d",
get_tok_name(token->type), token->loc.name, token->loc.line, token->loc.column);
get_tok_name(token->type), token->loc.name, token->loc.line,
token->loc.column);
} while (type == TK_BASIC_EMPTYSPACE || type == TK_BASIC_COMMENT);
}

View File

@@ -1,4 +1,2 @@
int main() {
}
int main() {}

View File

@@ -1,7 +1,7 @@
#include <lexer.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
/// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
/*
@@ -20,19 +20,17 @@ tok_tConstant {
int g_num;
int g_num_arr[3];
int main(int argc, char* argv[]) {
int main(int argc, char *argv[]) {
// int num = 0;
if (argc == 3 && strcmp(argv[2], "-nodebug") == 0) {
log_set_level(NULL, LOG_LEVEL_INFO
| LOG_LEVEL_WARN
| LOG_LEVEL_ERROR);
log_set_level(NULL, LOG_LEVEL_INFO | LOG_LEVEL_WARN | LOG_LEVEL_ERROR);
}
const char* file_name = __FILE__;
const char *file_name = __FILE__;
if (argc == 2) {
file_name = argv[1];
}
FILE* fp = fopen(file_name, "rb");
FILE *fp = fopen(file_name, "rb");
if (fp == NULL) {
perror("open file failed");
return 1;
@@ -50,7 +48,7 @@ int main(int argc, char* argv[]) {
return 1;
}
char* buffer = (char*) malloc(fsize);
char *buffer = (char *)malloc(fsize);
usize read_ret = fread(buffer, 1, fsize, fp);
fclose(fp);
@@ -62,7 +60,8 @@ int main(int argc, char* argv[]) {
smcc_lexer_t lexer;
core_mem_stream_t mem_stream = {0};
core_stream_t* stream = core_mem_stream_init(&mem_stream, buffer, fsize, false);
core_stream_t *stream =
core_mem_stream_init(&mem_stream, buffer, fsize, false);
Assert(stream != null);
cstring_clear(&stream->name);
cstring_push_cstr(&stream->name, file_name, strlen(file_name));
@@ -74,7 +73,8 @@ int main(int argc, char* argv[]) {
if (tok.type == TOKEN_EOF) {
break;
}
LOG_DEBUG("token `%s` at %s:%u:%u", get_tok_name(tok.type), tok.loc.name, tok.loc.line, tok.loc.column);
LOG_DEBUG("token `%s` at %s:%u:%u", get_tok_name(tok.type),
tok.loc.name, tok.loc.line, tok.loc.column);
Assert(tok.loc.offset <= fsize);
// LOG_DEBUG("%s", tok.val.str);
// printf("line: %d, column: %d, type: %3d, typename: %s\n",