feat(frontend): 重构词法分析器
- 添加 .gitignore 文件,忽略编译器生成的二进制文件
- 重构 lexer.c 文件,改进了关键字处理和字符串处理
- 更新前端的前端、解析器和 AST 相关文件,以适应新的词法分析器
- 优化了 token 相关的定义和函数,引入了新的 token 类型
This commit is contained in:
parent
05c637e594
commit
2b4857001c
22
.gitignore
vendored
Normal file
22
.gitignore
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
.vscode/
|
||||
|
||||
# smcc compiler generated files
|
||||
*.bin
|
||||
|
||||
# linux binary files
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
*.out
|
||||
|
||||
# windows binary files
|
||||
*.obj
|
||||
*.lib
|
||||
*.dll
|
||||
*.exe
|
||||
|
||||
# development notes
|
||||
note.md
|
||||
|
||||
# python
|
||||
.venv
|
@ -7,7 +7,7 @@
|
||||
// 指令编码联合体(自动处理小端序)
|
||||
typedef union rv32code {
|
||||
uint32_t code;
|
||||
u8_t bytes[4];
|
||||
uint8_t bytes[4];
|
||||
} rv32code_t;
|
||||
|
||||
#include "../../frontend/frontend.h"
|
||||
|
@ -4,14 +4,16 @@
|
||||
|
||||
ast_node_t* frontend(const char* file, void* stream, sread_fn sread) {
|
||||
init_lib_core();
|
||||
strpool_t strpool;
|
||||
init_strpool(&strpool);
|
||||
|
||||
lexer_t lexer;
|
||||
init_lexer(&lexer, file, stream, sread);
|
||||
init_lexer(&lexer, file, stream, sread, &strpool);
|
||||
|
||||
symtab_t symtab;
|
||||
init_symtab(&symtab);
|
||||
|
||||
parser_t parser;
|
||||
parser_t parser;
|
||||
init_parser(&parser, &lexer, &symtab);
|
||||
parse_prog(&parser);
|
||||
|
||||
|
@ -34,7 +34,7 @@ David Hanson / drh@drhanson.net
|
||||
static const struct {
|
||||
const char* name;
|
||||
enum CSTD_KEYWORD std_type;
|
||||
tok_type_t tok;
|
||||
cc_tktype_t tok;
|
||||
} keywords[] = {
|
||||
#define X(name, std_type, tok, ...) { #name, std_type, tok },
|
||||
KEYWORD_TABLE
|
||||
@ -74,19 +74,17 @@ static inline int keyword_cmp(const char* name, int len) {
|
||||
return -1; // Not a keyword.
|
||||
}
|
||||
|
||||
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread) {
|
||||
init_lib_core();
|
||||
|
||||
lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer);
|
||||
lexer->index = 1;
|
||||
lexer->line = 1;
|
||||
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread, strpool_t* strpool) {
|
||||
lexer->strpool = strpool;
|
||||
lexer->cur_ptr = lexer->end_ptr = (char*)&(lexer->buffer);
|
||||
lexer->loc.fname = strpool_intern(lexer->strpool, file_name);
|
||||
lexer->loc.line = 1;
|
||||
lexer->loc.col = 1;
|
||||
|
||||
lexer->stream = stream;
|
||||
lexer->sread = sread;
|
||||
|
||||
for (int i = 0; i < sizeof(lexer->buffer) / sizeof(lexer->buffer[0]); i++) {
|
||||
lexer->buffer[i] = 0;
|
||||
}
|
||||
rt_memset(lexer->buffer, 0, sizeof(lexer->buffer));
|
||||
}
|
||||
|
||||
static void flush_buffer(lexer_t* lexer) {
|
||||
@ -94,7 +92,7 @@ static void flush_buffer(lexer_t* lexer) {
|
||||
for (int i = 0; i < num; i++) {
|
||||
lexer->buffer[i] = lexer->cur_ptr[i];
|
||||
}
|
||||
lexer->cur_ptr = (unsigned char*)lexer->buffer;
|
||||
lexer->cur_ptr = lexer->buffer;
|
||||
|
||||
int read_size = LEXER_BUFFER_SIZE - num;
|
||||
// TODO: converting rt_size_t to int may lose precision
|
||||
@ -128,19 +126,20 @@ static void goto_block_comment(lexer_t* lexer) {
|
||||
flush_buffer(lexer);
|
||||
}
|
||||
|
||||
if (*lexer->cur_ptr == '\0') {
|
||||
if (lexer->cur_ptr[0] == '\0') {
|
||||
break;
|
||||
} else if (lexer->cur_ptr[0] == '*' && lexer->cur_ptr[1] == '/') {
|
||||
lexer->cur_ptr += 2;
|
||||
break;
|
||||
} else {
|
||||
if (lexer->cur_ptr[0] == '\n') lexer->loc.line++;
|
||||
lexer->cur_ptr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: escape-character handling is incomplete
|
||||
static char got_slash(unsigned char* peek) {
|
||||
static char got_slash(char* peek) {
|
||||
switch (*peek) {
|
||||
case '\\': return '\\';
|
||||
case '\'': return '\'';
|
||||
@ -162,7 +161,7 @@ static char got_slash(unsigned char* peek) {
|
||||
|
||||
static void parse_char_literal(lexer_t* lexer, tok_t* token) {
|
||||
char val = 0;
|
||||
unsigned char* peek = lexer->cur_ptr + 1;
|
||||
char* peek = lexer->cur_ptr + 1;
|
||||
if (*peek == '\\') {
|
||||
peek++;
|
||||
val = got_slash(peek);
|
||||
@ -172,16 +171,14 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) {
|
||||
}
|
||||
|
||||
if (*peek++ != '\'') LEX_ERROR("Unclosed character literal");
|
||||
token->val.ch = val;
|
||||
lexer->cur_ptr = peek;
|
||||
token->val.have = 1;
|
||||
token->type = TOKEN_CHAR_LITERAL;
|
||||
token->val.ch = val;
|
||||
}
|
||||
|
||||
static void parse_string_literal(lexer_t* lexer, tok_t* token) {
|
||||
unsigned char* peek = lexer->cur_ptr + 1;
|
||||
char* peek = lexer->cur_ptr + 1;
|
||||
// TODO string literal size check
|
||||
char* dest = token->val.str = rt._malloc(LEXER_MAX_TOKEN_SIZE + 1);
|
||||
static char dest[LEXER_MAX_TOKEN_SIZE + 1];
|
||||
int len = 0;
|
||||
|
||||
while (*peek != '"') {
|
||||
@ -196,14 +193,15 @@ static void parse_string_literal(lexer_t* lexer, tok_t* token) {
|
||||
dest[len++] = *peek++;
|
||||
}
|
||||
dest[len] = '\0';
|
||||
lexer->cur_ptr = peek + 1;
|
||||
token->val.have = 1;
|
||||
token->type = TOKEN_STRING_LITERAL;
|
||||
lexer->cur_ptr = peek + 1; // 1 is `"`
|
||||
lexer->loc.len = len + 2; // 2 is `"` `"`
|
||||
|
||||
token->val.str = strpool_intern(lexer->strpool, dest);
|
||||
}
|
||||
|
||||
// FIXME: this function was written by AI and may contain errors
|
||||
static void parse_number(lexer_t* lexer, tok_t* token) {
|
||||
unsigned char* peek = lexer->cur_ptr;
|
||||
char* peek = lexer->cur_ptr;
|
||||
int base = 10;
|
||||
int is_float = 0;
|
||||
long long int_val = 0;
|
||||
@ -278,14 +276,15 @@ static void parse_number(lexer_t* lexer, tok_t* token) {
|
||||
}
|
||||
|
||||
// 存储结果
|
||||
// TODO
|
||||
lexer->loc.len = peek - lexer->cur_ptr;
|
||||
lexer->cur_ptr = peek;
|
||||
token->val.have = 1;
|
||||
if (is_float) {
|
||||
token->val.d = float_val;
|
||||
token->type = TOKEN_FLOAT_LITERAL;
|
||||
token->val.f32 = float_val;
|
||||
token->sub_type = TOKEN_FLOAT_LITERAL;
|
||||
} else {
|
||||
token->val.ll = int_val;
|
||||
token->type = TOKEN_INT_LITERAL;
|
||||
token->val.i = int_val;
|
||||
token->sub_type = TOKEN_INT_LITERAL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -296,160 +295,159 @@ void get_token(lexer_t* lexer, tok_t* token) {
|
||||
if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) {
|
||||
flush_buffer(lexer);
|
||||
}
|
||||
register unsigned char* peek = lexer->cur_ptr;
|
||||
|
||||
// 快速跳过空白符
|
||||
while (*peek == ' ' || *peek == '\t') {
|
||||
if (peek == lexer->end_ptr) {
|
||||
break;
|
||||
}
|
||||
peek++;
|
||||
}
|
||||
if (peek != lexer->cur_ptr) {
|
||||
// To TOKEN_FLUSH
|
||||
lexer->cur_ptr = peek;
|
||||
token->type = TOKEN_FLUSH;
|
||||
}
|
||||
|
||||
tok_type_t tok = TOKEN_INIT;
|
||||
tok_val_t constant;
|
||||
constant.have = 0;
|
||||
|
||||
register char* peek = lexer->cur_ptr;
|
||||
|
||||
cc_tktype_t tk_type = TOKEN_INIT;
|
||||
ctype_t literal = { 0 };
|
||||
|
||||
// once step
|
||||
switch (*peek++) {
|
||||
case '=':
|
||||
case '=':
|
||||
switch (*peek++) {
|
||||
case '=': tok = TOKEN_EQ; break;
|
||||
default: peek--, tok = TOKEN_ASSIGN; break;
|
||||
case '=': tk_type = TOKEN_EQ; break;
|
||||
default: peek--, tk_type = TOKEN_ASSIGN; break;
|
||||
} break;
|
||||
case '+':
|
||||
switch (*peek++) {
|
||||
case '+': tok = TOKEN_ADD_ADD; break;
|
||||
case '=': tok = TOKEN_ASSIGN_ADD; break;
|
||||
default: peek--, tok = TOKEN_ADD; break;
|
||||
case '+': tk_type = TOKEN_ADD_ADD; break;
|
||||
case '=': tk_type = TOKEN_ASSIGN_ADD; break;
|
||||
default: peek--, tk_type = TOKEN_ADD; break;
|
||||
} break;
|
||||
case '-':
|
||||
switch (*peek++) {
|
||||
case '-': tok = TOKEN_SUB_SUB; break;
|
||||
case '=': tok = TOKEN_ASSIGN_SUB; break;
|
||||
case '-': tk_type = TOKEN_SUB_SUB; break;
|
||||
case '=': tk_type = TOKEN_ASSIGN_SUB; break;
|
||||
|
||||
case '>': tok = TOKEN_DEREF; break;
|
||||
default: peek--, tok = TOKEN_SUB; break;
|
||||
case '>': tk_type = TOKEN_DEREF; break;
|
||||
default: peek--, tk_type = TOKEN_SUB; break;
|
||||
} break;
|
||||
case '*':
|
||||
switch (*peek++) {
|
||||
case '=': tok = TOKEN_ASSIGN_MUL; break;
|
||||
default: peek--, tok = TOKEN_MUL; break;
|
||||
case '=': tk_type = TOKEN_ASSIGN_MUL; break;
|
||||
default: peek--, tk_type = TOKEN_MUL; break;
|
||||
} break;
|
||||
case '/':
|
||||
switch (*peek++) {
|
||||
case '=': tok = TOKEN_ASSIGN_DIV; break;
|
||||
case '=': tk_type = TOKEN_ASSIGN_DIV; break;
|
||||
case '/': {
|
||||
// need to advance to the next line before parsing continues
|
||||
goto_newline(lexer);
|
||||
tok = TOKEN_LINE_COMMENT;
|
||||
tk_type = TOKEN_LINE_COMMENT;
|
||||
goto END;
|
||||
}
|
||||
case '*': {
|
||||
lexer->cur_ptr = peek;
|
||||
goto_block_comment(lexer);
|
||||
tok = TOKEN_BLOCK_COMMENT;
|
||||
tk_type = TOKEN_BLOCK_COMMENT;
|
||||
goto END;
|
||||
}
|
||||
default: peek--, tok = TOKEN_DIV; break;
|
||||
default: peek--, tk_type = TOKEN_DIV; break;
|
||||
} break;
|
||||
case '%':
|
||||
switch (*peek++) {
|
||||
case '=': tok = TOKEN_ASSIGN_MOD; break;
|
||||
default: peek--, tok = TOKEN_MOD; break;
|
||||
case '=': tk_type = TOKEN_ASSIGN_MOD; break;
|
||||
default: peek--, tk_type = TOKEN_MOD; break;
|
||||
} break;
|
||||
case '&':
|
||||
switch (*peek++) {
|
||||
case '&': tok = TOKEN_AND_AND; break;
|
||||
case '=': tok = TOKEN_ASSIGN_AND; break;
|
||||
default: peek--, tok = TOKEN_AND; break;
|
||||
case '&': tk_type = TOKEN_AND_AND; break;
|
||||
case '=': tk_type = TOKEN_ASSIGN_AND; break;
|
||||
default: peek--, tk_type = TOKEN_AND; break;
|
||||
} break;
|
||||
case '|':
|
||||
switch (*peek++) {
|
||||
case '|': tok = TOKEN_OR_OR; break;
|
||||
case '=': tok = TOKEN_ASSIGN_OR; break;
|
||||
default: peek--, tok = TOKEN_OR; break;
|
||||
case '|': tk_type = TOKEN_OR_OR; break;
|
||||
case '=': tk_type = TOKEN_ASSIGN_OR; break;
|
||||
default: peek--, tk_type = TOKEN_OR; break;
|
||||
} break;
|
||||
case '^':
|
||||
switch (*peek++) {
|
||||
case '=': tok = TOKEN_ASSIGN_XOR; break;
|
||||
default: peek--, tok = TOKEN_XOR; break;
|
||||
case '=': tk_type = TOKEN_ASSIGN_XOR; break;
|
||||
default: peek--, tk_type = TOKEN_XOR; break;
|
||||
} break;
|
||||
case '<':
|
||||
switch (*peek++) {
|
||||
case '=': tok = TOKEN_LE; break;
|
||||
case '<': tok = (*peek == '=') ? (peek++, TOKEN_ASSIGN_L_SH) : TOKEN_L_SH; break;
|
||||
default: peek--, tok = TOKEN_LT; break;
|
||||
case '=': tk_type = TOKEN_LE; break;
|
||||
case '<': tk_type = (*peek == '=') ? (peek++, TOKEN_ASSIGN_L_SH) : TOKEN_L_SH; break;
|
||||
default: peek--, tk_type = TOKEN_LT; break;
|
||||
} break;
|
||||
case '>':
|
||||
switch (*peek++) {
|
||||
case '=': tok = TOKEN_GE; break;
|
||||
case '>': tok = (*peek == '=') ? (peek++, TOKEN_ASSIGN_R_SH) : TOKEN_R_SH; break;
|
||||
default: peek--, tok = TOKEN_GT; break;
|
||||
case '=': tk_type = TOKEN_GE; break;
|
||||
case '>': tk_type = (*peek == '=') ? (peek++, TOKEN_ASSIGN_R_SH) : TOKEN_R_SH; break;
|
||||
default: peek--, tk_type = TOKEN_GT; break;
|
||||
} break;
|
||||
case '~':
|
||||
tok = TOKEN_BIT_NOT; break;
|
||||
tk_type = TOKEN_BIT_NOT; break;
|
||||
case '!':
|
||||
switch (*peek++) {
|
||||
case '=': tok = TOKEN_NEQ; break;
|
||||
default: peek--, tok = TOKEN_NOT; break;
|
||||
case '=': tk_type = TOKEN_NEQ; break;
|
||||
default: peek--, tk_type = TOKEN_NOT; break;
|
||||
} break;
|
||||
case '[':
|
||||
tok = TOKEN_L_BRACKET; break;
|
||||
tk_type = TOKEN_L_BRACKET; break;
|
||||
case ']':
|
||||
tok = TOKEN_R_BRACKET; break;
|
||||
tk_type = TOKEN_R_BRACKET; break;
|
||||
case '(':
|
||||
tok = TOKEN_L_PAREN; break;
|
||||
tk_type = TOKEN_L_PAREN; break;
|
||||
case ')':
|
||||
tok = TOKEN_R_PAREN; break;
|
||||
tk_type = TOKEN_R_PAREN; break;
|
||||
case '{':
|
||||
tok = TOKEN_L_BRACE; break;
|
||||
tk_type = TOKEN_L_BRACE; break;
|
||||
case '}':
|
||||
tok = TOKEN_R_BRACE; break;
|
||||
tk_type = TOKEN_R_BRACE; break;
|
||||
case ';':
|
||||
tok = TOKEN_SEMICOLON; break;
|
||||
tk_type = TOKEN_SEMICOLON; break;
|
||||
case ',':
|
||||
tok = TOKEN_COMMA; break;
|
||||
tk_type = TOKEN_COMMA; break;
|
||||
case ':':
|
||||
tok = TOKEN_COLON; break;
|
||||
tk_type = TOKEN_COLON; break;
|
||||
case '.':
|
||||
if (peek[0] == '.' && peek[1] == '.') {
|
||||
peek += 2;
|
||||
tok = TOKEN_ELLIPSIS;
|
||||
tk_type = TOKEN_ELLIPSIS;
|
||||
} else {
|
||||
tok = TOKEN_DOT;
|
||||
tk_type = TOKEN_DOT;
|
||||
}
|
||||
break;
|
||||
case '?':
|
||||
tok = TOKEN_COND; break;
|
||||
case '\v': case '\r': case '\f': // FIXME it parse as a blank character
|
||||
tok = TOKEN_FLUSH; break;
|
||||
case '\n':
|
||||
tk_type = TOKEN_COND; break;
|
||||
case '\v': case '\r': case '\f':
|
||||
case ' ': case '\t':
|
||||
tk_type = TOKEN_BLANK; break;
|
||||
case '\n':
|
||||
// you need to flush a newline or blank
|
||||
lexer->line++;
|
||||
tok = TOKEN_FLUSH; break;
|
||||
lexer->loc.line += 1;
|
||||
lexer->loc.col = -1;
|
||||
lexer->loc.len = 1;
|
||||
tk_type = TOKEN_BLANK;
|
||||
break;
|
||||
case '#':
|
||||
LEX_WARN("Marroc does not support in lexer rather in preprocessor, it will be ignored");
|
||||
// TODO make line or file comment to change
|
||||
LEX_WARN("Maroc does not support in lexer rather in preprocessor, it will be ignored");
|
||||
goto_newline(lexer);
|
||||
tok = TOKEN_FLUSH;
|
||||
tk_type = TOKEN_BLANK;
|
||||
goto END;
|
||||
case '\0':
|
||||
// EOF
|
||||
tok = TOKEN_EOF;
|
||||
tk_type = TOKEN_EOF;
|
||||
goto END;
|
||||
case '\'':
|
||||
return parse_char_literal(lexer, token);
|
||||
return;
|
||||
parse_char_literal(lexer, token);
|
||||
literal = token->val;
|
||||
tk_type = TOKEN_CHAR_LITERAL;
|
||||
goto END; break;
|
||||
case '"':
|
||||
return parse_string_literal(lexer, token);
|
||||
parse_string_literal(lexer, token);
|
||||
literal = token->val;
|
||||
tk_type = TOKEN_STRING_LITERAL;
|
||||
goto END; break;
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
return parse_number(lexer, token);
|
||||
parse_number(lexer, token);
|
||||
// TODO Make it easy
|
||||
literal = token->val;
|
||||
tk_type = token->sub_type;
|
||||
goto END; break;
|
||||
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
||||
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
|
||||
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
|
||||
@ -475,39 +473,53 @@ void get_token(lexer_t* lexer, tok_t* token) {
|
||||
break;
|
||||
}
|
||||
|
||||
int res = keyword_cmp((const char*)lexer->cur_ptr, peek - (lexer->cur_ptr));
|
||||
int strlen = peek - lexer->cur_ptr;
|
||||
int res = keyword_cmp((const char*)lexer->cur_ptr, strlen);
|
||||
if (res == -1) {
|
||||
int strlen = peek - lexer->cur_ptr;
|
||||
unsigned char* str = rt._malloc(strlen + 1);
|
||||
constant.have = 1;
|
||||
constant.str = (char*)str;
|
||||
for (int i = 0; i < strlen; i++) {
|
||||
str[i] = lexer->cur_ptr[i];
|
||||
}
|
||||
str[strlen] = '\0';
|
||||
constant.have = 1;
|
||||
constant.str = (char*)str;
|
||||
tok = TOKEN_IDENT; break;
|
||||
char prev = lexer->cur_ptr[strlen];
|
||||
lexer->cur_ptr[strlen] = '\0';
|
||||
literal.str = strpool_intern(lexer->strpool, lexer->cur_ptr);
|
||||
lexer->cur_ptr[strlen] = prev;
|
||||
tk_type = TOKEN_IDENT; break;
|
||||
} else {
|
||||
tok = keywords[res].tok; break;
|
||||
tk_type = keywords[res].tok; break;
|
||||
}
|
||||
default:
|
||||
LEX_ERROR("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
|
||||
break;
|
||||
}
|
||||
|
||||
lexer->loc.len = peek - lexer->cur_ptr;
|
||||
lexer->cur_ptr = peek;
|
||||
END:
|
||||
token->val = constant;
|
||||
token->type = tok;
|
||||
LEX_DEBUG("get token `%s` (ch: %c, int: %d)", get_tok_name(token->type), token->val.ch, token->val.i);
|
||||
lexer->loc.col += lexer->loc.len;
|
||||
lexer->loc.len = 0;
|
||||
|
||||
token->val = literal;
|
||||
token->sub_type = tk_type;
|
||||
token->loc = lexer->loc;
|
||||
static const tok_basic_type_t tok_type_map[] = {
|
||||
// 普通token使用#str
|
||||
#define X(str, basic, tok) [tok] = basic,
|
||||
TOKEN_TABLE
|
||||
#undef X
|
||||
|
||||
// 关键字使用#name
|
||||
#define X(name, std, tok) [tok] = TK_BASIC_KEYWORD,
|
||||
KEYWORD_TABLE
|
||||
#undef X
|
||||
};
|
||||
token->type = tok_type_map[tk_type];
|
||||
LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(tk_type),
|
||||
token->loc.fname, token->loc.line, token->loc.col);
|
||||
}
|
||||
|
||||
// get_token may return tokens the parser cannot use (whitespace, comments); skip them here
|
||||
void get_valid_token(lexer_t* lexer, tok_t* token) {
|
||||
tok_type_t type;
|
||||
tok_basic_type_t type;
|
||||
do {
|
||||
get_token(lexer, token);
|
||||
type = token->type;
|
||||
} while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT);
|
||||
Assert(type != TK_BASIC_INVALID);
|
||||
} while (type == TK_BASIC_WHITESPACE || type == TK_BASIC_COMMENT);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
#ifndef __SMCC_LEXER_H__
|
||||
#define __SMCC_LEXER_H__
|
||||
#ifndef __SMCC_CC_LEXER_H__
|
||||
#define __SMCC_CC_LEXER_H__
|
||||
|
||||
#include <lib/core.h>
|
||||
#include "token.h"
|
||||
@ -14,25 +14,25 @@ typedef int (*lexer_sread_fn)(void *dst_buf, int dst_size,
|
||||
int elem_size, int count, void *stream);
|
||||
|
||||
typedef struct lexer {
|
||||
int line;
|
||||
int index;
|
||||
// const char current_file_name[LEXER_BUFFER_SIZE+1];
|
||||
loc_t loc;
|
||||
|
||||
unsigned char* cur_ptr; // 当前扫描的字符,但是还没有开始扫描
|
||||
unsigned char* end_ptr; // 缓冲区最后一个字符的下一个位置
|
||||
char* cur_ptr; // 当前扫描的字符,但是还没有开始扫描
|
||||
char* end_ptr; // 缓冲区最后一个字符的下一个位置
|
||||
char buffer[LEXER_BUFFER_SIZE+1];
|
||||
|
||||
lexer_sread_fn sread;
|
||||
void* stream;
|
||||
|
||||
strpool_t* strpool;
|
||||
} lexer_t;
|
||||
|
||||
void init_lexer(lexer_t* lexer, const char* file_name, void* stream,
|
||||
lexer_sread_fn sread);
|
||||
lexer_sread_fn sread, strpool_t* strpool);
|
||||
|
||||
// pure token getter it will included empty token like TOKEN_FLUSH
|
||||
// Raw token getter; it also returns empty tokens such as TOKEN_BLANK
|
||||
void get_token(lexer_t* lexer, tok_t* token);
|
||||
|
||||
// get_token maybe got invalid (with parser as TOKEN_FLUSH)
|
||||
// get_token may return tokens the parser treats as invalid (such as TOKEN_BLANK)
|
||||
void get_valid_token(lexer_t* lexer, tok_t* token);
|
||||
|
||||
#endif
|
||||
|
@ -3,11 +3,44 @@
|
||||
|
||||
#include <lib/rt/rt.h>
|
||||
|
||||
#define LEX_NOTSET( fmt, ...) LOG_NOTSET("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_DEBUG( fmt, ...) LOG_DEBUG("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_INFO( fmt, ...) LOG_INFO("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_WARN( fmt, ...) LOG_WARN("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_ERROR( fmt, ...) LOG_ERROR("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_FATAL( fmt, ...) LOG_FATAL("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#ifndef LEX_LOG_LEVEL
|
||||
#define LEX_LOG_LEVEL 4
|
||||
#endif
|
||||
|
||||
#if LEX_LOG_LEVEL <= 1
|
||||
#define LEX_NOTSET( fmt, ...) LOG_NOTSET("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#else
|
||||
#define LEX_NOTSET( fmt, ...)
|
||||
#endif
|
||||
|
||||
#if LEX_LOG_LEVEL <= 2
|
||||
#define LEX_DEBUG( fmt, ...) LOG_DEBUG( "LEXER: " fmt, ##__VA_ARGS__)
|
||||
#else
|
||||
#define LEX_DEBUG( fmt, ...)
|
||||
#endif
|
||||
|
||||
#if LEX_LOG_LEVEL <= 3
|
||||
#define LEX_INFO( fmt, ...) LOG_INFO( "LEXER: " fmt, ##__VA_ARGS__)
|
||||
#else
|
||||
#define LEX_INFO( fmt, ...)
|
||||
#endif
|
||||
|
||||
#if LEX_LOG_LEVEL <= 4
|
||||
#define LEX_WARN( fmt, ...) LOG_WARN( "LEXER: " fmt, ##__VA_ARGS__)
|
||||
#else
|
||||
#define LEX_WARN( fmt, ...)
|
||||
#endif
|
||||
|
||||
#if LEX_LOG_LEVEL <= 5
|
||||
#define LEX_ERROR( fmt, ...) LOG_ERROR("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#else
|
||||
#define LEX_ERROR( fmt, ...)
|
||||
#endif
|
||||
|
||||
#if LEX_LOG_LEVEL <= 6
|
||||
#define LEX_FATAL( fmt, ...) LOG_FATAL("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#else
|
||||
#define LEX_FATAL( fmt, ...)
|
||||
#endif
|
||||
|
||||
#endif // __SMCC_LEXER_LOG_H__
|
||||
|
@ -1,5 +1,5 @@
|
||||
CC = gcc
|
||||
CFLAGS = -g -Wall -I../../../..
|
||||
CFLAGS = -g -Wall -I../../../.. -DLEX_LOG_LEVEL=4
|
||||
SRC = ../lexer.c ../token.c
|
||||
LIB = -L../../../../lib -lcore
|
||||
|
||||
|
@ -38,14 +38,18 @@ int main(int argc, char* argv[]) {
|
||||
printf("open file success\n");
|
||||
|
||||
lexer_t lexer;
|
||||
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);
|
||||
strpool_t strpool;
|
||||
init_strpool(&strpool);
|
||||
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s, &strpool);
|
||||
tok_t tok;
|
||||
|
||||
while (1) {
|
||||
get_valid_token(&lexer, &tok);
|
||||
if (tok.type == TOKEN_EOF) {
|
||||
if (tok.sub_type == TOKEN_EOF) {
|
||||
break;
|
||||
}
|
||||
LOG_DEBUG("tk type `%s` in %s:%d:%d", get_tok_name(tok.sub_type), tok.loc.fname, tok.loc.line, tok.loc.col);
|
||||
// LOG_DEBUG("%s", tok.val.str);
|
||||
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
|
||||
// lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
// test_lexer.c
|
||||
#include "../../../../libcore/acutest.h"
|
||||
#include <lib/acutest.h>
|
||||
#include "../lexer.h"
|
||||
#include <string.h>
|
||||
|
||||
@ -13,7 +13,7 @@ int test_read(void *dst_buf, int dst_size, int elem_size, int count, void *strea
|
||||
}
|
||||
|
||||
// 测试辅助函数
|
||||
static inline void test_lexer_string(const char* input, tok_type_t expected_type) {
|
||||
static inline void test_lexer_string(const char* input, cc_tktype_t expected_type) {
|
||||
lexer_t lexer;
|
||||
tok_t token;
|
||||
|
||||
|
@ -52,14 +52,14 @@ tok_t *peek_tok(tok_stream_t *tokbuf) {
|
||||
return &(tokbuf->buf[idx]);
|
||||
}
|
||||
|
||||
tok_type_t peek_tok_type(tok_stream_t* tokbuf) {
|
||||
return peek_tok(tokbuf)->type;
|
||||
cc_tktype_t peek_tok_type(tok_stream_t* tokbuf) {
|
||||
return peek_tok(tokbuf)->sub_type;
|
||||
}
|
||||
|
||||
int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type) {
|
||||
int expect_pop_tok(tok_stream_t* tokbuf, cc_tktype_t type) {
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_t* tok = peek_tok(tokbuf);
|
||||
if (tok->type != type) {
|
||||
if (tok->sub_type != type) {
|
||||
LEX_ERROR("expected tok `%s` but got `%s`", get_tok_name(type), get_tok_name(tok->type));
|
||||
return 0;
|
||||
} else {
|
||||
@ -71,7 +71,7 @@ int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type) {
|
||||
// 生成字符串映射(根据需求选择#str或#name)
|
||||
static const char* token_strings[] = {
|
||||
// 普通token使用#str
|
||||
#define X(str, tok) [tok] = #str,
|
||||
#define X(str, basic, tok) [tok] = #str,
|
||||
TOKEN_TABLE
|
||||
#undef X
|
||||
|
||||
@ -81,6 +81,6 @@ static const char* token_strings[] = {
|
||||
#undef X
|
||||
};
|
||||
|
||||
const char* get_tok_name(tok_type_t type) {
|
||||
const char* get_tok_name(cc_tktype_t type) {
|
||||
return token_strings[type];
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
#ifndef __TOKEN_H__
|
||||
#define __TOKEN_H__
|
||||
#ifndef __SMCC_CC_TOKEN_H__
|
||||
#define __SMCC_CC_TOKEN_H__
|
||||
|
||||
#include <lib/utils/utils.h>
|
||||
|
||||
enum CSTD_KEYWORD {
|
||||
CSTD_C89,
|
||||
@ -46,68 +48,68 @@ enum CSTD_KEYWORD {
|
||||
// KEYWORD_TABLE
|
||||
|
||||
#define TOKEN_TABLE \
|
||||
X(EOF , TOKEN_EOF) \
|
||||
X(init , TOKEN_INIT) \
|
||||
X(flush , TOKEN_FLUSH) \
|
||||
X("==" , TOKEN_EQ) \
|
||||
X("=" , TOKEN_ASSIGN) \
|
||||
X("++" , TOKEN_ADD_ADD) \
|
||||
X("+=" , TOKEN_ASSIGN_ADD) \
|
||||
X("+" , TOKEN_ADD) \
|
||||
X("--" , TOKEN_SUB_SUB) \
|
||||
X("-=" , TOKEN_ASSIGN_SUB) \
|
||||
X("->" , TOKEN_DEREF) \
|
||||
X("-" , TOKEN_SUB) \
|
||||
X("*=" , TOKEN_ASSIGN_MUL) \
|
||||
X("*" , TOKEN_MUL) \
|
||||
X("/=" , TOKEN_ASSIGN_DIV) \
|
||||
X("/" , TOKEN_DIV) \
|
||||
X("//" , TOKEN_LINE_COMMENT) \
|
||||
X("/* */" , TOKEN_BLOCK_COMMENT) \
|
||||
X("%=" , TOKEN_ASSIGN_MOD) \
|
||||
X("%" , TOKEN_MOD) \
|
||||
X("&&" , TOKEN_AND_AND) \
|
||||
X("&=" , TOKEN_ASSIGN_AND) \
|
||||
X("&" , TOKEN_AND) \
|
||||
X("||" , TOKEN_OR_OR) \
|
||||
X("|=" , TOKEN_ASSIGN_OR) \
|
||||
X("|" , TOKEN_OR) \
|
||||
X("^=" , TOKEN_ASSIGN_XOR) \
|
||||
X("^" , TOKEN_XOR) \
|
||||
X("<<=" , TOKEN_ASSIGN_L_SH) \
|
||||
X("<<" , TOKEN_L_SH) \
|
||||
X("<=" , TOKEN_LE) \
|
||||
X("<" , TOKEN_LT) \
|
||||
X(">>=" , TOKEN_ASSIGN_R_SH) \
|
||||
X(">>" , TOKEN_R_SH) \
|
||||
X(">=" , TOKEN_GE) \
|
||||
X(">" , TOKEN_GT) \
|
||||
X("!" , TOKEN_NOT) \
|
||||
X("!=" , TOKEN_NEQ) \
|
||||
X("~" , TOKEN_BIT_NOT) \
|
||||
X("[" , TOKEN_L_BRACKET) \
|
||||
X("]" , TOKEN_R_BRACKET) \
|
||||
X("(" , TOKEN_L_PAREN) \
|
||||
X(")" , TOKEN_R_PAREN) \
|
||||
X("{" , TOKEN_L_BRACE) \
|
||||
X("}" , TOKEN_R_BRACE) \
|
||||
X(";" , TOKEN_SEMICOLON) \
|
||||
X("," , TOKEN_COMMA) \
|
||||
X(":" , TOKEN_COLON) \
|
||||
X("." , TOKEN_DOT) \
|
||||
X("..." , TOKEN_ELLIPSIS) \
|
||||
X("?" , TOKEN_COND) \
|
||||
X(identifier , TOKEN_IDENT) \
|
||||
X(int_literal , TOKEN_INT_LITERAL) \
|
||||
X(float_literal , TOKEN_FLOAT_LITERAL) \
|
||||
X(char_literal , TOKEN_CHAR_LITERAL) \
|
||||
X(string_literal , TOKEN_STRING_LITERAL) \
|
||||
X(init , TK_BASIC_INVALID, TOKEN_INIT) \
|
||||
X(EOF , TK_BASIC_EOF, TOKEN_EOF) \
|
||||
X(blank , TK_BASIC_WHITESPACE, TOKEN_BLANK) \
|
||||
X("==" , TK_BASIC_OPERATOR, TOKEN_EQ) \
|
||||
X("=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN) \
|
||||
X("++" , TK_BASIC_OPERATOR, TOKEN_ADD_ADD) \
|
||||
X("+=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_ADD) \
|
||||
X("+" , TK_BASIC_OPERATOR, TOKEN_ADD) \
|
||||
X("--" , TK_BASIC_OPERATOR, TOKEN_SUB_SUB) \
|
||||
X("-=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_SUB) \
|
||||
X("->" , TK_BASIC_OPERATOR, TOKEN_DEREF) \
|
||||
X("-" , TK_BASIC_OPERATOR, TOKEN_SUB) \
|
||||
X("*=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MUL) \
|
||||
X("*" , TK_BASIC_OPERATOR, TOKEN_MUL) \
|
||||
X("/=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_DIV) \
|
||||
X("/" , TK_BASIC_OPERATOR, TOKEN_DIV) \
|
||||
X("//" , TK_BASIC_COMMENT , TOKEN_LINE_COMMENT) \
|
||||
X("/* */" , TK_BASIC_COMMENT , TOKEN_BLOCK_COMMENT) \
|
||||
X("%=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MOD) \
|
||||
X("%" , TK_BASIC_OPERATOR, TOKEN_MOD) \
|
||||
X("&&" , TK_BASIC_OPERATOR, TOKEN_AND_AND) \
|
||||
X("&=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_AND) \
|
||||
X("&" , TK_BASIC_OPERATOR, TOKEN_AND) \
|
||||
X("||" , TK_BASIC_OPERATOR, TOKEN_OR_OR) \
|
||||
X("|=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_OR) \
|
||||
X("|" , TK_BASIC_OPERATOR, TOKEN_OR) \
|
||||
X("^=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_XOR) \
|
||||
X("^" , TK_BASIC_OPERATOR, TOKEN_XOR) \
|
||||
X("<<=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_L_SH) \
|
||||
X("<<" , TK_BASIC_OPERATOR, TOKEN_L_SH) \
|
||||
X("<=" , TK_BASIC_OPERATOR, TOKEN_LE) \
|
||||
X("<" , TK_BASIC_OPERATOR, TOKEN_LT) \
|
||||
X(">>=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_R_SH) \
|
||||
X(">>" , TK_BASIC_OPERATOR, TOKEN_R_SH) \
|
||||
X(">=" , TK_BASIC_OPERATOR, TOKEN_GE) \
|
||||
X(">" , TK_BASIC_OPERATOR, TOKEN_GT) \
|
||||
X("!" , TK_BASIC_OPERATOR, TOKEN_NOT) \
|
||||
X("!=" , TK_BASIC_OPERATOR, TOKEN_NEQ) \
|
||||
X("~" , TK_BASIC_OPERATOR, TOKEN_BIT_NOT) \
|
||||
X("[" , TK_BASIC_OPERATOR, TOKEN_L_BRACKET) \
|
||||
X("]" , TK_BASIC_OPERATOR, TOKEN_R_BRACKET) \
|
||||
X("(" , TK_BASIC_OPERATOR, TOKEN_L_PAREN) \
|
||||
X(")" , TK_BASIC_OPERATOR, TOKEN_R_PAREN) \
|
||||
X("{" , TK_BASIC_OPERATOR, TOKEN_L_BRACE) \
|
||||
X("}" , TK_BASIC_OPERATOR, TOKEN_R_BRACE) \
|
||||
X(";" , TK_BASIC_OPERATOR, TOKEN_SEMICOLON) \
|
||||
X("," , TK_BASIC_OPERATOR, TOKEN_COMMA) \
|
||||
X(":" , TK_BASIC_OPERATOR, TOKEN_COLON) \
|
||||
X("." , TK_BASIC_OPERATOR, TOKEN_DOT) \
|
||||
X("..." , TK_BASIC_OPERATOR, TOKEN_ELLIPSIS) \
|
||||
X("?" , TK_BASIC_OPERATOR, TOKEN_COND) \
|
||||
X(ident , TK_BASIC_IDENTIFIER, TOKEN_IDENT) \
|
||||
X(int_literal , TK_BASIC_LITERAL, TOKEN_INT_LITERAL) \
|
||||
X(float_literal , TK_BASIC_LITERAL, TOKEN_FLOAT_LITERAL) \
|
||||
X(char_literal , TK_BASIC_LITERAL, TOKEN_CHAR_LITERAL) \
|
||||
X(string_literal , TK_BASIC_LITERAL, TOKEN_STRING_LITERAL) \
|
||||
// END
|
||||
|
||||
// 定义TokenType枚举
|
||||
typedef enum tok_type {
|
||||
typedef enum cc_tktype {
|
||||
// 处理普通token
|
||||
#define X(str, tok) tok,
|
||||
#define X(str, basic, tok) tok,
|
||||
TOKEN_TABLE
|
||||
#undef X
|
||||
|
||||
@ -115,24 +117,7 @@ typedef enum tok_type {
|
||||
#define X(name, std, tok) tok,
|
||||
KEYWORD_TABLE
|
||||
#undef X
|
||||
} tok_type_t;
|
||||
|
||||
typedef struct tok_val {
|
||||
int have;
|
||||
union {
|
||||
char ch;
|
||||
int i;
|
||||
float f;
|
||||
double d;
|
||||
long long ll;
|
||||
char* str;
|
||||
};
|
||||
} tok_val_t;
|
||||
|
||||
typedef struct tok {
|
||||
tok_type_t type;
|
||||
tok_val_t val;
|
||||
} tok_t;
|
||||
} cc_tktype_t;
|
||||
|
||||
typedef struct tok_stream {
|
||||
int cur;
|
||||
@ -150,8 +135,8 @@ void init_tokbuf(tok_stream_t* tokbuf, void* stream, tok_stream_get_func gettok)
|
||||
tok_t* peek_tok(tok_stream_t* tokbuf);
|
||||
tok_t* pop_tok(tok_stream_t* tokbuf);
|
||||
void flush_peek_tok(tok_stream_t* tokbuf);
|
||||
tok_type_t peek_tok_type(tok_stream_t* tokbuf);
|
||||
int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type);
|
||||
const char* get_tok_name(tok_type_t type);
|
||||
cc_tktype_t peek_tok_type(tok_stream_t* tokbuf);
|
||||
int expect_pop_tok(tok_stream_t* tokbuf, cc_tktype_t type);
|
||||
const char* get_tok_name(cc_tktype_t type);
|
||||
|
||||
#endif
|
||||
|
@ -19,7 +19,7 @@ ast_node_t* parse_block(parser_t* parser) {
|
||||
symtab_enter_scope(parser->symtab);
|
||||
tok_stream_t *tokbuf = &parser->tokbuf;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
ast_node_t* node = new_ast_node_block();
|
||||
|
||||
expect_pop_tok(tokbuf, TOKEN_L_BRACE);
|
||||
|
@ -37,7 +37,7 @@ int peek_decl(tok_stream_t* tokbuf) {
|
||||
|
||||
ast_node_t* parse_decl_val(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &parser->tokbuf;
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
flush_peek_tok(tokbuf);
|
||||
|
||||
ast_node_t* node;
|
||||
@ -69,7 +69,7 @@ ast_node_t* parse_decl_val(parser_t* parser) {
|
||||
ast_node_t* parse_decl(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &parser->tokbuf;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
ast_node_t* node;
|
||||
|
||||
if (peek_decl(tokbuf) == 0) {
|
||||
|
@ -82,7 +82,7 @@ static ast_node_t* parse_comma(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_
|
||||
|
||||
static ast_node_t* parse_assign(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
pop_tok(tokbuf);
|
||||
ast_node_t* node = new_ast_node();
|
||||
node->type = NT_ASSIGN;
|
||||
@ -133,7 +133,7 @@ static ast_node_t* parse_assign(tok_stream_t* tokbuf, symtab_t *symtab, ast_node
|
||||
|
||||
static ast_node_t* parse_cmp(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
pop_tok(tokbuf);
|
||||
ast_node_t* node = new_ast_node();
|
||||
// saved left
|
||||
@ -171,7 +171,7 @@ static ast_node_t* parse_cmp(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t*
|
||||
|
||||
static ast_node_t* parse_cal(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
pop_tok(tokbuf);
|
||||
ast_node_t* node = new_ast_node();
|
||||
node->expr.left = left;
|
||||
@ -238,7 +238,7 @@ static ast_node_t* parse_call(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t
|
||||
vector_init(node->call.params->params.params);
|
||||
pop_tok(tokbuf); // 跳过 '('
|
||||
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
while (1) {
|
||||
flush_peek_tok(tokbuf);
|
||||
ttype = peek_tok_type(tokbuf);
|
||||
@ -330,7 +330,7 @@ static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symt
|
||||
node->type = NT_TERM_VAL;
|
||||
node->syms.tok = *tok;
|
||||
|
||||
switch (tok->type) {
|
||||
switch (tok->sub_type) {
|
||||
case TOKEN_INT_LITERAL:
|
||||
// node->data.data_type = TYPE_INT;
|
||||
break;
|
||||
@ -344,7 +344,7 @@ static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symt
|
||||
// node->data.data_type = TYPE_POINTER;
|
||||
case TOKEN_IDENT:
|
||||
node = expect_pop_ident(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
if (ttype == TOKEN_L_PAREN) {
|
||||
node = parse_call(tokbuf, symtab, node);
|
||||
} else {
|
||||
@ -365,7 +365,7 @@ END:
|
||||
}
|
||||
|
||||
static ast_node_t *parse_subexpression(tok_stream_t* tokbuf, symtab_t *symtab, enum Precedence prec) {
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
struct expr_prec_table_t* work;
|
||||
ast_node_t* left;
|
||||
|
||||
@ -400,7 +400,7 @@ ast_node_t* parse_expr(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &(parser->tokbuf);
|
||||
symtab_t *symtab = parser->symtab;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
switch (ttype) {
|
||||
case TOKEN_NOT:
|
||||
case TOKEN_AND:
|
||||
|
@ -9,7 +9,7 @@
|
||||
// TODO 语义分析压入符号表
|
||||
static void parse_params(parser_t* parser, tok_stream_t* cache, ast_node_t* node) {
|
||||
flush_peek_tok(cache);
|
||||
tok_type_t ttype;
|
||||
cc_tktype_t ttype;
|
||||
ast_node_t *params = new_ast_node();
|
||||
node->decl_func.params = params;
|
||||
vector_init(params->params.params);
|
||||
@ -79,7 +79,7 @@ ast_type_t check_is_func_decl(tok_stream_t* tokbuf, tok_stream_t* cache) {
|
||||
LOG_ERROR("function parameter list too long");
|
||||
}
|
||||
cache->buf[cache->size++] = *tok;
|
||||
switch (tok->type) {
|
||||
switch (tok->sub_type) {
|
||||
case TOKEN_L_PAREN:
|
||||
depth++;
|
||||
break;
|
||||
|
@ -4,7 +4,7 @@
|
||||
ast_node_t* parse_stmt(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &parser->tokbuf;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
ast_node_t* node = new_ast_node();
|
||||
switch (ttype) {
|
||||
case TOKEN_IF: {
|
||||
|
@ -3,8 +3,8 @@
|
||||
#include "../type.h"
|
||||
|
||||
ast_node_t* new_ast_ident_node(tok_t* tok) {
|
||||
if (tok->type != TOKEN_IDENT) {
|
||||
LOG_ERROR("syntax error: want identifier but got %d", tok->type);
|
||||
if (tok->sub_type != TOKEN_IDENT) {
|
||||
LOG_ERROR("syntax error: want identifier but got %d", tok->sub_type);
|
||||
}
|
||||
ast_node_t* node = new_ast_node();
|
||||
node->type = NT_TERM_IDENT;
|
||||
@ -24,7 +24,7 @@ ast_node_t* expect_pop_ident(tok_stream_t* tokbuf) {
|
||||
ast_node_t* parse_type(parser_t* parser) {
|
||||
tok_stream_t* tokbuf = &parser->tokbuf;
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_type_t ttype = peek_tok_type(tokbuf);
|
||||
cc_tktype_t ttype = peek_tok_type(tokbuf);
|
||||
data_type_t dtype;
|
||||
switch(ttype) {
|
||||
case TOKEN_VOID: dtype = TYPE_VOID; break;
|
||||
|
@ -1,53 +0,0 @@
|
||||
// hashmap.c
|
||||
#include "hashmap.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// DJB2 hash algorithm
|
||||
static unsigned long hash(const char* str) {
|
||||
unsigned long hash = 5381;
|
||||
int c;
|
||||
while ((c = *str++))
|
||||
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
|
||||
return hash % HMAP_SIZE;
|
||||
}
|
||||
|
||||
// Put `map` into the empty state by zero-filling its whole bucket array.
void hmap_init(HashMap* map) {
    const size_t bucket_bytes = sizeof map->buckets;
    memset(map->buckets, 0, bucket_bytes);
}
|
||||
|
||||
// Insert a key/value pair at the head of the key's bucket chain.
//
// The key is copied into storage owned by the map; `value` is stored as given.
// No check for an existing entry with the same key is made: a second put of
// the same key shadows the earlier entry because lookups walk from the head.
//
// Running out of memory is treated as fatal (abort) instead of dereferencing
// a NULL entry or storing a NULL key that a later strcmp would crash on.
void hmap_put(HashMap* map, const char* key, void* value) {
    const size_t key_size = strlen(key) + 1; // include the terminating NUL
    HashMapEntry* entry = malloc(sizeof *entry);
    char* key_copy = malloc(key_size);
    if (entry == NULL || key_copy == NULL) {
        free(key_copy);
        free(entry);
        abort();
    }
    // Plain malloc + memcpy instead of strdup, which is not part of ISO C11.
    memcpy(key_copy, key, key_size);

    unsigned long idx = hash(key);
    entry->key = key_copy;
    entry->value = value;
    entry->next = map->buckets[idx];
    map->buckets[idx] = entry;
}
|
||||
|
||||
// Look `key` up in its bucket chain.
// Returns the value of the first entry whose key compares equal with strcmp,
// or NULL when no entry matches.
void* hmap_get(HashMap* map, const char* key) {
    for (HashMapEntry* cur = map->buckets[hash(key)]; cur != NULL; cur = cur->next) {
        if (strcmp(cur->key, key) == 0) {
            return cur->value;
        }
    }
    return NULL;
}
|
||||
|
||||
// Report whether `key` is present in the map: 1 if an entry exists, else 0.
//
// The bucket chain is searched directly rather than testing the looked-up
// value against NULL, so a key that was stored with a NULL value is still
// reported as present.
int hmap_contains(HashMap* map, const char* key) {
    for (HashMapEntry* entry = map->buckets[hash(key)]; entry != NULL; entry = entry->next) {
        if (strcmp(entry->key, key) == 0) {
            return 1;
        }
    }
    return 0;
}
|
||||
|
||||
// Free every entry (and its copied key) in every bucket of `map`.
//
// Stored values are not freed. Each bucket head is reset to NULL afterwards so
// the map is left empty: a second hmap_destroy or a later lookup no longer
// walks freed entries.
void hmap_destroy(HashMap* map) {
    for (int i = 0; i < HMAP_SIZE; i++) {
        HashMapEntry* entry = map->buckets[i];
        while (entry) {
            HashMapEntry* next = entry->next;
            free(entry->key);
            free(entry);
            entry = next;
        }
        map->buckets[i] = NULL;
    }
}
|
@ -1,31 +0,0 @@
|
||||
#ifndef HASHMAP_H
|
||||
#define HASHMAP_H
|
||||
|
||||
#define HMAP_SIZE 64
|
||||
|
||||
typedef struct HashMapEntry {
|
||||
char* key;
|
||||
void* value;
|
||||
struct HashMapEntry* next;
|
||||
} HashMapEntry;
|
||||
|
||||
typedef struct {
|
||||
HashMapEntry* buckets[HMAP_SIZE];
|
||||
} HashMap;
|
||||
|
||||
// Initialize the hash map
|
||||
void hmap_init(HashMap* map);
|
||||
|
||||
// Insert a key/value pair
|
||||
void hmap_put(HashMap* map, const char* key, void* value);
|
||||
|
||||
// Look up the value stored for a key
|
||||
void* hmap_get(HashMap* map, const char* key);
|
||||
|
||||
// Check whether a key exists
|
||||
int hmap_contains(HashMap* map, const char* key);
|
||||
|
||||
// Free the hash map's memory (stored values are not freed)
|
||||
void hmap_destroy(HashMap* map);
|
||||
|
||||
#endif
|
@ -1,43 +0,0 @@
|
||||
// scope.c
|
||||
#include "scope.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct Scope Scope;
|
||||
|
||||
// Allocate a new scope whose enclosing scope is `parent` (may be NULL).
//
// The symbol map starts empty and both offsets start at 0.
// Returns NULL if the allocation fails, instead of initializing through a
// NULL pointer.
Scope* scope_create(Scope* parent) {
    Scope* scope = malloc(sizeof *scope);
    if (scope == NULL) {
        return NULL;
    }
    hmap_init(&scope->symbols);
    scope->parent = parent;
    scope->base_offset = 0;
    scope->cur_offset = 0;
    return scope;
}
|
||||
|
||||
// Release a scope: its symbol map entries, then the scope object itself.
// The parent scope is not touched. Passing NULL is a no-op, mirroring free().
void scope_destroy(Scope* scope) {
    if (scope == NULL) {
        return;
    }
    hmap_destroy(&scope->symbols);
    free(scope);
}
|
||||
|
||||
// Register `symbol` under `name` in this scope only.
// A name that is already present in this scope is a fatal error: a message is
// written to stderr and the process exits with EXIT_FAILURE.
void scope_insert(Scope* scope, const char* name, void* symbol) {
    HashMap* table = &scope->symbols;
    if (!hmap_contains(table, name)) {
        hmap_put(table, name, symbol);
        return;
    }
    // Duplicate definition.
    fprintf(stderr, "Error: Symbol '%s' already defined\n", name);
    exit(EXIT_FAILURE);
}
|
||||
|
||||
// Resolve `name` starting in `scope` and walking outwards through the parent
// links. Returns the first non-NULL value found, or NULL if no scope on the
// chain yields one.
void* scope_lookup(Scope* scope, const char* name) {
    for (Scope* cur = scope; cur != NULL; cur = cur->parent) {
        void* found = hmap_get(&cur->symbols, name);
        if (found) {
            return found;
        }
    }
    return NULL;
}
|
||||
|
||||
// Resolve `name` in `scope` itself, without consulting any parent scope.
// Returns whatever hmap_get yields for this scope's symbol map.
void* scope_lookup_current(Scope* scope, const char* name) {
    HashMap* own_symbols = &scope->symbols;
    return hmap_get(own_symbols, name);
}
|
@ -1,28 +0,0 @@
|
||||
#ifndef SCOPE_H
|
||||
#define SCOPE_H
|
||||
|
||||
#include "hashmap.h"
|
||||
|
||||
// One scope: its own symbol map plus a link to the enclosing scope.
struct Scope {
|
||||
HashMap symbols; // symbol table of the current scope
|
||||
struct Scope* parent; // enclosing (outer) scope
|
||||
int base_offset;
|
||||
int cur_offset;
|
||||
};
|
||||
|
||||
// Create a new scope (the parent scope may be NULL)
|
||||
struct Scope* scope_create(struct Scope* parent);
|
||||
|
||||
// Destroy a scope
|
||||
void scope_destroy(struct Scope* scope);
|
||||
|
||||
// Insert a symbol into the current scope
|
||||
void scope_insert(struct Scope* scope, const char* name, void* symbol);
|
||||
|
||||
// Look a symbol up scope by scope, from the innermost outwards
|
||||
void* scope_lookup(struct Scope* scope, const char* name);
|
||||
|
||||
// Look a symbol up in the current scope only
|
||||
void* scope_lookup_current(struct Scope* scope, const char* name);
|
||||
|
||||
#endif
|
@ -1,50 +0,0 @@
|
||||
// symtab.c
|
||||
#include "../../frontend.h"
|
||||
#include <lib/core.h>
|
||||
#include "scope.h"
|
||||
#include "symtab.h"
|
||||
|
||||
typedef symtab_t symtab_t;
|
||||
typedef struct Scope Scope;
|
||||
|
||||
// Set up a symbol table with a single parentless scope that serves as both
// the global scope and the current scope.
void init_symtab(symtab_t* symtab) {
    struct Scope* root = scope_create(NULL);
    symtab->global_scope = root;
    symtab->cur_scope = root;
}
|
||||
|
||||
// Tear down the symbol table.
//
// Every scope on the chain from the current scope outwards through its
// parents is destroyed, so scopes that were entered but never left are not
// leaked. Both scope pointers are cleared afterwards so they cannot be used
// or freed again.
void del_symtab(symtab_t* symtab) {
    struct Scope* scope = symtab->cur_scope;
    while (scope != NULL) {
        // Read the parent link before the scope object is released.
        struct Scope* parent = scope->parent;
        scope_destroy(scope);
        scope = parent;
    }
    symtab->cur_scope = NULL;
    symtab->global_scope = NULL;
}
|
||||
|
||||
// Push a new scope nested inside the current one and make it current.
// The new scope's base_offset is the enclosing scope's base_offset plus its
// cur_offset.
void symtab_enter_scope(symtab_t* symtab) {
    struct Scope* inner = scope_create(symtab->cur_scope);
    struct Scope* outer = symtab->cur_scope;
    inner->base_offset = outer->base_offset + outer->cur_offset;
    symtab->cur_scope = inner;
}
|
||||
|
||||
// Pop the current scope: its parent becomes current and the popped scope is
// destroyed.
//
// When there is no current scope the error is logged and the function
// returns, instead of falling through and dereferencing the NULL scope.
// NOTE(review): the message also mentions the global scope, but only the NULL
// case is checked here; leaving the global scope still destroys it — confirm
// whether that should be rejected as well.
void symtab_leave_scope(symtab_t* symtab) {
    Scope * scope = symtab->cur_scope;
    if (scope == NULL) {
        LOG_ERROR("cannot leave NULL scope or global scope");
        return;
    }
    symtab->cur_scope = scope->parent;
    scope_destroy(scope);
}
|
||||
|
||||
// Add `name` -> `ast_node` to the current scope.
//
// If the name is new in the current scope it is inserted and NULL is
// returned. If it already exists there, nothing is inserted and the existing
// value is returned; an error is logged first unless `can_duplicate` is
// non-zero.
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate) {
    struct Scope* scope = symtab->cur_scope;
    void* existing = scope_lookup_current(scope, name);

    if (existing == NULL) {
        scope_insert(scope, name, ast_node);
        return existing; // NULL: nothing was registered under this name before
    }

    if (!can_duplicate) {
        LOG_ERROR("duplicate symbol %s", name);
    }
    return existing;
}
|
||||
|
||||
// Resolve `name` starting at the current scope and walking outwards, as done
// by scope_lookup.
void* symtab_lookup_symbol(symtab_t* symtab, const char* name) {
    struct Scope* innermost = symtab->cur_scope;
    return scope_lookup(innermost, name);
}
|
@ -1,18 +0,0 @@
|
||||
// symtab.h
|
||||
// Include guard renamed from __SYMTAB_H__: identifiers containing a double
// underscore are reserved for the implementation, and the sibling headers
// already use the plain HASHMAP_H / SCOPE_H form.
#ifndef SYMTAB_H
#define SYMTAB_H

// Symbol table: a chain of scopes tracked by two pointers.
typedef struct symtab {
    struct Scope* cur_scope;    // scope that new symbols are added to
    struct Scope* global_scope; // outermost scope, created by init_symtab
} symtab_t;

// Create the global scope and make it current.
void init_symtab(symtab_t* symtab);
// Release the symbol table's scopes.
void del_symtab(symtab_t* symtab);

// Push a new scope nested in the current one.
void symtab_enter_scope(symtab_t* symtab);
// Pop the current scope and return to its parent.
void symtab_leave_scope(symtab_t* symtab);
// Add a symbol to the current scope; returns the existing value if the name
// is already present there, otherwise NULL.
void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int can_duplicate);
// Look a symbol up from the current scope outwards; NULL if not found.
void* symtab_lookup_symbol(symtab_t* symtab, const char* name);

#endif
|
@ -6,6 +6,7 @@
|
||||
// gcc -g ../parser.c ../../lexer/lexer.c ../ast/ast.c ../ast/block.c ../ast/decl.c ../ast/expr.c ../ast/func.c ../ast/program.c ../ast/stmt.c ../ast/term.c ../symtab/hashmap.c ../symtab/scope.c ../symtab/symtab.c test_parser.c -o test_parser
|
||||
// gcc -g test_parser.c -L../.. -lfrontend -o test_parser
|
||||
int main(int argc, char** argv) {
|
||||
init_lib_core();
|
||||
const char* file_name = "test_file.c";
|
||||
if (argc == 2) {
|
||||
file_name = argv[1];
|
||||
@ -17,8 +18,10 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
printf("open file success\n");
|
||||
|
||||
struct Lexer lexer;
|
||||
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);
|
||||
lexer_t lexer;
|
||||
strpool_t strpool;
|
||||
init_strpool(&strpool);
|
||||
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s, &strpool);
|
||||
|
||||
struct SymbolTable symtab;
|
||||
init_symtab(&symtab);
|
||||
|
15
lib/Makefile
15
lib/Makefile
@ -7,14 +7,27 @@ CFLAGS = -g -Wall -I..
|
||||
RT_DIR = ./rt
|
||||
LOG_DIR = ./rt/log
|
||||
|
||||
# 源文件列表
|
||||
# basic rt lib
|
||||
SRCS = \
|
||||
$(RT_DIR)/std/rt_std.c \
|
||||
./core.c \
|
||||
$(RT_DIR)/rt.c \
|
||||
$(RT_DIR)/rt_alloc.c \
|
||||
$(RT_DIR)/rt_string.c \
|
||||
$(LOG_DIR)/log.c
|
||||
|
||||
# utils lib
|
||||
UTILS_DIR = ./utils
|
||||
DS_DIR = $(UTILS_DIR)/ds
|
||||
STRPOOL_DIR = $(UTILS_DIR)/strpool
|
||||
SYMTAB_DIR = $(UTILS_DIR)/symtab
|
||||
TOKBUF_DIR = $(UTILS_DIR)/tokbuf
|
||||
SRCS += \
|
||||
$(DS_DIR)/hashtable.c \
|
||||
$(STRPOOL_DIR)/strpool.c \
|
||||
# $(SYMTAB_DIR)/symtab.c \
|
||||
# $(TOKBUF_DIR)/tokbuf.c
|
||||
|
||||
# 生成目标文件列表
|
||||
OBJS = $(SRCS:.c=.o)
|
||||
|
||||
|
@ -1,142 +1,129 @@
|
||||
#include "hashtable.h"
|
||||
|
||||
#define LOAD_FACTOR 0.75f
|
||||
// 素数表用于桶扩容(最后一个元素为最大允许容量)
|
||||
static const int PRIME_CAPACITIES[] = {
|
||||
11, 23, 47, 97, 193, 389, 769, 1543, 3079,
|
||||
6151, 12289, 24593, 49157, 98317, 196613, 393241,
|
||||
786433, 1572869, 3145739, 6291469, 12582917, 25165843
|
||||
};
|
||||
#define INIT_HASH_TABLE_SIZE (32)
|
||||
|
||||
// 私有函数声明
|
||||
static u32_t calc_hash(const char* str, int len);
|
||||
static void rehash(hash_table_t* ht);
|
||||
|
||||
hash_table_t* new_hash_table(int init_size, int max_cap) {
|
||||
hash_table_t* ht = salloc_alloc(sizeof(hash_table_t));
|
||||
hash_table_init(ht, init_size, max_cap);
|
||||
return ht;
|
||||
// Reset `ht` to the empty state: the entry vector is (re)initialized and both
// the live-entry and tombstone counters are zeroed.
// The caller must have installed hash_func and key_cmp beforehand; this is
// checked with Assert once the state has been reset.
void hashtable_init(hash_table_t* ht) {
    vector_init(ht->entries);
    ht->count = ht->tombstone_count = 0;
    Assert(ht->key_cmp != NULL && ht->hash_func != NULL);
}
|
||||
|
||||
static inline get_real_size(int size) {
|
||||
// 查找第一个不小于size的素数容量
|
||||
int cap_idx = 0;
|
||||
if (size < 0) {
|
||||
return PRIME_CAPACITIES[SMCC_ARRLEN(PRIME_CAPACITIES)-1];
|
||||
}
|
||||
while (PRIME_CAPACITIES[cap_idx] < size && cap_idx < SMCC_ARRLEN(PRIME_CAPACITIES)-1) {
|
||||
cap_idx++;
|
||||
}
|
||||
return PRIME_CAPACITIES[cap_idx];
|
||||
// Round `n` up to the nearest power of two.
//
// - n <= 1 yields 1 (the smallest power of two), so zero or negative requests
//   no longer produce 0 via right shifts of a negative value.
// - Requests above the largest power of two representable in int saturate at
//   that value instead of overflowing signed arithmetic.
// - A value that is already a power of two is returned unchanged.
static int next_power_of_two(int n) {
    // Largest power of two that fits in a signed int: (INT_MAX / 2) + 1,
    // derived from the unsigned all-ones pattern to avoid needing <limits.h>.
    const int max_pow2 = (int)((~0u >> 1) / 2u + 1u);

    if (n <= 1) {
        return 1;
    }
    if (n >= max_pow2) {
        return max_pow2;
    }

    // Smear the highest set bit of (n - 1) into every lower position, then
    // add one. Unsigned arithmetic keeps every shift well defined.
    unsigned int v = (unsigned int)n - 1u;
    for (unsigned int shift = 1; shift < sizeof(v) * 8u; shift <<= 1) {
        v |= v >> shift;
    }
    return (int)(v + 1u);
}
|
||||
|
||||
void hash_table_init(hash_table_t* ht, int init_size, int max_cap) {
|
||||
// 限制最大容量索引
|
||||
ht->max_cap = get_real_size(max_cap);
|
||||
// 应用实际容量
|
||||
ht->cap = get_real_size(init_size);
|
||||
ht->size = 0;
|
||||
ht->buckets = NULL;
|
||||
ht->buckets = salloc_realloc(ht->buckets, sizeof(hash_node_t*) * ht->cap);
|
||||
}
|
||||
static hash_entry_t* find_entry(hash_table_t* ht, const void* key, u32_t hash) {
|
||||
if (ht->entries.cap == 0) return NULL;
|
||||
|
||||
u32_t index = hash & (ht->entries.cap - 1); // 容量是2的幂
|
||||
u32_t probe = 0;
|
||||
|
||||
void hash_table_insert(hash_table_t* ht, const char* str, int len) {
|
||||
// 自动扩容检查
|
||||
if (ht->size >= ht->cap * LOAD_FACTOR && ht->cap < ht->max_cap) {
|
||||
rehash(ht);
|
||||
}
|
||||
|
||||
if (ht->size >= ht->cap) {
|
||||
LOG_TRACE("Hash table size exceeds maximum capacity. Consider increasing max_capacity.");
|
||||
}
|
||||
|
||||
// 计算哈希值
|
||||
u32_t hash = calc_hash(str, len);
|
||||
int bucket_idx = hash % ht->cap;
|
||||
|
||||
// 检查重复
|
||||
hash_node_t* node = ht->buckets[bucket_idx];
|
||||
while (node) {
|
||||
if (node->hash == hash &&
|
||||
node->len == len &&
|
||||
memcmp(node->str, str, len) == 0) {
|
||||
return; // 已存在
|
||||
hash_entry_t* tombstone = NULL;
|
||||
|
||||
while (1) {
|
||||
hash_entry_t* entry = &vector_at(ht->entries, index);
|
||||
if (entry->state == ENTRY_EMPTY) {
|
||||
return tombstone ? tombstone : entry;
|
||||
}
|
||||
node = node->next;
|
||||
}
|
||||
|
||||
// 创建新节点
|
||||
hash_node_t* new_node = salloc_alloc(sizeof(hash_node_t));
|
||||
new_node->str = str;
|
||||
new_node->len = len;
|
||||
new_node->hash = hash;
|
||||
new_node->next = ht->buckets[bucket_idx];
|
||||
ht->buckets[bucket_idx] = new_node;
|
||||
ht->size++;
|
||||
}
|
||||
|
||||
hash_node_t* hash_table_find(hash_table_t* ht, const char* str, int len) {
|
||||
u32_t hash = calc_hash(str, len);
|
||||
int bucket_idx = hash % ht->cap;
|
||||
|
||||
hash_node_t* node = ht->buckets[bucket_idx];
|
||||
while (node) {
|
||||
if (node->hash == hash &&
|
||||
node->len == len &&
|
||||
memcmp(node->str, str, len) == 0) {
|
||||
return node;
|
||||
|
||||
if (entry->state == ENTRY_TOMBSTONE) {
|
||||
if (!tombstone) tombstone = entry;
|
||||
} else if (entry->hash == hash && ht->key_cmp(entry->key, key) == 0) {
|
||||
return entry;
|
||||
}
|
||||
node = node->next;
|
||||
|
||||
// Linear probing: step to the next slot, wrapping around at the capacity
|
||||
index = (index + 1) & (ht->entries.cap - 1);
|
||||
probe++;
|
||||
if (probe >= ht->entries.cap) break;
|
||||
}
|
||||
LOG_ERROR("hashset_find: hash table is full");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void rehash(hash_table_t* ht) {
|
||||
int old_cap = ht->cap;
|
||||
hash_node_t** old_buckets = ht->buckets;
|
||||
static void adjust_capacity(hash_table_t* ht, int new_cap) {
|
||||
new_cap = next_power_of_two(new_cap);
|
||||
Assert(new_cap >= ht->entries.cap);
|
||||
|
||||
// 查找下一个素数容量
|
||||
int new_cap_idx = 0;
|
||||
while (PRIME_CAPACITIES[new_cap_idx] <= old_cap &&
|
||||
new_cap_idx < ht->max_cap) {
|
||||
new_cap_idx++;
|
||||
}
|
||||
ht->cap = PRIME_CAPACITIES[new_cap_idx];
|
||||
vector_header(old_entries, hash_entry_t);
|
||||
old_entries.data = ht->entries.data;
|
||||
old_entries.cap = ht->entries.cap;
|
||||
|
||||
// 分配新桶数组
|
||||
ht->buckets = salloc_alloc(sizeof(hash_node_t*) * ht->cap);
|
||||
memset(ht->buckets, 0, sizeof(hash_node_t*) * ht->cap);
|
||||
// size is not used by the table itself; it is set only for the gdb Python extension used when debugging
|
||||
ht->entries.size = new_cap;
|
||||
ht->entries.cap = new_cap;
|
||||
ht->entries.data = salloc_realloc(NULL, new_cap * sizeof(hash_entry_t));
|
||||
rt_memset(ht->entries.data, 0, new_cap * sizeof(hash_entry_t));
|
||||
|
||||
// 重新哈希所有节点
|
||||
for (int i = 0; i < old_cap; i++) {
|
||||
hash_node_t* node = old_buckets[i];
|
||||
while (node) {
|
||||
hash_node_t* next = node->next;
|
||||
int new_bucket = node->hash % ht->cap;
|
||||
node->next = ht->buckets[new_bucket];
|
||||
ht->buckets[new_bucket] = node;
|
||||
node = next;
|
||||
// rehash the all of the old data
|
||||
for (rt_size_t i = 0; i < old_entries.cap; i++) {
|
||||
hash_entry_t* entry = &vector_at(old_entries, i);
|
||||
if (entry->state == ENTRY_ACTIVE) {
|
||||
hash_entry_t* dest = find_entry(ht, entry->key, entry->hash);
|
||||
*dest = *entry;
|
||||
}
|
||||
}
|
||||
|
||||
salloc_free(old_buckets);
|
||||
vector_free(old_entries);
|
||||
ht->tombstone_count = 0;
|
||||
}
|
||||
|
||||
static u32_t calc_hash(const char* str, int len) {
|
||||
// 使用与HASH_FNV_1A宏一致的算法
|
||||
rt_strhash(str);
|
||||
}
|
||||
|
||||
void hash_table_destroy(hash_table_t* ht) {
|
||||
for (int i = 0; i < ht->cap; i++) {
|
||||
hash_node_t* node = ht->buckets[i];
|
||||
while (node) {
|
||||
hash_node_t* next = node->next;
|
||||
salloc_free(node);
|
||||
node = next;
|
||||
}
|
||||
void* hashtable_set(hash_table_t* ht, const void* key, void* value) {
|
||||
if (ht->count + ht->tombstone_count >= ht->entries.cap * 0.75) {
|
||||
int new_cap = ht->entries.cap < INIT_HASH_TABLE_SIZE ? INIT_HASH_TABLE_SIZE : ht->entries.cap * 2;
|
||||
adjust_capacity(ht, new_cap);
|
||||
}
|
||||
salloc_free(ht->buckets);
|
||||
ht->buckets = NULL;
|
||||
ht->size = ht->cap = 0;
|
||||
}
|
||||
|
||||
u32_t hash = ht->hash_func(key);
|
||||
hash_entry_t* entry = find_entry(ht, key, hash);
|
||||
|
||||
void* old_value = NULL;
|
||||
if (entry->state == ENTRY_ACTIVE) {
|
||||
old_value = entry->value;
|
||||
} else {
|
||||
if (entry->state == ENTRY_TOMBSTONE) ht->tombstone_count--;
|
||||
ht->count++;
|
||||
}
|
||||
|
||||
entry->key = key;
|
||||
entry->value = value;
|
||||
entry->hash = hash;
|
||||
entry->state = ENTRY_ACTIVE;
|
||||
return old_value;
|
||||
}
|
||||
|
||||
// Fetch the value stored for `key`.
// Returns NULL when the table has no capacity yet, when find_entry yields no
// slot, or when the slot found is not an active entry.
void* hashtable_get(hash_table_t* ht, const void* key) {
    if (ht->entries.cap == 0) {
        return NULL;
    }

    hash_entry_t* slot = find_entry(ht, key, ht->hash_func(key));
    if (slot == NULL || slot->state != ENTRY_ACTIVE) {
        return NULL;
    }
    return slot->value;
}
|
||||
|
||||
// Remove `key` from the table and return the value it held.
// The slot is turned into a tombstone rather than emptied, and the live and
// tombstone counters are adjusted to match.
// Returns NULL when the table has no capacity or the key has no active entry.
void* hashtable_del(hash_table_t* ht, const void* key) {
    if (ht->entries.cap == 0) {
        return NULL;
    }

    u32_t hash = ht->hash_func(key);
    hash_entry_t* slot = find_entry(ht, key, hash);
    if (slot != NULL && slot->state == ENTRY_ACTIVE) {
        void* removed = slot->value;
        slot->state = ENTRY_TOMBSTONE;
        ht->tombstone_count++;
        ht->count--;
        return removed;
    }
    return NULL;
}
|
||||
|
||||
// Release the entry vector and zero both counters.
// Keys and values are not touched here.
void hashtable_destory(hash_table_t* ht) {
    vector_free(ht->entries);
    ht->tombstone_count = 0;
    ht->count = 0;
}
|
||||
|
@ -1,27 +1,39 @@
|
||||
#ifndef __SMCC_HASHTABLE_H__
|
||||
#define __SMCC_HASHTABLE_H__
|
||||
|
||||
#include <lib/rt/rt.h>
|
||||
#include <lib/rt/rt_alloc.h>
|
||||
#include "vector.h"
|
||||
|
||||
typedef struct hash_node {
|
||||
const char* str;
|
||||
int len;
|
||||
u32_t hash;
|
||||
struct hash_node* next;
|
||||
} hash_node_t;
|
||||
// State tag of a hash table entry
|
||||
typedef enum hash_table_entry_state {
|
||||
ENTRY_EMPTY,
|
||||
ENTRY_ACTIVE,
|
||||
ENTRY_TOMBSTONE
|
||||
} ht_entry_state_t;
|
||||
|
||||
// Hash table entry (the table does not manage key/value memory)
|
||||
typedef struct hash_entry {
|
||||
const void* key; // 由调用者管理
|
||||
void* value; // 由调用者管理
|
||||
u32_t hash; // 预计算哈希值
|
||||
ht_entry_state_t state; // 条目状态
|
||||
} hash_entry_t;
|
||||
|
||||
// 哈希表主体结构
|
||||
typedef struct hash_table {
|
||||
hash_node_t** buckets;
|
||||
int size;
|
||||
int cap;
|
||||
int max_cap;
|
||||
vector_header(entries, hash_entry_t); // 使用vector管理条目
|
||||
u32_t count; // 有效条目数(不含墓碑)
|
||||
u32_t tombstone_count; // 墓碑数量
|
||||
u32_t (*hash_func)(const void* key);
|
||||
int(*key_cmp)(const void* key1, const void* key2);
|
||||
} hash_table_t;
|
||||
|
||||
hash_table_t* new_hash_table(int init_size, int max_cap);
|
||||
void hash_table_init(hash_table_t* ht, int init_size, int max_cap);
|
||||
void hash_table_destroy(hash_table_t* ht);
|
||||
// WARN you need set hash_func and key_cmp before use
|
||||
void hashtable_init(hash_table_t* ht) ;
|
||||
|
||||
void hash_table_insert(hash_table_t* ht, const char* str, int len);
|
||||
hash_node_t* hash_table_find(hash_table_t* ht, const char* str, int len);
|
||||
void* hashtable_set(hash_table_t* ht, const void* key, void* value);
|
||||
void* hashtable_get(hash_table_t* ht, const void* key);
|
||||
// Remove `key`; returns the value it held, or NULL if it had no active entry.
// (This line previously repeated the hashtable_get prototype, leaving
// hashtable_del, which hashtable.c defines, without any declaration.)
void* hashtable_del(hash_table_t* ht, const void* key);
|
||||
void hashtable_destory(hash_table_t* ht);
|
||||
|
||||
#endif // __SMCC_HASHTABLE_H__
|
||||
|
@ -0,0 +1,32 @@
|
||||
#include "strpool.h"
|
||||
|
||||
void init_strpool(strpool_t* pool) {
|
||||
lalloc_init(&pool->stralloc);
|
||||
|
||||
pool->ht.hash_func = (u32_t(*)(const void*))rt_strhash;
|
||||
pool->ht.key_cmp = (int(*)(const void*, const void*))rt_strcmp;
|
||||
hashtable_init(&pool->ht);
|
||||
}
|
||||
|
||||
// Return the pool's copy of `str`, creating it on first use.
//
// If an equal string is already in the table, that stored pointer is
// returned. Otherwise the string (including its terminator) is copied into
// the pool's allocator, registered in the table as both key and value, and
// the copy is returned. Returns NULL, after logging, if the allocation fails.
const char* strpool_intern(strpool_t* pool, const char* str) {
    const char* interned = hashtable_get(&pool->ht, str);
    if (interned) {
        return interned;
    }

    rt_size_t size = rt_strlen(str) + 1;
    char* copy = lalloc_alloc(&pool->stralloc, size);
    if (copy == NULL) {
        LOG_ERROR("strpool: Failed to allocate memory for string");
        return NULL;
    }

    rt_memcpy(copy, str, size);
    hashtable_set(&pool->ht, copy, copy);
    return copy;
}
|
||||
|
||||
// Tear the pool down: first the lookup table, then the string allocator.
void strpool_destroy(strpool_t* pool) {
    hash_table_t* index = &pool->ht;
    hashtable_destory(index);
    lalloc_destroy(&pool->stralloc);
}
|
@ -2,11 +2,16 @@
|
||||
#define __SMCC_STRPOOL_H__
|
||||
|
||||
#include <lib/core.h>
|
||||
#include "../ds/hash.h"
|
||||
typedef struct strpool {
|
||||
long_alloc_t *long_alloc;
|
||||
} strpool_t;
|
||||
#include <lib/rt/rt_alloc.h>
|
||||
#include <lib/utils/ds/hashtable.h>
|
||||
|
||||
void new_strpool();
|
||||
typedef struct strpool {
|
||||
hash_table_t ht; // 用于快速查找字符串
|
||||
long_alloc_t stralloc; // 专门用于字符串存储的分配器
|
||||
} strpool_t;
|
||||
|
||||
void init_strpool(strpool_t* pool);
|
||||
const char* strpool_intern(strpool_t* pool, const char* str);
|
||||
void strpool_destroy(strpool_t* pool);
|
||||
|
||||
#endif // __SMCC_STRPOOL_H__
|
||||
|
@ -0,0 +1,6 @@
|
||||
#ifndef __SMCC_SYMTABL_H__
|
||||
#define __SMCC_SYMTABL_H__
|
||||
|
||||
|
||||
|
||||
#endif
|
@ -7,18 +7,20 @@ typedef struct loc {
|
||||
const char *fname;
|
||||
int line;
|
||||
int col;
|
||||
short len;
|
||||
int len;
|
||||
} loc_t;
|
||||
|
||||
typedef enum tok_type {
|
||||
typedef enum tok_basic_type {
|
||||
TK_BASIC_INVALID, // 错误占位
|
||||
TK_BASIC_KEYWORD, // 关键字
|
||||
TK_BASIC_OPERATOR, // 操作符
|
||||
TK_BASIC_IDENTIFIER, // 标识符
|
||||
TK_BASIC_LITERAL, // 字面量
|
||||
TK_BASIC_PUNCTUATOR, // 标点符号
|
||||
|
||||
TK_BASIC_WHITESPACE, // 空白
|
||||
TK_BASIC_COMMENT, // 注释
|
||||
TK_BASIC_EOF // 结束标记
|
||||
} tok_type_t;
|
||||
} tok_basic_type_t;
|
||||
|
||||
typedef union ctype {
|
||||
u8_t u8;
|
||||
@ -34,10 +36,15 @@ typedef union ctype {
|
||||
iptr_t iptr;
|
||||
uptr_t uptr;
|
||||
void* ptr;
|
||||
char ch;
|
||||
int i;
|
||||
|
||||
// MUST BE strpool ptr
|
||||
const char* str;
|
||||
} ctype_t;
|
||||
|
||||
typedef struct tok {
|
||||
tok_type_t type;
|
||||
tok_basic_type_t type;
|
||||
int sub_type;
|
||||
loc_t loc;
|
||||
ctype_t val;
|
8
lib/utils/utils.h
Normal file
8
lib/utils/utils.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef __SMCC_LIB_UTILS_H__
|
||||
#define __SMCC_LIB_UTILS_H__
|
||||
|
||||
#include "strpool/strpool.h"
|
||||
#include "symtab/symtab.h"
|
||||
#include "tokbuf/tokbuf.h"
|
||||
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user