refactor: 重构前端代码并添加日志功能
- 重命名和重构了多个文件，包括 lexer、parser 和 AST 相关代码
- 添加了日志功能，使用 LOG_* 宏替代原有的 error 和 warn 函数
- 优化了错误处理和内存分配方式
- 调整了代码结构，提高了模块化和可读性
This commit is contained in:
@ -26,8 +26,8 @@ the distribution and installation instructions.
|
||||
Chris Fraser / cwf@aya.yale.edu
|
||||
David Hanson / drh@drhanson.net
|
||||
*/
|
||||
#define FRONTEND_IMPLEMENTATION
|
||||
#include "../frontend.h"
|
||||
#include <lib/core.h>
|
||||
#include "lexer_log.h"
|
||||
#include "token.h"
|
||||
#include "lexer.h"
|
||||
|
||||
@ -74,8 +74,9 @@ static inline int keyword_cmp(const char* name, int len) {
|
||||
return -1; // Not a keyword.
|
||||
}
|
||||
|
||||
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
|
||||
{
|
||||
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread) {
|
||||
init_lib_core();
|
||||
|
||||
lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer);
|
||||
lexer->index = 1;
|
||||
lexer->line = 1;
|
||||
@ -96,10 +97,10 @@ static void flush_buffer(lexer_t* lexer) {
|
||||
lexer->cur_ptr = (unsigned char*)lexer->buffer;
|
||||
|
||||
int read_size = LEXER_BUFFER_SIZE - num;
|
||||
// TODO size_t to int maybe lose precision
|
||||
// TODO rt_size_t to int maybe lose precision
|
||||
int got_size = lexer->sread(lexer->buffer + num, read_size, 1, read_size, lexer->stream);
|
||||
if (got_size < 0) {
|
||||
error("lexer read error");
|
||||
LEX_ERROR("lexer read error");
|
||||
} else if (got_size < read_size) {
|
||||
lexer->end_ptr += got_size;
|
||||
lexer->end_ptr[0] = '\0'; // EOF
|
||||
@ -107,7 +108,7 @@ static void flush_buffer(lexer_t* lexer) {
|
||||
} else if (got_size == read_size) {
|
||||
lexer->end_ptr += got_size;
|
||||
} else {
|
||||
error("lexer read error imposible got_size > read_size maybe overflow?");
|
||||
LEX_ERROR("lexer read error imposible got_size > read_size maybe overflow?");
|
||||
}
|
||||
}
|
||||
|
||||
@ -153,8 +154,10 @@ static char got_slash(unsigned char* peek) {
|
||||
case 'r': return '\r';
|
||||
case 't': return '\t';
|
||||
case 'v': return '\v';
|
||||
default: error("Unknown escape character");
|
||||
default: break;
|
||||
}
|
||||
LEX_ERROR("Unknown escape character");
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void parse_char_literal(lexer_t* lexer, tok_t* token) {
|
||||
@ -168,7 +171,7 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) {
|
||||
val = *peek++;
|
||||
}
|
||||
|
||||
if (*peek++ != '\'') error("Unclosed character literal");
|
||||
if (*peek++ != '\'') LEX_ERROR("Unclosed character literal");
|
||||
token->val.ch = val;
|
||||
lexer->cur_ptr = peek;
|
||||
token->val.have = 1;
|
||||
@ -178,7 +181,7 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) {
|
||||
static void parse_string_literal(lexer_t* lexer, tok_t* token) {
|
||||
unsigned char* peek = lexer->cur_ptr + 1;
|
||||
// TODO string literal size check
|
||||
char* dest = token->val.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
|
||||
char* dest = token->val.str = rt._malloc(LEXER_MAX_TOKEN_SIZE + 1);
|
||||
int len = 0;
|
||||
|
||||
while (*peek != '"') {
|
||||
@ -189,7 +192,7 @@ static void parse_string_literal(lexer_t* lexer, tok_t* token) {
|
||||
*peek = got_slash(peek);
|
||||
}
|
||||
|
||||
if (len >= LEXER_MAX_TOKEN_SIZE) error("String too long");
|
||||
if (len >= LEXER_MAX_TOKEN_SIZE) LEX_ERROR("String too long");
|
||||
dest[len++] = *peek++;
|
||||
}
|
||||
dest[len] = '\0';
|
||||
@ -431,7 +434,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
|
||||
lexer->line++;
|
||||
tok = TOKEN_FLUSH; break;
|
||||
case '#':
|
||||
warn("TODO: #define\n");
|
||||
LEX_WARN("Marroc does not support in lexer rather in preprocessor, it will be ignored");
|
||||
goto_newline(lexer);
|
||||
tok = TOKEN_FLUSH;
|
||||
goto END;
|
||||
@ -458,14 +461,14 @@ void get_token(lexer_t* lexer, tok_t* token) {
|
||||
case '_':
|
||||
// TOKEN_IDENT
|
||||
if ((*peek == 'L' && *peek == '\'') || (*peek == 'L' && *peek == '"')) {
|
||||
error("unsupport wide-character char literal by `L` format");
|
||||
LEX_ERROR("unsupport wide-character char literal by `L` format");
|
||||
}
|
||||
while (1) {
|
||||
if (peek == lexer->end_ptr) {
|
||||
error("unsupport outof 64 length identifier");
|
||||
LEX_ERROR("unsupport outof 64 length identifier");
|
||||
}
|
||||
if ((*peek >= 'a' && *peek <= 'z') || (*peek >= 'A' && *peek <= 'Z') ||
|
||||
(*peek == '_') || (*peek >= '0' && *peek <= '9')) {
|
||||
(*peek == '_') || (*peek >= '0' && *peek <= '9')) {
|
||||
peek++;
|
||||
continue;
|
||||
}
|
||||
@ -475,7 +478,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
|
||||
int res = keyword_cmp((const char*)lexer->cur_ptr, peek - (lexer->cur_ptr));
|
||||
if (res == -1) {
|
||||
int strlen = peek - lexer->cur_ptr;
|
||||
unsigned char* str = xmalloc(strlen + 1);
|
||||
unsigned char* str = rt._malloc(strlen + 1);
|
||||
constant.have = 1;
|
||||
constant.str = (char*)str;
|
||||
for (int i = 0; i < strlen; i++) {
|
||||
@ -489,7 +492,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
|
||||
tok = keywords[res].tok; break;
|
||||
}
|
||||
default:
|
||||
error("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
|
||||
LEX_ERROR("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -497,6 +500,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
|
||||
END:
|
||||
token->val = constant;
|
||||
token->type = tok;
|
||||
LEX_DEBUG("get token `%s` (ch: %c, int: %d)", get_tok_name(token->type), token->val.ch, token->val.i);
|
||||
}
|
||||
|
||||
// get_token may return tokens the parser should not see (flush and comment tokens); get_valid_token skips them
|
||||
@ -507,4 +511,3 @@ void get_valid_token(lexer_t* lexer, tok_t* token) {
|
||||
type = token->type;
|
||||
} while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef __LEXER_H__
|
||||
#define __LEXER_H__
|
||||
#ifndef __SMCC_LEXER_H__
|
||||
#define __SMCC_LEXER_H__
|
||||
|
||||
#include <lib/core.h>
|
||||
#include "token.h"
|
||||
#ifndef LEXER_MAX_TOKEN_SIZE
|
||||
#define LEXER_MAX_TOKEN_SIZE 63
|
||||
|
13
ccompiler/frontend/lexer/lexer_log.h
Normal file
13
ccompiler/frontend/lexer/lexer_log.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef __SMCC_LEXER_LOG_H__
|
||||
#define __SMCC_LEXER_LOG_H__
|
||||
|
||||
#include <lib/rt/rt.h>
|
||||
|
||||
#define LEX_NOTSET( fmt, ...) LOG_NOTSET("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_DEBUG( fmt, ...) LOG_DEBUG("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_INFO( fmt, ...) LOG_INFO("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_WARN( fmt, ...) LOG_WARN("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_ERROR( fmt, ...) LOG_ERROR("LEXER: " fmt, ##__VA_ARGS__)
|
||||
#define LEX_FATAL( fmt, ...) LOG_FATAL("LEXER: " fmt, ##__VA_ARGS__)
|
||||
|
||||
#endif // __SMCC_LEXER_LOG_H__
|
@ -1,6 +1,7 @@
|
||||
CC = gcc
|
||||
CFLAGS = -g -Wall
|
||||
CFLAGS = -g -Wall -I../../../..
|
||||
SRC = ../lexer.c ../token.c
|
||||
LIB = -L../../../../lib -lcore
|
||||
|
||||
all = test_all
|
||||
|
||||
@ -8,10 +9,10 @@ test_all: test
|
||||
./test
|
||||
|
||||
run:
|
||||
$(CC) $(CFLAGS) $(SRC) run.c -o run
|
||||
$(CC) $(CFLAGS) $(SRC) run.c $(LIB) -o run
|
||||
|
||||
test:
|
||||
$(CC) $(CFLAGS) $(SRC) -o test test.c
|
||||
$(CC) $(CFLAGS) $(SRC) $(LIB) -o test test.c
|
||||
|
||||
clean:
|
||||
rm -f test run
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "../lexer.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
|
||||
/*
|
||||
tok_tConstant {
|
||||
@ -18,9 +19,14 @@ tok_tConstant {
|
||||
int g_num;
|
||||
int g_num_arr[3];
|
||||
int main(int argc, char* argv[]) {
|
||||
int num = 0;
|
||||
// int num = 0;
|
||||
// You Must Be Call
|
||||
init_lib_core();
|
||||
if (argc == 3 && strcmp(argv[2], "-nodebug") == 0) {
|
||||
log_set_level(NULL, LOG_LEVEL_ALL & ~LOG_LEVEL_DEBUG);
|
||||
}
|
||||
|
||||
const char* file_name = "test_lexer.c";
|
||||
const char* file_name = "run.c";
|
||||
if (argc == 2) {
|
||||
file_name = argv[1];
|
||||
}
|
||||
@ -32,7 +38,7 @@ int main(int argc, char* argv[]) {
|
||||
printf("open file success\n");
|
||||
|
||||
lexer_t lexer;
|
||||
init_lexer(&lexer, "test_lexter.c", fp, (lexer_sread_fn)fread_s);
|
||||
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);
|
||||
tok_t tok;
|
||||
|
||||
while (1) {
|
||||
@ -40,7 +46,7 @@ int main(int argc, char* argv[]) {
|
||||
if (tok.type == TOKEN_EOF) {
|
||||
break;
|
||||
}
|
||||
printf("line: %d, column: %d, type: %3d, typename: %s\n",
|
||||
lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
|
||||
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
|
||||
// lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
|
||||
}
|
||||
}
|
||||
|
@ -1,12 +1,12 @@
|
||||
#define FRONTEND_IMPLEMENTATION
|
||||
#include "../frontend.h"
|
||||
#include <lib/core.h>
|
||||
#include "lexer_log.h"
|
||||
#include "token.h"
|
||||
|
||||
#define ROUND_IDX(idx) ((idx) % tokbuf->cap)
|
||||
|
||||
tok_t* pop_tok(tok_buf_t* tokbuf) {
|
||||
tok_t* pop_tok(tok_stream_t* tokbuf) {
|
||||
if (tokbuf->size == 0) {
|
||||
error("no token to pop");
|
||||
LEX_ERROR("no token to pop");
|
||||
return NULL;
|
||||
}
|
||||
int idx = tokbuf->cur;
|
||||
@ -15,11 +15,11 @@ tok_t* pop_tok(tok_buf_t* tokbuf) {
|
||||
return tokbuf->buf + idx;
|
||||
}
|
||||
|
||||
void flush_peek_tok(tok_buf_t* tokbuf) {
|
||||
void flush_peek_tok(tok_stream_t* tokbuf) {
|
||||
tokbuf->peek = tokbuf->cur;
|
||||
}
|
||||
|
||||
void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) {
|
||||
void init_tokbuf(tok_stream_t *tokbuf, void *stream, tok_stream_get_func gettok) {
|
||||
tokbuf->cur = 0;
|
||||
tokbuf->end = 0;
|
||||
tokbuf->peek = 0;
|
||||
@ -30,18 +30,19 @@ void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) {
|
||||
tokbuf->cap = 0;
|
||||
}
|
||||
|
||||
tok_t *peek_tok(tok_buf_t *tokbuf) {
|
||||
tok_t *peek_tok(tok_stream_t *tokbuf) {
|
||||
Assert(tokbuf->size <= tokbuf->cap);
|
||||
int idx = tokbuf->peek;
|
||||
tokbuf->peek = ROUND_IDX(idx + 1);
|
||||
if (tokbuf->size >= tokbuf->cap) {
|
||||
error("peek too deep, outof array size");
|
||||
}
|
||||
|
||||
if (idx == tokbuf->end) {
|
||||
if (tokbuf->size == tokbuf->cap) {
|
||||
error("peek_tok buffer overflow");
|
||||
LEX_ERROR("peek_tok buffer overflow");
|
||||
return NULL;
|
||||
}
|
||||
if (tokbuf->gettok == NULL) {
|
||||
error("peek_tok can not got tok");
|
||||
LEX_ERROR("peek_tok can not got tok");
|
||||
return NULL;
|
||||
}
|
||||
tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx]));
|
||||
tokbuf->size++;
|
||||
@ -51,15 +52,16 @@ tok_t *peek_tok(tok_buf_t *tokbuf) {
|
||||
return &(tokbuf->buf[idx]);
|
||||
}
|
||||
|
||||
tok_type_t peek_tok_type(tok_buf_t* tokbuf) {
|
||||
tok_type_t peek_tok_type(tok_stream_t* tokbuf) {
|
||||
return peek_tok(tokbuf)->type;
|
||||
}
|
||||
|
||||
int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type) {
|
||||
int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type) {
|
||||
flush_peek_tok(tokbuf);
|
||||
tok_t* tok = peek_tok(tokbuf);
|
||||
if (tok->type != type) {
|
||||
error("expected tok: %s, got %s", get_tok_name(type), get_tok_name(tok->type));
|
||||
LEX_ERROR("expected tok `%s` but got `%s`", get_tok_name(type), get_tok_name(tok->type));
|
||||
return 0;
|
||||
} else {
|
||||
pop_tok(tokbuf);
|
||||
}
|
||||
|
@ -51,10 +51,10 @@ enum CSTD_KEYWORD {
|
||||
X(flush , TOKEN_FLUSH) \
|
||||
X("==" , TOKEN_EQ) \
|
||||
X("=" , TOKEN_ASSIGN) \
|
||||
X("++" , TOKEN_ADD_ADD) \
|
||||
X("++" , TOKEN_ADD_ADD) \
|
||||
X("+=" , TOKEN_ASSIGN_ADD) \
|
||||
X("+" , TOKEN_ADD) \
|
||||
X("--" , TOKEN_SUB_SUB) \
|
||||
X("--" , TOKEN_SUB_SUB) \
|
||||
X("-=" , TOKEN_ASSIGN_SUB) \
|
||||
X("->" , TOKEN_DEREF) \
|
||||
X("-" , TOKEN_SUB) \
|
||||
@ -134,7 +134,7 @@ typedef struct tok {
|
||||
tok_val_t val;
|
||||
} tok_t;
|
||||
|
||||
typedef struct tok_buf {
|
||||
typedef struct tok_stream {
|
||||
int cur;
|
||||
int end;
|
||||
int peek;
|
||||
@ -143,15 +143,15 @@ typedef struct tok_buf {
|
||||
tok_t* buf;
|
||||
void* stream;
|
||||
void (*gettok)(void* stream, tok_t* token);
|
||||
} tok_buf_t;
|
||||
} tok_stream_t;
|
||||
|
||||
typedef void(*get_tokbuf_func)(void* stream, tok_t* token);
|
||||
void init_tokbuf(tok_buf_t* tokbuf, void* stream, get_tokbuf_func gettok);
|
||||
tok_t* peek_tok(tok_buf_t* tokbuf);
|
||||
tok_t* pop_tok(tok_buf_t* tokbuf);
|
||||
void flush_peek_tok(tok_buf_t* tokbuf);
|
||||
tok_type_t peek_tok_type(tok_buf_t* tokbuf);
|
||||
int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type);
|
||||
typedef void(*tok_stream_get_func)(void* stream, tok_t* token);
|
||||
void init_tokbuf(tok_stream_t* tokbuf, void* stream, tok_stream_get_func gettok);
|
||||
tok_t* peek_tok(tok_stream_t* tokbuf);
|
||||
tok_t* pop_tok(tok_stream_t* tokbuf);
|
||||
void flush_peek_tok(tok_stream_t* tokbuf);
|
||||
tok_type_t peek_tok_type(tok_stream_t* tokbuf);
|
||||
int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type);
|
||||
const char* get_tok_name(tok_type_t type);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user