commit 09299e339c9a792f612833efc74598a73db8e797 Author: ZZY <2450266535@qq.com> Date: Wed Mar 5 15:45:19 2025 +0800 init basic diff --git a/ccompiler/frontend/Makefile b/ccompiler/frontend/Makefile new file mode 100644 index 0000000..f115087 --- /dev/null +++ b/ccompiler/frontend/Makefile @@ -0,0 +1,46 @@ +# 编译器设置 +CC = gcc +AR = ar +CFLAGS = -g + +# 源文件路径 +LEXER_DIR = ./lexer +PARSER_DIR = ./parser +AST_DIR = ./parser/ast +SYMTAB_DIR = ./parser/symtab + +# 源文件列表 +SRCS = \ + frontend.c \ + $(LEXER_DIR)/lexer.c \ + $(PARSER_DIR)/parser.c \ + $(AST_DIR)/ast.c \ + $(AST_DIR)/block.c \ + $(AST_DIR)/decl.c \ + $(AST_DIR)/expr.c \ + $(AST_DIR)/func.c \ + $(AST_DIR)/program.c \ + $(AST_DIR)/stmt.c \ + $(AST_DIR)/term.c \ + $(SYMTAB_DIR)/hashmap.c \ + $(SYMTAB_DIR)/scope.c \ + $(SYMTAB_DIR)/symtab.c \ + +# 生成目标文件列表 +OBJS = $(SRCS:.c=.o) + +# 最终目标 +TARGET = libfrontend.a + +all: $(TARGET) + +$(TARGET): $(OBJS) + $(AR) rcs $@ $^ + +%.o: %.c + $(CC) $(CFLAGS) -c -o $@ $< + +clean: + rm -f $(OBJS) $(TARGET) + +.PHONY: all clean \ No newline at end of file diff --git a/ccompiler/frontend/frontend.c b/ccompiler/frontend/frontend.c new file mode 100644 index 0000000..9d4b190 --- /dev/null +++ b/ccompiler/frontend/frontend.c @@ -0,0 +1,18 @@ +#include "lexer/lexer.h" +#include "parser/symtab/symtab.h" +#include "frontend.h" + +struct ASTNode* frontend(const char* file, void* stream, sread_fn sread) { + struct Lexer lexer; + init_lexer(&lexer, file, stream, sread); + + struct SymbolTable symtab; + init_symtab(&symtab); + + struct Parser parser; + init_parser(&parser, &lexer, &symtab); + parse_prog(&parser); + + // TODO Free the resourse + return parser.root; +} diff --git a/ccompiler/frontend/frontend.h b/ccompiler/frontend/frontend.h new file mode 100644 index 0000000..bc4714e --- /dev/null +++ b/ccompiler/frontend/frontend.h @@ -0,0 +1,24 @@ +#ifndef __FRONTEND_H__ +#define __FRONTEND_H__ + +#ifndef error +#include +#include +#define STD_LIBRARY +#define error(...) 
do { fprintf(stderr, __VA_ARGS__); exit(1); } while (0) +#endif +#ifndef warn +#include +#define STD_LIBRARY +#define warn(...) do { fprintf(stdout, __VA_ARGS__); } while (0) +#endif + +#define xmalloc(size) malloc(size) + +#include "parser/parser.h" +#include "parser/ast/ast.h" + +typedef int (*sread_fn)(void *dst_buf, int dst_size, int elem_size, int count, void *stream); +struct ASTNode* frontend(const char* file, void* stream, sread_fn sread); + +#endif \ No newline at end of file diff --git a/ccompiler/frontend/lexer/README.md b/ccompiler/frontend/lexer/README.md new file mode 100644 index 0000000..eed0351 --- /dev/null +++ b/ccompiler/frontend/lexer/README.md @@ -0,0 +1,5 @@ +# 词法分析 + +参考LCC的此分析部分 + +主要使用 LL(n) 硬编码查找token diff --git a/ccompiler/frontend/lexer/lexer.c b/ccompiler/frontend/lexer/lexer.c new file mode 100644 index 0000000..c3e735f --- /dev/null +++ b/ccompiler/frontend/lexer/lexer.c @@ -0,0 +1,523 @@ +/** + * 仿照LCCompiler的词法分析部分 + * + * 如下为LCC的README in 2025.2 +This hierarchy is the distribution for lcc version 4.2. + +lcc version 3.x is described in the book "A Retargetable C Compiler: +Design and Implementation" (Addison-Wesley, 1995, ISBN 0-8053-1670-1). +There are significant differences between 3.x and 4.x, most notably in +the intermediate code. For details, see +https://drh.github.io/lcc/documents/interface4.pdf. + +VERSION 4.2 IS INCOMPATIBLE WITH EARLIER VERSIONS OF LCC. DO NOT +UNLOAD THIS DISTRIBUTION ON TOP OF A 3.X DISTRIBUTION. + +LCC is a C89 ("ANSI C") compiler designed to be highly retargetable. + +LOG describes the changes since the last release. + +CPYRIGHT describes the conditions under you can use, copy, modify, and +distribute lcc or works derived from lcc. + +doc/install.html is an HTML file that gives a complete description of +the distribution and installation instructions. 
+ +Chris Fraser / cwf@aya.yale.edu +David Hanson / drh@drhanson.net + */ +#include "../frontend.h" +#include "lexer.h" + +static const struct { + const char* name; + enum CSTD_KEYWORD std_type; + enum TokenType tok; +} keywords[] = { + #define X(name, std_type, tok, ...) { #name, std_type, tok }, + KEYWORD_TABLE + #undef X +}; + +// by using binary search to find the keyword +static inline int keyword_cmp(const char* name, int len) { + int low = 0; + int high = sizeof(keywords) / sizeof(keywords[0]) - 1; + while (low <= high) { + int mid = (low + high) / 2; + const char *key = keywords[mid].name; + int cmp = 0; + + // 自定义字符串比较逻辑 + for (int i = 0; i < len; i++) { + if (name[i] != key[i]) { + cmp = (unsigned char)name[i] - (unsigned char)key[i]; + break; + } + if (name[i] == '\0') break; // 遇到终止符提前结束 + } + + if (cmp == 0) { + // 完全匹配检查(长度相同) + if (key[len] == '\0') return mid; + cmp = -1; // 当前关键词比输入长 + } + + if (cmp < 0) { + high = mid - 1; + } else { + low = mid + 1; + } + } + return -1; // Not a keyword. 
+} + +void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_sread_fn sread) +{ + lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer); + lexer->index = 1; + lexer->line = 1; + + lexer->stream = stream; + lexer->sread = sread; + + for (int i = 0; i < sizeof(lexer->buffer) / sizeof(lexer->buffer[0]); i++) { + lexer->buffer[i] = 0; + } +} + +static void flush_buffer(struct Lexer* lexer) { + int num = lexer->end_ptr - lexer->cur_ptr; + for (int i = 0; i < num; i++) { + lexer->buffer[i] = lexer->cur_ptr[i]; + } + lexer->cur_ptr = lexer->buffer; + + int read_size = LEXER_BUFFER_SIZE - num; + // TODO size_t to int maybe lose precision + int got_size = lexer->sread(lexer->buffer + num, read_size, 1, read_size, lexer->stream); + if (got_size < 0) { + error("lexer read error"); + } else if (got_size < read_size) { + lexer->end_ptr += got_size; + lexer->end_ptr[0] = '\0'; // EOF + lexer->end_ptr++; + } else if (got_size == read_size) { + lexer->end_ptr += got_size; + } else { + error("lexer read error imposible got_size > read_size maybe overflow?"); + } +} + +static void goto_newline(struct Lexer* lexer) { + do { + if (lexer->cur_ptr == lexer->end_ptr) { + flush_buffer(lexer); + lexer->cur_ptr--; + } + lexer->cur_ptr++; + } while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0'); +} + +static void goto_block_comment(struct Lexer* lexer) { + while (1) { + if (lexer->end_ptr - lexer->cur_ptr < 2) { + flush_buffer(lexer); + } + + if (*lexer->cur_ptr == '\0') { + break; + } else if (lexer->cur_ptr[0] == '*' && lexer->cur_ptr[1] == '/') { + lexer->cur_ptr += 2; + break; + } else { + lexer->cur_ptr++; + } + } +} + +// TODO escape character not enough +static char got_slash(unsigned char* peek) { + switch (*peek) { + case '\\': return '\\'; + case '\'': return '\''; + case '\"': return '\"'; + case '\?': return '\?'; + case '0': return '\0'; + + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return 
'\r'; + case 't': return '\t'; + case 'v': return '\v'; + default: error("Unknown escape character"); + } +} + +static void parse_char_literal(struct Lexer* lexer, struct Token* token) { + char val = 0; + unsigned char* peek = lexer->cur_ptr + 1; + if (*peek == '\\') { + peek++; + val = got_slash(peek); + } else { + val = *peek; + } + + if (*peek != '\'') error("Unclosed character literal"); + token->constant.ch = val; + lexer->cur_ptr = peek + 1; + token->constant.have = 1; + token->type = TOKEN_CHAR_LITERAL; +} + +static void parse_string_literal(struct Lexer* lexer, struct Token* token) { + unsigned char* peek = lexer->cur_ptr + 1; + // TODO string literal size check + char* dest = token->constant.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1); + int len = 0; + + while (*peek != '"') { + if (peek >= lexer->end_ptr) flush_buffer(lexer); + + if (*peek == '\\') { // 处理转义 + peek++; + *peek = got_slash(peek); + } + + if (len >= LEXER_MAX_TOKEN_SIZE) error("String too long"); + dest[len++] = *peek++; + } + dest[len] = '\0'; + lexer->cur_ptr = peek + 1; + token->constant.have = 1; + token->type = TOKEN_STRING_LITERAL; +} + +// FIXME it write by AI maybe error +static void parse_number(struct Lexer* lexer, struct Token* token) { + unsigned char* peek = lexer->cur_ptr; + int base = 10; + int is_float = 0; + long long int_val = 0; + double float_val = 0.0; + double fraction = 1.0; + + // 判断进制 + if (*peek == '0') { + peek++; + switch (*peek) { + case 'x': + case 'X': + base = 16; + default: + base = 8; + } + } + + // 解析整数部分 + while (1) { + int digit = -1; + if (*peek >= '0' && *peek <= '9') { + digit = *peek - '0'; + } else if (base == 16) { + if (*peek >= 'a' && *peek <= 'f') digit = *peek - 'a' + 10; + else if (*peek >= 'A' && *peek <= 'F') digit = *peek - 'A' + 10; + } + + if (digit < 0 || digit >= base) break; + + if (!is_float) { + int_val = int_val * base + digit; + } else { + float_val = float_val * base + digit; + fraction *= base; + } + peek++; + } + + // 解析浮点数 + if 
(*peek == '.' && base == 10) { + is_float = 1; + float_val = int_val; + peek++; + + while (*peek >= '0' && *peek <= '9') { + float_val = float_val * 10.0 + (*peek - '0'); + fraction *= 10.0; + peek++; + } + float_val /= fraction; + } + + // 解析科学计数法 + if ((*peek == 'e' || *peek == 'E') && base == 10) { + is_float = 1; + peek++; + int exp_sign = 1; + int exponent = 0; + + if (*peek == '+') peek++; + else if (*peek == '-') { + exp_sign = -1; + peek++; + } + + while (*peek >= '0' && *peek <= '9') { + exponent = exponent * 10 + (*peek - '0'); + peek++; + } + // float_val *= pow(10.0, exp_sign * exponent); + } + + // 存储结果 + lexer->cur_ptr = peek; + token->constant.have = 1; + if (is_float) { + token->constant.d = float_val; + token->type = TOKEN_FLOAT_LITERAL; + } else { + token->constant.ll = int_val; + token->type = TOKEN_INT_LITERAL; + } +} + +#define GOT_ONE_TOKEN_BUF_SIZE 64 +// /zh/c/language/operator_arithmetic.html +void get_token(struct Lexer* lexer, struct Token* token) { + // 需要保证缓冲区始终可读 + if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) { + flush_buffer(lexer); + } + register unsigned char* peek = lexer->cur_ptr; + + // 快速跳过空白符 + while (*peek == ' ' || *peek == '\t') { + if (peek == lexer->end_ptr) { + break; + } + peek++; + } + if (peek != lexer->cur_ptr) { + // To TOKEN_FLUSH + lexer->cur_ptr = peek; + token->type = TOKEN_FLUSH; + } + + enum TokenType tok = TOKEN_INIT; + struct TokenConstant constant; + constant.have = 0; + + // once step + switch (*peek++) { + case '=': + switch (*peek++) { + case '=': tok = TOKEN_EQ; break; + default: peek--, tok = TOKEN_ASSIGN; break; + } break; + case '+': + switch (*peek++) { + case '+': tok = TOKEN_ADD_ADD; break; + case '=': tok = TOKEN_ASSIGN_ADD; break; + default: peek--, tok = TOKEN_ADD; break; + } break; + case '-': + switch (*peek++) { + case '-': tok = TOKEN_SUB_SUB; break; + case '=': tok = TOKEN_ASSIGN_SUB; break; + + case '>': tok = TOKEN_DEREF; break; + default: peek--, tok = TOKEN_SUB; break; 
+ } break; + case '*': + switch (*peek++) { + case '=': tok = TOKEN_ASSIGN_MUL; break; + default: peek--, tok = TOKEN_MUL; break; + } break; + case '/': + switch (*peek++) { + case '=': tok = TOKEN_ASSIGN_DIV; break; + case '/': { + // need get a new line to parse + goto_newline(lexer); + tok = TOKEN_LINE_COMMENT; + goto END; + } + case '*': { + lexer->cur_ptr = peek; + goto_block_comment(lexer); + tok = TOKEN_BLOCK_COMMENT; + goto END; + } + default: peek--, tok = TOKEN_DIV; break; + } break; + case '%': + switch (*peek++) { + case '=': tok = TOKEN_ASSIGN_MOD; break; + default: peek--, tok = TOKEN_MOD; break; + } break; + case '&': + switch (*peek++) { + case '&': tok = TOKEN_AND_AND; break; + case '=': tok = TOKEN_ASSIGN_AND; break; + default: peek--, tok = TOKEN_AND; break; + } break; + case '|': + switch (*peek++) { + case '|': tok = TOKEN_OR_OR; break; + case '=': tok = TOKEN_ASSIGN_OR; break; + default: peek--, tok = TOKEN_OR; break; + } break; + case '^': + switch (*peek++) { + case '=': tok = TOKEN_ASSIGN_XOR; break; + default: peek--, tok = TOKEN_XOR; break; + } break; + case '<': + switch (*peek++) { + case '=': tok = TOKEN_LE; break; + case '<': tok = (*peek == '=') ? (peek++, TOKEN_ASSIGN_L_SH) : TOKEN_L_SH; break; + default: peek--, tok = TOKEN_LT; break; + } break; + case '>': + switch (*peek++) { + case '=': tok = TOKEN_GE; break; + case '>': tok = (*peek == '=') ? 
(peek++, TOKEN_ASSIGN_R_SH) : TOKEN_R_SH; break; + default: peek--, tok = TOKEN_GT; break; + } break; + case '~': + tok = TOKEN_BIT_NOT; break; + case '!': + switch (*peek++) { + case '=': tok = TOKEN_NEQ; break; + default: peek--, tok = TOKEN_NOT; break; + } + case '[': + tok = TOKEN_L_BRACKET; break; + case ']': + tok = TOKEN_R_BRACKET; break; + case '(': + tok = TOKEN_L_PAREN; break; + case ')': + tok = TOKEN_R_PAREN; break; + case '{': + tok = TOKEN_L_BRACE; break; + case '}': + tok = TOKEN_R_BRACE; break; + case ';': + tok = TOKEN_SEMICOLON; break; + case ',': + tok = TOKEN_COMMA; break; + case ':': + tok = TOKEN_COLON; break; + case '.': + if (peek[0] == '.' && peek[1] == '.') { + peek += 2; + tok = TOKEN_ELLIPSIS; + } else { + tok = TOKEN_DOT; + } + break; + case '?': + tok = TOKEN_COND; break; + case '\v': case '\r': case '\f': // FIXME it parse as a blank character + tok = TOKEN_FLUSH; break; + case '\n': + // you need to flush a newline or blank + lexer->line++; + tok = TOKEN_FLUSH; break; + case '#': + warn("TODO: #define\n"); + goto_newline(lexer); + tok = TOKEN_FLUSH; + goto END; + case '\0': + // EOF + tok = TOKEN_EOF; + goto END; + case '\'': + return parse_char_literal(lexer, token); + return; + case '"': + return parse_string_literal(lexer, token); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return parse_number(lexer, token); + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z': + case '_': + // TOKEN_IDENT + if 
(*peek == 'L' && *peek == '\'' || *peek == 'L' && *peek == '"') { + error("unsupport wide-character char literal by `L` format"); + } + while (1) { + if (peek == lexer->end_ptr) { + error("unsupport outof 64 length identifier"); + } + if ((*peek >= 'a' && *peek <= 'z') || (*peek >= 'A' && *peek <= 'Z') || + (*peek == '_') || (*peek >= '0' && *peek <= '9')) { + peek++; + continue; + } + break; + } + + int res = keyword_cmp(lexer->cur_ptr, peek - (lexer->cur_ptr)); + if (res == -1) { + int strlen = peek - lexer->cur_ptr; + unsigned char* str = xmalloc(strlen + 1); + constant.have = 1; + constant.str = str; + for (int i = 0; i < strlen; i++) { + str[i] = lexer->cur_ptr[i]; + } + str[strlen] = '\0'; + constant.have = 1; + constant.str = str; + tok = TOKEN_IDENT; break; + } else { + tok = keywords[res].tok; break; + } + default: + error("unsupport char in sourse code `%c`", *(lexer->cur_ptr)); + break; + } + + lexer->cur_ptr = peek; +END: + token->constant = constant; + token->type = tok; +} + +// get_token maybe got invalid (with parser) +void get_valid_token(struct Lexer* lexer, struct Token* token) { + enum TokenType type; + do { + get_token(lexer, token); + type = token->type; + } while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT); +} + +// 生成字符串映射(根据需求选择#str或#name) +static const char* token_strings[] = { + // 普通token使用#str + #define X(str, tok) [tok] = #str, + TOKEN_TABLE + #undef X + + // 关键字使用#name + #define X(name, std, tok) [tok] = #name, + KEYWORD_TABLE + #undef X +}; + +const char* get_token_name(enum TokenType type) { + return token_strings[type]; +} diff --git a/ccompiler/frontend/lexer/lexer.h b/ccompiler/frontend/lexer/lexer.h new file mode 100644 index 0000000..57a8d26 --- /dev/null +++ b/ccompiler/frontend/lexer/lexer.h @@ -0,0 +1,40 @@ +#ifndef __LEXER_H__ +#define __LEXER_H__ + +#include "token.h" +#define LEXER_MAX_TOKEN_SIZE 63 +#define LEXER_BUFFER_SIZE 4095 + +typedef int (*lexer_sread_fn)(void *dst_buf, int 
dst_size, + int elem_size, int count, void *stream); + +struct Lexer { + int line; + int index; + // const char current_file_name[LEXER_BUFFER_SIZE+1]; + + unsigned char* cur_ptr; // 当前扫描的字符,但是还没有开始扫描 + unsigned char* end_ptr; // 缓冲区最后一个字符的下一个位置 + char buffer[LEXER_BUFFER_SIZE+1]; + + lexer_sread_fn sread; + void* stream; +}; + +struct Token { + enum TokenType type; + struct TokenConstant constant; +}; + +void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, + lexer_sread_fn sread); + +// +void get_token(struct Lexer* lexer, struct Token* token); + +// get_token maybe got invalid (with parser as TOKEN_FLUSH) +void get_valid_token(struct Lexer* lexer, struct Token* token); + +const char* get_token_name(enum TokenType token); + +#endif diff --git a/ccompiler/frontend/lexer/tests/test_lexer.c b/ccompiler/frontend/lexer/tests/test_lexer.c new file mode 100644 index 0000000..ffdb335 --- /dev/null +++ b/ccompiler/frontend/lexer/tests/test_lexer.c @@ -0,0 +1,46 @@ +#include "../lexer.h" +#include +// gcc -g ../lexer.c test_lexer.c -o test_lexer +/* +struct TokenConstant { + int have; + union { + char ch; + int i; + float f; + double d; + long long ll; + char* str; + }; +}; +*/ + +int g_num; +int g_num_arr[3]; +int main(int argc, char* argv[]) { + int num = 0; + + const char* file_name = "test_lexer.c"; + if (argc == 2) { + file_name = argv[1]; + } + FILE* fp = fopen(file_name, "r"); + if (fp == NULL) { + perror("open file failed"); + return 1; + } + printf("open file success\n"); + + struct Lexer lexer; + init_lexer(&lexer, "test_lexter.c", fp, (lexer_sread_fn)fread_s); + struct Token tok; + + while (1) { + get_valid_token(&lexer, &tok); + if (tok.type == TOKEN_EOF) { + break; + } + printf("line: %d, column: %d, type: %3d, typename: %s\n", + lexer.line, lexer.index, tok.type, get_token_name(tok.type)); + } +} diff --git a/ccompiler/frontend/lexer/token.h b/ccompiler/frontend/lexer/token.h new file mode 100644 index 0000000..81458dd --- /dev/null +++ 
b/ccompiler/frontend/lexer/token.h @@ -0,0 +1,250 @@ +#ifndef __TOKEN_H__ +#define __TOKEN_H__ + +enum CSTD_KEYWORD { + CSTD_C89, + CSTD_C99, + CEXT_ASM, +}; + +// Using Binary Search To Fast Find Keyword +#define KEYWORD_TABLE \ + X(asm , CEXT_ASM, TOKEN_ASM) \ + X(break , CSTD_C89, TOKEN_BREAK) \ + X(case , CSTD_C89, TOKEN_CASE) \ + X(char , CSTD_C89, TOKEN_CHAR) \ + X(const , CSTD_C89, TOKEN_CONST) \ + X(continue , CSTD_C89, TOKEN_CONTINUE) \ + X(default , CSTD_C89, TOKEN_DEFAULT) \ + X(do , CSTD_C89, TOKEN_DO) \ + X(double , CSTD_C89, TOKEN_DOUBLE) \ + X(else , CSTD_C89, TOKEN_ELSE) \ + X(enum , CSTD_C89, TOKEN_ENUM) \ + X(extern , CSTD_C89, TOKEN_EXTERN) \ + X(float , CSTD_C89, TOKEN_FLOAT) \ + X(for , CSTD_C89, TOKEN_FOR) \ + X(goto , CSTD_C89, TOKEN_GOTO) \ + X(if , CSTD_C89, TOKEN_IF) \ + X(inline , CSTD_C99, TOKEN_INLINE) \ + X(int , CSTD_C89, TOKEN_INT) \ + X(long , CSTD_C89, TOKEN_LONG) \ + X(register , CSTD_C89, TOKEN_REGISTER) \ + X(restrict , CSTD_C99, TOKEN_RESTRICT) \ + X(return , CSTD_C89, TOKEN_RETURN) \ + X(short , CSTD_C89, TOKEN_SHORT) \ + X(signed , CSTD_C89, TOKEN_SIGNED) \ + X(sizeof , CSTD_C89, TOKEN_SIZEOF) \ + X(static , CSTD_C89, TOKEN_STATIC) \ + X(struct , CSTD_C89, TOKEN_STRUCT) \ + X(switch , CSTD_C89, TOKEN_SWITCH) \ + X(typedef , CSTD_C89, TOKEN_TYPEDEF) \ + X(union , CSTD_C89, TOKEN_UNION) \ + X(unsigned , CSTD_C89, TOKEN_UNSIGNED) \ + X(void , CSTD_C89, TOKEN_VOID) \ + X(volatile , CSTD_C89, TOKEN_VOLATILE) \ + X(while , CSTD_C89, TOKEN_WHILE) \ + // KEYWORD_TABLE + +#define TOKEN_TABLE \ + X(EOF , TOKEN_EOF) \ + X(init , TOKEN_INIT) \ + X(flush , TOKEN_FLUSH) \ + X("==" , TOKEN_EQ) \ + X("=" , TOKEN_ASSIGN) \ + X("++" , TOKEN_ADD_ADD) \ + X("+=" , TOKEN_ASSIGN_ADD) \ + X("+" , TOKEN_ADD) \ + X("--" , TOKEN_SUB_SUB) \ + X("-=" , TOKEN_ASSIGN_SUB) \ + X("->" , TOKEN_DEREF) \ + X("-" , TOKEN_SUB) \ + X("*=" , TOKEN_ASSIGN_MUL) \ + X("*" , TOKEN_MUL) \ + X("/=" , TOKEN_ASSIGN_DIV) \ + X("/" , TOKEN_DIV) \ + X("//" , 
TOKEN_LINE_COMMENT) \ + X("/* */" , TOKEN_BLOCK_COMMENT) \ + X("%=" , TOKEN_ASSIGN_MOD) \ + X("%" , TOKEN_MOD) \ + X("&&" , TOKEN_AND_AND) \ + X("&=" , TOKEN_ASSIGN_AND) \ + X("&" , TOKEN_AND) \ + X("||" , TOKEN_OR_OR) \ + X("|=" , TOKEN_ASSIGN_OR) \ + X("|" , TOKEN_OR) \ + X("^=" , TOKEN_ASSIGN_XOR) \ + X("^" , TOKEN_XOR) \ + X("<<=" , TOKEN_ASSIGN_L_SH) \ + X("<<" , TOKEN_L_SH) \ + X("<=" , TOKEN_LE) \ + X("<" , TOKEN_LT) \ + X(">>=" , TOKEN_ASSIGN_R_SH) \ + X(">>" , TOKEN_R_SH) \ + X(">=" , TOKEN_GE) \ + X(">" , TOKEN_GT) \ + X("!" , TOKEN_NOT) \ + X("!=" , TOKEN_NEQ) \ + X("~" , TOKEN_BIT_NOT) \ + X("[" , TOKEN_L_BRACKET) \ + X("]" , TOKEN_R_BRACKET) \ + X("(" , TOKEN_L_PAREN) \ + X(")" , TOKEN_R_PAREN) \ + X("{" , TOKEN_L_BRACE) \ + X("}" , TOKEN_R_BRACE) \ + X(";" , TOKEN_SEMICOLON) \ + X("," , TOKEN_COMMA) \ + X(":" , TOKEN_COLON) \ + X("." , TOKEN_DOT) \ + X("..." , TOKEN_ELLIPSIS) \ + X("?" , TOKEN_COND) \ + X(identifier , TOKEN_IDENT) \ + X(int_literal , TOKEN_INT_LITERAL) \ + X(float_literal , TOKEN_FLOAT_LITERAL) \ + X(char_literal , TOKEN_CHAR_LITERAL) \ + X(string_literal , TOKEN_STRING_LITERAL) \ + // END + +// 定义TokenType枚举 +enum TokenType { + // 处理普通token + #define X(str, tok) tok, + TOKEN_TABLE + #undef X + + // 处理关键字(保持原有格式) + #define X(name, std, tok) tok, + KEYWORD_TABLE + #undef X +}; + +struct TokenConstant { + int have; + union { + char ch; + int i; + float f; + double d; + long long ll; + char* str; + }; +}; + +// "break" +// "case" +// "char" +// "const" +// "continue" +// "default" +// "do" +// "double" +// "else" +// "enum" +// "extern" +// "float" +// "for" +// "goto" +// "if" +// "inline (C99)" +// "int" +// "long" +// "register" +// "restrict (C99)" +// "return" +// "short" +// "signed" +// "sizeof" +// "static" +// "struct" +// "switch" +// "typedef" +// "union" +// "unsigned" +// "void" +// "volatile" +// "while" + +// alignas (C23) +// alignof (C23) +// auto +// bool (C23) +// constexpr (C23) +// false (C23) +// nullptr (C23) +// 
static_assert (C23) +// thread_local (C23) +// true (C23) +// typeof (C23) +// typeof_unqual (C23) +// _Alignas (C11) +// _Alignof (C11) +// _Atomic (C11) +// _BitInt (C23) +// _Bool (C99) +// _Complex (C99) +// _Decimal128 (C23) +// _Decimal32 (C23) +// _Decimal64 (C23) +// _Generic (C11) +// _Imaginary (C99) +// _Noreturn (C11) +// _Static_assert (C11) +// _Thread_local (C11) + +// a = b +// a += b +// a -= b +// a *= b +// a /= b +// a %= b +// a &= b +// a |= b +// a ^= b +// a <<= b +// a >>= b + +// ++a +// --a +// a++ +// a-- + +// +a +// -a +// a + b +// a - b +// a * b +// a / b +// a % b +// ~a +// a & b +// a | b +// a ^ b +// a << b +// a >> b + +// !a +// a && b +// a || b + +// a == b +// a != b +// a < b +// a > b +// a <= b +// a >= b + +// a[b] +// *a +// &a +// a->b +// a.b + +// a(...) +// a, b +// (type) a +// a ? b : c +// sizeof + +// _Alignof +// (C11) + +#endif \ No newline at end of file diff --git a/ccompiler/frontend/parser/ast/README.md b/ccompiler/frontend/parser/ast/README.md new file mode 100644 index 0000000..6303b46 --- /dev/null +++ b/ccompiler/frontend/parser/ast/README.md @@ -0,0 +1,18 @@ +- ast.c 作为抽象语法树的定义 + +- block.c 作为块的实现主要用于处理作用域,需要符号表 + +- decl.c 作为声明的实现,其中主要携带变量声明,函数声明见 func.c ,需要符号表 + +- func.c 作为函数的实现,其中主要携带函数声明,以及函数定义,需要符号表 + +- expr.c 作为表达式的实现。需要符号表 + +- stmt.c 作为语句的实现。需要表达式类型判断合法性 + +- term.c 作为终结符的实现。需要表达式类型判断合法性 + +- program.c 作为词法分析(语义分析)入口函数,可以根据parser结构生成AST + +其中stmt参考cppreference +其中expr参考AI以及CParser diff --git a/ccompiler/frontend/parser/ast/ast.c b/ccompiler/frontend/parser/ast/ast.c new file mode 100644 index 0000000..9612c6b --- /dev/null +++ b/ccompiler/frontend/parser/ast/ast.c @@ -0,0 +1,173 @@ +#include "ast.h" +#include "../parser.h" +struct ASTNode* new_ast_node(void) { + struct ASTNode* node = xmalloc(sizeof(struct ASTNode)); + init_ast_node(node); + return node; +} + +void init_ast_node(struct ASTNode* node) { + node->type = NT_INIT; + + for (int i = 0; i < sizeof(node->children) / 
sizeof(node->children[0]); i++) { + node->children[i] = NULL; + } +} + +struct ASTNode* find_ast_node(struct ASTNode* node, enum ASTType type) { + +} + +#include +static void pnt_depth(int depth) { + for (int i = 0; i < depth; i++) { + printf(" "); + } +} + +void pnt_ast(struct ASTNode* node, int depth) { + if (!node) return; + pnt_depth(depth); + switch (node->type) { + case NT_ROOT: + for (int i = 0; i < node->root.child_size; i++) { + pnt_ast(node->root.children[i], depth); + } + return; + + case NT_ADD : printf("+ \n"); break; // (expr) + (expr) + case NT_SUB : printf("- \n"); break; // (expr) - (expr) + case NT_MUL : printf("* \n"); break; // (expr) * (expr) + case NT_DIV : printf("/ \n"); break; // (expr) / (expr) + case NT_MOD : printf("%%\n"); break; // (expr) % (expr) + case NT_AND : printf("& \n"); break; // (expr) & (expr) + case NT_OR : printf("| \n"); break; // (expr) | (expr) + case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr) + case NT_L_SH : printf("<<\n"); break; // (expr) << (expr) + case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr) + case NT_EQ : printf("==\n"); break; // (expr) == (expr) + case NT_NEQ : printf("!=\n"); break; // (expr) != (expr) + case NT_LE : printf("<=\n"); break; // (expr) <= (expr) + case NT_GE : printf(">=\n"); break; // (expr) >= (expr) + case NT_LT : printf("< \n"); break; // (expr) < (expr) + case NT_GT : printf("> \n"); break; // (expr) > (expr) + case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr) + case NT_OR_OR : printf("||\n"); break; // (expr) || (expr) + case NT_NOT : printf("! \n"); break; // ! (expr) + case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr) + case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符 + case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr) + // case NT_COND : // (expr) ? (expr) : (expr) + + case NT_STMT_EMPTY : // ; + printf(";\n"); + break; + case NT_STMT_IF : // if (cond) { ... 
} [else {...}] + printf("if"); + pnt_ast(node->if_stmt.cond, depth+1); + pnt_ast(node->if_stmt.if_stmt, depth+1); + if (node->if_stmt.else_stmt) { + pnt_depth(depth); + printf("else"); + pnt_ast(node->if_stmt.else_stmt, depth+1); + } + break; + case NT_STMT_WHILE : // while (cond) { ... } + printf("while\n"); + pnt_ast(node->while_stmt.cond, depth+1); + pnt_ast(node->while_stmt.body, depth+1); + break; + case NT_STMT_DOWHILE : // do {...} while (cond) + printf("do-while\n"); + pnt_ast(node->do_while_stmt.body, depth+1); + pnt_ast(node->do_while_stmt.cond, depth+1); + break; + case NT_STMT_FOR : // for (init; cond; iter) {...} + printf("for\n"); + if (node->for_stmt.init) + pnt_ast(node->for_stmt.init, depth+1); + if (node->for_stmt.cond) + pnt_ast(node->for_stmt.cond, depth+1); + if (node->for_stmt.iter) + pnt_ast(node->for_stmt.iter, depth+1); + pnt_ast(node->for_stmt.body, depth+1); + break; + case NT_STMT_SWITCH : // switch (expr) { case ... } + case NT_STMT_BREAK : // break; + case NT_STMT_CONTINUE : // continue; + case NT_STMT_GOTO : // goto label; + case NT_STMT_CASE : // case const_expr: + case NT_STMT_DEFAULT : // default: + case NT_STMT_LABEL : // label: + break; + case NT_STMT_BLOCK : // { ... 
} + printf("{\n"); + for (int i = 0; i < node->block.child_size; i++) { + pnt_ast(node->block.children[i], depth+1); + } + pnt_depth(depth); + printf("}\n"); + break; + case NT_STMT_RETURN : // return expr; + printf("return"); + if (node->return_stmt.expr_stmt) { + printf(" "); + pnt_ast(node->return_stmt.expr_stmt, depth+1); + } else { + printf("\n"); + } + break; + case NT_STMT_EXPR : // expr; + printf("stmt\n"); + pnt_ast(node->expr_stmt.expr_stmt, depth); + pnt_depth(depth); + printf(";\n"); + break; + case NT_DECL_VAR : // type name; or type name = expr; + printf("decl_val\n"); + break; + case NT_DECL_FUNC: // type func_name(param_list); + printf("decl func %s\n", node->func.name->syms.tok.constant.str); + break; + case NT_FUNC : // type func_name(param_list) {...} + printf("def func %s\n", node->func.name->syms.tok.constant.str); + // pnt_ast(node->child.func.params, depth); + pnt_ast(node->func.body, depth); + // pnt_ast(node->child.func.ret, depth); + break; + case NT_PARAM : // 函数形参 + printf("param\n"); + case NT_ARG_LIST : // 实参列表(需要与NT_CALL配合) + printf("arg_list\n"); + case NT_TERM_CALL : // func (expr) + printf("call\n"); + break; + case NT_TERM_IDENT: + printf("%s\n", node->syms.tok.constant.str); + break; + case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword + struct Token * tok = &node->syms.tok; + switch (tok->type) { + case TOKEN_CHAR_LITERAL: + printf("%c\n", tok->constant.ch); + break; + case TOKEN_INT_LITERAL: + printf("%d\n", tok->constant.i); + break; + case TOKEN_STRING_LITERAL: + printf("%s\n", tok->constant.str); + break; + default: + printf("unknown term val\n"); + break; + } + default: + break; + } + + // 通用子节点递归处理 + if (node->type <= NT_ASSIGN) { // 表达式类统一处理子节点 + if (node->expr.left) pnt_ast(node->expr.left, depth+1); + if (node->expr.right) pnt_ast(node->expr.right, depth + 1); + } +} diff --git a/ccompiler/frontend/parser/ast/ast.h b/ccompiler/frontend/parser/ast/ast.h new file mode 100644 index 0000000..cf77d59 
--- /dev/null +++ b/ccompiler/frontend/parser/ast/ast.h @@ -0,0 +1,191 @@ +#ifndef __AST_H__ +#define __AST_H__ + +#include "../../frontend.h" +#include "../../lexer/lexer.h" +#include "../type.h" + +enum ASTType { + NT_INIT, + NT_ROOT, // global scope in root node + NT_ADD, // (expr) + (expr) + NT_SUB, // (expr) - (expr) + NT_MUL, // (expr) * (expr) + NT_DIV, // (expr) / (expr) + NT_MOD, // (expr) % (expr) + NT_AND, // (expr) & (expr) + NT_OR, // (expr) | (expr) + NT_XOR, // (expr) ^ (expr) + NT_L_SH, // (expr) << (expr) + NT_R_SH, // (expr) >> (expr) + NT_EQ, // (expr) == (expr) + NT_NEQ, // (expr) != (expr) + NT_LE, // (expr) <= (expr) + NT_GE, // (expr) >= (expr) + NT_LT, // (expr) < (expr) + NT_GT, // (expr) > (expr) + NT_AND_AND, // (expr) && (expr) + NT_OR_OR, // (expr) || (expr) + NT_NOT, // ! (expr) + NT_BIT_NOT, // ~ (expr) + NT_COND, // (expr) ? (expr) : (expr) + NT_COMMA, // expr, expr 逗号运算符 + NT_ASSIGN, // (expr) = (expr) + + NT_ADDRESS, // &expr (取地址) + NT_DEREF, // *expr (解引用) + NT_INDEX, // arr[index] (数组访问) + NT_MEMBER, // struct.member + NT_PTR_MEMBER,// ptr->member + + NT_CAST, // (type)expr 强制类型转换 + NT_SIZEOF, // sizeof(type|expr) +// NT_ALIGNOF, // _Alignof(type) (C11) + + NT_STMT_EMPTY, // ; + NT_STMT_IF, // if (cond) { ... } [else {...}] + NT_STMT_WHILE, // while (cond) { ... } + NT_STMT_DOWHILE, // do {...} while (cond) + NT_STMT_FOR, // for (init; cond; iter) {...} + NT_STMT_SWITCH, // switch (expr) { case ... } + NT_STMT_BREAK, // break; + NT_STMT_CONTINUE, // continue; + NT_STMT_GOTO, // goto label; + NT_STMT_CASE, // case const_expr: + NT_STMT_DEFAULT, // default: + NT_STMT_LABEL, // label: + NT_STMT_BLOCK, // { ... 
} + NT_STMT_RETURN, // return expr; + NT_STMT_EXPR, // expr; + + NT_BLOCK, + // NT_TYPE_BASE, // 基础类型节点 + // NT_TYPE_PTR, // 指针类型 + // NT_TYPE_ARRAY, // 数组类型 + // NT_TYPE_FUNC, // 函数类型 + // NT_TYPE_QUAL, // 限定符节点 + + NT_DECL_VAR, // type name; or type name = expr; + NT_DECL_FUNC, // type func_name(param_list); + NT_FUNC, // type func_name(param_list) {...} + NT_PARAM, // 函数形参 + NT_ARG_LIST, // 实参列表(需要与NT_CALL配合) + + NT_TERM_CALL, // func (expr) + NT_TERM_VAL, + NT_TERM_IDENT, + NT_TERM_TYPE, +}; + +struct ASTNode { + enum ASTType type; + + union { + void *children[6]; + struct { + struct ASTNode** children; + int child_size; + } root; + struct { + struct ASTNode** children; // array of children + int child_size; + } block; + struct { + struct ASTNode* decl_node; + struct Token tok; + } syms; + struct { + struct ASTNode *arr; + int size; + } params; + struct { + const char* name; + struct ASTNode* params; + struct ASTNode* func_decl; + } call; + struct { + struct ASTNode *type; + struct ASTNode *name; + struct ASTNode *expr_stmt; // optional + void* data; + } decl_val; + struct { + struct ASTNode *ret; + struct ASTNode *name; + struct ASTNode *params; // array of params + void* data; + } func_decl; + struct { + struct ASTNode *ret; + struct ASTNode *name; + struct ASTNode *params; // array of params + struct ASTNode *body; // optional + } func; + struct { + struct ASTNode *left; + struct ASTNode *right; + struct ASTNode *optional; // optional + } expr; + struct { + struct ASTNode *cond; + struct ASTNode *if_stmt; + struct ASTNode *else_stmt; // optional + } if_stmt; + struct { + struct ASTNode *cond; + struct ASTNode *body; + } switch_stmt; + struct { + struct ASTNode *cond; + struct ASTNode *body; + } while_stmt; + struct { + struct ASTNode *body; + struct ASTNode *cond; + } do_while_stmt; + struct { + struct ASTNode *init; + struct ASTNode *cond; // optional + struct ASTNode *iter; // optional + struct ASTNode *body; + } for_stmt; + struct { + struct ASTNode 
*expr_stmt; // optional + } return_stmt; + struct { + struct ASTNode *label; + } goto_stmt; + struct { + struct ASTNode *label; + } label_stmt; + struct { + struct ASTNode *block; + } block_stmt; + struct { + struct ASTNode *expr_stmt; + } expr_stmt; + }; +}; + +struct ASTNode* new_ast_node(void); +void init_ast_node(struct ASTNode* node); +void pnt_ast(struct ASTNode* node, int depth); + +struct Parser; +typedef struct ASTNode* (*parse_func_t) (struct Parser*); + +void parse_prog(struct Parser* parser); +struct ASTNode* parse_block(struct Parser* parser); +struct ASTNode* parse_stmt(struct Parser* parser); +struct ASTNode* parse_expr(struct Parser* parser); +struct ASTNode* parse_func(struct Parser* parser); +struct ASTNode* parse_decl(struct Parser* parser); + +struct ASTNode* parse_ident(struct Parser* parser); +struct ASTNode* parse_type(struct Parser* parser); + +int peek_decl(struct Parser* parser); + +struct ASTNode* parser_ident_without_pop(struct Parser* parser); + +#endif diff --git a/ccompiler/frontend/parser/ast/block.c b/ccompiler/frontend/parser/ast/block.c new file mode 100644 index 0000000..525c795 --- /dev/null +++ b/ccompiler/frontend/parser/ast/block.c @@ -0,0 +1,50 @@ + +#include "../parser.h" +#include "ast.h" +#include "../symtab/symtab.h" + +#ifndef BLOCK_MAX_NODE +#define BLOCK_MAX_NODE (1024) +#endif + +struct ASTNode* parse_block(struct Parser* parser) { + symtab_enter_scope(parser->symtab); + + // parse_decl(parser); // decl_var + enum TokenType ttype; + struct ASTNode* node = new_ast_node(); + node->type = NT_BLOCK; + flushpeektok(parser); + ttype = peektoktype(parser); + if (ttype != TOKEN_L_BRACE) { + error("block need '{' start"); + } + poptok(parser); + + node->block.children = malloc(sizeof(struct ASTNode*) * BLOCK_MAX_NODE); + struct ASTNode* child = NULL; + while (1) { + if (peek_decl(parser) == 1) { + child = parse_decl(parser); + goto ADD_CHILD; + } + + flushpeektok(parser); + ttype = peektoktype(parser); + switch (ttype) { + 
case TOKEN_R_BRACE: + poptok(parser); + goto END; + default: + child = parse_stmt(parser); + goto ADD_CHILD; + break; + } + continue; + ADD_CHILD: + node->block.children[node->block.child_size++] = child; + } +END: + symtab_leave_scope(parser->symtab); + return node; +} diff --git a/ccompiler/frontend/parser/ast/decl.c b/ccompiler/frontend/parser/ast/decl.c new file mode 100644 index 0000000..56b1414 --- /dev/null +++ b/ccompiler/frontend/parser/ast/decl.c @@ -0,0 +1,94 @@ +#include "../parser.h" +#include "ast.h" +#include "../symtab/symtab.h" + +/** + * 0 false + * 1 true + */ +int peek_decl(struct Parser* parser) { + flushpeektok(parser); + switch (peektoktype(parser)) { + case TOKEN_STATIC: + case TOKEN_EXTERN: + case TOKEN_REGISTER: + case TOKEN_TYPEDEF: + error("not impliment"); + break; + default: + flushpeektok(parser); + } + + switch (peektoktype(parser)) { + case TOKEN_VOID: + case TOKEN_CHAR: + case TOKEN_SHORT: + case TOKEN_INT: + case TOKEN_LONG: + case TOKEN_FLOAT: + case TOKEN_DOUBLE: + return 1; + default: + flushpeektok(parser); + } +} + +struct ASTNode* parse_decl_val(struct Parser* parser) { + flushpeektok(parser); + // parse_type + enum TokenType ttype; + struct ASTNode* node; + + struct ASTNode* type_node = parse_type(parser); + struct ASTNode* name_node = parser_ident_without_pop(parser); + + node = new_ast_node(); + node->decl_val.type = type_node; + node->decl_val.name = name_node; + node->type = NT_DECL_VAR; + symtab_add_symbol(parser->symtab, name_node->syms.tok.constant.str, node); + + ttype = peektoktype(parser); + if (ttype == TOKEN_ASSIGN) { + node->decl_val.expr_stmt = parse_stmt(parser); + if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) { + error("parser_decl_val want stmt_expr"); + } + } else if (ttype == TOKEN_SEMICOLON) { + poptok(parser); + expecttok(parser, TOKEN_SEMICOLON); + } else { + error("parser_decl_val syntax error"); + } + return node; +} + +// 类型解析入口改进 +struct ASTNode* parse_decl(struct Parser* parser) { + 
/**
 * Binding strength of each operator class, weakest first.
 *
 * The numeric order is what the expression parser compares, so the
 * enumerators must stay sorted from loosest to tightest binding.
 */
enum Precedence {
    PREC_BOTTOM = 0,
    PREC_EXPRESSION,        /* ,                                  left to right */
    PREC_ASSIGNMENT,        /* = += -= *= /= %= <<= >>= &= ^= |=  right to left */
    PREC_CONDITIONAL,       /* ?:                                 right to left */
    PREC_LOGICAL_OR,        /* ||                                 left to right */
    PREC_LOGICAL_AND,       /* &&                                 left to right */
    PREC_OR,                /* |                                  left to right */
    PREC_XOR,               /* ^                                  left to right */
    PREC_AND,               /* &                                  left to right */
    PREC_EQUALITY,          /* == !=                              left to right */
    PREC_RELATIONAL,        /* < <= > >=                          left to right */
    PREC_SHIFT,             /* << >>                              left to right */
    PREC_ADDITIVE,          /* + -                                left to right */
    PREC_MULTIPLICATIVE,    /* * / %                              left to right */
    PREC_CAST,              /* (type)                             right to left */
    PREC_UNARY,             /* ! ~ ++ -- + - * & sizeof           right to left */
    PREC_POSTFIX,           /* () [] -> .                         left to right */
    PREC_PRIMARY,
    PREC_TOP
};
(expr) : (expr) + // } +} + +static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) { + struct ASTNode* node = new_ast_node(); + node->type = NT_COMMA; + node->expr.left = left; + node->expr.right = parse_subexpression(parser, PREC_EXPRESSION); +} + +static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) { + flushpeektok(parser); + enum TokenType ttype = peektoktype(parser); + poptok(parser); + struct ASTNode* node = new_ast_node(); + node->type = NT_ASSIGN; + // saved left + node->expr.left = left; + enum Precedence next = PREC_ASSIGNMENT + 1; + switch (ttype) { + case TOKEN_ASSIGN : + left = parse_subexpression(parser, next); + break; + case TOKEN_ASSIGN_ADD : + left = gen_node2(left, parse_subexpression(parser, next), NT_ADD); + break; + case TOKEN_ASSIGN_SUB : + left = gen_node2(left, parse_subexpression(parser, next), NT_SUB); + break; + case TOKEN_ASSIGN_MUL : + left = gen_node2(left, parse_subexpression(parser, next), NT_MUL); + break; + case TOKEN_ASSIGN_DIV : + left = gen_node2(left, parse_subexpression(parser, next), NT_DIV); + break; + case TOKEN_ASSIGN_MOD : + left = gen_node2(left, parse_subexpression(parser, next), NT_MOD); + break; + case TOKEN_ASSIGN_L_SH : + left = gen_node2(left, parse_subexpression(parser, next), NT_L_SH); + break; + case TOKEN_ASSIGN_R_SH : + left = gen_node2(left, parse_subexpression(parser, next), NT_R_SH); + break; + case TOKEN_ASSIGN_AND : + left = gen_node2(left, parse_subexpression(parser, next), NT_AND); + break; + case TOKEN_ASSIGN_OR : + left = gen_node2(left, parse_subexpression(parser, next), NT_OR); + break; + case TOKEN_ASSIGN_XOR : + left = gen_node2(left, parse_subexpression(parser, next), NT_XOR); + break; + default: + error("unsupported operator"); + break; + } + node->expr.right = left; +} + +static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) { + flushpeektok(parser); + enum TokenType ttype = peektoktype(parser); + poptok(parser); + 
struct ASTNode* node = new_ast_node(); + // saved left + node->expr.left = left; + switch (ttype) { + case TOKEN_EQ: + node->type = NT_EQ; + node->expr.right = parse_subexpression(parser, PREC_EQUALITY); + break; + case TOKEN_NEQ: + node->type = NT_NEQ; + node->expr.right = parse_subexpression(parser, PREC_EQUALITY); + break; + case TOKEN_LT: + node->type = NT_LT; + node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); + break; + case TOKEN_GT: + node->type = NT_GT; + node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); + break; + case TOKEN_LE: + node->type = NT_LE; + node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); + break; + case TOKEN_GE: + node->type = NT_GE; + node->expr.right = parse_subexpression(parser, PREC_RELATIONAL); + break; + default: + error("invalid operator"); + } +} + +static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) { + flushpeektok(parser); + enum TokenType ttype = peektoktype(parser); + poptok(parser); + struct ASTNode* node = new_ast_node(); + node->expr.left = left; + switch (ttype) { + case TOKEN_OR_OR: + node->type = NT_OR_OR; + node->expr.right = parse_subexpression(parser, PREC_LOGICAL_OR); + break; + case TOKEN_AND_AND: + node->type = NT_AND_AND; + node->expr.right = parse_subexpression(parser, PREC_LOGICAL_AND); + break; + case TOKEN_OR: + node->type = NT_OR; + node->expr.right = parse_subexpression(parser, PREC_OR); + break; + case TOKEN_XOR: + node->type = NT_XOR; + node->expr.right = parse_subexpression(parser, PREC_XOR); + break; + case TOKEN_AND: + node->type = NT_AND; + node->expr.right = parse_subexpression(parser, PREC_AND); + break; + case TOKEN_L_SH: + node->type = NT_L_SH; + node->expr.right = parse_subexpression(parser, PREC_SHIFT); + break; + case TOKEN_R_SH: + node->type = NT_R_SH; + node->expr.right = parse_subexpression(parser, PREC_SHIFT); + break; + case TOKEN_ADD: + node->type = NT_ADD; + node->expr.right = parse_subexpression(parser, PREC_ADDITIVE); 
+ break; + case TOKEN_SUB: + node->type = NT_SUB; + node->expr.right = parse_subexpression(parser, PREC_ADDITIVE); + break; + case TOKEN_MUL: + node->type = NT_MUL; + node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE); + break; + case TOKEN_DIV: + node->type = NT_DIV; + node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE); + break; + case TOKEN_MOD: + node->type = NT_MOD; + node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE); + break; + default: + break; + } + return node; +} + + +// 新增函数调用解析 +static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) { + struct ASTNode* node = new_ast_node(); + node->type = NT_TERM_CALL; + poptok(parser); // 跳过 '(' + + enum TokenType ttype; + // 解析参数列表 + while ((ttype = peektoktype(parser)) != TOKEN_R_PAREN) { + // add_arg(node, parse_expr(parser)); + if (ttype == TOKEN_COMMA) poptok(parser); + else poptok(parser); + } + poptok(parser); // 跳过 ')' + + char* name = ident->syms.tok.constant.str; + void* sym = symtab_lookup_symbol(parser->symtab, name); + if (sym == NULL) { + error("function not decl %s", name); + } + node->call.name = name; + node->call.params = NULL; + node->call.func_decl = sym; + return node; +} + +static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) { + flushpeektok(parser); + enum TokenType ttype; + expecttok(parser, TOKEN_L_PAREN); + left = parse_subexpression(parser, PREC_EXPRESSION); + flushpeektok(parser); + expecttok(parser, TOKEN_R_PAREN); + return left; +} + +typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*); +static struct expr_prec_table_t { + parse_expr_fun_t parser; + enum Precedence prec; + enum ParseType ptype; +} expr_table [256] = { + [TOKEN_COMMA] = {parse_comma, PREC_EXPRESSION, INFIX_PARSER}, + [TOKEN_ASSIGN] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_ADD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_SUB] = {parse_assign, 
PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_MUL] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_DIV] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_MOD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_AND] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_OR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + [TOKEN_ASSIGN_XOR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER}, + + [TOKEN_OR_OR] = {parse_cal, PREC_LOGICAL_OR , INFIX_PARSER}, + [TOKEN_AND_AND] = {parse_cal, PREC_LOGICAL_AND, INFIX_PARSER}, + [TOKEN_OR] = {parse_cal, PREC_OR , INFIX_PARSER}, + [TOKEN_XOR] = {parse_cal, PREC_XOR , INFIX_PARSER}, + [TOKEN_AND] = {parse_cal, PREC_AND , INFIX_PARSER}, + + [TOKEN_EQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER}, + [TOKEN_NEQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER}, + [TOKEN_LT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER}, + [TOKEN_LE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER}, + [TOKEN_GT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER}, + [TOKEN_GE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER}, + + [TOKEN_L_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER}, + [TOKEN_R_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER}, + [TOKEN_ADD] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER}, + [TOKEN_SUB] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER}, + [TOKEN_MUL] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER}, + [TOKEN_DIV] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER}, + [TOKEN_MOD] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER}, + + [TOKEN_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER}, + [TOKEN_BIT_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER}, + [TOKEN_ADD_ADD] = {NULL, PREC_UNARY, PREFIX_PARSER}, + [TOKEN_SUB_SUB] = {NULL, PREC_UNARY, PREFIX_PARSER}, + // + - * & sizeof + + [TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER}, +}; + +static struct ASTNode 
*parse_primary_expression(struct Parser* parser) { + flushpeektok(parser); + + struct Token* tok = peektok(parser); + struct ASTNode *node = new_ast_node(); + node->type = NT_TERM_VAL; + node->syms.tok = *tok; + + switch (tok->type) { + case TOKEN_INT_LITERAL: + // node->data.data_type = TYPE_INT; + break; + case TOKEN_FLOAT_LITERAL: + warn("float not supported"); + break; + case TOKEN_CHAR_LITERAL: + // node->data.data_type = TYPE_CHAR; + break; + case TOKEN_STRING_LITERAL: + // node->data.data_type = TYPE_POINTER; + case TOKEN_IDENT: + node = parse_ident(parser); + if (peektoktype(parser) == TOKEN_L_PAREN) { + node = parse_call(parser, node); + } else { + void *sym = symtab_lookup_symbol(parser->symtab, tok->constant.str); + if (sym == NULL) { + error("undefined symbol but use %s", tok->constant.str); + } + node->type = NT_TERM_IDENT; + node->syms.decl_node = sym; + goto END; + } + default: + return NULL; + } + poptok(parser); +END: + return node; +} + +static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) { + enum TokenType ttype; + struct expr_prec_table_t* work; + struct ASTNode* left; + + while (1) { + flushpeektok(parser); + ttype = peektoktype(parser); + work = &expr_table[ttype]; + // FIXME + if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) { + break; + } + if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) { + if (work->parser != NULL) { + left = work->parser(parser, NULL); + } else { + left = parse_primary_expression(parser); + } + } else if (work->ptype == INFIX_PARSER) { + if (work->parser == NULL) + break; + if (work->prec <= prec) + break; + left = work->parser(parser, left); + } + // assert(left != NULL); + } + + return left; +} + +struct ASTNode* parse_expr(struct Parser* parser) { + flushpeektok(parser); + enum TokenType ttype = peektoktype(parser); + switch (ttype) { + case TOKEN_NOT: + case TOKEN_AND: + case TOKEN_L_PAREN: + case TOKEN_MUL: + case TOKEN_ADD: + case TOKEN_SUB: + case 
TOKEN_BIT_NOT: + case TOKEN_AND_AND: + case TOKEN_CHAR_LITERAL: + case TOKEN_INT_LITERAL: + case TOKEN_STRING_LITERAL: + case TOKEN_ADD_ADD: + case TOKEN_SUB_SUB: + case TOKEN_SIZEOF: + case TOKEN_IDENT: + return parse_subexpression(parser, PREC_EXPRESSION); + default: + error("Want expr but not got %s", get_token_name(ttype)); + break; + } +} diff --git a/ccompiler/frontend/parser/ast/func.c b/ccompiler/frontend/parser/ast/func.c new file mode 100644 index 0000000..293daab --- /dev/null +++ b/ccompiler/frontend/parser/ast/func.c @@ -0,0 +1,120 @@ +#include "../parser.h" +#include "../symtab/symtab.h" +#include "ast.h" + +#ifndef FUNC_PARAM_CACHE_SIZE +#define FUNC_PARAM_CACHE_SIZE 32 // 合理初始值,可覆盖99%常见情况 +#endif + +struct FuncParamCache { + struct Token tokens[FUNC_PARAM_CACHE_SIZE]; + int read_pos; // 当前读取位置 + int write_pos; // 写入位置 + int depth; // 当前缓存深度 +}; + +static enum TokenType peekcachetype(struct FuncParamCache* cache) { + return cache->tokens[cache->read_pos++].type; +} + +// TODO 语义分析压入符号表 +static void parse_params(struct Parser* parser, struct FuncParamCache* cache, struct ASTNode* node) { + // = peekcachetype(cache); + enum TokenType ttype; + // if (ttype != TOKEN_L_PAREN) { + // error("function expected '('\n"); + // } + struct ASTNode *params = new_ast_node(); + node->func.params = params; + int params_size = 0; + + while ((ttype = peekcachetype(cache)) != TOKEN_R_PAREN) { + switch (ttype) { + case TOKEN_COMMA: + break; + case TOKEN_ELLIPSIS: + ttype = peekcachetype(cache); + if (ttype != TOKEN_R_PAREN) { + error("... 
must be a last parameter list (expect ')')"); + } + // TODO + error("not implement"); + break; + case TOKEN_IDENT: + params->children[params_size++] = NULL; + break; + default: + // TODO 使用cache的类型解析 + // parse_type(parser); + // TODO type parse + // ttype = peekcachetype(cache); + // ttype = peekcachetype(cache); + // if (ttype != TOKEN_IDENT) { + // node->node_type = NT_DECL_FUNC; + // flushpeektok(parser); + // continue; + // } + // error("function expected ')' or ','\n"); + } + } +} + +enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* cache) { + cache->depth = 1; + cache->read_pos = 0; + cache->write_pos = 0; + + while (cache->depth) { + struct Token* tok = peektok(parser); + poptok(parser); + if (cache->write_pos >= FUNC_PARAM_CACHE_SIZE - 1) { + error("function parameter list too long"); + } + cache->tokens[cache->write_pos++] = *tok; + switch (tok->type) { + case TOKEN_L_PAREN: + cache->depth++; + break; + case TOKEN_R_PAREN: + cache->depth--; + break; + } + } + + switch (peektoktype(parser)) { + case TOKEN_SEMICOLON: + poptok(parser); + return NT_DECL_FUNC; + case TOKEN_L_BRACE: + return NT_FUNC; + break; + default: + error("function define or decl need '{' or ';' but you don't got"); + } +} + +struct ASTNode* parse_func(struct Parser* parser) { + struct ASTNode* ret_type = parse_type(parser); + struct ASTNode* func_name = parse_ident(parser); + + struct ASTNode* node = new_ast_node(); + node->func.ret = ret_type; + node->func.name = func_name; + + flushpeektok(parser); + expecttok(parser, TOKEN_L_PAREN); + struct FuncParamCache cache; + node->type = check_is_func_decl(parser, &cache); + + symtab_add_symbol(parser->symtab, func_name->syms.tok.constant.str, node); + if (node->type == NT_DECL_FUNC) { + return node; + } + + symtab_enter_scope(parser->symtab); + parse_params(parser, &cache, node); + node->func.body = parse_block(parser); + symtab_leave_scope(parser->symtab); + + return node; +} diff --git 
a/ccompiler/frontend/parser/ast/program.c b/ccompiler/frontend/parser/ast/program.c new file mode 100644 index 0000000..08281cd --- /dev/null +++ b/ccompiler/frontend/parser/ast/program.c @@ -0,0 +1,29 @@ +#include "../parser.h" +#include "ast.h" + +#ifndef PROG_MAX_NODE_SIZE +#define PROG_MAX_NODE_SIZE (1024 * 4) +#endif + +void parse_prog(struct Parser* parser) { + /** + * Program := (Declaration | Definition)* + * same as + * Program := Declaration* Definition* + */ + int child_size = 0; + parser->root = new_ast_node(); + struct ASTNode* node; + parser->root->root.children = xmalloc(sizeof(struct ASTNode*) * PROG_MAX_NODE_SIZE); + while (1) { + flushpeektok(parser); + if (peektoktype(parser) == TOKEN_EOF) { + break; + } + node = parse_decl(parser); + parser->root->root.children[child_size++] = node; + } + parser->root->type = NT_ROOT; + parser->root->root.child_size = child_size; + return; +} diff --git a/ccompiler/frontend/parser/ast/stmt.c b/ccompiler/frontend/parser/ast/stmt.c new file mode 100644 index 0000000..6797e8b --- /dev/null +++ b/ccompiler/frontend/parser/ast/stmt.c @@ -0,0 +1,240 @@ +#include "../parser.h" +#include "ast.h" + +struct ASTNode* parse_stmt(struct Parser* parser) { + flushpeektok(parser); + enum TokenType ttype = peektoktype(parser); + struct ASTNode* node = new_ast_node(); + switch (ttype) { + case TOKEN_IF: { + /** + * if (exp) stmt + * if (exp) stmt else stmt + */ + poptok(parser); + + expecttok(parser, TOKEN_L_PAREN); + node->if_stmt.cond = parse_expr(parser); + flushpeektok(parser); + expecttok(parser, TOKEN_R_PAREN); + + node->if_stmt.if_stmt = parse_stmt(parser); + ttype = peektoktype(parser); + if (ttype == TOKEN_ELSE) { + poptok(parser); + node->if_stmt.else_stmt = parse_stmt(parser); + } else { + node->if_stmt.else_stmt = NULL; + } + node->type = NT_STMT_IF; + break; + } + case TOKEN_SWITCH: { + /** + * switch (exp) stmt + */ + poptok(parser); + + expecttok(parser, TOKEN_L_PAREN); + node->switch_stmt.cond = 
parse_expr(parser); + expecttok(parser, TOKEN_R_PAREN); + + node->switch_stmt.body = parse_stmt(parser); + node->type = NT_STMT_SWITCH; + break; + } + case TOKEN_WHILE: { + /** + * while (exp) stmt + */ + poptok(parser); + + expecttok(parser, TOKEN_L_PAREN); + node->while_stmt.cond = parse_expr(parser); + expecttok(parser, TOKEN_R_PAREN); + + node->while_stmt.body = parse_stmt(parser); + node->type = NT_STMT_WHILE; + break; + } + case TOKEN_DO: { + /** + * do stmt while (exp) + */ + poptok(parser); + node->do_while_stmt.body = parse_stmt(parser); + ttype = peektoktype(parser); + if (ttype != TOKEN_WHILE) { + error("expected while after do"); + } + poptok(parser); + expecttok(parser, TOKEN_L_PAREN); + node->do_while_stmt.cond = parse_expr(parser); + expecttok(parser, TOKEN_R_PAREN); + node->type = NT_STMT_DOWHILE; + break; + } + case TOKEN_FOR: { + /** + * for (init; [cond]; [iter]) stmt + */ + // node->children.stmt.for_stmt.init + poptok(parser); + ttype = peektoktype(parser); + if (ttype != TOKEN_L_PAREN) { + error("expected ( after for"); + } + poptok(parser); + + // init expr or init decl_var + // TODO need add this feature + node->for_stmt.init = parse_expr(parser); + expecttok(parser, TOKEN_SEMICOLON); + + // cond expr or null + ttype = peektoktype(parser); + if (ttype != TOKEN_SEMICOLON) { + node->for_stmt.cond = parse_expr(parser); + expecttok(parser, TOKEN_SEMICOLON); + } else { + node->for_stmt.cond = NULL; + poptok(parser); + } + + // iter expr or null + ttype = peektoktype(parser); + if (ttype != TOKEN_R_PAREN) { + node->for_stmt.iter = parse_expr(parser); + expecttok(parser, TOKEN_R_PAREN); + } else { + node->for_stmt.iter = NULL; + poptok(parser); + } + + node->for_stmt.body = parse_stmt(parser); + node->type = NT_STMT_FOR; + break; + } + case TOKEN_BREAK: { + /** + * break ; + */ + // TODO check 导致外围 for、while 或 do-while 循环或 switch 语句终止。 + poptok(parser); + expecttok(parser, TOKEN_SEMICOLON); + + node->type = NT_STMT_BREAK; + break; + } + case 
TOKEN_CONTINUE: { + /** + * continue ; + */ + // TODO check 导致跳过整个 for、 while 或 do-while 循环体的剩余部分。 + poptok(parser); + expecttok(parser, TOKEN_SEMICOLON); + + node->type = NT_STMT_CONTINUE; + break; + } + case TOKEN_RETURN: { + /** + * return [exp] ; + */ + // TODO 终止当前函数并返回指定值给调用方函数。 + poptok(parser); + ttype = peektoktype(parser); + if (ttype != TOKEN_SEMICOLON) { + node->return_stmt.expr_stmt = parse_expr(parser); + flushpeektok(parser); + expecttok(parser, TOKEN_SEMICOLON); + } else { + node->return_stmt.expr_stmt = NULL; + } + poptok(parser); + node->type = NT_STMT_RETURN; + break; + } + case TOKEN_GOTO: { + /** + * goto label ; + */ + // TODO check label 将控制无条件转移到所欲位置。 + //在无法用约定的构造将控制转移到所欲位置时使用。 + poptok(parser); + // find symbol table + ttype = peektoktype(parser); + if (ttype != TOKEN_IDENT) { + error("expect identifier after goto"); + } + expecttok(parser, TOKEN_SEMICOLON); + // TODO filling label + node->goto_stmt.label = parse_ident(parser); + node->type = NT_STMT_GOTO; + break; + } + case TOKEN_SEMICOLON: { + /** + * ; + * empty stmt using by : + * while () ; + * if () ; + * for () ; + */ + poptok(parser); + node->type = NT_STMT_EMPTY; + break; + } + case TOKEN_L_BRACE: { + /** + * stmt_block like: { (decl_var | stmt) ... 
} + */ + node->block_stmt.block = parse_block(parser); + node->type = NT_STMT_BLOCK; + break; + } + case TOKEN_IDENT: { + // TODO label goto + if (peektoktype(parser) != TOKEN_COLON) { + goto EXP; + } + node->label_stmt.label = parse_ident(parser); + expecttok(parser, TOKEN_COLON); + node->type = NT_STMT_LABEL; + break; + } + case TOKEN_CASE: { + // TODO label switch + poptok(parser); + error("unimplemented switch label"); + node->label_stmt.label = parse_expr(parser); + // TODO 该表达式为const int + expecttok(parser, TOKEN_COLON); + node->type = NT_STMT_CASE; + break; + } + case TOKEN_DEFAULT: { + // TODO label switch default + poptok(parser); + expecttok(parser, TOKEN_COLON); + node->type = NT_STMT_DEFAULT; + break; + } + default: { + /** + * exp ; + */ + EXP: + node->expr_stmt.expr_stmt = parse_expr(parser); + flushpeektok(parser); + ttype = peektoktype(parser); + if (ttype != TOKEN_SEMICOLON) { + error("exp must end with \";\""); + } + poptok(parser); + node->type = NT_STMT_EXPR; + break; + } + + } +} diff --git a/ccompiler/frontend/parser/ast/term.c b/ccompiler/frontend/parser/ast/term.c new file mode 100644 index 0000000..bff6b2a --- /dev/null +++ b/ccompiler/frontend/parser/ast/term.c @@ -0,0 +1,182 @@ +#include "../parser.h" +#include "../type.h" +#include "ast.h" + +// /* 状态跳转表定义 */ +// typedef void (*StateHandler)(struct Parser*, struct ASTNode**); + +// enum TypeParseState { +// TPS_BASE_TYPE, // 解析基础类型 (int/char等) +// TPS_QUALIFIER, // 解析限定符 (const/volatile) +// TPS_POINTER, // 解析指针 (*) +// TPS_ARRAY, // 解析数组维度 ([n]) +// TPS_FUNC_PARAMS, // 解析函数参数列表 +// TPS_END, +// }; + +// ; + +// /* 状态处理函数前置声明 */ +// static void handle_base_type(struct Parser*, struct ASTNode**); +// static void handle_qualifier(struct Parser*, struct ASTNode**); +// static void handle_pointer(struct Parser*, struct ASTNode**); +// static void handle_array(struct Parser*, struct ASTNode**); +// static void handle_func_params(struct Parser*, struct ASTNode**); +// static void 
handle_error(struct Parser*, struct ASTNode**); + +// /* 状态跳转表(核心优化部分) */ +// static const struct StateTransition { +// enum TokenType tok; // 触发token +// StateHandler handler; // 处理函数 +// enum TypeParseState next_state; // 下一个状态 +// } state_table[][8] = { +// [TPS_QUALIFIER] = { +// {TOKEN_CONST, handle_qualifier, TPS_QUALIFIER}, +// {TOKEN_VOLATILE, handle_qualifier, TPS_QUALIFIER}, +// {TOKEN_VOID, handle_base_type, TPS_POINTER}, +// {TOKEN_CHAR, handle_base_type, TPS_POINTER}, +// {TOKEN_INT, handle_base_type, TPS_POINTER}, +// {TOKEN_EOF, handle_error, TPS_QUALIFIER}, +// /* 其他token默认处理 */ +// {0, NULL, TPS_BASE_TYPE} +// }, +// [TPS_BASE_TYPE] = { +// {TOKEN_MUL, handle_pointer, TPS_POINTER}, +// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY}, +// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS}, +// {TOKEN_EOF, NULL, TPS_END}, +// {0, NULL, TPS_POINTER} +// }, +// [TPS_POINTER] = { +// {TOKEN_MUL, handle_pointer, TPS_POINTER}, +// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY}, +// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS}, +// {0, NULL, TPS_END} +// }, +// [TPS_ARRAY] = { +// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY}, +// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS}, +// {0, NULL, TPS_END} +// }, +// [TPS_FUNC_PARAMS] = { +// {0, NULL, TPS_END} +// } +// }; + +// /* 新的类型解析函数 */ +// struct ASTNode* parse_type(struct Parser* p) { +// struct ASTNode* type_root = NULL; +// struct ASTNode** current = &type_root; +// enum TypeParseState state = TPS_QUALIFIER; + +// while (state != TPS_END) { +// enum TokenType t = peektoktype(p); +// const struct StateTransition* trans = state_table[state]; + +// // 查找匹配的转换规则 +// while (trans->tok != 0 && trans->tok != t) { +// trans++; +// } + +// if (trans->handler) { +// trans->handler(p, current); +// } else if (trans->tok == 0) { // 默认规则 +// state = trans->next_state; +// continue; +// } else { +// error("syntax error type parse error"); +// } + +// state = trans->next_state; +// } + +// return type_root; +// } 
+ +// /* 具体状态处理函数实现 */ +// static void handle_qualifier(struct Parser* p, struct ASTNode** current) { +// struct ASTNode* node = new_ast_node(); +// node->node_type = NT_TYPE_QUAL; +// node->data.data_type = poptok(p).type; + +// if (*current) { +// (*current)->child.decl.type = node; +// } else { +// *current = node; +// } +// } + +// static void handle_base_type(struct Parser* p, struct ASTNode** current) { +// struct ASTNode* node = new_ast_node(); +// node->node_type = NT_TYPE_BASE; +// node->data.data_type = poptok(p).type; + +// // 链接到当前节点链的末端 +// while (*current && (*current)->child.decl.type) { +// current = &(*current)->child.decl.type; +// } + +// if (*current) { +// (*current)->child.decl.type = node; +// } else { +// *current = node; +// } +// } + +// static void handle_pointer(struct Parser* p, struct ASTNode** current) { +// poptok(p); // 吃掉* +// struct ASTNode* node = new_ast_node(); +// node->node_type = NT_TYPE_PTR; + +// // 插入到当前节点之前 +// node->child.decl.type = *current; +// *current = node; +// } + +// /* 其他处理函数类似实现... 
*/ + +struct ASTNode* parser_ident_without_pop(struct Parser* parser) { + flushpeektok(parser); + struct Token* tok = peektok(parser); + if (tok->type != TOKEN_IDENT) { + error("syntax error: want identifier but got %d", tok->type); + } + struct ASTNode* node = new_ast_node(); + node->type = NT_TERM_IDENT; + node->syms.tok = *tok; + node->syms.decl_node = NULL; + return node; +} + +struct ASTNode* parse_ident(struct Parser* parser) { + struct ASTNode* node = parser_ident_without_pop(parser); + poptok(parser); + return node; +} + +struct ASTNode* parse_type(struct Parser* parser) { + flushpeektok(parser); + enum TokenType ttype = peektoktype(parser); + enum DataType dtype; + switch(ttype) { + case TOKEN_VOID: dtype = TYPE_VOID; break; + case TOKEN_CHAR: dtype = TYPE_CHAR; break; + case TOKEN_SHORT: dtype = TYPE_SHORT; break; + case TOKEN_INT: dtype = TYPE_INT; break; + case TOKEN_LONG: dtype = TYPE_LONG; break; + case TOKEN_FLOAT: dtype = TYPE_FLOAT; break; + case TOKEN_DOUBLE: dtype = TYPE_DOUBLE; break; + default: + error("无效的类型说明符"); + } + + struct ASTNode* node = new_ast_node(); + node->type = NT_TERM_TYPE; + // node->data.data_type = dtype; + poptok(parser); + + if (peektoktype(parser) == TOKEN_MUL) { + poptok(parser); + } + return node; +} diff --git a/ccompiler/frontend/parser/ast/type.c b/ccompiler/frontend/parser/ast/type.c new file mode 100644 index 0000000..6e10717 --- /dev/null +++ b/ccompiler/frontend/parser/ast/type.c @@ -0,0 +1,136 @@ +#include "../parser.h" +#include "../type.h" + +enum TypeParseState { + TPS_BASE_TYPE, // 解析基础类型 (int/char等) + TPS_QUALIFIER, // 解析限定符 (const/volatile) + TPS_POINTER, // 解析指针 (*) + TPS_ARRAY, // 解析数组维度 ([n]) + TPS_FUNC_PARAMS // 解析函数参数列表 +}; + +struct ASTNode* parse_type(struct Parser* p) { + struct ASTNode* type_root = new_ast_node(); + struct ASTNode* current = type_root; + current->type = NT_TYPE_BASE; + + enum TypeParseState state = TPS_QUALIFIER; + int pointer_level = 0; + + while (1) { + enum TokenType t = 
peektoktype(p); + + switch (state) { + // 基础类型解析 (int, char等) + case TPS_BASE_TYPE: + if (is_base_type(t)) { + // current->data.data_type = token_to_datatype(t); + poptok(p); + state = TPS_POINTER; + } else { + error("Expected type specifier"); + } + break; + + // 类型限定符 (const/volatile) + case TPS_QUALIFIER: + if (t == TOKEN_CONST || t == TOKEN_VOLATILE) { + struct ASTNode* qual_node = new_ast_node(); + qual_node->type = NT_TYPE_QUAL; + qual_node->data.data_type = t; // 复用data_type字段存储限定符 + current->child.decl.type = qual_node; + current = qual_node; + poptok(p); + } else { + state = TPS_BASE_TYPE; + } + break; + + // 指针解析 (*) + case TPS_POINTER: + if (t == TOKEN_MUL) { + struct ASTNode* ptr_node = new_ast_node(); + ptr_node->type = NT_TYPE_PTR; + current->child.decl.type = ptr_node; + current = ptr_node; + pointer_level++; + poptok(p); + } else { + state = TPS_ARRAY; + } + break; + + // 数组维度 ([n]) + case TPS_ARRAY: + if (t == TOKEN_L_BRACKET) { + poptok(p); // 吃掉[ + struct ASTNode* arr_node = new_ast_node(); + arr_node->type = NT_TYPE_ARRAY; + + // 解析数组大小(仅语法检查) + if (peektoktype(p) != TOKEN_R_BRACKET) { + parse_expr(p); // 不计算实际值 + } + + expecttok(p, TOKEN_R_BRACKET); + current->child.decl.type = arr_node; + current = arr_node; + } else { + state = TPS_FUNC_PARAMS; + } + break; + + // 函数参数列表 + case TPS_FUNC_PARAMS: + if (t == TOKEN_L_PAREN) { + struct ASTNode* func_node = new_ast_node(); + func_node->type = NT_TYPE_FUNC; + current->child.decl.type = func_node; + + // 解析参数列表(仅结构,不验证类型) + parse_param_list(p, func_node); + current = func_node; + } else { + return type_root; // 类型解析结束 + } + break; + } + } +} + +// 判断是否是基础类型 +static int is_base_type(enum TokenType t) { + return t >= TOKEN_VOID && t <= TOKEN_DOUBLE; +} + +// // 转换token到数据类型(简化版) +// static enum DataType token_to_datatype(enum TokenType t) { +// static enum DataType map[] = { +// [TOKEN_VOID] = DT_VOID, +// [TOKEN_CHAR] = DT_CHAR, +// [TOKEN_INT] = DT_INT, +// // ...其他类型映射 +// }; +// return map[t]; +// 
} + +// 解析参数列表(轻量级) +static void parse_param_list(struct Parser* p, struct ASTNode* func) { + expecttok(p, TOKEN_L_PAREN); + + while (peektoktype(p) != TOKEN_R_PAREN) { + struct ASTNode* param = parse_type(p); // 递归解析类型 + + // 允许可选参数名(仅语法检查) + if (peektoktype(p) == TOKEN_IDENT) { + poptok(p); // 吃掉参数名 + } + + if (peektoktype(p) == TOKEN_COMMA) { + poptok(p); + } + } + + expecttok(p, TOKEN_R_PAREN); +} + diff --git a/ccompiler/frontend/parser/parser.c b/ccompiler/frontend/parser/parser.c new file mode 100644 index 0000000..d8cceb8 --- /dev/null +++ b/ccompiler/frontend/parser/parser.c @@ -0,0 +1,67 @@ +#include "parser.h" +#include "type.h" +#include "ast/ast.h" + +int poptok(struct Parser* parser) { + if (parser->size == 0) { + return -1; + } + int idx = parser->cur_idx; + parser->cur_idx = (idx + 1) % PARSER_MAX_TOKEN_QUEUE; + parser->size--; + return 0; +} + +void flushpeektok(struct Parser* parser) { + parser->peek_idx = parser->cur_idx; +} + +struct Token* peektok(struct Parser* parser) { + int idx = parser->peek_idx; + idx = (idx + 1) % PARSER_MAX_TOKEN_QUEUE; + if (parser->size >= PARSER_MAX_TOKEN_QUEUE) { + warn("peek maybe too deep"); + } + if (parser->peek_idx == parser->end_idx) { + if (parser->size == PARSER_MAX_TOKEN_QUEUE) { + // FIXME + error("buffer overflow"); + } + get_valid_token(parser->lexer, &(parser->TokenBuffer[idx])); + parser->size++; + parser->end_idx = idx; + } + + parser->peek_idx = idx; + return &(parser->TokenBuffer[idx]); +} + +enum TokenType peektoktype(struct Parser* parser) { + return peektok(parser)->type; +} + +void expecttok(struct Parser* parser, enum TokenType type) { + struct Token* tok = peektok(parser); + if (tok->type != type) { + error("expected tok: %s, got %s", get_token_name(type), get_token_name(tok->type)); + } else { + poptok(parser); + } +} + +void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab) { + parser->cur_node = NULL; + parser->root = NULL; + + parser->cur_idx = 0; + 
parser->peek_idx = 0; + parser->end_idx = 0; + parser->size = 0; + parser->lexer = lexer; + parser->symtab = symtab; + // TODO +} + +void run_parser(struct Parser* parser) { + parse_prog(parser); +} diff --git a/ccompiler/frontend/parser/parser.h b/ccompiler/frontend/parser/parser.h new file mode 100644 index 0000000..b409393 --- /dev/null +++ b/ccompiler/frontend/parser/parser.h @@ -0,0 +1,33 @@ +#ifndef __PARSER_H__ +#define __PARSER_H__ + +#include "../frontend.h" +#include "../lexer/lexer.h" +// #include "symbol_table/symtab.h" +// #include "ast/ast.h" + +#define PARSER_MAX_TOKEN_QUEUE 16 + +struct Parser { + struct ASTNode* root; + struct ASTNode* cur_node; + + struct Lexer* lexer; + struct SymbolTable* symtab; + int cur_idx; + int peek_idx; + int end_idx; + int size; + struct Token TokenBuffer[PARSER_MAX_TOKEN_QUEUE]; + int err_level; +}; + +void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab); +void run_parser(struct Parser* parser); +void flushpeektok(struct Parser* parser); +int poptok(struct Parser* parser); +struct Token* peektok(struct Parser* parser); +enum TokenType peektoktype(struct Parser* parser); +void expecttok(struct Parser* parser, enum TokenType type); + +#endif diff --git a/ccompiler/frontend/parser/symtab/hashmap.c b/ccompiler/frontend/parser/symtab/hashmap.c new file mode 100644 index 0000000..d45593b --- /dev/null +++ b/ccompiler/frontend/parser/symtab/hashmap.c @@ -0,0 +1,53 @@ +// hashmap.c +#include "hashmap.h" +#include +#include + +// DJB2哈希算法 +static unsigned long hash(const char* str) { + unsigned long hash = 5381; + int c; + while ((c = *str++)) + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + return hash % HMAP_SIZE; +} + +void hmap_init(HashMap* map) { + memset(map->buckets, 0, sizeof(map->buckets)); +} + +void hmap_put(HashMap* map, const char* key, void* value) { + unsigned long idx = hash(key); + HashMapEntry* entry = malloc(sizeof(HashMapEntry)); + entry->key = strdup(key); + 
entry->value = value; + entry->next = map->buckets[idx]; + map->buckets[idx] = entry; +} + +void* hmap_get(HashMap* map, const char* key) { + unsigned long idx = hash(key); + HashMapEntry* entry = map->buckets[idx]; + while (entry) { + if (strcmp(entry->key, key) == 0) + return entry->value; + entry = entry->next; + } + return NULL; +} + +int hmap_contains(HashMap* map, const char* key) { + return hmap_get(map, key) != NULL; +} + +void hmap_destroy(HashMap* map) { + for (int i = 0; i < HMAP_SIZE; i++) { + HashMapEntry* entry = map->buckets[i]; + while (entry) { + HashMapEntry* next = entry->next; + free(entry->key); + free(entry); + entry = next; + } + } +} diff --git a/ccompiler/frontend/parser/symtab/hashmap.h b/ccompiler/frontend/parser/symtab/hashmap.h new file mode 100644 index 0000000..b680b43 --- /dev/null +++ b/ccompiler/frontend/parser/symtab/hashmap.h @@ -0,0 +1,31 @@ +#ifndef HASHMAP_H +#define HASHMAP_H + +#define HMAP_SIZE 64 + +typedef struct HashMapEntry { + char* key; + void* value; + struct HashMapEntry* next; +} HashMapEntry; + +typedef struct { + HashMapEntry* buckets[HMAP_SIZE]; +} HashMap; + +// 初始化哈希表 +void hmap_init(HashMap* map); + +// 插入键值对 +void hmap_put(HashMap* map, const char* key, void* value); + +// 查找键值 +void* hmap_get(HashMap* map, const char* key); + +// 检查键是否存在 +int hmap_contains(HashMap* map, const char* key); + +// 释放哈希表内存(不释放value) +void hmap_destroy(HashMap* map); + +#endif \ No newline at end of file diff --git a/ccompiler/frontend/parser/symtab/scope.c b/ccompiler/frontend/parser/symtab/scope.c new file mode 100644 index 0000000..1f1fcc5 --- /dev/null +++ b/ccompiler/frontend/parser/symtab/scope.c @@ -0,0 +1,43 @@ +// scope.c +#include "scope.h" +#include +#include + +typedef struct Scope Scope; + +Scope* scope_create(Scope* parent) { + Scope* scope = malloc(sizeof(Scope)); + hmap_init(&scope->symbols); + scope->parent = parent; + scope->base_offset = 0; + scope->cur_offset = 0; + return scope; +} + +void 
scope_destroy(Scope* scope) { + hmap_destroy(&scope->symbols); + free(scope); +} + +void scope_insert(Scope* scope, const char* name, void* symbol) { + if (hmap_contains(&scope->symbols, name)) { + // 处理重复定义错误 + fprintf(stderr, "Error: Symbol '%s' already defined\n", name); + exit(EXIT_FAILURE); + } + hmap_put(&scope->symbols, name, symbol); +} + +void* scope_lookup(Scope* scope, const char* name) { + void* symbol = NULL; + while (scope) { + symbol = hmap_get(&scope->symbols, name); + if (symbol) break; + scope = scope->parent; + } + return symbol; +} + +void* scope_lookup_current(Scope* scope, const char* name) { + return hmap_get(&scope->symbols, name); +} diff --git a/ccompiler/frontend/parser/symtab/scope.h b/ccompiler/frontend/parser/symtab/scope.h new file mode 100644 index 0000000..718e9f9 --- /dev/null +++ b/ccompiler/frontend/parser/symtab/scope.h @@ -0,0 +1,28 @@ +#ifndef SCOPE_H +#define SCOPE_H + +#include "hashmap.h" + +struct Scope { + HashMap symbols; // 当前作用域符号表 + struct Scope* parent; // 上层作用域 + int base_offset; + int cur_offset; +}; + +// 创建新作用域(父作用域可为NULL) +struct Scope* scope_create(struct Scope* parent); + +// 销毁作用域 +void scope_destroy(struct Scope* scope); + +// 在当前作用域插入符号 +void scope_insert(struct Scope* scope, const char* name, void* symbol); + +// 逐级查找符号 +void* scope_lookup(struct Scope* scope, const char* name); + +// 仅在当前作用域查找 +void* scope_lookup_current(struct Scope* scope, const char* name); + +#endif diff --git a/ccompiler/frontend/parser/symtab/symtab.c b/ccompiler/frontend/parser/symtab/symtab.c new file mode 100644 index 0000000..026632e --- /dev/null +++ b/ccompiler/frontend/parser/symtab/symtab.c @@ -0,0 +1,45 @@ +// symtab.c +#include "../../frontend.h" +#include "scope.h" +#include "symtab.h" + +typedef struct SymbolTable SymbolTable; +typedef struct Scope Scope; + +void init_symtab(SymbolTable* symtab) { + symtab->global_scope = scope_create(NULL); + symtab->cur_scope = symtab->global_scope; +} + +void del_symtab(SymbolTable* 
symtab) { + scope_destroy(symtab->global_scope); +} + +void symtab_enter_scope(SymbolTable* symtab) { + struct Scope* scope = scope_create(symtab->cur_scope); + scope->base_offset = symtab->cur_scope->base_offset + symtab->cur_scope->cur_offset; + symtab->cur_scope = scope; +} + +void symtab_leave_scope(SymbolTable* symtab) { + Scope * scope = symtab->cur_scope; + if (scope == NULL) { + error("cannot leave NULL scope or global scope"); + } + symtab->cur_scope = symtab->cur_scope->parent; + scope_destroy(scope); +} + +void symtab_add_symbol(SymbolTable* symtab, const char* name, void* ast_node) { + struct Scope* scope = symtab->cur_scope; + if (scope_lookup_current(scope, name) != NULL) { + // TODO WARNING + // return NULL; + } + + scope_insert(scope, name, ast_node); +} + +void* symtab_lookup_symbol(SymbolTable* symtab, const char* name) { + return scope_lookup(symtab->cur_scope, name); +} diff --git a/ccompiler/frontend/parser/symtab/symtab.h b/ccompiler/frontend/parser/symtab/symtab.h new file mode 100644 index 0000000..8f0a6a6 --- /dev/null +++ b/ccompiler/frontend/parser/symtab/symtab.h @@ -0,0 +1,18 @@ +// symtab.h +#ifndef __SYMTAB_H__ +#define __SYMTAB_H__ + +struct SymbolTable { + struct Scope* cur_scope; + struct Scope* global_scope; +}; + +void init_symtab(struct SymbolTable* symtab); +void del_symtab(struct SymbolTable* symtab); + +void symtab_enter_scope(struct SymbolTable* symtab); +void symtab_leave_scope(struct SymbolTable* symtab); +void symtab_add_symbol(struct SymbolTable* symtab, const char* name, void* ast_node); +void* symtab_lookup_symbol(struct SymbolTable* symtab, const char* name); + +#endif diff --git a/ccompiler/frontend/parser/tests/test.c b/ccompiler/frontend/parser/tests/test.c new file mode 100644 index 0000000..16746bf --- /dev/null +++ b/ccompiler/frontend/parser/tests/test.c @@ -0,0 +1,4 @@ +extern int _print_str(const char* str); +int main(void) { + _print_str("Hello, world!\n"); +} diff --git 
a/ccompiler/frontend/parser/tests/test_file.c b/ccompiler/frontend/parser/tests/test_file.c new file mode 100644 index 0000000..edf71fc --- /dev/null +++ b/ccompiler/frontend/parser/tests/test_file.c @@ -0,0 +1,14 @@ + +// int __print_str(char* str); +int f(void); + +int main(void) { + int a; + // f(); + // a = 1 + 2 * 3 + 4; + // __print_str("Hello, world!\n"); + a = 3 - f() * (3 + 2) % 6; + // 测试用例: + // if (a) if (2) 3; else b; + // 是否正确解析为 if (a) { if (b) c else d } +} diff --git a/ccompiler/frontend/parser/tests/test_parser.c b/ccompiler/frontend/parser/tests/test_parser.c new file mode 100644 index 0000000..adfd084 --- /dev/null +++ b/ccompiler/frontend/parser/tests/test_parser.c @@ -0,0 +1,34 @@ +#include "../parser.h" +#include "../ast/ast.h" +#include "../symtab/symtab.h" +#include + +// gcc -g ../parser.c ../../lexer/lexer.c ../ast/ast.c ../ast/block.c ../ast/decl.c ../ast/expr.c ../ast/func.c ../ast/program.c ../ast/stmt.c ../ast/term.c ../symtab/hashmap.c ../symtab/scope.c ../symtab/symtab.c test_parser.c -o test_parser +// gcc -g test_parser.c -L../.. 
-lfrontend -o test_parser
+int main(int argc, char** argv) {
+    const char* file_name = "test_file.c";
+    if (argc == 2) {
+        file_name = argv[1];
+    }
+    FILE* fp = fopen(file_name, "r");
+    if (fp == NULL) {
+        perror("open file failed");
+        return 1;
+    }
+    printf("open file success\n");
+
+    struct Lexer lexer;
+    init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);
+
+    struct SymbolTable symtab;
+    init_symtab(&symtab);
+
+    struct Parser parser;
+    init_parser(&parser, &lexer, &symtab);
+    parse_prog(&parser);
+
+    printf("parse_end\n");
+    pnt_ast(parser.root, 0);
+
+    return 0;
+}
\ No newline at end of file
diff --git a/ccompiler/frontend/parser/type.h b/ccompiler/frontend/parser/type.h
new file mode 100644
index 0000000..7c22572
--- /dev/null
+++ b/ccompiler/frontend/parser/type.h
@@ -0,0 +1,35 @@
+#ifndef __TYPE_H__
+#define __TYPE_H__
+
+#include "../lexer/token.h"
+
+enum DataType {
+    TYPE_VOID,
+    TYPE_CHAR,
+    TYPE_SHORT,
+    TYPE_INT,
+    TYPE_LONG,
+    TYPE_LONG_LONG,
+    TYPE_FLOAT,
+    TYPE_DOUBLE,
+    TYPE_LONG_DOUBLE,
+
+    // prefix
+    TYPE_SIGNED,
+    TYPE_UNSIGNED,
+    // TYPE_BOOL,
+    // TYPE_COMPLEX,
+    // TYPE_IMAGINARY,
+
+    TYPE_ENUM,
+    TYPE_ARRAY,
+    TYPE_STRUCT,
+    TYPE_UNION,
+    TYPE_FUNCTION,
+    TYPE_POINTER,
+    TYPE_ATOMIC,
+
+    TYPE_TYPEDEF,
+};
+
+#endif
diff --git a/ccompiler/middleend/ir.c b/ccompiler/middleend/ir.c
new file mode 100644
index 0000000..98be859
--- /dev/null
+++ b/ccompiler/middleend/ir.c
@@ -0,0 +1,299 @@
+#include "ir.h"
+#include "../frontend/frontend.h"
+
+typedef struct ASTNode ASTNode;
+
+// Context structure that records state while IR is being generated
+typedef struct {
+    ir_func_t* current_func;    // function currently being processed
+    ir_bblock_t* current_block; // current basic block
+    uint32_t vreg_counter;      // virtual register counter
+} IRGenContext;
+IRGenContext ctx;
+ir_prog_t prog;
+ir_type_t type_i32 = {
+    .tag = IR_TYPE_INT32,
+};
+// Initialise the `used_by` vector of an already allocated node.
+// No other field of the node is touched here.
+static inline void init_ir_node_t(ir_node_t* node) {
+    vector_init(node->used_by);
+}
+
+// Allocate a node with xmalloc and initialise its `used_by` vector.
+// Returns the new node; the remaining fields are left for the caller to
+// fill in. The function previously fell off the end without a return
+// statement, so callers received an indeterminate pointer.
+static inline ir_node_t* new_ir_node_t() {
+    ir_node_t* node = xmalloc(sizeof(ir_node_t));
+    init_ir_node_t(node);
+    return node;
+}
+
+ir_node_t* emit_instr(ir_bblock_t* block) { + if (block == NULL) block = ctx.current_block; + ir_node_t *node = new_ir_node_t(); + vector_push(block->instrs, node); + return vector_at(block->instrs, block->instrs.size - 1); +} + +void emit_br(ir_node_t cond, const char* true_lable, const char* false_lable) { + ir_node_t br = { + .tag = IR_NODE_RET, + .data = { + } + }; + // emit_instr(br, NULL); +} + +ir_node_t* gen_ir_expr(ASTNode* node) { + switch (node->type) { + case NT_TERM_VAL: { + ir_node_t* ir = new_ir_node_t(); + *ir = (ir_node_t) { + .tag = IR_NODE_CONST_INT, + .data.const_int = { + .val = node->syms.tok.constant.i, + }, + }; + return ir; + } + case NT_TERM_IDENT: { + ir_node_t* decl = node->syms.decl_node->decl_val.data; + return decl; + } + case NT_TERM_CALL: { + // TODO + ir_node_t* ir = new_ir_node_t(); + *ir = (ir_node_t) { + .tag = IR_NODE_CALL, + .data.call = { + .callee = NULL, + }, + }; + vector_init(ir->data.call.args); + return ir; + } + default: + goto NEXT; + } + return NULL; +NEXT: + ir_node_t* lhs = gen_ir_expr(node->expr.left); + ir_node_t* rhs = node->expr.right ? 
gen_ir_expr(node->expr.right) : NULL;
+
+    // The comma operator yields its right operand and emits no instruction.
+    if (node->type == NT_COMMA) {
+        return rhs;
+    }
+
+    // Reserve the instruction slot in the current block, then record it as
+    // a user of each operand.
+    ir_node_t* instr = emit_instr(NULL);
+    vector_push(lhs->used_by, instr);
+    if (rhs) { vector_push(rhs->used_by, instr); }
+
+    // Start from NULL so that no path can return an indeterminate pointer.
+    ir_node_t* ret = NULL;
+// Fill `instr` with a binary operation on lhs/rhs and make it the result.
+#define BINOP(operand) do { \
+        *instr = (ir_node_t){ \
+            .tag = IR_NODE_OP, \
+            .data.op = { \
+                .op = operand, \
+                .lhs = lhs, \
+                .rhs = rhs, \
+            }, \
+        }; \
+        ret = instr; \
+    } while (0)
+
+    switch (node->type) {
+    case NT_ADD :// (expr) + (expr)
+        BINOP(IR_OP_ADD);
+        break;
+    case NT_SUB :// (expr) - (expr)
+        BINOP(IR_OP_SUB);
+        break;
+    case NT_MUL :// (expr) * (expr)
+        BINOP(IR_OP_MUL);
+        break;
+    case NT_DIV :// (expr) / (expr)
+        BINOP(IR_OP_DIV);
+        break;
+    case NT_MOD :// (expr) % (expr)
+        BINOP(IR_OP_MOD);
+        break;
+    case NT_AND :// (expr) & (expr)
+        BINOP(IR_OP_AND);
+        break;
+    case NT_OR :// (expr) | (expr)
+        BINOP(IR_OP_OR);
+        break;
+    case NT_XOR :// (expr) ^ (expr)
+        BINOP(IR_OP_XOR);
+        break;
+    case NT_BIT_NOT :// ~ (expr)
+        // TODO
+        // BINOP(IR_OP_NOT);
+        // Not lowered yet: stop instead of returning an unset result.
+        error("IR generation for operator node type %d is not implemented", node->type);
+        break;
+    case NT_L_SH :// (expr) << (expr)
+        BINOP(IR_OP_SHL);
+        break;
+    case NT_R_SH :// (expr) >> (expr)
+        BINOP(IR_OP_SHR); // Shift right logical.
+        // TODO
+        // BINOP(IR_OP_SAR); // Shift right arithmetic.
+        break;
+    case NT_EQ :// (expr) == (expr)
+        BINOP(IR_OP_EQ);
+        break;
+    case NT_NEQ :// (expr) != (expr)
+        BINOP(IR_OP_NEQ);
+        break;
+    case NT_LE :// (expr) <= (expr)
+        BINOP(IR_OP_LE);
+        break;
+    case NT_GE :// (expr) >= (expr)
+        BINOP(IR_OP_GE);
+        break;
+    case NT_LT :// (expr) < (expr)
+        BINOP(IR_OP_LT);
+        break;
+    case NT_GT :// (expr) > (expr)
+        // Strict comparison: this used to emit IR_OP_GE by mistake.
+        BINOP(IR_OP_GT);
+        break;
+    case NT_AND_AND :// (expr) && (expr)
+        // Not lowered yet: stop instead of returning an unset result.
+        error("IR generation for operator node type %d is not implemented", node->type);
+        break;
+    case NT_OR_OR :// (expr) || (expr)
+        // Not lowered yet: stop instead of returning an unset result.
+        error("IR generation for operator node type %d is not implemented", node->type);
+        break;
+    case NT_NOT :// ! 
(expr) + ir_node_t* zero = xmalloc(sizeof(ir_node_t)); + *zero = (ir_node_t){ + .tag = IR_NODE_CONST_INT, + .data.const_int = { + .val = 0, + }, + }; + *instr = (ir_node_t){ + .tag = IR_NODE_OP, + .data.op = { + .op = IR_OP_EQ, + .lhs = zero, + .rhs = lhs, + }, + }; + ret = instr; + break; + case NT_ASSIGN :// (expr) = (expr) + *instr = (ir_node_t){ + .tag = IR_NODE_STORE, + .data.store = { + .target = lhs, + .value = rhs, + }, + }; + ret = rhs; + break; + // case NT_COND : // (expr) ? (expr) : (expr) + default: + // TODO self error msg + error("Unsupported IR generation for AST node type %d", node->type); + break; + } + return ret; +} + +void gen_ir_from_ast(struct ASTNode* node) { + switch (node->type) { + case NT_ROOT: { + for (int i = 0; i < node->root.child_size; i ++) { + gen_ir_from_ast(node->root.children[i]); + } + } break; + case NT_FUNC: { + ir_func_t *func = xmalloc(sizeof(ir_func_t)); + *func = (ir_func_t) { + .name = node->func.name->syms.tok.constant.str, + }; + vector_init(func->bblocks); + + ir_bblock_t *entry = xmalloc(sizeof(ir_bblock_t)); + *entry = (ir_bblock_t) { + .label = "entry", + }; + vector_init(entry->instrs); + vector_push(func->bblocks, entry); + + IRGenContext prev_ctx = ctx; + ctx = (IRGenContext) { + .current_func = func, + .current_block = vector_at(func->bblocks, 0), + .vreg_counter = 0, + }; + + gen_ir_from_ast(node->func.body); + + ctx = prev_ctx; + vector_push(prog.funcs, func); + } break; + case NT_STMT_RETURN: { + ir_node_t* ret = gen_ir_expr(node->return_stmt.expr_stmt); + ir_node_t* ir = emit_instr(NULL); + *ir = (ir_node_t) { + .tag = IR_NODE_RET, + .data = { + .ret = { + .ret_val = ret, + } + } + }; + break; + } + case NT_BLOCK: { + for (int i = 0; i < node->block.child_size; i ++) { + gen_ir_from_ast(node->block.children[i]); + } + break; + } + case NT_STMT_IF: { + ir_node_t *cond = gen_ir_expr(node->if_stmt.cond); + + // xmalloc(); + // ir_bblock_t then_block = { + // }; + node->if_stmt.if_stmt; + 
node->if_stmt.else_stmt;
+            break;
+        }
+        case NT_STMT_WHILE: {
+            node->while_stmt.cond;
+            node->while_stmt.body;
+            break;
+        }
+        case NT_STMT_DOWHILE: {
+            node->do_while_stmt.cond;
+            node->do_while_stmt.body;
+            break;
+        }
+        case NT_STMT_FOR: {
+            node->for_stmt.init;
+            node->for_stmt.cond;
+            node->for_stmt.iter;
+            node->for_stmt.body;
+            break;
+        }
+        case NT_DECL_VAR: {
+            ir_node_t* ret_node = emit_instr(NULL);
+            *ret_node = (ir_node_t) {
+                .tag = IR_NODE_ALLOC,
+                .name = node->decl_val.name->syms.tok.constant.str,
+                .type = &type_i32,
+            };
+            node->decl_val.data = ret_node;
+            if (node->decl_val.expr_stmt != NULL) {
+                gen_ir_from_ast(node->decl_val.expr_stmt);
+            }
+            break;
+        }
+        case NT_STMT_EXPR: {
+            gen_ir_expr(node->expr_stmt.expr_stmt);
+            break;
+        }
+        case NT_STMT_EMPTY: {
+            break;
+        }
+        default:
+            // TODO: error handling
+            error("unknown node type");
+            break;
+    }
+}
diff --git a/ccompiler/middleend/ir.h b/ccompiler/middleend/ir.h
new file mode 100644
index 0000000..0c96308
--- /dev/null
+++ b/ccompiler/middleend/ir.h
@@ -0,0 +1,155 @@
+// ir_core.h
+#ifndef IR_CORE_H
+#define IR_CORE_H
+
+#include "../../libcore/vector.h"
+#include <stddef.h>
+#include <stdint.h>
+
+// Error code definitions
+typedef enum {
+    IR_EC_SUCCESS = 0,      // success
+    IR_EC_MEMORY_ERROR,     // memory allocation failed
+    IR_EC_TYPE_MISMATCH,    // type mismatch
+    IR_EC_INVALID_OPERAND,  // invalid operand
+    IR_EC_DUPLICATE_SYMBOL, // symbol redefinition
+} ir_ecode_t;
+
+// IR type descriptor. `tag` selects the kind of type; `arr` holds a base
+// type pointer and a length, `func` holds a return type pointer, a
+// parameter type array and its count. The struct carries the tag `ir_type`
+// so that the `struct ir_type *` members refer to this very type rather
+// than to an unrelated, never-completed one.
+typedef struct ir_type {
+    enum {
+        IR_TYPE_INT32,
+        IR_TYPE_PTR,
+        IR_TYPE_ARRAY,
+        IR_TYPE_FUNC,
+        IR_TYPE_VOID,
+    } tag;
+    union {
+        struct {
+            struct ir_type *base;
+            size_t len;
+        } arr;
+        struct {
+            struct ir_type *ret;
+            struct ir_type **params;
+            size_t param_cnt;
+        } func;
+    };
+} ir_type_t;
+
+typedef struct ir_node ir_node_t;
+
+typedef struct ir_bblock {
+    const char *label;
+    vector_header(instrs, ir_node_t*);
+    // ir_arr_t used_by;
+} ir_bblock_t; // basic block
+
+typedef struct {
+    const char *name;
+    ir_type_t *type;
+    vector_header(params, ir_node_t*);
+    vector_header(bblocks, ir_bblock_t*);
+} ir_func_t;
+
+typedef struct {
+ vector_header(global, ir_node_t*); + vector_header(funcs, ir_func_t*); +} ir_prog_t; + +struct ir_node { + const ir_type_t* type; + const char* name; + vector_header(used_by, ir_node_t*); + enum { + IR_NODE_CONST_INT, + IR_NODE_ALLOC, + IR_NODE_LOAD, + IR_NODE_STORE, + IR_NODE_GET_PTR, + IR_NODE_OP, + IR_NODE_BRANCH, + IR_NODE_JUMP, + IR_NODE_CALL, + IR_NODE_RET, + } tag; + union { + struct { + int32_t val; + } const_int; + struct { + ir_node_t* target; + } load; + struct { + ir_node_t* target; + ir_node_t* value; + } store; + struct { + ir_node_t* src_addr; + ir_node_t* offset; + } get_ptr; + struct { + enum { + /// Not equal to. + IR_OP_NEQ, + /// Equal to. + IR_OP_EQ, + /// Greater than. + IR_OP_GT, + /// Less than. + IR_OP_LT, + /// Greater than or equal to. + IR_OP_GE, + /// Less than or equal to. + IR_OP_LE, + /// Addition. + IR_OP_ADD, + /// Subtraction. + IR_OP_SUB, + /// Multiplication. + IR_OP_MUL, + /// Division. + IR_OP_DIV, + /// Modulo. + IR_OP_MOD, + /// Bitwise AND. + IR_OP_AND, + /// Bitwise OR. + IR_OP_OR, + /// Bitwise XOR. + IR_OP_XOR, + /// Bitwise NOT. + IR_OP_NOT, + /// Shift left logical. + IR_OP_SHL, + /// Shift right logical. + IR_OP_SHR, + /// Shift right arithmetic. 
+ IR_OP_SAR, + } op; + ir_node_t* lhs; + ir_node_t* rhs; + } op; + struct { + ir_node_t* cond; + ir_bblock_t true_bblock; + ir_bblock_t false_bblock; + } branch; + struct { + ir_bblock_t target_bblock; + } jump; + struct { + ir_func_t callee; + vector_header(args, ir_node_t); + } call; + struct { + ir_node_t* ret_val; + } ret; + } data; +}; + +extern ir_prog_t prog; +struct ASTNode; +void gen_ir_from_ast(struct ASTNode* node); + + +#endif // IR_CORE_H diff --git a/ccompiler/middleend/reg_alloc.c b/ccompiler/middleend/reg_alloc.c new file mode 100644 index 0000000..e69de29 diff --git a/ccompiler/middleend/reg_alloc.h b/ccompiler/middleend/reg_alloc.h new file mode 100644 index 0000000..560a35c --- /dev/null +++ b/ccompiler/middleend/reg_alloc.h @@ -0,0 +1,8 @@ +#ifndef __REG_ALLOC_H__ +#define __REG_ALLOC_H__ + +typedef struct { + +} reg_alloc_t; + +#endif \ No newline at end of file diff --git a/ccompiler/middleend/tests/Makefile b/ccompiler/middleend/tests/Makefile new file mode 100644 index 0000000..604bad7 --- /dev/null +++ b/ccompiler/middleend/tests/Makefile @@ -0,0 +1,8 @@ +all: test_ir + + +test_ir: frontend + gcc -g ../ir.c test_ir.c -L../../frontend -lfrontend -o test_ir + +frontend: + make -C ../../frontend diff --git a/ccompiler/middleend/tests/test_file.c b/ccompiler/middleend/tests/test_file.c new file mode 100644 index 0000000..1886a0a --- /dev/null +++ b/ccompiler/middleend/tests/test_file.c @@ -0,0 +1,5 @@ +int main(void) { + int a; + a = 1 + 2 * 3; + return a; +} diff --git a/ccompiler/middleend/tests/test_ir.c b/ccompiler/middleend/tests/test_ir.c new file mode 100644 index 0000000..926e474 --- /dev/null +++ b/ccompiler/middleend/tests/test_ir.c @@ -0,0 +1,18 @@ +#include "../ir.h" +#include "../../frontend/frontend.h" + +int main(int argc, const char** argv) { + const char* file_name = "test_file.c"; + if (argc == 2) { + file_name = argv[1]; + } + FILE* fp = fopen(file_name, "r"); + if (fp == NULL) { + perror("open file failed"); + return 1; + } 
+    printf("open file success\n");
+    // Hand the front end the name of the file that was actually opened
+    // (previously a hard-coded "test.c" was passed regardless of argv).
+    struct ASTNode* root = frontend(file_name, fp, (sread_fn)fread_s);
+    gen_ir_from_ast(root);
+    fclose(fp);
+    return 0;
+}
diff --git a/libcore/acutest.h b/libcore/acutest.h
new file mode 100644
index 0000000..5f9cb19
--- /dev/null
+++ b/libcore/acutest.h
@@ -0,0 +1,1994 @@
+/*
+ * Acutest -- Another C/C++ Unit Test facility
+ *
+ *
+ * Copyright 2013-2023 Martin Mitáš
+ * Copyright 2019 Garrett D'Amore
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef ACUTEST_H
+#define ACUTEST_H
+
+
+/* Try to auto-detect whether we need to disable C++ exception handling.
+ * If the detection fails, you may always define TEST_NO_EXCEPTIONS before
+ * including "acutest.h" manually. 
*/ +#ifdef __cplusplus + #if (__cplusplus >= 199711L && !defined __cpp_exceptions) || \ + ((defined(__GNUC__) || defined(__clang__)) && !defined __EXCEPTIONS) + #ifndef TEST_NO_EXCEPTIONS + #define TEST_NO_EXCEPTIONS + #endif + #endif +#endif + + +/************************ + *** Public interface *** + ************************/ + +/* By default, "acutest.h" provides the main program entry point (function + * main()). However, if the test suite is composed of multiple source files + * which include "acutest.h", then this causes a problem of multiple main() + * definitions. To avoid this problem, #define macro TEST_NO_MAIN in all + * compilation units but one. + */ + +/* Macro to specify list of unit tests in the suite. + * The unit test implementation MUST provide list of unit tests it implements + * with this macro: + * + * TEST_LIST = { + * { "test1_name", test1_func_ptr }, + * { "test2_name", test2_func_ptr }, + * ... + * { NULL, NULL } // zeroed record marking the end of the list + * }; + * + * The list specifies names of each test (must be unique) and pointer to + * a function implementing it. The function does not take any arguments + * and has no return values, i.e. every test function has to be compatible + * with this prototype: + * + * void test_func(void); + * + * Note the list has to be ended with a zeroed record. + */ +#define TEST_LIST const struct acutest_test_ acutest_list_[] + + +/* Macros for testing whether an unit test succeeds or fails. These macros + * can be used arbitrarily in functions implementing the unit tests. + * + * If any condition fails throughout execution of a test, the test fails. + * + * TEST_CHECK takes only one argument (the condition), TEST_CHECK_ allows + * also to specify an error message to print out if the condition fails. + * (It expects printf-like format string and its parameters). The macros + * return non-zero (condition passes) or 0 (condition fails). 
+ * + * That can be useful when more conditions should be checked only if some + * preceding condition passes, as illustrated in this code snippet: + * + * SomeStruct* ptr = allocate_some_struct(); + * if(TEST_CHECK(ptr != NULL)) { + * TEST_CHECK(ptr->member1 < 100); + * TEST_CHECK(ptr->member2 > 200); + * } + */ +#define TEST_CHECK_(cond,...) \ + acutest_check_(!!(cond), __FILE__, __LINE__, __VA_ARGS__) +#define TEST_CHECK(cond) \ + acutest_check_(!!(cond), __FILE__, __LINE__, "%s", #cond) + + +/* These macros are the same as TEST_CHECK_ and TEST_CHECK except that if the + * condition fails, the currently executed unit test is immediately aborted. + * + * That is done either by calling abort() if the unit test is executed as a + * child process; or via longjmp() if the unit test is executed within the + * main Acutest process. + * + * As a side effect of such abortion, your unit tests may cause memory leaks, + * unflushed file descriptors, and other phenomena caused by the abortion. + * + * Therefore you should not use these as a general replacement for TEST_CHECK. + * Use it with some caution, especially if your test causes some other side + * effects to the outside world (e.g. communicating with some server, inserting + * into a database etc.). + */ +#define TEST_ASSERT_(cond,...) \ + do { \ + if(!acutest_check_(!!(cond), __FILE__, __LINE__, __VA_ARGS__)) \ + acutest_abort_(); \ + } while(0) +#define TEST_ASSERT(cond) \ + do { \ + if(!acutest_check_(!!(cond), __FILE__, __LINE__, "%s", #cond)) \ + acutest_abort_(); \ + } while(0) + + +#ifdef __cplusplus +#ifndef TEST_NO_EXCEPTIONS +/* Macros to verify that the code (the 1st argument) throws exception of given + * type (the 2nd argument). (Note these macros are only available in C++.) + * + * TEST_EXCEPTION_ is like TEST_EXCEPTION but accepts custom printf-like + * message. 
+ * + * For example: + * + * TEST_EXCEPTION(function_that_throw(), ExpectedExceptionType); + * + * If the function_that_throw() throws ExpectedExceptionType, the check passes. + * If the function throws anything incompatible with ExpectedExceptionType + * (or if it does not thrown an exception at all), the check fails. + */ +#define TEST_EXCEPTION(code, exctype) \ + do { \ + bool exc_ok_ = false; \ + const char *msg_ = NULL; \ + try { \ + code; \ + msg_ = "No exception thrown."; \ + } catch(exctype const&) { \ + exc_ok_= true; \ + } catch(...) { \ + msg_ = "Unexpected exception thrown."; \ + } \ + acutest_check_(exc_ok_, __FILE__, __LINE__, #code " throws " #exctype);\ + if(msg_ != NULL) \ + acutest_message_("%s", msg_); \ + } while(0) +#define TEST_EXCEPTION_(code, exctype, ...) \ + do { \ + bool exc_ok_ = false; \ + const char *msg_ = NULL; \ + try { \ + code; \ + msg_ = "No exception thrown."; \ + } catch(exctype const&) { \ + exc_ok_= true; \ + } catch(...) { \ + msg_ = "Unexpected exception thrown."; \ + } \ + acutest_check_(exc_ok_, __FILE__, __LINE__, __VA_ARGS__); \ + if(msg_ != NULL) \ + acutest_message_("%s", msg_); \ + } while(0) +#endif /* #ifndef TEST_NO_EXCEPTIONS */ +#endif /* #ifdef __cplusplus */ + + +/* Sometimes it is useful to split execution of more complex unit tests to some + * smaller parts and associate those parts with some names. + * + * This is especially handy if the given unit test is implemented as a loop + * over some vector of multiple testing inputs. Using these macros allow to use + * sort of subtitle for each iteration of the loop (e.g. outputting the input + * itself or a name associated to it), so that if any TEST_CHECK condition + * fails in the loop, it can be easily seen which iteration triggers the + * failure, without the need to manually output the iteration-specific data in + * every single TEST_CHECK inside the loop body. 
+ * + * TEST_CASE allows to specify only single string as the name of the case, + * TEST_CASE_ provides all the power of printf-like string formatting. + * + * Note that the test cases cannot be nested. Starting a new test case ends + * implicitly the previous one. To end the test case explicitly (e.g. to end + * the last test case after exiting the loop), you may use TEST_CASE(NULL). + */ +#define TEST_CASE_(...) acutest_case_(__VA_ARGS__) +#define TEST_CASE(name) acutest_case_("%s", name) + + +/* Maximal output per TEST_CASE call. Longer messages are cut. + * You may define another limit prior including "acutest.h" + */ +#ifndef TEST_CASE_MAXSIZE + #define TEST_CASE_MAXSIZE 64 +#endif + + +/* printf-like macro for outputting an extra information about a failure. + * + * Intended use is to output some computed output versus the expected value, + * e.g. like this: + * + * if(!TEST_CHECK(produced == expected)) { + * TEST_MSG("Expected: %d", expected); + * TEST_MSG("Produced: %d", produced); + * } + * + * Note the message is only written down if the most recent use of any checking + * macro (like e.g. TEST_CHECK or TEST_EXCEPTION) in the current test failed. + * This means the above is equivalent to just this: + * + * TEST_CHECK(produced == expected); + * TEST_MSG("Expected: %d", expected); + * TEST_MSG("Produced: %d", produced); + * + * The macro can deal with multi-line output fairly well. It also automatically + * adds a final new-line if there is none present. + */ +#define TEST_MSG(...) acutest_message_(__VA_ARGS__) + + +/* Maximal output per TEST_MSG call. Longer messages are cut. + * You may define another limit prior including "acutest.h" + */ +#ifndef TEST_MSG_MAXSIZE + #define TEST_MSG_MAXSIZE 1024 +#endif + + +/* Macro for dumping a block of memory. 
+ * + * Its intended use is very similar to what TEST_MSG is for, but instead of + * generating any printf-like message, this is for dumping raw block of a + * memory in a hexadecimal form: + * + * TEST_CHECK(size_produced == size_expected && + * memcmp(addr_produced, addr_expected, size_produced) == 0); + * TEST_DUMP("Expected:", addr_expected, size_expected); + * TEST_DUMP("Produced:", addr_produced, size_produced); + */ +#define TEST_DUMP(title, addr, size) acutest_dump_(title, addr, size) + +/* Maximal output per TEST_DUMP call (in bytes to dump). Longer blocks are cut. + * You may define another limit prior including "acutest.h" + */ +#ifndef TEST_DUMP_MAXSIZE + #define TEST_DUMP_MAXSIZE 1024 +#endif + + +/* Macros for marking the test as SKIPPED. + * Note it can only be used at the beginning of a test, before any other + * checking. + * + * Once used, the best practice is to return from the test routine as soon + * as possible. + */ +#define TEST_SKIP(...) acutest_skip_(__FILE__, __LINE__, __VA_ARGS__) + + +/* Common test initialisation/clean-up + * + * In some test suites, it may be needed to perform some sort of the same + * initialization and/or clean-up in all the tests. + * + * Such test suites may use macros TEST_INIT and/or TEST_FINI prior including + * this header. The expansion of the macro is then used as a body of helper + * function called just before executing every single (TEST_INIT) or just after + * it ends (TEST_FINI). + * + * Examples of various ways how to use the macro TEST_INIT: + * + * #define TEST_INIT my_init_func(); + * #define TEST_INIT my_init_func() // Works even without the semicolon + * #define TEST_INIT setlocale(LC_ALL, NULL); + * #define TEST_INIT { setlocale(LC_ALL, NULL); my_init_func(); } + * + * TEST_FINI is to be used in the same way. + */ + + +/********************** + *** Implementation *** + **********************/ + +/* The unit test files should not rely on anything below. 
*/ + +#include + +/* Enable the use of the non-standard keyword __attribute__ to silence warnings under some compilers */ +#if defined(__GNUC__) || defined(__clang__) + #define ACUTEST_ATTRIBUTE_(attr) __attribute__((attr)) +#else + #define ACUTEST_ATTRIBUTE_(attr) +#endif + +#ifdef __cplusplus + extern "C" { +#endif + +enum acutest_state_ { + ACUTEST_STATE_INITIAL = -4, + ACUTEST_STATE_SELECTED = -3, + ACUTEST_STATE_NEEDTORUN = -2, + + /* By the end all tests should be in one of the following: */ + ACUTEST_STATE_EXCLUDED = -1, + ACUTEST_STATE_SUCCESS = 0, + ACUTEST_STATE_FAILED = 1, + ACUTEST_STATE_SKIPPED = 2 +}; + +int acutest_check_(int cond, const char* file, int line, const char* fmt, ...); +void acutest_case_(const char* fmt, ...); +void acutest_message_(const char* fmt, ...); +void acutest_dump_(const char* title, const void* addr, size_t size); +void acutest_abort_(void) ACUTEST_ATTRIBUTE_(noreturn); +#ifdef __cplusplus + } /* extern "C" */ +#endif + +#ifndef TEST_NO_MAIN + +#include +#include +#include +#include +#include + +#if defined(unix) || defined(__unix__) || defined(__unix) || defined(__APPLE__) + #define ACUTEST_UNIX_ 1 + #include + #include + #include + #include + #include + #include + #include + + #if defined CLOCK_PROCESS_CPUTIME_ID && defined CLOCK_MONOTONIC + #define ACUTEST_HAS_POSIX_TIMER_ 1 + #endif +#endif + +#if defined(_gnu_linux_) || defined(__linux__) + #define ACUTEST_LINUX_ 1 + #include + #include +#endif + +#if defined(_WIN32) || defined(__WIN32__) || defined(__WINDOWS__) + #define ACUTEST_WIN_ 1 + #include + #include +#endif + +#if defined(__APPLE__) + #define ACUTEST_MACOS_ + #include + #include + #include + #include + #include +#endif + +#ifdef __cplusplus +#ifndef TEST_NO_EXCEPTIONS + #include +#endif +#endif + +#ifdef __has_include + #if __has_include() + #include + #endif +#endif + +/* Note our global private identifiers end with '_' to mitigate risk of clash + * with the unit tests implementation. 
*/ + +#ifdef __cplusplus + extern "C" { +#endif + +#ifdef _MSC_VER + /* In the multi-platform code like ours, we cannot use the non-standard + * "safe" functions from Microsoft C lib like e.g. sprintf_s() instead of + * standard sprintf(). Hence, lets disable the warning C4996. */ + #pragma warning(push) + #pragma warning(disable: 4996) +#endif + + +struct acutest_test_ { + const char* name; + void (*func)(void); +}; + +struct acutest_test_data_ { + enum acutest_state_ state; + double duration; +}; + + +extern const struct acutest_test_ acutest_list_[]; + + +static char* acutest_argv0_ = NULL; +static int acutest_list_size_ = 0; +static struct acutest_test_data_* acutest_test_data_ = NULL; +static int acutest_no_exec_ = -1; +static int acutest_no_summary_ = 0; +static int acutest_tap_ = 0; +static int acutest_exclude_mode_ = 0; +static int acutest_worker_ = 0; +static int acutest_worker_index_ = 0; +static int acutest_cond_failed_ = 0; +static FILE *acutest_xml_output_ = NULL; + +static const struct acutest_test_* acutest_current_test_ = NULL; +static int acutest_current_index_ = 0; +static char acutest_case_name_[TEST_CASE_MAXSIZE] = ""; +static int acutest_test_check_count_ = 0; +static int acutest_test_skip_count_ = 0; +static char acutest_test_skip_reason_[256] = ""; +static int acutest_test_already_logged_ = 0; +static int acutest_case_already_logged_ = 0; +static int acutest_verbose_level_ = 2; +static int acutest_test_failures_ = 0; +static int acutest_colorize_ = 0; +static int acutest_timer_ = 0; + +static int acutest_abort_has_jmp_buf_ = 0; +static jmp_buf acutest_abort_jmp_buf_; + +static int +acutest_count_(enum acutest_state_ state) +{ + int i, n; + + for(i = 0, n = 0; i < acutest_list_size_; i++) { + if(acutest_test_data_[i].state == state) + n++; + } + + return n; +} + +static void +acutest_cleanup_(void) +{ + free((void*) acutest_test_data_); +} + +static void ACUTEST_ATTRIBUTE_(noreturn) +acutest_exit_(int exit_code) +{ + acutest_cleanup_(); + 
exit(exit_code); +} + + +#if defined ACUTEST_WIN_ + typedef LARGE_INTEGER acutest_timer_type_; + static LARGE_INTEGER acutest_timer_freq_; + static acutest_timer_type_ acutest_timer_start_; + static acutest_timer_type_ acutest_timer_end_; + + static void + acutest_timer_init_(void) + { + QueryPerformanceFrequency(´st_timer_freq_); + } + + static void + acutest_timer_get_time_(LARGE_INTEGER* ts) + { + QueryPerformanceCounter(ts); + } + + static double + acutest_timer_diff_(LARGE_INTEGER start, LARGE_INTEGER end) + { + double duration = (double)(end.QuadPart - start.QuadPart); + duration /= (double)acutest_timer_freq_.QuadPart; + return duration; + } + + static void + acutest_timer_print_diff_(void) + { + printf("%.6lf secs", acutest_timer_diff_(acutest_timer_start_, acutest_timer_end_)); + } +#elif defined ACUTEST_HAS_POSIX_TIMER_ + static clockid_t acutest_timer_id_; + typedef struct timespec acutest_timer_type_; + static acutest_timer_type_ acutest_timer_start_; + static acutest_timer_type_ acutest_timer_end_; + + static void + acutest_timer_init_(void) + { + if(acutest_timer_ == 1) + acutest_timer_id_ = CLOCK_MONOTONIC; + else if(acutest_timer_ == 2) + acutest_timer_id_ = CLOCK_PROCESS_CPUTIME_ID; + } + + static void + acutest_timer_get_time_(struct timespec* ts) + { + clock_gettime(acutest_timer_id_, ts); + } + + static double + acutest_timer_diff_(struct timespec start, struct timespec end) + { + return (double)(end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec) / 1e9; + } + + static void + acutest_timer_print_diff_(void) + { + printf("%.6lf secs", + acutest_timer_diff_(acutest_timer_start_, acutest_timer_end_)); + } +#else + typedef int acutest_timer_type_; + static acutest_timer_type_ acutest_timer_start_; + static acutest_timer_type_ acutest_timer_end_; + + void + acutest_timer_init_(void) + {} + + static void + acutest_timer_get_time_(int* ts) + { + (void) ts; + } + + static double + acutest_timer_diff_(int start, int end) + { + (void) 
start; + (void) end; + return 0.0; + } + + static void + acutest_timer_print_diff_(void) + {} +#endif + +#define ACUTEST_COLOR_DEFAULT_ 0 +#define ACUTEST_COLOR_RED_ 1 +#define ACUTEST_COLOR_GREEN_ 2 +#define ACUTEST_COLOR_YELLOW_ 3 +#define ACUTEST_COLOR_DEFAULT_INTENSIVE_ 10 +#define ACUTEST_COLOR_RED_INTENSIVE_ 11 +#define ACUTEST_COLOR_GREEN_INTENSIVE_ 12 +#define ACUTEST_COLOR_YELLOW_INTENSIVE_ 13 + +static int ACUTEST_ATTRIBUTE_(format (printf, 2, 3)) +acutest_colored_printf_(int color, const char* fmt, ...) +{ + va_list args; + char buffer[256]; + int n; + + va_start(args, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, args); + va_end(args); + buffer[sizeof(buffer)-1] = '\0'; + + if(!acutest_colorize_) { + return printf("%s", buffer); + } + +#if defined ACUTEST_UNIX_ + { + const char* col_str; + switch(color) { + case ACUTEST_COLOR_RED_: col_str = "\033[0;31m"; break; + case ACUTEST_COLOR_GREEN_: col_str = "\033[0;32m"; break; + case ACUTEST_COLOR_YELLOW_: col_str = "\033[0;33m"; break; + case ACUTEST_COLOR_RED_INTENSIVE_: col_str = "\033[1;31m"; break; + case ACUTEST_COLOR_GREEN_INTENSIVE_: col_str = "\033[1;32m"; break; + case ACUTEST_COLOR_YELLOW_INTENSIVE_: col_str = "\033[1;33m"; break; + case ACUTEST_COLOR_DEFAULT_INTENSIVE_: col_str = "\033[1m"; break; + default: col_str = "\033[0m"; break; + } + printf("%s", col_str); + n = printf("%s", buffer); + printf("\033[0m"); + return n; + } +#elif defined ACUTEST_WIN_ + { + HANDLE h; + CONSOLE_SCREEN_BUFFER_INFO info; + WORD attr; + + h = GetStdHandle(STD_OUTPUT_HANDLE); + GetConsoleScreenBufferInfo(h, &info); + + switch(color) { + case ACUTEST_COLOR_RED_: attr = FOREGROUND_RED; break; + case ACUTEST_COLOR_GREEN_: attr = FOREGROUND_GREEN; break; + case ACUTEST_COLOR_YELLOW_: attr = FOREGROUND_RED | FOREGROUND_GREEN; break; + case ACUTEST_COLOR_RED_INTENSIVE_: attr = FOREGROUND_RED | FOREGROUND_INTENSITY; break; + case ACUTEST_COLOR_GREEN_INTENSIVE_: attr = FOREGROUND_GREEN | FOREGROUND_INTENSITY; break; + 
case ACUTEST_COLOR_DEFAULT_INTENSIVE_: attr = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY; break; + case ACUTEST_COLOR_YELLOW_INTENSIVE_: attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY; break; + default: attr = 0; break; + } + if(attr != 0) + SetConsoleTextAttribute(h, attr); + n = printf("%s", buffer); + SetConsoleTextAttribute(h, info.wAttributes); + return n; + } +#else + n = printf("%s", buffer); + return n; +#endif +} + +static const char* +acutest_basename_(const char* path) +{ + const char* name; + + name = strrchr(path, '/'); + if(name != NULL) + name++; + else + name = path; + +#ifdef ACUTEST_WIN_ + { + const char* alt_name; + + alt_name = strrchr(path, '\\'); + if(alt_name != NULL) + alt_name++; + else + alt_name = path; + + if(alt_name > name) + name = alt_name; + } +#endif + + return name; +} + +static void +acutest_begin_test_line_(const struct acutest_test_* test) +{ + if(!acutest_tap_) { + if(acutest_verbose_level_ >= 3) { + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Test %s:\n", test->name); + acutest_test_already_logged_++; + } else if(acutest_verbose_level_ >= 1) { + int n; + char spaces[48]; + + n = acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Test %s... ", test->name); + memset(spaces, ' ', sizeof(spaces)); + if(n < (int) sizeof(spaces)) + printf("%.*s", (int) sizeof(spaces) - n, spaces); + } else { + acutest_test_already_logged_ = 1; + } + } +} + +static void +acutest_finish_test_line_(enum acutest_state_ state) +{ + if(acutest_tap_) { + printf("%s %d - %s%s\n", + (state == ACUTEST_STATE_SUCCESS || state == ACUTEST_STATE_SKIPPED) ? "ok" : "not ok", + acutest_current_index_ + 1, + acutest_current_test_->name, + (state == ACUTEST_STATE_SKIPPED) ? 
" # SKIP" : ""); + + if(state == ACUTEST_STATE_SUCCESS && acutest_timer_) { + printf("# Duration: "); + acutest_timer_print_diff_(); + printf("\n"); + } + } else { + int color; + const char* str; + + switch(state) { + case ACUTEST_STATE_SUCCESS: color = ACUTEST_COLOR_GREEN_INTENSIVE_; str = "OK"; break; + case ACUTEST_STATE_SKIPPED: color = ACUTEST_COLOR_YELLOW_INTENSIVE_; str = "SKIPPED"; break; + case ACUTEST_STATE_FAILED: /* Fall through. */ + default: color = ACUTEST_COLOR_RED_INTENSIVE_; str = "FAILED"; break; + } + + printf("[ "); + acutest_colored_printf_(color, "%s", str); + printf(" ]"); + + if(state == ACUTEST_STATE_SUCCESS && acutest_timer_) { + printf(" "); + acutest_timer_print_diff_(); + } + + printf("\n"); + } +} + +static void +acutest_line_indent_(int level) +{ + static const char spaces[] = " "; + int n = level * 2; + + if(acutest_tap_ && n > 0) { + n--; + printf("#"); + } + + while(n > 16) { + printf("%s", spaces); + n -= 16; + } + printf("%.*s", n, spaces); +} + +void ACUTEST_ATTRIBUTE_(format (printf, 3, 4)) +acutest_skip_(const char* file, int line, const char* fmt, ...) +{ + va_list args; + size_t reason_len; + + va_start(args, fmt); + vsnprintf(acutest_test_skip_reason_, sizeof(acutest_test_skip_reason_), fmt, args); + va_end(args); + acutest_test_skip_reason_[sizeof(acutest_test_skip_reason_)-1] = '\0'; + + /* Remove final dot, if provided; that collides with our other logic. 
*/ + reason_len = strlen(acutest_test_skip_reason_); + if(acutest_test_skip_reason_[reason_len-1] == '.') + acutest_test_skip_reason_[reason_len-1] = '\0'; + + if(acutest_test_check_count_ > 0) { + acutest_check_(0, file, line, "Cannot skip, already performed some checks"); + return; + } + + if(acutest_verbose_level_ >= 2) { + const char *result_str = "skipped"; + int result_color = ACUTEST_COLOR_YELLOW_; + + if(!acutest_test_already_logged_ && acutest_current_test_ != NULL) + acutest_finish_test_line_(ACUTEST_STATE_SKIPPED); + acutest_test_already_logged_++; + + acutest_line_indent_(1); + + if(file != NULL) { + file = acutest_basename_(file); + printf("%s:%d: ", file, line); + } + + printf("%s... ", acutest_test_skip_reason_); + acutest_colored_printf_(result_color, "%s", result_str); + printf("\n"); + acutest_test_already_logged_++; + } + + acutest_test_skip_count_++; +} + +int ACUTEST_ATTRIBUTE_(format (printf, 4, 5)) +acutest_check_(int cond, const char* file, int line, const char* fmt, ...) +{ + const char *result_str; + int result_color; + int verbose_level; + + if(acutest_test_skip_count_) { + /* We've skipped the test. We shouldn't be here: The test implementation + * should have already return before. So lets suppress the following + * output. 
*/ + cond = 1; + goto skip_check; + } + + if(cond) { + result_str = "ok"; + result_color = ACUTEST_COLOR_GREEN_; + verbose_level = 3; + } else { + if(!acutest_test_already_logged_ && acutest_current_test_ != NULL) + acutest_finish_test_line_(ACUTEST_STATE_FAILED); + + acutest_test_failures_++; + acutest_test_already_logged_++; + + result_str = "failed"; + result_color = ACUTEST_COLOR_RED_; + verbose_level = 2; + } + + if(acutest_verbose_level_ >= verbose_level) { + va_list args; + + if(!acutest_case_already_logged_ && acutest_case_name_[0]) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Case %s:\n", acutest_case_name_); + acutest_test_already_logged_++; + acutest_case_already_logged_++; + } + + acutest_line_indent_(acutest_case_name_[0] ? 2 : 1); + if(file != NULL) { + file = acutest_basename_(file); + printf("%s:%d: ", file, line); + } + + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); + + printf("... "); + acutest_colored_printf_(result_color, "%s", result_str); + printf("\n"); + acutest_test_already_logged_++; + } + + acutest_test_check_count_++; + +skip_check: + acutest_cond_failed_ = (cond == 0); + return !acutest_cond_failed_; +} + +void ACUTEST_ATTRIBUTE_(format (printf, 1, 2)) +acutest_case_(const char* fmt, ...) +{ + va_list args; + + if(acutest_verbose_level_ < 2) + return; + + if(acutest_case_name_[0]) { + acutest_case_already_logged_ = 0; + acutest_case_name_[0] = '\0'; + } + + if(fmt == NULL) + return; + + va_start(args, fmt); + vsnprintf(acutest_case_name_, sizeof(acutest_case_name_) - 1, fmt, args); + va_end(args); + acutest_case_name_[sizeof(acutest_case_name_) - 1] = '\0'; + + if(acutest_verbose_level_ >= 3) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Case %s:\n", acutest_case_name_); + acutest_test_already_logged_++; + acutest_case_already_logged_++; + } +} + +void ACUTEST_ATTRIBUTE_(format (printf, 1, 2)) +acutest_message_(const char* fmt, ...) 
+{ + char buffer[TEST_MSG_MAXSIZE]; + char* line_beg; + char* line_end; + va_list args; + + if(acutest_verbose_level_ < 2) + return; + + /* We allow extra message only when something is already wrong in the + * current test. */ + if(acutest_current_test_ == NULL || !acutest_cond_failed_) + return; + + va_start(args, fmt); + vsnprintf(buffer, TEST_MSG_MAXSIZE, fmt, args); + va_end(args); + buffer[TEST_MSG_MAXSIZE-1] = '\0'; + + line_beg = buffer; + while(1) { + line_end = strchr(line_beg, '\n'); + if(line_end == NULL) + break; + acutest_line_indent_(acutest_case_name_[0] ? 3 : 2); + printf("%.*s\n", (int)(line_end - line_beg), line_beg); + line_beg = line_end + 1; + } + if(line_beg[0] != '\0') { + acutest_line_indent_(acutest_case_name_[0] ? 3 : 2); + printf("%s\n", line_beg); + } +} + +void +acutest_dump_(const char* title, const void* addr, size_t size) +{ + static const size_t BYTES_PER_LINE = 16; + size_t line_beg; + size_t truncate = 0; + + if(acutest_verbose_level_ < 2) + return; + + /* We allow extra message only when something is already wrong in the + * current test. */ + if(acutest_current_test_ == NULL || !acutest_cond_failed_) + return; + + if(size > TEST_DUMP_MAXSIZE) { + truncate = size - TEST_DUMP_MAXSIZE; + size = TEST_DUMP_MAXSIZE; + } + + acutest_line_indent_(acutest_case_name_[0] ? 3 : 2); + printf((title[strlen(title)-1] == ':') ? "%s\n" : "%s:\n", title); + + for(line_beg = 0; line_beg < size; line_beg += BYTES_PER_LINE) { + size_t line_end = line_beg + BYTES_PER_LINE; + size_t off; + + acutest_line_indent_(acutest_case_name_[0] ? 4 : 3); + printf("%08lx: ", (unsigned long)line_beg); + for(off = line_beg; off < line_end; off++) { + if(off < size) + printf(" %02x", ((const unsigned char*)addr)[off]); + else + printf(" "); + } + + printf(" "); + for(off = line_beg; off < line_end; off++) { + unsigned char byte = ((const unsigned char*)addr)[off]; + if(off < size) + printf("%c", (iscntrl(byte) ? '.' 
: byte)); + else + break; + } + + printf("\n"); + } + + if(truncate > 0) { + acutest_line_indent_(acutest_case_name_[0] ? 4 : 3); + printf(" ... (and more %u bytes)\n", (unsigned) truncate); + } +} + +/* This is called just before each test */ +static void +acutest_init_(const char *test_name) +{ +#ifdef TEST_INIT + TEST_INIT + ; /* Allow for a single unterminated function call */ +#endif + + /* Suppress any warnings about unused variable. */ + (void) test_name; +} + +/* This is called after each test */ +static void +acutest_fini_(const char *test_name) +{ +#ifdef TEST_FINI + TEST_FINI + ; /* Allow for a single unterminated function call */ +#endif + + /* Suppress any warnings about unused variable. */ + (void) test_name; +} + +void +acutest_abort_(void) +{ + if(acutest_abort_has_jmp_buf_) { + longjmp(acutest_abort_jmp_buf_, 1); + } else { + if(acutest_current_test_ != NULL) + acutest_fini_(acutest_current_test_->name); + fflush(stdout); + fflush(stderr); + acutest_exit_(ACUTEST_STATE_FAILED); + } +} + +static void +acutest_list_names_(void) +{ + const struct acutest_test_* test; + + printf("Unit tests:\n"); + for(test = ´st_list_[0]; test->func != NULL; test++) + printf(" %s\n", test->name); +} + +static int +acutest_name_contains_word_(const char* name, const char* pattern) +{ + static const char word_delim[] = " \t-_/.,:;"; + const char* substr; + size_t pattern_len; + + pattern_len = strlen(pattern); + + substr = strstr(name, pattern); + while(substr != NULL) { + int starts_on_word_boundary = (substr == name || strchr(word_delim, substr[-1]) != NULL); + int ends_on_word_boundary = (substr[pattern_len] == '\0' || strchr(word_delim, substr[pattern_len]) != NULL); + + if(starts_on_word_boundary && ends_on_word_boundary) + return 1; + + substr = strstr(substr+1, pattern); + } + + return 0; +} + +static int +acutest_select_(const char* pattern) +{ + int i; + int n = 0; + + /* Try exact match. 
*/ + for(i = 0; i < acutest_list_size_; i++) { + if(strcmp(acutest_list_[i].name, pattern) == 0) { + acutest_test_data_[i].state = ACUTEST_STATE_SELECTED; + n++; + break; + } + } + if(n > 0) + return n; + + /* Try word match. */ + for(i = 0; i < acutest_list_size_; i++) { + if(acutest_name_contains_word_(acutest_list_[i].name, pattern)) { + acutest_test_data_[i].state = ACUTEST_STATE_SELECTED; + n++; + } + } + if(n > 0) + return n; + + /* Try relaxed match. */ + for(i = 0; i < acutest_list_size_; i++) { + if(strstr(acutest_list_[i].name, pattern) != NULL) { + acutest_test_data_[i].state = ACUTEST_STATE_SELECTED; + n++; + } + } + + return n; +} + + +/* Called if anything goes bad in Acutest, or if the unit test ends in other + * way then by normal returning from its function (e.g. exception or some + * abnormal child process termination). */ +static void ACUTEST_ATTRIBUTE_(format (printf, 1, 2)) +acutest_error_(const char* fmt, ...) +{ + if(acutest_verbose_level_ == 0) + return; + + if(acutest_verbose_level_ >= 2) { + va_list args; + + acutest_line_indent_(1); + if(acutest_verbose_level_ >= 3) + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "ERROR: "); + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); + printf("\n"); + } + + if(acutest_verbose_level_ >= 3) { + printf("\n"); + } +} + +/* Call directly the given test unit function. */ +static enum acutest_state_ +acutest_do_run_(const struct acutest_test_* test, int index) +{ + enum acutest_state_ state = ACUTEST_STATE_FAILED; + + acutest_current_test_ = test; + acutest_current_index_ = index; + acutest_test_failures_ = 0; + acutest_test_already_logged_ = 0; + acutest_test_check_count_ = 0; + acutest_test_skip_count_ = 0; + acutest_cond_failed_ = 0; + +#ifdef __cplusplus +#ifndef TEST_NO_EXCEPTIONS + try { +#endif +#endif + acutest_init_(test->name); + acutest_begin_test_line_(test); + + /* This is good to do in case the test unit crashes. 
*/ + fflush(stdout); + fflush(stderr); + + if(!acutest_worker_) { + acutest_abort_has_jmp_buf_ = 1; + if(setjmp(acutest_abort_jmp_buf_) != 0) + goto aborted; + } + + acutest_timer_get_time_(´st_timer_start_); + test->func(); + +aborted: + acutest_abort_has_jmp_buf_ = 0; + acutest_timer_get_time_(´st_timer_end_); + + if(acutest_test_failures_ > 0) + state = ACUTEST_STATE_FAILED; + else if(acutest_test_skip_count_ > 0) + state = ACUTEST_STATE_SKIPPED; + else + state = ACUTEST_STATE_SUCCESS; + + if(!acutest_test_already_logged_) + acutest_finish_test_line_(state); + + if(acutest_verbose_level_ >= 3) { + acutest_line_indent_(1); + switch(state) { + case ACUTEST_STATE_SUCCESS: + acutest_colored_printf_(ACUTEST_COLOR_GREEN_INTENSIVE_, "SUCCESS: "); + printf("All conditions have passed.\n"); + + if(acutest_timer_) { + acutest_line_indent_(1); + printf("Duration: "); + acutest_timer_print_diff_(); + printf("\n"); + } + break; + + case ACUTEST_STATE_SKIPPED: + acutest_colored_printf_(ACUTEST_COLOR_YELLOW_INTENSIVE_, "SKIPPED: "); + printf("%s.\n", acutest_test_skip_reason_); + break; + + default: + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED: "); + printf("%d condition%s %s failed.\n", + acutest_test_failures_, + (acutest_test_failures_ == 1) ? "" : "s", + (acutest_test_failures_ == 1) ? "has" : "have"); + break; + } + printf("\n"); + } + +#ifdef __cplusplus +#ifndef TEST_NO_EXCEPTIONS + } catch(std::exception& e) { + const char* what = e.what(); + acutest_check_(0, NULL, 0, "Threw std::exception"); + if(what != NULL) + acutest_message_("std::exception::what(): %s", what); + + if(acutest_verbose_level_ >= 3) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED: "); + printf("C++ exception.\n\n"); + } + } catch(...) 
{ + acutest_check_(0, NULL, 0, "Threw an exception"); + + if(acutest_verbose_level_ >= 3) { + acutest_line_indent_(1); + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED: "); + printf("C++ exception.\n\n"); + } + } +#endif +#endif + + acutest_fini_(test->name); + acutest_case_(NULL); + acutest_current_test_ = NULL; + + return state; +} + +/* Trigger the unit test. If possible (and not suppressed) it starts a child + * process who calls acutest_do_run_(), otherwise it calls acutest_do_run_() + * directly. */ +static void +acutest_run_(const struct acutest_test_* test, int index, int master_index) +{ + enum acutest_state_ state = ACUTEST_STATE_FAILED; + acutest_timer_type_ start, end; + + acutest_current_test_ = test; + acutest_test_already_logged_ = 0; + acutest_timer_get_time_(&start); + + if(!acutest_no_exec_) { + +#if defined(ACUTEST_UNIX_) + + pid_t pid; + int exit_code; + + /* Make sure the child starts with empty I/O buffers. */ + fflush(stdout); + fflush(stderr); + + pid = fork(); + if(pid == (pid_t)-1) { + acutest_error_("Cannot fork. %s [%d]", strerror(errno), errno); + } else if(pid == 0) { + /* Child: Do the test. */ + acutest_worker_ = 1; + state = acutest_do_run_(test, index); + acutest_exit_((int) state); + } else { + /* Parent: Wait until child terminates and analyze its exit code. 
*/ + waitpid(pid, &exit_code, 0); + if(WIFEXITED(exit_code)) { + state = (enum acutest_state_) WEXITSTATUS(exit_code); + } else if(WIFSIGNALED(exit_code)) { + char tmp[32]; + const char* signame; + switch(WTERMSIG(exit_code)) { + case SIGINT: signame = "SIGINT"; break; + case SIGHUP: signame = "SIGHUP"; break; + case SIGQUIT: signame = "SIGQUIT"; break; + case SIGABRT: signame = "SIGABRT"; break; + case SIGKILL: signame = "SIGKILL"; break; + case SIGSEGV: signame = "SIGSEGV"; break; + case SIGILL: signame = "SIGILL"; break; + case SIGTERM: signame = "SIGTERM"; break; + default: snprintf(tmp, sizeof(tmp), "signal %d", WTERMSIG(exit_code)); signame = tmp; break; + } + acutest_error_("Test interrupted by %s.", signame); + } else { + acutest_error_("Test ended in an unexpected way [%d].", exit_code); + } + } + +#elif defined(ACUTEST_WIN_) + + char buffer[512] = {0}; + STARTUPINFOA startupInfo; + PROCESS_INFORMATION processInfo; + DWORD exitCode; + + /* Windows has no fork(). So we propagate all info into the child + * through a command line arguments. */ + snprintf(buffer, sizeof(buffer), + "%s --worker=%d %s --no-exec --no-summary %s --verbose=%d --color=%s -- \"%s\"", + acutest_argv0_, index, acutest_timer_ ? "--time" : "", + acutest_tap_ ? "--tap" : "", acutest_verbose_level_, + acutest_colorize_ ? 
"always" : "never", + test->name); + memset(&startupInfo, 0, sizeof(startupInfo)); + startupInfo.cb = sizeof(STARTUPINFO); + if(CreateProcessA(NULL, buffer, NULL, NULL, FALSE, 0, NULL, NULL, &startupInfo, &processInfo)) { + WaitForSingleObject(processInfo.hProcess, INFINITE); + GetExitCodeProcess(processInfo.hProcess, &exitCode); + CloseHandle(processInfo.hThread); + CloseHandle(processInfo.hProcess); + switch(exitCode) { + case 0: state = ACUTEST_STATE_SUCCESS; break; + case 1: state = ACUTEST_STATE_FAILED; break; + case 2: state = ACUTEST_STATE_SKIPPED; break; + case 3: acutest_error_("Aborted."); break; + case 0xC0000005: acutest_error_("Access violation."); break; + default: acutest_error_("Test ended in an unexpected way [%lu].", exitCode); break; + } + } else { + acutest_error_("Cannot create unit test subprocess [%ld].", GetLastError()); + } + +#else + + /* A platform where we don't know how to run child process. */ + state = acutest_do_run_(test, index); + +#endif + + } else { + /* Child processes suppressed through --no-exec. */ + state = acutest_do_run_(test, index); + } + acutest_timer_get_time_(&end); + + acutest_current_test_ = NULL; + + acutest_test_data_[master_index].state = state; + acutest_test_data_[master_index].duration = acutest_timer_diff_(start, end); +} + +#if defined(ACUTEST_WIN_) +/* Callback for SEH events. 
*/ +static LONG CALLBACK +acutest_seh_exception_filter_(EXCEPTION_POINTERS *ptrs) +{ + acutest_check_(0, NULL, 0, "Unhandled SEH exception"); + acutest_message_("Exception code: 0x%08lx", ptrs->ExceptionRecord->ExceptionCode); + acutest_message_("Exception address: 0x%p", ptrs->ExceptionRecord->ExceptionAddress); + + fflush(stdout); + fflush(stderr); + + return EXCEPTION_EXECUTE_HANDLER; +} +#endif + + +#define ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ 0x0001 +#define ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_ 0x0002 + +#define ACUTEST_CMDLINE_OPTID_NONE_ 0 +#define ACUTEST_CMDLINE_OPTID_UNKNOWN_ (-0x7fffffff + 0) +#define ACUTEST_CMDLINE_OPTID_MISSINGARG_ (-0x7fffffff + 1) +#define ACUTEST_CMDLINE_OPTID_BOGUSARG_ (-0x7fffffff + 2) + +typedef struct acutest_test_CMDLINE_OPTION_ { + char shortname; + const char* longname; + int id; + unsigned flags; +} ACUTEST_CMDLINE_OPTION_; + +static int +acutest_cmdline_handle_short_opt_group_(const ACUTEST_CMDLINE_OPTION_* options, + const char* arggroup, + int (*callback)(int /*optval*/, const char* /*arg*/)) +{ + const ACUTEST_CMDLINE_OPTION_* opt; + int i; + int ret = 0; + + for(i = 0; arggroup[i] != '\0'; i++) { + for(opt = options; opt->id != 0; opt++) { + if(arggroup[i] == opt->shortname) + break; + } + + if(opt->id != 0 && !(opt->flags & ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_)) { + ret = callback(opt->id, NULL); + } else { + /* Unknown option. */ + char badoptname[3]; + badoptname[0] = '-'; + badoptname[1] = arggroup[i]; + badoptname[2] = '\0'; + ret = callback((opt->id != 0 ? 
ACUTEST_CMDLINE_OPTID_MISSINGARG_ : ACUTEST_CMDLINE_OPTID_UNKNOWN_), + badoptname); + } + + if(ret != 0) + break; + } + + return ret; +} + +#define ACUTEST_CMDLINE_AUXBUF_SIZE_ 32 + +static int +acutest_cmdline_read_(const ACUTEST_CMDLINE_OPTION_* options, int argc, char** argv, + int (*callback)(int /*optval*/, const char* /*arg*/)) +{ + + const ACUTEST_CMDLINE_OPTION_* opt; + char auxbuf[ACUTEST_CMDLINE_AUXBUF_SIZE_+1]; + int after_doubledash = 0; + int i = 1; + int ret = 0; + + auxbuf[ACUTEST_CMDLINE_AUXBUF_SIZE_] = '\0'; + + while(i < argc) { + if(after_doubledash || strcmp(argv[i], "-") == 0) { + /* Non-option argument. */ + ret = callback(ACUTEST_CMDLINE_OPTID_NONE_, argv[i]); + } else if(strcmp(argv[i], "--") == 0) { + /* End of options. All the remaining members are non-option arguments. */ + after_doubledash = 1; + } else if(argv[i][0] != '-') { + /* Non-option argument. */ + ret = callback(ACUTEST_CMDLINE_OPTID_NONE_, argv[i]); + } else { + for(opt = options; opt->id != 0; opt++) { + if(opt->longname != NULL && strncmp(argv[i], "--", 2) == 0) { + size_t len = strlen(opt->longname); + if(strncmp(argv[i]+2, opt->longname, len) == 0) { + /* Regular long option. */ + if(argv[i][2+len] == '\0') { + /* with no argument provided. */ + if(!(opt->flags & ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_)) + ret = callback(opt->id, NULL); + else + ret = callback(ACUTEST_CMDLINE_OPTID_MISSINGARG_, argv[i]); + break; + } else if(argv[i][2+len] == '=') { + /* with an argument provided. */ + if(opt->flags & (ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ | ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_)) { + ret = callback(opt->id, argv[i]+2+len+1); + } else { + snprintf(auxbuf, sizeof(auxbuf), "--%s", opt->longname); + ret = callback(ACUTEST_CMDLINE_OPTID_BOGUSARG_, auxbuf); + } + break; + } else { + continue; + } + } + } else if(opt->shortname != '\0' && argv[i][0] == '-') { + if(argv[i][1] == opt->shortname) { + /* Regular short option. 
*/ + if(opt->flags & ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_) { + if(argv[i][2] != '\0') + ret = callback(opt->id, argv[i]+2); + else if(i+1 < argc) + ret = callback(opt->id, argv[++i]); + else + ret = callback(ACUTEST_CMDLINE_OPTID_MISSINGARG_, argv[i]); + break; + } else { + ret = callback(opt->id, NULL); + + /* There might be more (argument-less) short options + * grouped together. */ + if(ret == 0 && argv[i][2] != '\0') + ret = acutest_cmdline_handle_short_opt_group_(options, argv[i]+2, callback); + break; + } + } + } + } + + if(opt->id == 0) { /* still not handled? */ + if(argv[i][0] != '-') { + /* Non-option argument. */ + ret = callback(ACUTEST_CMDLINE_OPTID_NONE_, argv[i]); + } else { + /* Unknown option. */ + char* badoptname = argv[i]; + + if(strncmp(badoptname, "--", 2) == 0) { + /* Strip any argument from the long option. */ + char* assignment = strchr(badoptname, '='); + if(assignment != NULL) { + size_t len = (size_t)(assignment - badoptname); + if(len > ACUTEST_CMDLINE_AUXBUF_SIZE_) + len = ACUTEST_CMDLINE_AUXBUF_SIZE_; + strncpy(auxbuf, badoptname, len); + auxbuf[len] = '\0'; + badoptname = auxbuf; + } + } + + ret = callback(ACUTEST_CMDLINE_OPTID_UNKNOWN_, badoptname); + } + } + } + + if(ret != 0) + return ret; + i++; + } + + return ret; +} + +static void +acutest_help_(void) +{ + printf("Usage: %s [options] [test...]\n", acutest_argv0_); + printf("\n"); + printf("Run the specified unit tests; or if the option '--exclude' is used, run all\n"); + printf("tests in the suite but those listed. 
By default, if no tests are specified\n"); + printf("on the command line, all unit tests in the suite are run.\n"); + printf("\n"); + printf("Options:\n"); + printf(" -X, --exclude Execute all unit tests but the listed ones\n"); + printf(" --exec[=WHEN] If supported, execute unit tests as child processes\n"); + printf(" (WHEN is one of 'auto', 'always', 'never')\n"); + printf(" -E, --no-exec Same as --exec=never\n"); +#if defined ACUTEST_WIN_ + printf(" -t, --time Measure test duration\n"); +#elif defined ACUTEST_HAS_POSIX_TIMER_ + printf(" -t, --time Measure test duration (real time)\n"); + printf(" --time=TIMER Measure test duration, using given timer\n"); + printf(" (TIMER is one of 'real', 'cpu')\n"); +#endif + printf(" --no-summary Suppress printing of test results summary\n"); + printf(" --tap Produce TAP-compliant output\n"); + printf(" (See https://testanything.org/)\n"); + printf(" -x, --xml-output=FILE Enable XUnit output to the given file\n"); + printf(" -l, --list List unit tests in the suite and exit\n"); + printf(" -v, --verbose Make output more verbose\n"); + printf(" --verbose=LEVEL Set verbose level to LEVEL:\n"); + printf(" 0 ... Be silent\n"); + printf(" 1 ... Output one line per test (and summary)\n"); + printf(" 2 ... As 1 and failed conditions (this is default)\n"); + printf(" 3 ... 
As 1 and all conditions (and extended summary)\n"); + printf(" -q, --quiet Same as --verbose=0\n"); + printf(" --color[=WHEN] Enable colorized output\n"); + printf(" (WHEN is one of 'auto', 'always', 'never')\n"); + printf(" --no-color Same as --color=never\n"); + printf(" -h, --help Display this help and exit\n"); + + if(acutest_list_size_ < 16) { + printf("\n"); + acutest_list_names_(); + } +} + +static const ACUTEST_CMDLINE_OPTION_ acutest_cmdline_options_[] = { + { 'X', "exclude", 'X', 0 }, + { 's', "skip", 'X', 0 }, /* kept for compatibility, use --exclude instead */ + { 0, "exec", 'e', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 'E', "no-exec", 'E', 0 }, +#if defined ACUTEST_WIN_ + { 't', "time", 't', 0 }, + { 0, "timer", 't', 0 }, /* kept for compatibility */ +#elif defined ACUTEST_HAS_POSIX_TIMER_ + { 't', "time", 't', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 0, "timer", 't', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, /* kept for compatibility */ +#endif + { 0, "no-summary", 'S', 0 }, + { 0, "tap", 'T', 0 }, + { 'l', "list", 'l', 0 }, + { 'v', "verbose", 'v', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 'q', "quiet", 'q', 0 }, + { 0, "color", 'c', ACUTEST_CMDLINE_OPTFLAG_OPTIONALARG_ }, + { 0, "no-color", 'C', 0 }, + { 'h', "help", 'h', 0 }, + { 0, "worker", 'w', ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_ }, /* internal */ + { 'x', "xml-output", 'x', ACUTEST_CMDLINE_OPTFLAG_REQUIREDARG_ }, + { 0, NULL, 0, 0 } +}; + +static int +acutest_cmdline_callback_(int id, const char* arg) +{ + switch(id) { + case 'X': + acutest_exclude_mode_ = 1; + break; + + case 'e': + if(arg == NULL || strcmp(arg, "always") == 0) { + acutest_no_exec_ = 0; + } else if(strcmp(arg, "never") == 0) { + acutest_no_exec_ = 1; + } else if(strcmp(arg, "auto") == 0) { + /*noop*/ + } else { + fprintf(stderr, "%s: Unrecognized argument '%s' for option --exec.\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + } + break; + + 
case 'E': + acutest_no_exec_ = 1; + break; + + case 't': +#if defined ACUTEST_WIN_ || defined ACUTEST_HAS_POSIX_TIMER_ + if(arg == NULL || strcmp(arg, "real") == 0) { + acutest_timer_ = 1; + #ifndef ACUTEST_WIN_ + } else if(strcmp(arg, "cpu") == 0) { + acutest_timer_ = 2; + #endif + } else { + fprintf(stderr, "%s: Unrecognized argument '%s' for option --time.\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + } +#endif + break; + + case 'S': + acutest_no_summary_ = 1; + break; + + case 'T': + acutest_tap_ = 1; + break; + + case 'l': + acutest_list_names_(); + acutest_exit_(0); + break; + + case 'v': + acutest_verbose_level_ = (arg != NULL ? atoi(arg) : acutest_verbose_level_+1); + break; + + case 'q': + acutest_verbose_level_ = 0; + break; + + case 'c': + if(arg == NULL || strcmp(arg, "always") == 0) { + acutest_colorize_ = 1; + } else if(strcmp(arg, "never") == 0) { + acutest_colorize_ = 0; + } else if(strcmp(arg, "auto") == 0) { + /*noop*/ + } else { + fprintf(stderr, "%s: Unrecognized argument '%s' for option --color.\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + } + break; + + case 'C': + acutest_colorize_ = 0; + break; + + case 'h': + acutest_help_(); + acutest_exit_(0); + break; + + case 'w': + acutest_worker_ = 1; + acutest_worker_index_ = atoi(arg); + break; + case 'x': + acutest_xml_output_ = fopen(arg, "w"); + if (!acutest_xml_output_) { + fprintf(stderr, "Unable to open '%s': %s\n", arg, strerror(errno)); + acutest_exit_(2); + } + break; + + case 0: + if(acutest_select_(arg) == 0) { + fprintf(stderr, "%s: Unrecognized unit test '%s'\n", acutest_argv0_, arg); + fprintf(stderr, "Try '%s --list' for list of unit tests.\n", acutest_argv0_); + acutest_exit_(2); + } + break; + + case ACUTEST_CMDLINE_OPTID_UNKNOWN_: + fprintf(stderr, "Unrecognized command line option '%s'.\n", arg); + fprintf(stderr, "Try '%s 
--help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + break; + + case ACUTEST_CMDLINE_OPTID_MISSINGARG_: + fprintf(stderr, "The command line option '%s' requires an argument.\n", arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + break; + + case ACUTEST_CMDLINE_OPTID_BOGUSARG_: + fprintf(stderr, "The command line option '%s' does not expect an argument.\n", arg); + fprintf(stderr, "Try '%s --help' for more information.\n", acutest_argv0_); + acutest_exit_(2); + break; + } + + return 0; +} + +static int +acutest_under_debugger_(void) +{ +#ifdef ACUTEST_LINUX_ + /* Scan /proc/self/status for line "TracerPid: [PID]". If such line exists + * and the PID is non-zero, we're being debugged. */ + { + static const int OVERLAP = 32; + int fd; + char buf[512]; + size_t n_read; + pid_t tracer_pid = 0; + + /* Little trick so that we can treat the 1st line the same as any other + * and detect line start easily. */ + buf[0] = '\n'; + n_read = 1; + + fd = open("/proc/self/status", O_RDONLY); + if(fd != -1) { + while(1) { + static const char pattern[] = "\nTracerPid:"; + const char* field; + + while(n_read < sizeof(buf) - 1) { + ssize_t n; + + n = read(fd, buf + n_read, sizeof(buf) - 1 - n_read); + if(n <= 0) + break; + n_read += (size_t)n; + } + buf[n_read] = '\0'; + + field = strstr(buf, pattern); + if(field != NULL && field < buf + sizeof(buf) - OVERLAP) { + tracer_pid = (pid_t) atoi(field + sizeof(pattern) - 1); + break; + } + + if(n_read == sizeof(buf) - 1) { + /* Move the tail with the potentially incomplete line we're + * be looking for to the beginning of the buffer. + * (The OVERLAP must be large enough so the searched line + * can fit in completely.) 
*/ + memmove(buf, buf + sizeof(buf) - 1 - OVERLAP, OVERLAP); + n_read = OVERLAP; + } else { + break; + } + } + + close(fd); + + if(tracer_pid != 0) + return 1; + } + } +#endif + +#ifdef ACUTEST_MACOS_ + /* See https://developer.apple.com/library/archive/qa/qa1361/_index.html */ + { + int mib[4]; + struct kinfo_proc info; + size_t size; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + mib[3] = getpid(); + + size = sizeof(info); + info.kp_proc.p_flag = 0; + sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, NULL, 0); + + if(info.kp_proc.p_flag & P_TRACED) + return 1; + } +#endif + +#ifdef ACUTEST_WIN_ + if(IsDebuggerPresent()) + return 1; +#endif + +#ifdef RUNNING_ON_VALGRIND + /* We treat Valgrind as a debugger of sorts. + * (Macro RUNNING_ON_VALGRIND is provided by , if available.) */ + if(RUNNING_ON_VALGRIND) + return 1; +#endif + + return 0; +} + +int +main(int argc, char** argv) +{ + int i, index; + int exit_code = 1; + + acutest_argv0_ = argv[0]; + +#if defined ACUTEST_UNIX_ + acutest_colorize_ = isatty(STDOUT_FILENO); +#elif defined ACUTEST_WIN_ + #if defined _BORLANDC_ + acutest_colorize_ = isatty(_fileno(stdout)); + #else + acutest_colorize_ = _isatty(_fileno(stdout)); + #endif +#else + acutest_colorize_ = 0; +#endif + + /* Count all test units */ + acutest_list_size_ = 0; + for(i = 0; acutest_list_[i].func != NULL; i++) + acutest_list_size_++; + + acutest_test_data_ = (struct acutest_test_data_*)calloc(acutest_list_size_, sizeof(struct acutest_test_data_)); + if(acutest_test_data_ == NULL) { + fprintf(stderr, "Out of memory.\n"); + acutest_exit_(2); + } + + /* Parse options */ + acutest_cmdline_read_(acutest_cmdline_options_, argc, argv, acutest_cmdline_callback_); + + /* Initialize the proper timer. */ + acutest_timer_init_(); + +#if defined(ACUTEST_WIN_) + SetUnhandledExceptionFilter(acutest_seh_exception_filter_); +#ifdef _MSC_VER + _set_abort_behavior(0, _WRITE_ABORT_MSG); +#endif +#endif + + /* Determine what to run. 
*/ + if(acutest_count_(ACUTEST_STATE_SELECTED) > 0) { + enum acutest_state_ if_selected; + enum acutest_state_ if_unselected; + + if(!acutest_exclude_mode_) { + if_selected = ACUTEST_STATE_NEEDTORUN; + if_unselected = ACUTEST_STATE_EXCLUDED; + } else { + if_selected = ACUTEST_STATE_EXCLUDED; + if_unselected = ACUTEST_STATE_NEEDTORUN; + } + + for(i = 0; acutest_list_[i].func != NULL; i++) { + if(acutest_test_data_[i].state == ACUTEST_STATE_SELECTED) + acutest_test_data_[i].state = if_selected; + else + acutest_test_data_[i].state = if_unselected; + } + } else { + /* By default, we want to run all tests. */ + for(i = 0; acutest_list_[i].func != NULL; i++) + acutest_test_data_[i].state = ACUTEST_STATE_NEEDTORUN; + } + + /* By default, we want to suppress running tests as child processes if we + * run just one test, or if we're under debugger: Debugging tests is then + * so much easier. */ + if(acutest_no_exec_ < 0) { + if(acutest_count_(ACUTEST_STATE_NEEDTORUN) <= 1 || acutest_under_debugger_()) + acutest_no_exec_ = 1; + else + acutest_no_exec_ = 0; + } + + if(acutest_tap_) { + /* TAP requires we know test result ("ok", "not ok") before we output + * anything about the test, and this gets problematic for larger verbose + * levels. */ + if(acutest_verbose_level_ > 2) + acutest_verbose_level_ = 2; + + /* TAP harness should provide some summary. 
*/ + acutest_no_summary_ = 1; + + if(!acutest_worker_) + printf("1..%d\n", acutest_count_(ACUTEST_STATE_NEEDTORUN)); + } + + index = acutest_worker_index_; + for(i = 0; acutest_list_[i].func != NULL; i++) { + if(acutest_test_data_[i].state == ACUTEST_STATE_NEEDTORUN) + acutest_run_(&acutest_list_[i], index++, i); + } + + /* Write a summary */ + if(!acutest_no_summary_ && acutest_verbose_level_ >= 1) { + int n_run, n_success, n_failed ; + + n_run = acutest_list_size_ - acutest_count_(ACUTEST_STATE_EXCLUDED); + n_success = acutest_count_(ACUTEST_STATE_SUCCESS); + n_failed = acutest_count_(ACUTEST_STATE_FAILED); + + if(acutest_verbose_level_ >= 3) { + acutest_colored_printf_(ACUTEST_COLOR_DEFAULT_INTENSIVE_, "Summary:\n"); + + printf(" Count of run unit tests: %4d\n", n_run); + printf(" Count of successful unit tests: %4d\n", n_success); + printf(" Count of failed unit tests: %4d\n", n_failed); + } + + if(n_failed == 0) { + acutest_colored_printf_(ACUTEST_COLOR_GREEN_INTENSIVE_, "SUCCESS:"); + printf(" No unit tests have failed.\n"); + } else { + acutest_colored_printf_(ACUTEST_COLOR_RED_INTENSIVE_, "FAILED:"); + printf(" %d of %d unit tests %s failed.\n", + n_failed, n_run, (n_failed == 1) ? "has" : "have"); + } + + if(acutest_verbose_level_ >= 3) + printf("\n"); + } + /* Emit the XUnit-style XML report when an output file was opened via -x/--xml-output. */ + if (acutest_xml_output_) { + const char* suite_name = acutest_basename_(argv[0]); + fprintf(acutest_xml_output_, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); + fprintf(acutest_xml_output_, "<testsuite name=\"%s\" tests=\"%d\" errors=\"0\" failures=\"%d\" skip=\"%d\">\n", + suite_name, + (int)acutest_list_size_, + acutest_count_(ACUTEST_STATE_FAILED), + acutest_count_(ACUTEST_STATE_SKIPPED) + acutest_count_(ACUTEST_STATE_EXCLUDED)); + for(i = 0; acutest_list_[i].func != NULL; i++) { + struct acutest_test_data_ *details = &acutest_test_data_[i]; + const char* str_state; + fprintf(acutest_xml_output_, "  <testcase name=\"%s\" time=\"%.2f\">\n", acutest_list_[i].name, details->duration); + + switch(details->state) { + case ACUTEST_STATE_SUCCESS: str_state = NULL; break; + case ACUTEST_STATE_EXCLUDED: /* Fall through.
*/ + case ACUTEST_STATE_SKIPPED: str_state = "<skipped />"; break; + case ACUTEST_STATE_FAILED: /* Fall through. */ + default: str_state = "<failure />"; break; + } + + if(str_state != NULL) + fprintf(acutest_xml_output_, "    %s\n", str_state); + fprintf(acutest_xml_output_, "  </testcase>\n"); + } + fprintf(acutest_xml_output_, "</testsuite>\n"); + fclose(acutest_xml_output_); + } + + if(acutest_worker_ && acutest_count_(ACUTEST_STATE_EXCLUDED)+1 == acutest_list_size_) { + /* If we are the child process, we need to propagate the test state + * without any moderation. */ + for(i = 0; acutest_list_[i].func != NULL; i++) { + if(acutest_test_data_[i].state != ACUTEST_STATE_EXCLUDED) { + exit_code = (int) acutest_test_data_[i].state; + break; + } + } + } else { + if(acutest_count_(ACUTEST_STATE_FAILED) > 0) + exit_code = 1; + else + exit_code = 0; + } + + acutest_cleanup_(); + return exit_code; +} + + +#endif /* #ifndef TEST_NO_MAIN */ + +#ifdef _MSC_VER + #pragma warning(pop) +#endif + +#ifdef __cplusplus + } /* extern "C" */ +#endif + +#endif /* #ifndef ACUTEST_H */ diff --git a/libcore/libcore.h b/libcore/libcore.h new file mode 100644 index 0000000..6650943 --- /dev/null +++ b/libcore/libcore.h @@ -0,0 +1,10 @@ +#ifndef __STDCORE_H__ +#define __STDCORE_H__ + +#ifndef __NO_LINK_STDLIB +#include +#else +#error "__NO_LINK_STDLIB" +#endif + +#endif diff --git a/libcore/vector-gdb.py b/libcore/vector-gdb.py new file mode 100644 index 0000000..65d5621 --- /dev/null +++ b/libcore/vector-gdb.py @@ -0,0 +1,202 @@ +# # vector_gdb.py +# import gdb +# import re + +# class VectorPrinter: +# """解析宏定义的 vector 结构体""" + +# def __init__(self, val): +# self.val = val + +# def check_vector_type(self): +# """验证是否为合法 vector 结构体""" +# try: +# # 检查是否包含 size/cap/data 字段 +# return all(self.val.type.has_key(field) +# for field in ['size', 'cap', 'data']) +# except gdb.error: +# return False + +# def get_array_view(self): +# """将 data 字段转换为数组视图""" +# if not self.check_vector_type(): +# return None + +# cap = int(self.val['cap']) +# 
data_ptr = self.val['data'] + +# if cap == 0 or data_ptr == 0: +# return [] + +# # 构造数组类型 (例如 int[cap]) +# element_type = data_ptr.type.target() +# array_type = element_type.array(cap - 1) # C 数组声明语法 + +# return data_ptr.cast(array_type.pointer()).dereference() + +# def to_string(self): +# if not self.check_vector_type(): +# return "Not a vector type" + +# size = self.val['size'] +# cap = self.val['cap'] +# data = self.get_array_view() + +# return (f"vector(size={size}, cap={cap}, data={data})") + +# class VectorInfoCommand(gdb.Command): +# """自定义命令:显示 vector 详细信息""" + +# def __init__(self): +# super(VectorInfoCommand, self).__init__("vector_info", +# gdb.COMMAND_USER) + +# def invoke(self, arg, from_tty): +# val = gdb.parse_and_eval(arg) +# printer = VectorPrinter(val) + +# if not printer.check_vector_type(): +# print(f"'{arg}' is not a vector structure") +# return + +# size = int(val['size']) +# cap = int(val['cap']) +# data = printer.get_array_view() + +# # 输出格式化信息 +# print(f"Vector {arg}:") +# print(f"├─ Size: {size}") +# print(f"├─ Capacity: {cap}") +# print("└─ Data elements [0..{}]:".format(min(size, cap)-1)) + +# for i in range(min(size, cap)): +# try: +# print(f" [{i}]: {data[i]}") +# except gdb.MemoryError: +# print(f" [{i}]: ") + +# def register_printers(): +# """注册自动类型识别""" +# def vector_matcher(val): +# return VectorPrinter(val).check_vector_type() + +# # 使用 lambda 包装以动态创建 printer +# gdb.pretty_printers.append(lambda val: +# VectorPrinter(val) if vector_matcher(val) else None) + +# # 注册命令和打印机 +# VectorInfoCommand() +# register_printers() + +# vector_gdb.py +import gdb +from gdb.printing import PrettyPrinter + +class VectorPrinter: + """兼容新旧注册方式的最终方案""" + + def __init__(self, val: gdb.Value): + self.val:gdb.Value = val + + def check_type(self) -> bool: + """类型检查(兼容匿名结构体)""" + try: + if self.val.type.code != gdb.TYPE_CODE_STRUCT: + return False + fields = self.val.type.fields() + if not fields: + return False + exp = ['size', 'cap', 'data'] + for t in 
fields: + if t.name in exp: + exp.remove(t.name) + else: + return False + return True + except gdb.error: + return False + + def to_string(self): + if not self.check_type(): + return "Not a vector" + + return "vector({} size={}, cap={})".format( + self.val.address, + self.val['size'], + self.val['cap'], + ) + + def display_hint(self): + return 'array' + + def children(self): + """生成数组元素(关键改进点)""" + if not self.check_type(): + return [] + + size = int(self.val['size']) + cap = int(self.val['cap']) + data_ptr = self.val['data'] + + if cap == 0 or data_ptr == 0: + return [] + + # 使用 GDB 内置数组转换 + array = data_ptr.dereference() + array = array.cast(data_ptr.type.target().array(cap - 1)) + + for i in range(size): + # state = "" if i < size else "" + try: + value = array[i] + yield (f"[{i}] {value.type} {value.address}", value) + except gdb.MemoryError: + yield (f"[{i}]", "") + +# 注册方式一:传统append方法(您之前有效的方式)self +def append_printer(): + gdb.pretty_printers.append( + lambda val: VectorPrinter(val) if VectorPrinter(val).check_type() else None + ) + +# 注册方式二:新版注册方法(备用方案) +def register_new_printer(): + class VectorPrinterLocator(PrettyPrinter): + def __init__(self): + super().__init__("vector_printer") + + def __call__(self, val): + ret = VectorPrinter(val).check_type() + print(f"ret {ret}, type {val.type}, {[(i.name, i.type) for i in val.type.fields()]}") + return None + + gdb.printing.register_pretty_printer( + gdb.current_objfile(), + VectorPrinterLocator() + ) + +# 双重注册保证兼容性 +append_printer() # 保留您原来有效的方式 +# register_new_printer() # 添加新版注册 + +class VectorInfoCommand(gdb.Command): + """保持原有命令不变""" + def __init__(self): + super().__init__("vector_info", gdb.COMMAND_USER) + + def invoke(self, arg, from_tty): + val = gdb.parse_and_eval(arg) + printer = VectorPrinter(val) + + if not printer.check_type(): + print("Invalid vector") + return + + print("=== Vector Details ===") + print("Size:", val['size']) + print("Capacity:", val['cap']) + print("Elements:") + for name, value in 
printer.children(): + print(f" {name}: {value}") + +VectorInfoCommand() diff --git a/libcore/vector.h b/libcore/vector.h new file mode 100644 index 0000000..bce0348 --- /dev/null +++ b/libcore/vector.h @@ -0,0 +1,54 @@ +/* vector.h - type-generic growable array built from macros; `vec` arguments are evaluated more than once. */ +#ifndef VECTOR_H +#define VECTOR_H + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +/* Declare `name` as an anonymous struct holding size, cap and a `type *data` buffer. */ +#define vector_header(name, type) \ + struct { \ + size_t size; \ + size_t cap; \ + type *data; \ + } name +/* Put `vec` into the empty state; any existing buffer is not freed. */ +#define vector_init(vec) \ + do { \ + (vec).size = 0; \ + (vec).cap = 0; \ + (vec).data = NULL; \ + } while(0) +/* Append `value`; when full, capacity doubles (8 at first) and the process exits if realloc fails. */ +#define vector_push(vec, value) \ + do { \ + if ((vec).size >= (vec).cap) { \ + size_t vector_push_cap_ = (vec).cap ? (vec).cap * 2 : 8; \ + void *vector_push_data_ = realloc((vec).data, vector_push_cap_ * sizeof(*(vec).data)); \ + if (!vector_push_data_) { \ + fprintf(stderr, "vector_push: realloc failed\n"); \ + exit(1); \ + } \ + (vec).cap = vector_push_cap_; \ + (vec).data = vector_push_data_; \ + } \ + (vec).data[(vec).size++] = (value); \ + } while(0) +/* Remove and yield the last element; `vec` must be non-empty (not checked). */ +#define vector_pop(vec) \ + ((vec).data[--(vec).size]) +/* Element `idx` as an lvalue; no bounds checking. */ +#define vector_at(vec, idx) \ + (((vec).data)[idx]) +/* Index of the element that `ptr` points to inside the buffer of `vec`. */ +#define vector_idx(vec, ptr) \ + ((ptr) - (vec).data) +/* Free the buffer and return `vec` to the empty state. */ +#define vector_free(vec) \ + do { \ + free((vec).data); \ + (vec).data = NULL; \ + (vec).size = (vec).cap = 0; \ + } while(0) + +#endif