init basic

This commit is contained in:
ZZY 2025-03-05 15:45:19 +08:00
commit 09299e339c
42 changed files with 5752 additions and 0 deletions

View File

@ -0,0 +1,46 @@
# Compiler settings
CC = gcc
AR = ar
CFLAGS = -g
# Source directories
LEXER_DIR = ./lexer
PARSER_DIR = ./parser
AST_DIR = ./parser/ast
SYMTAB_DIR = ./parser/symtab
# Source file list
SRCS = \
frontend.c \
$(LEXER_DIR)/lexer.c \
$(PARSER_DIR)/parser.c \
$(AST_DIR)/ast.c \
$(AST_DIR)/block.c \
$(AST_DIR)/decl.c \
$(AST_DIR)/expr.c \
$(AST_DIR)/func.c \
$(AST_DIR)/program.c \
$(AST_DIR)/stmt.c \
$(AST_DIR)/term.c \
$(SYMTAB_DIR)/hashmap.c \
$(SYMTAB_DIR)/scope.c \
$(SYMTAB_DIR)/symtab.c \
# Object file list, derived from SRCS by swapping the .c suffix for .o
OBJS = $(SRCS:.c=.o)
# Final target: a static library archived from all objects
TARGET = libfrontend.a
all: $(TARGET)
$(TARGET): $(OBJS)
$(AR) rcs $@ $^
%.o: %.c
$(CC) $(CFLAGS) -c -o $@ $<
clean:
rm -f $(OBJS) $(TARGET)
.PHONY: all clean

View File

@ -0,0 +1,18 @@
#include "lexer/lexer.h"
#include "parser/symtab/symtab.h"
#include "frontend.h"
/**
 * Run the whole front end over one input stream.
 *
 * Sets up a lexer on (stream, sread), a fresh symbol table and a parser bound
 * to both, parses the program and returns the parser's root node.
 *
 * @param file   name handed to init_lexer
 * @param stream opaque handle forwarded to sread
 * @param sread  read callback used by the lexer to refill its buffer
 * @return parser.root after parse_prog() has run
 */
struct ASTNode* frontend(const char* file, void* stream, sread_fn sread) {
    struct Lexer lexer;
    struct SymbolTable symtab;
    struct Parser parser;
    struct ASTNode* root;

    init_lexer(&lexer, file, stream, sread);
    init_symtab(&symtab);
    init_parser(&parser, &lexer, &symtab);

    parse_prog(&parser);
    root = parser.root;

    // TODO: release the lexer / symbol table / parser resources
    return root;
}

View File

@ -0,0 +1,24 @@
/* Public entry point of the front end plus the shared error/warn/xmalloc helpers. */
#ifndef __FRONTEND_H__
#define __FRONTEND_H__
/* Default fatal-error hook: print the formatted message to stderr and exit(1).
 * An embedder may pre-define `error` before including this header to override it. */
#ifndef error
#include <stdio.h>
#include <stdlib.h>
#define STD_LIBRARY
#define error(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while (0)
#endif
/* Default warning hook: print the formatted message to stdout and continue. */
#ifndef warn
#include <stdio.h>
#define STD_LIBRARY
#define warn(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#endif
/* Plain alias for malloc(); it does NOT check for a NULL result.
 * NOTE(review): <stdlib.h> is only included above when `error` was not
 * pre-defined, so an embedder overriding `error` must make malloc visible. */
#define xmalloc(size) malloc(size)
#include "parser/parser.h"
#include "parser/ast/ast.h"
/* Read callback used to fill the lexer buffer. The lexer calls it with
 * elem_size == 1 and dst_size == count, treats a negative return value as a
 * read error and a return value smaller than `count` as end of input. */
typedef int (*sread_fn)(void *dst_buf, int dst_size, int elem_size, int count, void *stream);
/* Lex and parse `stream`; returns the root node stored in the parser. */
struct ASTNode* frontend(const char* file, void* stream, sread_fn sread);
#endif

View File

@ -0,0 +1,5 @@
# 词法分析
参考 LCC 的词法分析部分
主要使用 LL(n) 硬编码查找token

View File

@ -0,0 +1,523 @@
/**
* 仿LCCompiler的词法分析部分
*
* LCC的README in 2025.2
This hierarchy is the distribution for lcc version 4.2.
lcc version 3.x is described in the book "A Retargetable C Compiler:
Design and Implementation" (Addison-Wesley, 1995, ISBN 0-8053-1670-1).
There are significant differences between 3.x and 4.x, most notably in
the intermediate code. For details, see
https://drh.github.io/lcc/documents/interface4.pdf.
VERSION 4.2 IS INCOMPATIBLE WITH EARLIER VERSIONS OF LCC. DO NOT
UNLOAD THIS DISTRIBUTION ON TOP OF A 3.X DISTRIBUTION.
LCC is a C89 ("ANSI C") compiler designed to be highly retargetable.
LOG describes the changes since the last release.
CPYRIGHT describes the conditions under you can use, copy, modify, and
distribute lcc or works derived from lcc.
doc/install.html is an HTML file that gives a complete description of
the distribution and installation instructions.
Chris Fraser / cwf@aya.yale.edu
David Hanson / drh@drhanson.net
*/
#include "../frontend.h"
#include "lexer.h"
// Keyword lookup table expanded from KEYWORD_TABLE: one
// { spelling, standard that introduced it, token type } entry per keyword.
// keyword_cmp() binary-searches this array by spelling, so the entries of
// KEYWORD_TABLE must stay sorted in ascending byte order.
static const struct {
const char* name;
enum CSTD_KEYWORD std_type;
enum TokenType tok;
} keywords[] = {
#define X(name, std_type, tok, ...) { #name, std_type, tok },
KEYWORD_TABLE
#undef X
};
/**
 * Look up an identifier in the sorted `keywords` table by binary search.
 *
 * @param name start of the identifier text (need not be NUL-terminated)
 * @param len  number of characters belonging to the identifier
 * @return index into `keywords` on an exact match, -1 if it is not a keyword
 */
static inline int keyword_cmp(const char* name, int len) {
    int lo = 0;
    int hi = sizeof(keywords) / sizeof(keywords[0]) - 1;

    while (lo <= hi) {
        int probe = (lo + hi) / 2;
        const char *kw = keywords[probe].name;
        int diff = 0;
        int pos = 0;

        // Compare the first `len` bytes as unsigned chars; stop at the first
        // difference or at a NUL inside the input.
        while (pos < len && diff == 0) {
            diff = (unsigned char)name[pos] - (unsigned char)kw[pos];
            if (name[pos] == '\0') {
                break;
            }
            pos++;
        }

        if (diff == 0) {
            // Shared prefix of length `len`: a hit only if the keyword ends here too.
            if (kw[len] == '\0') {
                return probe;
            }
            // The keyword is longer than the input, so the input sorts before it.
            hi = probe - 1;
        } else if (diff < 0) {
            hi = probe - 1;
        } else {
            lo = probe + 1;
        }
    }
    return -1; // Not a keyword.
}
/**
 * Reset a lexer so that it reads from `stream` through `sread`.
 *
 * The buffer is zero-filled and both cursors are placed at its start, so the
 * first get_token() call triggers a refill. `file_name` is currently unused.
 */
void init_lexer(struct Lexer* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
{
    unsigned char* start = (unsigned char*)&(lexer->buffer);
    int slots = sizeof(lexer->buffer) / sizeof(lexer->buffer[0]);
    int k = 0;

    while (k < slots) {
        lexer->buffer[k] = 0;
        k++;
    }

    lexer->sread = sread;
    lexer->stream = stream;
    lexer->line = 1;
    lexer->index = 1;
    lexer->end_ptr = start;
    lexer->cur_ptr = start;
}
/**
 * Refill the lexer buffer.
 *
 * The unread bytes [cur_ptr, end_ptr) are moved to the start of the buffer,
 * then the free space behind them is filled through lexer->sread. When the
 * callback delivers fewer bytes than requested, the input is considered
 * exhausted and a '\0' sentinel is appended, which get_token() reports as EOF.
 *
 * Fix: end_ptr is now rebased to `buffer + num` after the move. Previously it
 * kept its old position and was only incremented by the number of new bytes,
 * so after the first refill it pointed past the valid data (and, for inputs
 * larger than the buffer, past the buffer itself).
 */
static void flush_buffer(struct Lexer* lexer) {
    unsigned char* base = (unsigned char*)lexer->buffer;
    int num = lexer->end_ptr - lexer->cur_ptr;

    // Source and destination may overlap; a forward copy is safe because the
    // destination always lies at or before the source.
    for (int i = 0; i < num; i++) {
        base[i] = lexer->cur_ptr[i];
    }
    lexer->cur_ptr = base;
    lexer->end_ptr = base + num;

    int read_size = LEXER_BUFFER_SIZE - num;
    // TODO size_t to int maybe lose precision
    int got_size = lexer->sread(lexer->end_ptr, read_size, 1, read_size, lexer->stream);

    if (got_size < 0) {
        error("lexer read error");
    } else if (got_size < read_size) {
        // Short read: append the EOF sentinel. The buffer holds
        // LEXER_BUFFER_SIZE + 1 bytes, so there is always room for it.
        lexer->end_ptr += got_size;
        lexer->end_ptr[0] = '\0'; // EOF
        lexer->end_ptr++;
    } else if (got_size == read_size) {
        lexer->end_ptr += got_size;
    } else {
        error("lexer read error imposible got_size > read_size maybe overflow?");
    }
}
/**
 * Advance cur_ptr to the next '\n' or to the '\0' EOF sentinel.
 *
 * The character cur_ptr points at on entry is never examined: the scan first
 * steps forward (or refills the buffer when the cursor sits at end_ptr) and
 * only then looks at the byte under the cursor. On return cur_ptr points AT
 * the terminating character; it is not consumed.
 */
static void goto_newline(struct Lexer* lexer) {
    for (;;) {
        if (lexer->cur_ptr == lexer->end_ptr) {
            // The refill leaves cur_ptr on the first byte of the buffer,
            // which is the next byte to examine.
            flush_buffer(lexer);
        } else {
            lexer->cur_ptr++;
        }
        if (*lexer->cur_ptr == '\n' || *lexer->cur_ptr == '\0') {
            return;
        }
    }
}
/**
 * Skip the body of a block comment.
 *
 * Expects cur_ptr to be positioned after the opening delimiter. Stops just
 * past the closing delimiter, or at the '\0' EOF sentinel (left unconsumed)
 * when the comment is never closed. At least two bytes are kept available so
 * that the two-character terminator can be tested.
 */
static void goto_block_comment(struct Lexer* lexer) {
    for (;;) {
        if (lexer->end_ptr - lexer->cur_ptr < 2) {
            flush_buffer(lexer);
        }
        unsigned char* at = lexer->cur_ptr;
        if (at[0] == '\0') {
            return;
        }
        if (at[0] == '*' && at[1] == '/') {
            lexer->cur_ptr = at + 2;
            return;
        }
        lexer->cur_ptr = at + 1;
    }
}
// TODO octal (\123) and hexadecimal (\x41) escapes are not supported yet
/**
 * Translate the character following a backslash into the value it denotes.
 *
 * @param peek points at the character right after the backslash
 * @return the escaped character value; unknown escapes are reported through
 *         error()
 *
 * Changes: the standard alert escape `\a` is now recognised, and the default
 * branch returns a value so the function has a defined result even when
 * error() is overridden with a hook that returns.
 */
static char got_slash(unsigned char* peek) {
    switch (*peek) {
    case '\\': return '\\';
    case '\'': return '\'';
    case '\"': return '\"';
    case '\?': return '\?';
    case '0': return '\0';
    case 'a': return '\a';
    case 'b': return '\b';
    case 'f': return '\f';
    case 'n': return '\n';
    case 'r': return '\r';
    case 't': return '\t';
    case 'v': return '\v';
    default:
        error("Unknown escape character");
        break;
    }
    // Only reached when error() returns: fall back to the raw character.
    return (char)*peek;
}
/**
 * Lex a character literal; cur_ptr points at the opening quote on entry.
 *
 * Stores the character value in token->constant.ch, sets the token type to
 * TOKEN_CHAR_LITERAL and moves cur_ptr past the closing quote.
 *
 * Fix: the cursor is now advanced past the character (or past the escape
 * letter) before the closing quote is checked. Previously the check looked
 * at the character itself, so every ordinary literal such as 'a' or '\n'
 * was rejected as unclosed.
 */
static void parse_char_literal(struct Lexer* lexer, struct Token* token) {
    unsigned char* peek = lexer->cur_ptr + 1; // skip the opening quote
    char val;

    if (*peek == '\\') {
        peek++;
        val = got_slash(peek);
    } else {
        val = *peek;
    }
    peek++; // now on the expected closing quote

    if (*peek != '\'') error("Unclosed character literal");

    token->constant.ch = val;
    token->constant.have = 1;
    token->type = TOKEN_CHAR_LITERAL;
    lexer->cur_ptr = peek + 1;
}
static void parse_string_literal(struct Lexer* lexer, struct Token* token) {
unsigned char* peek = lexer->cur_ptr + 1;
// TODO string literal size check
char* dest = token->constant.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
int len = 0;
while (*peek != '"') {
if (peek >= lexer->end_ptr) flush_buffer(lexer);
if (*peek == '\\') { // 处理转义
peek++;
*peek = got_slash(peek);
}
if (len >= LEXER_MAX_TOKEN_SIZE) error("String too long");
dest[len++] = *peek++;
}
dest[len] = '\0';
lexer->cur_ptr = peek + 1;
token->constant.have = 1;
token->type = TOKEN_STRING_LITERAL;
}
// FIXME it write by AI maybe error
static void parse_number(struct Lexer* lexer, struct Token* token) {
unsigned char* peek = lexer->cur_ptr;
int base = 10;
int is_float = 0;
long long int_val = 0;
double float_val = 0.0;
double fraction = 1.0;
// 判断进制
if (*peek == '0') {
peek++;
switch (*peek) {
case 'x':
case 'X':
base = 16;
default:
base = 8;
}
}
// 解析整数部分
while (1) {
int digit = -1;
if (*peek >= '0' && *peek <= '9') {
digit = *peek - '0';
} else if (base == 16) {
if (*peek >= 'a' && *peek <= 'f') digit = *peek - 'a' + 10;
else if (*peek >= 'A' && *peek <= 'F') digit = *peek - 'A' + 10;
}
if (digit < 0 || digit >= base) break;
if (!is_float) {
int_val = int_val * base + digit;
} else {
float_val = float_val * base + digit;
fraction *= base;
}
peek++;
}
// 解析浮点数
if (*peek == '.' && base == 10) {
is_float = 1;
float_val = int_val;
peek++;
while (*peek >= '0' && *peek <= '9') {
float_val = float_val * 10.0 + (*peek - '0');
fraction *= 10.0;
peek++;
}
float_val /= fraction;
}
// 解析科学计数法
if ((*peek == 'e' || *peek == 'E') && base == 10) {
is_float = 1;
peek++;
int exp_sign = 1;
int exponent = 0;
if (*peek == '+') peek++;
else if (*peek == '-') {
exp_sign = -1;
peek++;
}
while (*peek >= '0' && *peek <= '9') {
exponent = exponent * 10 + (*peek - '0');
peek++;
}
// float_val *= pow(10.0, exp_sign * exponent);
}
// 存储结果
lexer->cur_ptr = peek;
token->constant.have = 1;
if (is_float) {
token->constant.d = float_val;
token->type = TOKEN_FLOAT_LITERAL;
} else {
token->constant.ll = int_val;
token->type = TOKEN_INT_LITERAL;
}
}
#define GOT_ONE_TOKEN_BUF_SIZE 64
// /zh/c/language/operator_arithmetic.html
/**
 * Read the next raw token from the lexer into `token`.
 *
 * Blank runs, newlines, preprocessor lines and comments are reported as
 * TOKEN_FLUSH / TOKEN_LINE_COMMENT / TOKEN_BLOCK_COMMENT; callers that only
 * want significant tokens should use get_valid_token(). Identifier text is
 * copied into a heap buffer stored in token->constant.str.
 *
 * Fixes over the previous version:
 *  - the '!' case had no `break` and fell through to '[', so both "!" and
 *    "!=" were reported as TOKEN_L_BRACKET;
 *  - the wide-literal check compared *peek against two different characters
 *    at once and therefore never fired; it now tests the identifier's first
 *    character and the character that follows it;
 *  - `return f();` with a void expression in a void function is replaced by
 *    call-then-return;
 *  - the local that shadowed strlen() is renamed, pointer signedness
 *    mismatches are made explicit, and the identifier allocation is checked.
 */
void get_token(struct Lexer* lexer, struct Token* token) {
    // Keep at least one token's worth of bytes readable.
    if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) {
        flush_buffer(lexer);
    }
    unsigned char* peek = lexer->cur_ptr;

    // Fast skip over blanks.
    while (peek != lexer->end_ptr && (*peek == ' ' || *peek == '\t')) {
        peek++;
    }
    if (peek != lexer->cur_ptr) {
        // To TOKEN_FLUSH
        lexer->cur_ptr = peek;
        token->type = TOKEN_FLUSH;
    }

    enum TokenType tok = TOKEN_INIT;
    struct TokenConstant constant;
    constant.have = 0;
    constant.ll = 0;

    // One step: dispatch on the first character, look ahead for the rest.
    switch (*peek++) {
    case '=':
        switch (*peek++) {
        case '=': tok = TOKEN_EQ; break;
        default: peek--, tok = TOKEN_ASSIGN; break;
        } break;
    case '+':
        switch (*peek++) {
        case '+': tok = TOKEN_ADD_ADD; break;
        case '=': tok = TOKEN_ASSIGN_ADD; break;
        default: peek--, tok = TOKEN_ADD; break;
        } break;
    case '-':
        switch (*peek++) {
        case '-': tok = TOKEN_SUB_SUB; break;
        case '=': tok = TOKEN_ASSIGN_SUB; break;
        case '>': tok = TOKEN_DEREF; break;
        default: peek--, tok = TOKEN_SUB; break;
        } break;
    case '*':
        switch (*peek++) {
        case '=': tok = TOKEN_ASSIGN_MUL; break;
        default: peek--, tok = TOKEN_MUL; break;
        } break;
    case '/':
        switch (*peek++) {
        case '=': tok = TOKEN_ASSIGN_DIV; break;
        case '/': {
            // Skip to the end of the line; cur_ptr is left on the newline.
            goto_newline(lexer);
            tok = TOKEN_LINE_COMMENT;
            goto END;
        }
        case '*': {
            lexer->cur_ptr = peek;
            goto_block_comment(lexer);
            tok = TOKEN_BLOCK_COMMENT;
            goto END;
        }
        default: peek--, tok = TOKEN_DIV; break;
        } break;
    case '%':
        switch (*peek++) {
        case '=': tok = TOKEN_ASSIGN_MOD; break;
        default: peek--, tok = TOKEN_MOD; break;
        } break;
    case '&':
        switch (*peek++) {
        case '&': tok = TOKEN_AND_AND; break;
        case '=': tok = TOKEN_ASSIGN_AND; break;
        default: peek--, tok = TOKEN_AND; break;
        } break;
    case '|':
        switch (*peek++) {
        case '|': tok = TOKEN_OR_OR; break;
        case '=': tok = TOKEN_ASSIGN_OR; break;
        default: peek--, tok = TOKEN_OR; break;
        } break;
    case '^':
        switch (*peek++) {
        case '=': tok = TOKEN_ASSIGN_XOR; break;
        default: peek--, tok = TOKEN_XOR; break;
        } break;
    case '<':
        switch (*peek++) {
        case '=': tok = TOKEN_LE; break;
        case '<': tok = (*peek == '=') ? (peek++, TOKEN_ASSIGN_L_SH) : TOKEN_L_SH; break;
        default: peek--, tok = TOKEN_LT; break;
        } break;
    case '>':
        switch (*peek++) {
        case '=': tok = TOKEN_GE; break;
        case '>': tok = (*peek == '=') ? (peek++, TOKEN_ASSIGN_R_SH) : TOKEN_R_SH; break;
        default: peek--, tok = TOKEN_GT; break;
        } break;
    case '~':
        tok = TOKEN_BIT_NOT; break;
    case '!':
        switch (*peek++) {
        case '=': tok = TOKEN_NEQ; break;
        default: peek--, tok = TOKEN_NOT; break;
        } break; // this break was missing: '!' used to fall through to '['
    case '[':
        tok = TOKEN_L_BRACKET; break;
    case ']':
        tok = TOKEN_R_BRACKET; break;
    case '(':
        tok = TOKEN_L_PAREN; break;
    case ')':
        tok = TOKEN_R_PAREN; break;
    case '{':
        tok = TOKEN_L_BRACE; break;
    case '}':
        tok = TOKEN_R_BRACE; break;
    case ';':
        tok = TOKEN_SEMICOLON; break;
    case ',':
        tok = TOKEN_COMMA; break;
    case ':':
        tok = TOKEN_COLON; break;
    case '.':
        if (peek[0] == '.' && peek[1] == '.') {
            peek += 2;
            tok = TOKEN_ELLIPSIS;
        } else {
            tok = TOKEN_DOT;
        }
        break;
    case '?':
        tok = TOKEN_COND; break;
    case '\v': case '\r': case '\f': // FIXME it parse as a blank character
        tok = TOKEN_FLUSH; break;
    case '\n':
        // A newline is reported as a flush token; track the line number.
        lexer->line++;
        tok = TOKEN_FLUSH; break;
    case '#':
        warn("TODO: #define\n");
        goto_newline(lexer);
        tok = TOKEN_FLUSH;
        goto END;
    case '\0':
        // EOF: cur_ptr is left on the sentinel so later calls keep returning EOF.
        tok = TOKEN_EOF;
        goto END;
    case '\'':
        parse_char_literal(lexer, token);
        return;
    case '"':
        parse_string_literal(lexer, token);
        return;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        parse_number(lexer, token);
        return;
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '_': {
        // TOKEN_IDENT or keyword
        if (lexer->cur_ptr[0] == 'L' && (*peek == '\'' || *peek == '"')) {
            error("unsupport wide-character char literal by `L` format");
        }
        while (1) {
            if (peek == lexer->end_ptr) {
                error("unsupport outof 64 length identifier");
            }
            if ((*peek >= 'a' && *peek <= 'z') || (*peek >= 'A' && *peek <= 'Z') ||
                (*peek == '_') || (*peek >= '0' && *peek <= '9')) {
                peek++;
                continue;
            }
            break;
        }
        int ident_len = peek - lexer->cur_ptr;
        int res = keyword_cmp((const char*)lexer->cur_ptr, ident_len);
        if (res == -1) {
            // Not a keyword: hand a NUL-terminated copy of the spelling to the token.
            char* str = xmalloc(ident_len + 1);
            if (str == NULL) {
                error("out of memory");
            }
            for (int i = 0; i < ident_len; i++) {
                str[i] = (char)lexer->cur_ptr[i];
            }
            str[ident_len] = '\0';
            constant.have = 1;
            constant.str = str;
            tok = TOKEN_IDENT;
        } else {
            tok = keywords[res].tok;
        }
        break;
    }
    default:
        error("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
        break;
    }
    lexer->cur_ptr = peek;
END:
    token->constant = constant;
    token->type = tok;
}
/**
 * Fetch the next significant token for the parser.
 *
 * Repeatedly calls get_token() and discards the filler kinds it may report
 * (TOKEN_FLUSH, TOKEN_LINE_COMMENT, TOKEN_BLOCK_COMMENT); the first token of
 * any other type is left in `token`.
 */
void get_valid_token(struct Lexer* lexer, struct Token* token) {
    for (;;) {
        get_token(lexer, token);
        switch (token->type) {
        case TOKEN_FLUSH:
        case TOKEN_LINE_COMMENT:
        case TOKEN_BLOCK_COMMENT:
            continue; // filler, read again
        default:
            return;
        }
    }
}
// Token-type -> printable name map, indexed by enum TokenType through
// designated initializers (stringizes either #str or #name, see below).
static const char* token_strings[] = {
// Ordinary tokens: stringize the first column of TOKEN_TABLE (#str).
// Entries written as string literals keep their quotes in the result.
#define X(str, tok) [tok] = #str,
TOKEN_TABLE
#undef X
// Keywords: stringize the keyword spelling (#name).
#define X(name, std, tok) [tok] = #name,
KEYWORD_TABLE
#undef X
};
const char* get_token_name(enum TokenType type) {
return token_strings[type];
}

View File

@ -0,0 +1,40 @@
/* Lexer state, token record and the public lexer API. */
#ifndef __LEXER_H__
#define __LEXER_H__
#include "token.h"
/* Maximum number of characters stored for a string literal (excluding the NUL). */
#define LEXER_MAX_TOKEN_SIZE 63
/* Usable size of the read buffer; the array has one extra byte for the EOF sentinel. */
#define LEXER_BUFFER_SIZE 4095
/* Read callback used to refill the buffer. The lexer passes elem_size == 1 and
 * dst_size == count; a negative result is treated as an error and a result
 * smaller than `count` as end of input. */
typedef int (*lexer_sread_fn)(void *dst_buf, int dst_size,
int elem_size, int count, void *stream);
struct Lexer {
int line; // current line number; starts at 1, incremented when get_token sees '\n'
int index; // initialised to 1 by init_lexer; not updated by the lexer code shown in lexer.c
// const char current_file_name[LEXER_BUFFER_SIZE+1];
unsigned char* cur_ptr; // next character to scan (not yet consumed)
unsigned char* end_ptr; // one past the last valid character in the buffer
char buffer[LEXER_BUFFER_SIZE+1];
lexer_sread_fn sread; // refill callback
void* stream; // opaque handle passed back to sread
};
struct Token {
enum TokenType type;
struct TokenConstant constant; // literal value or identifier text, valid when constant.have != 0
};
/* Reset `lexer` to read from `stream` via `sread`; `file_name` is currently unused. */
void init_lexer(struct Lexer* lexer, const char* file_name, void* stream,
lexer_sread_fn sread);
// Read the next raw token, including TOKEN_FLUSH and comment tokens.
void get_token(struct Lexer* lexer, struct Token* token);
// Like get_token, but skips TOKEN_FLUSH and comment tokens, which the parser cannot use.
void get_valid_token(struct Lexer* lexer, struct Token* token);
/* Printable name of a token type. */
const char* get_token_name(enum TokenType token);
#endif

View File

@ -0,0 +1,46 @@
#include "../lexer.h"
#include <stdio.h>
// gcc -g ../lexer.c test_lexer.c -o test_lexer
/*
struct TokenConstant {
int have;
union {
char ch;
int i;
float f;
double d;
long long ll;
char* str;
};
};
*/
int g_num;
int g_num_arr[3];

/**
 * stdio adapter matching lexer_sread_fn.
 *
 * Replaces the previous cast of fread_s to the callback type: fread_s is not
 * available on every C library and its parameter types (size_t, FILE*) do not
 * match the callback signature, so calling it through the cast pointer was
 * undefined behaviour.
 *
 * @return number of elements read, or -1 on invalid arguments / read error
 */
static int test_sread(void *dst_buf, int dst_size, int elem_size, int count, void *stream) {
    if (dst_buf == NULL || stream == NULL || dst_size < 0 || elem_size <= 0 || count < 0) {
        return -1;
    }
    if ((long long)elem_size * count > dst_size) {
        return -1; // request would overflow the destination buffer
    }
    size_t got = fread(dst_buf, (size_t)elem_size, (size_t)count, (FILE*)stream);
    if (got < (size_t)count && ferror((FILE*)stream)) {
        return -1;
    }
    return (int)got;
}

/**
 * Lexer smoke test: tokenises the file named on the command line (default:
 * this source file) and prints one line per significant token.
 */
int main(int argc, char* argv[]) {
    const char* file_name = "test_lexer.c";
    if (argc == 2) {
        file_name = argv[1];
    }

    FILE* fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror("open file failed");
        return 1;
    }
    printf("open file success\n");

    struct Lexer lexer;
    // Pass the real file name (was a misspelled hard-coded literal).
    init_lexer(&lexer, file_name, fp, test_sread);

    struct Token tok;
    while (1) {
        get_valid_token(&lexer, &tok);
        if (tok.type == TOKEN_EOF) {
            break;
        }
        printf("line: %d, column: %d, type: %3d, typename: %s\n",
               lexer.line, lexer.index, tok.type, get_token_name(tok.type));
    }

    fclose(fp);
    return 0;
}

View File

@ -0,0 +1,250 @@
#ifndef __TOKEN_H__
#define __TOKEN_H__
// Origin of a keyword: the C standard revision or extension it belongs to
// (second column of KEYWORD_TABLE).
enum CSTD_KEYWORD {
CSTD_C89,
CSTD_C99,
CEXT_ASM,
};
// X-macro list of keywords: X(spelling, origin, token type).
// The lexer looks keywords up with a binary search over this list, so the
// entries MUST stay sorted by spelling in ascending byte order.
#define KEYWORD_TABLE \
X(asm , CEXT_ASM, TOKEN_ASM) \
X(break , CSTD_C89, TOKEN_BREAK) \
X(case , CSTD_C89, TOKEN_CASE) \
X(char , CSTD_C89, TOKEN_CHAR) \
X(const , CSTD_C89, TOKEN_CONST) \
X(continue , CSTD_C89, TOKEN_CONTINUE) \
X(default , CSTD_C89, TOKEN_DEFAULT) \
X(do , CSTD_C89, TOKEN_DO) \
X(double , CSTD_C89, TOKEN_DOUBLE) \
X(else , CSTD_C89, TOKEN_ELSE) \
X(enum , CSTD_C89, TOKEN_ENUM) \
X(extern , CSTD_C89, TOKEN_EXTERN) \
X(float , CSTD_C89, TOKEN_FLOAT) \
X(for , CSTD_C89, TOKEN_FOR) \
X(goto , CSTD_C89, TOKEN_GOTO) \
X(if , CSTD_C89, TOKEN_IF) \
X(inline , CSTD_C99, TOKEN_INLINE) \
X(int , CSTD_C89, TOKEN_INT) \
X(long , CSTD_C89, TOKEN_LONG) \
X(register , CSTD_C89, TOKEN_REGISTER) \
X(restrict , CSTD_C99, TOKEN_RESTRICT) \
X(return , CSTD_C89, TOKEN_RETURN) \
X(short , CSTD_C89, TOKEN_SHORT) \
X(signed , CSTD_C89, TOKEN_SIGNED) \
X(sizeof , CSTD_C89, TOKEN_SIZEOF) \
X(static , CSTD_C89, TOKEN_STATIC) \
X(struct , CSTD_C89, TOKEN_STRUCT) \
X(switch , CSTD_C89, TOKEN_SWITCH) \
X(typedef , CSTD_C89, TOKEN_TYPEDEF) \
X(union , CSTD_C89, TOKEN_UNION) \
X(unsigned , CSTD_C89, TOKEN_UNSIGNED) \
X(void , CSTD_C89, TOKEN_VOID) \
X(volatile , CSTD_C89, TOKEN_VOLATILE) \
X(while , CSTD_C89, TOKEN_WHILE) \
// KEYWORD_TABLE
// X-macro list of non-keyword tokens: X(display text, token type).
// The order here defines the numeric values of the first enum TokenType
// members; the display text is only used to build the token name table.
#define TOKEN_TABLE \
X(EOF , TOKEN_EOF) \
X(init , TOKEN_INIT) \
X(flush , TOKEN_FLUSH) \
X("==" , TOKEN_EQ) \
X("=" , TOKEN_ASSIGN) \
X("++" , TOKEN_ADD_ADD) \
X("+=" , TOKEN_ASSIGN_ADD) \
X("+" , TOKEN_ADD) \
X("--" , TOKEN_SUB_SUB) \
X("-=" , TOKEN_ASSIGN_SUB) \
X("->" , TOKEN_DEREF) \
X("-" , TOKEN_SUB) \
X("*=" , TOKEN_ASSIGN_MUL) \
X("*" , TOKEN_MUL) \
X("/=" , TOKEN_ASSIGN_DIV) \
X("/" , TOKEN_DIV) \
X("//" , TOKEN_LINE_COMMENT) \
X("/* */" , TOKEN_BLOCK_COMMENT) \
X("%=" , TOKEN_ASSIGN_MOD) \
X("%" , TOKEN_MOD) \
X("&&" , TOKEN_AND_AND) \
X("&=" , TOKEN_ASSIGN_AND) \
X("&" , TOKEN_AND) \
X("||" , TOKEN_OR_OR) \
X("|=" , TOKEN_ASSIGN_OR) \
X("|" , TOKEN_OR) \
X("^=" , TOKEN_ASSIGN_XOR) \
X("^" , TOKEN_XOR) \
X("<<=" , TOKEN_ASSIGN_L_SH) \
X("<<" , TOKEN_L_SH) \
X("<=" , TOKEN_LE) \
X("<" , TOKEN_LT) \
X(">>=" , TOKEN_ASSIGN_R_SH) \
X(">>" , TOKEN_R_SH) \
X(">=" , TOKEN_GE) \
X(">" , TOKEN_GT) \
X("!" , TOKEN_NOT) \
X("!=" , TOKEN_NEQ) \
X("~" , TOKEN_BIT_NOT) \
X("[" , TOKEN_L_BRACKET) \
X("]" , TOKEN_R_BRACKET) \
X("(" , TOKEN_L_PAREN) \
X(")" , TOKEN_R_PAREN) \
X("{" , TOKEN_L_BRACE) \
X("}" , TOKEN_R_BRACE) \
X(";" , TOKEN_SEMICOLON) \
X("," , TOKEN_COMMA) \
X(":" , TOKEN_COLON) \
X("." , TOKEN_DOT) \
X("..." , TOKEN_ELLIPSIS) \
X("?" , TOKEN_COND) \
X(identifier , TOKEN_IDENT) \
X(int_literal , TOKEN_INT_LITERAL) \
X(float_literal , TOKEN_FLOAT_LITERAL) \
X(char_literal , TOKEN_CHAR_LITERAL) \
X(string_literal , TOKEN_STRING_LITERAL) \
// END
// Token type enumeration, generated from the two X-macro tables:
// all TOKEN_TABLE entries first, followed by all KEYWORD_TABLE entries.
enum TokenType {
// Ordinary tokens
#define X(str, tok) tok,
TOKEN_TABLE
#undef X
// Keywords (same three-column X form as the table)
#define X(name, std, tok) tok,
KEYWORD_TABLE
#undef X
};
// Payload attached to a token. Which union member is meaningful depends on
// the token type that the lexer produced.
struct TokenConstant {
int have; // set to 1 by the lexer when a payload is stored, 0 otherwise
union {
char ch; // TOKEN_CHAR_LITERAL value
int i;
float f;
double d; // TOKEN_FLOAT_LITERAL value
long long ll; // TOKEN_INT_LITERAL value
char* str; // TOKEN_STRING_LITERAL / TOKEN_IDENT text, allocated by the lexer
};
};
// "break"
// "case"
// "char"
// "const"
// "continue"
// "default"
// "do"
// "double"
// "else"
// "enum"
// "extern"
// "float"
// "for"
// "goto"
// "if"
// "inline (C99)"
// "int"
// "long"
// "register"
// "restrict (C99)"
// "return"
// "short"
// "signed"
// "sizeof"
// "static"
// "struct"
// "switch"
// "typedef"
// "union"
// "unsigned"
// "void"
// "volatile"
// "while"
// alignas (C23)
// alignof (C23)
// auto
// bool (C23)
// constexpr (C23)
// false (C23)
// nullptr (C23)
// static_assert (C23)
// thread_local (C23)
// true (C23)
// typeof (C23)
// typeof_unqual (C23)
// _Alignas (C11)
// _Alignof (C11)
// _Atomic (C11)
// _BitInt (C23)
// _Bool (C99)
// _Complex (C99)
// _Decimal128 (C23)
// _Decimal32 (C23)
// _Decimal64 (C23)
// _Generic (C11)
// _Imaginary (C99)
// _Noreturn (C11)
// _Static_assert (C11)
// _Thread_local (C11)
// a = b
// a += b
// a -= b
// a *= b
// a /= b
// a %= b
// a &= b
// a |= b
// a ^= b
// a <<= b
// a >>= b
// ++a
// --a
// a++
// a--
// +a
// -a
// a + b
// a - b
// a * b
// a / b
// a % b
// ~a
// a & b
// a | b
// a ^ b
// a << b
// a >> b
// !a
// a && b
// a || b
// a == b
// a != b
// a < b
// a > b
// a <= b
// a >= b
// a[b]
// *a
// &a
// a->b
// a.b
// a(...)
// a, b
// (type) a
// a ? b : c
// sizeof
// _Alignof
// (C11)
#endif

View File

@ -0,0 +1,18 @@
- ast.c 作为抽象语法树的定义
- block.c 作为块的实现主要用于处理作用域,需要符号表
- decl.c 作为声明的实现,其中主要携带变量声明,函数声明见 func.c ,需要符号表
- func.c 作为函数的实现,其中主要携带函数声明,以及函数定义,需要符号表
- expr.c 作为表达式的实现。需要符号表
- stmt.c 作为语句的实现。需要表达式类型判断合法性
- term.c 作为终结符的实现。需要表达式类型判断合法性
- program.c 作为词法分析语义分析入口函数可以根据parser结构生成AST
其中stmt参考cppreference
其中expr参考AI以及CParser

View File

@ -0,0 +1,173 @@
#include "ast.h"
#include "../parser.h"
struct ASTNode* new_ast_node(void) {
struct ASTNode* node = xmalloc(sizeof(struct ASTNode));
init_ast_node(node);
return node;
}
/**
 * Put a node into its pristine state: type NT_INIT and every generic child
 * slot set to NULL.
 */
void init_ast_node(struct ASTNode* node) {
    int slots = sizeof(node->children) / sizeof(node->children[0]);
    int k = 0;

    node->type = NT_INIT;
    while (k < slots) {
        node->children[k] = NULL;
        k++;
    }
}
struct ASTNode* find_ast_node(struct ASTNode* node, enum ASTType type) {
}
#include <stdio.h>
/* Print the indentation for one tree level: `depth` copies of the indent unit. */
static void pnt_depth(int depth) {
    int remaining = depth;
    while (remaining-- > 0) {
        printf(" ");
    }
}
/**
 * Pretty-print an AST subtree to stdout for debugging.
 *
 * @param node  subtree root; NULL is ignored
 * @param depth indentation level of `node`
 *
 * Fixes over the previous version:
 *  - NT_PARAM and NT_ARG_LIST no longer fall through into the following
 *    cases (a parameter used to print "param", "arg_list" and "call");
 *  - the NT_TERM_VAL case is wrapped in a block, since a declaration may not
 *    directly follow a case label before C23, and ends with an explicit break;
 *  - integer literals are printed from constant.ll with %lld, matching the
 *    union member the lexer writes, instead of reading constant.i;
 *  - floating-point literals are printed instead of "unknown term val".
 */
void pnt_ast(struct ASTNode* node, int depth) {
    if (!node) return;
    pnt_depth(depth);
    switch (node->type) {
    case NT_ROOT:
        for (int i = 0; i < node->root.child_size; i++) {
            pnt_ast(node->root.children[i], depth);
        }
        return;
    case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
    case NT_SUB : printf("- \n"); break; // (expr) - (expr)
    case NT_MUL : printf("* \n"); break; // (expr) * (expr)
    case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
    case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
    case NT_AND : printf("& \n"); break; // (expr) & (expr)
    case NT_OR : printf("| \n"); break; // (expr) | (expr)
    case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
    case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
    case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
    case NT_EQ : printf("==\n"); break; // (expr) == (expr)
    case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
    case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
    case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
    case NT_LT : printf("< \n"); break; // (expr) < (expr)
    case NT_GT : printf("> \n"); break; // (expr) > (expr)
    case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
    case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
    case NT_NOT : printf("! \n"); break; // ! (expr)
    case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
    case NT_COMMA : printf(", \n"); break; // expr, expr (comma operator)
    case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
    // case NT_COND : // (expr) ? (expr) : (expr)
    case NT_STMT_EMPTY : // ;
        printf(";\n");
        break;
    case NT_STMT_IF : // if (cond) { ... } [else {...}]
        printf("if");
        pnt_ast(node->if_stmt.cond, depth+1);
        pnt_ast(node->if_stmt.if_stmt, depth+1);
        if (node->if_stmt.else_stmt) {
            pnt_depth(depth);
            printf("else");
            pnt_ast(node->if_stmt.else_stmt, depth+1);
        }
        break;
    case NT_STMT_WHILE : // while (cond) { ... }
        printf("while\n");
        pnt_ast(node->while_stmt.cond, depth+1);
        pnt_ast(node->while_stmt.body, depth+1);
        break;
    case NT_STMT_DOWHILE : // do {...} while (cond)
        printf("do-while\n");
        pnt_ast(node->do_while_stmt.body, depth+1);
        pnt_ast(node->do_while_stmt.cond, depth+1);
        break;
    case NT_STMT_FOR : // for (init; cond; iter) {...}
        printf("for\n");
        if (node->for_stmt.init)
            pnt_ast(node->for_stmt.init, depth+1);
        if (node->for_stmt.cond)
            pnt_ast(node->for_stmt.cond, depth+1);
        if (node->for_stmt.iter)
            pnt_ast(node->for_stmt.iter, depth+1);
        pnt_ast(node->for_stmt.body, depth+1);
        break;
    case NT_STMT_SWITCH : // switch (expr) { case ... }
    case NT_STMT_BREAK : // break;
    case NT_STMT_CONTINUE : // continue;
    case NT_STMT_GOTO : // goto label;
    case NT_STMT_CASE : // case const_expr:
    case NT_STMT_DEFAULT : // default:
    case NT_STMT_LABEL : // label:
        break; // not printed yet
    case NT_STMT_BLOCK : // { ... }
        printf("{\n");
        for (int i = 0; i < node->block.child_size; i++) {
            pnt_ast(node->block.children[i], depth+1);
        }
        pnt_depth(depth);
        printf("}\n");
        break;
    case NT_STMT_RETURN : // return expr;
        printf("return");
        if (node->return_stmt.expr_stmt) {
            printf(" ");
            pnt_ast(node->return_stmt.expr_stmt, depth+1);
        } else {
            printf("\n");
        }
        break;
    case NT_STMT_EXPR : // expr;
        printf("stmt\n");
        pnt_ast(node->expr_stmt.expr_stmt, depth);
        pnt_depth(depth);
        printf(";\n");
        break;
    case NT_DECL_VAR : // type name; or type name = expr;
        printf("decl_val\n");
        break;
    case NT_DECL_FUNC: // type func_name(param_list);
        printf("decl func %s\n", node->func.name->syms.tok.constant.str);
        break;
    case NT_FUNC : // type func_name(param_list) {...}
        printf("def func %s\n", node->func.name->syms.tok.constant.str);
        // pnt_ast(node->child.func.params, depth);
        pnt_ast(node->func.body, depth);
        // pnt_ast(node->child.func.ret, depth);
        break;
    case NT_PARAM : // formal parameter
        printf("param\n");
        break;
    case NT_ARG_LIST : // argument list, used together with NT_TERM_CALL
        printf("arg_list\n");
        break;
    case NT_TERM_CALL : // func (expr)
        printf("call\n");
        break;
    case NT_TERM_IDENT:
        printf("%s\n", node->syms.tok.constant.str);
        break;
    case NT_TERM_VAL : { // terminal value: a literal constant
        struct Token * tok = &node->syms.tok;
        switch (tok->type) {
        case TOKEN_CHAR_LITERAL:
            printf("%c\n", tok->constant.ch);
            break;
        case TOKEN_INT_LITERAL:
            printf("%lld\n", tok->constant.ll);
            break;
        case TOKEN_FLOAT_LITERAL:
            printf("%f\n", tok->constant.d);
            break;
        case TOKEN_STRING_LITERAL:
            printf("%s\n", tok->constant.str);
            break;
        default:
            printf("unknown term val\n");
            break;
        }
        break;
    }
    default:
        break;
    }
    // Generic child handling: every node type up to NT_ASSIGN is an
    // expression node whose operands live in expr.left / expr.right.
    if (node->type <= NT_ASSIGN) {
        if (node->expr.left) pnt_ast(node->expr.left, depth+1);
        if (node->expr.right) pnt_ast(node->expr.right, depth + 1);
    }
}

View File

@ -0,0 +1,191 @@
#ifndef __AST_H__
#define __AST_H__
#include "../../frontend.h"
#include "../../lexer/lexer.h"
#include "../type.h"
// Kind tag of an AST node.
// NOTE: pnt_ast() treats every value up to and including NT_ASSIGN as an
// expression node with left/right operands, so the enumerator order matters.
enum ASTType {
NT_INIT,
NT_ROOT, // global scope in root node
NT_ADD, // (expr) + (expr)
NT_SUB, // (expr) - (expr)
NT_MUL, // (expr) * (expr)
NT_DIV, // (expr) / (expr)
NT_MOD, // (expr) % (expr)
NT_AND, // (expr) & (expr)
NT_OR, // (expr) | (expr)
NT_XOR, // (expr) ^ (expr)
NT_L_SH, // (expr) << (expr)
NT_R_SH, // (expr) >> (expr)
NT_EQ, // (expr) == (expr)
NT_NEQ, // (expr) != (expr)
NT_LE, // (expr) <= (expr)
NT_GE, // (expr) >= (expr)
NT_LT, // (expr) < (expr)
NT_GT, // (expr) > (expr)
NT_AND_AND, // (expr) && (expr)
NT_OR_OR, // (expr) || (expr)
NT_NOT, // ! (expr)
NT_BIT_NOT, // ~ (expr)
NT_COND, // (expr) ? (expr) : (expr)
NT_COMMA, // expr, expr (comma operator)
NT_ASSIGN, // (expr) = (expr)
NT_ADDRESS, // &expr (address-of)
NT_DEREF, // *expr (dereference)
NT_INDEX, // arr[index] (array subscript)
NT_MEMBER, // struct.member
NT_PTR_MEMBER,// ptr->member
NT_CAST, // (type)expr (explicit cast)
NT_SIZEOF, // sizeof(type|expr)
// NT_ALIGNOF, // _Alignof(type) (C11)
NT_STMT_EMPTY, // ;
NT_STMT_IF, // if (cond) { ... } [else {...}]
NT_STMT_WHILE, // while (cond) { ... }
NT_STMT_DOWHILE, // do {...} while (cond)
NT_STMT_FOR, // for (init; cond; iter) {...}
NT_STMT_SWITCH, // switch (expr) { case ... }
NT_STMT_BREAK, // break;
NT_STMT_CONTINUE, // continue;
NT_STMT_GOTO, // goto label;
NT_STMT_CASE, // case const_expr:
NT_STMT_DEFAULT, // default:
NT_STMT_LABEL, // label:
NT_STMT_BLOCK, // { ... }
NT_STMT_RETURN, // return expr;
NT_STMT_EXPR, // expr;
NT_BLOCK,
// NT_TYPE_BASE, // basic type node
// NT_TYPE_PTR, // pointer type
// NT_TYPE_ARRAY, // array type
// NT_TYPE_FUNC, // function type
// NT_TYPE_QUAL, // qualifier node
NT_DECL_VAR, // type name; or type name = expr;
NT_DECL_FUNC, // type func_name(param_list);
NT_FUNC, // type func_name(param_list) {...}
NT_PARAM, // formal function parameter
NT_ARG_LIST, // argument list, used together with NT_TERM_CALL
NT_TERM_CALL, // func (expr)
NT_TERM_VAL,
NT_TERM_IDENT,
NT_TERM_TYPE,
};
// One AST node: a kind tag plus an anonymous union of per-kind payloads.
// All payloads share storage; `type` decides which member is meaningful.
struct ASTNode {
enum ASTType type;
union {
void *children[6]; // generic view; init_ast_node() clears these six slots to NULL
struct {
struct ASTNode** children;
int child_size;
} root; // NT_ROOT: list of top-level nodes
struct {
struct ASTNode** children; // array of children
int child_size;
} block; // block body: list of declarations and statements
struct {
struct ASTNode* decl_node;
struct Token tok;
} syms; // terminals (NT_TERM_IDENT / NT_TERM_VAL): the token carrying the name or value
struct {
struct ASTNode *arr;
int size;
} params;
struct {
const char* name;
struct ASTNode* params;
struct ASTNode* func_decl;
} call;
struct {
struct ASTNode *type;
struct ASTNode *name;
struct ASTNode *expr_stmt; // optional
void* data;
} decl_val; // NT_DECL_VAR: variable declaration with optional initializer statement
struct {
struct ASTNode *ret;
struct ASTNode *name;
struct ASTNode *params; // array of params
void* data;
} func_decl;
struct {
struct ASTNode *ret;
struct ASTNode *name;
struct ASTNode *params; // array of params
struct ASTNode *body; // optional
} func; // NT_FUNC; pnt_ast() also reads func.name for NT_DECL_FUNC
struct {
struct ASTNode *left;
struct ASTNode *right;
struct ASTNode *optional; // optional
} expr; // operator nodes (types up to NT_ASSIGN in pnt_ast)
struct {
struct ASTNode *cond;
struct ASTNode *if_stmt;
struct ASTNode *else_stmt; // optional
} if_stmt;
struct {
struct ASTNode *cond;
struct ASTNode *body;
} switch_stmt;
struct {
struct ASTNode *cond;
struct ASTNode *body;
} while_stmt;
struct {
struct ASTNode *body;
struct ASTNode *cond;
} do_while_stmt;
struct {
struct ASTNode *init;
struct ASTNode *cond; // optional
struct ASTNode *iter; // optional
struct ASTNode *body;
} for_stmt;
struct {
struct ASTNode *expr_stmt; // optional
} return_stmt;
struct {
struct ASTNode *label;
} goto_stmt;
struct {
struct ASTNode *label;
} label_stmt;
struct {
struct ASTNode *block;
} block_stmt;
struct {
struct ASTNode *expr_stmt;
} expr_stmt;
};
};
struct ASTNode* new_ast_node(void);
void init_ast_node(struct ASTNode* node);
void pnt_ast(struct ASTNode* node, int depth);
struct Parser;
typedef struct ASTNode* (*parse_func_t) (struct Parser*);
void parse_prog(struct Parser* parser);
struct ASTNode* parse_block(struct Parser* parser);
struct ASTNode* parse_stmt(struct Parser* parser);
struct ASTNode* parse_expr(struct Parser* parser);
struct ASTNode* parse_func(struct Parser* parser);
struct ASTNode* parse_decl(struct Parser* parser);
struct ASTNode* parse_ident(struct Parser* parser);
struct ASTNode* parse_type(struct Parser* parser);
int peek_decl(struct Parser* parser);
struct ASTNode* parser_ident_without_pop(struct Parser* parser);
#endif

View File

@ -0,0 +1,50 @@
#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"
#ifndef BLOCK_MAX_NODE
#define BLOCK_MAX_NODE (1024)
#endif
/**
 * Parse a brace-delimited block and return it as an NT_BLOCK node.
 *
 * A new symbol-table scope is entered for the duration of the block. Each
 * item is parsed as a declaration when peek_decl() reports one, otherwise as
 * a statement; the closing brace ends the block.
 *
 * Fixes over the previous version:
 *  - the child array is bounded: a block with more than BLOCK_MAX_NODE items
 *    is reported through error() instead of writing past the allocation;
 *  - the allocation goes through xmalloc() like the rest of the front end and
 *    its result is checked;
 *  - child_size is initialised explicitly instead of relying on
 *    init_ast_node() zeroing overlapping union storage.
 */
struct ASTNode* parse_block(struct Parser* parser) {
    symtab_enter_scope(parser->symtab);
    // parse_decl(parser); // decl_var

    struct ASTNode* node = new_ast_node();
    node->type = NT_BLOCK;

    flushpeektok(parser);
    if (peektoktype(parser) != TOKEN_L_BRACE) {
        error("block need '{' start");
    }
    poptok(parser);

    node->block.children = xmalloc(sizeof(struct ASTNode*) * BLOCK_MAX_NODE);
    if (node->block.children == NULL) {
        error("out of memory\n");
    }
    node->block.child_size = 0;

    for (;;) {
        struct ASTNode* child;

        if (peek_decl(parser) == 1) {
            child = parse_decl(parser);
        } else {
            flushpeektok(parser);
            if (peektoktype(parser) == TOKEN_R_BRACE) {
                poptok(parser);
                break;
            }
            child = parse_stmt(parser);
        }

        if (node->block.child_size >= BLOCK_MAX_NODE) {
            error("block has too many statements");
        }
        node->block.children[node->block.child_size++] = child;
    }

    symtab_leave_scope(parser->symtab);
    return node;
}

View File

@ -0,0 +1,94 @@
#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"
/**
 * Check, without consuming input, whether the upcoming token starts a
 * declaration.
 *
 * Storage-class specifiers and typedef are not implemented and are reported
 * through error(). The peek position is flushed again on every path that
 * does not recognise a type keyword.
 *
 * @return 1 when the next token is a basic type keyword, 0 otherwise
 *
 * Fix: the "not a declaration" path used to run off the end of the function
 * without returning a value, although callers compare the result with 0 and
 * 1; it now returns 0 as documented.
 */
int peek_decl(struct Parser* parser) {
    flushpeektok(parser);
    switch (peektoktype(parser)) {
    case TOKEN_STATIC:
    case TOKEN_EXTERN:
    case TOKEN_REGISTER:
    case TOKEN_TYPEDEF:
        error("not impliment");
        break;
    default:
        flushpeektok(parser);
    }

    switch (peektoktype(parser)) {
    case TOKEN_VOID:
    case TOKEN_CHAR:
    case TOKEN_SHORT:
    case TOKEN_INT:
    case TOKEN_LONG:
    case TOKEN_FLOAT:
    case TOKEN_DOUBLE:
        return 1;
    default:
        flushpeektok(parser);
    }
    return 0;
}
// Parse a variable declaration (`type name;` or `type name = expr;`) into an
// NT_DECL_VAR node and register the name in the current symbol-table scope.
struct ASTNode* parse_decl_val(struct Parser* parser) {
flushpeektok(parser);
// parse_type
enum TokenType ttype;
struct ASTNode* node;
struct ASTNode* type_node = parse_type(parser);
// The identifier is read but, going by the helper's name, not popped, so an
// initializer can be parsed as an ordinary expression statement starting at
// the name — TODO confirm against parser_ident_without_pop.
struct ASTNode* name_node = parser_ident_without_pop(parser);
node = new_ast_node();
node->decl_val.type = type_node;
node->decl_val.name = name_node;
node->type = NT_DECL_VAR;
symtab_add_symbol(parser->symtab, name_node->syms.tok.constant.str, node);
ttype = peektoktype(parser);
if (ttype == TOKEN_ASSIGN) {
// `name = expr;` is parsed as a statement and must be an expression statement.
node->decl_val.expr_stmt = parse_stmt(parser);
if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) {
error("parser_decl_val want stmt_expr");
}
} else if (ttype == TOKEN_SEMICOLON) {
// NOTE(review): a token is popped and then a semicolon is expected; whether
// this consumes the identifier and then the ';' depends on poptok/expecttok,
// which are not defined in this file — confirm.
poptok(parser);
expecttok(parser, TOKEN_SEMICOLON);
} else {
error("parser_decl_val syntax error");
}
return node;
}
// 类型解析入口改进
struct ASTNode* parse_decl(struct Parser* parser) {
flushpeektok(parser);
int idx;
enum TokenType ttype;
struct ASTNode* node;
if (peek_decl(parser) == 0) {
error("syntax error expect decl_val TYPE");
}
if (peektoktype(parser) != TOKEN_IDENT) {
error("syntax error expect decl_val IDENT");
}
ttype = peektoktype(parser);
switch (ttype) {
case TOKEN_L_PAREN: // (
node = parse_func(parser);
break;
case TOKEN_ASSIGN:
case TOKEN_SEMICOLON:
node = parse_decl_val(parser);
break;
default:
error("syntax error expect decl_val ASSIGN or SEMICOLON");
return NULL;
}
return node;
}

View File

@ -0,0 +1,409 @@
#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"
// Copy from `CParse`
/**
 * Operator precedence classes, ordered from loosest to tightest binding.
 *
 * The numeric order is significant: parse_subexpression() stops at an infix
 * operator whose class is <= the precedence it was called with, and
 * parse_assign() uses PREC_ASSIGNMENT + 1 for its right operand.
 * The trailing comments list each class's operators and associativity.
 */
enum Precedence {
PREC_BOTTOM,
PREC_EXPRESSION, /* , left to right */
PREC_ASSIGNMENT, /* = += -= *= /= %= <<= >>= &= ^= |= right to left */
PREC_CONDITIONAL, /* ?: right to left */
PREC_LOGICAL_OR, /* || left to right */
PREC_LOGICAL_AND, /* && left to right */
PREC_OR, /* | left to right */
PREC_XOR, /* ^ left to right */
PREC_AND, /* & left to right */
PREC_EQUALITY, /* == != left to right */
PREC_RELATIONAL, /* < <= > >= left to right */
PREC_SHIFT, /* << >> left to right */
PREC_ADDITIVE, /* + - left to right */
PREC_MULTIPLICATIVE, /* * / % left to right */
PREC_CAST, /* (type) right to left */
PREC_UNARY, /* ! ~ ++ -- + - * & sizeof right to left */
PREC_POSTFIX, /* () [] -> . left to right */
PREC_PRIMARY,
PREC_TOP
};
/**
 * How an expr_table entry's handler is invoked by parse_subexpression():
 * infix handlers receive the already-parsed left operand, prefix handlers
 * are called with NULL.
 */
enum ParseType {
INFIX_PARSER,
PREFIX_PARSER,
};
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec);
/**
 * Build a binary expression node.
 *
 * @param left  left operand
 * @param right right operand
 * @param type  node type to assign (e.g. NT_ADD)
 * @return the newly allocated node
 */
static struct ASTNode* gen_node2(struct ASTNode* left, struct ASTNode* right,
                                 enum ASTType type) {
    struct ASTNode* node = new_ast_node();
    node->type = type;
    node->expr.left = left;
    node->expr.right = right;
    /* The original fell off the end without a return, so parse_assign()
     * stored an indeterminate pointer as the right-hand side. */
    return node;
}
static struct ASTNode* parse_comma(struct Parser* parser, struct ASTNode* left) {
struct ASTNode* node = new_ast_node();
node->type = NT_COMMA;
node->expr.left = left;
node->expr.right = parse_subexpression(parser, PREC_EXPRESSION);
}
static struct ASTNode* parse_assign(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
node->type = NT_ASSIGN;
// saved left
node->expr.left = left;
enum Precedence next = PREC_ASSIGNMENT + 1;
switch (ttype) {
case TOKEN_ASSIGN :
left = parse_subexpression(parser, next);
break;
case TOKEN_ASSIGN_ADD :
left = gen_node2(left, parse_subexpression(parser, next), NT_ADD);
break;
case TOKEN_ASSIGN_SUB :
left = gen_node2(left, parse_subexpression(parser, next), NT_SUB);
break;
case TOKEN_ASSIGN_MUL :
left = gen_node2(left, parse_subexpression(parser, next), NT_MUL);
break;
case TOKEN_ASSIGN_DIV :
left = gen_node2(left, parse_subexpression(parser, next), NT_DIV);
break;
case TOKEN_ASSIGN_MOD :
left = gen_node2(left, parse_subexpression(parser, next), NT_MOD);
break;
case TOKEN_ASSIGN_L_SH :
left = gen_node2(left, parse_subexpression(parser, next), NT_L_SH);
break;
case TOKEN_ASSIGN_R_SH :
left = gen_node2(left, parse_subexpression(parser, next), NT_R_SH);
break;
case TOKEN_ASSIGN_AND :
left = gen_node2(left, parse_subexpression(parser, next), NT_AND);
break;
case TOKEN_ASSIGN_OR :
left = gen_node2(left, parse_subexpression(parser, next), NT_OR);
break;
case TOKEN_ASSIGN_XOR :
left = gen_node2(left, parse_subexpression(parser, next), NT_XOR);
break;
default:
error("unsupported operator");
break;
}
node->expr.right = left;
}
static struct ASTNode* parse_cmp(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
// saved left
node->expr.left = left;
switch (ttype) {
case TOKEN_EQ:
node->type = NT_EQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
break;
case TOKEN_NEQ:
node->type = NT_NEQ;
node->expr.right = parse_subexpression(parser, PREC_EQUALITY);
break;
case TOKEN_LT:
node->type = NT_LT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
case TOKEN_GT:
node->type = NT_GT;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
case TOKEN_LE:
node->type = NT_LE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
case TOKEN_GE:
node->type = NT_GE;
node->expr.right = parse_subexpression(parser, PREC_RELATIONAL);
break;
default:
error("invalid operator");
}
}
static struct ASTNode* parse_cal(struct Parser* parser, struct ASTNode* left) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
poptok(parser);
struct ASTNode* node = new_ast_node();
node->expr.left = left;
switch (ttype) {
case TOKEN_OR_OR:
node->type = NT_OR_OR;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_OR);
break;
case TOKEN_AND_AND:
node->type = NT_AND_AND;
node->expr.right = parse_subexpression(parser, PREC_LOGICAL_AND);
break;
case TOKEN_OR:
node->type = NT_OR;
node->expr.right = parse_subexpression(parser, PREC_OR);
break;
case TOKEN_XOR:
node->type = NT_XOR;
node->expr.right = parse_subexpression(parser, PREC_XOR);
break;
case TOKEN_AND:
node->type = NT_AND;
node->expr.right = parse_subexpression(parser, PREC_AND);
break;
case TOKEN_L_SH:
node->type = NT_L_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
break;
case TOKEN_R_SH:
node->type = NT_R_SH;
node->expr.right = parse_subexpression(parser, PREC_SHIFT);
break;
case TOKEN_ADD:
node->type = NT_ADD;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
break;
case TOKEN_SUB:
node->type = NT_SUB;
node->expr.right = parse_subexpression(parser, PREC_ADDITIVE);
break;
case TOKEN_MUL:
node->type = NT_MUL;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
break;
case TOKEN_DIV:
node->type = NT_DIV;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
break;
case TOKEN_MOD:
node->type = NT_MOD;
node->expr.right = parse_subexpression(parser, PREC_MULTIPLICATIVE);
break;
default:
break;
}
return node;
}
// 新增函数调用解析
static struct ASTNode* parse_call(struct Parser* parser, struct ASTNode* ident) {
struct ASTNode* node = new_ast_node();
node->type = NT_TERM_CALL;
poptok(parser); // 跳过 '('
enum TokenType ttype;
// 解析参数列表
while ((ttype = peektoktype(parser)) != TOKEN_R_PAREN) {
// add_arg(node, parse_expr(parser));
if (ttype == TOKEN_COMMA) poptok(parser);
else poptok(parser);
}
poptok(parser); // 跳过 ')'
char* name = ident->syms.tok.constant.str;
void* sym = symtab_lookup_symbol(parser->symtab, name);
if (sym == NULL) {
error("function not decl %s", name);
}
node->call.name = name;
node->call.params = NULL;
node->call.func_decl = sym;
return node;
}
/**
 * Parse a parenthesised expression `( expr )` and return the inner node.
 *
 * @param parser active parser, positioned at the '('
 * @param left   ignored; present only to match parse_expr_fun_t
 * @return the node for the expression between the parentheses
 */
static struct ASTNode* parse_paren(struct Parser* parser, struct ASTNode* left) {
    (void)left;

    flushpeektok(parser);
    expecttok(parser, TOKEN_L_PAREN);

    struct ASTNode* inner = parse_subexpression(parser, PREC_EXPRESSION);

    flushpeektok(parser);
    expecttok(parser, TOKEN_R_PAREN);
    return inner;
}
/* Signature shared by all expression handlers: (parser, left operand). */
typedef struct ASTNode* (*parse_expr_fun_t)(struct Parser*, struct ASTNode*);
/*
 * Operator dispatch table, indexed directly by token type.
 * Each entry gives the handler, the operator's precedence class and whether
 * the handler is invoked as infix or prefix. Tokens without an entry are
 * zero-initialised (parser == NULL); parse_subexpression() treats those as
 * the start of a primary expression.
 * NOTE(review): the table has 256 slots and is indexed without a bounds
 * check - confirm every TokenType value is below 256.
 */
static struct expr_prec_table_t {
parse_expr_fun_t parser;
enum Precedence prec;
enum ParseType ptype;
} expr_table [256] = {
[TOKEN_COMMA] = {parse_comma, PREC_EXPRESSION, INFIX_PARSER},
[TOKEN_ASSIGN] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_ADD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_SUB] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_MUL] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_DIV] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_MOD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_AND] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_OR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_ASSIGN_XOR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
[TOKEN_OR_OR] = {parse_cal, PREC_LOGICAL_OR , INFIX_PARSER},
[TOKEN_AND_AND] = {parse_cal, PREC_LOGICAL_AND, INFIX_PARSER},
[TOKEN_OR] = {parse_cal, PREC_OR , INFIX_PARSER},
[TOKEN_XOR] = {parse_cal, PREC_XOR , INFIX_PARSER},
[TOKEN_AND] = {parse_cal, PREC_AND , INFIX_PARSER},
[TOKEN_EQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
[TOKEN_NEQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
[TOKEN_LT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_LE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_GT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_GE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
[TOKEN_L_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
[TOKEN_R_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
[TOKEN_ADD] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
[TOKEN_SUB] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
[TOKEN_MUL] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
[TOKEN_DIV] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
[TOKEN_MOD] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
/* Prefix operators below have no handler yet (parser == NULL). */
[TOKEN_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_BIT_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_ADD_ADD] = {NULL, PREC_UNARY, PREFIX_PARSER},
[TOKEN_SUB_SUB] = {NULL, PREC_UNARY, PREFIX_PARSER},
// + - * & sizeof
/* '(' is registered as an infix entry but parse_paren ignores its left operand. */
[TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
};
static struct ASTNode *parse_primary_expression(struct Parser* parser) {
flushpeektok(parser);
struct Token* tok = peektok(parser);
struct ASTNode *node = new_ast_node();
node->type = NT_TERM_VAL;
node->syms.tok = *tok;
switch (tok->type) {
case TOKEN_INT_LITERAL:
// node->data.data_type = TYPE_INT;
break;
case TOKEN_FLOAT_LITERAL:
warn("float not supported");
break;
case TOKEN_CHAR_LITERAL:
// node->data.data_type = TYPE_CHAR;
break;
case TOKEN_STRING_LITERAL:
// node->data.data_type = TYPE_POINTER;
case TOKEN_IDENT:
node = parse_ident(parser);
if (peektoktype(parser) == TOKEN_L_PAREN) {
node = parse_call(parser, node);
} else {
void *sym = symtab_lookup_symbol(parser->symtab, tok->constant.str);
if (sym == NULL) {
error("undefined symbol but use %s", tok->constant.str);
}
node->type = NT_TERM_IDENT;
node->syms.decl_node = sym;
goto END;
}
default:
return NULL;
}
poptok(parser);
END:
return node;
}
static struct ASTNode *parse_subexpression(struct Parser* parser, enum Precedence prec) {
enum TokenType ttype;
struct expr_prec_table_t* work;
struct ASTNode* left;
while (1) {
flushpeektok(parser);
ttype = peektoktype(parser);
work = &expr_table[ttype];
// FIXME
if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
break;
}
if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) {
if (work->parser != NULL) {
left = work->parser(parser, NULL);
} else {
left = parse_primary_expression(parser);
}
} else if (work->ptype == INFIX_PARSER) {
if (work->parser == NULL)
break;
if (work->prec <= prec)
break;
left = work->parser(parser, left);
}
// assert(left != NULL);
}
return left;
}
/**
 * Parse a full expression.
 *
 * The next token must be one that can start an expression; otherwise an
 * error naming the offending token is reported.
 *
 * @param parser active parser
 * @return the expression node; NULL only if error() returns
 */
struct ASTNode* parse_expr(struct Parser* parser) {
    flushpeektok(parser);
    enum TokenType ttype = peektoktype(parser);
    switch (ttype) {
    case TOKEN_NOT:
    case TOKEN_AND:
    case TOKEN_L_PAREN:
    case TOKEN_MUL:
    case TOKEN_ADD:
    case TOKEN_SUB:
    case TOKEN_BIT_NOT:
    case TOKEN_AND_AND:
    case TOKEN_CHAR_LITERAL:
    case TOKEN_INT_LITERAL:
    case TOKEN_STRING_LITERAL:
    case TOKEN_ADD_ADD:
    case TOKEN_SUB_SUB:
    case TOKEN_SIZEOF:
    case TOKEN_IDENT:
        return parse_subexpression(parser, PREC_EXPRESSION);
    default:
        error("Want expr but not got %s", get_token_name(ttype));
        break;
    }
    /* Previously control fell off the end of this non-void function. */
    return NULL;
}

View File

@ -0,0 +1,120 @@
#include "../parser.h"
#include "../symtab/symtab.h"
#include "ast.h"
#ifndef FUNC_PARAM_CACHE_SIZE
#define FUNC_PARAM_CACHE_SIZE 32 // a reasonable initial size; the original author estimates it covers 99% of common cases
#endif
/*
 * Fixed-size buffer of the tokens that make up a function's parameter list.
 * check_is_func_decl() fills it; parse_params() reads it back.
 */
struct FuncParamCache {
struct Token tokens[FUNC_PARAM_CACHE_SIZE];
int read_pos; // current read position
int write_pos; // write position
int depth; // current parenthesis nesting depth while filling the cache
};
/**
 * Return the type of the token at the cache's read position and advance the
 * read position by one. No bounds check is performed.
 */
static enum TokenType peekcachetype(struct FuncParamCache* cache) {
    const int pos = cache->read_pos;
    cache->read_pos = pos + 1;
    return cache->tokens[pos].type;
}
// TODO semantic analysis: push the parameters into the symbol table
/*
 * Walk the cached parameter-list tokens of a function definition.
 *
 * This is still a stub: a fresh node is attached as node->func.params, commas
 * are skipped, `...` is rejected as not implemented, each identifier stores a
 * NULL placeholder, and type tokens are ignored. `parser` is currently unused.
 * Reading stops at the first ')' found in the cache.
 */
static void parse_params(struct Parser* parser, struct FuncParamCache* cache, struct ASTNode* node) {
// = peekcachetype(cache);
enum TokenType ttype;
// if (ttype != TOKEN_L_PAREN) {
// error("function expected '('\n");
// }
struct ASTNode *params = new_ast_node();
node->func.params = params;
int params_size = 0;
while ((ttype = peekcachetype(cache)) != TOKEN_R_PAREN) {
switch (ttype) {
case TOKEN_COMMA:
break;
case TOKEN_ELLIPSIS:
ttype = peekcachetype(cache);
if (ttype != TOKEN_R_PAREN) {
error("... must be a last parameter list (expect ')')");
}
// TODO
error("not implement");
break;
case TOKEN_IDENT:
// NOTE(review): no allocation of params->children is visible in this function - confirm new_ast_node() provides the storage
params->children[params_size++] = NULL;
break;
default:
// TODO parse the type from the cache
// parse_type(parser);
// TODO type parse
// ttype = peekcachetype(cache);
// ttype = peekcachetype(cache);
// if (ttype != TOKEN_IDENT) {
// node->node_type = NT_DECL_FUNC;
// flushpeektok(parser);
// continue;
// }
// error("function expected ')' or ','\n");
}
}
}
/**
 * Consume a function's parameter list into `cache` and classify what follows.
 *
 * Must be called just after the opening '(' has been consumed. Tokens are
 * copied into the cache until the matching ')' (nesting is tracked in
 * cache->depth). A following ';' is consumed and means a prototype; a
 * following '{' is left in place and means a definition.
 *
 * @param parser active parser
 * @param cache  cache to (re)initialise and fill
 * @return NT_DECL_FUNC for a prototype, NT_FUNC for a definition
 */
enum ASTType check_is_func_decl(struct Parser* parser, struct FuncParamCache* cache) {
    cache->depth = 1;
    cache->read_pos = 0;
    cache->write_pos = 0;

    while (cache->depth) {
        struct Token* tok = peektok(parser);
        poptok(parser);
        if (cache->write_pos >= FUNC_PARAM_CACHE_SIZE - 1) {
            error("function parameter list too long");
        }
        cache->tokens[cache->write_pos++] = *tok;
        switch (tok->type) {
        case TOKEN_L_PAREN:
            cache->depth++;
            break;
        case TOKEN_R_PAREN:
            cache->depth--;
            break;
        default:
            break;
        }
    }

    switch (peektoktype(parser)) {
    case TOKEN_SEMICOLON:
        poptok(parser);
        return NT_DECL_FUNC;
    case TOKEN_L_BRACE:
        return NT_FUNC;
    default:
        error("function define or decl need '{' or ';' but you don't got");
        break;
    }
    /* Only reachable if error() returns. Report a prototype so the caller
     * does not try to parse a body; previously control fell off the end. */
    return NT_DECL_FUNC;
}
/**
 * Parse a function prototype or definition: `type name ( params ) ;|{ ... }`.
 *
 * The function is added to the symbol table before its body is parsed.
 * For a definition, the parameters and body are parsed inside a new scope.
 *
 * @param parser active parser
 * @return an NT_DECL_FUNC node (prototype) or NT_FUNC node (definition)
 */
struct ASTNode* parse_func(struct Parser* parser) {
    struct ASTNode* ret_type = parse_type(parser);
    struct ASTNode* func_name = parse_ident(parser);

    struct ASTNode* fn = new_ast_node();
    fn->func.ret = ret_type;
    fn->func.name = func_name;

    flushpeektok(parser);
    expecttok(parser, TOKEN_L_PAREN);

    struct FuncParamCache cache;
    fn->type = check_is_func_decl(parser, &cache);
    symtab_add_symbol(parser->symtab, func_name->syms.tok.constant.str, fn);

    if (fn->type != NT_DECL_FUNC) {
        /* Definition: parameters and body live in their own scope. */
        symtab_enter_scope(parser->symtab);
        parse_params(parser, &cache, fn);
        fn->func.body = parse_block(parser);
        symtab_leave_scope(parser->symtab);
    }
    return fn;
}

View File

@ -0,0 +1,29 @@
#include "../parser.h"
#include "ast.h"
#ifndef PROG_MAX_NODE_SIZE
#define PROG_MAX_NODE_SIZE (1024 * 4)
#endif
void parse_prog(struct Parser* parser) {
/**
* Program := (Declaration | Definition)*
* same as
* Program := Declaration* Definition*
*/
int child_size = 0;
parser->root = new_ast_node();
struct ASTNode* node;
parser->root->root.children = xmalloc(sizeof(struct ASTNode*) * PROG_MAX_NODE_SIZE);
while (1) {
flushpeektok(parser);
if (peektoktype(parser) == TOKEN_EOF) {
break;
}
node = parse_decl(parser);
parser->root->root.children[child_size++] = node;
}
parser->root->type = NT_ROOT;
parser->root->root.child_size = child_size;
return;
}

View File

@ -0,0 +1,240 @@
#include "../parser.h"
#include "ast.h"
/**
 * Parse one statement and return its AST node.
 *
 * Handles if/switch/while/do/for, break/continue/return/goto, the empty
 * statement, blocks, labels, case/default, and expression statements.
 *
 * Peek-cursor note: peektok() advances a look-ahead cursor that only
 * flushpeektok() resets. After a sub-parser returns, that cursor may already
 * sit on the token we want to check, so it is flushed before every
 * expecttok()/peek that follows a sub-parse.
 *
 * Fixes over the previous version:
 *  - the node is now actually returned (there was no return statement);
 *  - switch/while/do/for flush before expecting ')' or ';', as `if` did;
 *  - goto parses its label before expecting ';';
 *  - `return expr;` no longer issues a stray extra poptok().
 *
 * @param parser active parser
 * @return the statement node
 */
struct ASTNode* parse_stmt(struct Parser* parser) {
    flushpeektok(parser);
    enum TokenType ttype = peektoktype(parser);
    struct ASTNode* node = new_ast_node();

    switch (ttype) {
    case TOKEN_IF: {
        /**
         * if (exp) stmt
         * if (exp) stmt else stmt
         */
        poptok(parser);
        expecttok(parser, TOKEN_L_PAREN);
        node->if_stmt.cond = parse_expr(parser);
        flushpeektok(parser);
        expecttok(parser, TOKEN_R_PAREN);
        node->if_stmt.if_stmt = parse_stmt(parser);
        /* The nested statement may have peeked ahead (e.g. an inner `if`
         * looking for `else`), so resynchronise before looking ourselves. */
        flushpeektok(parser);
        ttype = peektoktype(parser);
        if (ttype == TOKEN_ELSE) {
            poptok(parser);
            node->if_stmt.else_stmt = parse_stmt(parser);
        } else {
            node->if_stmt.else_stmt = NULL;
        }
        node->type = NT_STMT_IF;
        break;
    }
    case TOKEN_SWITCH: {
        /**
         * switch (exp) stmt
         */
        poptok(parser);
        expecttok(parser, TOKEN_L_PAREN);
        node->switch_stmt.cond = parse_expr(parser);
        flushpeektok(parser);
        expecttok(parser, TOKEN_R_PAREN);
        node->switch_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_SWITCH;
        break;
    }
    case TOKEN_WHILE: {
        /**
         * while (exp) stmt
         */
        poptok(parser);
        expecttok(parser, TOKEN_L_PAREN);
        node->while_stmt.cond = parse_expr(parser);
        flushpeektok(parser);
        expecttok(parser, TOKEN_R_PAREN);
        node->while_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_WHILE;
        break;
    }
    case TOKEN_DO: {
        /**
         * do stmt while (exp)
         */
        poptok(parser);
        node->do_while_stmt.body = parse_stmt(parser);
        flushpeektok(parser);
        ttype = peektoktype(parser);
        if (ttype != TOKEN_WHILE) {
            error("expected while after do");
        }
        poptok(parser);
        expecttok(parser, TOKEN_L_PAREN);
        node->do_while_stmt.cond = parse_expr(parser);
        flushpeektok(parser);
        expecttok(parser, TOKEN_R_PAREN);
        node->type = NT_STMT_DOWHILE;
        break;
    }
    case TOKEN_FOR: {
        /**
         * for (init; [cond]; [iter]) stmt
         */
        poptok(parser);
        ttype = peektoktype(parser);
        if (ttype != TOKEN_L_PAREN) {
            error("expected ( after for");
        }
        poptok(parser);

        // init expr or init decl_var
        // TODO need add this feature
        node->for_stmt.init = parse_expr(parser);
        flushpeektok(parser);
        expecttok(parser, TOKEN_SEMICOLON);

        // cond expr or null
        ttype = peektoktype(parser);
        if (ttype != TOKEN_SEMICOLON) {
            node->for_stmt.cond = parse_expr(parser);
            flushpeektok(parser);
            expecttok(parser, TOKEN_SEMICOLON);
        } else {
            node->for_stmt.cond = NULL;
            poptok(parser);
        }

        // iter expr or null
        ttype = peektoktype(parser);
        if (ttype != TOKEN_R_PAREN) {
            node->for_stmt.iter = parse_expr(parser);
            flushpeektok(parser);
            expecttok(parser, TOKEN_R_PAREN);
        } else {
            node->for_stmt.iter = NULL;
            poptok(parser);
        }

        node->for_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_FOR;
        break;
    }
    case TOKEN_BREAK: {
        /**
         * break ;
         */
        // TODO check: terminates the enclosing for, while, do-while or switch.
        poptok(parser);
        expecttok(parser, TOKEN_SEMICOLON);
        node->type = NT_STMT_BREAK;
        break;
    }
    case TOKEN_CONTINUE: {
        /**
         * continue ;
         */
        // TODO check: skips the rest of the enclosing for, while or do-while body.
        poptok(parser);
        expecttok(parser, TOKEN_SEMICOLON);
        node->type = NT_STMT_CONTINUE;
        break;
    }
    case TOKEN_RETURN: {
        /**
         * return [exp] ;
         */
        // TODO: terminates the current function and returns the value to the caller.
        poptok(parser);
        ttype = peektoktype(parser);
        if (ttype != TOKEN_SEMICOLON) {
            node->return_stmt.expr_stmt = parse_expr(parser);
            flushpeektok(parser);
            expecttok(parser, TOKEN_SEMICOLON);
        } else {
            node->return_stmt.expr_stmt = NULL;
            poptok(parser); /* consume the ';' */
        }
        node->type = NT_STMT_RETURN;
        break;
    }
    case TOKEN_GOTO: {
        /**
         * goto label ;
         */
        // TODO check label: transfers control unconditionally to the label.
        poptok(parser);
        // find symbol table
        ttype = peektoktype(parser);
        if (ttype != TOKEN_IDENT) {
            error("expect identifier after goto");
        }
        // TODO filling label
        /* Parse the label first, then require ';'. The old order checked
         * ';' one token ahead, popped the label, and then failed in
         * parse_ident(). */
        node->goto_stmt.label = parse_ident(parser);
        flushpeektok(parser);
        expecttok(parser, TOKEN_SEMICOLON);
        node->type = NT_STMT_GOTO;
        break;
    }
    case TOKEN_SEMICOLON: {
        /**
         * ;
         * empty stmt using by :
         * while () ;
         * if () ;
         * for () ;
         */
        poptok(parser);
        node->type = NT_STMT_EMPTY;
        break;
    }
    case TOKEN_L_BRACE: {
        /**
         * stmt_block like: { (decl_var | stmt) ... }
         */
        node->block_stmt.block = parse_block(parser);
        node->type = NT_STMT_BLOCK;
        break;
    }
    case TOKEN_IDENT: {
        // TODO label goto
        /* Second peeked token: `name :` is a label, anything else an expression. */
        if (peektoktype(parser) != TOKEN_COLON) {
            goto EXP;
        }
        node->label_stmt.label = parse_ident(parser);
        expecttok(parser, TOKEN_COLON);
        node->type = NT_STMT_LABEL;
        break;
    }
    case TOKEN_CASE: {
        // TODO label switch
        poptok(parser);
        error("unimplemented switch label");
        node->label_stmt.label = parse_expr(parser);
        // TODO this expression must be a const int
        flushpeektok(parser);
        expecttok(parser, TOKEN_COLON);
        node->type = NT_STMT_CASE;
        break;
    }
    case TOKEN_DEFAULT: {
        // TODO label switch default
        poptok(parser);
        expecttok(parser, TOKEN_COLON);
        node->type = NT_STMT_DEFAULT;
        break;
    }
    default: {
        /**
         * exp ;
         */
    EXP:
        node->expr_stmt.expr_stmt = parse_expr(parser);
        flushpeektok(parser);
        ttype = peektoktype(parser);
        if (ttype != TOKEN_SEMICOLON) {
            error("exp must end with \";\"");
        }
        poptok(parser);
        node->type = NT_STMT_EXPR;
        break;
    }
    }
    return node;
}

View File

@ -0,0 +1,182 @@
#include "../parser.h"
#include "../type.h"
#include "ast.h"
// /* 状态跳转表定义 */
// typedef void (*StateHandler)(struct Parser*, struct ASTNode**);
// enum TypeParseState {
// TPS_BASE_TYPE, // 解析基础类型 (int/char等)
// TPS_QUALIFIER, // 解析限定符 (const/volatile)
// TPS_POINTER, // 解析指针 (*)
// TPS_ARRAY, // 解析数组维度 ([n])
// TPS_FUNC_PARAMS, // 解析函数参数列表
// TPS_END,
// };
// ;
// /* 状态处理函数前置声明 */
// static void handle_base_type(struct Parser*, struct ASTNode**);
// static void handle_qualifier(struct Parser*, struct ASTNode**);
// static void handle_pointer(struct Parser*, struct ASTNode**);
// static void handle_array(struct Parser*, struct ASTNode**);
// static void handle_func_params(struct Parser*, struct ASTNode**);
// static void handle_error(struct Parser*, struct ASTNode**);
// /* 状态跳转表(核心优化部分) */
// static const struct StateTransition {
// enum TokenType tok; // 触发token
// StateHandler handler; // 处理函数
// enum TypeParseState next_state; // 下一个状态
// } state_table[][8] = {
// [TPS_QUALIFIER] = {
// {TOKEN_CONST, handle_qualifier, TPS_QUALIFIER},
// {TOKEN_VOLATILE, handle_qualifier, TPS_QUALIFIER},
// {TOKEN_VOID, handle_base_type, TPS_POINTER},
// {TOKEN_CHAR, handle_base_type, TPS_POINTER},
// {TOKEN_INT, handle_base_type, TPS_POINTER},
// {TOKEN_EOF, handle_error, TPS_QUALIFIER},
// /* 其他token默认处理 */
// {0, NULL, TPS_BASE_TYPE}
// },
// [TPS_BASE_TYPE] = {
// {TOKEN_MUL, handle_pointer, TPS_POINTER},
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {TOKEN_EOF, NULL, TPS_END},
// {0, NULL, TPS_POINTER}
// },
// [TPS_POINTER] = {
// {TOKEN_MUL, handle_pointer, TPS_POINTER},
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {0, NULL, TPS_END}
// },
// [TPS_ARRAY] = {
// {TOKEN_L_BRACKET, handle_array, TPS_ARRAY},
// {TOKEN_L_PAREN, handle_func_params,TPS_FUNC_PARAMS},
// {0, NULL, TPS_END}
// },
// [TPS_FUNC_PARAMS] = {
// {0, NULL, TPS_END}
// }
// };
// /* 新的类型解析函数 */
// struct ASTNode* parse_type(struct Parser* p) {
// struct ASTNode* type_root = NULL;
// struct ASTNode** current = &type_root;
// enum TypeParseState state = TPS_QUALIFIER;
// while (state != TPS_END) {
// enum TokenType t = peektoktype(p);
// const struct StateTransition* trans = state_table[state];
// // 查找匹配的转换规则
// while (trans->tok != 0 && trans->tok != t) {
// trans++;
// }
// if (trans->handler) {
// trans->handler(p, current);
// } else if (trans->tok == 0) { // 默认规则
// state = trans->next_state;
// continue;
// } else {
// error("syntax error type parse error");
// }
// state = trans->next_state;
// }
// return type_root;
// }
// /* 具体状态处理函数实现 */
// static void handle_qualifier(struct Parser* p, struct ASTNode** current) {
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_QUAL;
// node->data.data_type = poptok(p).type;
// if (*current) {
// (*current)->child.decl.type = node;
// } else {
// *current = node;
// }
// }
// static void handle_base_type(struct Parser* p, struct ASTNode** current) {
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_BASE;
// node->data.data_type = poptok(p).type;
// // 链接到当前节点链的末端
// while (*current && (*current)->child.decl.type) {
// current = &(*current)->child.decl.type;
// }
// if (*current) {
// (*current)->child.decl.type = node;
// } else {
// *current = node;
// }
// }
// static void handle_pointer(struct Parser* p, struct ASTNode** current) {
// poptok(p); // 吃掉*
// struct ASTNode* node = new_ast_node();
// node->node_type = NT_TYPE_PTR;
// // 插入到当前节点之前
// node->child.decl.type = *current;
// *current = node;
// }
// /* 其他处理函数类似实现... */
struct ASTNode* parser_ident_without_pop(struct Parser* parser) {
flushpeektok(parser);
struct Token* tok = peektok(parser);
if (tok->type != TOKEN_IDENT) {
error("syntax error: want identifier but got %d", tok->type);
}
struct ASTNode* node = new_ast_node();
node->type = NT_TERM_IDENT;
node->syms.tok = *tok;
node->syms.decl_node = NULL;
return node;
}
/**
 * Parse an identifier: build its node, then consume the token.
 *
 * @param parser active parser
 * @return the identifier node
 */
struct ASTNode* parse_ident(struct Parser* parser) {
    struct ASTNode* ident = parser_ident_without_pop(parser);
    (void)poptok(parser);
    return ident;
}
struct ASTNode* parse_type(struct Parser* parser) {
flushpeektok(parser);
enum TokenType ttype = peektoktype(parser);
enum DataType dtype;
switch(ttype) {
case TOKEN_VOID: dtype = TYPE_VOID; break;
case TOKEN_CHAR: dtype = TYPE_CHAR; break;
case TOKEN_SHORT: dtype = TYPE_SHORT; break;
case TOKEN_INT: dtype = TYPE_INT; break;
case TOKEN_LONG: dtype = TYPE_LONG; break;
case TOKEN_FLOAT: dtype = TYPE_FLOAT; break;
case TOKEN_DOUBLE: dtype = TYPE_DOUBLE; break;
default:
error("无效的类型说明符");
}
struct ASTNode* node = new_ast_node();
node->type = NT_TERM_TYPE;
// node->data.data_type = dtype;
poptok(parser);
if (peektoktype(parser) == TOKEN_MUL) {
poptok(parser);
}
return node;
}

View File

@ -0,0 +1,136 @@
#include "../parser.h"
#include "../type.h"
/* States of the type-parsing state machine in parse_type(). */
enum TypeParseState {
TPS_BASE_TYPE, // parse the base type (int/char/...)
TPS_QUALIFIER, // parse qualifiers (const/volatile)
TPS_POINTER, // parse pointers (*)
TPS_ARRAY, // parse array dimensions ([n])
TPS_FUNC_PARAMS // parse a function parameter list
};
/*
 * State-machine type parser.
 *
 * Starting in TPS_QUALIFIER, it moves through
 *   qualifiers -> base type -> pointers -> array dimensions -> function params
 * and chains one AST node per qualifier/pointer/array/function suffix via
 * child.decl.type. Returns the root of that chain when no further suffix
 * matches.
 *
 * NOTE(review): this uses field names (data.data_type, child.decl.type) that
 * the other parser files in this dump do not use, and it calls is_base_type()
 * and parse_param_list() before their static definitions - confirm whether
 * this file is still part of the build.
 */
struct ASTNode* parse_type(struct Parser* p) {
struct ASTNode* type_root = new_ast_node();
struct ASTNode* current = type_root;
current->type = NT_TYPE_BASE;
enum TypeParseState state = TPS_QUALIFIER;
int pointer_level = 0;
while (1) {
enum TokenType t = peektoktype(p);
switch (state) {
// base type (int, char, ...)
case TPS_BASE_TYPE:
if (is_base_type(t)) {
// current->data.data_type = token_to_datatype(t);
poptok(p);
state = TPS_POINTER;
} else {
error("Expected type specifier");
}
break;
// type qualifiers (const/volatile)
case TPS_QUALIFIER:
if (t == TOKEN_CONST || t == TOKEN_VOLATILE) {
struct ASTNode* qual_node = new_ast_node();
qual_node->type = NT_TYPE_QUAL;
qual_node->data.data_type = t; // reuse the data_type field to store the qualifier
current->child.decl.type = qual_node;
current = qual_node;
poptok(p);
} else {
state = TPS_BASE_TYPE;
}
break;
// pointers (*)
case TPS_POINTER:
if (t == TOKEN_MUL) {
struct ASTNode* ptr_node = new_ast_node();
ptr_node->type = NT_TYPE_PTR;
current->child.decl.type = ptr_node;
current = ptr_node;
pointer_level++;
poptok(p);
} else {
state = TPS_ARRAY;
}
break;
// array dimensions ([n])
case TPS_ARRAY:
if (t == TOKEN_L_BRACKET) {
poptok(p); // consume '['
struct ASTNode* arr_node = new_ast_node();
arr_node->type = NT_TYPE_ARRAY;
// parse the array size (syntax check only)
if (peektoktype(p) != TOKEN_R_BRACKET) {
parse_expr(p); // the value is not evaluated; the result is discarded
}
expecttok(p, TOKEN_R_BRACKET);
current->child.decl.type = arr_node;
current = arr_node;
} else {
state = TPS_FUNC_PARAMS;
}
break;
// function parameter list
case TPS_FUNC_PARAMS:
if (t == TOKEN_L_PAREN) {
struct ASTNode* func_node = new_ast_node();
func_node->type = NT_TYPE_FUNC;
current->child.decl.type = func_node;
// parse the parameter list (structure only, types are not validated)
parse_param_list(p, func_node);
current = func_node;
} else {
return type_root; // end of type parsing
}
break;
}
}
}
/**
 * Tell whether a token is a base type keyword, i.e. lies in the enum range
 * TOKEN_VOID..TOKEN_DOUBLE (inclusive).
 *
 * @return 1 if so, 0 otherwise
 */
static int is_base_type(enum TokenType t) {
    if (t < TOKEN_VOID) {
        return 0;
    }
    return t <= TOKEN_DOUBLE;
}
// // 转换token到数据类型简化版
// static enum DataType token_to_datatype(enum TokenType t) {
// static enum DataType map[] = {
// [TOKEN_VOID] = DT_VOID,
// [TOKEN_CHAR] = DT_CHAR,
// [TOKEN_INT] = DT_INT,
// // ...其他类型映射
// };
// return map[t];
// }
/**
 * Lightweight parameter-list parser: `( type [name] [,] ... )`.
 *
 * Each parameter's type is parsed recursively; an optional parameter name and
 * an optional comma are consumed. The parsed parameter nodes are currently
 * discarded and `func` is not modified.
 */
static void parse_param_list(struct Parser* p, struct ASTNode* func) {
    expecttok(p, TOKEN_L_PAREN);
    for (;;) {
        if (peektoktype(p) == TOKEN_R_PAREN) {
            break;
        }
        struct ASTNode* param = parse_type(p); /* recursive type parse */
        /* optional parameter name (syntax check only) */
        if (peektoktype(p) == TOKEN_IDENT) {
            poptok(p);
        }
        if (peektoktype(p) == TOKEN_COMMA) {
            poptok(p);
        }
    }
    expecttok(p, TOKEN_R_PAREN);
}

View File

@ -0,0 +1,67 @@
#include "parser.h"
#include "type.h"
#include "ast/ast.h"
/**
 * Consume the oldest buffered token.
 *
 * Advances cur_idx around the ring and decrements the buffered-token count.
 *
 * @return 0 on success, -1 if no token is buffered
 */
int poptok(struct Parser* parser) {
    if (parser->size != 0) {
        parser->cur_idx = (parser->cur_idx + 1) % PARSER_MAX_TOKEN_QUEUE;
        parser->size -= 1;
        return 0;
    }
    return -1;
}
/*
 * Reset the look-ahead cursor to the consume cursor, so that the next
 * peektok() returns the first token that has not been popped yet.
 */
void flushpeektok(struct Parser* parser) {
parser->peek_idx = parser->cur_idx;
}
/**
 * Advance the look-ahead cursor by one and return the token there.
 *
 * If the cursor was at the most recently fetched slot, a new token is read
 * from the lexer into the ring buffer first. A full buffer triggers a
 * warning, and an error if another token would have to be fetched.
 *
 * @return pointer into the parser's token ring buffer
 */
struct Token* peektok(struct Parser* parser) {
    const int next = (parser->peek_idx + 1) % PARSER_MAX_TOKEN_QUEUE;
    struct Token* slot = &(parser->TokenBuffer[next]);

    if (parser->size >= PARSER_MAX_TOKEN_QUEUE) {
        warn("peek maybe too deep");
    }

    const int need_fetch = (parser->peek_idx == parser->end_idx);
    if (need_fetch) {
        if (parser->size == PARSER_MAX_TOKEN_QUEUE) {
            // FIXME
            error("buffer overflow");
        }
        get_valid_token(parser->lexer, slot);
        parser->size += 1;
        parser->end_idx = next;
    }

    parser->peek_idx = next;
    return slot;
}
enum TokenType peektoktype(struct Parser* parser) {
return peektok(parser)->type;
}
void expecttok(struct Parser* parser, enum TokenType type) {
struct Token* tok = peektok(parser);
if (tok->type != type) {
error("expected tok: %s, got %s", get_token_name(type), get_token_name(tok->type));
} else {
poptok(parser);
}
}
/**
 * Initialise a parser with an empty token queue.
 *
 * @param parser parser to initialise
 * @param lexer  token source; stored, not copied
 * @param symtab symbol table used while parsing; stored, not copied
 */
void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab) {
    parser->cur_node = NULL;
    parser->root = NULL;
    parser->cur_idx = 0;
    parser->peek_idx = 0;
    parser->end_idx = 0;
    parser->size = 0;
    parser->lexer = lexer;
    parser->symtab = symtab;
    /* Previously left uninitialised. */
    parser->err_level = 0;
    // TODO
}
/* Run the parser: parses the whole program via parse_prog(). */
void run_parser(struct Parser* parser) {
parse_prog(parser);
}

View File

@ -0,0 +1,33 @@
#ifndef __PARSER_H__
#define __PARSER_H__
#include "../frontend.h"
#include "../lexer/lexer.h"
// #include "symbol_table/symtab.h"
// #include "ast/ast.h"
#define PARSER_MAX_TOKEN_QUEUE 16
/*
 * Parser state: the AST being built plus a small ring buffer of look-ahead
 * tokens read from the lexer.
 */
struct Parser {
struct ASTNode* root; // root of the AST; set by parse_prog()
struct ASTNode* cur_node; // set to NULL by init_parser(); not used elsewhere in parser.c
struct Lexer* lexer; // token source
struct SymbolTable* symtab; // symbol table used while parsing
int cur_idx; // consume cursor; advanced by poptok()
int peek_idx; // look-ahead cursor; advanced by peektok(), reset by flushpeektok()
int end_idx; // slot of the most recently fetched token
int size; // number of buffered tokens not yet popped
struct Token TokenBuffer[PARSER_MAX_TOKEN_QUEUE]; // ring buffer of look-ahead tokens
int err_level; // not read or written in parser.c - TODO confirm intended use
};
void init_parser(struct Parser* parser, struct Lexer* lexer, struct SymbolTable* symtab);
void run_parser(struct Parser* parser);
void flushpeektok(struct Parser* parser);
int poptok(struct Parser* parser);
struct Token* peektok(struct Parser* parser);
enum TokenType peektoktype(struct Parser* parser);
void expecttok(struct Parser* parser, enum TokenType type);
#endif

View File

@ -0,0 +1,53 @@
// hashmap.c
#include "hashmap.h"
#include <stdlib.h>
#include <string.h>
/**
 * DJB2 string hash (hash = hash * 33 + c, seeded with 5381), reduced to a
 * bucket index in [0, HMAP_SIZE).
 */
static unsigned long hash(const char* str) {
    unsigned long acc = 5381;
    for (const char* p = str; *p != '\0'; ++p) {
        int ch = *p;
        acc = acc * 33 + ch;
    }
    return acc % HMAP_SIZE;
}
/* Initialise a map by zero-filling its bucket array (all chains empty). */
void hmap_init(HashMap* map) {
memset(map->buckets, 0, sizeof(map->buckets));
}
/**
 * Insert a key/value pair at the head of the key's bucket chain.
 *
 * The key is copied; the value pointer is stored as-is. An existing entry
 * with the same key is not replaced, but hmap_get() will find the new entry
 * first. Aborts the process if memory cannot be allocated.
 */
void hmap_put(HashMap* map, const char* key, void* value) {
    /* Copy the key with malloc/memcpy: strdup() is not part of ISO C11. */
    size_t key_len = strlen(key) + 1;
    HashMapEntry* entry = malloc(sizeof *entry);
    char* key_copy = malloc(key_len);
    if (entry == NULL || key_copy == NULL) {
        free(key_copy);
        free(entry);
        abort();
    }
    memcpy(key_copy, key, key_len);

    unsigned long idx = hash(key);
    entry->key = key_copy;
    entry->value = value;
    entry->next = map->buckets[idx];
    map->buckets[idx] = entry;
}
/**
 * Look up a key.
 *
 * @return the value of the first entry in the bucket chain whose key matches,
 *         or NULL if there is none
 */
void* hmap_get(HashMap* map, const char* key) {
    for (HashMapEntry* node = map->buckets[hash(key)]; node != NULL; node = node->next) {
        if (strcmp(node->key, key) == 0) {
            return node->value;
        }
    }
    return NULL;
}
/**
 * Tell whether a key is present in the map.
 *
 * Walks the bucket chain and compares keys, so a key stored with a NULL value
 * is still reported as present (testing hmap_get() != NULL missed that case).
 *
 * @return 1 if the key exists, 0 otherwise
 */
int hmap_contains(HashMap* map, const char* key) {
    for (HashMapEntry* entry = map->buckets[hash(key)]; entry != NULL; entry = entry->next) {
        if (strcmp(entry->key, key) == 0) {
            return 1;
        }
    }
    return 0;
}
// Frees every entry and its copied key; stored values are not freed.
// Each bucket head is reset to NULL afterwards, so the map is left empty and
// may be reused or destroyed again without touching freed memory.
void hmap_destroy(HashMap* map) {
    for (int i = 0; i < HMAP_SIZE; i++) {
        HashMapEntry* entry = map->buckets[i];
        while (entry) {
            HashMapEntry* next = entry->next;
            free(entry->key);
            free(entry);
            entry = next;
        }
        map->buckets[i] = NULL;
    }
}

View File

@ -0,0 +1,31 @@
#ifndef HASHMAP_H
#define HASHMAP_H
// Number of buckets in every HashMap (fixed at compile time).
#define HMAP_SIZE 64
// One key/value entry; entries that hash to the same bucket are chained through `next`.
typedef struct HashMapEntry {
char* key; // copy of the key made by hmap_put(), freed by hmap_destroy()
void* value; // caller-supplied pointer; never freed by the map
struct HashMapEntry* next; // next entry in the same bucket, or NULL
} HashMapEntry;
// Hash table made of HMAP_SIZE singly linked bucket chains.
typedef struct {
HashMapEntry* buckets[HMAP_SIZE];
} HashMap;
// Initialize the hash table
void hmap_init(HashMap* map);
// Insert a key/value pair
void hmap_put(HashMap* map, const char* key, void* value);
// Look up the value for a key
void* hmap_get(HashMap* map, const char* key);
// Check whether a key exists
int hmap_contains(HashMap* map, const char* key);
// Free the hash table's memory (stored values are not freed)
void hmap_destroy(HashMap* map);
#endif

View File

@ -0,0 +1,43 @@
// scope.c
#include "scope.h"
#include <stdio.h>
#include <stdlib.h>
typedef struct Scope Scope;
// Allocates a new, empty scope whose enclosing scope is `parent` (may be NULL).
// Both offsets start at 0. The caller releases the scope with scope_destroy().
// Terminates the process if the allocation fails.
Scope* scope_create(Scope* parent) {
    Scope* scope = malloc(sizeof *scope);
    if (scope == NULL) {
        fprintf(stderr, "Error: out of memory while creating scope\n");
        exit(EXIT_FAILURE);
    }
    hmap_init(&scope->symbols);
    scope->parent = parent;
    scope->base_offset = 0;
    scope->cur_offset = 0;
    return scope;
}
// Releases a scope and its symbol map; the stored symbol pointers and the
// parent scope are left untouched. Passing NULL is a no-op, like free().
void scope_destroy(Scope* scope) {
    if (scope == NULL) {
        return;
    }
    hmap_destroy(&scope->symbols);
    free(scope);
}
// Adds `symbol` under `name` to this scope only.
// A name that is already present in this scope is a fatal redefinition error:
// a message is printed to stderr and the process exits.
void scope_insert(Scope* scope, const char* name, void* symbol) {
    HashMap* table = &scope->symbols;
    if (!hmap_contains(table, name)) {
        hmap_put(table, name, symbol);
        return;
    }
    // Duplicate definition in the same scope.
    fprintf(stderr, "Error: Symbol '%s' already defined\n", name);
    exit(EXIT_FAILURE);
}
// Searches `scope` and then each enclosing scope in turn for `name`.
// Returns the first non-NULL symbol found, or NULL if no scope has one.
void* scope_lookup(Scope* scope, const char* name) {
    for (Scope* cur = scope; cur != NULL; cur = cur->parent) {
        void* found = hmap_get(&cur->symbols, name);
        if (found != NULL) {
            return found;
        }
    }
    return NULL;
}
// Looks up `name` in this scope only (enclosing scopes are not searched).
// Returns the stored symbol pointer, or NULL if the name is not found here.
void* scope_lookup_current(Scope* scope, const char* name) {
return hmap_get(&scope->symbols, name);
}

View File

@ -0,0 +1,28 @@
#ifndef SCOPE_H
#define SCOPE_H
#include "hashmap.h"
// One lexical scope: a symbol map plus a link to the enclosing scope.
struct Scope {
HashMap symbols; // symbols defined in this scope
struct Scope* parent; // enclosing scope (NULL for the outermost one)
// Both offsets are 0 after scope_create(); symtab_enter_scope() sets a new
// scope's base_offset to its parent's base_offset + cur_offset.
// NOTE(review): presumably storage offsets for variables — confirm with the users of these fields.
int base_offset;
int cur_offset;
};
// Create a new scope (the parent scope may be NULL)
struct Scope* scope_create(struct Scope* parent);
// Destroy a scope
void scope_destroy(struct Scope* scope);
// Insert a symbol into the current scope
void scope_insert(struct Scope* scope, const char* name, void* symbol);
// Look up a symbol, walking outward through the enclosing scopes
void* scope_lookup(struct Scope* scope, const char* name);
// Look up a symbol in the current scope only
void* scope_lookup_current(struct Scope* scope, const char* name);
#endif

View File

@ -0,0 +1,45 @@
// symtab.c
#include "../../frontend.h"
#include "scope.h"
#include "symtab.h"
typedef struct SymbolTable SymbolTable;
typedef struct Scope Scope;
// Creates the global scope and makes it the current scope.
void init_symtab(SymbolTable* symtab) {
    Scope* root = scope_create(NULL);
    symtab->global_scope = root;
    symtab->cur_scope = root;
}
// Releases every scope that is still open, from the current scope outward to
// the global scope, then clears both pointers so the table cannot be used
// (or freed) again by accident.
// Destroying only the global scope would leak any nested scope that was
// entered but not left.
void del_symtab(SymbolTable* symtab) {
    Scope* scope = symtab->cur_scope;
    while (scope != NULL) {
        Scope* parent = scope->parent;
        scope_destroy(scope);
        scope = parent;
    }
    symtab->cur_scope = NULL;
    symtab->global_scope = NULL;
}
// Opens a new scope nested inside the current one and makes it current.
// The new scope's base offset continues where the enclosing scope currently
// ends (enclosing base_offset + cur_offset).
void symtab_enter_scope(SymbolTable* symtab) {
    Scope* outer = symtab->cur_scope;
    Scope* inner = scope_create(outer);
    inner->base_offset = outer->base_offset + outer->cur_offset;
    symtab->cur_scope = inner;
}
// Closes the current scope: the enclosing scope becomes current and the closed
// scope is destroyed.
// Leaving the global scope is rejected, as the error message already states;
// otherwise symtab->global_scope would be left pointing at freed memory.
void symtab_leave_scope(SymbolTable* symtab) {
    Scope* scope = symtab->cur_scope;
    if (scope == NULL || scope == symtab->global_scope) {
        error("cannot leave NULL scope or global scope");
    }
    symtab->cur_scope = scope->parent;
    scope_destroy(scope);
}
// Registers `ast_node` under `name` in the current scope.
// TODO: report a redefinition in the current scope as a warning here.
// For now scope_insert() handles it: it prints an error and exits.
void symtab_add_symbol(SymbolTable* symtab, const char* name, void* ast_node) {
    scope_insert(symtab->cur_scope, name, ast_node);
}
// Resolves `name` starting at the current scope and walking outward through
// the enclosing scopes. Returns the stored pointer, or NULL if not found.
void* symtab_lookup_symbol(SymbolTable* symtab, const char* name) {
return scope_lookup(symtab->cur_scope, name);
}

View File

@ -0,0 +1,18 @@
// symtab.h
#ifndef __SYMTAB_H__
#define __SYMTAB_H__
// Symbol table: a chain of nested scopes linked through their parent pointers.
struct SymbolTable {
struct Scope* cur_scope; // innermost scope currently open
struct Scope* global_scope; // outermost scope, created by init_symtab()
};
// Create the global scope and make it current.
void init_symtab(struct SymbolTable* symtab);
// Release the symbol table's scopes.
void del_symtab(struct SymbolTable* symtab);
// Open a new scope nested in the current one.
void symtab_enter_scope(struct SymbolTable* symtab);
// Close the current scope and return to the enclosing one.
void symtab_leave_scope(struct SymbolTable* symtab);
// Add a symbol to the current scope; a duplicate name in that scope is a fatal error.
void symtab_add_symbol(struct SymbolTable* symtab, const char* name, void* ast_node);
// Look up a symbol from the current scope outward; returns NULL if not found.
void* symtab_lookup_symbol(struct SymbolTable* symtab, const char* name);
#endif

View File

@ -0,0 +1,4 @@
// Test input: prints a greeting through the externally provided _print_str().
extern int _print_str(const char* str);
int main(void) {
_print_str("Hello, world!\n");
}

View File

@ -0,0 +1,14 @@
// int __print_str(char* str);
int f(void);
// Parser test input: a declaration followed by an expression that mixes
// a call, parentheses and operators of different precedence.
int main(void) {
int a;
// f();
// a = 1 + 2 * 3 + 4;
// __print_str("Hello, world!\n");
a = 3 - f() * (3 + 2) % 6;
// Test case:
// if (a) if (2) 3; else b;
// Is it correctly parsed as if (a) { if (b) c else d }
}

View File

@ -0,0 +1,34 @@
#include "../parser.h"
#include "../ast/ast.h"
#include "../symtab/symtab.h"
#include <stdio.h>
// gcc -g ../parser.c ../../lexer/lexer.c ../ast/ast.c ../ast/block.c ../ast/decl.c ../ast/expr.c ../ast/func.c ../ast/program.c ../ast/stmt.c ../ast/term.c ../symtab/hashmap.c ../symtab/scope.c ../symtab/symtab.c test_parser.c -o test_parser
// gcc -g test_parser.c -L../.. -lfrontend -o test_parser
// Parser smoke test: parses the file named by argv[1] (default "test_file.c")
// and prints the resulting AST. Returns 1 if the file cannot be opened.
int main(int argc, char** argv) {
    const char* file_name = "test_file.c";
    if (argc == 2) {
        file_name = argv[1];
    }
    FILE* fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror("open file failed");
        return 1;
    }
    printf("open file success\n");

    struct Lexer lexer;
    // NOTE(review): fread_s is not part of ISO C or POSIX (it is an MSVC CRT
    // function); other toolchains need a wrapper with the lexer_sread_fn signature.
    init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);
    struct SymbolTable symtab;
    init_symtab(&symtab);
    struct Parser parser;
    init_parser(&parser, &lexer, &symtab);
    parse_prog(&parser);
    printf("parse_end\n");
    pnt_ast(parser.root, 0);

    // Parsing and printing are finished, so the input stream is no longer needed.
    fclose(fp);
    return 0;
}

View File

@ -0,0 +1,35 @@
#ifndef __TYPE_H__
#define __TYPE_H__
#include "../lexer/token.h"
// Kinds of C data types known to the front end.
// NOTE(review): no user of this enum is visible in this part of the source.
enum DataType {
TYPE_VOID,
TYPE_CHAR,
TYPE_SHORT,
TYPE_INT,
TYPE_LONG,
TYPE_LONG_LONG,
TYPE_FLOAT,
TYPE_DOUBLE,
TYPE_LONG_DOUBLE,
// prefix
TYPE_SIGNED,
TYPE_UNSIGNED,
// TYPE_BOOL,
// TYPE_COMPLEX,
// TYPE_IMAGINARY,
TYPE_ENUM,
TYPE_ARRAY,
TYPE_STRUCT,
TYPE_UNION,
TYPE_FUNCTION,
TYPE_POINTER,
TYPE_ATOMIC,
TYPE_TYPEDEF,
};
#endif

299
ccompiler/middleend/ir.c Normal file
View File

@ -0,0 +1,299 @@
#include "ir.h"
#include "../frontend/frontend.h"
typedef struct ASTNode ASTNode;
// Context structure that records the state of IR generation
typedef struct {
ir_func_t* current_func; // function currently being processed
ir_bblock_t* current_block; // current basic block
uint32_t vreg_counter; // virtual register counter
} IRGenContext;
// Generation state shared by the functions in this file; saved and restored
// around each function body by gen_ir_from_ast().
IRGenContext ctx;
// The program being built; gen_ir_from_ast() appends each generated function to prog.funcs.
ir_prog_t prog;
// The 32-bit integer type; used as the type of every variable allocation in this file.
ir_type_t type_i32 = {
.tag = IR_TYPE_INT32,
};
// Initializes the node's used_by list to empty; no other field is touched.
static inline void init_ir_node_t(ir_node_t* node) {
vector_init(node->used_by);
}
// Allocates a new IR node with an empty used_by list and returns it.
// All other fields are left unset; callers fill them in (typically by
// assigning a compound literal to the whole node).
static inline ir_node_t* new_ir_node_t(void) {
    ir_node_t* node = xmalloc(sizeof(ir_node_t));
    init_ir_node_t(node);
    // The function previously fell off the end without returning the node,
    // which is undefined behavior for every caller that uses the result.
    return node;
}
// Appends a freshly allocated instruction node to `block` and returns it.
// When `block` is NULL the current block of the generation context is used.
// The caller is expected to fill in the returned node.
ir_node_t* emit_instr(ir_bblock_t* block) {
    ir_bblock_t* target = (block != NULL) ? block : ctx.current_block;
    ir_node_t* instr = new_ir_node_t();
    vector_push(target->instrs, instr);
    return instr;
}
// Placeholder for emitting a conditional branch; it currently has no effect.
// The local node `br` is built but never emitted (the emit call is commented
// out), and `cond`, `true_lable` and `false_lable` are unused.
// NOTE(review): the tag is IR_NODE_RET although ir.h defines IR_NODE_BRANCH —
// confirm the intended tag when this is implemented.
void emit_br(ir_node_t cond, const char* true_lable, const char* false_lable) {
ir_node_t br = {
.tag = IR_NODE_RET,
.data = {
}
};
// emit_instr(br, NULL);
}
/**
 * Generates IR for an expression AST node and returns the IR node that holds
 * the expression's value.
 *
 * Terminal nodes (constant, identifier, call) produce their value node without
 * appending anything to the current basic block. Every other node is treated
 * as an operator: the operands are generated first (left, then right if
 * present), one instruction is appended to ctx.current_block through
 * emit_instr(), and that instruction is recorded in each operand's used_by list.
 *
 * Returns:
 *   - NT_COMMA:  the right operand's value (no instruction is emitted);
 *   - NT_ASSIGN: the stored value (rhs); a STORE instruction is emitted;
 *   - operators: the emitted IR_NODE_OP instruction.
 *
 * Node types that cannot be lowered yet (including ~, && and ||) terminate the
 * process through error() instead of returning an indeterminate pointer.
 */
ir_node_t* gen_ir_expr(ASTNode* node) {
    switch (node->type) {
    case NT_TERM_VAL: {
        ir_node_t* ir = new_ir_node_t();
        *ir = (ir_node_t) {
            .tag = IR_NODE_CONST_INT,
            .data.const_int = {
                .val = node->syms.tok.constant.i,
            },
        };
        return ir;
    }
    case NT_TERM_IDENT: {
        // The declaration's IR node is stored in decl_val.data by gen_ir_from_ast().
        ir_node_t* decl = node->syms.decl_node->decl_val.data;
        return decl;
    }
    case NT_TERM_CALL: {
        // TODO: resolve the callee and generate the arguments.
        ir_node_t* ir = new_ir_node_t();
        *ir = (ir_node_t) {
            .tag = IR_NODE_CALL,
            .data.call = {
                .callee = NULL,
            },
        };
        vector_init(ir->data.call.args);
        return ir;
    }
    default:
        // Not a terminal: handled as an operator below.
        break;
    }

    ir_node_t* lhs = gen_ir_expr(node->expr.left);
    ir_node_t* rhs = node->expr.right ? gen_ir_expr(node->expr.right) : NULL;

    if (node->type == NT_COMMA) {
        return rhs;
    }

    ir_node_t* instr = emit_instr(NULL);
    vector_push(lhs->used_by, instr);
    if (rhs) {
        vector_push(rhs->used_by, instr);
    }

    ir_node_t* ret = NULL;

// Fills `instr` with a binary operation on lhs/rhs and makes it the result.
#define BINOP(operand) do { \
        *instr = (ir_node_t){ \
            .tag = IR_NODE_OP, \
            .data.op = { \
                .op = operand, \
                .lhs = lhs, \
                .rhs = rhs, \
            }, \
        }; \
        ret = instr; \
    } while (0)

    switch (node->type) {
    case NT_ADD:    // (expr) + (expr)
        BINOP(IR_OP_ADD);
        break;
    case NT_SUB:    // (expr) - (expr)
        BINOP(IR_OP_SUB);
        break;
    case NT_MUL:    // (expr) * (expr)
        BINOP(IR_OP_MUL);
        break;
    case NT_DIV:    // (expr) / (expr)
        BINOP(IR_OP_DIV);
        break;
    case NT_MOD:    // (expr) % (expr)
        BINOP(IR_OP_MOD);
        break;
    case NT_AND:    // (expr) & (expr)
        BINOP(IR_OP_AND);
        break;
    case NT_OR:     // (expr) | (expr)
        BINOP(IR_OP_OR);
        break;
    case NT_XOR:    // (expr) ^ (expr)
        BINOP(IR_OP_XOR);
        break;
    case NT_L_SH:   // (expr) << (expr)
        BINOP(IR_OP_SHL);
        break;
    case NT_R_SH:   // (expr) >> (expr)
        BINOP(IR_OP_SHR); // Shift right logical.
        // TODO
        // BINOP(IR_OP_SAR); // Shift right arithmetic.
        break;
    case NT_EQ:     // (expr) == (expr)
        BINOP(IR_OP_EQ);
        break;
    case NT_NEQ:    // (expr) != (expr)
        BINOP(IR_OP_NEQ);
        break;
    case NT_LE:     // (expr) <= (expr)
        BINOP(IR_OP_LE);
        break;
    case NT_GE:     // (expr) >= (expr)
        BINOP(IR_OP_GE);
        break;
    case NT_LT:     // (expr) < (expr)
        BINOP(IR_OP_LT);
        break;
    case NT_GT:     // (expr) > (expr)
        // Strictly greater-than; this used to be lowered to IR_OP_GE.
        BINOP(IR_OP_GT);
        break;
    case NT_NOT: {  // ! (expr)
        // Logical not is lowered to the comparison (0 == expr).
        ir_node_t* zero = xmalloc(sizeof(ir_node_t));
        *zero = (ir_node_t){
            .tag = IR_NODE_CONST_INT,
            .data.const_int = {
                .val = 0,
            },
        };
        *instr = (ir_node_t){
            .tag = IR_NODE_OP,
            .data.op = {
                .op = IR_OP_EQ,
                .lhs = zero,
                .rhs = lhs,
            },
        };
        ret = instr;
        break;
    }
    case NT_ASSIGN: // (expr) = (expr)
        *instr = (ir_node_t){
            .tag = IR_NODE_STORE,
            .data.store = {
                .target = lhs,
                .value = rhs,
            },
        };
        ret = rhs;
        break;
    // TODO: not lowered yet. They are reported as unsupported rather than
    // leaving an unfilled instruction in the block and returning garbage.
    case NT_BIT_NOT:    // ~ (expr)          (IR_OP_NOT)
    case NT_AND_AND:    // (expr) && (expr)
    case NT_OR_OR:      // (expr) || (expr)
    // case NT_COND :   // (expr) ? (expr) : (expr)
    default:
        // TODO self error msg
        error("Unsupported IR generation for AST node type %d", node->type);
        break;
    }
#undef BINOP

    return ret;
}
// Generates IR for a program-, function-, statement- or declaration-level AST
// node. Results are written into the global `prog` and the current block of the
// global `ctx`. Node types not listed below terminate the process via error().
void gen_ir_from_ast(struct ASTNode* node) {
switch (node->type) {
case NT_ROOT: {
// Generate every top-level child in order.
for (int i = 0; i < node->root.child_size; i ++) {
gen_ir_from_ast(node->root.children[i]);
}
} break;
case NT_FUNC: {
// Create the function with a single basic block labelled "entry".
ir_func_t *func = xmalloc(sizeof(ir_func_t));
*func = (ir_func_t) {
.name = node->func.name->syms.tok.constant.str,
};
vector_init(func->bblocks);
ir_bblock_t *entry = xmalloc(sizeof(ir_bblock_t));
*entry = (ir_bblock_t) {
.label = "entry",
};
vector_init(entry->instrs);
vector_push(func->bblocks, entry);
// Switch the generation context to this function while its body is
// generated, then restore the previous context.
IRGenContext prev_ctx = ctx;
ctx = (IRGenContext) {
.current_func = func,
.current_block = vector_at(func->bblocks, 0),
.vreg_counter = 0,
};
gen_ir_from_ast(node->func.body);
ctx = prev_ctx;
vector_push(prog.funcs, func);
} break;
case NT_STMT_RETURN: {
// Generate the returned expression first, then emit the RET that uses it.
ir_node_t* ret = gen_ir_expr(node->return_stmt.expr_stmt);
ir_node_t* ir = emit_instr(NULL);
*ir = (ir_node_t) {
.tag = IR_NODE_RET,
.data = {
.ret = {
.ret_val = ret,
}
}
};
break;
}
case NT_BLOCK: {
// Generate every statement of the block in order.
for (int i = 0; i < node->block.child_size; i ++) {
gen_ir_from_ast(node->block.children[i]);
}
break;
}
case NT_STMT_IF: {
// Not implemented yet: only the condition is generated; the two branch
// expressions below are evaluated for no effect and produce no IR.
ir_node_t *cond = gen_ir_expr(node->if_stmt.cond);
// xmalloc();
// ir_bblock_t then_block = {
// };
node->if_stmt.if_stmt;
node->if_stmt.else_stmt;
break;
}
case NT_STMT_WHILE: {
// Not implemented yet: no IR is generated.
node->while_stmt.cond;
node->while_stmt.body;
break;
}
case NT_STMT_DOWHILE: {
// Not implemented yet: no IR is generated.
node->do_while_stmt.cond;
node->do_while_stmt.body;
break;
}
case NT_STMT_FOR: {
// Not implemented yet: no IR is generated.
node->for_stmt.init;
node->for_stmt.cond;
node->for_stmt.iter;
node->for_stmt.body;
break;
}
case NT_DECL_VAR: {
// Emit an ALLOC for the variable (always typed as type_i32 here) and store
// it in decl_val.data, where gen_ir_expr() finds it for identifiers.
ir_node_t* ret_node = emit_instr(NULL);
*ret_node = (ir_node_t) {
.tag = IR_NODE_ALLOC,
.name = node->decl_val.name->syms.tok.constant.str,
.type = &type_i32,
};
node->decl_val.data = ret_node;
// An attached initializer, if any, is generated as a statement.
if (node->decl_val.expr_stmt != NULL) {
gen_ir_from_ast(node->decl_val.expr_stmt);
}
break;
}
case NT_STMT_EXPR: {
// The expression's value is discarded.
gen_ir_expr(node->expr_stmt.expr_stmt);
break;
}
case NT_STMT_EMPTY: {
break;
}
default:
// TODO: error handling
error("unknown node type");
break;
}
}

155
ccompiler/middleend/ir.h Normal file
View File

@ -0,0 +1,155 @@
// ir_core.h
#ifndef IR_CORE_H
#define IR_CORE_H
#include "../../libcore/vector.h"
#include <stddef.h>
#include <stdint.h>
// Error code definitions
typedef enum {
IR_EC_SUCCESS = 0, // success
IR_EC_MEMORY_ERROR, // memory allocation failed
IR_EC_TYPE_MISMATCH, // type mismatch
IR_EC_INVALID_OPERAND, // invalid operand
IR_EC_DUPLICATE_SYMBOL, // symbol redefinition
} ir_ecode_t;
// Description of an IR type. `tag` selects the kind; the anonymous union holds
// the extra data for array types (`arr`) and function types (`func`).
//
// The struct carries the tag `ir_type` so that the nested `struct ir_type *`
// members really point to this type. Without the tag they referred to a
// separate, never-completed struct that could not be dereferenced.
typedef struct ir_type {
    enum {
        IR_TYPE_INT32,
        IR_TYPE_PTR,
        IR_TYPE_ARRAY,
        IR_TYPE_FUNC,
        IR_TYPE_VOID,
    } tag;
    union {
        struct {
            struct ir_type *base;   // element type
            size_t len;             // number of elements
        } arr;
        struct {
            struct ir_type *ret;        // return type
            struct ir_type **params;    // parameter types
            size_t param_cnt;           // number of entries in params
        } func;
    };
} ir_type_t;
typedef struct ir_node ir_node_t;
// A basic block: a label plus the list of instructions appended to it.
typedef struct ir_bblock {
const char *label;
vector_header(instrs, ir_node_t*);
// ir_arr_t used_by;
} ir_bblock_t; // basic block
// An IR function: name, type, parameter nodes and its basic blocks.
typedef struct {
const char *name;
ir_type_t *type;
vector_header(params, ir_node_t*);
vector_header(bblocks, ir_bblock_t*);
} ir_func_t;
// A whole IR program: global nodes and functions.
typedef struct {
vector_header(global, ir_node_t*);
vector_header(funcs, ir_func_t*);
} ir_prog_t;
// One IR node (a value or an instruction). `tag` selects which member of
// `data` is meaningful.
struct ir_node {
const ir_type_t* type;
const char* name;
// Nodes that use this node as an operand.
vector_header(used_by, ir_node_t*);
enum {
IR_NODE_CONST_INT,
IR_NODE_ALLOC,
IR_NODE_LOAD,
IR_NODE_STORE,
IR_NODE_GET_PTR,
IR_NODE_OP,
IR_NODE_BRANCH,
IR_NODE_JUMP,
IR_NODE_CALL,
IR_NODE_RET,
} tag;
union {
// IR_NODE_CONST_INT
struct {
int32_t val;
} const_int;
// IR_NODE_LOAD
struct {
ir_node_t* target;
} load;
// IR_NODE_STORE
struct {
ir_node_t* target;
ir_node_t* value;
} store;
// IR_NODE_GET_PTR
struct {
ir_node_t* src_addr;
ir_node_t* offset;
} get_ptr;
// IR_NODE_OP: operator applied to lhs (and rhs, when present)
struct {
enum {
/// Not equal to.
IR_OP_NEQ,
/// Equal to.
IR_OP_EQ,
/// Greater than.
IR_OP_GT,
/// Less than.
IR_OP_LT,
/// Greater than or equal to.
IR_OP_GE,
/// Less than or equal to.
IR_OP_LE,
/// Addition.
IR_OP_ADD,
/// Subtraction.
IR_OP_SUB,
/// Multiplication.
IR_OP_MUL,
/// Division.
IR_OP_DIV,
/// Modulo.
IR_OP_MOD,
/// Bitwise AND.
IR_OP_AND,
/// Bitwise OR.
IR_OP_OR,
/// Bitwise XOR.
IR_OP_XOR,
/// Bitwise NOT.
IR_OP_NOT,
/// Shift left logical.
IR_OP_SHL,
/// Shift right logical.
IR_OP_SHR,
/// Shift right arithmetic.
IR_OP_SAR,
} op;
ir_node_t* lhs;
ir_node_t* rhs;
} op;
// IR_NODE_BRANCH
// NOTE(review): the blocks here (and in jump/call below) are stored by
// value, not by pointer — confirm this is intended.
struct {
ir_node_t* cond;
ir_bblock_t true_bblock;
ir_bblock_t false_bblock;
} branch;
// IR_NODE_JUMP
struct {
ir_bblock_t target_bblock;
} jump;
// IR_NODE_CALL
struct {
ir_func_t callee;
vector_header(args, ir_node_t);
} call;
// IR_NODE_RET
struct {
ir_node_t* ret_val;
} ret;
} data;
};
// The program that gen_ir_from_ast() fills in (defined in ir.c).
extern ir_prog_t prog;
struct ASTNode;
// Generates IR for the given AST node into `prog`.
void gen_ir_from_ast(struct ASTNode* node);
#endif // IR_CORE_H

View File

View File

@ -0,0 +1,8 @@
#ifndef __REG_ALLOC_H__
#define __REG_ALLOC_H__
// Placeholder for the register allocator state; it has no members yet.
// NOTE(review): a struct with no members is a GNU extension, not ISO C.
typedef struct {
} reg_alloc_t;
#endif

View File

@ -0,0 +1,8 @@
# `all` and `frontend` are commands, not files: mark them phony so a file or
# directory with the same name can never make them look up to date.
.PHONY: all frontend

all: test_ir

# Link the IR test driver against the frontend static library.
test_ir: frontend
	gcc -g ../ir.c test_ir.c -L../../frontend -lfrontend -o test_ir

# Build libfrontend.a; $(MAKE) passes flags such as -j and -n to the sub-make.
frontend:
	$(MAKE) -C ../../frontend

View File

@ -0,0 +1,5 @@
// IR test input: declaration, assignment with mixed precedence, and return.
int main(void) {
int a;
a = 1 + 2 * 3;
return a;
}

View File

@ -0,0 +1,18 @@
#include "../ir.h"
#include "../../frontend/frontend.h"
// IR smoke test: runs the front end on the file named by argv[1]
// (default "test_file.c") and generates IR from the resulting AST.
// Returns 1 if the file cannot be opened.
int main(int argc, const char** argv) {
    const char* file_name = "test_file.c";
    if (argc == 2) {
        file_name = argv[1];
    }
    FILE* fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror("open file failed");
        return 1;
    }
    printf("open file success\n");

    // Pass the name of the file that was actually opened; the hard-coded
    // "test.c" did not match either the default or a user-supplied path.
    // NOTE(review): fread_s is not part of ISO C or POSIX (it is an MSVC CRT
    // function); other toolchains need a wrapper with the sread_fn signature.
    struct ASTNode* root = frontend(file_name, fp, (sread_fn)fread_s);
    gen_ir_from_ast(root);

    // All input has been consumed by now.
    fclose(fp);
    return 0;
}

1994
libcore/acutest.h Normal file

File diff suppressed because it is too large Load Diff

10
libcore/libcore.h Normal file
View File

@ -0,0 +1,10 @@
#ifndef __STDCORE_H__
#define __STDCORE_H__
#ifndef __NO_LINK_STDLIB
#include <stdlib.h>
#else
#error "__NO_LINK_STDLIB"
#endif
#endif

202
libcore/vector-gdb.py Normal file
View File

@ -0,0 +1,202 @@
# # vector_gdb.py
# import gdb
# import re
# class VectorPrinter:
# """解析宏定义的 vector 结构体"""
# def __init__(self, val):
# self.val = val
# def check_vector_type(self):
# """验证是否为合法 vector 结构体"""
# try:
# # 检查是否包含 size/cap/data 字段
# return all(self.val.type.has_key(field)
# for field in ['size', 'cap', 'data'])
# except gdb.error:
# return False
# def get_array_view(self):
# """将 data 字段转换为数组视图"""
# if not self.check_vector_type():
# return None
# cap = int(self.val['cap'])
# data_ptr = self.val['data']
# if cap == 0 or data_ptr == 0:
# return []
# # 构造数组类型 (例如 int[cap])
# element_type = data_ptr.type.target()
# array_type = element_type.array(cap - 1) # C 数组声明语法
# return data_ptr.cast(array_type.pointer()).dereference()
# def to_string(self):
# if not self.check_vector_type():
# return "Not a vector type"
# size = self.val['size']
# cap = self.val['cap']
# data = self.get_array_view()
# return (f"vector(size={size}, cap={cap}, data={data})")
# class VectorInfoCommand(gdb.Command):
# """自定义命令:显示 vector 详细信息"""
# def __init__(self):
# super(VectorInfoCommand, self).__init__("vector_info",
# gdb.COMMAND_USER)
# def invoke(self, arg, from_tty):
# val = gdb.parse_and_eval(arg)
# printer = VectorPrinter(val)
# if not printer.check_vector_type():
# print(f"'{arg}' is not a vector structure")
# return
# size = int(val['size'])
# cap = int(val['cap'])
# data = printer.get_array_view()
# # 输出格式化信息
# print(f"Vector {arg}:")
# print(f"├─ Size: {size}")
# print(f"├─ Capacity: {cap}")
# print("└─ Data elements [0..{}]:".format(min(size, cap)-1))
# for i in range(min(size, cap)):
# try:
# print(f" [{i}]: {data[i]}")
# except gdb.MemoryError:
# print(f" [{i}]: <invalid memory>")
# def register_printers():
# """注册自动类型识别"""
# def vector_matcher(val):
# return VectorPrinter(val).check_vector_type()
# # 使用 lambda 包装以动态创建 printer
# gdb.pretty_printers.append(lambda val:
# VectorPrinter(val) if vector_matcher(val) else None)
# # 注册命令和打印机
# VectorInfoCommand()
# register_printers()
# vector_gdb.py
import gdb
from gdb.printing import PrettyPrinter
class VectorPrinter:
    """GDB pretty-printer for the structs produced by the C ``vector_header`` macro.

    A value is treated as a vector when it is a struct whose fields are exactly
    ``size``, ``cap`` and ``data`` (in any order). The structs are usually
    anonymous, so matching is done on field names instead of the type name.
    """

    def __init__(self, val: gdb.Value):
        self.val: gdb.Value = val

    def check_type(self) -> bool:
        """Return True if the wrapped value looks like a vector struct.

        Every field must be one of size/cap/data, no field may repeat, and all
        three must be present. Without the final "all present" check, a struct
        with only some of the fields was accepted and later lookups of the
        missing fields failed.
        """
        try:
            if self.val.type.code != gdb.TYPE_CODE_STRUCT:
                return False
            fields = self.val.type.fields()
            if not fields:
                return False
            remaining = ['size', 'cap', 'data']
            for field in fields:
                if field.name not in remaining:
                    return False
                remaining.remove(field.name)
            return not remaining
        except gdb.error:
            return False

    def to_string(self):
        """Return the one-line summary shown by GDB."""
        if not self.check_type():
            return "Not a vector"

        return "vector({} size={}, cap={})".format(
            self.val.address,
            self.val['size'],
            self.val['cap'],
        )

    def display_hint(self):
        """Tell GDB to lay the children out like array elements."""
        return 'array'

    def children(self):
        """Yield one (label, value) pair for each of the first ``size`` elements."""
        if not self.check_type():
            return

        size = int(self.val['size'])
        cap = int(self.val['cap'])
        data_ptr = self.val['data']

        if cap == 0 or data_ptr == 0:
            return

        # View the storage behind `data` as an array with `cap` elements
        # (gdb.Type.array takes the inclusive upper bound, hence cap - 1).
        array = data_ptr.dereference()
        array = array.cast(data_ptr.type.target().array(cap - 1))

        for i in range(size):
            # state = "<used>" if i < size else "<unused>"
            try:
                value = array[i]
                yield (f"[{i}] {value.type} {value.address}", value)
            except gdb.MemoryError:
                yield (f"[{i}]", "<invalid>")
# Registration method 1: the traditional append() approach (the one that worked before)
def append_printer():
    """Register VectorPrinter through the legacy gdb.pretty_printers list."""
    def lookup(val):
        printer = VectorPrinter(val)
        return printer if printer.check_type() else None

    gdb.pretty_printers.append(lookup)
# Registration method 2: the newer registration API (fallback)
def register_new_printer():
    """Register a locator through gdb.printing.

    The locator only prints diagnostics about each value it is asked about and
    never returns a printer.
    """
    class VectorPrinterLocator(PrettyPrinter):
        def __init__(self):
            super().__init__("vector_printer")

        def __call__(self, val):
            ret = VectorPrinter(val).check_type()
            print(f"ret {ret}, type {val.type}, {[(i.name, i.type) for i in val.type.fields()]}")
            return None

    locator = VectorPrinterLocator()
    gdb.printing.register_pretty_printer(gdb.current_objfile(), locator)
# Dual registration for compatibility (only the legacy path is enabled right now)
append_printer() # keep the original approach that is known to work
# register_new_printer() # add the new-style registration
class VectorInfoCommand(gdb.Command):
    """GDB user command ``vector_info EXPR``: print size, capacity and elements of a vector."""

    def __init__(self):
        super().__init__("vector_info", gdb.COMMAND_USER)

    def invoke(self, arg, from_tty):
        vec = gdb.parse_and_eval(arg)
        printer = VectorPrinter(vec)

        if not printer.check_type():
            print("Invalid vector")
            return

        print("=== Vector Details ===")
        print("Size:", vec['size'])
        print("Capacity:", vec['cap'])
        print("Elements:")
        for label, element in printer.children():
            print(f" {label}: {element}")
VectorInfoCommand()

54
libcore/vector.h Normal file
View File

@ -0,0 +1,54 @@
// vector.h
#ifndef VECTOR_H
#define VECTOR_H
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Declares a growable array called `name` whose elements have type `type`.
// Use it as a variable or struct-member declaration: vector_header(items, int);
//   size - number of elements in use
//   cap  - number of elements allocated
//   data - heap storage (NULL while cap is 0)
// The last line must not end in a line continuation: otherwise whatever
// follows the macro in the file is silently pulled into its body.
#define vector_header(name, type) \
    struct {                      \
        size_t size;              \
        size_t cap;               \
        type *data;               \
    } name
// Resets a vector to the empty state without freeing anything:
// size and cap become 0 and data becomes NULL.
#define vector_init(vec)       \
    do {                       \
        (vec).size = 0;        \
        (vec).cap = 0;         \
        (vec).data = NULL;     \
    } while (0)
// Appends `value` to the end of `vec`, growing the storage when it is full
// (capacity starts at 8 and doubles). Prints a message and exits if the
// storage cannot be grown.
//
// `vec` is evaluated several times, so it must not have side effects.
// The macro's temporaries have long, suffixed names so that they cannot
// capture identifiers appearing in `vec` (e.g. a vector that is itself named
// `data` or `cap`), and the capacity is kept in size_t instead of int.
#define vector_push(vec, value)                                               \
    do {                                                                      \
        if ((vec).size >= (vec).cap) {                                        \
            size_t vector_push_cap_ = (vec).cap ? (vec).cap * 2 : 8;          \
            void *vector_push_data_ = NULL;                                   \
            /* Only allocate when neither the doubling nor the byte count */  \
            /* has overflowed size_t. */                                      \
            if (vector_push_cap_ > (vec).cap &&                               \
                vector_push_cap_ <= SIZE_MAX / sizeof(*(vec).data)) {         \
                vector_push_data_ = realloc(                                  \
                    (vec).data, vector_push_cap_ * sizeof(*(vec).data));      \
            }                                                                 \
            if (!vector_push_data_) {                                         \
                fprintf(stderr, "vector_push: realloc failed\n");             \
                exit(1);                                                      \
            }                                                                 \
            (vec).cap = vector_push_cap_;                                     \
            (vec).data = vector_push_data_;                                   \
        }                                                                     \
        (vec).data[(vec).size++] = (value);                                   \
    } while (0)
// Removes the last element and evaluates to it.
// The vector must not be empty: size is decremented without any check.
#define vector_pop(vec) \
((vec).data[--(vec).size])
// Evaluates to element `idx` (usable as an lvalue); no bounds check is done.
#define vector_at(vec, idx) \
(((vec).data)[idx])
// Evaluates to the index of the element that `ptr` points to, computed as
// the pointer difference from the start of the storage.
#define vector_idx(vec, ptr) \
((ptr) - (vec).data)
// Releases the vector's storage and returns it to the empty state
// (size 0, cap 0, data NULL). The elements themselves are not freed.
#define vector_free(vec)       \
    do {                       \
        free((vec).data);      \
        (vec).size = 0;        \
        (vec).cap = 0;         \
        (vec).data = NULL;     \
    } while (0)
#endif