feat(parser): 启用parser和ast模块并重构解析器结构

- 在cbuild.toml中启用parser和ast依赖项
- 将AST内置类型枚举重命名为SCC_AST_BUILTIN_TYPE_*前缀格式
- 修复ast_def.h中的类型字段命名,将builtin改为type
- 添加逗号操作符支持到表达式操作符枚举中
- 更新字面量表达式的lexeme字段为const char*指针和owned标志
- 重构解析器头文件结构,分离为parser.h、parser_utils.h、scc_sema.h等
- 实现新的解析器工具函数,包括预览、消费、回溯等功能
- 更新声明解析逻辑,使用新的解析器接口进行token处理
- 添加符号表语义分析功能框架
- 修复词法分析器中token移动时的空指针检查
- 统一使用scc_tree_dump_printf替代直接的scc_printf调用
This commit is contained in:
zzy
2026-03-09 15:25:12 +08:00
parent a805814d3f
commit 1fceeca011
28 changed files with 2759 additions and 1987 deletions

View File

@@ -1,113 +0,0 @@
/**
* @file parser.h
*/
#ifndef __SCC_PARSER_H__
#define __SCC_PARSER_H__
#include "scc_ast.h"
#include <lexer.h>
/**
* @brief Parser state.
*/
typedef struct scc_parser {
scc_lexer_stream_t *lex_stream; // lexer token stream
scc_sema_callbacks_t sema_callbacks; // semantic-analysis callbacks
scc_ast_translation_unit_t *translation_unit; // translation unit (root node)
cbool has_error; // whether an error has occurred
} scc_parser_t;
/**
 * @brief Check the type of the stream's current token.
 * @param stream token stream whose current token is inspected
 * @param type   token type to compare against
 * @return true when the current token's type equals @p type
 */
static inline cbool scc_parse_is(scc_lexer_stream_t *stream,
                                 scc_tok_type_t type) {
    return scc_lexer_stream_current(stream)->type == type;
}
/**
 * @brief Check the type of a look-ahead token.
 * @param stream token stream to look into
 * @param n      look-ahead index handed to scc_lexer_stream_peek
 * @param type   token type to compare against
 * @return true when the peeked token's type equals @p type
 */
static inline cbool scc_parse_peek_is(scc_lexer_stream_t *stream, usize n,
                                      scc_tok_type_t type) {
    return scc_lexer_stream_peek(stream, n)->type == type;
}
/**
 * @brief Consume the current token only when it has the expected type.
 * @param stream token stream to read from
 * @param type   token type that must match for the token to be consumed
 * @return true when the token matched and was consumed, false otherwise
 */
static inline cbool scc_parse_consume_if(scc_lexer_stream_t *stream,
                                         scc_tok_type_t type) {
    if (!scc_parse_is(stream, type)) {
        return false;
    }
    scc_lexer_stream_consume(stream);
    return true;
}
/**
 * @brief Consume the current token and hand it back to the caller.
 * @param stream token stream to read from
 * @return the token that was current before the consume call
 */
static inline const scc_lexer_tok_t *
scc_parse_consume(scc_lexer_stream_t *stream) {
    const scc_lexer_tok_t *current = scc_lexer_stream_current(stream);

    scc_lexer_stream_consume(stream);
    return current;
}
/**
* @brief 初始化解析器
* @param parser 解析器实例
* @param lexer 词法分析器实例
* @param callbacks 语义分析回调(可为 null)
*/
void scc_parser_init(scc_parser_t *parser, scc_lexer_stream_t *lexer,
scc_sema_callbacks_t *callbacks);
/**
* @brief 销毁解析器
* @param parser 解析器实例
*/
void scc_parser_drop(scc_parser_t *parser);
/**
* @brief 解析整个翻译单元
* @param parser 解析器实例
* @return 翻译单元 AST 节点
*/
scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser);
/**
* @brief 解析声明
* @param parser 解析器实例
* @return 声明 AST 节点
*/
scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser);
/**
* @brief 解析语句
* @param parser 解析器实例
* @return 语句 AST 节点
*/
scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser);
/**
* @brief 解析表达式
* @param parser 解析器实例
* @return 表达式 AST 节点
*/
scc_ast_expr_t *scc_parse_expression(scc_parser_t *parser);
/**
* @brief 解析类型
* @param parser 解析器实例
* @return 类型 AST 节点
*/
scc_ast_type_t *scc_parse_type(scc_parser_t *parser);
cbool scc_parse_is_declaration_start(scc_parser_t *parser, usize offset);
#endif /* __SCC_PARSER_H__ */

View File

@@ -0,0 +1,69 @@
#ifndef __SCC_PARSER_UTILS_H__
#define __SCC_PARSER_UTILS_H__
#include "scc_parser.h"
/**
 * @brief Fetch a reference to a token via scc_ring_unsafe_peek_ref.
 *
 * NOTE(review): presumably this looks at the token under the ring's probe
 * without advancing it -- confirm against the ring macro.
 *
 * @param parser parser whose token ring is queried
 * @return the token reference, or null when the ring macro reports failure
 */
static inline const scc_lexer_tok_t *scc_parser_peek(scc_parser_t *parser) {
    const scc_lexer_tok_t *peeked = null;
    cbool found = false;

    scc_ring_unsafe_peek_ref(*parser->ring, peeked, found);
    return found ? peeked : null;
}
/**
 * @brief Fetch a reference to a token via scc_ring_unsafe_next_ref.
 *
 * NOTE(review): presumably this advances the ring's probe past the returned
 * token without consuming it -- confirm against the ring macro.
 *
 * @param parser parser whose token ring is queried
 * @return the token reference, or null when the ring macro reports failure
 */
static inline const scc_lexer_tok_t *scc_parser_next(scc_parser_t *parser) {
    const scc_lexer_tok_t *fetched = null;
    cbool found = false;

    scc_ring_unsafe_next_ref(*parser->ring, fetched, found);
    return found ? fetched : null;
}
/**
 * @brief Consume the peeked token only when it has the expected type.
 *
 * On a match the token is dropped with scc_lexer_tok_drop and then removed
 * from the ring; otherwise the ring is left as it was.
 *
 * @param parser parser whose token ring is read
 * @param type   token type that must match for the token to be consumed
 * @return true when a token matched and was consumed; false when the type
 *         differs or no token could be peeked
 */
static inline cbool scc_parser_consume_if(scc_parser_t *parser,
                                          scc_tok_type_t type) {
    cbool ok = false;
    scc_lexer_tok_t *tok = null;

    scc_ring_unsafe_peek_ref(*parser->ring, tok, ok);
    if (ok == false || tok == null) {
        // This function returns cbool, so report failure as false, not null.
        return false;
    }
    if (tok->type != type) {
        return false;
    }

    // Release the token's resources before its slot is consumed.
    scc_lexer_tok_drop(tok);
    scc_ring_unsafe_pure_next_consume(*parser->ring);
    return true;
}
/*
 * Save the ring's current probe value into parser->checkpoint so that
 * scc_parser_restore() can put it back later. Only one checkpoint is kept;
 * a second call overwrites the first.
 */
static inline void scc_parser_store(scc_parser_t *parser) {
parser->checkpoint = _scc_ring_probe(*parser->ring);
}
/*
 * Write parser->checkpoint back into the ring's probe, undoing any probe
 * movement made since the matching scc_parser_store() call.
 * NOTE(review): assumes scc_parser_store() ran first and that nothing was
 * consumed from the ring in between -- confirm against callers.
 */
static inline void scc_parser_restore(scc_parser_t *parser) {
_scc_ring_probe(*parser->ring) = parser->checkpoint;
}
/**
 * @brief Take the next token out of the ring and move it into @p tok.
 *
 * @param parser parser whose token ring is read
 * @param tok    destination that receives the token via scc_lexer_tok_move;
 *               left untouched when no token is available
 * @return true when a token was moved into @p tok, false otherwise
 */
static inline cbool scc_parser_next_consume(scc_parser_t *parser,
                                            scc_lexer_tok_t *tok) {
    cbool ok = false;
    scc_lexer_tok_t *raw_tok_ref = null;

    scc_ring_unsafe_next_ref_consume(*parser->ring, raw_tok_ref, ok);
    if (ok == false || raw_tok_ref == null) {
        // Nothing was fetched: do not hand a null source to the move helper.
        return false;
    }
    scc_lexer_tok_move(tok, raw_tok_ref);
    return true;
}
/*
 * Commit the tokens looked at so far by calling scc_ring_consume on the
 * parser's token ring.
 */
static inline void scc_parser_commit(scc_parser_t *parser) {
// Memory leak (flagged by the original author).
// NOTE(review): presumably the consumed tokens are never passed through
// scc_lexer_tok_drop here, unlike in scc_parser_consume_if -- confirm.
scc_ring_consume(*parser->ring);
}
/*
 * Call scc_ring_reset on the parser's token ring.
 * NOTE(review): presumably this rewinds the probe so that tokens that were
 * peeked but not consumed are seen again -- confirm against scc_ring_reset.
 */
static inline void scc_parser_reset(scc_parser_t *parser) {
scc_ring_reset(*parser->ring);
}
#endif /* __SCC_PARSER_UTILS_H__ */

View File

@@ -0,0 +1,75 @@
#ifndef __SCC_PARSER_H__
#define __SCC_PARSER_H__
#include "scc_sema.h"
#include <scc_ast.h>
#include <scc_core_ring.h>
#include <scc_lexer_token.h>
/**
* @brief Parser state.
*/
typedef struct scc_parser {
scc_lexer_tok_ring_t *ring; // token ring the parser reads from
usize checkpoint; // ring probe value saved by scc_parser_store()
scc_sema_callbacks_t sema_callbacks; // semantic-analysis callbacks
scc_ast_translation_unit_t *translation_unit; // translation unit (AST root)
int errcode; // 0 means no error; non-zero stops translation-unit parsing
} scc_parser_t;
/**
* @brief Initialize a parser.
* @param parser parser instance
* @param tok_ring token ring the parser will read tokens from
* @param callbacks semantic-analysis callbacks (may be null)
*/
void scc_parser_init(scc_parser_t *parser, scc_lexer_tok_ring_t *tok_ring,
scc_sema_callbacks_t *callbacks);
/**
* @brief 销毁解析器
* @param parser 解析器实例
*/
void scc_parser_drop(scc_parser_t *parser);
/**
* @brief 解析整个翻译单元
* @param parser 解析器实例
* @return 翻译单元 AST 节点
*/
scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser);
/**
* @brief 解析声明
* @param parser 解析器实例
* @return 声明 AST 节点
*/
scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser);
/**
* @brief 解析语句
* @param parser 解析器实例
* @return 语句 AST 节点
*/
scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser);
/**
* @brief 解析表达式
* @param parser 解析器实例
* @return 表达式 AST 节点
*/
scc_ast_expr_t *scc_parse_expression(scc_parser_t *parser);
/**
* @brief 解析类型
* @param parser 解析器实例
* @return 类型 AST 节点
*/
scc_ast_type_t *scc_parse_type(scc_parser_t *parser);
/**
 * @brief Placeholder for parsing a type-name; not implemented yet.
 * @param parser parser instance (currently unused)
 * @return always null
 */
static inline scc_ast_type_t *scc_parse_type_name(scc_parser_t *parser) {
    (void)parser; // TODO: implement type-name parsing
    return null;
}
#endif /* __SCC_PARSER_H__ */

View File

@@ -0,0 +1,25 @@
#ifndef __SCC_SEMA_H__
#define __SCC_SEMA_H__
#include <scc_ast.h>
/**
* @brief Semantic-analysis callback function type.
*
* Receives the user context, the AST node type, and an untyped pointer to
* the node itself.
*/
typedef void (*scc_sema_callback_t)(void *context,
scc_ast_node_type_t node_type, void *node);
/**
* @brief Set of semantic-analysis callbacks.
*/
typedef struct scc_sema_callbacks {
scc_sema_callback_t on_decl; // hook for declaration nodes
scc_sema_callback_t on_stmt; // hook for statement nodes
scc_sema_callback_t on_expr; // hook for expression nodes
scc_sema_callback_t on_type; // hook for type nodes
void *context; // opaque pointer passed as the first argument of every hook
} scc_sema_callbacks_t;
void scc_sema_init(scc_sema_callbacks_t *callbacks);
#endif /* __SCC_SEMA_H__ */

View File

@@ -0,0 +1,28 @@
#ifndef __SCC_SEMA_SYMTAB_H__
#define __SCC_SEMA_SYMTAB_H__
#include <scc_ast.h>
#include <scc_utils.h>
/*
 * One scope of the symbol table: its own symbol hashtable plus a link to the
 * enclosing scope.
 * NOTE(review): the struct tag is scc_parser_scope while the typedef is
 * scc_sema_scope_t -- looks like a leftover name; confirm before renaming.
 */
typedef struct scc_parser_scope {
scc_hashtable_t symbols; // symbols declared directly in this scope
struct scc_parser_scope *parent; // enclosing scope; null for the root scope
} scc_sema_scope_t;
/*
 * Symbol table: an embedded root scope and a pointer to the scope that is
 * currently active.
 */
typedef struct {
scc_sema_scope_t root_scope; // outermost scope, stored inline
scc_sema_scope_t *current_scope; // active scope; starts at &root_scope
} scc_sema_symtab_t;
void scc_sema_symtab_init(scc_sema_symtab_t *symtab);
void scc_sema_symtab_drop(scc_sema_symtab_t *symtab);
void scc_sema_symtab_enter_scope(scc_sema_symtab_t *symtab);
void scc_sema_symtab_leave_scope(scc_sema_symtab_t *symtab);
scc_ast_node_t *scc_sema_symtab_add_symbol(scc_sema_symtab_t *symtab,
const char *name,
scc_ast_node_t *ast_node_ref);
scc_ast_node_t *scc_sema_symtab_lookup_symbol(scc_sema_symtab_t *symtab,
const char *name);
#endif /* __SCC_SEMA_SYMTAB_H__ */

View File

@@ -1,156 +1,159 @@
#include <parser.h>
/*
A.2.2 Declarations
(6.7) declaration:
declaration-specifiers init-declarator-list(opt) ;
(6.7) declaration-specifiers:
storage-class-specifier declaration-specifiers(opt)
type-specifier declaration-specifiers(opt)
type-qualifier declaration-specifiers(opt)
function-specifier declaration-specifiers(opt)
(6.7) init-declarator-list:
init-declarator
init-declarator-list , init-declarator
(6.7) init-declarator:
declarator
declarator = initializer
(6.7.1) storage-class-specifier:
typedef
extern
static
auto
register
(6.7.2) type-specifier:
void
char
short
int
long
float
double
signed
unsigned
_Bool
_Complex
struct-or-union-specifier
enum-specifier
typedef-name
(6.7.2.1) struct-or-union-specifier:
struct-or-union identifier(opt) { struct-declaration-list }
struct-or-union identifier
(6.7.2.1) struct-or-union:
struct
union
(6.7.2.1) struct-declaration-list:
struct-declaration
struct-declaration-list struct-declaration
(6.7.2.1) struct-declaration:
specifier-qualifier-list struct-declarator-list ;
(6.7.2.1) specifier-qualifier-list:
type-specifier specifier-qualifier-list(opt)
type-qualifier specifier-qualifier-list(opt)
(6.7.2.1) struct-declarator-list:
struct-declarator
struct-declarator-list , struct-declarator
(6.7.2.1) struct-declarator:
declarator
declarator(opt) : constant-expression
(6.7.2.2) enum-specifier:
enum identifier(opt) { enumerator-list }
enum identifier(opt) { enumerator-list ,}
enum identifier
(6.7.2.2) enumerator-list:
enumerator
enumerator-list , enumerator
(6.7.2.2) enumerator:
enumeration-constant
enumeration-constant = constant-expression
(6.7.3) type-qualifier:
const
restrict
volatile
(6.7.4) function-specifier:
inline
(6.7.5) declarator:
pointer(opt) direct-declarator
(6.7.5) direct-declarator:
identifier
( declarator )
direct-declarator [ type-qualifier-list(opt)
assignment-expression(opt) ]
direct-declarator [ static type-qualifier-list(opt)
assignment-expression ]
direct-declarator [ type-qualifier-list static
assignment-expression ]
direct-declarator [ type-qualifier-list(opt) *]
direct-declarator ( parameter-type-list )
direct-declarator ( identifier-list(opt) )
(6.7.5) pointer:
* type-qualifier-list(opt)
* type-qualifier-list(opt) pointer
(6.7.5) type-qualifier-list:
type-qualifier
type-qualifier-list type-qualifier
(6.7.5) parameter-type-list:
parameter-list
parameter-list , ...
(6.7.5) parameter-list:
parameter-declaration
parameter-list , parameter-declaration
(6.7.5) parameter-declaration:
declaration-specifiers declarator
declaration-specifiers abstract-declarator(opt)
(6.7.5) identifier-list:
identifier
identifier-list , identifier
(6.7.6) type-name:
specifier-qualifier-list abstract-declarator(opt)
(6.7.6) abstract-declarator:
pointer
pointer(opt) direct-abstract-declarator
(6.7.6) direct-abstract-declarator:
( abstract-declarator )
direct-abstract-declarator(opt) [ type-qualifier-list (opt)
assignment-expression(opt) ]
direct-abstract-declarator(opt) [static type-qualifier-list(opt)
assignment-expression ]
direct-abstract-declaratoropt [ type-qualifier-list static
assignment-expression ]
direct-abstract-declarator(opt) [ * ]
direct-abstract-declarator(opt) ( parameter-type-list(opt) )
(6.7.7) typedef-name:
identifier
(6.7.8) initializer:
assignment-expression
{ initializer-list }
{ initializer-list , }
(6.7.8) initializer-list:
designation(opt) initializer
initializer-list , designation(opt) initializer
(6.7.8) designation:
designator-list =
(6.7.8) designator-list:
designator
designator-list designator
(6.7.8) designator:
[ constant-expression ]
. identifier
(6.7) declaration:
declaration-specifiers init-declarator-list(opt) ;
(6.7) declaration-specifiers:
storage-class-specifier declaration-specifiers(opt)
type-specifier declaration-specifiers(opt)
type-qualifier declaration-specifiers(opt)
function-specifier declaration-specifiers(opt)
(6.7) init-declarator-list:
init-declarator
init-declarator-list , init-declarator
(6.7) init-declarator:
declarator
declarator = initializer
(6.7.1) storage-class-specifier:
typedef
extern
static
auto
register
(6.7.2) type-specifier:
void
char
short
int
long
float
double
signed
unsigned
_Bool
_Complex
struct-or-union-specifier
enum-specifier
typedef-name
(6.7.2.1) struct-or-union-specifier:
struct-or-union identifier(opt) { struct-declaration-list }
struct-or-union identifier
(6.7.2.1) struct-or-union:
struct
union
(6.7.2.1) struct-declaration-list:
struct-declaration
struct-declaration-list struct-declaration
(6.7.2.1) struct-declaration:
specifier-qualifier-list struct-declarator-list ;
(6.7.2.1) specifier-qualifier-list:
type-specifier specifier-qualifier-list(opt)
type-qualifier specifier-qualifier-list(opt)
(6.7.2.1) struct-declarator-list:
struct-declarator
struct-declarator-list , struct-declarator
(6.7.2.1) struct-declarator:
declarator
declarator(opt) : constant-expression
(6.7.2.2) enum-specifier:
enum identifier(opt) { enumerator-list }
enum identifier(opt) { enumerator-list ,}
enum identifier
(6.7.2.2) enumerator-list:
enumerator
enumerator-list , enumerator
(6.7.2.2) enumerator:
enumeration-constant
enumeration-constant = constant-expression
(6.7.3) type-qualifier:
const
restrict
volatile
(6.7.4) function-specifier:
inline
(6.7.5) declarator:
pointer(opt) direct-declarator
(6.7.5) direct-declarator:
identifier
( declarator )
direct-declarator [ type-qualifier-list(opt)
assignment-expression(opt) ]
direct-declarator [ static type-qualifier-list(opt)
assignment-expression ]
direct-declarator [ type-qualifier-list static
assignment-expression ]
direct-declarator [ type-qualifier-list(opt) *]
direct-declarator ( parameter-type-list )
direct-declarator ( identifier-list(opt) )
(6.7.5) pointer:
* type-qualifier-list(opt)
* type-qualifier-list(opt) pointer
(6.7.5) type-qualifier-list:
type-qualifier
type-qualifier-list type-qualifier
(6.7.5) parameter-type-list:
parameter-list
parameter-list , ...
(6.7.5) parameter-list:
parameter-declaration
parameter-list , parameter-declaration
(6.7.5) parameter-declaration:
declaration-specifiers declarator
declaration-specifiers abstract-declarator(opt)
(6.7.5) identifier-list:
identifier
identifier-list , identifier
(6.7.6) type-name:
specifier-qualifier-list abstract-declarator(opt)
(6.7.6) abstract-declarator:
pointer
pointer(opt) direct-abstract-declarator
(6.7.6) direct-abstract-declarator:
( abstract-declarator )
direct-abstract-declarator(opt) [ type-qualifier-list (opt)
assignment-expression(opt) ]
direct-abstract-declarator(opt) [static type-qualifier-list(opt)
assignment-expression ]
direct-abstract-declaratoropt [ type-qualifier-list static
assignment-expression ]
direct-abstract-declarator(opt) [ * ]
direct-abstract-declarator(opt) ( parameter-type-list(opt) )
(6.7.7) typedef-name:
identifier
(6.7.8) initializer:
assignment-expression
{ initializer-list }
{ initializer-list , }
(6.7.8) initializer-list:
designation(opt) initializer
initializer-list , designation(opt) initializer
(6.7.8) designation:
designator-list =
(6.7.8) designator-list:
designator
designator-list designator
(6.7.8) designator:
[ constant-expression ]
. identifier
A.2.4 External definitions
(6.9) translation-unit:
external-declaration
translation-unit external-declaration
(6.9) external-declaration:
function-definition
declaration
(6.9.1) function-definition:
declaration-specifiers declarator declaration-list(opt)
compound-statement
(6.9.1) declaration-list:
declaration
declaration-list declaration
(6.9) translation-unit:
external-declaration
translation-unit external-declaration
(6.9) external-declaration:
function-definition
declaration
(6.9.1) function-definition:
declaration-specifiers declarator declaration-listopt compound-statement
(6.9.1) declaration-list:
declaration
declaration-list declaration
*/
#include <parser_utils.h>
#include <scc_parser.h>
scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) {
/**
* ISO/IEC 9899:TC3
@@ -171,22 +174,36 @@ scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) {
* declarator
* declarator = initializer
*/
if (!scc_parse_is_declaration_start(parser, 0)) {
cbool ok;
const scc_lexer_tok_t *tok_ptr = scc_parser_next(parser);
scc_lexer_tok_t tok;
if (tok_ptr == null) {
return null;
}
scc_ast_type_t *type = scc_parse_type(parser);
scc_ast_type_t *type = scc_malloc(sizeof(scc_ast_type_t));
if (type == null) {
LOG_ERROR("Failed to parse type");
LOG_FATAL("out of memory");
return null;
}
const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream);
if (!scc_lexer_tok_match(tok, SCC_TOK_IDENT)) {
LOG_ERROR("Expected identifier, got %s", scc_get_tok_name(tok->type));
if (tok_ptr->type != SCC_TOK_INT) {
// TODO back it
scc_parser_reset(parser);
return null;
} else {
type->base.type = SCC_AST_TYPE_BUILTIN;
type->base.loc = tok_ptr->loc;
type->builtin.type = SCC_AST_BUILTIN_TYPE_INT;
type->builtin.quals = (scc_ast_decl_specifier_t){0};
}
scc_parser_commit(parser);
ok = scc_parser_next_consume(parser, &tok);
if (ok == false) {
return null;
}
scc_lexer_stream_consume(parser->lex_stream);
scc_ast_decl_t *decl = scc_malloc(sizeof(scc_ast_decl_t));
/*
@@ -202,27 +219,28 @@ scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) {
direct-declarator ( parameter-type-list )
direct-declarator ( identifier-listopt )
*/
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) {
if (!scc_parser_consume_if(parser, SCC_TOK_L_PAREN)) {
// TODO
if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
decl->base.type = SCC_AST_DECL_VAR;
decl->var.type = type;
decl->var.name = tok->value.cstr.data;
decl->var.name = scc_cstring_as_cstr(&tok.lexeme);
decl->var.init = null;
return decl;
} else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_ASSIGN)) {
goto RETURN;
} else if (scc_parser_consume_if(parser, SCC_TOK_ASSIGN)) {
decl->base.type = SCC_AST_DECL_VAR;
decl->var.type = type;
decl->var.name = tok->value.cstr.data;
decl->var.init = scc_parse_expression(parser);
return decl;
decl->var.name = scc_cstring_as_cstr(&tok.lexeme);
decl->var.init = null; // scc_parse_expression(parser);
goto RETURN;
}
// TODO
return null;
}
// function decl
decl->base.type = SCC_AST_DECL_FUNC;
decl->func.name = tok->value.cstr.data;
decl->func.name = scc_cstring_as_cstr(&tok.lexeme);
decl->func.type = scc_malloc(sizeof(scc_ast_type_t));
decl->func.type->base.type = SCC_AST_TYPE_FUNCTION;
scc_vec_init(decl->func.type->function.param_types);
@@ -231,20 +249,32 @@ scc_ast_decl_t *scc_parse_declaration(scc_parser_t *parser) {
decl->func.type->function.is_variadic = false;
// TODO param type
scc_parse_consume_if(parser->lex_stream, SCC_TOK_VOID);
scc_parser_consume_if(parser, SCC_TOK_VOID);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) {
if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) {
return null;
}
if (!scc_parse_is(parser->lex_stream, SCC_TOK_L_BRACE)) {
tok_ptr = scc_parser_peek(parser);
if (tok_ptr == null) {
return null;
}
if (tok_ptr->type != SCC_TOK_L_BRACE) {
if (tok_ptr->type == SCC_TOK_SEMICOLON) {
decl->func.body = null;
} else {
return null;
}
}
decl->func.body = scc_parse_statement(parser);
Assert(decl->func.type != null);
Assert(decl->func.type->base.type == SCC_AST_TYPE_FUNCTION);
Assert(decl->func.body != null);
Assert(decl->func.body->base.type == SCC_AST_STMT_COMPOUND);
RETURN:
parser->sema_callbacks.on_decl(parser->sema_callbacks.context,
decl->base.type, decl);
return decl;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,10 +1,6 @@
/*
415
ISO/IEC 9899:TC3
Committee Draft — Septermber 7, 2007
WG14/N1256
A.2.3 Statements
(6.8)
statement:
labeled-statement
@@ -50,37 +46,37 @@ A.2.3 Statements
break ;
return expression(opt) ;
*/
#include <parser.h>
#include <parser_utils.h>
#include <scc_parser.h>
static inline scc_ast_stmt_t *ast_stmt_alloc() {
scc_ast_stmt_t *stmt = (scc_ast_stmt_t *)scc_malloc(sizeof(scc_ast_stmt_t));
Assert(stmt != null);
stmt->base.type = SCC_AST_TRANSLATION_UNIT;
stmt->base.type = SCC_AST_UNKNOWN;
stmt->base.loc = scc_pos_create();
return stmt;
}
static inline scc_ast_expr_t *ast_parse_paren_expression(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) {
if (!scc_parser_consume_if(parser, SCC_TOK_L_PAREN)) {
LOG_ERROR("Expected '(' before like `( expression )` .");
}
scc_ast_expr_t *ret = scc_parse_expression(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) {
LOG_ERROR("Expected ')' after like `( expression )` .");
if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) {
LOG_ERROR("Expected ')' before like `( expression )` .");
}
return ret;
}
static scc_ast_stmt_t *parse_label_statement(scc_parser_t *parser) {
const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_IDENT)) {
scc_lexer_tok_t tok = {0};
if (!scc_parser_next_consume(parser, &tok)) {
return null;
}
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) {
LOG_ERROR("Expected constant expression after case.");
if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) {
return null;
}
@@ -93,21 +89,20 @@ static scc_ast_stmt_t *parse_label_statement(scc_parser_t *parser) {
Assert(stmt != null);
stmt->base.type = SCC_AST_STMT_LABEL;
// TODO maybe use cstring
stmt->label_stmt.label = tok->value.cstr.data;
stmt->label_stmt.label = scc_cstring_as_cstr(&tok.lexeme);
stmt->label_stmt.stmt = statement;
return stmt;
}
static scc_ast_stmt_t *parse_case_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_CASE)) {
if (!scc_parser_consume_if(parser, SCC_TOK_CASE)) {
return null;
}
scc_ast_expr_t *expr = null;
// TODO = scc_parser_constant_expression();
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) {
LOG_ERROR("Expected constant expression after case.");
return null;
}
@@ -126,11 +121,11 @@ static scc_ast_stmt_t *parse_case_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_default_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_DEFAULT)) {
if (!scc_parser_consume_if(parser, SCC_TOK_DEFAULT)) {
return null;
}
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_COLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_COLON)) {
LOG_ERROR("Expected constant expression after case.");
return null;
}
@@ -148,14 +143,14 @@ static scc_ast_stmt_t *parse_default_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_compound_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_BRACE)) {
if (!scc_parser_consume_if(parser, SCC_TOK_L_BRACE)) {
return null;
}
scc_ast_stmt_t *stmt = ast_stmt_alloc();
stmt->base.type = SCC_AST_STMT_COMPOUND;
scc_vec_init(stmt->compound.block_items);
while (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_BRACE)) {
while (!scc_parser_consume_if(parser, SCC_TOK_R_BRACE)) {
/// TODO
// scc_parse_is_decl();
scc_ast_node_t *ret = null;
@@ -175,7 +170,7 @@ static scc_ast_stmt_t *parse_compound_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_IF)) {
if (!scc_parser_consume_if(parser, SCC_TOK_IF)) {
return null;
}
@@ -186,7 +181,7 @@ static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) {
stmt->base.type = SCC_AST_STMT_IF;
stmt->if_stmt.cond = expression;
stmt->if_stmt.then_stmt = statement;
if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_ELSE)) {
if (scc_parser_consume_if(parser, SCC_TOK_ELSE)) {
stmt->if_stmt.opt_else_stmt = scc_parse_statement(parser);
} else {
stmt->if_stmt.opt_else_stmt = null;
@@ -195,7 +190,7 @@ static scc_ast_stmt_t *parse_if_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_switch_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SWITCH)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SWITCH)) {
return null;
}
@@ -210,7 +205,7 @@ static scc_ast_stmt_t *parse_switch_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_while_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_WHILE)) {
if (!scc_parser_consume_if(parser, SCC_TOK_WHILE)) {
return null;
}
@@ -225,13 +220,13 @@ static scc_ast_stmt_t *parse_while_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_do_while_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_DO)) {
if (!scc_parser_consume_if(parser, SCC_TOK_DO)) {
return null;
}
scc_ast_stmt_t *statement = scc_parse_statement(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_WHILE)) {
if (!scc_parser_consume_if(parser, SCC_TOK_WHILE)) {
LOG_ERROR("Expected 'while' after do.");
// TODO 使用更好的错误处理,未来应当采用更好的内存管理器
scc_free(statement);
@@ -247,7 +242,7 @@ static scc_ast_stmt_t *parse_do_while_statement(scc_parser_t *parser) {
}
static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) {
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_FOR)) {
if (!scc_parser_consume_if(parser, SCC_TOK_FOR)) {
return null;
}
@@ -256,7 +251,7 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) {
for ( declaration expression(opt) ; expression(opt) ) statement
*/
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_L_PAREN)) {
if (!scc_parser_consume_if(parser, SCC_TOK_L_PAREN)) {
LOG_ERROR("Expected '(' before like `( expression )` .");
}
@@ -266,19 +261,19 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) {
// TODO use decl or expr
stmt->for_stmt.init = (scc_ast_type_t *)scc_parse_expression(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
LOG_ERROR("Expected semicolon in for statement.");
}
stmt->for_stmt.cond = scc_parse_expression(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
LOG_ERROR("Expected semicolon in for statement.");
}
stmt->for_stmt.iter = scc_parse_expression(parser);
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_R_PAREN)) {
if (!scc_parser_consume_if(parser, SCC_TOK_R_PAREN)) {
LOG_ERROR("Expected ')' after like `( expression )` .");
}
@@ -290,28 +285,26 @@ static scc_ast_stmt_t *parse_for_statement(scc_parser_t *parser) {
static scc_ast_stmt_t *parse_jump_statement(scc_parser_t *parser) {
scc_ast_stmt_t *stmt = ast_stmt_alloc();
if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_GOTO)) {
if (scc_parser_consume_if(parser, SCC_TOK_GOTO)) {
stmt->base.type = SCC_AST_STMT_GOTO;
if (scc_parse_is(parser->lex_stream, SCC_TOK_IDENT)) {
const scc_lexer_tok_t *tok =
scc_lexer_stream_current(parser->lex_stream);
stmt->goto_stmt.label = tok->value.cstr.data;
scc_lexer_stream_consume(parser->lex_stream);
scc_lexer_tok_t tok = {0};
if (scc_parser_next_consume(parser, &tok)) {
stmt->goto_stmt.label = scc_cstring_as_cstr(&tok.lexeme);
} else {
LOG_ERROR("Expected label after goto.");
}
} else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_CONTINUE)) {
} else if (scc_parser_consume_if(parser, SCC_TOK_CONTINUE)) {
stmt->base.type = SCC_AST_STMT_CONTINUE;
} else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_BREAK)) {
} else if (scc_parser_consume_if(parser, SCC_TOK_BREAK)) {
stmt->base.type = SCC_AST_STMT_BREAK;
} else if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_RETURN)) {
} else if (scc_parser_consume_if(parser, SCC_TOK_RETURN)) {
stmt->base.type = SCC_AST_STMT_RETURN;
stmt->return_stmt.expr = scc_parse_expression(parser);
} else {
UNREACHABLE();
}
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
LOG_ERROR("Expected semicolon after jump statement.");
}
return stmt;
@@ -321,7 +314,7 @@ static scc_ast_stmt_t *parse_expression_statement(scc_parser_t *parser) {
scc_ast_stmt_t *stmt = ast_stmt_alloc();
stmt->base.type = SCC_AST_STMT_EXPR;
if (scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
stmt->expr.expr = null;
return stmt;
}
@@ -333,15 +326,20 @@ static scc_ast_stmt_t *parse_expression_statement(scc_parser_t *parser) {
return null;
}
if (!scc_parse_consume_if(parser->lex_stream, SCC_TOK_SEMICOLON)) {
if (!scc_parser_consume_if(parser, SCC_TOK_SEMICOLON)) {
LOG_ERROR("Expected semicolon after expression.");
}
return stmt;
}
scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
const scc_lexer_tok_t *tok = scc_lexer_stream_current(parser->lex_stream);
switch (tok->type) {
scc_ast_stmt_t *stmt;
const scc_lexer_tok_t *tok_ref;
tok_ref = scc_parser_peek(parser);
if (!tok_ref) {
return null;
}
switch (tok_ref->type) {
/*
(6.8.1)
labeled-statement:
@@ -350,15 +348,19 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
default : statement
*/
case SCC_TOK_IDENT:
// 注意需要检测下一个 token 是否为冒号,否则将需要判定成表达式语句
if (!scc_parse_peek_is(parser->lex_stream, 1, SCC_TOK_COLON)) {
tok_ref = scc_parser_next(parser);
if (tok_ref == null || tok_ref->type != SCC_TOK_COLON) {
break;
}
return parse_label_statement(parser);
case SCC_TOK_CASE:
return parse_case_statement(parser);
stmt = parse_label_statement(parser);
goto RETURN;
case SCC_TOK_CASE: {
stmt = parse_case_statement(parser);
goto RETURN;
}
case SCC_TOK_DEFAULT:
return parse_default_statement(parser);
stmt = parse_default_statement(parser);
goto RETURN;
/*
(6.8.2)
compound-statement:
@@ -373,7 +375,8 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
statement
*/
case SCC_TOK_L_BRACE:
return parse_compound_statement(parser);
stmt = parse_compound_statement(parser);
goto RETURN;
/*
(6.8.4)
selection-statement:
@@ -382,9 +385,11 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
switch ( expression ) statement
*/
case SCC_TOK_IF:
return parse_if_statement(parser);
stmt = parse_if_statement(parser);
goto RETURN;
case SCC_TOK_SWITCH:
return parse_switch_statement(parser);
stmt = parse_switch_statement(parser);
goto RETURN;
/*
(6.8.5)
iteration-statement:
@@ -396,11 +401,14 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
statement
*/
case SCC_TOK_WHILE:
return parse_while_statement(parser);
stmt = parse_while_statement(parser);
goto RETURN;
case SCC_TOK_DO:
return parse_do_while_statement(parser);
stmt = parse_do_while_statement(parser);
goto RETURN;
case SCC_TOK_FOR:
return parse_for_statement(parser);
stmt = parse_for_statement(parser);
goto RETURN;
/*
(6.8.6)
jump-statement:
@@ -413,7 +421,8 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
case SCC_TOK_CONTINUE:
case SCC_TOK_BREAK:
case SCC_TOK_RETURN:
return parse_jump_statement(parser);
stmt = parse_jump_statement(parser);
goto RETURN;
default:
break;
}
@@ -422,5 +431,10 @@ scc_ast_stmt_t *scc_parse_statement(scc_parser_t *parser) {
expression-statement:
expression(opt) ;
*/
return parse_expression_statement(parser);
stmt = parse_expression_statement(parser);
RETURN:
scc_parser_reset(parser);
parser->sema_callbacks.on_stmt(parser->sema_callbacks.context,
stmt->base.type, stmt);
return stmt;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +1,5 @@
/**
* @file parser.c
* @brief
*/
#include "parser.h"
#include <log.h>
#include <parser_utils.h>
#include <scc_parser.h>
static void dummy_sema_callback(void *context, scc_ast_node_type_t node_type,
void *node) {
(void)context;
@@ -17,11 +11,11 @@ static void dummy_sema_callback(void *context, scc_ast_node_type_t node_type,
/*
 * Assign `value` to `assigned_val` when it is non-null, otherwise assign
 * `default`. Every argument is parenthesized so that operator precedence in
 * the caller's expressions cannot change the meaning. `value` is evaluated
 * twice, so it must not have side effects.
 */
#define ASSIGN_PTR_OR_DEFAULT(assigned_val, value, default)                    \
    (assigned_val) = (value) ? (value) : (default)
void scc_parser_init(scc_parser_t *parser, scc_lexer_stream_t *lexer,
void scc_parser_init(scc_parser_t *parser, scc_lexer_tok_ring_t *tok_ring,
scc_sema_callbacks_t *callbacks) {
Assert(parser != null && lexer != null);
parser->lex_stream = lexer;
parser->has_error = false;
Assert(parser != null && tok_ring != null);
parser->ring = tok_ring;
parser->errcode = 0;
parser->translation_unit = null;
if (callbacks) {
ASSIGN_PTR_OR_DEFAULT(parser->sema_callbacks.on_decl,
@@ -38,15 +32,12 @@ void scc_parser_init(scc_parser_t *parser, scc_lexer_stream_t *lexer,
parser->sema_callbacks.on_stmt = dummy_sema_callback;
parser->sema_callbacks.on_expr = dummy_sema_callback;
parser->sema_callbacks.on_type = dummy_sema_callback;
parser->sema_callbacks.context = dummy_sema_callback;
parser->sema_callbacks.context = null;
}
// // ONLY FOR INIT TYPE
// parser->current_token.type = SCC_TOK_UNKNOWN;
}
void scc_parser_drop(scc_parser_t *parser) {
// TODO: 释放 AST 内存
(void)parser;
}
scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser) {
@@ -62,7 +53,8 @@ scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser) {
* same as
* Program := Declaration* Definition*
*/
do {
cbool matched = false;
while (1) {
scc_ast_decl_t *decl = scc_parse_declaration(parser);
if (decl != null) {
scc_vec_push(unit->declarations, decl);
@@ -70,11 +62,16 @@ scc_ast_translation_unit_t *scc_parse_translation_unit(scc_parser_t *parser) {
break;
// MAYBE return or next
}
} while (!scc_lexer_tok_match(scc_lexer_stream_current(parser->lex_stream),
SCC_TOK_EOF) &&
!parser->has_error);
if (parser->errcode != 0) { // FIXME errcode
break;
}
const scc_lexer_tok_t *tok = scc_parser_peek(parser);
if (tok == null || tok->type == SCC_TOK_EOF) {
break;
}
}
if (parser->has_error) {
if (parser->errcode) {
// TODO: 清理
scc_free(unit);
return null;

View File

@@ -0,0 +1,15 @@
#include <scc_sema.h>
#include <sema_symtab.h>
/**
 * @brief Prepare semantic-analysis callbacks backed by a fresh symbol table.
 *
 * Allocates a symbol table, initializes it, and stores it as the callback
 * context. All hooks are set to null (no analysis hooks are installed here).
 * If the allocation fails the error is logged and every field, including the
 * context, is left null.
 *
 * @param callbacks callback set to fill in
 */
void scc_sema_init(scc_sema_callbacks_t *callbacks) {
    // Start from a fully defined state so the failure path below never
    // leaves stale pointers behind.
    callbacks->context = null;
    callbacks->on_decl = null;
    callbacks->on_expr = null;
    callbacks->on_stmt = null;
    callbacks->on_type = null;

    scc_sema_symtab_t *sema_symtab = scc_malloc(sizeof(*sema_symtab));
    if (sema_symtab == null) {
        LOG_FATAL("out of memory");
        return;
    }
    // The table must be initialized before it is published as the context;
    // otherwise its scope pointers and hashtable are indeterminate.
    scc_sema_symtab_init(sema_symtab);
    callbacks->context = sema_symtab;
}

View File

@@ -0,0 +1,58 @@
#include <sema_symtab.h>
/**
 * @brief Initialize a symbol table with an empty root scope.
 *
 * The root scope has no parent and becomes the current scope.
 *
 * @param symtab symbol table to initialize
 */
void scc_sema_symtab_init(scc_sema_symtab_t *symtab) {
    symtab->root_scope.parent = null;
    // Each callback goes in the slot matching its cast type: the string hash
    // as the hash function, the string comparison as the key-equality test.
    // NOTE(review): assumes scc_hashtable interprets scc_strcmp's result the
    // way it expects for key equality -- confirm against the hashtable API.
    scc_hashtable_init(&symtab->root_scope.symbols,
                       (scc_hashtable_hash_func_t)scc_strhash32,
                       (scc_hashtable_equal_func_t)scc_strcmp);
    symtab->current_scope = &symtab->root_scope;
}
/**
 * @brief Tear down a symbol table and every scope still on its scope chain.
 *
 * Walks from the current scope up to the root, dropping each scope's symbol
 * hashtable. Scopes created by scc_sema_symtab_enter_scope are heap
 * allocated and are freed here; the root scope is embedded in the table and
 * is only dropped. On return `current_scope` is null.
 *
 * @param symtab symbol table to tear down
 */
void scc_sema_symtab_drop(scc_sema_symtab_t *symtab) {
    scc_sema_scope_t *scope = symtab->current_scope;
    while (scope != null) {
        // Read the parent first: the scope may be freed below.
        scc_sema_scope_t *parent = scope->parent;
        scc_hashtable_drop(&scope->symbols);
        if (scope != &symtab->root_scope) {
            scc_free(scope);
        }
        scope = parent;
    }
    symtab->current_scope = null;
}
/**
 * @brief Push a new, empty scope whose parent is the current scope.
 *
 * The new scope is heap allocated and becomes the current scope. If the
 * allocation fails the error is logged and the table is left unchanged.
 *
 * @param symtab symbol table to extend
 */
void scc_sema_symtab_enter_scope(scc_sema_symtab_t *symtab) {
    scc_sema_scope_t *scope = scc_malloc(sizeof(*scope));
    if (scope == null) {
        LOG_FATAL("out of memory");
        return;
    }
    scope->parent = symtab->current_scope;
    // Each callback goes in the slot matching its cast type: the string hash
    // as the hash function, the string comparison as the key-equality test.
    // NOTE(review): assumes scc_hashtable interprets scc_strcmp's result the
    // way it expects for key equality -- confirm against the hashtable API.
    scc_hashtable_init(&scope->symbols,
                       (scc_hashtable_hash_func_t)scc_strhash32,
                       (scc_hashtable_equal_func_t)scc_strcmp);
    symtab->current_scope = scope;
}
/**
 * @brief Pop the current scope and return to its parent.
 *
 * Leaving the root scope is rejected with an error log. Otherwise the
 * scope's symbol hashtable is dropped and the scope itself, which was
 * allocated by scc_sema_symtab_enter_scope, is freed.
 *
 * @param symtab symbol table whose current scope is left
 */
void scc_sema_symtab_leave_scope(scc_sema_symtab_t *symtab) {
    scc_sema_scope_t *scope = symtab->current_scope;
    if (scope == &symtab->root_scope) {
        LOG_ERROR("out of scope");
        return;
    }
    // Re-point the table before releasing the scope so current_scope never
    // refers to freed memory.
    symtab->current_scope = scope->parent;
    scc_hashtable_drop(&scope->symbols);
    scc_free(scope);
}
/**
 * @brief Bind `name` to an AST node in the current scope.
 *
 * @param symtab       symbol table
 * @param name         symbol name used as the hashtable key
 * @param ast_node_ref node to associate with the name
 * @return whatever scc_hashtable_set returns for this insertion
 */
scc_ast_node_t *scc_sema_symtab_add_symbol(scc_sema_symtab_t *symtab,
                                           const char *name,
                                           scc_ast_node_t *ast_node_ref) {
    scc_sema_scope_t *innermost = symtab->current_scope;
    return scc_hashtable_set(&innermost->symbols, name, ast_node_ref);
}
/**
 * @brief Resolve `name`, searching from the current scope outwards.
 *
 * Each scope on the parent chain is probed in turn; the first non-null hit
 * wins, so inner bindings take precedence over outer ones.
 *
 * @param symtab symbol table
 * @param name   symbol name to look up
 * @return the bound node, or null when no scope on the chain has the name
 */
scc_ast_node_t *scc_sema_symtab_lookup_symbol(scc_sema_symtab_t *symtab,
                                              const char *name) {
    scc_sema_scope_t *cursor = symtab->current_scope;
    while (cursor != null) {
        scc_ast_node_t *found = scc_hashtable_get(&cursor->symbols, name);
        if (found != null) {
            return found;
        }
        cursor = cursor->parent;
    }
    return null;
}

View File

@@ -0,0 +1,453 @@
#include <assert.h>
#include <scc_lexer.h>
#include <scc_parser.h>
#include <string.h>
#include <utest/acutest.h>
/* Common shape the parser entry points are cast to so that process_input can
 * drive any of them: takes the parser, returns the parsed node. */
typedef scc_ast_node_t *(*scc_parse_node_func)(scc_parser_t *parser);
/**
 * @brief Run one parser entry point over an in-memory source string.
 *
 * Builds the string stream -> lexer -> token ring -> parser pipeline, invokes
 * `parse_func`, asserts that the whole input was consumed, then tears the
 * pipeline down again.
 *
 * @param input      NUL-terminated source text
 * @param parse_func parser entry point to invoke
 * @return the node produced by `parse_func`
 */
static scc_ast_node_t *process_input(const char *input,
                                     scc_parse_node_func parse_func) {
    /* Source text -> character stream. */
    scc_sstream_t source;
    int res =
        scc_sstream_init_by_buffer(&source, input, strlen(input), false, 16);
    Assert(res == 0);

    /* Character stream -> lexer -> token ring. */
    scc_lexer_t lexer;
    scc_lexer_init(&lexer, scc_sstream_to_ring(&source));
    scc_lexer_tok_ring_t *tokens = scc_lexer_to_ring(&lexer, 8, false);

    /* Parse with the default (dummy) semantic callbacks. */
    scc_parser_t parser;
    scc_parser_init(&parser, tokens, null);
    scc_ast_node_t *node = parse_func(&parser);

    /* The entry point must have consumed every token. */
    cbool not_eof = false;
    scc_ring_not_eof(*parser.ring, not_eof);
    Assert(!not_eof == true);

    /* Tear down in the same order as before: ring, parser, lexer, stream. */
    scc_lexer_drop_ring(parser.ring);
    scc_parser_drop(&parser);
    scc_lexer_drop(&lexer);
    scc_sstream_drop(&source);
    return node;
}
/* Printf-style output sink signature; dump2buffer below has this shape. */
typedef void (*scc_tree_dump_output_t)(void *userdata, const char *fmt, ...);
/* Capacity in bytes of each dump buffer. */
#define BUFFER_SIZE (4096)
/* Dump of the hand-built expected tree (filled by SCC_CHECK_AST). */
char expect_buffer[BUFFER_SIZE];
/* Dump of the tree actually produced by the parser (filled by SCC_CHECK_AST). */
char output_buffer[BUFFER_SIZE];
/**
 * @brief Tree-dump sink that appends formatted text to a string buffer.
 *
 * @param _buffer NUL-terminated destination, treated as BUFFER_SIZE bytes
 * @param fmt     printf-style format string
 * @param ...     format arguments
 */
static void dump2buffer(void *_buffer, const char *fmt, ...) {
    char *dst = _buffer;
    /* Append after the text already present. */
    size_t used = strlen(dst);
    va_list ap;
    va_start(ap, fmt);
    scc_vsnprintf(dst + used, BUFFER_SIZE - used - 1, fmt, ap);
    va_end(ap);
}
/*
 * Parse `str` with `parse_func`, dump both the hand-built expected tree and
 * the parsed tree into text buffers, and check that the two dumps match.
 * Both dumps are reported when the check fails.
 *
 * The expansion is a single statement without a trailing semicolon, so the
 * caller's own `;` terminates it and the macro is safe in if/else bodies.
 */
#define SCC_CHECK_AST(expect_node_ptr, str, parse_func)                        \
    do {                                                                       \
        scc_ast_node_t *output_node_ptr =                                      \
            process_input((str), (scc_parse_node_func)(parse_func));           \
        scc_tree_dump_ctx_t ctx;                                               \
        expect_buffer[0] = '\n', expect_buffer[1] = '\0';                      \
        scc_tree_dump_ctx_init(&ctx, true, dump2buffer, expect_buffer);        \
        scc_ast_dump_node(&ctx, (expect_node_ptr));                            \
        scc_tree_dump_ctx_drop(&ctx);                                          \
        output_buffer[0] = '\n', output_buffer[1] = '\0';                      \
        scc_tree_dump_ctx_init(&ctx, true, dump2buffer, output_buffer);        \
        scc_ast_dump_node(&ctx, output_node_ptr);                              \
        scc_tree_dump_ctx_drop(&ctx);                                          \
        TEST_CHECK(strcmp(output_buffer, expect_buffer) == 0);                 \
        TEST_MSG("Expected: %s", expect_buffer);                               \
        TEST_MSG("Produced: %s", output_buffer);                               \
    } while (0)
/*
 * Declaration-level and translation-unit-level parser tests.
 * Each case builds the expected AST by hand and compares its dump with the
 * dump of the tree the parser produces for the given source text.
 */
static void test_parser_unit(void) {
// Plain variable declaration of builtin type int, no initializer.
scc_ast_decl_t int_decl = {
.base.type = SCC_AST_DECL_VAR,
.var.name = "a",
.var.init = null,
.var.type = &(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN,
.builtin.type = SCC_AST_BUILTIN_TYPE_INT},
};
SCC_CHECK_AST(&int_decl.base, "int a;", scc_parse_declaration);
// Function definition with an empty body and an empty parameter list.
scc_ast_decl_t func_decl = {
.base.type = SCC_AST_DECL_FUNC,
.func.name = "main",
.func.body =
&(scc_ast_stmt_t){
.base.type = SCC_AST_STMT_COMPOUND,
.compound.block_items = {0},
},
.func.type =
&(scc_ast_type_t){
.base.type = SCC_AST_TYPE_FUNCTION,
.function.is_variadic = false,
.function.param_types = {0},
.function.return_type =
&(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN,
.builtin.type = SCC_AST_BUILTIN_TYPE_INT},
},
};
SCC_CHECK_AST(&func_decl.base, "int main(void) {}", scc_parse_declaration);
// The same function wrapped in a translation unit; the declaration vector
// points at a stack array instead of being built with the vector helpers.
scc_ast_decl_t *decls[] = {&func_decl};
scc_ast_translation_unit_t tu = {
.base.type = SCC_AST_TRANSLATION_UNIT,
.declarations.data = decls,
.declarations.cap = 1,
.declarations.size = 1,
};
SCC_CHECK_AST(&tu.base, "int main(void) {}", scc_parse_translation_unit);
// SCC_CHECK_AST(&func_decl.base, "int main(void);", scc_parse_declaration);
// Function whose body is a single `return 0;`.
// The names func_decl, decls and tu below shadow the outer ones.
{
scc_ast_node_t *items[] = {
(scc_ast_node_t *)&(scc_ast_stmt_t){
.base.type = SCC_AST_STMT_RETURN,
.return_stmt.expr =
&(scc_ast_expr_t){
.base.type = SCC_AST_EXPR_INT_LITERAL,
.literal.lexme = "0",
},
},
};
scc_ast_decl_t func_decl = {
.base.type = SCC_AST_DECL_FUNC,
.func.name = "main",
.func.body =
&(scc_ast_stmt_t){
.base.type = SCC_AST_STMT_COMPOUND,
.compound.block_items.cap = 1,
.compound.block_items.size = 1,
.compound.block_items.data = items,
},
.func.type =
&(scc_ast_type_t){
.base.type = SCC_AST_TYPE_FUNCTION,
.function.is_variadic = false,
.function.param_types = {0},
.function.return_type =
&(scc_ast_type_t){.base.type = SCC_AST_TYPE_BUILTIN,
.builtin.type =
SCC_AST_BUILTIN_TYPE_INT},
},
};
scc_ast_decl_t *decls[] = {&func_decl};
scc_ast_translation_unit_t tu = {
.base.type = SCC_AST_TRANSLATION_UNIT,
.declarations.cap = 1,
.declarations.size = 1,
.declarations.data = decls,
};
SCC_CHECK_AST(&tu.base, "int main(void) { return 0; }",
scc_parse_translation_unit);
}
}
/* Build a binary-expression node `lhs op rhs` (returned by value). */
static scc_ast_expr_t make_binary(scc_ast_expr_op_t op, scc_ast_expr_t *lhs,
                                  scc_ast_expr_t *rhs) {
    return (scc_ast_expr_t){
        .base.type = SCC_AST_EXPR_BINARY,
        .binary.op = op,
        .binary.lhs = lhs,
        .binary.rhs = rhs,
    };
}
/* Build a unary-expression node applying `op` to `operand` (by value). */
static scc_ast_expr_t make_unary(scc_ast_expr_op_t op,
                                 scc_ast_expr_t *operand) {
    return (scc_ast_expr_t){
        .base.type = SCC_AST_EXPR_UNARY,
        .unary.op = op,
        .unary.operand = operand,
    };
}
static scc_ast_expr_t make_identifier(char *name) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_IDENTIFIER};
expr.identifier.name = name;
return expr;
}
static scc_ast_expr_t make_int_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_INT_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_float_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_FLOAT_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_string_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_STRING_LITERAL};
expr.literal.lexme = val;
return expr;
}
static scc_ast_expr_t make_char_literal(char *val) {
scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_CHAR_LITERAL};
expr.literal.lexme = val;
return expr;
}
/* Build a conditional-expression node `cond ? then_expr : else_expr`. */
static scc_ast_expr_t make_conditional(scc_ast_expr_t *cond,
                                       scc_ast_expr_t *then_expr,
                                       scc_ast_expr_t *else_expr) {
    return (scc_ast_expr_t){
        .base.type = SCC_AST_EXPR_COND,
        .cond.cond = cond,
        .cond.then_expr = then_expr,
        .cond.else_expr = else_expr,
    };
}
/*
 * Build a call-expression node.
 *
 * `callee` is the called expression. `args` is an optional, already
 * initialized argument vector: when non-null it is shallow-copied into the
 * node (the node then shares the vector's storage with the caller); when null
 * the node keeps a zero-initialized argument vector.
 */
static scc_ast_expr_t make_call(scc_ast_expr_t *callee,
                                scc_ast_expr_vec_t *args) {
    scc_ast_expr_t expr = {.base.type = SCC_AST_EXPR_CALL};
    expr.call.callee = callee;
    if (args != null) {
        expr.call.args = *args;
    }
    return expr;
}
/*
 * Expression-parser tests.
 * Each case builds the expected expression tree by hand with the make_*
 * helpers (or direct field assignment) and compares its dump with the dump of
 * the tree scc_parse_expression produces for the given source text.
 */
static void test_parser_expression(void) {
// 1. Primary expressions: identifier, integer constant, string literal,
//    parentheses
{
scc_ast_expr_t ident = make_identifier("x");
SCC_CHECK_AST(&ident.base, "x", scc_parse_expression);
scc_ast_expr_t int_lit = make_int_literal("42");
SCC_CHECK_AST(&int_lit.base, "42", scc_parse_expression);
scc_ast_expr_t str_lit = make_string_literal("\"hello\"");
SCC_CHECK_AST(&str_lit.base, "\"hello\"", scc_parse_expression);
// Parenthesized expression: expected to dump like the bare identifier
scc_ast_expr_t paren_ident = make_identifier("y");
SCC_CHECK_AST(&paren_ident.base, "(y)", scc_parse_expression);
}
// 2. Postfix expressions
{
// Array subscript: a[10]
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t index = make_int_literal("10");
scc_ast_expr_t subscript = {.base.type = SCC_AST_EXPR_ARRAY_SUBSCRIPT};
subscript.subscript.array = &a;
subscript.subscript.index = &index;
SCC_CHECK_AST(&subscript.base, "a[10]", scc_parse_expression);
// Function call: f()
scc_ast_expr_t f = make_identifier("f");
scc_ast_expr_t call = {.base.type = SCC_AST_EXPR_CALL};
call.call.callee = &f;
scc_ast_expr_vec_t args;
scc_vec_init(args);
call.call.args = args; // empty argument list
SCC_CHECK_AST(&call.base, "f()", scc_parse_expression);
// Function call with arguments: f(1, x)
// NOTE(review): args2 is never released in this function; presumably
// scc_vec_push allocates storage -- confirm and free it if so.
scc_ast_expr_t f2 = make_identifier("f");
scc_ast_expr_t arg1 = make_int_literal("1");
scc_ast_expr_t arg2 = make_identifier("x");
scc_ast_expr_vec_t args2;
scc_vec_init(args2);
scc_vec_push(args2, &arg1);
scc_vec_push(args2, &arg2);
scc_ast_expr_t call2 = {.base.type = SCC_AST_EXPR_CALL};
call2.call.callee = &f2;
call2.call.args = args2;
SCC_CHECK_AST(&call2.base, "f(1, x)", scc_parse_expression);
// Member access: . and ->
scc_ast_expr_t s = make_identifier("s");
scc_ast_expr_t dot = {.base.type = SCC_AST_EXPR_MEMBER};
dot.member.base = &s;
dot.member.member_name = "field";
SCC_CHECK_AST(&dot.base, "s.field", scc_parse_expression);
scc_ast_expr_t p = make_identifier("p");
scc_ast_expr_t arrow = {.base.type = SCC_AST_EXPR_PTR_MEMBER};
arrow.ptr_member.base = &p;
arrow.ptr_member.member_name = "field";
SCC_CHECK_AST(&arrow.base, "p->field", scc_parse_expression);
// Postfix ++/--
scc_ast_expr_t x = make_identifier("x");
scc_ast_expr_t post_inc = make_unary(SCC_AST_OP_POSTFIX_INCREMENT, &x);
SCC_CHECK_AST(&post_inc.base, "x++", scc_parse_expression);
scc_ast_expr_t post_dec = make_unary(SCC_AST_OP_POSTFIX_DECREMENT, &x);
SCC_CHECK_AST(&post_dec.base, "x--", scc_parse_expression);
// Compound literal TODO: (int){1,2} needs a more elaborate construction;
// skipped for now
// SCC_CHECK_AST(..., "(int){1,2}", scc_parse_expression);
}
// 3. Unary expressions
{
scc_ast_expr_t x = make_identifier("x");
scc_ast_expr_t pre_inc = make_unary(SCC_AST_OP_PREFIX_INCREMENT, &x);
SCC_CHECK_AST(&pre_inc.base, "++x", scc_parse_expression);
scc_ast_expr_t pre_dec = make_unary(SCC_AST_OP_PREFIX_DECREMENT, &x);
SCC_CHECK_AST(&pre_dec.base, "--x", scc_parse_expression);
scc_ast_expr_t addr = make_unary(SCC_AST_OP_ADDRESS_OF, &x);
SCC_CHECK_AST(&addr.base, "&x", scc_parse_expression);
scc_ast_expr_t deref = make_unary(SCC_AST_OP_INDIRECTION, &x);
SCC_CHECK_AST(&deref.base, "*x", scc_parse_expression);
scc_ast_expr_t plus = make_unary(SCC_AST_OP_UNARY_PLUS, &x);
SCC_CHECK_AST(&plus.base, "+x", scc_parse_expression);
scc_ast_expr_t minus = make_unary(SCC_AST_OP_UNARY_MINUS, &x);
SCC_CHECK_AST(&minus.base, "-x", scc_parse_expression);
scc_ast_expr_t bit_not = make_unary(SCC_AST_OP_BITWISE_NOT, &x);
SCC_CHECK_AST(&bit_not.base, "~x", scc_parse_expression);
scc_ast_expr_t log_not = make_unary(SCC_AST_OP_LOGICAL_NOT, &x);
SCC_CHECK_AST(&log_not.base, "!x", scc_parse_expression);
// sizeof has two forms
// sizeof applied to an expression
scc_ast_expr_t sizeof_expr = {.base.type = SCC_AST_EXPR_SIZE_OF};
sizeof_expr.attr_of.expr = &x;
SCC_CHECK_AST(&sizeof_expr.base, "sizeof x", scc_parse_expression);
// sizeof(type-name) requires building a type node; skipped for now (TODO)
// SCC_CHECK_AST(..., "sizeof(int)", scc_parse_expression);
}
// 4. Cast expressions
{
// (int)x
// Requires building a type node; left as a TODO here
// scc_ast_type_t int_type = { .base.type = SCC_AST_TYPE_BUILTIN,
// .builtin.type = SCC_AST_BUILTIN_TYPE_INT }; scc_ast_expr_t x =
// make_identifier("x"); scc_ast_expr_t cast = { .base.type =
// SCC_AST_EXPR_CAST }; cast.cast.type = &int_type; cast.cast.expr = &x;
// SCC_CHECK_AST(&cast.base, "(int)x", scc_parse_expression);
}
// 5. Binary operators (tested by precedence)
{
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t b = make_identifier("b");
scc_ast_expr_t c = make_identifier("c");
scc_ast_expr_t d = make_identifier("d");
// Multiplicative operators bind tighter than additive ones
scc_ast_expr_t mul = make_binary(SCC_AST_OP_MUL, &a, &b);
scc_ast_expr_t add = make_binary(SCC_AST_OP_ADD, &mul, &c);
SCC_CHECK_AST(&add.base, "a * b + c", scc_parse_expression);
// Left associativity: a - b - c => (a - b) - c
scc_ast_expr_t sub1 = make_binary(SCC_AST_OP_SUB, &a, &b);
scc_ast_expr_t sub2 = make_binary(SCC_AST_OP_SUB, &sub1, &c);
SCC_CHECK_AST(&sub2.base, "a - b - c", scc_parse_expression);
// Shift
scc_ast_expr_t shift = make_binary(SCC_AST_OP_LEFT_SHIFT, &a, &b);
SCC_CHECK_AST(&shift.base, "a << b", scc_parse_expression);
// Relational
scc_ast_expr_t lt = make_binary(SCC_AST_OP_LESS, &a, &b);
SCC_CHECK_AST(&lt.base, "a < b", scc_parse_expression);
// Equality
scc_ast_expr_t eq = make_binary(SCC_AST_OP_EQUAL, &a, &b);
SCC_CHECK_AST(&eq.base, "a == b", scc_parse_expression);
// Bitwise AND, XOR, OR precedence: & binds tighter than ^, which binds
// tighter than |
scc_ast_expr_t bitand = make_binary(SCC_AST_OP_BITWISE_AND, &a, &b);
scc_ast_expr_t bitxor =
make_binary(SCC_AST_OP_BITWISE_XOR, &bitand, &c);
scc_ast_expr_t bitor = make_binary(SCC_AST_OP_BITWISE_OR, &bitxor, &d);
SCC_CHECK_AST(&bitor.base, "a & b ^ c | d", scc_parse_expression);
// Logical AND/OR: && binds tighter than ||
scc_ast_expr_t logand = make_binary(SCC_AST_OP_LOGICAL_AND, &a, &b);
scc_ast_expr_t logor = make_binary(SCC_AST_OP_LOGICAL_OR, &logand, &c);
SCC_CHECK_AST(&logor.base, "a && b || c", scc_parse_expression);
}
// 6. Conditional (ternary) operator
{
scc_ast_expr_t cond = make_identifier("a");
scc_ast_expr_t then_expr = make_identifier("b");
scc_ast_expr_t else_expr = make_identifier("c");
scc_ast_expr_t cond_expr =
make_conditional(&cond, &then_expr, &else_expr);
SCC_CHECK_AST(&cond_expr.base, "a ? b : c", scc_parse_expression);
// Right associativity: a ? b : c ? d : e => a ? b : (c ? d : e)
scc_ast_expr_t cond2 = make_identifier("c");
scc_ast_expr_t then2 = make_identifier("d");
scc_ast_expr_t else2 = make_identifier("e");
scc_ast_expr_t inner_cond = make_conditional(&cond2, &then2, &else2);
scc_ast_expr_t outer_cond =
make_conditional(&cond, &then_expr, &inner_cond);
SCC_CHECK_AST(&outer_cond.base, "a ? b : c ? d : e",
scc_parse_expression);
}
// 7. Assignment operators (right-associative) -- currently disabled
// NOTE(review): the disabled expectation below nests assign1 (a = b) as the
// left operand of assign2, i.e. (a = b) = c, which does not match its own
// `a = (b = c)` annotation; fix the construction before re-enabling.
{
// scc_ast_expr_t a = make_identifier("a");
// scc_ast_expr_t b = make_identifier("b");
// scc_ast_expr_t c = make_identifier("c");
// scc_ast_expr_t assign1 = make_binary(SCC_AST_OP_ASSIGN, &a, &b);
// scc_ast_expr_t assign2 =
// make_binary(SCC_AST_OP_ASSIGN, &assign1, &c); // a = (b = c)
// SCC_CHECK_AST(&assign2.base, "a = b = c", scc_parse_expression);
// scc_ast_expr_t add_assign = make_binary(SCC_AST_OP_ASSIGN_ADD, &a,
// &b); SCC_CHECK_AST(&add_assign.base, "a += b", scc_parse_expression);
}
// 8. Comma operator
{
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t b = make_identifier("b");
scc_ast_expr_t comma1 = make_binary(SCC_AST_OP_COMMA, &a, &b);
SCC_CHECK_AST(&comma1.base, "a, b", scc_parse_expression);
}
// 9. Mixed-precedence tests
{
scc_ast_expr_t a = make_identifier("a");
scc_ast_expr_t b = make_identifier("b");
scc_ast_expr_t c = make_identifier("c");
scc_ast_expr_t d = make_identifier("d");
// a + b * c - d => (a + (b * c)) - d
scc_ast_expr_t mul = make_binary(SCC_AST_OP_MUL, &b, &c);
scc_ast_expr_t add = make_binary(SCC_AST_OP_ADD, &a, &mul);
scc_ast_expr_t sub = make_binary(SCC_AST_OP_SUB, &add, &d);
SCC_CHECK_AST(&sub.base, "a + b * c - d", scc_parse_expression);
// *p++ => *(p++)
scc_ast_expr_t p = make_identifier("p");
scc_ast_expr_t post_inc = make_unary(SCC_AST_OP_POSTFIX_INCREMENT, &p);
scc_ast_expr_t deref = make_unary(SCC_AST_OP_INDIRECTION, &post_inc);
SCC_CHECK_AST(&deref.base, "*p++", scc_parse_expression);
}
}
/* Test registry for the acutest runner: name/function pairs, terminated by
 * a {NULL, NULL} entry. */
TEST_LIST = {
{"parser_unit", test_parser_unit},
{"parser_expression", test_parser_expression},
{NULL, NULL},
};