From 02a6c684f1c1c691c56dda4db3b6a9578482ed04 Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Thu, 19 Mar 2026 12:11:57 +0800 Subject: [PATCH] =?UTF-8?q?feat(ast2ir):=20=E6=B7=BB=E5=8A=A0=E5=B7=A6?= =?UTF-8?q?=E5=80=BC=E6=A0=87=E8=AF=86=E6=94=AF=E6=8C=81=E4=BB=A5=E6=94=B9?= =?UTF-8?q?=E5=96=84=E8=A1=A8=E8=BE=BE=E5=BC=8F=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 scc_ast2ir_expr 函数中添加 is_lvalue 参数来区分左值和右值表达式 - 更新二元表达式处理逻辑,特别是赋值操作符的处理 - 改进标识符表达式的处理,根据是否为左值决定返回存储位置还是加载值 - 修复哈希比较函数的实现 - 移除调试相关的注释代码 refactor(parser): 优化语法分析器错误处理和控制流 - 移除不必要的错误恢复辅助注释 - 修改表达式解析的控制流程,将直接返回改为使用 break 语句 - 添加语义分析回调,在解析完成后进行标识符查找和验证 refactor(sema): 增强语义分析阶段的符号表管理 - 改进标识符查找逻辑,增加对非变量标识符的检查 - 扩展声明处理范围,包括变量和参数声明的符号表注册 - 为函数声明添加作用域管理 fix(parser): 修正单元测试中的类型定义 - 将 long long 类型定义改为 int 类型,解决测试兼容性问题 refactor(sccf): 重构文件格式定义和构建器实现 - 重命名符号类型枚举值 OBJECT 为 EXTERN - 重命名段类型枚举值 RELOC 为 RELOCS - 修正结构体字段命名的一致性问题 - 重新设计 SCCF 构建器的数据结构和API - 添加符号表、字符串表和重定位表的构建支持 refactor(target): 重命名Windows PE相关类型定义 - 将 scc_winpe_* 类型重命名为 scc_pe_* 以保持命名一致性 chore: 添加 sccf2target 模块用于格式转换 - 创建新的库模块用于 SCCF 到目标格式的转换 - 实现 PE 格式转换的基本功能 - 添加示例程序演示格式转换过程 --- libs/ast2ir/include/scc_ast2ir.h | 3 +- libs/ast2ir/src/scc_ast2ir.c | 69 ++++--- libs/ir/src/ir_dump.c | 16 +- libs/parser/src/parse_expr.c | 20 +- libs/parser/src/scc_sema.c | 28 ++- libs/parser/tests/test_parser_unit.c | 2 +- libs/sccf/include/sccf.h | 48 ++--- libs/sccf/include/sccf_builder.h | 54 ++++- libs/sccf/include/sccf_linker.h | 2 +- libs/sccf/include/sccf_utils.h | 9 +- libs/sccf/src/sccf_builder.c | 97 ++++++++- libs/target/pe/include/scc_pe_idata.h | 8 +- libs/target/pe/src/scc_pe_idata.c | 4 +- libs/target/pe/tests/test_pe_write_idata.c | 2 +- libs/target/sccf2target/cbuild.toml | 12 ++ libs/target/sccf2target/include/sccf2pe.h | 4 + libs/target/sccf2target/src/main.c | 69 +++++++ libs/target/sccf2target/src/sccf2pe.c | 229 +++++++++++++++++++++ 18 files changed, 575 insertions(+), 101 
deletions(-) create mode 100644 libs/target/sccf2target/cbuild.toml create mode 100644 libs/target/sccf2target/include/sccf2pe.h create mode 100644 libs/target/sccf2target/src/main.c create mode 100644 libs/target/sccf2target/src/sccf2pe.c diff --git a/libs/ast2ir/include/scc_ast2ir.h b/libs/ast2ir/include/scc_ast2ir.h index 9b787ca..011e5b6 100644 --- a/libs/ast2ir/include/scc_ast2ir.h +++ b/libs/ast2ir/include/scc_ast2ir.h @@ -18,7 +18,8 @@ void scc_ast2ir_ctx_init(scc_ast2ir_ctx_t *ctx, const scc_type_abi_t *abi); void scc_ast2ir_translation_unit(scc_ast2ir_ctx_t *ctx, scc_ast_translation_unit_t *tu); void scc_ast2ir_decl(scc_ast2ir_ctx_t *ctx, scc_ast_decl_t *decl); -scc_ir_node_ref_t scc_ast2ir_expr(scc_ast2ir_ctx_t *ctx, scc_ast_expr_t *expr); +scc_ir_node_ref_t scc_ast2ir_expr(scc_ast2ir_ctx_t *ctx, scc_ast_expr_t *expr, + cbool is_lvalue); void scc_ast2ir_stmt(scc_ast2ir_ctx_t *ctx, scc_ast_stmt_t *stmt); scc_ir_type_ref_t scc_ast2ir_type(scc_ast2ir_ctx_t *ctx, scc_ast_type_t *ast_type); diff --git a/libs/ast2ir/src/scc_ast2ir.c b/libs/ast2ir/src/scc_ast2ir.c index f9bed28..d3d40f4 100644 --- a/libs/ast2ir/src/scc_ast2ir.c +++ b/libs/ast2ir/src/scc_ast2ir.c @@ -92,7 +92,8 @@ scc_ir_type_ref_t scc_ast2ir_type(scc_ast2ir_ctx_t *ctx, * @param expr * @return scc_ir_node_ref_t */ -scc_ir_node_ref_t scc_ast2ir_expr(scc_ast2ir_ctx_t *ctx, scc_ast_expr_t *expr) { +scc_ir_node_ref_t scc_ast2ir_expr(scc_ast2ir_ctx_t *ctx, scc_ast_expr_t *expr, + cbool is_lvalue) { if (ctx == null || expr == null) { LOG_ERROR("args is null"); return 0; @@ -101,8 +102,27 @@ scc_ir_node_ref_t scc_ast2ir_expr(scc_ast2ir_ctx_t *ctx, scc_ast_expr_t *expr) { switch (expr->base.type) { case SCC_AST_EXPR_BINARY: { scc_ir_node_ref_t lhs, rhs; - lhs = scc_ast2ir_expr(ctx, expr->binary.lhs); - rhs = scc_ast2ir_expr(ctx, expr->binary.rhs); + cbool is_assign = false; + switch (expr->binary.op) { + case SCC_AST_OP_ASSIGN: // = + case SCC_AST_OP_ASSIGN_ADD: // += + case SCC_AST_OP_ASSIGN_SUB: // -= 
+ case SCC_AST_OP_ASSIGN_MUL: // *= + case SCC_AST_OP_ASSIGN_DIV: // /= + case SCC_AST_OP_ASSIGN_MOD: // %= + case SCC_AST_OP_ASSIGN_AND: // &= + case SCC_AST_OP_ASSIGN_XOR: // ^= + case SCC_AST_OP_ASSIGN_OR: // |= + case SCC_AST_OP_ASSIGN_LSHIFT: // <<= + case SCC_AST_OP_ASSIGN_RSHIFT: // >>= + is_assign = true; + break; + default: + is_assign = false; + break; + } + lhs = scc_ast2ir_expr(ctx, expr->binary.lhs, is_assign); + rhs = scc_ast2ir_expr(ctx, expr->binary.rhs, false); // 映射操作符 scc_ir_op_type_t op; @@ -124,17 +144,6 @@ scc_ir_node_ref_t scc_ast2ir_expr(scc_ast2ir_ctx_t *ctx, scc_ast_expr_t *expr) { case SCC_AST_OP_LESS_EQUAL: op = SCC_IR_OP_LE; break; case SCC_AST_OP_GREATER: op = SCC_IR_OP_GT; break; case SCC_AST_OP_GREATER_EQUAL: op = SCC_IR_OP_GE; break; - // SCC_AST_OP_ASSIGN, // = - // SCC_AST_OP_ASSIGN_ADD, // += - // SCC_AST_OP_ASSIGN_SUB, // -= - // SCC_AST_OP_ASSIGN_MUL, // *= - // SCC_AST_OP_ASSIGN_DIV, // /= - // SCC_AST_OP_ASSIGN_MOD, // %= - // SCC_AST_OP_ASSIGN_AND, // &= - // SCC_AST_OP_ASSIGN_XOR, // ^= - // SCC_AST_OP_ASSIGN_OR, // |= - // SCC_AST_OP_ASSIGN_LSHIFT, // <<= - // SCC_AST_OP_ASSIGN_RSHIFT, // >>= case SCC_AST_OP_ASSIGN: { return scc_ir_builder_store(&ctx->builder, lhs, rhs); } @@ -194,7 +203,8 @@ scc_ir_node_ref_t scc_ast2ir_expr(scc_ast2ir_ctx_t *ctx, scc_ast_expr_t *expr) { } case SCC_AST_EXPR_UNARY: { - scc_ir_node_ref_t operand = scc_ast2ir_expr(ctx, expr->unary.operand); + scc_ir_node_ref_t operand = + scc_ast2ir_expr(ctx, expr->unary.operand, is_lvalue); // 映射一元操作符 switch (expr->unary.op) { @@ -235,7 +245,7 @@ scc_ir_node_ref_t scc_ast2ir_expr(scc_ast2ir_ctx_t *ctx, scc_ast_expr_t *expr) { scc_vec_foreach(expr->call.args, i) { scc_ast_expr_t *arg_expr = scc_vec_at(expr->call.args, i); scc_ir_node_ref_t arg_node; - arg_node = scc_ast2ir_expr(ctx, arg_expr); + arg_node = scc_ast2ir_expr(ctx, arg_expr, false); scc_vec_push(args, arg_node); } } @@ -273,10 +283,17 @@ scc_ir_node_ref_t scc_ast2ir_expr(scc_ast2ir_ctx_t *ctx, 
scc_ast_expr_t *expr) { // SCC_AST_EXPR_STRING_LITERAL, // 字符串字面量 case SCC_AST_EXPR_IDENTIFIER: { + if (expr->identifier._target == null) { + LOG_ERROR("unknown identifier"); + } // FIXME hack hashtable scc_ir_node_ref_t in = (scc_ir_node_ref_t)(usize)scc_hashtable_get( &ctx->node2ir, expr->identifier._target); - return scc_ir_builder_load(&ctx->builder, in); + if (is_lvalue) { + return in; + } else { + return scc_ir_builder_load(&ctx->builder, in); + } } default: @@ -315,7 +332,7 @@ void scc_ast2ir_stmt(scc_ast2ir_ctx_t *ctx, scc_ast_stmt_t *stmt) { } case SCC_AST_STMT_EXPR: { - scc_ast2ir_expr(ctx, stmt->expr.expr); + scc_ast2ir_expr(ctx, stmt->expr.expr, false); break; } @@ -336,7 +353,8 @@ void scc_ast2ir_stmt(scc_ast2ir_ctx_t *ctx, scc_ast_stmt_t *stmt) { scc_ir_bblock_ref_t merge_block = scc_ir_builder_bblock(&ctx->builder, "if_merge"); - scc_ir_node_ref_t cond_node = scc_ast2ir_expr(ctx, stmt->if_stmt.cond); + scc_ir_node_ref_t cond_node = + scc_ast2ir_expr(ctx, stmt->if_stmt.cond, false); scc_ir_builder_branch(&ctx->builder, cond_node, true_block, false_block); @@ -368,7 +386,7 @@ void scc_ast2ir_stmt(scc_ast2ir_ctx_t *ctx, scc_ast_stmt_t *stmt) { scc_ir_builder_set_current_bblock(&ctx->builder, cond_block); scc_ir_node_ref_t cond_node = - scc_ast2ir_expr(ctx, stmt->while_stmt.cond); + scc_ast2ir_expr(ctx, stmt->while_stmt.cond, false); scc_ir_builder_branch(&ctx->builder, cond_node, body_block, exit_block); scc_ir_builder_set_current_bblock(&ctx->builder, body_block); @@ -395,7 +413,7 @@ void scc_ast2ir_stmt(scc_ast2ir_ctx_t *ctx, scc_ast_stmt_t *stmt) { scc_ir_builder_set_current_bblock(&ctx->builder, cond_block); scc_ir_node_ref_t cond_node = - scc_ast2ir_expr(ctx, stmt->while_stmt.cond); + scc_ast2ir_expr(ctx, stmt->while_stmt.cond, false); scc_ir_builder_branch(&ctx->builder, cond_node, body_block, exit_block); scc_ir_builder_set_current_bblock(&ctx->builder, exit_block); @@ -421,7 +439,7 @@ void scc_ast2ir_stmt(scc_ast2ir_ctx_t *ctx, scc_ast_stmt_t 
*stmt) { case SCC_AST_STMT_RETURN: { if (stmt->return_stmt.expr) { scc_ir_node_ref_t ret_val_node = - scc_ast2ir_expr(ctx, stmt->return_stmt.expr); + scc_ast2ir_expr(ctx, stmt->return_stmt.expr, false); scc_ir_builder_ret(&ctx->builder, ret_val_node); } else { scc_ir_builder_ret_void(&ctx->builder); @@ -457,11 +475,14 @@ void scc_ast2ir_decl(scc_ast2ir_ctx_t *ctx, scc_ast_decl_t *decl) { scc_ir_node_ref_t alloc_val_node = scc_ir_builder_alloca(&ctx->builder, ir_type, decl->name); + scc_hashtable_set(&ctx->node2ir, decl, (void *)(usize)alloc_val_node); + // 如果有初始化表达式 if (!decl->var.init) { break; } - scc_ir_node_ref_t init_val_node = scc_ast2ir_expr(ctx, decl->var.init); + scc_ir_node_ref_t init_val_node = + scc_ast2ir_expr(ctx, decl->var.init, false); scc_ir_builder_store(&ctx->builder, alloc_val_node, init_val_node); break; } @@ -518,7 +539,8 @@ void scc_ast2ir_translation_unit(scc_ast2ir_ctx_t *ctx, static u32 scc_hash_node(const void *key) { return (u32)(usize)key; } static int scc_cmp_node(const void *key1, const void *key2) { - return key1 == key2; + // Compare the full pointers (0 == equal); truncating to u32 would alias keys that differ only in the high bits. + return key1 != key2; } void scc_ast2ir_ctx_init(scc_ast2ir_ctx_t *ctx, const scc_type_abi_t *abi) { diff --git a/libs/ir/src/ir_dump.c b/libs/ir/src/ir_dump.c index f5fa5d6..0949a73 100644 --- a/libs/ir/src/ir_dump.c +++ b/libs/ir/src/ir_dump.c @@ -610,14 +610,14 @@ void scc_ir_dump_node_linear(scc_ir_dump_ctx_t *ctx, remaining = sizeof(buff) - (p - buff); - if (node->type != 0) { - p += scc_snprintf(p, remaining, " `"); - remaining = sizeof(buff) - (p - buff); - // p += dump_type_to_buf(ctx, p, remaining, node->type); - // remaining = sizeof(buff) - (p - buff); - p += scc_snprintf(p, remaining, "`"); - remaining = sizeof(buff) - (p - buff); - } + // if (node->type != 0) { + // p += scc_snprintf(p, remaining, " `"); + // remaining = sizeof(buff) - (p - buff); + // // p += dump_type_to_buf(ctx, p, remaining, node->type); + // // remaining = sizeof(buff) - (p - buff); + // p += scc_snprintf(p,
remaining, "`"); + // remaining = sizeof(buff) - (p - buff); + // } p += scc_snprintf(p, remaining, " = "); remaining = sizeof(buff) - (p - buff); diff --git a/libs/parser/src/parse_expr.c b/libs/parser/src/parse_expr.c index ca22867..07dd65f 100644 --- a/libs/parser/src/parse_expr.c +++ b/libs/parser/src/parse_expr.c @@ -321,7 +321,6 @@ static scc_ast_expr_op_t map_token_to_assign_op(scc_tok_type_t type) { } } -/* ---------------------------- 错误恢复辅助 ---------------------------- */ // 跳过直到遇到同步 token(分号、右括号、逗号、EOF) static void parser_sync(scc_parser_t *parser) { const scc_lexer_tok_t *tok_ptr; @@ -340,7 +339,6 @@ static void parser_sync(scc_parser_t *parser) { } } -/* ---------------------------- 通用二元解析器 ---------------------------- */ static scc_ast_expr_t *parse_expression_with_precedence(scc_parser_t *parser, int min_prec) { // 从最底层(cast-expression)开始 @@ -790,7 +788,7 @@ static scc_ast_expr_t *parse_primary_expression(scc_parser_t *parser) { Assert(expr != null); scc_ast_expr_identifier_init(expr, scc_cstring_as_cstr(&tok.lexeme), tok.loc); - return expr; + break; } case SCC_TOK_INT_LITERAL: { if (!scc_parser_next_consume(parser, &tok)) @@ -799,7 +797,7 @@ static scc_ast_expr_t *parse_primary_expression(scc_parser_t *parser) { Assert(expr != null); scc_ast_expr_literal_int_init(expr, scc_cstring_as_cstr(&tok.lexeme), false, tok.loc); - return expr; + break; } case SCC_TOK_FLOAT_LITERAL: { if (!scc_parser_next_consume(parser, &tok)) @@ -808,7 +806,7 @@ static scc_ast_expr_t *parse_primary_expression(scc_parser_t *parser) { Assert(expr != null); scc_ast_expr_literal_float_init(expr, scc_cstring_as_cstr(&tok.lexeme), false, tok.loc); - return expr; + break; } case SCC_TOK_CHAR_LITERAL: { if (!scc_parser_next_consume(parser, &tok)) @@ -817,7 +815,7 @@ static scc_ast_expr_t *parse_primary_expression(scc_parser_t *parser) { Assert(expr != null); scc_ast_expr_literal_char_init(expr, scc_cstring_as_cstr(&tok.lexeme), false, tok.loc); - return expr; + break; } case 
SCC_TOK_STRING_LITERAL: { scc_cstring_t string = scc_cstring_create(); @@ -841,7 +839,7 @@ static scc_ast_expr_t *parse_primary_expression(scc_parser_t *parser) { // FIXME loc scc_ast_expr_literal_string_init(expr, scc_cstring_as_cstr(&string), true, tok.loc); - return expr; + break; } case SCC_TOK_L_PAREN: scc_parser_next_consume(parser, null); @@ -850,10 +848,16 @@ static scc_ast_expr_t *parse_primary_expression(scc_parser_t *parser) { SCC_ERROR(scc_parser_got_current_pos(parser), "Expected ')' after expression"); } - return expr; + break; default: + break; + } + + if (expr == null) { return null; } + scc_parse_expr_sema(parser, expr); + return expr; } scc_ast_expr_t *scc_parse_expression(scc_parser_t *parser) { diff --git a/libs/parser/src/scc_sema.c b/libs/parser/src/scc_sema.c index 434a371..d510419 100644 --- a/libs/parser/src/scc_sema.c +++ b/libs/parser/src/scc_sema.c @@ -18,15 +18,21 @@ static void expr_callback(void *context, scc_ast_node_type_t node_type, if (node_type == SCC_AST_UNKNOWN || node == null) { return; } - scc_ast_expr_t *decl = SCC_AST_CAST_TO(scc_ast_expr_t, node); + scc_ast_expr_t *expr = SCC_AST_CAST_TO(scc_ast_expr_t, node); if (node_type == SCC_AST_EXPR_IDENTIFIER) { scc_ast_node_t *node = - scc_sema_symtab_lookup_symbol(sema_symtab, decl->identifier.name); + scc_sema_symtab_lookup_symbol(sema_symtab, expr->identifier.name); if (node == null) { - SCC_ERROR(decl->base.loc, "Identifier '%s' not found", - decl->identifier.name); + SCC_ERROR(expr->base.loc, "Identifier '%s' not found", + expr->identifier.name); + } else if (!SCC_AST_IS_A(scc_ast_decl_t, node)) { + SCC_ERROR(expr->base.loc, "Identifier '%s' is not a variable", + expr->identifier.name); + } else { + expr->identifier._target = SCC_AST_CAST_TO(scc_ast_decl_t, node); } } + return; } @@ -50,6 +56,8 @@ static void stmt_callback(void *context, scc_ast_node_type_t node_type, static void decl_callback(void *context, scc_ast_node_type_t node_type, void *node) { scc_sema_symtab_t 
*sema_symtab = context; + + // Function declaration scope if (node_type == scc_ast_decl_t_BEGIN) { scc_sema_symtab_enter_scope(sema_symtab); return; @@ -67,27 +75,31 @@ static void decl_callback(void *context, scc_ast_node_type_t node_type, if (decl->name == null) { return; } - if (decl->base.type == SCC_AST_DECL_STRUCT) { + if (node_type == SCC_AST_DECL_STRUCT) { scc_ast_type_struct_init(type, decl->name, decl, decl->base.loc); scc_cstring_t name = scc_cstring_from_cstr("$S_"); scc_cstring_append_cstr(&name, decl->name, scc_strlen(decl->name)); scc_sema_symtab_add_symbol(sema_symtab, scc_cstring_as_cstr(&name), &type->base); - } else if (decl->base.type == SCC_AST_DECL_UNION) { + } else if (node_type == SCC_AST_DECL_UNION) { scc_ast_type_union_init(type, decl->name, decl, decl->base.loc); scc_cstring_t name = scc_cstring_from_cstr("$U_"); scc_cstring_append_cstr(&name, decl->name, scc_strlen(decl->name)); scc_sema_symtab_add_symbol(sema_symtab, scc_cstring_as_cstr(&name), &type->base); - } else if (decl->base.type == SCC_AST_DECL_ENUM) { + } else if (node_type == SCC_AST_DECL_ENUM) { scc_ast_type_enum_init(type, decl->name, decl, decl->base.loc); scc_cstring_t name = scc_cstring_from_cstr("$E_"); scc_cstring_append_cstr(&name, decl->name, scc_strlen(decl->name)); scc_sema_symtab_add_symbol(sema_symtab, scc_cstring_as_cstr(&name), &type->base); - } else if (decl->base.type == SCC_AST_DECL_TYPEDEF) { + } else if (node_type == SCC_AST_DECL_TYPEDEF) { scc_ast_type_typedef_init(type, decl->name, decl, decl->base.loc); scc_sema_symtab_add_symbol(sema_symtab, decl->name, &type->base); + } else if (node_type == SCC_AST_DECL_VAR) { + scc_sema_symtab_add_symbol(sema_symtab, decl->name, &decl->base); + } else if (node_type == SCC_AST_DECL_PARAM) { + scc_sema_symtab_add_symbol(sema_symtab, decl->name, &decl->base); } return; } diff --git a/libs/parser/tests/test_parser_unit.c b/libs/parser/tests/test_parser_unit.c index 8c87ec0..9e35cb4 100644 --- 
a/libs/parser/tests/test_parser_unit.c +++ b/libs/parser/tests/test_parser_unit.c @@ -467,7 +467,7 @@ static void test_parser_unit(void) { scc_ast_translation_unit_init(&tu, &decls, scc_pos_create()); SCC_CHECK_AST_WITH_SEMA( &tu.base, - "typedef long long size_t;" + "typedef int size_t;" "typedef void *(*func_t)(size_t a, int b, ...);", scc_parse_translation_unit); } diff --git a/libs/sccf/include/sccf.h b/libs/sccf/include/sccf.h index 3542a85..145c83f 100644 --- a/libs/sccf/include/sccf.h +++ b/libs/sccf/include/sccf.h @@ -16,10 +16,6 @@ #define sccf_size_t uint64_t #define sccf_isize_t int64_t -#ifdef __cplusplus -extern "C" { -#endif - /** SCCF魔数 */ #define SCCF_MAGIC "SCCFmt\0\0" @@ -47,7 +43,7 @@ typedef enum { SCCF_SYM_TYPE_UNDEF = 0, ///< 未定义 SCCF_SYM_TYPE_FUNC = 1, ///< 函数 SCCF_SYM_TYPE_DATA = 2, ///< 数据 - SCCF_SYM_TYPE_OBJECT = 3, ///< 对象 + SCCF_SYM_TYPE_EXTERN = 3, ///< 外部符号 } sccf_sym_type_t; /** 符号绑定类型 */ @@ -73,7 +69,7 @@ typedef enum { SCCF_SECT_UNINIT_DATA = 4, ///< BSS段(未初始化数据) SCCF_SECT_SYMTAB = 5, ///< 符号表 SCCF_SECT_STRTAB = 6, ///< 字符串表 - SCCF_SECT_RELOC = 7, ///< 重定位表 + SCCF_SECT_RELOCS = 7, ///< 重定位表 } sccf_sect_type_t; /** 重定位类型 */ @@ -99,37 +95,37 @@ typedef struct sccf_header { * @brief SCCF段 */ typedef struct { - sccf_byte_t name[8]; ///< 段名称 仅供展示 eg. emoji or ".text" - sccf_enum_t scf_sect_type; ///< 段类型 (内部实际区分的方式) - sccf_size_t size; ///< 段数据实际的大小 - sccf_size_t data_size; ///< 段数据的有效数据大小 - sccf_size_t addralign; ///< 内存对齐要求 (字节对齐, 如 1,2,4,8,...) - sccf_size_t info; ///< 段信息 (如条目数,链接索引等) - sccf_size_t reserved[2]; ///< 保留 + sccf_byte_t name[8]; ///< 段名称 仅供展示 eg. emoji or ".text" + sccf_enum_t sccf_sect_type; ///< 段类型 (内部实际区分的方式) + sccf_size_t size; ///< 段数据实际的大小 + sccf_size_t data_size; ///< 段数据的有效数据大小 + sccf_size_t addralign; ///< 内存对齐要求 (字节对齐, 如 1,2,4,8,...) 
+ sccf_size_t info; ///< 段信息 (如条目数,链接索引等) + sccf_size_t reserved[2]; ///< 保留 } sccf_sect_header_t; /** * @brief SCCF符号表 */ typedef struct { - sccf_size_t name_offset; ///< 符号名称在字符串表中的偏移量 - sccf_enum_t scf_sym_type; ///< 符号类型 - sccf_enum_t scf_sym_bind; ///< 符号绑定类型 - sccf_enum_t scf_sym_vis; ///< 符号可见性 - sccf_enum_t scf_sect_type; ///< 该符号的段类型 - sccf_size_t scf_sect_offset; ///< 该符号在段中的偏移量 - sccf_size_t scf_sym_size; ///< 该符号符号选中的大小 + sccf_size_t name_offset; ///< 符号名称在字符串表中的偏移量 + sccf_enum_t sccf_sym_type; ///< 符号类型 + sccf_enum_t sccf_sym_bind; ///< 符号绑定类型 + sccf_enum_t sccf_sym_vis; ///< 符号可见性 + sccf_enum_t sccf_sect_type; ///< 该符号的段类型 + sccf_size_t sccf_sect_offset; ///< 该符号在段中的偏移量 + sccf_size_t sccf_sym_size; ///< 该符号选中的大小 } sccf_sym_t; /** * @brief SCCF重定向条目 */ typedef struct { - sccf_size_t offset; ///< 在数据段中的偏移量 - sccf_size_t sym_idx; ///< 符号索引 sccf_enum_t type; ///< 重定位类型 - sccf_enum_t sect_type; ///< 段类型(代码段/数据段) - sccf_isize_t addend; ///< 加数 + sccf_enum_t sect_type; ///< 需要重定位的段类型(代码段/数据段) + sccf_size_t sym_idx; ///< 符号索引(重定向指向的符号) + sccf_size_t offset; ///< 在段中的偏移量(用于重定向的地址) + sccf_isize_t addend; ///< 加数(用于获取相对位置时PC的额外值) } sccf_reloc_t; /** @@ -158,8 +154,4 @@ typedef struct { * }; */ -#ifdef __cplusplus -} -#endif - #endif /* __SCC_FORMAT_H__ */ diff --git a/libs/sccf/include/sccf_builder.h b/libs/sccf/include/sccf_builder.h index 88f9d5b..13c182c 100644 --- a/libs/sccf/include/sccf_builder.h +++ b/libs/sccf/include/sccf_builder.h @@ -4,24 +4,66 @@ #include "sccf_utils.h" #include -typedef SCC_VEC(sccf_sym_t) sccf_sym_vec_t; -typedef SCC_VEC(sccf_reloc_t) sccf_reloc_vec_t; - typedef struct { sccf_t sccf; int aligned; - scc_strpool_t strpool; - scc_hashtable_t str2offset; - sccf_sym_vec_t syms; + sccf_strtab_t strtab; sccf_reloc_vec_t relocs; + sccf_sym_vec_t symtab; + scc_hashtable_t str2sym; + scc_hashtable_t str2offset; } sccf_builder_t; void sccf_builder_init(sccf_builder_t *builder); +usize sccf_builder_add_symbol(sccf_builder_t *builder, const 
char *name, + sccf_sym_t *sym); +usize sccf_builder_get_symbol_idx(sccf_builder_t *builder, const char *name); +static inline sccf_sym_t * +sccf_builder_get_symbol_unsafe(sccf_builder_t *builder, usize idx) { + return &scc_vec_at(builder->symtab, idx); +} +void sccf_builder_add_reloc(sccf_builder_t *builder, sccf_reloc_t reloc); + +/** + * @brief 必须确保参数合法 + * + * @param builder + * @param sect_header + * @param sect_data + */ void sccf_builder_add_section(sccf_builder_t *builder, sccf_sect_header_t *sect_header, sccf_sect_data_t *sect_data); +static inline void sccf_builder_add_text_section(sccf_builder_t *builder, + sccf_sect_data_t *sect_data) { + sccf_sect_header_t text_header = { + .name = ".text", + .addralign = 1, + .data_size = scc_vec_size(*sect_data), + .sccf_sect_type = SCCF_SECT_CODE, + .size = scc_vec_size(*sect_data), + .info = 0, + .reserved = {0}, + }; + sccf_builder_add_section(builder, &text_header, sect_data); +} +static inline void sccf_builder_add_data_section(sccf_builder_t *builder, + sccf_sect_data_t *sect_data) { + sccf_sect_header_t text_header = { + .name = ".data", + .addralign = 1, + .data_size = scc_vec_size(*sect_data), + .sccf_sect_type = SCCF_SECT_DATA, + .size = scc_vec_size(*sect_data), + .info = 0, + .reserved = {0}, + }; + sccf_builder_add_section(builder, &text_header, sect_data); +} + +const sccf_t *sccf_builder_to_sccf(sccf_builder_t *builder); void sccf_builder_to_buffer(sccf_builder_t *builder, sccf_buffer_t *buffer); void sccf_builder_to_file(sccf_builder_t *builder, const char *file_path); diff --git a/libs/sccf/include/sccf_linker.h b/libs/sccf/include/sccf_linker.h index afdc276..1236101 100644 --- a/libs/sccf/include/sccf_linker.h +++ b/libs/sccf/include/sccf_linker.h @@ -11,7 +11,7 @@ typedef struct { sccf_vec_t link_sccfs; scc_strpool_t strpool; scc_hashtable_t str2offset; - sccf_sym_vec_t syms; + sccf_sym_vec_t symtab; sccf_reloc_vec_t relocs; } sccf_linker_t; diff --git a/libs/sccf/include/sccf_utils.h 
b/libs/sccf/include/sccf_utils.h index aad1ae0..d0524ec 100644 --- a/libs/sccf/include/sccf_utils.h +++ b/libs/sccf/include/sccf_utils.h @@ -100,7 +100,7 @@ static inline usize sccf_find_sect_by_type(u8 *base, sccf_enum_t type) { const sccf_header_t *hdr = (const sccf_header_t *)base; for (usize i = 0; i < (usize)hdr->sect_header_num; ++i) { sccf_sect_header_t *sh = sccf_sect_header(base, i); - if (sh->scf_sect_type == type) + if (sh->sccf_sect_type == type) return i; } return (usize)hdr->sect_header_num; @@ -167,6 +167,9 @@ typedef SCC_VEC(u8) sccf_buffer_t; typedef SCC_VEC(u8) sccf_sect_data_t; typedef SCC_VEC(sccf_sect_data_t) sccf_sect_data_vec_t; typedef SCC_VEC(sccf_sect_header_t) sccf_sect_header_vec_t; +typedef SCC_VEC(sccf_sym_t) sccf_sym_vec_t; +typedef SCC_VEC(sccf_reloc_t) sccf_reloc_vec_t; +typedef SCC_VEC(char) sccf_strtab_t; typedef struct { sccf_header_t header; @@ -241,7 +244,7 @@ static inline void sccf_parse(sccf_t *sccf, sccf_buffer_t *buffer, int copied) { * @param[in] sccf * @return usize */ -static inline usize sccf_size(sccf_t *sccf) { +static inline usize sccf_size(const sccf_t *sccf) { if (scc_vec_size(sccf->sect_datas) != scc_vec_size(sccf->sect_headers) || scc_vec_size(sccf->sect_headers) != sccf->header.sect_header_num) { Panic(); @@ -265,7 +268,7 @@ static inline usize sccf_size(sccf_t *sccf) { * @param[in] sccf * @param[out] buffer */ -static inline void sccf_write(sccf_t *sccf, sccf_buffer_t *buffer) { +static inline void sccf_write(const sccf_t *sccf, sccf_buffer_t *buffer) { usize size = sccf_size(sccf); if (scc_vec_size(*buffer) < size) { scc_vec_realloc(*buffer, size); diff --git a/libs/sccf/src/sccf_builder.c b/libs/sccf/src/sccf_builder.c index 5556c3b..f3f0517 100644 --- a/libs/sccf/src/sccf_builder.c +++ b/libs/sccf/src/sccf_builder.c @@ -3,12 +3,53 @@ void sccf_builder_init(sccf_builder_t *builder) { builder->aligned = 64; sccf_init(&builder->sccf); - scc_strpool_init(&builder->strpool); 
scc_hashtable_init(&builder->str2offset, (scc_hashtable_hash_func_t)scc_strhash32, (scc_hashtable_equal_func_t)scc_strcmp); + scc_hashtable_init(&builder->str2sym, + (scc_hashtable_hash_func_t)scc_strhash32, + (scc_hashtable_equal_func_t)scc_strcmp); + scc_vec_init(builder->strtab); scc_vec_init(builder->relocs); - scc_vec_init(builder->syms); + scc_vec_init(builder->symtab); + + ///< Push null + scc_vec_push(builder->strtab, (char)'\0'); + ///< Push null + scc_vec_push(builder->symtab, (sccf_sym_t){0}); +} + +usize sccf_builder_add_symbol(sccf_builder_t *builder, const char *name, + sccf_sym_t *sym) { + usize offset = 0; + offset = (usize)scc_hashtable_get(&builder->str2offset, name); + if (offset == 0) { + offset = scc_vec_size(builder->strtab); + scc_hashtable_set(&builder->str2offset, name, (void *)offset); + // Copy through a cursor: `name` must stay intact for the str2sym lookup below. + for (const char *ch = name; *ch != '\0'; ch++) { + scc_vec_push(builder->strtab, *ch); + } + scc_vec_push(builder->strtab, '\0'); + } + sym->name_offset = offset; + + usize sym_idx = scc_vec_size(builder->symtab); + offset = (usize)scc_hashtable_get(&builder->str2sym, name); + if (offset == 0) { + scc_hashtable_set(&builder->str2sym, name, (void *)sym_idx); + } + scc_vec_push(builder->symtab, *sym); + return sym_idx; +} + +usize sccf_builder_get_symbol_idx(sccf_builder_t *builder, const char *name) { + usize offset = (usize)scc_hashtable_get(&builder->str2sym, name); + return offset; +} + +void sccf_builder_add_reloc(sccf_builder_t *builder, sccf_reloc_t reloc) { + scc_vec_push(builder->relocs, reloc); } void sccf_builder_add_section(sccf_builder_t *builder, @@ -20,14 +61,58 @@ void sccf_builder_add_section(sccf_builder_t *builder, scc_vec_push(builder->sccf.sect_datas, *sect_data); } -void sccf_builder_to_buffer(sccf_builder_t *builder, sccf_buffer_t *buffer) { - Assert(builder != null && buffer != null); +const sccf_t *sccf_builder_to_sccf(sccf_builder_t *builder) { // TODO symtab strtab reloc // sccf_sect_header_t symtab_header; // sccf_sect_data_t symtab_data; - //
sccf_builder_add_section(builder, &symtab_header, &symtab_data); + sccf_sect_header_t sect_header; + if (scc_vec_size(builder->strtab)) { + sect_header = (sccf_sect_header_t){ + .name = ".strtab", + .info = 0, + .data_size = scc_vec_size(builder->strtab), + .addralign = 1, + .size = scc_vec_size(builder->strtab), + .sccf_sect_type = SCCF_SECT_STRTAB, + }; + // TODO 转换成 u8[] + sccf_builder_add_section(builder, &sect_header, + (void *)&builder->strtab); + } - sccf_write(&builder->sccf, buffer); + if (scc_vec_size(builder->symtab)) { + sect_header = (sccf_sect_header_t){ + .name = ".symtab", + .info = 0, + .data_size = scc_vec_size(builder->symtab), + .addralign = 1, + .size = scc_vec_size(builder->symtab), + .sccf_sect_type = SCCF_SECT_SYMTAB, + }; + // TODO 转换成 u8[] + sccf_builder_add_section(builder, &sect_header, + (void *)&builder->symtab); + } + + if (scc_vec_size(builder->relocs)) { + sect_header = (sccf_sect_header_t){ + .name = ".relocs", + .info = 0, + .data_size = scc_vec_size(builder->relocs), + .addralign = 1, + .size = scc_vec_size(builder->relocs), + .sccf_sect_type = SCCF_SECT_RELOCS, + }; + // TODO 转换成 u8[] + sccf_builder_add_section(builder, &sect_header, + (void *)&builder->relocs); + } + return &builder->sccf; +} + +void sccf_builder_to_buffer(sccf_builder_t *builder, sccf_buffer_t *buffer) { + Assert(builder != null && buffer != null); + sccf_write(sccf_builder_to_sccf(builder), buffer); } void sccf_builder_to_file(sccf_builder_t *builder, const char *file_path) { diff --git a/libs/target/pe/include/scc_pe_idata.h b/libs/target/pe/include/scc_pe_idata.h index 7f3d508..13cc574 100644 --- a/libs/target/pe/include/scc_pe_idata.h +++ b/libs/target/pe/include/scc_pe_idata.h @@ -12,12 +12,12 @@ typedef struct { u32 section_offset; ///< 在idata中的偏移 } scc_pe_hnt_builder_t; -typedef SCC_VEC(const char *) scc_winpe_name_vec_t; +typedef SCC_VEC(const char *) scc_pe_name_vec_t; typedef struct { const char *name; - scc_winpe_name_vec_t symbol_names; -}
scc_winpe_idata_lib_t; -typedef SCC_VEC(scc_winpe_idata_lib_t) scc_pe_idata_lib_vec_t; + scc_pe_name_vec_t symbol_names; +} scc_pe_idata_lib_t; +typedef SCC_VEC(scc_pe_idata_lib_t) scc_pe_idata_lib_vec_t; typedef struct { scc_pe_buffer_t buffer; ///< 导入表数据 diff --git a/libs/target/pe/src/scc_pe_idata.c b/libs/target/pe/src/scc_pe_idata.c index bfac113..8239fb7 100644 --- a/libs/target/pe/src/scc_pe_idata.c +++ b/libs/target/pe/src/scc_pe_idata.c @@ -71,7 +71,7 @@ u32 scc_pe_reserve_idata(scc_pe_idata_builder_t *builder) { sizeof(IMAGE_IMPORT_DESCRIPTOR); scc_vec_foreach(builder->idata_libs, i) { - scc_winpe_idata_lib_t *lib = &scc_vec_at(builder->idata_libs, i); + scc_pe_idata_lib_t *lib = &scc_vec_at(builder->idata_libs, i); idata_size += (scc_vec_size(lib->symbol_names) + 1) * 2 * sizeof(IMAGE_THUNK_DATA64); scc_winpe_hnt_builder_push(&builder->hnt_builder, lib->name, 0); @@ -98,7 +98,7 @@ scc_pe_buffer_t scc_pe_construct_idata(scc_pe_idata_builder_t *builder, usize current_offset = (import_file_count + 1) * sizeof(IMAGE_IMPORT_DESCRIPTOR); scc_vec_foreach(builder->idata_libs, i) { - scc_winpe_idata_lib_t *lib = &scc_vec_at(builder->idata_libs, i); + scc_pe_idata_lib_t *lib = &scc_vec_at(builder->idata_libs, i); scc_winpe_lookup_table_vec_t lookup_table; scc_vec_init(lookup_table); diff --git a/libs/target/pe/tests/test_pe_write_idata.c b/libs/target/pe/tests/test_pe_write_idata.c index c2e74a9..6492326 100644 --- a/libs/target/pe/tests/test_pe_write_idata.c +++ b/libs/target/pe/tests/test_pe_write_idata.c @@ -33,7 +33,7 @@ int main() { scc_pe_idata_builder_t idata_builder; scc_pe_idata_lib_vec_t idata_libs; scc_vec_init(idata_libs); - scc_winpe_idata_lib_t ucrtbase; + scc_pe_idata_lib_t ucrtbase; ucrtbase.name = "ucrtbase.dll"; scc_vec_init(ucrtbase.symbol_names); scc_vec_push(ucrtbase.symbol_names, "puts"); diff --git a/libs/target/sccf2target/cbuild.toml b/libs/target/sccf2target/cbuild.toml new file mode 100644 index 0000000..ba628e3 --- /dev/null +++ 
b/libs/target/sccf2target/cbuild.toml @@ -0,0 +1,12 @@ +[package] +name = "sccf2target" +version = "0.1.0" +authors = [] +description = "" + +dependencies = [ + { name = "sccf", path = "../../sccf" }, + { name = "pe", path = "../pe" }, +] +# features = {} +# default_features = [] diff --git a/libs/target/sccf2target/include/sccf2pe.h b/libs/target/sccf2target/include/sccf2pe.h new file mode 100644 index 0000000..337c7fb --- /dev/null +++ b/libs/target/sccf2target/include/sccf2pe.h @@ -0,0 +1,4 @@ +#include +#include + +void sccf2pe(scc_pe_builder_t *builder, const sccf_t *sccf); diff --git a/libs/target/sccf2target/src/main.c b/libs/target/sccf2target/src/main.c new file mode 100644 index 0000000..67ec264 --- /dev/null +++ b/libs/target/sccf2target/src/main.c @@ -0,0 +1,69 @@ +#include +#include +#include +#include + +int main() { + char data[] = "Hello, World from SCC PE Builder!\n\0"; + + /* clang-format off */ + char code[] = { + // sub rsp, 0x28 ; 为函数调用分配栈空间 + 0x48, 0x83, 0xEC, 0x28, + // lea rcx, [rip + data_offset] ; 将字符串地址加载到RCX(第一个参数) + 0x48, 0x8D, 0x0D, 0x00, 0x00, 0x00, 0x00, + // call qword ptr [rip + puts_iat] ; 通过IAT调用puts + 0xFF, 0x15, 0x00, 0x00, 0x00, 0x00, + // add rsp, 0x28 ; 恢复栈空间 + 0x48, 0x83, 0xC4, 0x28, + // xor eax, eax ; 设置返回值为0 + 0x33, 0xC0, + // ret ; 返回 + 0xC3, + }; + /* clang-format on */ + + sccf_builder_t builder; + sccf_builder_init(&builder); + sccf_sect_data_t text_section = { + .data = (u8 *)code, .size = sizeof(code), .cap = sizeof(code)}; + sccf_sect_data_t data_section = { + .data = (u8 *)data, .size = sizeof(data), .cap = sizeof(data)}; + sccf_builder_add_text_section(&builder, &text_section); + sccf_builder_add_data_section(&builder, &data_section); + usize str_idx = + sccf_builder_add_symbol(&builder, "str_data", + &(sccf_sym_t){ + .sccf_sect_offset = 0, + .sccf_sect_type = SCCF_SECT_DATA, + .sccf_sym_bind = SCCF_SYM_BIND_GLOBAL, + .sccf_sym_size = sizeof(data), + .sccf_sym_type = SCCF_SYM_TYPE_DATA, + .sccf_sym_vis = 
SCCF_SYM_VIS_DEFAULT, + }); + usize puts_idx = + sccf_builder_add_symbol(&builder, "puts", + &(sccf_sym_t){ + .sccf_sect_offset = 0, + .sccf_sect_type = SCCF_SECT_NONE, + .sccf_sym_bind = SCCF_SYM_BIND_GLOBAL, + .sccf_sym_size = 8, + .sccf_sym_type = SCCF_SYM_TYPE_EXTERN, + .sccf_sym_vis = SCCF_SYM_VIS_DEFAULT, + }); + sccf_builder_add_reloc(&builder, (sccf_reloc_t){.addend = 4, + .offset = 7, + .sect_type = SCCF_SECT_CODE, + .sym_idx = str_idx, + .type = SCCF_RELOC_REL}); + sccf_builder_add_reloc(&builder, (sccf_reloc_t){.addend = 4, + .offset = 13, + .sect_type = SCCF_SECT_CODE, + .sym_idx = puts_idx, + .type = SCCF_RELOC_REL}); + const sccf_t *sccf = sccf_builder_to_sccf(&builder); + + scc_pe_builder_t pe_builder; + sccf2pe(&pe_builder, sccf); + scc_pe_dump_to_file(&pe_builder, __FILE__ "/../../test.exe"); +} diff --git a/libs/target/sccf2target/src/sccf2pe.c b/libs/target/sccf2target/src/sccf2pe.c new file mode 100644 index 0000000..9d09386 --- /dev/null +++ b/libs/target/sccf2target/src/sccf2pe.c @@ -0,0 +1,239 @@ +#include +#include +#include + +typedef struct { + scc_hashtable_t str2libsym; + scc_pe_idata_lib_vec_t idata_libs; +} pe_idata_lib_ctx_t; + +static void load_from_def(pe_idata_lib_ctx_t *ctx, const char *file_path, + const char *dll_name) { + /* + LIBRARY + EXPORTS + name @number + ...
+ */ + scc_cstring_t fpath = scc_cstring_from_cstr(file_path); + scc_cstring_append_ch(&fpath, '/'); + scc_cstring_append_cstr(&fpath, dll_name, scc_strlen(dll_name)); + scc_cstring_append_cstr(&fpath, ".def", 4); + const char *fname = scc_cstring_as_cstr(&fpath); + scc_file_t fp = scc_fopen(fname, SCC_FILE_READ); + if (fp == null) { + LOG_ERROR("load_from_def file read error: %s", fname); + return; + } + + usize fsize = scc_fsize(fp); + // One spare byte so the last name in the file can always be NUL-terminated. + char *buffer = scc_malloc(fsize + 1); + Assert(buffer != null); + + usize read_size = scc_fread(fp, buffer, fsize); + Assert(read_size == fsize); + scc_fclose(fp); + buffer[fsize] = '\0'; + + // Skip the two header lines (LIBRARY / EXPORTS). + usize i = 0; + for (usize line = 0; line < 2 && i < fsize; i += 1) { + if (buffer[i] == '\n') { + line += 1; + } + } + + // Each remaining line is "name @number": register the name once and ignore the rest of the line. + while (i < fsize) { + if (buffer[i] == ' ' || buffer[i] == '\t' || buffer[i] == '\r' || + buffer[i] == '\n') { + i += 1; + continue; + } + char *sym_name = buffer + i; + while (i < fsize && buffer[i] != ' ' && buffer[i] != '\t' && + buffer[i] != '\r' && buffer[i] != '\n') { + i += 1; + } + cbool at_line_end = (i >= fsize || buffer[i] == '\n'); + buffer[i] = '\0'; // in bounds: i <= fsize and buffer holds fsize + 1 bytes + i += 1; + // FIXME memory leak: buffer is never freed because the table keys point into it + scc_hashtable_set(&ctx->str2libsym, sym_name, (void *)dll_name); + while (!at_line_end && i < fsize && buffer[i] != '\n') { + i += 1; + } + } +} + +static void pe_idata_lib_init(pe_idata_lib_ctx_t *ctx) { + // Got .dll.def + + scc_hashtable_init(&ctx->str2libsym, + (scc_hashtable_hash_func_t)scc_strhash32, + (scc_hashtable_equal_func_t)scc_strcmp); + scc_vec_init(ctx->idata_libs); + load_from_def(ctx, "./.dll_def", "ucrtbase.dll"); +} + +static cbool pe_idata_get(pe_idata_lib_ctx_t *ctx, const char *name) { + const char *lib_name = scc_hashtable_get(&ctx->str2libsym, name); + if (lib_name == null) { + return false; + } + + scc_pe_idata_lib_t *lib = null; + scc_vec_foreach(ctx->idata_libs, i) { + scc_pe_idata_lib_t *idata_lib = &scc_vec_at(ctx->idata_libs, i); + if (scc_strcmp(lib_name, idata_lib->name) == 0) { + lib = idata_lib; + break; + } + } + if (lib == null) { + scc_pe_idata_lib_t new_lib; + new_lib.name = lib_name; + scc_vec_init(new_lib.symbol_names); + scc_vec_push(ctx->idata_libs, new_lib); + lib = &scc_vec_at(ctx->idata_libs, scc_vec_size(ctx->idata_libs) - 1); + } + +
scc_vec_push(lib->symbol_names, name); + return true; +} + +void sccf2pe(scc_pe_builder_t *builder, const sccf_t *sccf) { + scc_pe_builder_init(builder, true, 4096, 512); + sccf_strtab_t strtab; + scc_vec_init(strtab); + sccf_reloc_vec_t relocs; + scc_vec_init(relocs); + sccf_sym_vec_t symtab; + scc_vec_init(symtab); + + sccf_sect_data_t *text_data = null; + + scc_pe_reserve_header(builder, 3); + + scc_pe_section_range code_range = {0}; + scc_pe_section_range data_range = {0}; + scc_pe_section_range idata_range = {0}; + + scc_vec_foreach(sccf->sect_headers, i) { + sccf_sect_header_t *sect_header = &scc_vec_at(sccf->sect_headers, i); + sccf_sect_data_t *sect_data = &scc_vec_at(sccf->sect_datas, i); + if (sect_header->sccf_sect_type == SCCF_SECT_CODE) { + text_data = sect_data; + code_range = scc_pe_reserve_text_section_header( + builder, scc_vec_size(*sect_data)); + } else if (sect_header->sccf_sect_type == SCCF_SECT_DATA) { + data_range = scc_pe_reserve_data_section_header( + builder, scc_vec_size(*sect_data)); + } else if (sect_header->sccf_sect_type == SCCF_SECT_STRTAB) { + scc_vec_unsafe_from_buffer( + strtab, (char *)scc_vec_unsafe_get_data(*sect_data), + scc_vec_size(*sect_data)); + } else if (sect_header->sccf_sect_type == SCCF_SECT_RELOCS) { + scc_vec_unsafe_from_buffer( + relocs, (sccf_reloc_t *)scc_vec_unsafe_get_data(*sect_data), + scc_vec_size(*sect_data)); + } else if (sect_header->sccf_sect_type == SCCF_SECT_SYMTAB) { + scc_vec_unsafe_from_buffer( + symtab, (sccf_sym_t *)scc_vec_unsafe_get_data(*sect_data), + scc_vec_size(*sect_data)); + } + } + + pe_idata_lib_ctx_t idata_lib_ctx; + pe_idata_lib_init(&idata_lib_ctx); + scc_vec_foreach(symtab, i) { + sccf_sym_t *sym = &scc_vec_at(symtab, i); + if (sym->sccf_sym_type == SCCF_SYM_TYPE_EXTERN) { + const char *name = + (const char *)&scc_vec_at(strtab, sym->name_offset); + if (pe_idata_get(&idata_lib_ctx, name) == false) { + LOG_ERROR("link error: symbol [%s] not found", name); + } + } + } + 
scc_pe_idata_builder_t idata_builder; + scc_pe_idata_builder_init(&idata_builder, &idata_lib_ctx.idata_libs); + u32 idata_size = scc_pe_reserve_idata(&idata_builder); + idata_range = scc_pe_reserve_idata_section_header(builder, idata_size); + scc_pe_buffer_t idata_buffer = + scc_pe_construct_idata(&idata_builder, &idata_range); + + u32 entry_point_offset = 0; + u64 base_address = 0x140000000; + u32 entry_point = code_range.virual_address + entry_point_offset; + scc_pe_config_t config = (scc_pe_config_t){ + .machine = IMAGE_FILE_MACHINE_AMD64, + .time_date_stamp = 0, + .characteristics = + IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_LARGE_ADDRESS_AWARE, + .major_linker_version = 14, + .minor_linker_version = 0, + .address_of_entry_point = entry_point, + .image_base = base_address, + .major_operating_system_version = 6, + .minor_operating_system_version = 0, + .major_image_version = 0, + .minor_image_version = 0, + .major_subsystem_version = 6, + .minor_subsystem_version = 0, + .subsystem = IMAGE_SUBSYSTEM_WINDOWS_CUI, + .dll_characteristics = IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA | + IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | + IMAGE_DLLCHARACTERISTICS_NX_COMPAT, + .size_of_stack_reserve = 0x100000, + .size_of_stack_commit = 0x1000, + .size_of_heap_reserve = 0x100000, + .size_of_heap_commit = 0x1000, + }; + + scc_vec_foreach(relocs, i) { + sccf_reloc_t *reloc = &scc_vec_at(relocs, i); + sccf_sym_t *sym = &scc_vec_at(symtab, reloc->sym_idx); + const char *name = &scc_vec_at(strtab, sym->name_offset); + + u32 rva = 0; + if (sym->sccf_sym_type == SCCF_SYM_TYPE_EXTERN) { + rva = scc_pe_idata_get_symbol_rva(&idata_builder, name); + } else if (sym->sccf_sect_type == SCCF_SECT_DATA) { + rva = data_range.virual_address + sym->sccf_sect_offset; + } else if (sym->sccf_sect_type == SCCF_SECT_CODE) { + rva = code_range.virual_address + sym->sccf_sect_offset; + } else { + Panic("unsupported reloc symbol type"); + } + + Assert(rva != 0); + if (reloc->type == SCCF_RELOC_ABS) { + 
TODO(); + } + Assert(reloc->sect_type == SCCF_SECT_CODE); + rva -= code_range.virual_address + reloc->offset + reloc->addend; + Assert(text_data != null); + // FIXME 需要确保宿主机与目标机器大小端一致 + *(u32 *)(scc_vec_unsafe_get_data(*text_data) + reloc->offset) = rva; + } + + scc_pe_write_header(builder, &config); + scc_vec_foreach(sccf->sect_headers, i) { + sccf_sect_header_t *sect_header = &scc_vec_at(sccf->sect_headers, i); + sccf_sect_data_t *sect_data = &scc_vec_at(sccf->sect_datas, i); + if (sect_header->sccf_sect_type == SCCF_SECT_CODE) { + scc_pe_write_section(builder, &code_range, + (u8 *)scc_vec_unsafe_get_data(*sect_data), + scc_vec_size(*sect_data)); + } else if (sect_header->sccf_sect_type == SCCF_SECT_DATA) { + scc_pe_write_section(builder, &data_range, + (u8 *)scc_vec_unsafe_get_data(*sect_data), + scc_vec_size(*sect_data)); + } + } + scc_pe_write_section(builder, &idata_range, + scc_vec_unsafe_get_data(idata_buffer), + scc_vec_size(idata_buffer)); +}