From cabd1710edec4125493cd12627278eef529749cb Mon Sep 17 00:00:00 2001 From: zzy <2450266535@qq.com> Date: Mon, 16 Mar 2026 11:02:32 +0800 Subject: [PATCH] =?UTF-8?q?feat(parser):=20=E4=BD=BF=E7=94=A8=E9=9D=99?= =?UTF-8?q?=E6=80=81=E6=95=B0=E7=BB=84=E5=88=9D=E5=A7=8B=E5=8C=96=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E5=90=91=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 将多个测试用例中的 `scc_vec_unsafe_from_array` 替换为 `scc_vec_unsafe_from_static_array` 以提高性能 - 此更改影响了 `test_parser_unit` 和 `test_parser_type` 函数中的多个位置 feat(sccf): 添加SCC格式支持和相关工具 - 创建新的 sccf 库用于处理 SCCF (SCC Format) 文件格式 - 实现了基本的文件格式定义,包括头部、段表、符号表等结构 - 添加了构建器和链接器的基本框架 - 包含格式化工具的初始实现 refactor(main): 修复输出文件检查逻辑 - 修正主函数中输出文件检查条件,确保在 fp 为 null 时正确处理 - 更新输出消息显示逻辑以匹配文件操作状态 --- libs/parser/tests/test_parser_unit.c | 46 ++-- libs/sccf/README.md | 1 + libs/sccf/cbuild.toml | 8 + libs/sccf/include/sccf.h | 165 +++++++++++++ libs/sccf/include/sccf_builder.h | 28 +++ libs/sccf/include/sccf_linker.h | 18 ++ libs/sccf/include/sccf_utils.h | 296 ++++++++++++++++++++++++ libs/sccf/src/main.c | 8 + libs/sccf/src/sccf_builder.c | 51 ++++ libs/sccf/tests/test_sccf_amd64.c | 0 runtime/scc_core/include/scc_core_vec.h | 11 +- src/main.c | 9 +- 12 files changed, 614 insertions(+), 27 deletions(-) create mode 100644 libs/sccf/README.md create mode 100644 libs/sccf/cbuild.toml create mode 100644 libs/sccf/include/sccf.h create mode 100644 libs/sccf/include/sccf_builder.h create mode 100644 libs/sccf/include/sccf_linker.h create mode 100644 libs/sccf/include/sccf_utils.h create mode 100644 libs/sccf/src/main.c create mode 100644 libs/sccf/src/sccf_builder.c create mode 100644 libs/sccf/tests/test_sccf_amd64.c diff --git a/libs/parser/tests/test_parser_unit.c b/libs/parser/tests/test_parser_unit.c index 90a0657..035d00e 100644 --- a/libs/parser/tests/test_parser_unit.c +++ b/libs/parser/tests/test_parser_unit.c @@ -112,7 +112,7 @@ static void test_parser_unit(void) { scc_ast_decl_t 
void_decl; scc_ast_decl_param_init(&void_decl, &scc_ast_builtin_type_void, null); scc_ast_decl_t *array[] = {&void_decl}; - scc_vec_unsafe_from_array(func_params, array); + scc_vec_unsafe_from_static_array(func_params, array); scc_ast_type_function_init(&func_type, &scc_ast_builtin_type_int, &func_params); // 构造复合语句块(空) @@ -134,7 +134,7 @@ static void test_parser_unit(void) { scc_ast_decl_t void_decl; scc_ast_decl_param_init(&void_decl, &scc_ast_builtin_type_void, null); scc_ast_decl_t *array[] = {&void_decl}; - scc_vec_unsafe_from_array(func_params, array); + scc_vec_unsafe_from_static_array(func_params, array); scc_ast_type_function_init(&func_type, &scc_ast_builtin_type_int, &func_params); @@ -179,7 +179,7 @@ static void test_parser_unit(void) { scc_ast_decl_t void_decl; scc_ast_decl_param_init(&void_decl, &scc_ast_builtin_type_void, null); scc_ast_decl_t *array[] = {&void_decl}; - scc_vec_unsafe_from_array(func_params, array); + scc_vec_unsafe_from_static_array(func_params, array); scc_ast_type_function_init(&func_type, &scc_ast_builtin_type_int, &func_params); @@ -374,7 +374,7 @@ static void test_parser_unit(void) { scc_ast_decl_param_init( &param2, (scc_ast_type_t *)&scc_ast_builtin_type_va_list, null); scc_ast_decl_t *params_array[] = {&param0, &param1, &param2}; - scc_vec_unsafe_from_array(params, params_array); + scc_vec_unsafe_from_static_array(params, params_array); scc_ast_type_t decl_func_type; scc_ast_type_function_init(&decl_func_type, (scc_ast_type_t *)&scc_ast_builtin_type_int, @@ -398,7 +398,7 @@ static void test_parser_unit(void) { scc_ast_node_t *array[] = {&typedef_decl.base, &i32a_decl.base}; scc_ast_stmt_t stmt; scc_ast_block_item_vec_t items; - scc_vec_unsafe_from_array(items, array); + scc_vec_unsafe_from_static_array(items, array); scc_ast_stmt_compound_init(&stmt, &items); SCC_CHECK_AST_WITH_SEMA(&stmt.base, "{typedef int int32_t;int32_t a;}", scc_parse_statement); @@ -414,7 +414,7 @@ static void test_parser_unit(void) { 
scc_ast_decl_val_init(&void_ptr_a_decl, &void_ptr_type, "a", null); scc_ast_node_t *array2[] = {&void_ptr_decl.base, &void_ptr_a_decl.base}; - scc_vec_unsafe_from_array(items, array2); + scc_vec_unsafe_from_static_array(items, array2); scc_ast_stmt_compound_init(&stmt, &items); SCC_CHECK_AST_WITH_SEMA(&stmt.base, "{typedef void* void_ptr; void_ptr a;}", @@ -452,7 +452,7 @@ static void test_parser_unit(void) { scc_ast_node_t *array[] = {&typedef_decl.base, &typedef_impl_decl.base}; scc_ast_stmt_t stmt; scc_ast_block_item_vec_t items; - scc_vec_unsafe_from_array(items, array); + scc_vec_unsafe_from_static_array(items, array); scc_ast_stmt_compound_init(&stmt, &items); SCC_CHECK_AST_WITH_SEMA( &stmt.base, "{typedef struct { int x; } struct_t; struct_t a;}", @@ -474,7 +474,7 @@ static void test_parser_unit(void) { scc_ast_decl_param_init(&param3, &scc_ast_builtin_type_va_list, null); scc_ast_decl_t *params_array[] = {&param1, &param2, &param3}; scc_ast_decl_vec_t func_params; - scc_vec_unsafe_from_array(func_params, params_array); + scc_vec_unsafe_from_static_array(func_params, params_array); scc_ast_type_t func_type; scc_ast_type_t return_type; @@ -487,7 +487,7 @@ static void test_parser_unit(void) { scc_ast_decl_t *decls_array[] = {&type_decl, &func_decl}; scc_ast_translation_unit_t tu; scc_ast_decl_vec_t decls; - scc_vec_unsafe_from_array(decls, decls_array); + scc_vec_unsafe_from_static_array(decls, decls_array); scc_ast_translation_unit_init(&tu, &decls); SCC_CHECK_AST_WITH_SEMA(&tu.base, "typedef long long size_t;" @@ -500,7 +500,7 @@ static void test_parser_unit(void) { scc_ast_decl_typedef_init(&type_func_ptr_decl, "func_t", &type_func_ptr_type); scc_ast_decl_t *decls_array2[] = {&type_decl, &type_func_ptr_decl}; - scc_vec_unsafe_from_array(decls, decls_array2); + scc_vec_unsafe_from_static_array(decls, decls_array2); scc_ast_translation_unit_init(&tu, &decls); SCC_CHECK_AST_WITH_SEMA( &tu.base, @@ -578,11 +578,11 @@ static void test_parser_unit(void) { scc_ast_expr_vec_t 
lhs_exprs; scc_ast_expr_t *lhs_array[] = {&lhs1, &lhs2, &lhs3}; - scc_vec_unsafe_from_array(lhs_exprs, lhs_array); + scc_vec_unsafe_from_static_array(lhs_exprs, lhs_array); scc_ast_expr_vec_t rhs_exprs; scc_ast_expr_t *rhs_array[] = {&rhs1, &rhs2, &rhs3}; - scc_vec_unsafe_from_array(rhs_exprs, rhs_array); + scc_vec_unsafe_from_static_array(rhs_exprs, rhs_array); scc_ast_expr_t expr; scc_ast_expr_compound_init(&expr, &lvalue, &lhs_exprs, &rhs_exprs); @@ -608,9 +608,9 @@ static void test_parser_unit(void) { scc_ast_expr_array_subscript_init(&lhs7, &lhs6, &rl0); scc_ast_expr_t *lhs_array_hard[] = {&lhs5, &lhs7}; - scc_vec_unsafe_from_array(lhs_exprs, lhs_array_hard); + scc_vec_unsafe_from_static_array(lhs_exprs, lhs_array_hard); scc_ast_expr_t *rhs_array_hard[] = {&rhs2, &rhs3}; - scc_vec_unsafe_from_array(rhs_exprs, rhs_array_hard); + scc_vec_unsafe_from_static_array(rhs_exprs, rhs_array_hard); scc_ast_expr_compound_init(&expr, &lvalue, &lhs_exprs, &rhs_exprs); SCC_CHECK_AST(&expr.base, "(void){.a.b = 0, .c[0] = 0}", scc_parse_expression); @@ -834,7 +834,7 @@ static void test_parser_unit(void) { scc_ast_decl_t void_decl; scc_ast_decl_param_init(&void_decl, &scc_ast_builtin_type_void, null); scc_ast_decl_t *array[] = {&void_decl}; - scc_vec_unsafe_from_array(func_params, array); + scc_vec_unsafe_from_static_array(func_params, array); scc_ast_type_function_init(&func_type, &ptr_to_array, &func_params); // 无参数 @@ -863,7 +863,7 @@ static void test_parser_unit(void) { scc_ast_decl_param_init(&param1, &scc_ast_builtin_type_va_list, null); scc_ast_decl_t *func_hard_array[] = {&param1, &param2, &param3}; scc_ast_decl_vec_t func_hard_params; - scc_vec_unsafe_from_array(func_hard_params, func_hard_array); + scc_vec_unsafe_from_static_array(func_hard_params, func_hard_array); scc_ast_type_function_init(&func_hard_type, &ptr_to_array, &func_hard_params); scc_ast_decl_func_init(&func_hard_decl, &func_hard_type, "bar", null); @@ -873,7 +873,7 @@ static void test_parser_unit(void) { 
&typedef_func_decl, &func_hard_decl, }; - scc_vec_unsafe_from_array(decls, decls_array); + scc_vec_unsafe_from_static_array(decls, decls_array); scc_ast_translation_unit_t tu; scc_ast_translation_unit_init(&tu, &decls); // SCC_CHECK_AST_WITH_SEMA( @@ -1241,7 +1241,7 @@ static void test_parser_type(void) { scc_ast_decl_param_init(&void_decl, &scc_ast_builtin_type_void, null); scc_ast_decl_t *array[] = {&void_decl}; - scc_vec_unsafe_from_array(func_params, array); + scc_vec_unsafe_from_static_array(func_params, array); scc_ast_type_function_init( &func_void, (scc_ast_type_t *)&scc_ast_builtin_type_int, &func_params); @@ -1486,7 +1486,7 @@ static void test_parser_type(void) { scc_ast_decl_t void_decl; scc_ast_decl_param_init(&void_decl, &scc_ast_builtin_type_void, null); scc_ast_decl_t *array[] = {&void_decl}; - scc_vec_unsafe_from_array(func_params, array); + scc_vec_unsafe_from_static_array(func_params, array); scc_ast_type_function_init(&func_void, (scc_ast_type_t *)&scc_ast_builtin_type_int, &func_params); @@ -1575,7 +1575,7 @@ static void test_parser_type(void) { scc_ast_decl_t void_decl; scc_ast_decl_param_init(&void_decl, &scc_ast_builtin_type_void, null); scc_ast_decl_t *array[] = {&void_decl}; - scc_vec_unsafe_from_array(func_params, array); + scc_vec_unsafe_from_static_array(func_params, array); scc_ast_type_function_init(&func_type, &ptr_to_array, &func_params); // 无参数 @@ -1590,7 +1590,7 @@ static void test_parser_type(void) { // 1) 函数类型:返回 int,无参数 scc_ast_type_t func_type2; scc_ast_decl_vec_t func_params2; - scc_vec_unsafe_from_array(func_params2, array); + scc_vec_unsafe_from_static_array(func_params2, array); scc_ast_type_function_init(&func_type2, (scc_ast_type_t *)&scc_ast_builtin_type_int, &func_params2); @@ -1678,7 +1678,7 @@ static void test_parser_type(void) { scc_ast_expr_identifier_init(&blue, "BLUE"); scc_ast_expr_vec_t enumerators; scc_ast_expr_t *array[] = {&red, &green, &blue}; - scc_vec_unsafe_from_array(enumerators, array); + 
scc_vec_unsafe_from_static_array(enumerators, array); scc_ast_decl_t enum_def; scc_ast_decl_enum_init(&enum_def, null, &enumerators); @@ -1687,7 +1687,7 @@ static void test_parser_type(void) { SCC_CHECK_AST(&enum_type.base, "enum { RED, GREEN, BLUE }", scc_parse_type_name); - scc_vec_unsafe_from_array(enumerators, array); + scc_vec_unsafe_from_static_array(enumerators, array); scc_ast_decl_enum_init(&enum_def, "E", &enumerators); scc_ast_type_enum_init(&enum_type, "E", &enum_def); SCC_CHECK_AST(&enum_type.base, "enum E { RED, GREEN, BLUE, }", diff --git a/libs/sccf/README.md b/libs/sccf/README.md new file mode 100644 index 0000000..b161b05 --- /dev/null +++ b/libs/sccf/README.md @@ -0,0 +1 @@ +SCC Format diff --git a/libs/sccf/cbuild.toml b/libs/sccf/cbuild.toml new file mode 100644 index 0000000..025be91 --- /dev/null +++ b/libs/sccf/cbuild.toml @@ -0,0 +1,8 @@ +[package] +name = "scc_format" +version = "0.1.0" + +dependencies = [ + { name = "scc_core", path = "../../runtime/scc_core" }, + { name = "scc_utils", path = "../../runtime/scc_utils" }, +] diff --git a/libs/sccf/include/sccf.h b/libs/sccf/include/sccf.h new file mode 100644 index 0000000..3542a85 --- /dev/null +++ b/libs/sccf/include/sccf.h @@ -0,0 +1,165 @@ +/** + * @file sccf.h + * @brief scc format (sccf) 头文件 + * + * SCCF是一个极简的可执行可链接文件格式, 专注于内部链接处理, + * 同时提供外部符号导入/导出的抽象接口。 + */ + +#ifndef __SCC_FORMAT_H__ +#define __SCC_FORMAT_H__ + +#include + +#define sccf_byte_t uint8_t +#define sccf_enum_t uint32_t +#define sccf_size_t uint64_t +#define sccf_isize_t int64_t + +#ifdef __cplusplus +extern "C" { +#endif + +/** SCCF魔数 */ +#define SCCF_MAGIC "SCCFmt\0\0" + +/** SCCF版本号 */ +#define SCCF_VERSION 1 + +/** 架构类型 */ +typedef enum { + SCCF_ARCH_UNKNOWN = 0, ///< 未知占位符架构 + SCCF_ARCH_RISCV32 = 1, ///< RISC-V 32位 小端 + SCCF_ARCH_RISCV64 = 2, ///< RISC-V 64位 小端 + SCCF_ARCH_X86 = 3, ///< x86 小端 + SCCF_ARCH_AMD64 = 4, ///< AMD64 小端 +} sccf_arch_t; + +/** 文件标志位 */ +typedef enum { + SCCF_TYPE_FLAG_EXECUTABLE = 0x01, 
///< 可执行文件 + SCCF_TYPE_FLAG_RELOCATABLE = 0x02, ///< 可重定位文件 + SCCF_TYPE_FLAG_EXE_RELOC = 0x03, ///< 内部链接后的可执行文件 +} sccf_type_flags_t; + +/** 符号类型 */ +typedef enum { + SCCF_SYM_TYPE_UNDEF = 0, ///< 未定义 + SCCF_SYM_TYPE_FUNC = 1, ///< 函数 + SCCF_SYM_TYPE_DATA = 2, ///< 数据 + SCCF_SYM_TYPE_OBJECT = 3, ///< 对象 +} sccf_sym_type_t; + +/** 符号绑定类型 */ +typedef enum { + SCCF_SYM_BIND_LOCAL = 0, ///< 局部符号 + SCCF_SYM_BIND_GLOBAL = 1, ///< 全局符号 + SCCF_SYM_BIND_WEAK = 2, ///< 弱引用 +} sccf_sym_bind_t; + +/** 符号可见性 */ +typedef enum { + SCCF_SYM_VIS_DEFAULT = 0, ///< 默认可见性 + SCCF_SYM_VIS_HIDDEN = 1, ///< 隐藏 + SCCF_SYM_VIS_PROTECTED = 2, ///< 受保护 +} sccf_sym_vis_t; + +/** 段类型 */ +typedef enum { + SCCF_SECT_NONE = 0, ///< 无 + SCCF_SECT_CODE = 1, ///< 代码段 + SCCF_SECT_DATA = 2, ///< 数据段 + SCCF_SECT_RODATA = 3, ///< 只读数据段 + SCCF_SECT_UNINIT_DATA = 4, ///< BSS段(未初始化数据) + SCCF_SECT_SYMTAB = 5, ///< 符号表 + SCCF_SECT_STRTAB = 6, ///< 字符串表 + SCCF_SECT_RELOC = 7, ///< 重定位表 +} sccf_sect_type_t; + +/** 重定位类型 */ +typedef enum { + SCCF_RELOC_ABS = 1, ///< 绝对地址 + SCCF_RELOC_REL = 2, ///< 相对地址 +} sccf_reloc_type_t; + +/** + * @brief SCCF文件头 + */ +typedef struct sccf_header { + sccf_byte_t magic[8]; ///< 魔数: "SCCFmt\0\0" + sccf_enum_t type; ///< 类型 + sccf_enum_t version; ///< 版本号 + sccf_enum_t arch; ///< 架构 + sccf_size_t entry_point; ///< 入口点在代码段的相对地址 + sccf_size_t sect_header_num; ///< 节头数量 + sccf_size_t reserved[4]; ///< 保留字段, 可置零 +} sccf_header_t; + +/** + * @brief SCCF段 + */ +typedef struct { + sccf_byte_t name[8]; ///< 段名称 仅供展示 eg. emoji or ".text" + sccf_enum_t scf_sect_type; ///< 段类型 (内部实际区分的方式) + sccf_size_t size; ///< 段数据实际的大小 + sccf_size_t data_size; ///< 段数据的有效数据大小 + sccf_size_t addralign; ///< 内存对齐要求 (字节对齐, 如 1,2,4,8,...) 
+ sccf_size_t info; ///< 段信息 (如条目数,链接索引等) + sccf_size_t reserved[2]; ///< 保留 +} sccf_sect_header_t; + +/** + * @brief SCCF符号表 + */ +typedef struct { + sccf_size_t name_offset; ///< 符号名称在字符串表中的偏移量 + sccf_enum_t scf_sym_type; ///< 符号类型 + sccf_enum_t scf_sym_bind; ///< 符号绑定类型 + sccf_enum_t scf_sym_vis; ///< 符号可见性 + sccf_enum_t scf_sect_type; ///< 该符号的段类型 + sccf_size_t scf_sect_offset; ///< 该符号在段中的偏移量 + sccf_size_t scf_sym_size; ///< 该符号符号选中的大小 +} sccf_sym_t; + +/** + * @brief SCCF重定向条目 + */ +typedef struct { + sccf_size_t offset; ///< 在数据段中的偏移量 + sccf_size_t sym_idx; ///< 符号索引 + sccf_enum_t type; ///< 重定位类型 + sccf_enum_t sect_type; ///< 段类型(代码段/数据段) + sccf_isize_t addend; ///< 加数 +} sccf_reloc_t; + +/** + * @brief sccf 格式结构(可选 填充到某种自定义的对齐 + * 默认启用sect_datas的64bytes对齐规则) + * + * @warning 其中sccf_sect_header_t的顺序必须和sccf_sect_data_t一致, + * 但是不规定sect_headers内部的顺序下面顺序仅供参考 + * + * sccf_header_t header; + * sccf_sect_header_t sect_headers[header.sect_header_num] = { + * [0] = text_section, + * [1] = data_section, + * [2] = strtab_section, + * [3] = symtab_section, + * [4] = reloc_section, + * [5-...] = other_section, + * }; + * sccf_sect_data_t sect_datas[header.sect_header_num][...] = { + * [0] = text_section_data, + * [1] = data_section_data, + * [2] = strtab_section_data, + * [3] = symtab_section_data, + * [4] = reloc_section_data, + * [5-...] 
= other_section_data, + * }; + */ + +#ifdef __cplusplus +} +#endif + +#endif /* __SCC_FORMAT_H__ */ diff --git a/libs/sccf/include/sccf_builder.h b/libs/sccf/include/sccf_builder.h new file mode 100644 index 0000000..88f9d5b --- /dev/null +++ b/libs/sccf/include/sccf_builder.h @@ -0,0 +1,28 @@ +#ifndef __SCC_FORMAT_BUILDER_H__ +#define __SCC_FORMAT_BUILDER_H__ + +#include "sccf_utils.h" +#include + +typedef SCC_VEC(sccf_sym_t) sccf_sym_vec_t; +typedef SCC_VEC(sccf_reloc_t) sccf_reloc_vec_t; + +typedef struct { + sccf_t sccf; + int aligned; + scc_strpool_t strpool; + scc_hashtable_t str2offset; + sccf_sym_vec_t syms; + sccf_reloc_vec_t relocs; +} sccf_builder_t; + +void sccf_builder_init(sccf_builder_t *builder); + +void sccf_builder_add_section(sccf_builder_t *builder, + sccf_sect_header_t *sect_header, + sccf_sect_data_t *sect_data); + +void sccf_builder_to_buffer(sccf_builder_t *builder, sccf_buffer_t *buffer); +void sccf_builder_to_file(sccf_builder_t *builder, const char *file_path); + +#endif /* __SCC_FORMAT_BUILDER_H__ */ diff --git a/libs/sccf/include/sccf_linker.h b/libs/sccf/include/sccf_linker.h new file mode 100644 index 0000000..afdc276 --- /dev/null +++ b/libs/sccf/include/sccf_linker.h @@ -0,0 +1,18 @@ +#ifndef __SCC_FORMAT_LINKER_H__ +#define __SCC_FORMAT_LINKER_H__ + +#include "sccf_builder.h" + +typedef SCC_VEC(sccf_t) sccf_vec_t; + +typedef struct { + sccf_builder_t builder; + + sccf_vec_t link_sccfs; + scc_strpool_t strpool; + scc_hashtable_t str2offset; + sccf_sym_vec_t syms; + sccf_reloc_vec_t relocs; +} sccf_linker_t; + +#endif /* __SCC_FORMAT_LINKER_H__ */ diff --git a/libs/sccf/include/sccf_utils.h b/libs/sccf/include/sccf_utils.h new file mode 100644 index 0000000..aad1ae0 --- /dev/null +++ b/libs/sccf/include/sccf_utils.h @@ -0,0 +1,296 @@ +/** + * @file sccf_utils.h + * @brief SCCF 格式辅助函数 + * + * 提供基于节头表的节定位、查找和遍历函数, 不涉及字节序转换。 + * 假设文件已经为主机字节序。 + */ + +#ifndef __SCC_FORMAT_UTILS_H__ +#define __SCC_FORMAT_UTILS_H__ + +#include "sccf.h" 
+#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @name 节头表定位 + * @{ + */ + +/** + * @brief 获取节头表的起始地址 + * @param base 文件缓冲区起始地址 + * @return 节头表指针 (需强制转换为 sccf_sect_header_t*) + */ +static inline u8 *sccf_sect_header_table(u8 *base) { + return base + sizeof(sccf_header_t); +} + +/** + * @brief 获取指定索引的节头指针 + * @param base 文件缓冲区起始地址 + * @param idx 节索引 (0 <= idx < sect_header_num) + * @return 指向该节头的指针, 若索引无效返回 null + */ +static inline sccf_sect_header_t *sccf_sect_header(u8 *base, usize idx) { + sccf_header_t *hdr = (sccf_header_t *)base; + if (idx >= (usize)hdr->sect_header_num) + return null; + u8 *table = sccf_sect_header_table(base); + return (sccf_sect_header_t *)(table + idx * sizeof(sccf_sect_header_t)); +} + +/** @} */ + +/** + * @name 节数据定位 + * @{ + */ + +/** + * @brief 计算指定索引的节数据在文件中的偏移 + * @param base 文件缓冲区起始地址 + * @param idx 节索引 + * @return 数据起始偏移 (相对于文件头) , 若索引无效返回 0, 有效值始终大于 0 + */ +static inline usize sccf_sect_data_offset(u8 *base, usize idx) { + const sccf_header_t *hdr = (const sccf_header_t *)base; + if (idx >= (usize)hdr->sect_header_num) + return 0; + + usize offset = sizeof(sccf_header_t) + + (usize)hdr->sect_header_num * sizeof(sccf_sect_header_t); + for (usize i = 0; i < idx; ++i) { + const sccf_sect_header_t *sh = sccf_sect_header(base, i); + offset += + (usize)sh->size; /* 使用 size 字段作为节数据在文件中的实际大小 */ + } + return offset; +} + +/** + * @brief 获取指定索引的节数据起始地址 + * @param base 文件缓冲区起始地址 + * @param idx 节索引 + * @return 数据起始地址, 若索引无效返回 null + */ +static inline u8 *sccf_sect_data(u8 *base, usize idx) { + usize off = sccf_sect_data_offset(base, idx); + return (off == 0) ? 
null : base + off; +} + +/** @} */ + +/** + * @name 节查找 + * @{ + */ + +/** + * @brief 查找指定类型的第一个节 + * @param base 文件缓冲区起始地址 + * @param type 要查找的节类型 (如 SCCF_SECT_CODE) + * @return 节索引, 未找到返回 hdr->sect_header_num + */ +static inline usize sccf_find_sect_by_type(u8 *base, sccf_enum_t type) { + const sccf_header_t *hdr = (const sccf_header_t *)base; + for (usize i = 0; i < (usize)hdr->sect_header_num; ++i) { + sccf_sect_header_t *sh = sccf_sect_header(base, i); + if (sh->scf_sect_type == type) + return i; + } + return (usize)hdr->sect_header_num; +} + +/** + * @brief 查找指定名称的节 + * @param base 文件缓冲区起始地址 + * @param name 8字节名称 (不足补零, 可用 memcmp 比较) + * @return 节索引, 未找到返回 hdr->sect_header_num + */ +static inline usize sccf_find_sect_by_name(u8 *base, const u8 name[8]) { + const sccf_header_t *hdr = (const sccf_header_t *)base; + for (usize i = 0; i < (usize)hdr->sect_header_num; ++i) { + const sccf_sect_header_t *sh = sccf_sect_header(base, i); + if (scc_memcmp(sh->name, name, 8) == 0) + return i; + } + return (usize)hdr->sect_header_num; +} + +/** @} */ + +/** + * @name 节遍历 + * @{ + */ + +/** + * @brief 遍历所有节的回调函数类型 + * @param idx 节索引 + * @param sh 节头指针 + * @param data 节数据起始地址 + * @param size 节数据大小 (即节头的 size 字段) + * @param user 用户自定义数据 + */ +typedef void (*sccf_sect_visit_fn)(usize idx, const sccf_sect_header_t *sh, + const u8 *data, usize size, void *user); + +/** + * @brief 遍历所有节, 对每个节调用回调函数 + * @param base 文件缓冲区起始地址 + * @param visit 回调函数 + * @param user 用户数据透传 + */ +static inline void sccf_foreach_sect(u8 *base, sccf_sect_visit_fn visit, + void *user) { + const sccf_header_t *hdr = (const sccf_header_t *)base; + for (usize i = 0; i < (usize)hdr->sect_header_num; ++i) { + sccf_sect_header_t *sh = sccf_sect_header(base, i); + const u8 *data = sccf_sect_data(base, i); + if (data) { + visit(i, sh, data, (usize)sh->size, user); + } + } +} + +/** @} */ + +/** + * @defgroup 序列化和反序列化 + */ +typedef SCC_VEC(u8) sccf_buffer_t; +typedef SCC_VEC(u8) sccf_sect_data_t; +typedef 
SCC_VEC(sccf_sect_data_t) sccf_sect_data_vec_t; +typedef SCC_VEC(sccf_sect_header_t) sccf_sect_header_vec_t; + +typedef struct { + sccf_header_t header; + sccf_sect_header_vec_t sect_headers; + sccf_sect_data_vec_t sect_datas; +} sccf_t; + +/** + * @brief 初始化sccf + * @param[out] sccf + */ +static inline void sccf_init(sccf_t *sccf) { + scc_memcpy(sccf->header.magic, SCCF_MAGIC, sizeof(sccf->header.magic)); + sccf->header.type = 0; + sccf->header.version = SCCF_VERSION; + sccf->header.arch = SCCF_ARCH_UNKNOWN; + sccf->header.entry_point = 0; + sccf->header.sect_header_num = 0; + scc_memset(sccf->header.reserved, 0, sizeof(sccf->header.reserved)); + scc_vec_init(sccf->sect_headers); + scc_vec_init(sccf->sect_datas); +} + +/** + * @brief 从buffer解析到sccf可以选择是否复制, 如果不复制则不能修改 + * @warning 注意内存释放, 如果选择复制的话 + * + * @param[out] sccf + * @param[in] buffer + * @param[in] copied + */ +static inline void sccf_parse(sccf_t *sccf, sccf_buffer_t *buffer, int copied) { + u8 *data = scc_vec_unsafe_get_data(*buffer); + sccf->header = *(sccf_header_t *)data; + usize offset = sizeof(sccf_header_t); + if (copied) { + scc_vec_init(sccf->sect_headers); + for (usize i = 0; i < sccf->header.sect_header_num; i += 1) { + scc_vec_push(sccf->sect_headers, + *(sccf_sect_header_t *)(data + offset)); + offset += sizeof(sccf_sect_header_t); + } + } else { + u8 *header_table = data + offset; + scc_vec_unsafe_from_buffer(sccf->sect_headers, + (sccf_sect_header_t *)header_table, + sccf->header.sect_header_num); + offset += sizeof(sccf_sect_header_t) * sccf->header.sect_header_num; + } + + scc_vec_init(sccf->sect_datas); + scc_vec_foreach(sccf->sect_headers, i) { + sccf_sect_header_t *header = &scc_vec_at(sccf->sect_headers, i); + sccf_sect_data_t sect_data; + scc_vec_init(sect_data); + if (copied) { + scc_vec_realloc(sect_data, header->size); + scc_memcpy(scc_vec_unsafe_get_data(sect_data), data + offset, + header->size); + scc_vec_size(sect_data) = header->size; + } else { + 
scc_vec_unsafe_from_buffer(sect_data, data + offset, header->size); + } + scc_vec_push(sccf->sect_datas, sect_data); + offset += header->size; + } +} + +/** + * @brief 获取sccf大小 + * + * @param[in] sccf + * @return usize + */ +static inline usize sccf_size(sccf_t *sccf) { + if (scc_vec_size(sccf->sect_datas) != scc_vec_size(sccf->sect_headers) || + scc_vec_size(sccf->sect_headers) != sccf->header.sect_header_num) { + Panic(); + } + usize size = sizeof(sccf_header_t); + size += sizeof(sccf_sect_header_t) * sccf->header.sect_header_num; + scc_vec_foreach(sccf->sect_headers, i) { + sccf_sect_header_t *sccf_sect_header = + &scc_vec_at(sccf->sect_headers, i); + Assert(scc_vec_size(scc_vec_at(sccf->sect_datas, i)) == + sccf_sect_header->size); + size += sccf_sect_header->size; + } + return size; +} + +/** + * @brief 将sccf转换成buffer + * @warning buffer需要被初始化且为空 + * + * @param[in] sccf + * @param[out] buffer + */ +static inline void sccf_write(sccf_t *sccf, sccf_buffer_t *buffer) { + usize size = sccf_size(sccf); + if (scc_vec_size(*buffer) < size) { + scc_vec_realloc(*buffer, size); + scc_vec_size(*buffer) = size; + } + u8 *data = scc_vec_unsafe_get_data(*buffer); + usize offset = 0; + scc_memcpy(data, &sccf->header, sizeof(sccf_header_t)); + data += sizeof(sccf_header_t); + scc_memcpy(data, scc_vec_unsafe_get_data(sccf->sect_headers), + sizeof(sccf_sect_header_t) * sccf->header.sect_header_num); + data += sizeof(sccf_sect_header_t) * sccf->header.sect_header_num; + + scc_vec_foreach(sccf->sect_datas, i) { + sccf_sect_data_t *sect_data = &scc_vec_at(sccf->sect_datas, i); + scc_memcpy(data, scc_vec_unsafe_get_data(*sect_data), sect_data->size); + data += sect_data->size; + } + if (data - scc_vec_unsafe_get_data(*buffer) != size) { + Panic(); + } +} + +#ifdef __cplusplus +} +#endif + +#endif /* __SCC_FORMAT_UTILS_H__ */ diff --git a/libs/sccf/src/main.c b/libs/sccf/src/main.c new file mode 100644 index 0000000..cf92b71 --- /dev/null +++ b/libs/sccf/src/main.c @@ -0,0 +1,8 @@ 
+#include + +int main(void) { + sccf_builder_t sccf_builder; + sccf_builder_init(&sccf_builder); + sccf_builder_to_file(&sccf_builder, "test.o"); + return 0; +} diff --git a/libs/sccf/src/sccf_builder.c b/libs/sccf/src/sccf_builder.c new file mode 100644 index 0000000..5556c3b --- /dev/null +++ b/libs/sccf/src/sccf_builder.c @@ -0,0 +1,51 @@ +#include + +void sccf_builder_init(sccf_builder_t *builder) { + builder->aligned = 64; + sccf_init(&builder->sccf); + scc_strpool_init(&builder->strpool); + scc_hashtable_init(&builder->str2offset, + (scc_hashtable_hash_func_t)scc_strhash32, + (scc_hashtable_equal_func_t)scc_strcmp); + scc_vec_init(builder->relocs); + scc_vec_init(builder->syms); +} + +void sccf_builder_add_section(sccf_builder_t *builder, + sccf_sect_header_t *sect_header, + sccf_sect_data_t *sect_data) { + Assert((usize)(sect_header->size) == scc_vec_size(*sect_data)); + builder->sccf.header.sect_header_num += 1; + scc_vec_push(builder->sccf.sect_headers, *sect_header); + scc_vec_push(builder->sccf.sect_datas, *sect_data); +} + +void sccf_builder_to_buffer(sccf_builder_t *builder, sccf_buffer_t *buffer) { + Assert(builder != null && buffer != null); + // TODO symtab strtab reloc + // sccf_sect_header_t symtab_header; + // sccf_sect_data_t symtab_data; + // sccf_builder_add_section(builder, &symtab_header, &symtab_data); + + sccf_write(&builder->sccf, buffer); +} + +void sccf_builder_to_file(sccf_builder_t *builder, const char *file_path) { + Assert(builder != null && file_path != null); + scc_file_t fp = scc_fopen(file_path, SCC_FILE_WRITE); + if (fp == null) { + LOG_ERROR("file can't open %s", file_path); + return; + } + sccf_buffer_t buffer; + scc_vec_init(buffer); + sccf_builder_to_buffer(builder, &buffer); + usize write_size = + scc_fwrite(fp, scc_vec_unsafe_get_data(buffer), scc_vec_size(buffer)); + if (write_size != scc_vec_size(buffer)) { + LOG_ERROR("file write failed expect write %zu but got %zu", + scc_vec_size(buffer), write_size); + } + 
scc_vec_free(buffer); + scc_fclose(fp); +} diff --git a/libs/sccf/tests/test_sccf_amd64.c b/libs/sccf/tests/test_sccf_amd64.c new file mode 100644 index 0000000..e69de29 diff --git a/runtime/scc_core/include/scc_core_vec.h b/runtime/scc_core/include/scc_core_vec.h index fa38c63..21248f1 100644 --- a/runtime/scc_core/include/scc_core_vec.h +++ b/runtime/scc_core/include/scc_core_vec.h @@ -151,9 +151,16 @@ typedef size_t usize; (vec).size = (vec).cap = 0; \ } while (0) -#define scc_vec_unsafe_get_data(vec) (vec).data +#define scc_vec_unsafe_get_data(vec) ((vec).data) -#define scc_vec_unsafe_from_array(vec, array) \ +#define scc_vec_unsafe_from_buffer(vec, buffer, buffer_size) \ + do { \ + (vec).size = buffer_size; \ + (vec).cap = (vec).size; \ + (vec).data = buffer; \ + } while (0) + +#define scc_vec_unsafe_from_static_array(vec, array) \ do { \ (vec).size = sizeof(array) / sizeof((array)[0]); \ (vec).cap = (vec).size; \ diff --git a/src/main.c b/src/main.c index 96ddb9d..02b4364 100644 --- a/src/main.c +++ b/src/main.c @@ -288,7 +288,7 @@ int main(int argc, const char **argv, const char **envp) { if (config.emit_pp) { scc_lexer_tok_ring_t *tok_ring = scc_pproc_to_ring(&pproc, 8, true, true); - if (config.output_file == null) { + if (fp == null) { print_ring(tok_ring, config.verbose); } else { print_file(tok_ring, fp); @@ -356,6 +356,11 @@ sstream_drop: return 0; } - scc_printf("output exe at %s\n", config.output_file); + if (fp == null) { + scc_printf("output exe at %s\n", config.output_file); + } else { + scc_printf("output exe at %s\n", config.output_file); + } + return 0; }