feat(ast2ir): 添加左值标识支持以改善表达式处理

- 在 scc_ast2ir_expr 函数中添加 is_lvalue 参数来区分左值和右值表达式
- 更新二元表达式处理逻辑,特别是赋值操作符的处理
- 改进标识符表达式的处理,根据是否为左值决定返回存储位置还是加载值
- 修复哈希比较函数的实现
- 移除调试相关的注释代码

refactor(parser): 优化语法分析器错误处理和控制流

- 移除不必要的错误恢复辅助注释
- 修改表达式解析的控制流程,将直接返回改为使用 break 语句
- 添加语义分析回调,在解析完成后进行标识符查找和验证

refactor(sema): 增强语义分析阶段的符号表管理

- 改进标识符查找逻辑,增加对非变量标识符的检查
- 扩展声明处理范围,包括变量和参数声明的符号表注册
- 为函数声明添加作用域管理

fix(parser): 修正单元测试中的类型定义

- 将 long long 类型定义改为 int 类型,解决测试兼容性问题

refactor(sccf): 重构文件格式定义和构建器实现

- 重命名符号类型枚举值 OBJECT 为 EXTERN
- 重命名段类型枚举值 RELOC 为 RELOCS
- 修正结构体字段命名的一致性问题
- 重新设计 SCCF 构建器的数据结构和API
- 添加符号表、字符串表和重定位表的构建支持

refactor(target): 重命名Windows PE相关类型定义

- 将 scc_winpe_* 类型重命名为 scc_pe_* 以保持命名一致性

chore: 添加 sccf2target 模块用于格式转换

- 创建新的库模块用于 SCCF 到目标格式的转换
- 实现 PE 格式转换的基本功能
- 添加示例程序演示格式转换过程
This commit is contained in:
zzy
2026-03-19 12:11:57 +08:00
parent 5f915ba8d3
commit 02a6c684f1
18 changed files with 575 additions and 101 deletions

View File

@@ -12,12 +12,12 @@ typedef struct {
u32 section_offset; ///< 在idata中的偏移
} scc_pe_hnt_builder_t;
typedef SCC_VEC(const char *) scc_winpe_name_vec_t;
typedef SCC_VEC(const char *) scc_pe_name_vec_t;
typedef struct {
const char *name;
scc_winpe_name_vec_t symbol_names;
} scc_winpe_idata_lib_t;
typedef SCC_VEC(scc_winpe_idata_lib_t) scc_pe_idata_lib_vec_t;
scc_pe_name_vec_t symbol_names;
} scc_pe_idata_lib_t;
typedef SCC_VEC(scc_pe_idata_lib_t) scc_pe_idata_lib_vec_t;
typedef struct {
scc_pe_buffer_t buffer; ///< 导入表数据

View File

@@ -71,7 +71,7 @@ u32 scc_pe_reserve_idata(scc_pe_idata_builder_t *builder) {
sizeof(IMAGE_IMPORT_DESCRIPTOR);
scc_vec_foreach(builder->idata_libs, i) {
scc_winpe_idata_lib_t *lib = &scc_vec_at(builder->idata_libs, i);
scc_pe_idata_lib_t *lib = &scc_vec_at(builder->idata_libs, i);
idata_size += (scc_vec_size(lib->symbol_names) + 1) * 2 *
sizeof(IMAGE_THUNK_DATA64);
scc_winpe_hnt_builder_push(&builder->hnt_builder, lib->name, 0);
@@ -98,7 +98,7 @@ scc_pe_buffer_t scc_pe_construct_idata(scc_pe_idata_builder_t *builder,
usize current_offset =
(import_file_count + 1) * sizeof(IMAGE_IMPORT_DESCRIPTOR);
scc_vec_foreach(builder->idata_libs, i) {
scc_winpe_idata_lib_t *lib = &scc_vec_at(builder->idata_libs, i);
scc_pe_idata_lib_t *lib = &scc_vec_at(builder->idata_libs, i);
scc_winpe_lookup_table_vec_t lookup_table;
scc_vec_init(lookup_table);

View File

@@ -33,7 +33,7 @@ int main() {
scc_pe_idata_builder_t idata_builder;
scc_pe_idata_lib_vec_t idata_libs;
scc_vec_init(idata_libs);
scc_winpe_idata_lib_t ucrtbase;
scc_pe_idata_lib_t ucrtbase;
ucrtbase.name = "ucrtbase.dll";
scc_vec_init(ucrtbase.symbol_names);
scc_vec_push(ucrtbase.symbol_names, "puts");

View File

@@ -0,0 +1,12 @@
[package]
name = "sccf2target"
version = "0.1.0"
authors = []
description = ""
dependencies = [
{ name = "sccf", path = "../../sccf" },
{ name = "pe", path = "../pe" },
]
# features = {}
# default_features = []

View File

@@ -0,0 +1,4 @@
#include <scc_pe_builder.h>
#include <sccf_builder.h>
/**
 * @brief Convert an in-memory SCCF object into a PE image.
 *
 * @param builder PE builder to fill. The implementation initializes it itself
 *                (it calls scc_pe_builder_init), so the caller passes
 *                uninitialized storage.
 * @param sccf    SCCF object whose sections are read.
 *
 * @note The implementation applies relocations by patching the code
 *       section's data in place, i.e. the bytes reachable through @p sccf
 *       are modified even though the pointer is const-qualified.
 */
void sccf2pe(scc_pe_builder_t *builder, const sccf_t *sccf);

View File

@@ -0,0 +1,69 @@
#include <sccf.h>
#include <sccf2pe.h>
#include <sccf_builder.h>
#include <stdio.h>
int main() {
char data[] = "Hello, World from SCC PE Builder!\n\0";
/* clang-format off */
char code[] = {
// sub rsp, 0x28 ; 为函数调用分配栈空间
0x48, 0x83, 0xEC, 0x28,
// lea rcx, [rip + data_offset] ; 将字符串地址加载到RCX第一个参数
0x48, 0x8D, 0x0D, 0x00, 0x00, 0x00, 0x00,
// call qword ptr [rip + puts_iat] ; 通过IAT调用puts
0xFF, 0x15, 0x00, 0x00, 0x00, 0x00,
// add rsp, 0x28 ; 恢复栈空间
0x48, 0x83, 0xC4, 0x28,
// xor eax, eax ; 设置返回值为0
0x33, 0xC0,
// ret ; 返回
0xC3,
};
/* clang-format on */
sccf_builder_t builder;
sccf_builder_init(&builder);
sccf_sect_data_t text_section = {
.data = (u8 *)code, .size = sizeof(code), .cap = sizeof(code)};
sccf_sect_data_t data_section = {
.data = (u8 *)data, .size = sizeof(data), .cap = sizeof(data)};
sccf_builder_add_text_section(&builder, &text_section);
sccf_builder_add_data_section(&builder, &data_section);
usize str_idx =
sccf_builder_add_symbol(&builder, "str_data",
&(sccf_sym_t){
.sccf_sect_offset = 0,
.sccf_sect_type = SCCF_SECT_DATA,
.sccf_sym_bind = SCCF_SYM_BIND_GLOBAL,
.sccf_sym_size = sizeof(data),
.sccf_sym_type = SCCF_SYM_TYPE_DATA,
.sccf_sym_vis = SCCF_SYM_VIS_DEFAULT,
});
usize puts_idx =
sccf_builder_add_symbol(&builder, "puts",
&(sccf_sym_t){
.sccf_sect_offset = 0,
.sccf_sect_type = SCCF_SECT_NONE,
.sccf_sym_bind = SCCF_SYM_BIND_GLOBAL,
.sccf_sym_size = 8,
.sccf_sym_type = SCCF_SYM_TYPE_EXTERN,
.sccf_sym_vis = SCCF_SYM_VIS_DEFAULT,
});
sccf_builder_add_reloc(&builder, (sccf_reloc_t){.addend = 4,
.offset = 7,
.sect_type = SCCF_SECT_CODE,
.sym_idx = str_idx,
.type = SCCF_RELOC_REL});
sccf_builder_add_reloc(&builder, (sccf_reloc_t){.addend = 4,
.offset = 13,
.sect_type = SCCF_SECT_CODE,
.sym_idx = puts_idx,
.type = SCCF_RELOC_REL});
const sccf_t *sccf = sccf_builder_to_sccf(&builder);
scc_pe_builder_t pe_builder;
sccf2pe(&pe_builder, sccf);
scc_pe_dump_to_file(&pe_builder, __FILE__ "/../../test.exe");
}

View File

@@ -0,0 +1,229 @@
#include <scc_pe_idata.h>
#include <sccf2pe.h>
#include <sccf_utils.h>
typedef struct {
scc_hashtable_t str2libsym;
scc_pe_idata_lib_vec_t idata_libs;
} pe_idata_lib_ctx_t;
/**
 * @brief Load the export names of one DLL from a module-definition file.
 *
 * Opens "<file_path>/<dll_name>.def", skips its first two lines and registers
 * the first whitespace-delimited token of every following line in
 * ctx->str2libsym, mapped to @p dll_name.
 *
 * Expected file layout:
 *
 *     LIBRARY <path name>
 *     EXPORTS
 *         name @number
 *         ...
 *
 * @param ctx       Context whose str2libsym table receives the names; the
 *                  table must already be initialized.
 * @param file_path Directory containing the .def file.
 * @param dll_name  DLL name. The pointer itself is stored as the table value
 *                  (not copied), so it must outlive @p ctx.
 *
 * On open failure an error is logged and nothing is registered.
 */
static void load_from_def(pe_idata_lib_ctx_t *ctx, const char *file_path,
                          const char *dll_name) {
    scc_cstring_t fpath = scc_cstring_from_cstr(file_path);
    scc_cstring_append_ch(&fpath, '/');
    scc_cstring_append_cstr(&fpath, dll_name, scc_strlen(dll_name));
    scc_cstring_append_cstr(&fpath, ".def", 4);
    const char *fname = scc_cstring_as_cstr(&fpath);
    // NOTE(review): fpath is never released in this function; free it once
    // the matching scc_cstring release call is confirmed.

    scc_file_t fp = scc_fopen(fname, SCC_FILE_READ);
    if (fp == null) {
        LOG_ERROR("load_from_def file read error: %s", fname);
        return;
    }

    usize fsize = scc_fsize(fp);
    // One extra byte so the last token can always be NUL-terminated, even
    // when the file does not end with a delimiter.
    char *buffer = scc_malloc(fsize + 1);
    Assert(buffer != null);
    usize read_size = scc_fread(fp, buffer, fsize);
    Assert(read_size == fsize);
    scc_fclose(fp);
    buffer[fsize] = '\0';

    // Skip the two header lines (LIBRARY ... / EXPORTS).
    usize pos = 0;
    for (usize skipped = 0; skipped < 2 && pos < fsize; pos += 1) {
        if (buffer[pos] == '\n') {
            skipped += 1;
        }
    }

    // One export per line: register only the first token of each line.
    while (pos < fsize) {
        // Leading indentation.
        while (pos < fsize && (buffer[pos] == ' ' || buffer[pos] == '\t')) {
            pos += 1;
        }

        // The name runs up to the next blank or line ending.
        usize name_begin = pos;
        while (pos < fsize && buffer[pos] != ' ' && buffer[pos] != '\t' &&
               buffer[pos] != '\r' && buffer[pos] != '\n') {
            pos += 1;
        }
        usize name_end = pos;

        // Move to the start of the next line BEFORE terminating the name:
        // the terminator may overwrite this line's '\n'.
        while (pos < fsize && buffer[pos] != '\n') {
            pos += 1;
        }
        if (pos < fsize) {
            pos += 1;
        }

        if (name_end == name_begin) {
            continue; // blank line
        }
        buffer[name_end] = '\0';

        // FIXME memory leak: the table keys point into `buffer`, so it is
        // deliberately not freed here and currently has no owner.
        scc_hashtable_set(&ctx->str2libsym, buffer + name_begin,
                          (void *)dll_name);
    }
}
/**
 * @brief Prepare an import-library lookup context.
 *
 * Starts with an empty list of referenced libraries and a string-keyed
 * symbol table, then fills the table from the ucrtbase.dll definition file
 * under "./.dll_def".
 *
 * @param ctx Context to initialize.
 */
static void pe_idata_lib_init(pe_idata_lib_ctx_t *ctx) {
    // No library is referenced until pe_idata_get() resolves a symbol.
    scc_vec_init(ctx->idata_libs);

    // symbol name -> owning DLL name.
    // NOTE(review): scc_strcmp is installed as the equality callback; confirm
    // the hashtable expects strcmp-style "0 means equal" semantics.
    scc_hashtable_hash_func_t hash_fn =
        (scc_hashtable_hash_func_t)scc_strhash32;
    scc_hashtable_equal_func_t equal_fn =
        (scc_hashtable_equal_func_t)scc_strcmp;
    scc_hashtable_init(&ctx->str2libsym, hash_fn, equal_fn);

    // Reads <dir>/<dll>.def, i.e. "./.dll_def/ucrtbase.dll.def".
    load_from_def(ctx, "./.dll_def", "ucrtbase.dll");
}
/**
 * @brief Record that an external symbol must be imported.
 *
 * Looks @p name up in ctx->str2libsym to find the DLL that exports it, then
 * appends @p name to that DLL's entry in ctx->idata_libs, creating the entry
 * on first use.
 *
 * @param ctx  Lookup context filled by pe_idata_lib_init().
 * @param name Symbol name; the pointer is stored as-is, not copied.
 * @return true when an exporting library is known, false otherwise (nothing
 *         is recorded in that case).
 */
static cbool pe_idata_get(pe_idata_lib_ctx_t *ctx, const char *name) {
    const char *owner_dll = scc_hashtable_get(&ctx->str2libsym, name);
    if (owner_dll == null) {
        return false;
    }

    // Reuse the entry of a library that is already referenced.
    scc_pe_idata_lib_t *target = null;
    scc_vec_foreach(ctx->idata_libs, idx) {
        scc_pe_idata_lib_t *candidate = &scc_vec_at(ctx->idata_libs, idx);
        if (scc_strcmp(owner_dll, candidate->name) != 0) {
            continue;
        }
        target = candidate;
        break;
    }

    // First symbol from this library: append a fresh entry and point at the
    // stored copy (taken after the push, so it refers to the vector slot).
    if (target == null) {
        scc_pe_idata_lib_t fresh;
        fresh.name = owner_dll;
        scc_vec_init(fresh.symbol_names);
        scc_vec_push(ctx->idata_libs, fresh);
        target =
            &scc_vec_at(ctx->idata_libs, scc_vec_size(ctx->idata_libs) - 1);
    }

    scc_vec_push(target->symbol_names, name);
    return true;
}
/**
 * @brief Convert an in-memory SCCF object into a PE image.
 *
 * Steps, in order:
 *   1. Initialize @p builder and reserve the PE header for three sections.
 *   2. Walk the SCCF sections: reserve PE headers for code and data, and
 *      wrap the string table, relocation table and symbol table in vectors
 *      that alias the section bytes (no copy).
 *   3. Resolve every EXTERN symbol to an import library and build the
 *      import (idata) section.
 *   4. Apply the relocations by patching the code section bytes in place.
 *   5. Write the header, the code/data sections and the idata section.
 *
 * @param builder PE builder to fill; initialized by this function.
 * @param sccf    SCCF object to convert. Its code section data is modified
 *                in place while relocations are applied.
 *
 * Only RIP-relative style relocations (non-ABS) against the code section are
 * handled; SCCF_RELOC_ABS hits TODO().
 */
void sccf2pe(scc_pe_builder_t *builder, const sccf_t *sccf) {
    scc_pe_builder_init(builder, true, 4096, 512);

    sccf_strtab_t strtab;
    scc_vec_init(strtab);
    sccf_reloc_vec_t relocs;
    scc_vec_init(relocs);
    sccf_sym_vec_t symtab;
    scc_vec_init(symtab);
    sccf_sect_data_t *text_data = null;

    // Three PE sections are emitted: code, data and idata.
    scc_pe_reserve_header(builder, 3);
    scc_pe_section_range code_range = {0};
    scc_pe_section_range data_range = {0};
    scc_pe_section_range idata_range = {0};

    // Pass 1: reserve section headers and locate the metadata tables.
    // Section headers and section datas are indexed in parallel.
    scc_vec_foreach(sccf->sect_headers, i) {
        sccf_sect_header_t *sect_header = &scc_vec_at(sccf->sect_headers, i);
        sccf_sect_data_t *sect_data = &scc_vec_at(sccf->sect_datas, i);
        if (sect_header->sccf_sect_type == SCCF_SECT_CODE) {
            text_data = sect_data;
            code_range = scc_pe_reserve_text_section_header(
                builder, scc_vec_size(*sect_data));
        } else if (sect_header->sccf_sect_type == SCCF_SECT_DATA) {
            data_range = scc_pe_reserve_data_section_header(
                builder, scc_vec_size(*sect_data));
        } else if (sect_header->sccf_sect_type == SCCF_SECT_STRTAB) {
            scc_vec_unsafe_from_buffer(
                strtab, (char *)scc_vec_unsafe_get_data(*sect_data),
                scc_vec_size(*sect_data));
        } else if (sect_header->sccf_sect_type == SCCF_SECT_RELOCS) {
            // NOTE(review): the section size is forwarded unchanged as the
            // vector size. Confirm whether it is a byte count or an element
            // count; if bytes, it must be divided by sizeof(sccf_reloc_t).
            scc_vec_unsafe_from_buffer(
                relocs, (sccf_reloc_t *)scc_vec_unsafe_get_data(*sect_data),
                scc_vec_size(*sect_data));
        } else if (sect_header->sccf_sect_type == SCCF_SECT_SYMTAB) {
            // NOTE(review): same size-unit question as for the relocations.
            scc_vec_unsafe_from_buffer(
                symtab, (sccf_sym_t *)scc_vec_unsafe_get_data(*sect_data),
                scc_vec_size(*sect_data));
        }
    }

    // Collect the imports: every EXTERN symbol must be exported by a known
    // library.
    pe_idata_lib_ctx_t idata_lib_ctx;
    pe_idata_lib_init(&idata_lib_ctx);
    scc_vec_foreach(symtab, i) {
        sccf_sym_t *sym = &scc_vec_at(symtab, i);
        if (sym->sccf_sym_type == SCCF_SYM_TYPE_EXTERN) {
            const char *name =
                (const char *)&scc_vec_at(strtab, sym->name_offset);
            if (pe_idata_get(&idata_lib_ctx, name) == false) {
                LOG_ERROR("link error: symbol [%s] not found", name);
            }
        }
    }

    // Build the import section now that all referenced libraries are known.
    scc_pe_idata_builder_t idata_builder;
    scc_pe_idata_builder_init(&idata_builder, &idata_lib_ctx.idata_libs);
    u32 idata_size = scc_pe_reserve_idata(&idata_builder);
    idata_range = scc_pe_reserve_idata_section_header(builder, idata_size);
    scc_pe_buffer_t idata_buffer =
        scc_pe_construct_idata(&idata_builder, &idata_range);

    // The entry point is the first byte of the code section.
    u32 entry_point_offset = 0;
    u64 base_address = 0x140000000;
    u32 entry_point = code_range.virual_address + entry_point_offset;
    scc_pe_config_t config = (scc_pe_config_t){
        .machine = IMAGE_FILE_MACHINE_AMD64,
        .time_date_stamp = 0,
        .characteristics =
            IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_LARGE_ADDRESS_AWARE,
        .major_linker_version = 14,
        .minor_linker_version = 0,
        .address_of_entry_point = entry_point,
        .image_base = base_address,
        .major_operating_system_version = 6,
        .minor_operating_system_version = 0,
        .major_image_version = 0,
        .minor_image_version = 0,
        .major_subsystem_version = 6,
        .minor_subsystem_version = 0,
        .subsystem = IMAGE_SUBSYSTEM_WINDOWS_CUI,
        .dll_characteristics = IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA |
                               IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE |
                               IMAGE_DLLCHARACTERISTICS_NX_COMPAT,
        .size_of_stack_reserve = 0x100000,
        .size_of_stack_commit = 0x1000,
        .size_of_heap_reserve = 0x100000,
        .size_of_heap_commit = 0x1000,
    };

    // Apply relocations: compute the target RVA, turn it into a 32-bit
    // displacement and patch it into the code bytes.
    scc_vec_foreach(relocs, i) {
        sccf_reloc_t *reloc = &scc_vec_at(relocs, i);
        // A corrupt symbol index would read outside the symbol table.
        Assert((usize)reloc->sym_idx < scc_vec_size(symtab));
        sccf_sym_t *sym = &scc_vec_at(symtab, reloc->sym_idx);
        const char *name = &scc_vec_at(strtab, sym->name_offset);

        u32 rva = 0;
        if (sym->sccf_sym_type == SCCF_SYM_TYPE_EXTERN) {
            rva = scc_pe_idata_get_symbol_rva(&idata_builder, name);
        } else if (sym->sccf_sect_type == SCCF_SECT_DATA) {
            rva = data_range.virual_address + sym->sccf_sect_offset;
        } else if (sym->sccf_sect_type == SCCF_SECT_CODE) {
            rva = code_range.virual_address + sym->sccf_sect_offset;
        } else {
            Panic("unsupported reloc symbol type");
        }
        Assert(rva != 0);

        if (reloc->type == SCCF_RELOC_ABS) {
            TODO();
        }
        Assert(reloc->sect_type == SCCF_SECT_CODE);

        // Displacement relative to the end of the patched field:
        // target - (address of field + addend). u32 arithmetic wraps, which
        // yields the two's-complement encoding of a negative displacement.
        rva -= code_range.virual_address + reloc->offset + reloc->addend;

        Assert(text_data != null);
        // The 4-byte field must lie entirely inside the code section.
        Assert((usize)reloc->offset + sizeof(u32) <=
               scc_vec_size(*text_data));

        // Store byte by byte in little-endian order (the PE/x86-64 target
        // order). This is independent of host endianness and avoids an
        // unaligned u32 store through a cast pointer.
        u8 *patch = (u8 *)scc_vec_unsafe_get_data(*text_data) + reloc->offset;
        patch[0] = (u8)(rva & 0xFF);
        patch[1] = (u8)((rva >> 8) & 0xFF);
        patch[2] = (u8)((rva >> 16) & 0xFF);
        patch[3] = (u8)((rva >> 24) & 0xFF);
    }

    // Emit the image: header first, then the section payloads.
    scc_pe_write_header(builder, &config);
    scc_vec_foreach(sccf->sect_headers, i) {
        sccf_sect_header_t *sect_header = &scc_vec_at(sccf->sect_headers, i);
        sccf_sect_data_t *sect_data = &scc_vec_at(sccf->sect_datas, i);
        if (sect_header->sccf_sect_type == SCCF_SECT_CODE) {
            scc_pe_write_section(builder, &code_range,
                                 (u8 *)scc_vec_unsafe_get_data(*sect_data),
                                 scc_vec_size(*sect_data));
        } else if (sect_header->sccf_sect_type == SCCF_SECT_DATA) {
            scc_pe_write_section(builder, &data_range,
                                 (u8 *)scc_vec_unsafe_get_data(*sect_data),
                                 scc_vec_size(*sect_data));
        }
    }
    scc_pe_write_section(builder, &idata_range,
                         scc_vec_unsafe_get_data(idata_buffer),
                         scc_vec_size(idata_buffer));
}