feat: add SCF format library and rename components to SCC prefix

- Introduce new SCF (SCC Format) library with header, implementation, and test files
- SCF is a minimal executable/linkable format focused on internal linking with external symbol import/export abstraction
- Rename lexer and lex_parser packages from 'smcc_' to 'scc_' prefix for consistency
- Update hashmap implementation to use 'scc_' prefix for types and structures
- Add build configuration for new format library with dependencies on libcore and libutils
This commit is contained in:
zzy
2025-12-11 17:29:12 +08:00
parent d88fa3b8d3
commit 3aaf3a3991
13 changed files with 1048 additions and 56 deletions

0
libs/format/README.md Normal file
View File

8
libs/format/cbuild.toml Normal file
View File

@@ -0,0 +1,8 @@
[package]
name = "scc_format"
version = "0.1.0"
dependencies = [
{ name = "libcore", path = "../../runtime/libcore" },
{ name = "libutils", path = "../../runtime/libutils" },
]

150
libs/format/include/scf.h Normal file
View File

@@ -0,0 +1,150 @@
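/**
 * @file scf.h
 * @brief scc format (SCF) header
 *
 * SCF is a minimal executable and linkable file format focused on internal
 * linking, while also providing an abstract interface for importing and
 * exporting external symbols.
 */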
#ifndef __SCC_FORMAT_H__
#define __SCC_FORMAT_H__
#include <stddef.h>
#include <stdint.h>
/* Fixed-width scalar aliases used by the SCF record definitions below. */
#define scf_byte_t uint8_t
#define scf_enum_t uint32_t
#define scf_size_t uint32_t
#ifdef __cplusplus
extern "C" {
#endif
/** SCF magic bytes (scf.c copies and compares the first 4 bytes of this literal) */
#define SCF_MAGIC "SCF\0"
/** SCF format version */
#define SCF_VERSION 1
/** Target architecture identifiers (stored in scf_header_t::arch) */
typedef enum {
SCF_ARCH_UNKNOWN = 0,
SCF_ARCH_RV32 = 1,
SCF_ARCH_RV64 = 2,
SCF_ARCH_X86 = 3,
SCF_ARCH_X64 = 4,
} scf_arch_t;
/** File flag bits (combined with bitwise OR in scf_header_t::flags) */
typedef enum {
SCF_FLAG_EXECUTABLE = 0x01, // executable file
SCF_FLAG_RELOCATABLE = 0x02, // relocatable file
SCF_FLAG_EXE_RELOC =
SCF_FLAG_EXECUTABLE | SCF_FLAG_RELOCATABLE, // executable produced by internal linking
} scf_flags_t;
/** Symbol type */
typedef enum {
SCF_SYM_TYPE_UNDEF = 0, // undefined
SCF_SYM_TYPE_FUNC = 1, // function
SCF_SYM_TYPE_DATA = 2, // data
SCF_SYM_TYPE_OBJECT = 3, // object
} scf_sym_type_t;
/** Symbol binding */
typedef enum {
SCF_SYM_BIND_LOCAL = 0, // local symbol
SCF_SYM_BIND_GLOBAL = 1, // global symbol
SCF_SYM_BIND_WEAK = 2, // weak reference
} scf_sym_bind_t;
/** Symbol visibility */
typedef enum {
SCF_SYM_VIS_DEFAULT = 0, // default visibility
SCF_SYM_VIS_HIDDEN = 1, // hidden
SCF_SYM_VIS_PROTECTED = 2, // protected
} scf_sym_vis_t;
/** Section type */
typedef enum {
SCF_SECT_NONE = 0, // none
SCF_SECT_CODE = 1, // code section
SCF_SECT_DATA = 2, // data section
SCF_SECT_BSS = 3, // BSS section (uninitialized data)
SCF_SECT_RODATA = 4, // read-only data
} scf_sect_type_t;
/** Relocation type (note: numbering starts at 1; 0 is not a valid type) */
typedef enum {
SCF_RELOC_ABS = 1, // absolute address
SCF_RELOC_REL = 2, // relative address
SCF_RELOC_PC = 3, // PC-relative
} scf_reloc_type_t;
/**
* @brief SCF file header
*
* First record of an SCF image. The size/count fields describe the payload
* that follows the header.
*/
typedef struct {
scf_byte_t magic[4]; // magic: "SCF\0"
scf_enum_t type; // type (NOTE(review): meaning not shown in this header — confirm)
scf_enum_t version; // format version (SCF_VERSION)
scf_enum_t arch; // architecture (scf_arch_t value)
scf_enum_t flags; // flag bits (scf_flags_t values)
scf_size_t entry_point; // entry point address
scf_size_t data_size; // size of the data section, in bytes
scf_size_t code_size; // size of the code (text) section, in bytes
scf_size_t strtab_size; // size of the string table, in bytes
scf_size_t sym_count; // number of scf_sym_t records
scf_size_t reloc_count; // number of scf_reloc_t records
} scf_header_t;
/**
* @brief SCF section
*
* NOTE(review): `data[1]` is presumably the first byte of a variable-length
* payload of `size` bytes — confirm against users of this type.
*/
typedef struct {
scf_size_t size;
scf_enum_t scf_sect_type; // scf_sect_type_t value
scf_byte_t data[1];
} scf_sect_t;
/**
* @brief SCF symbol table entry
*/
typedef struct {
scf_size_t name_offset; // presumably an offset into the string table — TODO confirm
scf_enum_t scf_sym_type; // scf_sym_type_t value
scf_enum_t scf_sym_bind; // scf_sym_bind_t value
scf_enum_t scf_sym_vis; // scf_sym_vis_t value
scf_enum_t scf_sect_type; // scf_sect_type_t of the section holding the symbol
scf_size_t scf_sect_offset; // offset of the symbol within that section
scf_size_t scf_sym_size; // size of the symbol
} scf_sym_t;
/**
* @brief SCF relocation entry
*/
typedef struct {
scf_size_t offset; // offset within the section
scf_size_t sym_idx; // symbol index
scf_enum_t type; // relocation type (scf_reloc_type_t)
scf_enum_t sect_type; // section type (code section / data section)
scf_size_t addend; // addend
} scf_reloc_t;
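/*
SCF file layout, in order:
scf_header_t header;
scf_sect_data_t text;
scf_sect_data_t data;
scf_sect_data_t symtab;  (array of scf_sym_t, header.sym_count entries)
scf_sect_data_t reloc;   (array of scf_reloc_t, header.reloc_count entries)
scf_sect_data_t strtab;
*/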
#ifdef __cplusplus
}
#endif
#endif /* __SCC_FORMAT_H__ */

View File

@@ -0,0 +1,42 @@
#ifndef __SCC_FORMAT_IMPL_H__
#define __SCC_FORMAT_IMPL_H__
#include <libcore.h>
#include <libutils.h>
#include <scf.h>
/* Growable containers used by scf_t: raw section bytes, symbols, relocations. */
typedef SCC_VEC(u8) scf_sect_data_t;
typedef SCC_VEC(scf_sym_t) scf_sym_vec_t;
typedef SCC_VEC(scf_reloc_t) scf_reloc_vec_t;
/* In-memory representation of one SCF image. */
typedef struct {
scf_header_t header; // file header; its size/count fields mirror the vectors below
scf_sect_data_t text; // code section bytes
scf_sect_data_t data; // data section bytes
scf_sect_data_t symtab; // initialised by scf_init; NOTE(review): otherwise unused in scf.c
scf_sect_data_t reloc; // initialised by scf_init; NOTE(review): otherwise unused in scf.c
scf_sect_data_t strtab; // string table bytes
scc_strpool_t strpool; // string pool, initialised by scf_init
scc_hashtable_t str2idx; // table set up with string hash/compare; presumably name -> index — TODO confirm
scf_sym_vec_t syms; // symbol records
scf_reloc_vec_t relocs; // relocation records
} scf_t;
/* Initialise an scf_t to an empty image. */
void scf_init(scf_t *scf);
/* Parse an SCF image from `buffer` (of `size` bytes) into `scf`. */
cbool scf_parse(scf_t *scf, const char *buffer, usize size);
/* Serialise `scf` into `buffer` (of `size` bytes). */
cbool scf_write(scf_t *scf, char *buffer, usize size);
/* Swap the contents of a section of `scf` with `**section`. */
cbool scf_exchange_section(scf_t *scf, scf_sect_type_t type,
scf_sect_data_t **section);
/* Append a copy of `*sym` to the symbol list. */
cbool scf_add_sym(scf_t *scf, scf_sym_t *sym);
/* Append a copy of `*reloc` to the relocation list. */
cbool scf_add_reloc(scf_t *scf, scf_reloc_t *reloc);
/* Patch section bytes according to the recorded relocations. */
cbool scf_apply_relocations(scf_t *scf);
cbool scf_write_done(scf_t *scf); // internal tidy-up before writing
/* Check that the header is well-formed and consistent with the vectors. */
cbool scf_check_valid(scf_t *scf);
typedef SCC_VEC(scf_t) scf_vec_t;
/* Link the images in `scfs` into `outscf`. */
cbool scf_link_all(scf_vec_t scfs, scf_t *outscf);
#endif /* __SCC_FORMAT_IMPL_H__ */

469
libs/format/src/scf.c Normal file
View File

@@ -0,0 +1,469 @@
/**
* @file scf.c
* @brief SCF 格式实现
*/
#include <scf_impl.h>
/**
 * @brief Reset an scf_t to an empty SCF image.
 *
 * Clears the header, stamps the magic bytes and format version, and
 * initialises every container owned by the structure (section vectors,
 * string pool, name lookup table, symbol and relocation vectors).
 *
 * @param scf Structure to initialise; a NULL pointer is ignored.
 */
void scf_init(scf_t *scf) {
    if (scf == NULL) {
        return;
    }

    /* Header: start from all-zero, then fill in the identifying fields. */
    scf_header_t *hdr = &scf->header;
    scc_memset(hdr, 0, sizeof(scf->header));
    scc_memcpy(hdr->magic, SCF_MAGIC, 4);
    hdr->version = SCF_VERSION;
    hdr->arch = SCF_ARCH_UNKNOWN;
    hdr->flags = 0;
    hdr->entry_point = 0;
    hdr->data_size = 0;
    hdr->code_size = 0;
    hdr->strtab_size = 0;
    hdr->sym_count = 0;
    hdr->reloc_count = 0;

    /* Raw section storage. */
    scc_vec_init(scf->text);
    scc_vec_init(scf->data);
    scc_vec_init(scf->symtab);
    scc_vec_init(scf->reloc);
    scc_vec_init(scf->strtab);

    /* String handling: the callbacks are installed before the table is
     * initialised. */
    scc_strpool_init(&scf->strpool);
    scf->str2idx.hash_func = (void *)scc_strhash32;
    scf->str2idx.key_cmp = (void *)scc_strcmp;
    scc_hashtable_init(&scf->str2idx);

    /* Typed record storage. */
    scc_vec_init(scf->syms);
    scc_vec_init(scf->relocs);
}
/**
 * @brief Parse an SCF image from a memory buffer.
 *
 * The buffer is read in this order: header, text bytes, data bytes,
 * symbol records, relocation records, string table bytes. The buffer may be
 * larger than the image; trailing bytes are ignored.
 *
 * Every length taken from the header is validated against the bytes that
 * remain in the buffer before anything is copied. The checks are written as
 * "length > remaining" so they cannot wrap around, even where usize is only
 * 32 bits wide.
 *
 * @param scf    Destination structure (expected to have been set up with
 *               scf_init). On failure it may be left partially filled.
 * @param buffer Input bytes; no alignment is required.
 * @param size   Number of valid bytes in @p buffer.
 * @return true on success; false on NULL arguments, a short buffer, a magic
 *         mismatch, or a header that describes more data than @p size holds.
 */
cbool scf_parse(scf_t *scf, const char *buffer, usize size) {
    if (!scf || !buffer || size < sizeof(scf_header_t)) {
        return false;
    }

    // Copy the header out rather than dereferencing a cast pointer: a char
    // buffer carries no alignment guarantee for scf_header_t.
    scf_header_t header;
    scc_memcpy(&header, buffer, sizeof(header));
    if (scc_memcmp(header.magic, SCF_MAGIC, 4) != 0) {
        return false;
    }
    scf->header = header;

    // Invariant from here on: offset <= size, so (size - offset) is the
    // number of unread bytes and never underflows.
    usize offset = sizeof(scf_header_t);

    // text section
    const usize code_bytes = scf->header.code_size;
    if (code_bytes > 0) {
        if (code_bytes > size - offset) {
            return false;
        }
        while (scf->text.size < code_bytes) {
            scc_vec_push(scf->text, 0);
        }
        scc_memcpy(scf->text.data, buffer + offset, code_bytes);
        offset += code_bytes;
    }

    // data section
    const usize data_bytes = scf->header.data_size;
    if (data_bytes > 0) {
        if (data_bytes > size - offset) {
            return false;
        }
        while (scf->data.size < data_bytes) {
            scc_vec_push(scf->data, 0);
        }
        scc_memcpy(scf->data.data, buffer + offset, data_bytes);
        offset += data_bytes;
    }

    // symbol table: compare the count against the number of whole records
    // that fit, so count * sizeof(...) below cannot overflow
    const usize sym_count = scf->header.sym_count;
    if (sym_count > 0) {
        if (sym_count > (size - offset) / sizeof(scf_sym_t)) {
            return false;
        }
        const usize symtab_bytes = sym_count * sizeof(scf_sym_t);
        while (scf->syms.size < sym_count) {
            scf_sym_t blank = {0};
            scc_vec_push(scf->syms, blank);
        }
        scc_memcpy(scf->syms.data, buffer + offset, symtab_bytes);
        offset += symtab_bytes;
    }

    // relocation table: same record-count check as the symbol table
    const usize reloc_count = scf->header.reloc_count;
    if (reloc_count > 0) {
        if (reloc_count > (size - offset) / sizeof(scf_reloc_t)) {
            return false;
        }
        const usize reloc_bytes = reloc_count * sizeof(scf_reloc_t);
        while (scf->relocs.size < reloc_count) {
            scf_reloc_t blank = {0};
            scc_vec_push(scf->relocs, blank);
        }
        scc_memcpy(scf->relocs.data, buffer + offset, reloc_bytes);
        offset += reloc_bytes;
    }

    // string table
    const usize strtab_bytes = scf->header.strtab_size;
    if (strtab_bytes > 0) {
        if (strtab_bytes > size - offset) {
            return false;
        }
        while (scf->strtab.size < strtab_bytes) {
            scc_vec_push(scf->strtab, 0);
        }
        scc_memcpy(scf->strtab.data, buffer + offset, strtab_bytes);
        offset += strtab_bytes;
    }

    // Each step above kept offset <= size, so no final range check is needed.
    return true;
}
/**
 * @brief Serialise an SCF image into a caller-provided buffer.
 *
 * Output order: header, text bytes, data bytes, symbol records, relocation
 * records, string table bytes. The amount written for each part is taken
 * from the header's size/count fields.
 *
 * The header fields are checked against the in-memory vectors first: if the
 * header claims more bytes or records than a vector actually holds, the
 * call fails instead of reading past the vector's storage.
 *
 * @param scf    Image to serialise.
 * @param buffer Output buffer; no alignment is required.
 * @param size   Capacity of @p buffer in bytes.
 * @return true on success; false on NULL arguments, a header that is
 *         inconsistent with the stored data, or a buffer that is too small.
 */
cbool scf_write(scf_t *scf, char *buffer, usize size) {
    if (!scf || !buffer) {
        return false;
    }

    // Never copy more than the vectors actually contain.
    if (scf->header.code_size > scf->text.size ||
        scf->header.data_size > scf->data.size ||
        scf->header.sym_count > scf->syms.size ||
        scf->header.reloc_count > scf->relocs.size ||
        scf->header.strtab_size > scf->strtab.size) {
        return false;
    }

    const usize code_bytes = scf->header.code_size;
    const usize data_bytes = scf->header.data_size;
    const usize symtab_bytes = scf->header.sym_count * sizeof(scf_sym_t);
    const usize reloc_bytes = scf->header.reloc_count * sizeof(scf_reloc_t);
    const usize strtab_bytes = scf->header.strtab_size;

    // Total space required for the image.
    usize needed = sizeof(scf_header_t);
    needed += code_bytes;
    needed += data_bytes;
    needed += symtab_bytes;
    needed += reloc_bytes;
    needed += strtab_bytes;
    if (size < needed) {
        return false;
    }

    // Header: byte copy, because `buffer` may not be aligned for
    // scf_header_t.
    scc_memcpy(buffer, &scf->header, sizeof(scf_header_t));
    usize offset = sizeof(scf_header_t);

    // text section
    if (code_bytes > 0) {
        scc_memcpy(buffer + offset, scf->text.data, code_bytes);
        offset += code_bytes;
    }
    // data section
    if (data_bytes > 0) {
        scc_memcpy(buffer + offset, scf->data.data, data_bytes);
        offset += data_bytes;
    }
    // symbol table
    if (symtab_bytes > 0) {
        scc_memcpy(buffer + offset, scf->syms.data, symtab_bytes);
        offset += symtab_bytes;
    }
    // relocation table
    if (reloc_bytes > 0) {
        scc_memcpy(buffer + offset, scf->relocs.data, reloc_bytes);
        offset += reloc_bytes;
    }
    // string table
    if (strtab_bytes > 0) {
        scc_memcpy(buffer + offset, scf->strtab.data, strtab_bytes);
        offset += strtab_bytes;
    }

    Assert(offset <= size);
    return true;
}
/**
 * @brief Swap a section's storage with a caller-owned vector.
 *
 * After a successful call the image holds what used to be in `**section`,
 * `**section` holds what used to be in the image, and the matching header
 * size field reflects the image's new contents.
 *
 * SCF_SECT_RODATA is stored in the data section (there is no separate
 * read-only section). SCF_SECT_BSS and any other type are rejected.
 *
 * @param scf     Image whose section is exchanged.
 * @param type    Which section to exchange.
 * @param section Pointer to a pointer to the vector to swap in.
 * @return true on success, false on NULL arguments or unsupported type.
 */
cbool scf_exchange_section(scf_t *scf, scf_sect_type_t type,
                           scf_sect_data_t **section) {
    if (scf == NULL || section == NULL || *section == NULL) {
        return false;
    }

    /* Resolve the section slot and the header field that tracks its size. */
    scf_sect_data_t *slot;
    scf_size_t *recorded_size;
    if (type == SCF_SECT_CODE) {
        slot = &scf->text;
        recorded_size = &scf->header.code_size;
    } else if (type == SCF_SECT_DATA || type == SCF_SECT_RODATA) {
        slot = &scf->data;
        recorded_size = &scf->header.data_size;
    } else {
        /* BSS carries no bytes to exchange; everything else is unknown. */
        return false;
    }

    /* Swap the two vectors by value. */
    scf_sect_data_t incoming = **section;
    **section = *slot;
    *slot = incoming;

    /* Keep the header in step with the section now owned by the image. */
    *recorded_size = (scf_size_t)slot->size;
    return true;
}
/**
 * @brief Append a symbol to the image.
 *
 * The symbol is copied by value and the header's symbol count is bumped.
 *
 * @param scf Image to modify.
 * @param sym Symbol to copy in.
 * @return true on success, false if either pointer is NULL.
 */
cbool scf_add_sym(scf_t *scf, scf_sym_t *sym) {
    if (scf == NULL || sym == NULL) {
        return false;
    }

    scc_vec_push(scf->syms, *sym);
    scf->header.sym_count += 1;
    return true;
}
/**
 * @brief Append a relocation entry to the image.
 *
 * The entry is copied by value and the header's relocation count is bumped.
 *
 * @param scf   Image to modify.
 * @param reloc Relocation entry to copy in.
 * @return true on success, false if either pointer is NULL.
 */
cbool scf_add_reloc(scf_t *scf, scf_reloc_t *reloc) {
    if (scf == NULL || reloc == NULL) {
        return false;
    }

    scc_vec_push(scf->relocs, *reloc);
    scf->header.reloc_count += 1;
    return true;
}
/**
 * @brief Check that an image is internally consistent.
 *
 * An image is valid when the magic and version match this implementation,
 * the architecture value is within the known range, and every size/count in
 * the header equals the size of the corresponding vector.
 *
 * @param scf Image to check.
 * @return true if valid, false otherwise (including a NULL pointer).
 */
cbool scf_check_valid(scf_t *scf) {
    if (scf == NULL) {
        return false;
    }

    scf_header_t *hdr = &scf->header;

    /* Identification fields. */
    if (scc_memcmp(hdr->magic, SCF_MAGIC, 4) != 0 ||
        hdr->version != SCF_VERSION || hdr->arch > SCF_ARCH_X64) {
        return false;
    }

    /* Header bookkeeping must agree with what is actually stored. */
    if (hdr->code_size != scf->text.size ||
        hdr->data_size != scf->data.size ||
        hdr->sym_count != scf->syms.size ||
        hdr->reloc_count != scf->relocs.size ||
        hdr->strtab_size != scf->strtab.size) {
        return false;
    }

    return true;
}
/**
 * @brief Patch section bytes according to the image's relocation entries.
 *
 * For each entry a 32-bit (scf_size_t) value is stored at `offset` in the
 * section named by `sect_type`. With S the symbol's section offset, A the
 * addend, and P the offset of the patched field within its section, the
 * stored value is:
 *   - SCF_RELOC_ABS: S + A
 *   - SCF_RELOC_REL: S + A - P
 *   - SCF_RELOC_PC:  S + A - (P + sizeof(scf_size_t)), i.e. relative to the
 *     end of the patched field
 *
 * All arithmetic is modulo 2^32, so a negative addend stored in the
 * unsigned `addend` field behaves as expected. The value is written with a
 * byte copy because the patch site may sit at any byte offset. The value
 * overwrites the field, so applying the same relocations twice gives the
 * same result.
 *
 * P is the offset within the section, not the host address of the buffer:
 * the image has no load address at this point, and using the host pointer
 * would make the output vary from run to run.
 *
 * Entries that target a section other than code/data/rodata are skipped.
 *
 * @param scf Image to patch.
 * @return true on success; false on a NULL pointer, a patch site outside
 *         its section, a symbol index outside the symbol table, or an
 *         unknown relocation type. Entries processed before the failing one
 *         stay applied.
 */
cbool scf_apply_relocations(scf_t *scf) {
    if (!scf) {
        return false;
    }

    for (usize i = 0; i < scf->relocs.size; i++) {
        const scf_reloc_t *reloc = &scf->relocs.data[i];

        // Pick the section that contains the patch site.
        scf_sect_data_t *target_section = NULL;
        switch (reloc->sect_type) {
        case SCF_SECT_CODE:
            target_section = &scf->text;
            break;
        case SCF_SECT_DATA:
        case SCF_SECT_RODATA:
            target_section = &scf->data;
            break;
        default:
            // unsupported section type: nothing to patch
            continue;
        }

        // The whole field must lie inside the section. Written as a
        // subtraction so the check cannot wrap around.
        if (target_section->size < sizeof(scf_size_t) ||
            reloc->offset > target_section->size - sizeof(scf_size_t)) {
            return false;
        }

        // A relocation against a symbol that does not exist is malformed;
        // fail instead of silently resolving it to address 0.
        if (reloc->sym_idx >= scf->syms.size) {
            return false;
        }
        const scf_size_t symbol_address =
            scf->syms.data[reloc->sym_idx].scf_sect_offset;

        scf_size_t relocation_value = symbol_address + reloc->addend;
        switch (reloc->type) {
        case SCF_RELOC_ABS:
            // absolute: S + A
            break;
        case SCF_RELOC_REL:
            // relative to the start of the patched field
            relocation_value -= reloc->offset;
            break;
        case SCF_RELOC_PC:
            // relative to the end of the patched field
            relocation_value -=
                reloc->offset + (scf_size_t)sizeof(scf_size_t);
            break;
        default:
            // unsupported relocation type
            return false;
        }

        // Byte copy: the patch site need not be aligned for scf_size_t.
        scc_memcpy(target_section->data + reloc->offset, &relocation_value,
                   sizeof(relocation_value));
    }
    return true;
}
/**
 * @brief Finalise an image before it is written out.
 *
 * Applies all relocations, refreshes the header's size/count fields from
 * the vectors, and ORs SCF_FLAG_EXE_RELOC into the header flags.
 *
 * @param scf Image to finalise.
 * @return true on success; false on a NULL pointer or if applying the
 *         relocations fails (the header is left untouched in that case).
 */
cbool scf_write_done(scf_t *scf) {
    if (scf == NULL || !scf_apply_relocations(scf)) {
        return false;
    }

    /* Bring the header bookkeeping in line with the stored data. */
    scf_header_t *hdr = &scf->header;
    hdr->code_size = (scf_size_t)scf->text.size;
    hdr->data_size = (scf_size_t)scf->data.size;
    hdr->sym_count = (scf_size_t)scf->syms.size;
    hdr->reloc_count = (scf_size_t)scf->relocs.size;
    hdr->strtab_size = (scf_size_t)scf->strtab.size;

    /* Mark the image as an internally linked executable. */
    hdr->flags |= SCF_FLAG_EXE_RELOC;
    return true;
}
/**
 * @brief Link several SCF images into one.
 *
 * Placeholder implementation: the output is initialised with scf_init and
 * then receives a copy of the first input's header only. No sections,
 * symbols, or relocations are merged or copied.
 *
 * @param scfs   Vector of input images.
 * @param outscf Output image; re-initialised by this call.
 * @return true on success; false if @p outscf is NULL or @p scfs is empty.
 */
cbool scf_link_all(scf_vec_t scfs, scf_t *outscf) {
    if (outscf == NULL || scfs.size == 0) {
        return false;
    }

    scf_init(outscf);

    /* A real linker would merge sections, resolve symbols and apply
     * relocations across all inputs; for now only the first header is
     * carried over (shallow copy). */
    if (scfs.size > 0) {
        outscf->header = scfs.data[0].header;
    }
    return true;
}

View File

@@ -0,0 +1,109 @@
/**
* @file test_scf.c
* @brief SCF format tests
*/
#include <scf_impl.h>
#include <stdio.h>
#include <string.h>
int main() {
printf("Testing SCF format implementation...\n");
// Test 1: Initialization
scf_t scf;
scf_init(&scf);
if (memcmp(scf.header.magic, SCF_MAGIC, 4) != 0) {
printf("FAIL: Magic number incorrect\n");
return 1;
}
if (scf.header.version != SCF_VERSION) {
printf("FAIL: Version incorrect\n");
return 1;
}
if (scf.header.arch != SCF_ARCH_UNKNOWN) {
printf("FAIL: Architecture incorrect\n");
return 1;
}
printf("Test 1 PASSED: Initialization successful\n");
// Test 2: Adding symbols
scf_sym_t sym = {0};
sym.name_offset = 0;
sym.scf_sym_type = SCF_SYM_TYPE_FUNC;
sym.scf_sym_bind = SCF_SYM_BIND_GLOBAL;
sym.scf_sym_vis = SCF_SYM_VIS_DEFAULT;
sym.scf_sect_type = SCF_SECT_CODE;
sym.scf_sect_offset = 0;
sym.scf_sym_size = 16;
if (!scf_add_sym(&scf, &sym)) {
printf("FAIL: Cannot add symbol\n");
return 1;
}
if (scf.header.sym_count != 1) {
printf("FAIL: Symbol count incorrect\n");
return 1;
}
printf("Test 2 PASSED: Symbol addition successful\n");
// Test 3: Adding relocations
scf_reloc_t reloc = {0};
reloc.offset = 0; // 偏移量
reloc.sym_idx = 0;
reloc.type = SCF_RELOC_ABS;
reloc.sect_type = SCF_SECT_CODE; // 代码段
reloc.addend = 0;
if (!scf_add_reloc(&scf, &reloc)) {
printf("FAIL: Cannot add relocation\n");
return 1;
}
if (scf.header.reloc_count != 1) {
printf("FAIL: Relocation count incorrect\n");
return 1;
}
printf("Test 3 PASSED: Relocation addition successful\n");
// Test 4: Checking validity
if (!scf_check_valid(&scf)) {
printf("FAIL: SCF structure invalid\n");
return 1;
}
printf("Test 4 PASSED: SCF structure valid\n");
// Test 5: Writing and reading
char buffer[1024];
if (!scf_write(&scf, buffer, sizeof(buffer))) {
printf("FAIL: Cannot write to buffer\n");
return 1;
}
scf_t scf2;
scf_init(&scf2);
if (!scf_parse(&scf2, buffer, sizeof(buffer))) {
printf("FAIL: Cannot read from buffer\n");
return 1;
}
// Compare the two structures
if (memcmp(&scf.header, &scf2.header, sizeof(scf_header_t)) != 0) {
printf("FAIL: Header mismatch\n");
return 1;
}
printf("Test 5 PASSED: Write/read successful\n");
printf("All tests passed!\n");
return 0;
}

View File

@@ -0,0 +1,184 @@
/**
* @file test_scf_x64.c
* @brief SCF x64 architecture tests
*
* This test creates a simple x64 executable similar to the Rust example
* provided by the user.
*/
#include <scf_impl.h>
#include <stdio.h>
#include <string.h>
int main() {
printf("Testing SCF x64 format implementation...\n");
// Test 1: Initialize SCF for x64 architecture
scf_t scf;
scf_init(&scf);
// Set architecture to x64
scf.header.arch = SCF_ARCH_X64;
scf.header.flags = SCF_FLAG_EXE_RELOC;
printf("Test 1 PASSED: x64 initialization successful\n");
// Test 2: Add .text section with x64 machine code
// x64 machine code for:
// sub rsp, 0x28
// lea rcx, [rip + data_offset] ; will be relocated
// call [rip + printf_iat] ; will be relocated
// add rsp, 0x28
// xor eax, eax
// ret
unsigned char x64_code[] = {
0x48, 0x83, 0xEC, 0x28, // sub rsp, 0x28
0x48, 0x8D, 0x0D, 0x00, 0x00, 0x00,
0x00, // lea rcx, [rip + 0] (to be relocated)
0xFF, 0x15, 0x00, 0x00, 0x00, 0x00, // call [rip + 0] (to be relocated)
0x48, 0x83, 0xC4, 0x28, // add rsp, 0x28
0x33, 0xC0, // xor eax, eax
0xC3 // ret
};
// Add code to .text section
for (usize i = 0; i < sizeof(x64_code); i++) {
scc_vec_push(scf.text, x64_code[i]);
}
scf.header.code_size = (scf_size_t)scf.text.size;
printf("Test 2 PASSED: x64 code added to .text section\n");
// Test 3: Add .data section with string
const char hello_world[] = "Hello, World from SCF x64 Test!\n\0";
for (usize i = 0; i < sizeof(hello_world); i++) {
scc_vec_push(scf.data, hello_world[i]);
}
scf.header.data_size = (scf_size_t)scf.data.size;
printf("Test 3 PASSED: Data string added to .data section\n");
// Test 4: Add symbols
scf_sym_t data_sym = {0};
data_sym.name_offset = 0; // Would need string table for actual names
data_sym.scf_sym_type = SCF_SYM_TYPE_DATA;
data_sym.scf_sym_bind = SCF_SYM_BIND_GLOBAL;
data_sym.scf_sym_vis = SCF_SYM_VIS_DEFAULT;
data_sym.scf_sect_type = SCF_SECT_DATA;
data_sym.scf_sect_offset = 0; // Start of data section
data_sym.scf_sym_size = sizeof(hello_world);
if (!scf_add_sym(&scf, &data_sym)) {
printf("FAIL: Cannot add data symbol\n");
return 1;
}
scf_sym_t code_sym = {0};
code_sym.name_offset = 0;
code_sym.scf_sym_type = SCF_SYM_TYPE_FUNC;
code_sym.scf_sym_bind = SCF_SYM_BIND_GLOBAL;
code_sym.scf_sym_vis = SCF_SYM_VIS_DEFAULT;
code_sym.scf_sect_type = SCF_SECT_CODE;
code_sym.scf_sect_offset = 0; // Start of code section
code_sym.scf_sym_size = sizeof(x64_code);
if (!scf_add_sym(&scf, &code_sym)) {
printf("FAIL: Cannot add code symbol\n");
return 1;
}
printf("Test 4 PASSED: Symbols added\n");
// Test 5: Add relocations
// First relocation: data reference at offset 7 in code (lea rcx, [rip +
// data_offset])
scf_reloc_t data_reloc = {0};
data_reloc.offset = 7; // Offset in code section
data_reloc.sym_idx = 0; // Index of data symbol (first symbol added)
data_reloc.type = SCF_RELOC_PC; // PC-relative relocation
data_reloc.sect_type = SCF_SECT_CODE;
data_reloc.addend = -4; // RIP-relative addressing adjustment
if (!scf_add_reloc(&scf, &data_reloc)) {
printf("FAIL: Cannot add data relocation\n");
return 1;
}
// Second relocation: external function reference at offset 13 in code (call
// [rip + printf_iat])
scf_reloc_t func_reloc = {0};
func_reloc.offset = 13; // Offset in code section
func_reloc.sym_idx =
1; // Index of code symbol (would be external in real case)
func_reloc.type = SCF_RELOC_PC; // PC-relative relocation
func_reloc.sect_type = SCF_SECT_CODE;
func_reloc.addend = -4; // RIP-relative addressing adjustment
if (!scf_add_reloc(&scf, &func_reloc)) {
printf("FAIL: Cannot add function relocation\n");
return 1;
}
printf("Test 5 PASSED: Relocations added\n");
// Test 6: Apply relocations
if (!scf_apply_relocations(&scf)) {
printf("FAIL: Cannot apply relocations\n");
return 1;
}
printf("Test 6 PASSED: Relocations applied\n");
// Test 7: Prepare for writing (internal整理)
if (!scf_write_done(&scf)) {
printf("FAIL: Cannot prepare for writing\n");
return 1;
}
printf("Test 7 PASSED: Prepared for writing\n");
// Test 8: Write to buffer
char buffer[4096];
if (!scf_write(&scf, buffer, sizeof(buffer))) {
printf("FAIL: Cannot write to buffer\n");
return 1;
}
printf("Test 8 PASSED: Written to buffer\n");
// Test 9: Parse from buffer
scf_t scf2;
scf_init(&scf2);
if (!scf_parse(&scf2, buffer, sizeof(buffer))) {
printf("FAIL: Cannot parse from buffer\n");
return 1;
}
// Verify architecture
if (scf2.header.arch != SCF_ARCH_X64) {
printf("FAIL: Architecture not preserved\n");
return 1;
}
printf("Test 9 PASSED: Parsed from buffer, architecture preserved\n");
// Test 10: Verify structure
if (!scf_check_valid(&scf2)) {
printf("FAIL: Parsed structure invalid\n");
return 1;
}
printf("Test 10 PASSED: Parsed structure valid\n");
printf("\nAll x64 tests passed!\n");
printf("Created SCF file with:\n");
printf(" Architecture: x64\n");
printf(" Code size: %u bytes\n", scf.header.code_size);
printf(" Data size: %u bytes\n", scf.header.data_size);
printf(" Symbols: %u\n", scf.header.sym_count);
printf(" Relocations: %u\n", scf.header.reloc_count);
printf(" Flags: 0x%x\n", scf.header.flags);
return 0;
}

View File

@@ -1,5 +1,5 @@
[package]
name = "smcc_lex_parser"
name = "scc_lex_parser"
version = "0.1.0"
dependencies = [{ name = "libcore", path = "../../runtime/libcore" }]

View File

@@ -1,8 +1,8 @@
[package]
name = "smcc_lex"
name = "scc_lex"
version = "0.1.0"
dependencies = [
{ name = "libcore", path = "../../runtime/libcore" },
{ name = "smcc_lex_parser", path = "../lex_parser" },
{ name = "lex_parser", path = "../lex_parser" },
]

View File

@@ -14,35 +14,35 @@
* @enum hp_entry_state_t
* @brief 哈希表条目状态标识
*/
typedef enum hashmap_entry_state {
typedef enum scc_hashmap_entry_state {
ENTRY_EMPTY, /**< 空槽位(从未使用过) */
ENTRY_ACTIVE, /**< 有效条目(包含键值对) */
ENTRY_TOMBSTONE /**< 墓碑标记(已删除条目) */
} hp_entry_state_t;
} scc_hashtable_entry_state_t;
/**
* @struct hashmap_entry_t
* @struct scc_hashtable_entry_t
* @brief 哈希表条目结构
*
* @note key/value内存由调用者管理哈希表不负责其生命周期
*/
typedef struct hashmap_entry {
const void *key; /**< 键指针(不可变) */
void *value; /**< 值指针 */
u32 hash; /**< 预计算的哈希值(避免重复计算) */
hp_entry_state_t state; /**< 当前条目状态 */
} hashmap_entry_t;
typedef struct scc_hashtable_entry {
const void *key; /**< 键指针(不可变) */
void *value; /**< 值指针 */
u32 hash; /**< 预计算的哈希值(避免重复计算) */
scc_hashtable_entry_state_t state; /**< 当前条目状态 */
} scc_hashtable_entry_t;
/**
* @struct hashmap_t
* @struct scc_hashtable_t
* @brief 哈希表主体结构
*
* 使用开放寻址法实现,采用墓碑标记处理删除操作
*/
typedef struct smcc_hashmap {
SCC_VEC(hashmap_entry_t) entries; /**< 条目存储容器 */
u32 count; /**< 有效条目数量(不含墓碑) */
u32 tombstone_count; /**< 墓碑条目数量 */
typedef struct scc_hashtable {
SCC_VEC(scc_hashtable_entry_t) entries; /**< 条目存储容器 */
u32 count; /**< 有效条目数量(不含墓碑) */
u32 tombstone_count; /**< 墓碑条目数量 */
/**
* @brief 哈希函数指针
* @param key 键指针
@@ -56,7 +56,7 @@ typedef struct smcc_hashmap {
* @return 相同返回0不同返回非0
*/
int (*key_cmp)(const void *key1, const void *key2);
} hashmap_t;
} scc_hashtable_t;
/**
* @brief 初始化哈希表结构
@@ -64,7 +64,7 @@ typedef struct smcc_hashmap {
*
* @warning 必须设置hash_func和key_cmp后才能使用
*/
void hashmap_init(hashmap_t *ht);
void scc_hashtable_init(scc_hashtable_t *ht);
/**
* @brief 插入/更新键值对
@@ -73,7 +73,7 @@ void hashmap_init(hashmap_t *ht);
* @param value 值指针
* @return 被替换的旧值指针无替换返回NULL
*/
void *hashmap_set(hashmap_t *ht, const void *key, void *value);
void *scc_hashtable_set(scc_hashtable_t *ht, const void *key, void *value);
/**
* @brief 查找键对应值
@@ -81,7 +81,7 @@ void *hashmap_set(hashmap_t *ht, const void *key, void *value);
* @param key 查找键指针
* @return 找到返回值指针未找到返回NULL
*/
void *hashmap_get(hashmap_t *ht, const void *key);
void *scc_hashtable_get(scc_hashtable_t *ht, const void *key);
/**
* @brief 删除键值对
@@ -91,7 +91,7 @@ void *hashmap_get(hashmap_t *ht, const void *key);
*
* @note 实际采用墓碑标记方式删除
*/
void *hashmap_del(hashmap_t *ht, const void *key);
void *scc_hashtable_del(scc_hashtable_t *ht, const void *key);
/**
* @brief 销毁哈希表
@@ -99,17 +99,18 @@ void *hashmap_del(hashmap_t *ht, const void *key);
*
* @note 仅释放哈希表内部内存不会释放key/value内存
*/
void hashmap_drop(hashmap_t *ht);
void scc_hashtable_drop(scc_hashtable_t *ht);
/**
* @typedef hashmap_iter_fn
* @typedef scc_hashtable_iter_fn
* @brief 哈希表迭代回调函数类型
* @param key 当前键指针
* @param value 当前值指针
* @param context 用户上下文指针
* @return 返回非0停止迭代
*/
typedef int (*hashmap_iter_fn)(const void *key, void *value, void *context);
typedef int (*scc_hashtable_iter_fn)(const void *key, void *value,
void *context);
/**
* @brief 遍历哈希表所有有效条目
@@ -117,6 +118,7 @@ typedef int (*hashmap_iter_fn)(const void *key, void *value, void *context);
* @param iter_func 迭代回调函数
* @param context 用户上下文指针
*/
void hashmap_foreach(hashmap_t *ht, hashmap_iter_fn iter_func, void *context);
void scc_hashmap_foreach(scc_hashtable_t *ht, scc_hashtable_iter_fn iter_func,
void *context);
#endif /* __SCC_HASHMAP_H__ */

View File

@@ -18,14 +18,14 @@
* 组合哈希表和专用内存分配器实现的高效字符串存储池
*/
typedef struct strpool {
hashmap_t ht; /**< 哈希表用于快速查找已存储字符串 */
} strpool_t;
scc_hashtable_t ht; /**< 哈希表用于快速查找已存储字符串 */
} scc_strpool_t;
/**
* @brief 初始化字符串池
* @param pool 字符串池实例指针
*/
void init_strpool(strpool_t *pool);
void scc_strpool_init(scc_strpool_t *pool);
/**
* @brief 驻留字符串到池中
@@ -36,7 +36,7 @@ void init_strpool(strpool_t *pool);
* @note 返回值生命周期与字符串池一致
* @note 重复插入相同字符串会返回已有指针
*/
const char *strpool_intern(strpool_t *pool, const char *str);
const char *scc_strpool_intern(scc_strpool_t *pool, const char *str);
/**
* @brief 销毁字符串池
@@ -45,6 +45,25 @@ const char *strpool_intern(strpool_t *pool, const char *str);
* @warning 销毁后已获取的字符串指针将失效
* @note 会自动释放所有驻留字符串内存
*/
void strpool_destroy(strpool_t *pool);
void scc_strpool_drop(scc_strpool_t *pool);
/**
* @typedef scc_hashtable_iter_fn
* @brief 哈希表迭代回调函数类型
* @param key 当前键指针
* @param value 当前值指针
* @param context 用户上下文指针
* @return 返回非0停止迭代
*/
typedef int (*scc_strpool_iter_fn)(const char *key, char *value, void *context);
/**
* @brief 遍历字符串表所有有效条目
* @param ht 字符串表实例指针
* @param iter_func 迭代回调函数
* @param context 用户上下文指针
*/
void scc_strpool_foreach(scc_strpool_t *pool, scc_strpool_iter_fn iter_func,
void *context);
#endif /* __SCC_STRPOOL_H__ */

View File

@@ -4,7 +4,7 @@
#define SCC_INIT_HASHMAP_SIZE (32)
#endif
void hashmap_init(hashmap_t *ht) {
void scc_hashtable_init(scc_hashtable_t *ht) {
scc_vec_init(ht->entries);
ht->count = 0;
ht->tombstone_count = 0;
@@ -21,17 +21,18 @@ static int next_power_of_two(int n) {
return n + 1;
}
static hashmap_entry_t *find_entry(hashmap_t *ht, const void *key, u32 hash) {
static scc_hashtable_entry_t *find_entry(scc_hashtable_t *ht, const void *key,
u32 hash) {
if (ht->entries.cap == 0)
return NULL;
u32 index = hash & (ht->entries.cap - 1); // 容量是2的幂
u32 probe = 0;
hashmap_entry_t *tombstone = NULL;
scc_hashtable_entry_t *tombstone = NULL;
while (1) {
hashmap_entry_t *entry = &scc_vec_at(ht->entries, index);
scc_hashtable_entry_t *entry = &scc_vec_at(ht->entries, index);
if (entry->state == ENTRY_EMPTY) {
return tombstone ? tombstone : entry;
}
@@ -53,25 +54,27 @@ static hashmap_entry_t *find_entry(hashmap_t *ht, const void *key, u32 hash) {
return NULL;
}
static void adjust_capacity(hashmap_t *ht, int new_cap) {
static void adjust_capacity(scc_hashtable_t *ht, usize new_cap) {
new_cap = next_power_of_two(new_cap);
Assert(new_cap >= ht->entries.cap);
SCC_VEC(hashmap_entry_t) old_entries;
SCC_VEC(scc_hashtable_entry_t) old_entries;
old_entries.data = ht->entries.data;
old_entries.cap = ht->entries.cap;
// Not used size but for gdb python extention debug
ht->entries.size = new_cap;
ht->entries.cap = new_cap;
ht->entries.data = scc_realloc(NULL, new_cap * sizeof(hashmap_entry_t));
scc_memset(ht->entries.data, 0, new_cap * sizeof(hashmap_entry_t));
ht->entries.data =
scc_realloc(NULL, new_cap * sizeof(scc_hashtable_entry_t));
scc_memset(ht->entries.data, 0, new_cap * sizeof(scc_hashtable_entry_t));
// rehash the all of the old data
for (usize i = 0; i < old_entries.cap; i++) {
hashmap_entry_t *entry = &scc_vec_at(old_entries, i);
scc_hashtable_entry_t *entry = &scc_vec_at(old_entries, i);
if (entry->state == ENTRY_ACTIVE) {
hashmap_entry_t *dest = find_entry(ht, entry->key, entry->hash);
scc_hashtable_entry_t *dest =
find_entry(ht, entry->key, entry->hash);
*dest = *entry;
}
}
@@ -80,7 +83,7 @@ static void adjust_capacity(hashmap_t *ht, int new_cap) {
ht->tombstone_count = 0;
}
void *hashmap_set(hashmap_t *ht, const void *key, void *value) {
void *scc_hashtable_set(scc_hashtable_t *ht, const void *key, void *value) {
if (ht->count + ht->tombstone_count >= ht->entries.cap * 0.75) {
int new_cap = ht->entries.cap < SCC_INIT_HASHMAP_SIZE
? SCC_INIT_HASHMAP_SIZE
@@ -89,7 +92,7 @@ void *hashmap_set(hashmap_t *ht, const void *key, void *value) {
}
u32 hash = ht->hash_func(key);
hashmap_entry_t *entry = find_entry(ht, key, hash);
scc_hashtable_entry_t *entry = find_entry(ht, key, hash);
void *old_value = NULL;
if (entry->state == ENTRY_ACTIVE) {
@@ -107,21 +110,21 @@ void *hashmap_set(hashmap_t *ht, const void *key, void *value) {
return old_value;
}
void *hashmap_get(hashmap_t *ht, const void *key) {
void *scc_hashtable_get(scc_hashtable_t *ht, const void *key) {
if (ht->entries.cap == 0)
return NULL;
u32 hash = ht->hash_func(key);
hashmap_entry_t *entry = find_entry(ht, key, hash);
scc_hashtable_entry_t *entry = find_entry(ht, key, hash);
return (entry && entry->state == ENTRY_ACTIVE) ? entry->value : NULL;
}
void *hashmap_del(hashmap_t *ht, const void *key) {
void *scc_hashtable_del(scc_hashtable_t *ht, const void *key) {
if (ht->entries.cap == 0)
return NULL;
u32 hash = ht->hash_func(key);
hashmap_entry_t *entry = find_entry(ht, key, hash);
scc_hashtable_entry_t *entry = find_entry(ht, key, hash);
if (entry == NULL || entry->state != ENTRY_ACTIVE)
return NULL;
@@ -133,15 +136,16 @@ void *hashmap_del(hashmap_t *ht, const void *key) {
return value;
}
void hashmap_drop(hashmap_t *ht) {
void scc_hashtable_drop(scc_hashtable_t *ht) {
scc_vec_free(ht->entries);
ht->count = 0;
ht->tombstone_count = 0;
}
void hashmap_foreach(hashmap_t *ht, hashmap_iter_fn iter_func, void *context) {
void scc_hashmap_foreach(scc_hashtable_t *ht, scc_hashtable_iter_fn iter_func,
void *context) {
for (usize i = 0; i < ht->entries.cap; i++) {
hashmap_entry_t *entry = &scc_vec_at(ht->entries, i);
scc_hashtable_entry_t *entry = &scc_vec_at(ht->entries, i);
if (entry->state == ENTRY_ACTIVE) {
if (!iter_func(entry->key, entry->value, context)) {
break; // enable callback function terminal the iter

View File

@@ -1,13 +1,13 @@
#include "strpool.h"
void init_strpool(strpool_t *pool) {
void scc_strpool_init(scc_strpool_t *pool) {
pool->ht.hash_func = (u32 (*)(const void *))scc_strhash32;
pool->ht.key_cmp = (int (*)(const void *, const void *))scc_strcmp;
hashmap_init(&pool->ht);
scc_hashtable_init(&pool->ht);
}
const char *strpool_intern(strpool_t *pool, const char *str) {
void *existing = hashmap_get(&pool->ht, str);
const char *scc_strpool_intern(scc_strpool_t *pool, const char *str) {
void *existing = scc_hashtable_get(&pool->ht, str);
if (existing) {
return existing;
}
@@ -20,8 +20,13 @@ const char *strpool_intern(strpool_t *pool, const char *str) {
}
scc_memcpy(new_str, str, len);
hashmap_set(&pool->ht, new_str, new_str);
scc_hashtable_set(&pool->ht, new_str, new_str);
return new_str;
}
void strpool_destroy(strpool_t *pool) { hashmap_drop(&pool->ht); }
void scc_strpool_drop(scc_strpool_t *pool) { scc_hashtable_drop(&pool->ht); }
void scc_strpool_foreach(scc_strpool_t *pool, scc_strpool_iter_fn iter_func,
void *context) {
scc_hashmap_foreach(&pool->ht, (scc_hashtable_iter_fn)iter_func, context);
}