feat(build): 引入新的 Python 构建系统并移除旧 Makefile

新增基于 Python 的构建脚本 `cbuild.py`,支持包管理、依赖解析和模块化编译。
同时添加 `.gitignore` 忽略 `build` 目录,并在 `justfile` 中更新构建命令。
移除了原有的 `lib/Makefile` 和主目录下的相关 make 规则,统一使用新构建系统。
This commit is contained in:
zzy
2025-11-20 10:44:59 +08:00
parent 8d97fe896c
commit e22811f2f5
140 changed files with 1996 additions and 10098 deletions

3
.gitignore vendored
View File

@@ -26,3 +26,6 @@ note.md
# python
.venv
# cbuilder
build

View File

@@ -3,12 +3,3 @@ build-docs:
docs: build-docs
python -m http.server -d docs/html
smcc-build:
make -C src
smcc-clean:
make -C src clean
smcc-test: smcc-build
make -C tests/simple

15
README.md Normal file
View File

@@ -0,0 +1,15 @@
# Simple Models C Compiler
> Smaller Compiler(SMCC)
This is a simple C compiler that generates executable code from a subset of C99. The language supports basic arithmetic, logical, and conditional operations; `if`/`else`, `while`, `for`, and `switch`/`case` statements; function calls; and system calls.
## Features
- 隔离标准库
- 轻量化
- 模块化
- 自举构建

512
cbuild.py Normal file
View File

@@ -0,0 +1,512 @@
# build.py
from abc import ABC, abstractmethod
import tomllib
import pprint
import subprocess
from pathlib import Path
from dataclasses import dataclass, field
from enum import Enum, auto
import logging
import argparse
import sys
from typing import Self
@dataclass
class Dependency:
    """One entry of the manifest's ``dependencies`` array."""
    name: str               # dependency package name
    path: str               # path to the dependency, relative to this package
    version: str = "0.0.0"  # version string (semantics not enforced here)
    optional: bool = False  # marked optional in the manifest; presumably tied to features — TODO confirm
@dataclass
class Feature:
    """One feature declared in the manifest."""
    name: str              # feature name
    description: str = ""  # free-form description
    dependencies: list[str] = field(default_factory=list)  # names of deps this feature pulls in
class PackageConfig:
    """Package configuration: loads and wraps a package's ``cbuild.toml``.

    Only the ``[package]`` table of the TOML file is kept; every accessor
    below reads from that table and falls back to an empty default.
    """

    # Manifest file name looked up inside the package directory.
    CONFIG_FILE = "cbuild.toml"

    def __init__(self, config_path: Path):
        """Load ``<config_path>/cbuild.toml``.

        Args:
            config_path: Directory containing the package manifest.

        Raises:
            ValueError: If the manifest file does not exist.
        """
        package_file = config_path / self.CONFIG_FILE
        if not package_file.exists():
            raise ValueError(f"Package path {package_file} does not exist")
        with open(package_file, "rb") as file:
            raw_config = tomllib.load(file)
        self.config: dict = raw_config.get("package", {})
        self.path: Path = config_path

    def __str__(self) -> str:
        return pprint.pformat(self.config)

    @property
    def name(self) -> str:
        """Package name, or "" when not defined."""
        return self.config.get("name", "")

    @property
    def version(self) -> str:
        """Package version string, or "" when not defined."""
        return self.config.get("version", "")

    @property
    def default_features(self) -> list[str]:
        """Names of the features enabled by default."""
        return self.config.get("default_features", [])

    @property
    def features(self) -> list[Feature]:
        """All declared features with their dependencies.

        NOTE(review): this iterates ``features_data`` directly, so it assumes
        ``features`` is an *array* of strings/tables in the TOML; if it were a
        table, iteration would yield only its keys — confirm the manifest
        schema. Entries that are neither str nor dict are appended unchanged.
        """
        features_data = self.config.get("features", {})
        features = []
        for feature in features_data:
            if isinstance(feature, str):
                # Bare string entry: a feature with no metadata.
                feature = Feature(name=feature)
            elif isinstance(feature, dict):
                name = feature.get("name", None)
                if name is None:
                    continue  # a table without a name is silently skipped
                feature = Feature(
                    name=name,
                    description=feature.get("description", ""),
                    dependencies=feature.get("dependencies", [])
                )
            features.append(feature)
        return features

    @property
    def dependencies(self) -> list[Dependency]:
        """Declared dependencies; non-table entries are ignored."""
        deps_data = self.config.get("dependencies", [])
        dependencies = []
        for dep_dict in deps_data:
            if isinstance(dep_dict, dict):
                dependency = Dependency(
                    name=dep_dict.get("name", ""),
                    path=dep_dict.get("path", ""),
                    version=dep_dict.get("version", "0.0.0"),
                    optional=dep_dict.get("optional", False)
                )
                dependencies.append(dependency)
        return dependencies

    @property
    def authors(self) -> list[str]:
        """List of package authors."""
        return self.config.get("authors", [])

    @property
    def description(self) -> str:
        """Free-form package description."""
        return self.config.get("description", "")
@dataclass(frozen=True)
class BuildPath:
"""path"""
root_path: Path
tests_path: Path
output_path: Path
object_path: Path
src_path: Path
default_bin_path: Path
default_lib_path: Path
@classmethod
def from_root_path(cls, root_path: Path) -> Self:
"""_summary_
Args:
output_path (Path): _description_
Returns:
Self: _description_
"""
src_path = root_path / "src"
output_path = root_path / "build"
return cls(
root_path = root_path,
tests_path = root_path / "tests",
output_path = output_path ,
object_path = output_path / "obj",
src_path = src_path,
default_bin_path = src_path / "main.c",
default_lib_path = src_path / "lib.c",
)
class TargetType(Enum):
    """Kind of artifact a :class:`Target` produces."""
    MAIN_EXECUTABLE = auto()  # built from src/main.c
    EXECUTABLE = auto()       # generic executable (not produced in this file)
    TEST_EXECUTABLE = auto()  # built from tests/test_*.c
    STATIC_LIBRARY = auto()   # lib<name>.a built from src/lib.c
    SHARED_LIBRARY = auto()   # shared library (not produced in this file)
@dataclass
class Target:
    """A single build target and the paths involved in producing it."""
    name: str         # target name (used as the output file stem)
    type: TargetType  # artifact kind
    source: Path      # entry source file
    object: Path      # intermediate object file
    output: Path      # final artifact path
class CBuildContext:
    """Build context: resolves paths, targets and dependencies for one package."""

    def __init__(self, package: PackageConfig, build_path: BuildPath | None = None):
        """
        Args:
            package: Parsed package manifest.
            build_path: Pre-computed layout; derived from the package root
                when omitted.
        """
        self.package = package
        self.path = BuildPath.from_root_path(package.path) \
            if build_path is None else build_path

    @property
    def sources_path(self) -> list[Path]:
        """All C sources below ``src/`` (recursive glob)."""
        return list(self.path.src_path.glob("**/*.c"))

    @property
    def tests_path(self) -> list[Path]:
        """Test sources matching ``tests/test_*.c``.

        NOTE(review): same name as the *directory* attribute on
        ``BuildPath`` — here it is a list of files, which is easy to confuse.
        """
        test_sources = []
        test_path = self.path.root_path / "tests"
        if test_path.exists():
            for file in test_path.glob("test_*.c"):
                test_sources.append(file)
        return test_sources

    @property
    def includes(self) -> list[Path]:
        """Include directories of this package and all dependencies.

        Only directories that actually exist on disk are returned.
        """
        includes = [self.path.root_path / "include"]
        # check folders available
        deps = self.get_dependencies()
        for dep in deps:
            includes.extend(dep.includes)
        return [inc for inc in includes if inc.exists()]

    def get_targets(self) -> list[Target]:
        """Enumerate build targets: main executable, static lib, test binaries."""
        targets = []
        # TODO: output extension is hard-coded (Windows-style).
        ext = ".exe"
        # Main executable target (src/main.c), when present.
        if self.path.default_bin_path.exists():
            targets.append(Target(
                name=self.package.name,
                type=TargetType.MAIN_EXECUTABLE,
                source=self.path.default_bin_path,
                object=self.get_object_path(self.path.default_bin_path),
                output=self.path.output_path / f"{self.package.name}{ext}",
            ))
        # Static library target (src/lib.c), when present.
        if self.path.default_lib_path.exists():
            targets.append(Target(
                name=f"lib{self.package.name}",
                type=TargetType.STATIC_LIBRARY,
                source=self.path.default_lib_path,
                object=self.get_object_path(self.path.default_lib_path),
                output=self.path.output_path / f"lib{self.package.name}.a"
            ))
        # One test executable per tests/test_*.c.
        if self.path.tests_path.exists():
            for test_source in self.tests_path:
                targets.append(Target(
                    name=test_source.stem,
                    type=TargetType.TEST_EXECUTABLE,
                    source=test_source,
                    object=self.get_object_path(test_source),
                    output=self.path.output_path / f"{test_source.stem}{ext}"
                ))
        return targets

    def get_build_components(self) -> list[tuple[Path, Path]]:
        """(source, object) pairs to compile, excluding the entry points
        ``main.c`` and ``lib.c``; dependency components appended recursively."""
        objs = []
        for path in self.sources_path:
            if path == self.path.default_bin_path:
                continue
            if path == self.path.default_lib_path:
                continue
            objs.append((path, self.get_object_path(path)))
        # logging.debug("[+] build_components: %s", objs)
        deps = self.get_dependencies()
        for dep in deps:
            objs.extend(dep.get_build_components())
        return objs

    def get_object_path(self, source_path: Path) -> Path:
        """Map a ``.c`` source path to its ``.o`` object path.

        Side effect: creates the object directory (and the object file's
        parent directory) on disk.

        Args:
            source_path: Path of the C source file.

        Returns:
            Path: Object path mirroring the source's layout below ``src/``.
        """
        # Ensure the output directory exists.
        objects_dir = self.path.object_path
        objects_dir.mkdir(parents=True, exist_ok=True)
        # Mirror the source's position relative to src/ under obj/.
        try:
            relative_path = source_path.relative_to(self.path.src_path)
            object_path = objects_dir / relative_path.with_suffix('.o')
            # Ensure the object file's directory exists.
            object_path.parent.mkdir(parents=True, exist_ok=True)
            return object_path
        except ValueError:
            # Source lives outside src/: fall back to its bare file name.
            return objects_dir / source_path.with_suffix('.o').name

    def get_dependencies(self) -> list[Self]:
        """Contexts for all transitive dependencies, in discovery order.

        Returns:
            list[CBuildContext]: One context per dependency edge; duplicates
            are NOT removed, so diamond dependencies appear more than once.
        """
        deps = []
        for dep in self.package.dependencies:
            ctx = CBuildContext(PackageConfig(Path(self.package.path / dep.path)))
            deps.append(ctx)
            deps.extend(ctx.get_dependencies())
        return deps
@dataclass
class CacheEntry:
    """One incremental-build cache record (declared but unused in this file).

    Field meanings below are inferred from the names — confirm when the
    cache is implemented.
    """
    source_hash: str     # presumably a hash of the source file contents
    object_hash: str     # presumably a hash of the produced object file
    compile_time: float  # presumably the timestamp of the last compile
    includes_hash: str   # presumably a hash over the include set
class BuildCache:
    """Build cache manager — placeholder, not implemented yet."""
    pass
class Compiler(ABC):
    """Abstract compiler backend: compile + link primitives."""

    @abstractmethod
    def compile(self, sources: Path, output: Path, includes: list[Path], flags: list[str]):
        """Compile a single source file into an object file."""

    @abstractmethod
    def link(self, objects: list[Path], libraries: list[str], flags: list[str], output: Path):
        """Link object files into the final output."""

    def compile_all(self, sources: list[tuple[Path, Path]], includes: list[Path], flags: list[str]):
        """Compile every (source, object) pair in *sources*, sequentially."""
        for source, output in sources:
            self.compile(source, output, includes, flags)

    def cmd(self, cmd: str | list[str]):
        """Run *cmd* via subprocess, logging it first.

        Raises:
            subprocess.CalledProcessError: Re-raised after logging when the
                command exits non-zero (``check=True``).
        """
        try:
            logging.debug("command: `%s`", ' '.join(cmd) if isinstance(cmd, list) else cmd)
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:
            # Log the full command line to make failures debuggable.
            cmd_str = ' '.join(e.cmd) if isinstance(e.cmd, list) else e.cmd
            logging.error("command running error: [%d]`%s`", e.returncode, cmd_str)
            raise
class ClangCompiler(Compiler):
    """Compiler backend that shells out to ``clang``."""

    def compile(self, sources: Path, output: Path, includes: list[Path], flags: list[str]):
        """Compile one translation unit into an object file."""
        argv = ["clang", *flags, "-c", str(sources), "-o", str(output)]
        argv += [f"-I{inc}" for inc in includes]
        self.cmd(argv)

    def link(self, objects: list[Path], libraries: list[str], flags: list[str], output: Path):
        """Link object files (plus libraries) into *output*."""
        argv = ["clang", *flags, "-o", str(output)]
        argv += [str(obj) for obj in objects]
        argv += list(libraries)
        self.cmd(argv)
class GccCompiler(Compiler):
    """Compiler backend that shells out to ``gcc``."""

    def compile(self, sources: Path, output: Path, includes: list[Path], flags: list[str]):
        """Compile one translation unit into an object file."""
        cmd = ["gcc"]
        cmd.extend(flags)
        cmd.extend(["-c", str(sources), "-o", str(output)])
        cmd.extend(f"-I{inc}" for inc in includes)
        self.cmd(cmd)

    def link(self, objects: list[Path], libraries: list[str], flags: list[str], output: Path):
        """Link object files (plus libraries) into *output*.

        BUG FIX: the previous ``set()`` de-duplication made the object
        argument order non-deterministic; link order matters (especially for
        static archives), so de-duplicate while preserving first-seen order.
        """
        objs = list(dict.fromkeys(str(obj.absolute()) for obj in objects))
        cmd = ["gcc"]
        cmd.extend(flags)
        cmd.extend(["-o", str(output)])
        cmd.extend(objs)
        cmd.extend(lib for lib in libraries)
        self.cmd(cmd)
class CPackageBuilder:
"""包构建器"""
# TODO
EXT = ".exe"
def __init__(self, package_path: Path, compiler: Compiler):
self.package = PackageConfig(package_path)
self.context = CBuildContext(self.package, None)
self.compiler: Compiler = compiler
self.global_flags = ["-g"]
def _build_ctx(self) -> list[Path]:
"""构建上下文"""
# 确保输出目录存在
self.context.path.output_path.mkdir(parents=True, exist_ok=True)
self.context.path.object_path.mkdir(parents=True, exist_ok=True)
# TODO use cache and add dependency include and flags
deps = self.context.get_dependencies()
path_map = self.context.get_build_components()
self.compiler.compile_all(path_map, self.context.includes, self.global_flags)
return [pair[1] for pair in path_map]
def build(self):
"""构建包"""
object_files = self._build_ctx()
targets = self.context.get_targets()
for target in targets:
match target.type:
case TargetType.MAIN_EXECUTABLE:
self.compiler.compile(target.source, target.object, self.context.includes,
self.global_flags)
object_files.append(target.object)
self.compiler.link(object_files, [], self.global_flags, target.output)
object_files.remove(target.object)
case TargetType.TEST_EXECUTABLE:
self.compiler.compile(target.source, target.object, self.context.includes,
self.global_flags)
object_files.append(target.object)
self.compiler.link(object_files, [], self.global_flags, target.output)
object_files.remove(target.object)
logging.info("Building is Ok...")
def run(self):
"""运行项目"""
targets = [target for target in self.context.get_targets() \
if target.type == TargetType.MAIN_EXECUTABLE]
if len(targets) != 1:
logging.error("not have target to run")
subprocess.run(targets[0].output, check=False)
def tests(self):
"""运行测试"""
targets = [target for target in self.context.get_targets() \
if target.type == TargetType.TEST_EXECUTABLE]
passed = 0
failed = 0
for target in targets:
name = target.name
print(f"运行测试: {name}")
logging.debug("test run %s", target.output)
try:
result = subprocess.run(target.output,
check=True,
# capture_output=True,
# text=True,
timeout=30)
if result.returncode == 0:
print(f" ✓ 测试 {name} 通过")
passed += 1
else:
print(f" ✗ 测试 {name} 失败")
if result.stdout:
print(f" 输出: {result.stdout}")
if result.stderr:
print(f" 错误: {result.stderr}")
failed += 1
except subprocess.TimeoutExpired:
print(f" ✗ 测试 {name} 超时")
failed += 1
except subprocess.SubprocessError as e:
print(f" ✗ 测试 {name} 运行异常: {e}")
failed += 1
print(f"\n测试结果: {passed} 通过, {failed} 失败")
def main():
    """CLI entry point: parse arguments and dispatch build/run/test.

    Exits with status 1 (after printing to stderr) on any build error.
    """
    parser = argparse.ArgumentParser(description="Simple C Package Manager")
    parser.add_argument("command", choices=["build", "run", "test"],
                        help="Command to execute")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="enable the logging to debug (default: false)")
    parser.add_argument("--path", "-p", default=".",
                        help="Path to the package (default: current directory)")
    args = parser.parse_args()
    package_path = Path(args.path)
    if args.verbose:
        # BUG FIX: setting the root logger to NOTSET left the effective
        # level at WARNING, so -v had no effect; install a DEBUG handler.
        logging.basicConfig(level=logging.DEBUG)
    compiler = GccCompiler()
    try:
        if args.command == "build":
            builder = CPackageBuilder(package_path, compiler)
            builder.build()
            print("build is Ok...")
        elif args.command == "run":
            builder = CPackageBuilder(package_path, compiler)
            bin_path = builder.context.path.default_bin_path
            if not bin_path.exists():
                print(f"{bin_path} not exist")
                return
            builder.build()
            builder.run()
        elif args.command == "test":
            builder = CPackageBuilder(package_path, compiler)
            builder.build()
            builder.tests()
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    # builder = CPackageBuilder(Path("./runtime/libcore/"), ClangCompiler())
    # builder.build()
    main()

11
justfile Normal file
View File

@@ -0,0 +1,11 @@
list:
	just --list

build-lexer:
	# FIX: the build script added in this commit is `cbuild.py`, not `build.py`
	python cbuild.py build -p libs/lexer

build-docs:
	doxygen Doxyfile

docs: build-docs
	python -m http.server -d docs/html

View File

@@ -1,48 +0,0 @@
# Toolchain configuration
CC = gcc
AR = ar
CFLAGS = -g -Wall -I..

# Source locations
RT_DIR = ./rt
LOG_DIR = ./rt/log

# Basic runtime library sources
SRCS = \
	$(RT_DIR)/std/rt_std.c \
	./core.c \
	$(RT_DIR)/rt.c \
	$(RT_DIR)/rt_alloc.c \
	$(RT_DIR)/rt_string.c \
	$(LOG_DIR)/log.c

# Utility library sources
UTILS_DIR = ./utils
DS_DIR = $(UTILS_DIR)/ds
STRPOOL_DIR = $(UTILS_DIR)/strpool
SYMTAB_DIR = $(UTILS_DIR)/symtab
TOKBUF_DIR = $(UTILS_DIR)/tokbuf
# BUG FIX: the list previously ended with a trailing backslash in front of the
# commented-out entries, splicing the comment line into the SRCS value.
SRCS += \
	$(DS_DIR)/hashtable.c \
	$(STRPOOL_DIR)/strpool.c
# $(SYMTAB_DIR)/symtab.c
# $(TOKBUF_DIR)/tokbuf.c

# Object files derived from the sources
OBJS = $(SRCS:.c=.o)

# Final archive
TARGET = libcore.a

all: $(TARGET)

$(TARGET): $(OBJS)
	$(AR) rcs $@ $^

%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<

clean:
	rm -f $(OBJS) $(TARGET)

.PHONY: all clean

File diff suppressed because it is too large Load Diff

View File

@@ -1,10 +0,0 @@
#include "core.h"

/* One-time initialisation of the core library; safe to call repeatedly. */
void init_lib_core() {
    static int init = 0;   /* guards against re-initialisation */
    if (init) return;
    init_rt();
    init += 1;
    Assert(init == 1);
}

View File

@@ -1,28 +0,0 @@
/**
 * @file core.h
 * @brief Core library initialisation interface
 *
 * Declares the SMCC core initialisation entry point and basic service setup.
 */
#ifndef __SMCC_LIB_CORE_H__
#define __SMCC_LIB_CORE_H__

#include "rt/rt.h"

/**
 * @brief Initialise the core library components
 *
 * Must be called before any other library function; it is (or will be)
 * responsible for:
 * - TODO initialising the memory management subsystem
 * - TODO registering basic signal handlers
 * - TODO configuring the default logging system
 * - setting up the runtime environment
 *
 * @note Must be the first library call after program start.
 * @warning Repeated calls are harmless (the call is idempotent), but it must
 *          be called at least once, otherwise behaviour is undefined.
 * @see rt.h for the related runtime environment configuration
 */
void init_lib_core();

#endif // __SMCC_LIB_CORE_H__

View File

@@ -1,59 +0,0 @@
/**
 * @file color.h
 * @brief ANSI terminal colour escape-code definitions
 *
 * Cross-platform control of terminal text colour and style.
 */
#ifndef __SMCC_TERMINAL_COLOR_H__
#define __SMCC_TERMINAL_COLOR_H__

/// @name Foreground colour codes
/// @{
#define ANSI_FG_BLACK "\33[30m" ///< black foreground
#define ANSI_FG_RED "\33[31m" ///< red foreground
#define ANSI_FG_GREEN "\33[32m" ///< green foreground
#define ANSI_FG_YELLOW "\33[33m" ///< yellow foreground
#define ANSI_FG_BLUE "\33[34m" ///< blue foreground
#define ANSI_FG_MAGENTA "\33[35m" ///< magenta foreground
#define ANSI_FG_CYAN "\33[36m" ///< cyan foreground
#define ANSI_FG_WHITE "\33[37m" ///< white foreground
/// @}

/// @name Background colour codes
/// @{
#define ANSI_BG_BLACK "\33[40m" ///< black background
#define ANSI_BG_RED "\33[41m" ///< red background
#define ANSI_BG_GREEN "\33[42m" ///< green background
#define ANSI_BG_YELLOW "\33[43m" ///< yellow background
#define ANSI_BG_BLUE "\33[44m" ///< blue background
#define ANSI_BG_MAGENTA "\33[45m" ///< magenta background
#define ANSI_BG_CYAN "\33[46m" ///< cyan background
#define ANSI_BG_WHITE "\33[47m" ///< white background
/// @}

/// @name Text style codes
/// @{
#define ANSI_UNDERLINED "\33[4m" ///< underlined style
#define ANSI_BOLD "\33[1m" ///< bold style
#define ANSI_NONE "\33[0m" ///< reset all styles
/// @}

/**
 * @def ANSI_FMT
 * @brief Safe text-formatting macro
 * @param str target string
 * @param fmt ANSI format sequence (several styles may be concatenated)
 *
 * @note Defining ANSI_FMT_DISABLE turns colour output off entirely.
 * @code
 * printf(ANSI_FMT("Warning!", ANSI_FG_YELLOW ANSI_BOLD));
 * @endcode
 */
#ifndef ANSI_FMT_DISABLE
#define ANSI_FMT(str, fmt) fmt str ANSI_NONE ///< styles enabled: wrap the string
#else
#define ANSI_FMT(str, fmt) str ///< styles disabled: pass the string through
#endif

#endif // __SMCC_TERMINAL_COLOR_H__

View File

@@ -1,72 +0,0 @@
#include "log.h"

/* Handler callback type (mirrors the declaration in log.h). */
typedef void (*log_handler)(
    log_level_t level,
    const char* module,
    const char* file,
    int line,
    const char* message
);

/* Default handler: formats one record to rt_stderr; exits the process on
 * FATAL records. Does nothing when rt_stderr is not initialised. */
static void default_handler(log_level_t level, const char* module, const char* file, int line, const char* message) {
    if (!rt_stderr) return;
    const char* level_str;
    switch (level) {
    case LOG_LEVEL_DEBUG: level_str = "DEBUG"; break;
    case LOG_LEVEL_INFO: level_str = "INFO "; break;
    case LOG_LEVEL_WARN: level_str = "WARN "; break;
    case LOG_LEVEL_ERROR: level_str = "ERROR"; break;
    case LOG_LEVEL_FATAL: level_str = "FATAL"; break;
    case LOG_LEVEL_TRACE: level_str = "TRACE"; break;
    default: level_str = "NOTSET"; break;
    }
    /// @note defining __LOG_NO_COLOR__ disables coloured output
#ifndef __LOG_NO_COLOR__
    const char* color_code = ANSI_NONE;
    switch (level) {
    case LOG_LEVEL_DEBUG: color_code = ANSI_FG_CYAN; break;
    case LOG_LEVEL_INFO: color_code = ANSI_FG_GREEN; break;
    case LOG_LEVEL_TRACE: color_code = ANSI_FG_BLUE; break;
    case LOG_LEVEL_WARN: color_code = ANSI_FG_YELLOW; break;
    case LOG_LEVEL_ERROR: color_code = ANSI_FG_RED; break;
    case LOG_LEVEL_FATAL: color_code = ANSI_FG_RED ANSI_UNDERLINED; break; // boost contrast
    default: color_code = ANSI_NONE;
    }
    rt.fprintf(rt_stderr, ANSI_BOLD "%s[%s] - %s - %s:%d | %s" ANSI_NONE "\n", color_code,
        level_str, module, file, line, message);
#else
    rt.fprintf(rt_stderr, "[%s] %s:%d | %s: %s\n",
        level_str, file, line, module, message);
#endif
    if (level & LOG_LEVEL_FATAL) {
        rt.exit(-LOG_LEVEL_FATAL);
    }
}

/* Fallback logger returned whenever no named logger is found. */
static logger_t root_logger = {
    .name = "root",
    .level = LOG_LEVEL_ALL,
    .handler = default_handler,
};

/* Initialise *logger with the default handler, ALL levels and the given name. */
void init_logger(logger_t* logger, const char* name) {
    logger->name = name;
    logger->handler = default_handler;
    log_set_level(logger, LOG_LEVEL_ALL);
}

/* Look up a logger by name.
 * NOTE(review): registry lookup is not implemented — the name is ignored and
 * the root logger is always returned (matches the @warning in log.h). */
logger_t* log_get(const char* name) {
    return &root_logger;
}

/* Set the level of *logger, or of the root logger when logger is NULL. */
void log_set_level(logger_t* logger, log_level_t level) {
    if (logger) logger->level = level;
    else root_logger.level = level;
}

/* Replace the handler of *logger, or of the root logger when logger is NULL. */
void log_set_handler(logger_t* logger, log_handler handler) {
    if (logger) logger->handler = handler;
    else root_logger.handler = handler;
}

View File

@@ -1,161 +0,0 @@
/**
 * @file log.h
 * @brief Core logging module (multi-level logging, assertions, fatal handling)
 */
#ifndef __SMCC_LOG_H__
#define __SMCC_LOG_H__

#include "../std/rt_api_def.h"
#include "color.h"

/**
 * @brief Log level enumeration
 *
 * Output levels of the logging system; values are flag bits and may be
 * combined.
 */
typedef enum log_level {
    LOG_LEVEL_NOTSET = 0, ///< level not set (inherits the default configuration)
    LOG_LEVEL_DEBUG = 1 << 0, ///< debug details (development-time information)
    LOG_LEVEL_INFO = 1 << 1, ///< general information (system status)
    LOG_LEVEL_WARN = 1 << 2, ///< warnings (potential problems)
    LOG_LEVEL_ERROR = 1 << 3, ///< errors (recoverable)
    LOG_LEVEL_FATAL = 1 << 4, ///< fatal errors (terminate the program)
    LOG_LEVEL_TRACE = 1 << 5, ///< tracing (performance or stack-frame tracing)
    LOG_LEVEL_ALL = 0xFF, ///< all levels combined
} log_level_t;

/**
 * @brief Log handler callback type
 * @param level log level
 * @param module module name, may be NULL
 * @param file source file name
 * @param line source line number
 * @param message formatted log message
 * @todo module names are accepted but currently ignored
 */
typedef void (*log_handler)(
    log_level_t level,
    const char* module,
    const char* file,
    int line,
    const char* message
);

#ifndef LOGGER_MAX_BUF_SIZE
#define LOGGER_MAX_BUF_SIZE 512 ///< maximum buffer size for a single record
#endif

/**
 * @brief Logger instance
 *
 * Each logger instance carries its own configuration and format buffer.
 */
typedef struct logger {
    const char* name; ///< logger name (distinguishes modules)
    log_level_t level; ///< currently enabled level mask
    log_handler handler; ///< handler callback
    char buf[LOGGER_MAX_BUF_SIZE]; ///< formatting buffer
} logger_t;

/**
 * @brief Initialise a logger instance; all other fields get default values
 * @param[in] logger logger instance pointer
 * @param[in] name logger name; NULL selects the default logger name
 */
void init_logger(logger_t* logger, const char* name);

// TODO log_set(); logger registration is not implemented yet

/**
 * @brief Get or create a logger instance
 * @param[in] name logger name; NULL selects the default logger
 * @return logger instance pointer
 * @warning When no matching logger is found, the root logger is returned
 */
logger_t* log_get(const char* name);

/**
 * @brief Set the log level
 * @param[in] logger target logger instance
 * @param[in] level level mask to set (levels may be combined)
 */
void log_set_level(logger_t* logger, log_level_t level);

/**
 * @brief Install a custom log handler
 * @param[in] logger target logger instance
 * @param[in] handler custom handler; NULL restores the default handler
 */
void log_set_handler(logger_t* logger, log_handler handler);

#ifndef LOG_MAX_MAROC_BUF_SIZE
#define LOG_MAX_MAROC_BUF_SIZE LOGGER_MAX_BUF_SIZE ///< macro-expansion buffer size
#endif

/**
 * @def _LOG
 * @brief Internal logging macro (used by the macros below)
 * @param _module_ logger instance; NULL selects the default logger
 * @param _level_ log level
 * @param _msg_ format string
 * @param ... variadic format arguments
 */
#define _LOG(_module_, _level_, _msg_, ...) \
do { \
    logger_t* _logger; \
    if (!_module_) { \
        _logger = log_get(NULL); \
    } \
    else _logger = _module_; \
    if (_logger && _logger->handler && (_logger->level & (_level_))) { \
        rt.snprintf(_logger->buf, sizeof(_logger->buf), (_msg_), ##__VA_ARGS__); \
        _logger->handler((_level_), _logger->name, __FILE__, __LINE__, _logger->buf); \
    } \
} while(0)

/// @name Per-module logging macros
/// @{
#define MLOG_NOTSET(module, ...) _LOG(module, LOG_LEVEL_NOTSET, __VA_ARGS__) ///< uncategorised
#define MLOG_DEBUG( module, ...) _LOG(module, LOG_LEVEL_DEBUG, __VA_ARGS__) ///< debug (requires DEBUG level)
#define MLOG_INFO(  module, ...) _LOG(module, LOG_LEVEL_INFO, __VA_ARGS__) ///< informational
#define MLOG_WARN(  module, ...) _LOG(module, LOG_LEVEL_WARN, __VA_ARGS__) ///< warning
#define MLOG_ERROR( module, ...) _LOG(module, LOG_LEVEL_ERROR, __VA_ARGS__) ///< recoverable error
#define MLOG_FATAL( module, ...) _LOG(module, LOG_LEVEL_FATAL, __VA_ARGS__) ///< fatal (before termination)
#define MLOG_TRACE( module, ...) _LOG(module, LOG_LEVEL_TRACE, __VA_ARGS__) ///< call-stack tracing
/// @}

/// @name Shorthand logging macros (default logger)
/// @{
#define LOG_NOTSET(...) _LOG(NULL, LOG_LEVEL_NOTSET, __VA_ARGS__) ///< uncategorised
#define LOG_DEBUG(...) _LOG(NULL, LOG_LEVEL_DEBUG, __VA_ARGS__) ///< debug (requires DEBUG level)
#define LOG_INFO(...) _LOG(NULL, LOG_LEVEL_INFO, __VA_ARGS__) ///< informational
#define LOG_WARN(...) _LOG(NULL, LOG_LEVEL_WARN, __VA_ARGS__) ///< warning
#define LOG_ERROR(...) _LOG(NULL, LOG_LEVEL_ERROR, __VA_ARGS__) ///< recoverable error
#define LOG_FATAL(...) _LOG(NULL, LOG_LEVEL_FATAL, __VA_ARGS__) ///< fatal (before termination)
#define LOG_TRACE(...) _LOG(NULL, LOG_LEVEL_TRACE, __VA_ARGS__) ///< call-stack tracing
/// @}

/**
 * @def _Assert
 * @brief Internal assertion macro
 * @param cond condition expression to check
 * @param ... error message (format string + arguments)
 */
#define _Assert(cond, ...) \
do { \
    if (!(cond)) { \
        LOG_FATAL(__VA_ARGS__); \
    } \
} while (0)

/// @name Assertion helpers
/// @{
#define AssertFmt(cond, format, ...) _Assert(cond, "Assertion Failure: " format, ## __VA_ARGS__) ///< assertion with message
#define PanicFmt(format, ...) _Assert(0, "Panic: " format, ## __VA_ARGS__) ///< unconditional fatal error
#define Assert(cond) AssertFmt(cond, "cond is `" SMCC_STR(cond) "`") ///< basic assertion
#define Panic(...) PanicFmt(__VA_ARGS__) ///< fatal error with custom message
#define TODO() PanicFmt("TODO please implement me") ///< marks unimplemented code (fatal)
/// @}

#endif // __SMCC_LOG_H__

View File

@@ -1,10 +0,0 @@
#include "rt.h"

/* Initialise the runtime backend. */
void init_rt() {
    // TODO Choice OS
#ifndef __SMCC_NO_OS_STD__
    /* NOTE(review): a #include inside a function body is legal but unusual;
       it pulls in the libc-backed std declarations only on this path. */
#include "std/rt_std.h"
    init_rt_std();
#endif
    return;
}

View File

@@ -1,18 +0,0 @@
/* Runtime umbrella header: aggregates the runtime sub-modules and exposes a
 * few generic helper macros. */
#ifndef __SMCC_RT_H__
#define __SMCC_RT_H__

#include "std/rt_api_def.h"
#include "std/rt_type.h"
#include "log/log.h"
#include "rt_alloc.h"
#include "rt_string.h"

/* Initialise the runtime (selects/initialises the OS backend). */
void init_rt();

// Generic helper macros
#define _SMCC_STR(str) #str                              /* raw stringise */
#define SMCC_STR(str) _SMCC_STR(str)                     /* stringise after macro expansion */
#define SMCC_ARRLEN(arr) (sizeof(arr) / sizeof(arr[0]))  /* element count of a static array */

#endif // __SMCC_RT_H__

View File

@@ -1,135 +0,0 @@
#include "rt_alloc.h"
#define ALLOCATOR_PAGE_SIZE (4096)
/* Simple / Static Allocator */
void* salloc_alloc(int size) {
// TODO do some trace
return rt._malloc(size);
}
void* salloc_realloc(void* ptr, int size) {
return rt._realloc(ptr, size);
}
void salloc_free(void* ptr) {
// TODO do some trace
rt._free(ptr);
}
/* Fixed Allocator */
#define PAGE_ALIGN(size) (((size) + ALLOCATOR_PAGE_SIZE -1) & ~(ALLOCATOR_PAGE_SIZE-1))
void falloc_init(fixed_alloc_t* fa, int fixed_size, int init_blocks) {
fa->free_list = NULL;
fa->page_list = NULL;
// 确保块大小至少能存放指针(用于空闲链表)
const int min_size = sizeof(void*);
fa->block_size = (fixed_size < min_size) ? min_size :
(fixed_size + 15) & ~15; // 16字节对齐
// 计算每页块数(优化缓存利用率)
const int page_size = ALLOCATOR_PAGE_SIZE - sizeof(void*);
fa->blocks_per_page = page_size / fa->block_size;
// TODO copy paste 需要使用函数抽象 申请过程
// 预分配初始页
void* page = salloc_alloc(PAGE_ALIGN(fa->block_size * init_blocks));
unsigned char* p = (unsigned char*)page;
for (int i = 0; i < init_blocks; ++i) {
void** block = (void**)p;
*block = fa->free_list;
fa->free_list = block;
p += fa->block_size;
}
*(void**)page = fa->page_list;
fa->page_list = page;
}
void* falloc_alloc(fixed_alloc_t* fa) {
if (!fa->free_list) {
// 分配新页(带页头保存链表指针)
void* page = salloc_alloc(ALLOCATOR_PAGE_SIZE);
unsigned char* p = (unsigned char*)page + sizeof(void*);
// 链接新页块到空闲链表
for (int i = 0; i < fa->blocks_per_page; ++i) {
void** block = (void**)p;
*block = fa->free_list;
fa->free_list = block;
p += fa->block_size;
}
*(void**)page = fa->page_list;
fa->page_list = page;
}
void* block = fa->free_list;
fa->free_list = *(void**)block;
return (void*)((unsigned char*)block + sizeof(void*)); // 跳过链表指针
}
void falloc_free(fixed_alloc_t* fa, void* ptr) {
if (!fa || !ptr) return;
void** block = (void**)((u8_t*)ptr - sizeof(void*));
*block = fa->free_list;
fa->free_list = block;
}
void falloc_destroy(fixed_alloc_t* fa) {
if (!fa) return;
// 逆向释放所有内存页(保持地址连续性)
void* current_page = fa->page_list;
while (current_page) {
void* next_page = *(void**)current_page; // 页头保存了链表指针
salloc_free(current_page);
current_page = next_page;
}
// 防御性清零(防止悬垂指针)
fa->free_list = NULL;
fa->blocks_per_page = 0;
fa->block_size = 0;
fa->page_list = NULL;
}
/* Long Allocator */
void lalloc_init(long_alloc_t* la) {
la->current = NULL;
la->block_size = ALLOCATOR_PAGE_SIZE; // 初始块大小
}
void* lalloc_alloc(long_alloc_t* la, int size) {
size = (size + 15) & ~15; // 16字节对齐
if (!la->current || (la->current->used + size) > la->block_size) {
int new_size = la->block_size;
if (new_size < size + sizeof(long_block_t))
new_size = size + sizeof(long_block_t);
long_block_t* new_block = (long_block_t*)salloc_alloc(new_size);
new_block->next = la->current;
new_block->used = sizeof(long_block_t);
la->current = new_block;
la->block_size = new_size;
}
void* ptr = (unsigned char*)la->current + la->current->used;
la->current->used += size;
return ptr;
}
void lalloc_destroy(long_alloc_t* la) {
while (la->current) {
long_block_t* prev = la->current->next;
salloc_free(la->current);
la->current = prev;
}
}

View File

@@ -1,132 +0,0 @@
/**
 * @file rt_alloc.h
 * @brief Memory allocator interfaces
 *
 * Three allocator implementations: a simple allocator, a fixed-size
 * allocator and a long-block (bump) allocator.
 */
#ifndef __SMCC_RT_ALLOC_H__
#define __SMCC_RT_ALLOC_H__

#include "std/rt_api_def.h"

/** @defgroup simple_allocator Simple allocator */
/**
 * @brief Allocate a memory block of the given size
 * @param size requested size in bytes
 * @return pointer to the block, or NULL on failure
 */
void* salloc_alloc(int size);
/**
 * @brief Re-allocate a memory block
 * @param ptr original pointer
 * @param size new size in bytes
 * @return pointer to the new block, or NULL on failure
 */
void* salloc_realloc(void* ptr, int size);
/**
 * @brief Free a memory block
 * @param ptr pointer to free
 */
void salloc_free(void* ptr);

/** @defgroup fixed_allocator Fixed-size allocator */
/**
 * @struct fixed_alloc_t
 * @brief Fixed-size allocator context
 */
typedef struct fixed_alloc {
    /** @brief head of the page list */
    void* page_list;
    /** @brief head of the free-block list */
    void* free_list;
    /** @brief fixed size of each block */
    int block_size;
    /** @brief number of blocks per page */
    int blocks_per_page;
} fixed_alloc_t;
/**
 * @brief Initialise a fixed-size allocator
 * @param fa allocator context
 * @param fixed_size fixed size of each block
 * @param init_size number of blocks to pre-allocate
 */
void falloc_init(fixed_alloc_t* fa, int fixed_size, int init_size);
/**
 * @brief Allocate one fixed-size block
 * @param fa allocator context
 * @return pointer to the block, or NULL on failure
 */
void* falloc_alloc(fixed_alloc_t* fa);
/**
 * @brief Free a block
 * @param fa allocator context
 * @param ptr pointer to free
 */
void falloc_free(fixed_alloc_t* fa, void* ptr);
/**
 * @brief Destroy the allocator and release every page
 * @param fa allocator context
 */
void falloc_destroy(fixed_alloc_t* fa);

/** @defgroup long_allocator Long-block allocator */
/**
 * @struct long_block_t
 * @brief Header of one long-allocator block
 */
typedef struct long_block {
    /** @brief next block in the chain */
    struct long_block* next;
    /** @brief bytes used inside this block */
    int used;
} long_block_t;
/**
 * @struct long_alloc_t
 * @brief Long-block allocator context
 */
typedef struct long_alloc {
    /** @brief current (most recent) block */
    long_block_t* current;
    /** @brief standard block size */
    int block_size;
} long_alloc_t;
/**
 * @brief Initialise a long-block allocator
 * @param la allocator context
 */
void lalloc_init(long_alloc_t* la);
/**
 * @brief Allocate a block of the given size
 * @param la allocator context
 * @param size requested size in bytes
 * @return pointer to the block, or NULL on failure
 */
void* lalloc_alloc(long_alloc_t* la, int size);
/**
 * @brief Mark a block as freed (memory is actually reclaimed in bulk)
 * @param la allocator context
 * @param ptr pointer to free
 */
void lalloc_free(long_alloc_t* la, void* ptr);
/**
 * @brief Destroy the allocator and release every block
 * @param la allocator context
 */
void lalloc_destroy(long_alloc_t* la);

#endif // __SMCC_RT_ALLOC_H__

View File

@@ -1,49 +0,0 @@
#include "rt_string.h"

/* Byte-wise memory comparison: returns the difference of the first pair of
 * mismatching bytes, or 0 when the first n bytes are equal. */
int rt_memcmp(const void* s1, const void* s2, rt_size_t n) {
    const unsigned char *p1 = s1, *p2 = s2;
    for (rt_size_t i = 0; i < n; ++i) {
        if (p1[i] != p2[i])
            return p1[i] - p2[i];
    }
    return 0;
}

/* NUL-terminated string comparison (strcmp semantics). */
int rt_strcmp(const char* s1, const char* s2) {
    while (*s1 && *s2 && (*s1 == *s2)) {
        s1++;
        s2++;
    }
    return *(const unsigned char*)s1 - *(const unsigned char*)s2;
}

/* Forward byte copy; dest and src must not overlap (restrict). */
void* rt_memcpy(void* restrict dest, const void* restrict src, rt_size_t n) {
    u8_t* d = dest;
    const u8_t* s = src;
    for (rt_size_t i = 0; i < n; ++i)
        d[i] = s[i];
    return dest;
}

/* Fill n bytes of dest with (u8_t)val. */
void* rt_memset(void* dest, int val, rt_size_t n) {
    u8_t* p = dest;
    for (rt_size_t i = 0; i < n; ++i)
        p[i] = (u8_t)val;
    return dest;
}

/* Length of a NUL-terminated string, excluding the terminator. */
rt_size_t rt_strlen(const char* s) {
    const char* p = s;
    while (*p) p++;
    return p - s;
}

/* strhash - string hashing (used by symbol tables) */
u32_t rt_strhash(const char* s) {
    u32_t hash = 2166136261u; // FNV-1a offset basis
    while (*s) {
        hash ^= *s++;
        hash *= 16777619u;    // FNV-1a prime
    }
    return hash;
}

View File

@@ -1,67 +0,0 @@
/**
 * @file rt_string.h
 * @brief Runtime string and memory operations
 *
 * Basic memory manipulation and string handling primitives.
 */
#ifndef __SMCC_RT_STRING_H__
#define __SMCC_RT_STRING_H__

#include "std/rt_api_def.h"

/** @defgroup memory_operations Memory operations */
/**
 * @brief Compare two memory regions
 * @param s1 first region
 * @param s2 second region
 * @param n number of bytes to compare
 * @return difference (<0: s1<s2, 0: equal, >0: s1>s2)
 */
int rt_memcmp(const void* s1, const void* s2, rt_size_t n);
/**
 * @brief Copy memory (regions must not overlap)
 * @param dest destination address (restrict-qualified)
 * @param src source address (restrict-qualified)
 * @param n number of bytes to copy
 * @return destination address
 */
void* rt_memcpy(void* restrict dest, const void* restrict src, rt_size_t n);
/**
 * @brief Fill a memory region
 * @param dest destination address
 * @param val fill byte (converted to unsigned char)
 * @param n number of bytes to fill
 * @return original destination address
 */
void* rt_memset(void* dest, int val, rt_size_t n);

/** @defgroup string_operations String operations */
/**
 * @brief Compare two strings
 * @param s1 first string
 * @param s2 second string
 * @return difference (<0: s1<s2, 0: equal, >0: s1>s2)
 */
int rt_strcmp(const char* s1, const char* s2);
/**
 * @brief Compute a string's length
 * @param s string pointer
 * @return length excluding the terminator
 */
rt_size_t rt_strlen(const char* s);
/**
 * @brief Compute a string hash
 * @param s input string
 * @return 32-bit unsigned hash value
 * @note FNV-1a hash algorithm
 */
u32_t rt_strhash(const char* s);

#endif // __SMCC_RT_STRING_H__

View File

@@ -1,183 +0,0 @@
/**
 * @file rt_api_def.h
 * @brief SMCC runtime library interface definitions
 *
 * Function-pointer types for the basic runtime API and the runtime
 * interface struct.
 */
#ifndef __SMCC_RT_API_DEF_H__
#define __SMCC_RT_API_DEF_H__

#include "rt_type.h"

#ifndef __RT_SIZE_TYPE__
#define __RT_SIZE_TYPE__
/**
 * @typedef rt_size_t
 * @brief Type used for memory sizes
 */
typedef usz_t rt_size_t;
#endif

/**
 * @typedef rt_malloc
 * @brief Memory allocation function pointer
 * @param size number of bytes to allocate
 * @return pointer to the allocation, or NULL on failure
 */
typedef void* (*rt_malloc)(rt_size_t size);
/**
 * @typedef rt_free
 * @brief Memory release function pointer
 * @param ptr pointer to release
 */
typedef void (*rt_free)(void* ptr);
/**
 * @typedef rt_exit
 * @brief Program exit function pointer
 * @param code exit status code
 */
typedef void (*rt_exit)(int code);

/** @defgroup file_io File I/O related types */
#ifndef __RT_FILE_TYPE__
#define __RT_FILE_TYPE__
/**
 * @typedef rt_file_t
 * @brief File handle type
 */
typedef void* rt_file_t;
#endif

/** @brief standard input handle */
extern rt_file_t rt_stdin;
/** @brief standard output handle */
extern rt_file_t rt_stdout;
/** @brief standard error handle */
extern rt_file_t rt_stderr;

/**
 * @typedef rt_fopen_t
 * @brief File-open function pointer
 * @param file_name file name
 * @param mode open mode (same as fopen)
 * @return file handle, or NULL on failure
 */
typedef rt_file_t (*rt_fopen_t)(const char* file_name, const char* mode);
/**
 * @typedef rt_fflush_t
 * @brief Buffer-flush function pointer
 * @param file pointer to a file handle
 * @return 0 on success, non-zero on failure
 */
typedef int (*rt_fflush_t)(rt_file_t* file);
/**
 * @typedef rt_fclose_t
 * @brief File-close function pointer
 * @param file file handle
 * @return 0 on success, EOF on failure
 */
typedef int (*rt_fclose_t)(rt_file_t file);
/**
 * @typedef rt_fread_t
 * @brief File-read function pointer
 * @param dst_buf destination buffer
 * @param elem_size size of one element
 * @param count number of elements
 * @param file file handle
 * @return number of elements actually read
 */
typedef int (*rt_fread_t)(void * dst_buf, rt_size_t elem_size, rt_size_t count, rt_file_t file);
/**
 * @typedef rt_fwrite_t
 * @brief File-write function pointer
 * @param buf source buffer
 * @param size size of one element
 * @param count number of elements
 * @param file file handle
 * @return number of elements actually written
 */
typedef int (*rt_fwrite_t)(const void * buf, rt_size_t size, rt_size_t count, rt_file_t file);

/** @defgroup utility Utility functions */
/**
 * @typedef rt_fprintf_t
 * @brief Formatted-output function pointer
 * @param file file handle
 * @param format format string
 * @param ... variadic arguments
 * @return number of characters written
 */
typedef int (*rt_fprintf_t)(void * file, const char *format, ...);
/**
 * @typedef rt_snprintf_t
 * @brief Bounded string-formatting function pointer
 * @param stream destination buffer
 * @param n buffer size
 * @param format format string
 * @param ... variadic arguments
 * @return number of characters written (excluding the terminator)
 */
typedef int (*rt_snprintf_t)(char * stream, rt_size_t n, const char * format, ...);
/**
 * @typedef rt_realloc_t
 * @brief Memory re-allocation function pointer
 * @param memory original pointer
 * @param new_size new size in bytes
 * @return pointer to the new allocation, or NULL on failure
 */
typedef void* (*rt_realloc_t)(void *memory, rt_size_t new_size);
/**
* @struct smcc_rt_t
* @brief 运行时接口集合
*
* 包含内存管理、文件操作等核心运行时函数的指针集合
*/
typedef struct smcc_rt {
/** @brief 内存分配函数指针 */
rt_malloc _malloc;
/** @brief 内存释放函数指针 */
rt_free _free;
/** @brief 程序退出函数指针 */
rt_exit exit;
/** @brief 文件打开函数指针 */
rt_fopen_t fopen;
/** @brief 文件缓冲刷新函数指针 */
rt_fflush_t fflush;
/** @brief 文件关闭函数指针 */
rt_fclose_t fclose;
/** @brief 文件读取函数指针 */
rt_fread_t fread;
/** @brief 文件写入函数指针 */
rt_fwrite_t fwrite;
/** @name 可选工具函数 */
///@{
/** @brief 格式化输出函数指针(可选) */
rt_fprintf_t fprintf;
/** @brief 安全格式化字符串函数指针(可选) */
rt_snprintf_t snprintf;
/** @brief 内存重分配函数指针(可选) */
rt_realloc_t _realloc;
///@}
} smcc_rt_t;
/** @brief 全局运行时接口实例 */
extern const smcc_rt_t rt;
/** @brief 空指针定义 */
#define NULL ((void *)0)
#endif // __SMCC_RT_API_DEF_H__

View File

@@ -1,32 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "rt_api_def.h"
/*
 * Hosted binding of the SMCC runtime interface: each entry point is the
 * corresponding C standard library function, cast to its rt_* pointer type.
 */
const smcc_rt_t rt = {
    ._malloc = (rt_malloc)malloc,
    ._free = (rt_free)free,
    .exit = (rt_exit)exit,
    .fopen = (rt_fopen_t)fopen,
    // NOTE(review): rt_fflush_t is declared int(*)(rt_file_t*), but fflush
    // takes the handle by value — the cast hides a signature mismatch; confirm.
    .fflush = (rt_fflush_t)fflush,
    .fclose = (rt_fclose_t)fclose,
    .fread = (rt_fread_t)fread,
    .fwrite = (rt_fwrite_t)fwrite,
    ._realloc = (rt_realloc_t)realloc,
    .fprintf = (rt_fprintf_t)fprintf,
    .snprintf = (rt_snprintf_t)snprintf,
};
/* Standard stream handles; assigned at runtime by init_rt_std(). */
rt_file_t rt_stdin;
rt_file_t rt_stdout;
rt_file_t rt_stderr;
/** Bind rt_stdin/rt_stdout/rt_stderr to the host stdio streams; call once at startup. */
void init_rt_std() {
    rt_stdin = stdin;
    rt_stdout = stdout;
    rt_stderr = stderr;
}

View File

@@ -1,6 +0,0 @@
#ifndef __SMCC_RT_STD_H__
#define __SMCC_RT_STD_H__
/** Bind rt_stdin/rt_stdout/rt_stderr to the host stdio streams (see rt_std.c). */
void init_rt_std();
#endif // __SMCC_RT_STD_H__

View File

@@ -1,120 +0,0 @@
/**
 * @file rt_type.h
 * @brief Basic fixed-width type aliases.
 *
 * Cross-platform aliases built on the C99 headers <stdint.h> and <stddef.h>.
 */
#ifndef __SMCC_RT_TYPE_H__
#define __SMCC_RT_TYPE_H__
#include <stddef.h>
#include <stdint.h>
/** @defgroup integer_types Integer types */
/** @brief 8-bit signed integer. */
typedef int8_t i8_t;
/** @brief 16-bit signed integer. */
typedef int16_t i16_t;
/** @brief 32-bit signed integer. */
typedef int32_t i32_t;
/** @brief 64-bit signed integer. */
typedef int64_t i64_t;
/** @brief 8-bit unsigned integer. */
typedef uint8_t u8_t;
/** @brief 16-bit unsigned integer. */
typedef uint16_t u16_t;
/** @brief 32-bit unsigned integer. */
typedef uint32_t u32_t;
/** @brief 64-bit unsigned integer. */
typedef uint64_t u64_t;
/** @defgroup floating_point Floating-point types */
/** @brief 32-bit single-precision float. */
typedef float f32_t;
/** @brief 64-bit double-precision float. */
typedef double f64_t;
/** @defgroup pointer_types Pointer-sized types */
/** @brief Signed pointer-sized integer (intptr_t alias). */
typedef intptr_t iptr_t;
/** @brief Unsigned pointer-sized integer (uintptr_t alias). */
typedef uintptr_t uptr_t;
/** @defgroup size_types Size types */
/** @brief Unsigned size type (size_t alias). */
typedef size_t usz_t;
// Reserved aliases, currently disabled:
// typedef ssize_t isz_t;  /* signed size type (ssize_t alias) */
// typedef u32_t uw_t;     /* unsigned machine word, 32-bit */
// typedef i32_t iw_t;     /* signed machine word, 32-bit */
#endif // __SMCC_RT_TYPE_H__

View File

View File

View File

@@ -1,15 +0,0 @@
#include <lib/core.h>
/* Smoke test: emit one message at every log severity. */
int main(void) {
    // Build & run by hand:
    //   make -C ..
    //   gcc -g -Wall -I../.. test_log.c -L.. -lcore -o test_log
    //   ./test_log
    init_lib_core();
    LOG_NOTSET("Log notset");
    LOG_DEBUG("Log debug");
    LOG_INFO("Log info");
    LOG_WARN("Log warn");
    LOG_ERROR("Log error");
    LOG_FATAL("Log fatal");
}

View File

@@ -1,140 +0,0 @@
#include "hashtable.h"
#define INIT_HASH_TABLE_SIZE (32)
/**
 * Zero-initialize a hash table: empty slot vector, no live entries, no
 * tombstones. hash_func/key_cmp must be set by the caller before use
 * (see the commented assertion and the warning in hashtable.h).
 */
void init_hashtable(hash_table_t* ht) {
    vector_init(ht->entries);
    ht->count = 0;
    ht->tombstone_count = 0;
    // Assert(ht->key_cmp != NULL && ht->hash_func != NULL);
}
/**
 * Round @p n up to the nearest power of two (identity for exact powers).
 *
 * Classic bit-smearing trick: propagate the highest set bit of n-1 into
 * every lower position, then add one.
 *
 * @param n requested minimum value
 * @return smallest power of two >= n; 1 when n <= 1
 */
static int next_power_of_two(int n) {
    // Guard: for n <= 0 the original code right-shifted a negative value
    // (implementation-defined) and could return 0; clamp to 1 instead.
    // Callers in this file always pass >= INIT_HASH_TABLE_SIZE, so this
    // only widens the valid input range.
    if (n <= 1) return 1;
    n--;
    n |= n >> 1;
    n |= n >> 2;
    n |= n >> 4;
    n |= n >> 8;
    n |= n >> 16;
    return n + 1;
}
/**
 * Locate the slot for `key` (with precomputed `hash`) by linear probing.
 *
 * Returns the ACTIVE entry holding the key when present; otherwise the
 * first reusable slot (a tombstone seen along the probe path, or the empty
 * slot that terminates it). Returns NULL when cap == 0 or every slot was
 * probed without finding the key or a free slot.
 */
static hash_entry_t* find_entry(hash_table_t* ht, const void* key, u32_t hash) {
    if (ht->entries.cap == 0) return NULL;
    u32_t index = hash & (ht->entries.cap - 1); // capacity is a power of two
    u32_t probe = 0;
    hash_entry_t* tombstone = NULL;
    while (1) {
        hash_entry_t* entry = &vector_at(ht->entries, index);
        if (entry->state == ENTRY_EMPTY) {
            // An empty slot proves the key is absent; prefer recycling an
            // earlier tombstone over consuming a fresh slot.
            return tombstone ? tombstone : entry;
        }
        if (entry->state == ENTRY_TOMBSTONE) {
            if (!tombstone) tombstone = entry;
        } else if (entry->hash == hash && ht->key_cmp(entry->key, key) == 0) {
            return entry;
        }
        // Linear probing
        index = (index + 1) & (ht->entries.cap - 1);
        probe++;
        if (probe >= ht->entries.cap) break;
    }
    LOG_ERROR("hashset_find: hash table is full");
    return NULL;
}
/**
 * Grow the slot array to at least `new_cap` entries (rounded up to a power
 * of two) and re-insert every ACTIVE entry. Tombstones are dropped, so the
 * tombstone count resets to zero.
 */
static void adjust_capacity(hash_table_t* ht, int new_cap) {
    new_cap = next_power_of_two(new_cap);
    Assert(new_cap >= ht->entries.cap);
    VECTOR_HEADER(old_entries, hash_entry_t);
    old_entries.data = ht->entries.data;
    old_entries.cap = ht->entries.cap;
    // size is not used by the table itself; kept populated for the
    // GDB Python pretty-printer extension.
    ht->entries.size = new_cap;
    ht->entries.cap = new_cap;
    ht->entries.data = salloc_realloc(NULL, new_cap * sizeof(hash_entry_t));
    rt_memset(ht->entries.data, 0, new_cap * sizeof(hash_entry_t));
    // rehash all of the old data into the new array
    for (rt_size_t i = 0; i < old_entries.cap; i++) {
        hash_entry_t* entry = &vector_at(old_entries, i);
        if (entry->state == ENTRY_ACTIVE) {
            hash_entry_t* dest = find_entry(ht, entry->key, entry->hash);
            *dest = *entry;
        }
    }
    vector_free(old_entries);
    ht->tombstone_count = 0;
}
/**
 * Insert or update the mapping key -> value.
 *
 * Grows (and rehashes) when live + tombstone slots would reach 75% of
 * capacity, so find_entry below always yields a usable slot.
 * @return the previous value when the key was already present, else NULL.
 */
void* hashtable_set(hash_table_t* ht, const void* key, void* value) {
    if (ht->count + ht->tombstone_count >= ht->entries.cap * 0.75) {
        int new_cap = ht->entries.cap < INIT_HASH_TABLE_SIZE ? INIT_HASH_TABLE_SIZE : ht->entries.cap * 2;
        adjust_capacity(ht, new_cap);
    }
    u32_t hash = ht->hash_func(key);
    hash_entry_t* entry = find_entry(ht, key, hash);
    void* old_value = NULL;
    if (entry->state == ENTRY_ACTIVE) {
        old_value = entry->value; // overwrite: report the replaced value
    } else {
        if (entry->state == ENTRY_TOMBSTONE) ht->tombstone_count--; // recycling a tombstone
        ht->count++;
    }
    entry->key = key;
    entry->value = value;
    entry->hash = hash;
    entry->state = ENTRY_ACTIVE;
    return old_value;
}
/* Look up the value stored under `key`; NULL when the key is absent. */
void* hashtable_get(hash_table_t* ht, const void* key) {
    if (ht->entries.cap == 0) {
        return NULL; /* empty table: nothing to probe */
    }
    hash_entry_t* slot = find_entry(ht, key, ht->hash_func(key));
    if (slot == NULL || slot->state != ENTRY_ACTIVE) {
        return NULL;
    }
    return slot->value;
}
/*
 * Remove the mapping for `key` by tombstoning its slot (the slot stays
 * occupied so probe chains remain intact). Returns the removed value,
 * or NULL when the key was not present.
 */
void* hashtable_del(hash_table_t* ht, const void* key) {
    if (ht->entries.cap == 0) {
        return NULL;
    }
    u32_t h = ht->hash_func(key);
    hash_entry_t* slot = find_entry(ht, key, h);
    if (!slot || slot->state != ENTRY_ACTIVE) {
        return NULL;
    }
    void* removed = slot->value;
    slot->state = ENTRY_TOMBSTONE;
    ht->count--;
    ht->tombstone_count++;
    return removed;
}
/**
 * Free the table's own slot storage and reset counters. Key/value memory
 * is owned by the caller and is not touched (see hashtable.h).
 * @note "destory" (sic) is the name declared in the public header.
 */
void hashtable_destory(hash_table_t* ht) {
    vector_free(ht->entries);
    ht->count = 0;
    ht->tombstone_count = 0;
}
/*
 * Visit every ACTIVE entry in slot order. The callback may stop the walk
 * early by returning 0.
 */
void hashtable_foreach(hash_table_t* ht, hash_table_iter_func iter_func, void* context) {
    rt_size_t idx;
    for (idx = 0; idx < ht->entries.cap; idx++) {
        hash_entry_t* slot = &vector_at(ht->entries, idx);
        if (slot->state != ENTRY_ACTIVE) {
            continue; /* skip empty slots and tombstones */
        }
        if (!iter_func(slot->key, slot->value, context)) {
            return; /* callback requested early termination */
        }
    }
}

View File

@@ -1,124 +0,0 @@
/**
 * @file hashtable.h
 * @brief Open-addressing hash table.
 *
 * Vector-backed hash table with power-of-two growth and tombstone deletion.
 */
#ifndef __SMCC_HASHTABLE_H__
#define __SMCC_HASHTABLE_H__
#include <lib/core.h>
#include <lib/rt/rt_alloc.h>
#include "vector.h"
/**
 * @enum ht_entry_state_t
 * @brief State of a hash-table slot.
 */
typedef enum hash_table_entry_state {
    ENTRY_EMPTY, /**< never used */
    ENTRY_ACTIVE, /**< holds a live key/value pair */
    ENTRY_TOMBSTONE /**< deleted entry (kept so probe chains stay intact) */
} ht_entry_state_t;
/**
 * @struct hash_entry_t
 * @brief One slot of the table.
 *
 * @note key/value memory is managed by the caller; the table does not own it.
 */
typedef struct hash_entry {
    const void* key; /**< key pointer (immutable) */
    void* value; /**< value pointer */
    u32_t hash; /**< cached hash (avoids recomputation) */
    ht_entry_state_t state; /**< current slot state */
} hash_entry_t;
/**
 * @struct hash_table_t
 * @brief Hash table body: open addressing, tombstone-marked deletion.
 */
typedef struct hash_table {
    VECTOR_HEADER(entries, hash_entry_t); /**< slot storage */
    u32_t count; /**< live entries (tombstones excluded) */
    u32_t tombstone_count; /**< tombstone slots */
    /**
     * @brief Hash function: maps a key to a 32-bit unsigned hash.
     */
    u32_t (*hash_func)(const void* key);
    /**
     * @brief Key comparison: 0 when keys are equal, non-zero otherwise.
     */
    int(*key_cmp)(const void* key1, const void* key2);
} hash_table_t;
/**
 * @brief Initialize a table to the empty state.
 * @warning hash_func and key_cmp must be set before any other call.
 */
void init_hashtable(hash_table_t* ht);
/**
 * @brief Insert or update a key/value pair.
 * @return the replaced old value, or NULL when the key was new.
 */
void* hashtable_set(hash_table_t* ht, const void* key, void* value);
/**
 * @brief Look up the value for a key.
 * @return the value, or NULL when absent.
 */
void* hashtable_get(hash_table_t* ht, const void* key);
/**
 * @brief Delete a key/value pair.
 * @return the deleted value, or NULL when absent.
 *
 * @note Implemented by tombstone marking, not physical removal.
 */
void* hashtable_del(hash_table_t* ht, const void* key);
/**
 * @brief Destroy the table's own storage.
 * @note Only internal memory is freed; key/value memory is untouched.
 */
void hashtable_destory(hash_table_t* ht);
/**
 * @typedef hash_table_iter_func
 * @brief Iteration callback.
 * @return non-zero to continue iterating, 0 to stop.
 * @note Fixed doc: the implementation (hashtable.c) stops when the
 *       callback returns 0, not when it returns non-zero.
 */
typedef int (*hash_table_iter_func)(const void* key, void* value, void* context);
/**
 * @brief Call @p iter_func for every ACTIVE entry.
 */
void hashtable_foreach(hash_table_t* ht, hash_table_iter_func iter_func, void* context);
#endif // __SMCC_HASHTABLE_H__

View File

@@ -1,158 +0,0 @@
/**
 * kllist.h — intrusive doubly-linked list modelled on the Linux kernel's list.h
 * @link https://njusecourse.feishu.cn/wiki/I8vkw2zkwiEInUkujTJc7zzOnwf
 * @link https://kernelnewbies.org/FAQ/LinkedLists
 * @link https://lwn.net/Articles/887097/
 * @link https://liuluheng.github.io/wiki/public_html/Embedded-System/kernel/list-and-hlist.html
 */
#ifndef __KLLIST_H__
#define __KLLIST_H__
#ifndef NULL
#define NULL (0)
#define __NULL_KLIST_DEFINED__
#endif
#ifndef container_of
// Magic: https://radek.io/posts/magical-container_of-macro/
// StackOverflow: https://stackoverflow.com/q/15832301/1833118
#ifdef __GNUC__
#define container_of(ptr, type, member) ({ \
    const typeof( ((type *)0)->member ) *__mptr = (ptr); \
    (type *)( (char *)__mptr - offsetof(type,member) );})
#else
#define container_of(ptr, type, member) ({ \
    const void *__mptr = (ptr); \
    (type *)( (char *)__mptr - offsetof(type,member) );})
#endif
#endif
/**
 * Embedded list node: place one inside your struct and recover the
 * enclosing struct with container_of().
 */
struct list_head {
    struct list_head *next, *prev;
};
/**
 * List initialization:
 * 1. struct list_head your_list = LIST_HEAD_INIT(your_list);
 * 2. struct list_head your_list; INIT_LIST_HEAD(&your_list);
 * 3. LIST_HEAD(your_list); => struct list_head your_list = { &(your_list), &(your_list) };
 */
#define LIST_HEAD_INIT(name) { &(name), &(name) }
static inline void INIT_LIST_HEAD(struct list_head *list) {
    list->next = list;
    list->prev = list;
}
#define LIST_HEAD(name) \
    struct list_head name = LIST_HEAD_INIT(name)
/**
 * Insertion: splice `newl` between two known consecutive nodes.
 * ("newl" avoids the C++ keyword "new".)
 */
static inline void __list_add(struct list_head *newl,
                struct list_head *prev,
                struct list_head *next) {
    next->prev = newl;
    newl->next = next;
    newl->prev = prev;
    prev->next = newl;
}
/* Insert right after head (stack-like push). */
static inline void list_add(struct list_head *newl, struct list_head *head) {
    __list_add(newl, head, head->next);
}
/* Insert right before head (queue-like push). */
static inline void list_add_tail(struct list_head *newl, struct list_head *head) {
    __list_add(newl, head->prev, head);
}
/**
 * Deletion: bridge over the nodes between `prev` and `next`.
 */
static inline void __list_del(struct list_head * prev, struct list_head * next) {
    next->prev = prev;
    prev->next = next;
}
/* Unlink entry and poison its links with NULL. */
static inline void list_del(struct list_head *entry) {
    __list_del(entry->prev, entry->next);
    entry->next = NULL;
    entry->prev = NULL;
}
/**
 * list_is_first - tests whether @list is the first entry in list @head
 * @list: the entry to test
 * @head: the head of the list
 */
static inline int list_is_first(const struct list_head *list, const struct list_head *head) {
    return list->prev == head;
}
/**
 * list_is_last - tests whether @list is the last entry in list @head
 * @list: the entry to test
 * @head: the head of the list
 */
static inline int list_is_last(const struct list_head *list, const struct list_head *head) {
    return list->next == head;
}
/**
 * list_is_head - tests whether @list is the list @head
 * @list: the entry to test
 * @head: the head of the list
 */
static inline int list_is_head(const struct list_head *list, const struct list_head *head) {
    return list == head;
}
/**
 * list_empty - tests whether a list is empty
 * @head: the list to test.
 */
static inline int list_empty(const struct list_head *head) {
    return head->next == head;
}
/**
 * list_for_each - iterate over a list
 * @pos: the &struct list_head to use as a loop cursor.
 * @head: the head for your list.
 */
#define list_for_each(pos, head) \
    for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next)
/**
 * list_for_each_prev - iterate over a list backwards
 * @pos: the &struct list_head to use as a loop cursor.
 * @head: the head for your list.
 */
#define list_for_each_prev(pos, head) \
    for (pos = (head)->prev; !list_is_head(pos, (head)); pos = pos->prev)
/**
 * List sort (declaration only; implementation follows Linux 6.3.1
 * /lib/list_sort.c, a single-linked merge sort).
 * @link https://www.geeksforgeeks.org/merge-sort-for-linked-list/
 */
#ifdef HAVE_KLIST_SORT
typedef int (*list_cmp_func_t)(void *,
    const struct list_head *, const struct list_head *);
static void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp);
#endif
#if defined(__NULL_KLIST_DEFINED__) && !defined(__NULL_KLIST_DEFINED_NOMOVE__)
#undef NULL
#endif
#endif

View File

@@ -1,202 +0,0 @@
# # vector_gdb.py
# import gdb
# import re
# class VectorPrinter:
# """解析宏定义的 vector 结构体"""
# def __init__(self, val):
# self.val = val
# def check_vector_type(self):
# """验证是否为合法 vector 结构体"""
# try:
# # 检查是否包含 size/cap/data 字段
# return all(self.val.type.has_key(field)
# for field in ['size', 'cap', 'data'])
# except gdb.error:
# return False
# def get_array_view(self):
# """将 data 字段转换为数组视图"""
# if not self.check_vector_type():
# return None
# cap = int(self.val['cap'])
# data_ptr = self.val['data']
# if cap == 0 or data_ptr == 0:
# return []
# # 构造数组类型 (例如 int[cap])
# element_type = data_ptr.type.target()
# array_type = element_type.array(cap - 1) # C 数组声明语法
# return data_ptr.cast(array_type.pointer()).dereference()
# def to_string(self):
# if not self.check_vector_type():
# return "Not a vector type"
# size = self.val['size']
# cap = self.val['cap']
# data = self.get_array_view()
# return (f"vector(size={size}, cap={cap}, data={data})")
# class VectorInfoCommand(gdb.Command):
# """自定义命令:显示 vector 详细信息"""
# def __init__(self):
# super(VectorInfoCommand, self).__init__("vector_info",
# gdb.COMMAND_USER)
# def invoke(self, arg, from_tty):
# val = gdb.parse_and_eval(arg)
# printer = VectorPrinter(val)
# if not printer.check_vector_type():
# print(f"'{arg}' is not a vector structure")
# return
# size = int(val['size'])
# cap = int(val['cap'])
# data = printer.get_array_view()
# # 输出格式化信息
# print(f"Vector {arg}:")
# print(f"├─ Size: {size}")
# print(f"├─ Capacity: {cap}")
# print("└─ Data elements [0..{}]:".format(min(size, cap)-1))
# for i in range(min(size, cap)):
# try:
# print(f" [{i}]: {data[i]}")
# except gdb.MemoryError:
# print(f" [{i}]: <invalid memory>")
# def register_printers():
# """注册自动类型识别"""
# def vector_matcher(val):
# return VectorPrinter(val).check_vector_type()
# # 使用 lambda 包装以动态创建 printer
# gdb.pretty_printers.append(lambda val:
# VectorPrinter(val) if vector_matcher(val) else None)
# # 注册命令和打印机
# VectorInfoCommand()
# register_printers()
# vector_gdb.py
import gdb # type: ignore
from gdb.printing import PrettyPrinter # type: ignore
class VectorPrinter:
    """GDB pretty-printer for the macro-generated C vector structs.

    A "vector" is any struct whose fields are exactly ``size``, ``cap``
    and ``data`` (see the VECTOR_HEADER macro in vector.h).
    """

    def __init__(self, val: gdb.Value):
        self.val: gdb.Value = val

    def check_type(self) -> bool:
        """Return True iff ``self.val`` is a struct with exactly the fields
        ``size``, ``cap`` and ``data`` (anonymous structs included)."""
        try:
            if self.val.type.code != gdb.TYPE_CODE_STRUCT:
                return False
            fields = self.val.type.fields()
            if not fields:
                return False
            expected = ['size', 'cap', 'data']
            for field in fields:
                if field.name in expected:
                    expected.remove(field.name)
                else:
                    return False
            # Bug fix: the previous version returned True here even when some
            # expected fields were missing (e.g. a struct with only size/cap),
            # which later crashed on self.val['data']. Require that every
            # expected field was actually seen.
            return not expected
        except gdb.error:
            return False

    def to_string(self):
        if not self.check_type():
            return "Not a vector"
        return "vector({} size={}, cap={})".format(
            self.val.address,
            self.val['size'],
            self.val['cap'],
        )

    def display_hint(self):
        return 'array'

    def children(self):
        """Yield (label, value) pairs for the first ``size`` elements."""
        if not self.check_type():
            return  # bare return: this is a generator, `return []` was misleading
        size = int(self.val['size'])
        cap = int(self.val['cap'])
        data_ptr = self.val['data']
        if cap == 0 or data_ptr == 0:
            return
        # Reinterpret the data pointer as a C array of length `cap`.
        array = data_ptr.dereference()
        array = array.cast(data_ptr.type.target().array(cap - 1))
        for i in range(size):
            try:
                value = array[i]
                yield (f"[{i}] {value.type} {value.address}", value)
            except gdb.MemoryError:
                yield (f"[{i}]", "<invalid>")
# Registration path 1: legacy global-list append (the path known to work).
def append_printer():
    """Register via the legacy global ``gdb.pretty_printers`` list."""
    gdb.pretty_printers.append(
        lambda val: VectorPrinter(val) if VectorPrinter(val).check_type() else None
    )
# Registration path 2: modern gdb.printing API (fallback, currently unused).
def register_new_printer():
    """Register via ``gdb.printing.register_pretty_printer``."""
    class VectorPrinterLocator(PrettyPrinter):
        def __init__(self):
            super().__init__("vector_printer")

        def __call__(self, val):
            # Bug fix: the previous __call__ only printed debug information
            # and always returned None, so this locator could never actually
            # supply a printer. Return the printer when the type matches.
            printer = VectorPrinter(val)
            return printer if printer.check_type() else None

    gdb.printing.register_pretty_printer(
        gdb.current_objfile(),
        VectorPrinterLocator()
    )
# Dual registration kept for compatibility; only the legacy path is active.
append_printer()  # the originally-working registration path
# register_new_printer()  # modern-API registration, kept disabled
class VectorInfoCommand(gdb.Command):
    """``vector_info EXPR`` — print size, capacity and elements of a vector."""

    def __init__(self):
        super().__init__("vector_info", gdb.COMMAND_USER)

    def invoke(self, arg, from_tty):
        # `arg` is the user-typed expression naming the vector variable.
        val = gdb.parse_and_eval(arg)
        printer = VectorPrinter(val)
        if not printer.check_type():
            print("Invalid vector")
            return
        print("=== Vector Details ===")
        print("Size:", val['size'])
        print("Capacity:", val['cap'])
        print("Elements:")
        for name, value in printer.children():
            print(f"  {name}: {value}")


VectorInfoCommand()

View File

@@ -1,116 +0,0 @@
/**
 * @file vector.h
 * @brief Dynamic array (vector) implementation.
 *
 * Type-safe growable array macros with automatic doubling reallocation.
 */
#ifndef __SMCC_DS_VECTOR_H__
#define __SMCC_DS_VECTOR_H__
#include <lib/rt/rt.h>
/** @defgroup vector_struct Data structure */
/**
 * @def VECTOR_HEADER(name, type)
 * @brief Declare an anonymous vector struct variable @p name storing @p type.
 *
 * Generated fields:
 * - size: number of elements in use
 * - cap:  number of allocated slots
 * - data: storage pointer
 */
#define VECTOR_HEADER(name, type) \
struct { \
    rt_size_t size; /**< elements in use */ \
    rt_size_t cap;  /**< allocated slots */ \
    type *data;     /**< storage pointer */ \
} name
/** @defgroup vector_operations Vector operation macros */
/**
 * @def vector_init(vec)
 * @brief Zero-initialize a vector.
 * @note Allocates nothing; storage appears on the first vector_push.
 */
#define vector_init(vec) \
do { \
    (vec).size = 0, \
    (vec).cap = 0, \
    (vec).data = 0; \
} while(0)
/**
 * @def vector_push(vec, value)
 * @brief Append @p value, doubling capacity when full (initial capacity 8).
 * @warning Triggers LOG_FATAL when reallocation fails.
 * @note Fix: every use of the macro arguments is now parenthesized
 *       ((vec).size instead of vec.size, (value) instead of value), so
 *       arguments such as *p or comma expressions expand correctly.
 */
#define vector_push(vec, value) \
do { \
    if ((vec).size >= (vec).cap) { \
        int cap = (vec).cap ? (vec).cap * 2 : 8; \
        void* data = salloc_realloc((vec).data, cap * sizeof(*(vec).data)); \
        if (!data) { \
            LOG_FATAL("vector_push: rt_realloc failed\n"); \
        } \
        (vec).cap = cap; \
        (vec).data = data; \
    } \
    (vec).data[(vec).size++] = (value); \
} while(0)
/**
 * @def vector_pop(vec)
 * @brief Remove and yield the last element.
 * @warning Only valid while size > 0.
 */
#define vector_pop(vec) \
    ((vec).data[--(vec).size])
/**
 * @def vector_at(vec, idx)
 * @brief Element at index @p idx (0 <= idx < size); yields an lvalue.
 */
#define vector_at(vec, idx) \
    (((vec).data)[(idx)])
/**
 * @def vector_idx(vec, ptr)
 * @brief Index of the element pointed to by @p ptr within the data array.
 */
#define vector_idx(vec, ptr) \
    ((ptr) - (vec).data)
/**
 * @def vector_free(vec)
 * @brief Release storage and reset to the zero state.
 * @note The vector must be re-initialized before reuse.
 */
#define vector_free(vec) \
do { \
    salloc_free((vec).data); \
    (vec).data = NULL; \
    (vec).size = (vec).cap = 0; \
} while(0)
#endif // __SMCC_DS_VECTOR_H__

View File

View File

@@ -1,32 +0,0 @@
#include "strpool.h"
/**
 * Initialize the pool: fresh string allocator plus a hash table keyed by
 * C strings — rt_strhash / rt_strcmp are installed here, so callers need
 * no further table setup.
 */
void init_strpool(strpool_t* pool) {
    lalloc_init(&pool->stralloc);
    pool->ht.hash_func = (u32_t(*)(const void*))rt_strhash;
    pool->ht.key_cmp = (int(*)(const void*, const void*))rt_strcmp;
    init_hashtable(&pool->ht);
}
/*
 * Return the pooled copy of `str`, inserting it on first sight. Equal
 * strings always yield the same pointer; NULL on allocation failure.
 */
const char* strpool_intern(strpool_t* pool, const char* str) {
    void* found = hashtable_get(&pool->ht, str);
    if (found != NULL) {
        return found; /* already interned */
    }
    rt_size_t bytes = rt_strlen(str) + 1; /* include the NUL terminator */
    char* copy = lalloc_alloc(&pool->stralloc, bytes);
    if (copy == NULL) {
        LOG_ERROR("strpool: Failed to allocate memory for string");
        return NULL;
    }
    rt_memcpy(copy, str, bytes);
    hashtable_set(&pool->ht, copy, copy); /* key and value are the pooled copy */
    return copy;
}
/**
 * Destroy the pool: drop the lookup table, then free every interned string
 * in one shot via the block allocator. Interned pointers dangle afterwards.
 */
void strpool_destroy(strpool_t* pool) {
    hashtable_destory(&pool->ht);
    lalloc_destroy(&pool->stralloc);
}

View File

@@ -1,54 +0,0 @@
/**
 * @file strpool.h
 * @brief String interning pool.
 *
 * Guarantees a single stored copy per distinct string.
 */
#ifndef __SMCC_STRPOOL_H__
#define __SMCC_STRPOOL_H__
#include <lib/core.h>
#include <lib/rt/rt_alloc.h>
#include <lib/utils/ds/hashtable.h>
/**
 * @struct strpool_t
 * @brief String pool context.
 *
 * A hash table for fast lookup of already-stored strings combined with a
 * dedicated allocator that owns the interned bytes.
 */
typedef struct strpool {
    hash_table_t ht; /**< lookup table of interned strings */
    long_alloc_t stralloc; /**< block allocator owning the string bytes */
} strpool_t;
/**
 * @brief Initialize the string pool.
 *
 * @note Installs rt_strhash / rt_strcmp on the internal table itself
 *       (see strpool.c) — no further setup is required by the caller.
 */
void init_strpool(strpool_t* pool);
/**
 * @brief Intern a C string into the pool.
 * @param pool string pool instance
 * @param str  string to intern
 * @return pointer to the pool's unique copy; NULL on allocation failure
 *
 * @note The returned pointer lives as long as the pool does.
 * @note Interning an equal string again returns the existing pointer.
 */
const char* strpool_intern(strpool_t* pool, const char* str);
/**
 * @brief Destroy the string pool.
 *
 * @warning All previously returned string pointers become invalid.
 * @note All interned string memory is released automatically.
 */
void strpool_destroy(strpool_t* pool);
#endif // __SMCC_STRPOOL_H__

View File

@@ -1,19 +0,0 @@
// #include <lib/rt/rt.h>
// #include "token.h"
// #define ROUND_IDX(idx) ((idx) % tokbuf->cap)
// #define POW2(x) (1 << (x))
// void init_toks(tok_stream_t* tokbuf, int cap,
// tok_stream_close_func close, tok_stream_get_func gettok, void* stream)
// {
// tokbuf->cap_mask = POW2(cap) - 1;
// // tokbuf->buf =
// }
// int toks_next( tok_stream_t* toks, tok_t* out);
// int toks_peek( tok_stream_t* toks, tok_t* out, int lookahead);
// const tok_t* toks_peek_ref(tok_stream_t* toks, int lookahead);
// int toks_reset(tok_stream_t* toks);
// int toks_seek( tok_stream_t* toks, int pos);
// int toks_close(tok_stream_t* toks);

View File

@@ -1,75 +0,0 @@
#ifndef __SMCC_TOKBUF_H__
#define __SMCC_TOKBUF_H__
#include <lib/rt/rt.h>
/** Source location of a token. */
typedef struct loc {
    const char *fname; // source file name
    int line;          // line number
    int col;           // column number
    int len;           // token length in characters
} loc_t;
/** Coarse token category. */
typedef enum tok_basic_type {
    TK_BASIC_INVALID,    // error placeholder
    TK_BASIC_KEYWORD,    // keyword
    TK_BASIC_OPERATOR,   // operator / punctuator
    TK_BASIC_IDENTIFIER, // identifier
    TK_BASIC_LITERAL,    // literal
    TK_BASIC_WHITESPACE, // whitespace
    TK_BASIC_COMMENT,    // comment
    TK_BASIC_EOF         // end-of-input marker
} tok_basic_type_t;
/** Value storage for tokens; one member per C scalar type. */
typedef union ctype {
    u8_t u8;
    u16_t u16;
    u32_t u32;
    u64_t u64;
    i8_t i8;
    i16_t i16;
    i32_t i32;
    i64_t i64;
    f32_t f32;
    f64_t f64;
    iptr_t iptr;
    uptr_t uptr;
    void* ptr;
    char ch;
    int i;
    // MUST BE strpool ptr
    const char* str;
} ctype_t;
/** A single token: coarse type, subtype, location and value. */
typedef struct tok {
    tok_basic_type_t type;
    int sub_type;
    loc_t loc;
    ctype_t val;
} tok_t;
// --- planned ring-buffer token stream API, not yet implemented ---
// typedef void(*tok_stream_close_func)(void* stream);
// typedef void(*tok_stream_get_func)(void* stream, tok_t* token);
// typedef struct tok_stream {
//     int cur;
//     int end;
//     int cap_mask;
//     tok_t* buf;
//     void* stream;
//     tok_stream_close_func close;
//     tok_stream_get_func gettok;
// } tok_stream_t;
// void init_toks(tok_stream_t* tokbuf, int cap,
//     tok_stream_close_func close, tok_stream_get_func gettok, void* stream);
// int toks_next( tok_stream_t* toks, tok_t* out);
// int toks_peek( tok_stream_t* toks, tok_t* out, int lookahead);
// const tok_t* toks_peek_unsafe(tok_stream_t* toks, int lookahead);
// int toks_reset(tok_stream_t* toks);
// int toks_seek( tok_stream_t* toks, int pos);
// int toks_close(tok_stream_t* toks);
#endif // __SMCC_TOKBUF_H__ (fixed: comment previously said __SMCC_TOKEN_H__)

View File

@@ -1,7 +0,0 @@
#ifndef __SMCC_LIB_UTILS_H__
#define __SMCC_LIB_UTILS_H__
#include "strpool/strpool.h"
#include "tokbuf/tokbuf.h"
#endif

9
libs/README.md Normal file
View File

@@ -0,0 +1,9 @@
lexer 词法分析
parse 语法分析
ast 抽象语法树
sema 语义分析
ir 中间代码标识
opt 优化器
codegen 代码生成
target 目标平台支持

6
libs/lexer/cbuild.toml Normal file
View File

@@ -0,0 +1,6 @@
[package]
name = "smcc_lex"
dependencies = [
{ name = "libcore", path = "../../runtime/libcore" },
]

View File

@@ -0,0 +1,62 @@
/**
 * @file lexer.h
 * @brief Core data structures and interface of the C lexer.
 */
#ifndef __SMCC_CC_LEXER_H__
#define __SMCC_CC_LEXER_H__
#include <libcore.h>
#include "lexer_stream.h"
#include "lexer_token.h"
/** Source position tracked while scanning. */
typedef struct lexer_loc {
    const char *name; // source name (e.g. file name)
    usize name_len;
    usize line;
    usize column;
    usize offset;
} lexer_loc_t;
/** A produced token: kind, literal value, and where it was found. */
typedef struct lexer_token {
    token_type_t type;
    core_cvalue_t value;
    lexer_loc_t loc;
} lexer_tok_t;
/**
 * @brief Lexer core structure.
 *
 * Wraps the state needed for lexical analysis: the input stream and the
 * current scan position.
 */
typedef struct cc_lexer {
    lexer_stream_t* stream;
    lexer_loc_t pos;
} smcc_lexer_t;
/**
 * @brief Initialize a lexer.
 * @param[out] lexer lexer instance to initialize
 * @param[in]  stream input stream object
 */
void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream);
/**
 * @brief Produce the next raw token.
 * @param[in]  lexer lexer instance
 * @param[out] token output token storage
 *
 * Returns every token kind, including whitespace and other tokens that
 * carry no syntactic meaning.
 */
void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token);
/**
 * @brief Produce the next syntactically meaningful token.
 * @param[in]  lexer lexer instance
 * @param[out] token output token storage
 *
 * Automatically skips whitespace and other tokens that are irrelevant to
 * the parser.
 */
void lexer_get_valid_token(smcc_lexer_t* lexer, lexer_tok_t* token);
#endif

View File

@@ -0,0 +1,48 @@
/*
 * Compile-time filtered logging macros for the lexer.
 *
 * LEX_LOG_LEVEL (default 4) selects the lowest severity that is compiled
 * in: 1=NOTSET, 2=DEBUG, 3=INFO, 4=WARN, 5=ERROR, 6=FATAL. Macros below
 * the threshold expand to nothing.
 */
#ifndef __SMCC_LEXER_LOG_H__
#define __SMCC_LEXER_LOG_H__
#include <libcore.h>
#ifndef LEX_LOG_LEVEL
#define LEX_LOG_LEVEL 4
#endif
#if LEX_LOG_LEVEL <= 1
#define LEX_NOTSET( fmt, ...) MLOG_NOTSET(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_NOTSET( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 2
#define LEX_DEBUG( fmt, ...) MLOG_DEBUG(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_DEBUG( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 3
#define LEX_INFO( fmt, ...) MLOG_INFO(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_INFO( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 4
#define LEX_WARN( fmt, ...) MLOG_WARN(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_WARN( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 5
#define LEX_ERROR( fmt, ...) MLOG_ERROR(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_ERROR( fmt, ...)
#endif
#if LEX_LOG_LEVEL <= 6
#define LEX_FATAL( fmt, ...) MLOG_FATAL(&__smcc_lexer_log, fmt, ##__VA_ARGS__)
#else
#define LEX_FATAL( fmt, ...)
#endif
/* Logger instance used by the LEX_* macros above. */
extern logger_t __smcc_lexer_log;
#endif // __SMCC_LEXER_LOG_H__

View File

@@ -0,0 +1,37 @@
/**
 * @file lexer_stream.h
 * @brief Character-stream abstraction consumed by the lexer.
 */
/* Fix: this header had no include guard, so a second inclusion redefined
 * struct lexer_stream and its typedefs. Guard added; interface unchanged. */
#ifndef __SMCC_LEXER_STREAM_H__
#define __SMCC_LEXER_STREAM_H__
#include <core_type.h>
typedef struct lexer_stream lexer_stream_t;
/** Returned by peek_char/next_char at end of input. */
#define lexer_stream_eof (-1)
/** Virtual character stream: one function table per backend. */
struct lexer_stream {
    const char* name;   /* source name (e.g. file name) for diagnostics */
    usize name_len;
    /// @brief Read up to `count` characters into `buffer`; returns the number read.
    usize (*read_buf)(lexer_stream_t* stream, char* buffer, usize count);
    /// @brief Look at the next character without consuming it.
    int (*peek_char)(lexer_stream_t* stream);
    /// @brief Reset the character-stream position.
    void (*reset_char) (lexer_stream_t* stream);
    /// @brief Read and consume the next character (advances the stream).
    int (*next_char)(lexer_stream_t* stream);
    /// @brief Release the stream's resources. (typo fix: was "steam")
    void (*free_stream) (lexer_stream_t* stream);
};
#ifndef __SMCC_LEXER_NO_MEM_STREAM__
/** In-memory stream backend over a byte buffer. */
typedef struct lexer_mem_stream {
    lexer_stream_t stream; /* embedded base stream */
    const char* data;
    usize data_length;
    usize curr_pos;
    usize peek_pos;
    cbool owned;           /* presumably: true when `data` was copied here — confirm in impl */
} lexer_mem_stream_t;
/**
 * Initialize `stream` over `data[0..length)`.
 * @note `need_copy` presumably selects copying vs. borrowing `data`
 *       (see `owned`) — confirm against the implementation.
 */
lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* data, usize length, cbool need_copy);
#endif
#endif // __SMCC_LEXER_STREAM_H__

View File

@@ -0,0 +1,137 @@
#ifndef __SMCC_CC_TOKEN_H__
#define __SMCC_CC_TOKEN_H__
#include <libcore.h>
/** Language standard (or extension) that introduces each keyword. */
typedef enum ckeyword {
    CSTD_C89,
    CSTD_C99,
    CEXT_ASM,
} ckeyword_t;
// Using Binary Search To Fast Find Keyword
// X-macro table, sorted alphabetically by spelling so lookup can binary-search.
// Columns: X(spelling, basic token class, token id, introducing standard)
#define KEYWORD_TABLE \
    X(asm       , TK_BASIC_KEYWORD , TOKEN_ASM       , CEXT_ASM) \
    X(break     , TK_BASIC_KEYWORD , TOKEN_BREAK     , CSTD_C89) \
    X(case      , TK_BASIC_KEYWORD , TOKEN_CASE      , CSTD_C89) \
    X(char      , TK_BASIC_KEYWORD , TOKEN_CHAR      , CSTD_C89) \
    X(const     , TK_BASIC_KEYWORD , TOKEN_CONST     , CSTD_C89) \
    X(continue  , TK_BASIC_KEYWORD , TOKEN_CONTINUE  , CSTD_C89) \
    X(default   , TK_BASIC_KEYWORD , TOKEN_DEFAULT   , CSTD_C89) \
    X(do        , TK_BASIC_KEYWORD , TOKEN_DO        , CSTD_C89) \
    X(double    , TK_BASIC_KEYWORD , TOKEN_DOUBLE    , CSTD_C89) \
    X(else      , TK_BASIC_KEYWORD , TOKEN_ELSE      , CSTD_C89) \
    X(enum      , TK_BASIC_KEYWORD , TOKEN_ENUM      , CSTD_C89) \
    X(extern    , TK_BASIC_KEYWORD , TOKEN_EXTERN    , CSTD_C89) \
    X(float     , TK_BASIC_KEYWORD , TOKEN_FLOAT     , CSTD_C89) \
    X(for       , TK_BASIC_KEYWORD , TOKEN_FOR       , CSTD_C89) \
    X(goto      , TK_BASIC_KEYWORD , TOKEN_GOTO      , CSTD_C89) \
    X(if        , TK_BASIC_KEYWORD , TOKEN_IF        , CSTD_C89) \
    X(inline    , TK_BASIC_KEYWORD , TOKEN_INLINE    , CSTD_C99) \
    X(int       , TK_BASIC_KEYWORD , TOKEN_INT       , CSTD_C89) \
    X(long      , TK_BASIC_KEYWORD , TOKEN_LONG      , CSTD_C89) \
    X(register  , TK_BASIC_KEYWORD , TOKEN_REGISTER  , CSTD_C89) \
    X(restrict  , TK_BASIC_KEYWORD , TOKEN_RESTRICT  , CSTD_C99) \
    X(return    , TK_BASIC_KEYWORD , TOKEN_RETURN    , CSTD_C89) \
    X(short     , TK_BASIC_KEYWORD , TOKEN_SHORT     , CSTD_C89) \
    X(signed    , TK_BASIC_KEYWORD , TOKEN_SIGNED    , CSTD_C89) \
    X(sizeof    , TK_BASIC_KEYWORD , TOKEN_SIZEOF    , CSTD_C89) \
    X(static    , TK_BASIC_KEYWORD , TOKEN_STATIC    , CSTD_C89) \
    X(struct    , TK_BASIC_KEYWORD , TOKEN_STRUCT    , CSTD_C89) \
    X(switch    , TK_BASIC_KEYWORD , TOKEN_SWITCH    , CSTD_C89) \
    X(typedef   , TK_BASIC_KEYWORD , TOKEN_TYPEDEF   , CSTD_C89) \
    X(union     , TK_BASIC_KEYWORD , TOKEN_UNION     , CSTD_C89) \
    X(unsigned  , TK_BASIC_KEYWORD , TOKEN_UNSIGNED  , CSTD_C89) \
    X(void      , TK_BASIC_KEYWORD , TOKEN_VOID      , CSTD_C89) \
    X(volatile  , TK_BASIC_KEYWORD , TOKEN_VOLATILE  , CSTD_C89) \
    X(while     , TK_BASIC_KEYWORD , TOKEN_WHILE     , CSTD_C89) \
// KEYWORD_TABLE
// X-macro table of non-keyword tokens: X(display text, basic-class, token id).
// The expansion order defines the numeric values of the token_type_t enum;
// actual operator matching is hand-coded in lexer_get_token(), not driven
// by this table's order.
#define TOKEN_TABLE \
X(unknown , TK_BASIC_INVALID, TOKEN_UNKNOWN ) \
X(EOF , TK_BASIC_EOF, TOKEN_EOF ) \
X(blank , TK_BASIC_EMPTYSPACE, TOKEN_BLANK ) \
X("==" , TK_BASIC_OPERATOR, TOKEN_EQ ) \
X("=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN ) \
X("++" , TK_BASIC_OPERATOR, TOKEN_ADD_ADD ) \
X("+=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_ADD ) \
X("+" , TK_BASIC_OPERATOR, TOKEN_ADD ) \
X("--" , TK_BASIC_OPERATOR, TOKEN_SUB_SUB ) \
X("-=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_SUB ) \
X("->" , TK_BASIC_OPERATOR, TOKEN_DEREF ) \
X("-" , TK_BASIC_OPERATOR, TOKEN_SUB ) \
X("*=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MUL ) \
X("*" , TK_BASIC_OPERATOR, TOKEN_MUL ) \
X("/=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_DIV ) \
X("/" , TK_BASIC_OPERATOR, TOKEN_DIV ) \
X("//" , TK_BASIC_COMMENT , TOKEN_LINE_COMMENT ) \
X("/* */" , TK_BASIC_COMMENT , TOKEN_BLOCK_COMMENT ) \
X("%=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MOD ) \
X("%" , TK_BASIC_OPERATOR, TOKEN_MOD ) \
X("&&" , TK_BASIC_OPERATOR, TOKEN_AND_AND ) \
X("&=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_AND ) \
X("&" , TK_BASIC_OPERATOR, TOKEN_AND ) \
X("||" , TK_BASIC_OPERATOR, TOKEN_OR_OR ) \
X("|=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_OR ) \
X("|" , TK_BASIC_OPERATOR, TOKEN_OR ) \
X("^=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_XOR ) \
X("^" , TK_BASIC_OPERATOR, TOKEN_XOR ) \
X("<<=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_L_SH ) \
X("<<" , TK_BASIC_OPERATOR, TOKEN_L_SH ) \
X("<=" , TK_BASIC_OPERATOR, TOKEN_LE ) \
X("<" , TK_BASIC_OPERATOR, TOKEN_LT ) \
X(">>=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_R_SH ) \
X(">>" , TK_BASIC_OPERATOR, TOKEN_R_SH ) \
X(">=" , TK_BASIC_OPERATOR, TOKEN_GE ) \
X(">" , TK_BASIC_OPERATOR, TOKEN_GT ) \
X("!" , TK_BASIC_OPERATOR, TOKEN_NOT ) \
X("!=" , TK_BASIC_OPERATOR, TOKEN_NEQ ) \
X("~" , TK_BASIC_OPERATOR, TOKEN_BIT_NOT ) \
X("[" , TK_BASIC_OPERATOR, TOKEN_L_BRACKET ) \
X("]" , TK_BASIC_OPERATOR, TOKEN_R_BRACKET ) \
X("(" , TK_BASIC_OPERATOR, TOKEN_L_PAREN ) \
X(")" , TK_BASIC_OPERATOR, TOKEN_R_PAREN ) \
X("{" , TK_BASIC_OPERATOR, TOKEN_L_BRACE ) \
X("}" , TK_BASIC_OPERATOR, TOKEN_R_BRACE ) \
X(";" , TK_BASIC_OPERATOR, TOKEN_SEMICOLON ) \
X("," , TK_BASIC_OPERATOR, TOKEN_COMMA ) \
X(":" , TK_BASIC_OPERATOR, TOKEN_COLON ) \
X("." , TK_BASIC_OPERATOR, TOKEN_DOT ) \
X("..." , TK_BASIC_OPERATOR, TOKEN_ELLIPSIS ) \
X("?" , TK_BASIC_OPERATOR, TOKEN_COND ) \
X(ident , TK_BASIC_IDENTIFIER, TOKEN_IDENT ) \
X(int_literal , TK_BASIC_LITERAL, TOKEN_INT_LITERAL ) \
X(float_literal , TK_BASIC_LITERAL, TOKEN_FLOAT_LITERAL ) \
X(char_literal , TK_BASIC_LITERAL, TOKEN_CHAR_LITERAL ) \
X(string_literal , TK_BASIC_LITERAL, TOKEN_STRING_LITERAL ) \
// END
// Token type enum: one enumerator per TOKEN_TABLE entry, followed by one
// per KEYWORD_TABLE entry (keywords therefore sort after plain tokens).
typedef enum cc_tktype {
// plain tokens
#define X(str, subtype, tok) tok,
TOKEN_TABLE
#undef X
// keywords (same X-macro shape, plus the introducing standard column)
#define X(name, subtype, tok, std) tok,
KEYWORD_TABLE
#undef X
} token_type_t;
// Coarse classification of tokens, used by the parser-facing filter
// lexer_get_valid_token() to drop whitespace/comments and trap errors.
typedef enum token_subtype {
    TK_BASIC_INVALID,       // error placeholder
    TK_BASIC_KEYWORD,       // keyword
    TK_BASIC_OPERATOR,      // operator / punctuator
    TK_BASIC_IDENTIFIER,    // identifier
    TK_BASIC_LITERAL,       // literal (int/float/char/string)
    TK_BASIC_EMPTYSPACE,    // whitespace
    TK_BASIC_COMMENT,       // comment
    TK_BASIC_EOF            // end-of-input marker
} token_subtype_t;
token_subtype_t get_tok_subtype(token_type_t type);
const char* get_tok_name(token_type_t type);
#endif

637
libs/lexer/src/lexer.c Normal file
View File

@@ -0,0 +1,637 @@
/**
* 仿照LCCompiler的词法分析部分
*
* 如下为LCC的README in 2025.2
This hierarchy is the distribution for lcc version 4.2.
lcc version 3.x is described in the book "A Retargetable C Compiler:
Design and Implementation" (Addison-Wesley, 1995, ISBN 0-8053-1670-1).
There are significant differences between 3.x and 4.x, most notably in
the intermediate code. For details, see
https://drh.github.io/lcc/documents/interface4.pdf.
VERSION 4.2 IS INCOMPATIBLE WITH EARLIER VERSIONS OF LCC. DO NOT
UNLOAD THIS DISTRIBUTION ON TOP OF A 3.X DISTRIBUTION.
LCC is a C89 ("ANSI C") compiler designed to be highly retargetable.
LOG describes the changes since the last release.
CPYRIGHT describes the conditions under you can use, copy, modify, and
distribute lcc or works derived from lcc.
doc/install.html is an HTML file that gives a complete description of
the distribution and installation instructions.
Chris Fraser / cwf@aya.yale.edu
David Hanson / drh@drhanson.net
*/
#include <lexer_log.h>
#include <lexer.h>
// Keyword lookup table, generated from KEYWORD_TABLE. Entries inherit the
// table's sorted-by-name order, which keyword_cmp() relies on.
static const struct {
    const char* name;       // keyword spelling, NUL-terminated
    ckeyword_t std_type;    // standard/extension that introduced it
    token_type_t tok;       // token id to emit on a match
} keywords[] = {
#define X(name, subtype, tok, std_type,...) { #name, std_type, tok },
KEYWORD_TABLE
#undef X
};
// Binary-search `keywords[]` for a length-delimited identifier.
// `name` is `len` bytes long (not required to be NUL-terminated);
// the table entries are NUL-terminated C strings.
// Returns the index of the matching keyword, or -1 if not a keyword.
// NOTE(review): the `name[i] == '\0'` early break only helps when the caller's
// buffer happens to be NUL-terminated — confirm cstring buffers are terminated.
static inline int keyword_cmp(const char* name, int len) {
    int low = 0;
    int high = sizeof(keywords) / sizeof(keywords[0]) - 1;
    while (low <= high) {
        int mid = (low + high) / 2;
        const char *key = keywords[mid].name;
        int cmp = 0;
        // custom comparison: stop at the first differing byte within `len`
        for (int i = 0; i < len; i++) {
            if (name[i] != key[i]) {
                cmp = (unsigned char)name[i] - (unsigned char)key[i];
                break;
            }
            if (name[i] == '\0') break; // stop early at a terminator
        }
        if (cmp == 0) {
            // all `len` bytes matched; it is an exact match only if the
            // table entry also ends here
            if (key[len] == '\0') return mid;
            cmp = -1; // table entry is longer than the input => key > name
        }
        if (cmp < 0) {
            high = mid - 1;
        } else {
            low = mid + 1;
        }
    }
    return -1; // Not a keyword.
}
/* Bind *stream to *lexer and reset the source location to line 1, column 1,
 * inheriting the stream's display name. */
void lexer_init(smcc_lexer_t* lexer, lexer_stream_t* stream) {
    lexer_loc_t start;
    start.name = stream->name;
    start.name_len = stream->name_len;
    start.line = 1;
    start.column = 1;
    start.offset = 0;
    lexer->stream = stream;
    lexer->pos = start;
}
// Thin wrappers over the stream's virtual read interface and the lexer's
// source-position bookkeeping.
#define stream_reset_char(stream) ((stream)->reset_char(stream))  // drop lookahead; next peek restarts at the read cursor
#define stream_next_char(stream) ((stream)->next_char(stream))    // consume one char (advances the read cursor)
#define stream_peek_char(stream) ((stream)->peek_char(stream))    // look ahead one char without consuming
#define lexer_next_pos(lexer) ((lexer)->pos.column ++, (lexer)->pos.offset ++)   // account for one consumed char
#define lexer_next_line(lexer) ((lexer)->pos.line ++, (lexer)->pos.column = 1)   // account for a consumed newline
#define set_err_token(token) ((token)->type = TOKEN_UNKNOWN)                     // mark the token as invalid
/* Consume characters up to and including the next '\n' (or EOF), keeping the
 * lexer's line/column/offset in sync. The discarded text is reported as a
 * TOKEN_LINE_COMMENT. */
static void skip_newline(smcc_lexer_t* lexer, lexer_tok_t* token) {
    lexer_stream_t* stream = lexer->stream;
    token->type = TOKEN_LINE_COMMENT;
    for (;;) {
        int c = stream_next_char(stream);
        if (c == lexer_stream_eof) {
            // end of input: nothing left to skip
            return;
        }
        lexer_next_pos(lexer);
        if (c != '\n') {
            continue;
        }
        // newline reached: bump the line counter and stop
        lexer_next_line(lexer);
        return;
    }
}
// Consume a /* ... */ block comment, starting with the opening "/*" still
// unconsumed in the stream. Emits TOKEN_BLOCK_COMMENT; an unterminated
// comment is warned about and treated as running to EOF.
static void skip_block_comment(smcc_lexer_t* lexer, lexer_tok_t* token) {
    lexer_stream_t* stream = lexer->stream;
    token->type = TOKEN_BLOCK_COMMENT;
    int ch;
    stream_reset_char(stream);
    ch = stream_next_char(stream);
    lexer_next_pos(lexer);
    // FIXME Assertion
    Assert (ch == '/');
    ch = stream_next_char(stream);
    lexer_next_pos(lexer);
    Assert (ch == '*');
    // "/*" has been consumed; scan for the matching "*/"
    while (1) {
        ch = stream_next_char(stream);
        lexer_next_pos(lexer);
        if (ch == lexer_stream_eof) {
            // unterminated block comment
            LEX_WARN("Unterminated block comment");
            return;
        }
        // LEX_ERROR("%c", ch);
        // keep line/column tracking accurate inside the comment
        if (ch == '\n') {
            lexer_next_line(lexer);
        } else if (ch == '*') {
            // check whether the next character closes the comment
            int next_ch = stream_peek_char(stream);
            if (next_ch == '/') {
                // consume the '/'
                stream_next_char(stream);
                // update position info
                lexer_next_pos(lexer);
                // comment terminator found
                return;
            }
        }
    }
}
// Map the character following a backslash to its escaped character value.
// Returns the translated character, or -1 for an unrecognised escape.
// TODO: numeric escapes (\123, \x41) and universal character names are
// still unsupported.
static inline int got_slash(int peek) {
    switch (peek) {
        case '\\': return '\\';
        case '\'': return '\'';
        case '\"': return '\"';
        case '\?': return '\?';
        case '0': return '\0';
        case 'a': return '\a'; // alert/bell; was missing from the original set
        case 'b': return '\b';
        case 'f': return '\f';
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        case 'v': return '\v';
        default: break;
    }
    return -1;
}
// Scan a character literal: '<char>' or '<escape>'. The opening quote is
// still unconsumed when we enter. On success the token is TOKEN_CHAR_LITERAL
// with value.ch set; on any error the token is invalidated (TOKEN_UNKNOWN).
static void parse_char(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
    token->type = TOKEN_CHAR_LITERAL;
    lexer_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);
    if (ch == lexer_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch != '\'') {
        LEX_WARN("Unexpected character '%c' at begin", ch);
        goto ERR;
    }
    stream_next_char(stream); // consume the opening quote
    lexer_next_pos(lexer);
    ch = stream_next_char(stream);
    lexer_next_pos(lexer);
    if (ch == lexer_stream_eof) {
        LEX_WARN("Unexpected EOF at middle");
        goto ERR;
    } else if (ch == '\\') {
        ch = stream_next_char(stream);
        lexer_next_pos(lexer);
        if ((ch = got_slash(ch)) == -1) {
            LEX_ERROR("Invalid escape character");
            // TODO: special-case handling (numeric escapes etc.)
            goto ERR;
        }
        token->value.ch = ch;
    } else {
        token->value.ch = ch;
    }
    ch = stream_next_char(stream);
    // BUGFIX: the closing quote must advance the position on the success
    // path too; previously the column/offset were only bumped on error.
    lexer_next_pos(lexer);
    if (ch != '\'') {
        LEX_ERROR("Unclosed character literal '%c' at end, expect `'`", ch);
        goto ERR;
    }
    return;
ERR:
    set_err_token(token);
}
// Scan a string literal "..." into a freshly allocated cstring. The token's
// value.cstr takes ownership of the buffer (the caller is responsible for
// freeing it). Escape sequences are translated via got_slash(); an invalid
// escape is reported and the raw character is kept verbatim.
static void parse_string(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
    token->type = TOKEN_STRING_LITERAL;
    lexer_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);
    if (ch == lexer_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch != '"') {
        LEX_WARN("Unexpected character '%c' at begin", ch);
        goto ERR;
    }
    stream_next_char(stream); // consume the opening quote
    lexer_next_pos(lexer);
    cstring_t str = cstring_new(); // removed unused local `base`
    while (1) {
        ch = stream_peek_char(stream);
        if (ch == lexer_stream_eof) {
            LEX_ERROR("Unexpected EOF at string literal");
            break;
        } else if (ch == '\n') {
            LEX_ERROR("Unexpected newline at string literal");
            break;
        } else if (ch == '\\') {
            stream_next_char(stream);      // consume the backslash
            lexer_next_pos(lexer);         // BUGFIX: keep column/offset in sync
            ch = stream_next_char(stream); // consume the escape selector
            lexer_next_pos(lexer);
            int val = got_slash(ch);
            if (val == -1) {
                LEX_ERROR("Invalid escape character it is \\%c [%d]", ch, ch);
                // BUGFIX: keep the raw character instead of falling through,
                // which used to consume and store one extra character
                cstring_push(&str, ch);
            } else {
                cstring_push(&str, val);
            }
            continue;
        } else if (ch == '"') {
            stream_next_char(stream); // consume the closing quote
            lexer_next_pos(lexer);
            break;
        }
        stream_next_char(stream);
        lexer_next_pos(lexer);
        cstring_push(&str, ch);
    }
    token->value.cstr.data = (char*)cstring_as_cstr(&str);
    token->value.cstr.len = cstring_len(&str);
    return;
ERR:
    set_err_token(token);
}
// Scan an integer literal: 0x/0X hex, 0b/0B binary (C23-style extension),
// 0-prefixed octal, or plain decimal. The accumulated value is stored in
// token->value.n and the token becomes TOKEN_INT_LITERAL.
// Overflow is currently not detected (TODO below).
static void parse_number(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
    lexer_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);
    int base = 0;
    if (ch == lexer_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch == '0') {
        ch = stream_peek_char(stream); // inspect the character after '0'
        if (ch == 'x' || ch == 'X') {
            base = 16;
            // consume both the '0' and the 'x'/'X'
            stream_next_char(stream);
            lexer_next_pos(lexer);
            stream_next_char(stream);
            lexer_next_pos(lexer);
        } else if (ch == 'b' || ch == 'B') {
            // FIXME C23 external integer base
            base = 2;
            stream_next_char(stream);
            lexer_next_pos(lexer);
            stream_next_char(stream);
            lexer_next_pos(lexer);
        } else if (ch >= '0' && ch <= '7') {
            base = 8;
            stream_next_char(stream); // consume the leading '0' only
            lexer_next_pos(lexer);
        } else {
            base = 10; // a bare '0', or '0' followed by a non-digit
        }
    } else {
        base = 10;
    }
    // accumulate digits in the detected base
    stream_reset_char(stream);
    int tmp = 0;
    token->value.n = 0;
    while (1) {
        ch = stream_peek_char(stream);
        if (ch == lexer_stream_eof) {
            break;
        } else if (ch >= 'a' && ch <= 'z') {
            tmp = ch - 'a' + 10;
        } else if (ch >= 'A' && ch <= 'Z') {
            tmp = ch - 'A' + 10;
        } else if (ch >= '0' && ch <= '9') {
            tmp = ch - '0';
        } else {
            break;
        }
        if (tmp >= base) {
            // BUGFIX(consistency): use the module logger like the rest of
            // this file, not the generic LOG_ERROR
            LEX_ERROR("Invalid digit");
            break;
        }
        stream_next_char(stream);
        lexer_next_pos(lexer);
        token->value.n = token->value.n * base + tmp;
        // TODO number overflow
    }
    token->type = TOKEN_INT_LITERAL;
    return;
ERR:
    set_err_token(token);
}
// Handle a preprocessor '#line NUM "FILE"' marker. Any other '#' directive
// is warned about and skipped to end of line. On success the token's
// location is rewritten to the reported file name and line number.
static void parse_line(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
    lexer_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    int ch = stream_peek_char(stream);
    if (ch == lexer_stream_eof) {
        LEX_WARN("Unexpected EOF at begin");
        goto ERR;
    } else if (ch != '#') {
        LEX_WARN("Unexpected character '%c' at begin", ch);
        goto ERR;
    }
    // BUGFIX: consume the '#' itself before matching the directive name;
    // previously the '#' was compared against 'l' and the match always failed.
    stream_next_char(stream);
    lexer_next_pos(lexer);
    const char line[] = "line";
    // BUGFIX: sizeof(line) also counted the NUL terminator, which can never
    // match an input character; compare only the four letters of "line".
    for (int i = 0; i < (int)sizeof(line) - 1; i++) {
        ch = stream_next_char(stream);
        lexer_next_pos(lexer);
        if (ch != line[i]) {
            LEX_WARN("Maroc does not support in lexer rather in preprocessor, it will be ignored");
            skip_newline(lexer, token);
            goto SKIP_LINE;
        }
    }
    parse_number(lexer, token);
    if (token->type != TOKEN_INT_LITERAL) {
        LEX_ERROR("Invalid line number");
        goto SKIP_LINE;
    }
    // BUGFIX: token->value is a union — save the line number before
    // parse_string() overwrites it with the file-name string.
    int line_no = (int)token->value.n;
    if (stream_next_char(stream) != ' ') {
        // '#line N' without a file name: apply the line number only
        skip_newline(lexer, token);
        token->loc.line = line_no;
    }
    if (stream_peek_char(stream) != '"') {
        LEX_ERROR("Invalid `#` line");
        goto SKIP_LINE;
    }
    parse_string(lexer, token);
    if (token->type != TOKEN_STRING_LITERAL) {
        LEX_ERROR("Invalid filename");
        goto SKIP_LINE;
    }
    skip_newline(lexer, token);
    token->loc.line = line_no;
    // FIXME memory leak: the file-name buffer is never freed
    token->loc.name = cstring_as_cstr((const cstring_t *)&token->value.cstr);
    token->loc.name_len = cstring_len((const cstring_t *)&token->value.cstr);
    return;
SKIP_LINE:
    skip_newline(lexer, token);
ERR:
    set_err_token(token);
}
// /zh/c/language/operator_arithmetic.html
/**
 * Scan exactly one raw token from the stream into *token.
 *
 * Operators are matched longest-first by hand-written peek chains; the
 * `triple_char` / `double_char` / `once_char` labels form a consumption
 * ladder that eats 3, 2 or 1 characters respectively before storing `type`.
 * Whitespace, comments and '#line' markers come back as TOKEN_BLANK /
 * comment tokens — callers that want parser-visible tokens should use
 * lexer_get_valid_token() instead.
 */
void lexer_get_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
    token->loc = lexer->pos;
    token->type = TOKEN_UNKNOWN;
    lexer_stream_t *stream = lexer->stream;
    stream_reset_char(stream);
    token_type_t type = TOKEN_UNKNOWN;
    int ch = stream_peek_char(stream);
    // once step
    switch (ch) {
    case '=':
        switch (stream_peek_char(stream)) {
            case '=': type = TOKEN_EQ; goto double_char;
            default: stream_reset_char(stream), type = TOKEN_ASSIGN; break;
        } break;
    case '+':
        switch (stream_peek_char(stream)) {
            case '+': type = TOKEN_ADD_ADD; goto double_char;
            case '=': type = TOKEN_ASSIGN_ADD; goto double_char;
            default: stream_reset_char(stream), type = TOKEN_ADD; break;
        } break;
    case '-':
        switch (stream_peek_char(stream)) {
            case '-': type = TOKEN_SUB_SUB; goto double_char;
            case '=': type = TOKEN_ASSIGN_SUB; goto double_char;
            case '>': type = TOKEN_DEREF; goto double_char;
            default: stream_reset_char(stream), type = TOKEN_SUB; break;
        } break;
    case '*':
        switch (stream_peek_char(stream)) {
            case '=': type = TOKEN_ASSIGN_MUL; goto double_char;
            default: stream_reset_char(stream), type = TOKEN_MUL; break;
        } break;
    case '/':
        // '/' may start a comment; comment skipping handles its own position
        // updates and token type, hence the direct jumps to END
        switch (stream_peek_char(stream)) {
            case '=': type = TOKEN_ASSIGN_DIV; goto double_char;
            case '/': skip_newline(lexer, token); goto END;
            case '*': skip_block_comment(lexer, token); goto END;
            default: stream_reset_char(stream), type = TOKEN_DIV; break;
        } break;
    case '%':
        switch (stream_peek_char(stream)) {
            case '=': type = TOKEN_ASSIGN_MOD; goto double_char;
            default: stream_reset_char(stream), type = TOKEN_MOD; break;
        } break;
    case '&':
        switch (stream_peek_char(stream)) {
            case '&': type = TOKEN_AND_AND; goto double_char;
            case '=': type = TOKEN_ASSIGN_AND; goto double_char;
            default: stream_reset_char(stream), type = TOKEN_AND; break;
        } break;
    case '|':
        switch (stream_peek_char(stream)) {
            case '|': type = TOKEN_OR_OR; goto double_char;
            case '=': type = TOKEN_ASSIGN_OR; goto double_char;
            default: stream_reset_char(stream), type = TOKEN_OR; break;
        } break;
    case '^':
        switch (stream_peek_char(stream)) {
            case '=': type = TOKEN_ASSIGN_XOR; goto double_char;
            default: stream_reset_char(stream), type = TOKEN_XOR; break;
        } break;
    case '<':
        // '<', '<=', '<<', '<<='
        switch (stream_peek_char(stream)) {
            case '=': type = TOKEN_LE; goto double_char;
            case '<': {
                if (stream_peek_char(stream) == '=') {
                    type = TOKEN_ASSIGN_L_SH;
                    goto triple_char;
                } else {
                    type = TOKEN_L_SH;
                    goto double_char;
                }
                break;
            }
            default: stream_reset_char(stream), type = TOKEN_LT; break;
        } break;
    case '>':
        // '>', '>=', '>>', '>>='
        switch (stream_peek_char(stream)) {
            case '=': type = TOKEN_GE; goto double_char;
            case '>': {
                if (stream_peek_char(stream) == '=') {
                    type = TOKEN_ASSIGN_R_SH;
                    goto triple_char;
                } else {
                    type = TOKEN_R_SH;
                    goto double_char;
                }
                break;
            }
            default: stream_reset_char(stream), type = TOKEN_GT; break;
        } break;
    case '~':
        type = TOKEN_BIT_NOT; break;
    case '!':
        switch (stream_peek_char(stream)) {
            case '=': type = TOKEN_NEQ; goto double_char;
            default: stream_reset_char(stream), type = TOKEN_NOT; break;
        } break;
    case '[':
        type = TOKEN_L_BRACKET; break;
    case ']':
        type = TOKEN_R_BRACKET; break;
    case '(':
        type = TOKEN_L_PAREN; break;
    case ')':
        type = TOKEN_R_PAREN; break;
    case '{':
        type = TOKEN_L_BRACE; break;
    case '}':
        type = TOKEN_R_BRACE; break;
    case ';':
        type = TOKEN_SEMICOLON; break;
    case ',':
        type = TOKEN_COMMA; break;
    case ':':
        type = TOKEN_COLON; break;
    case '.':
        // NOTE(review): when only ".." is seen the two-char lookahead is not
        // reset before falling back to TOKEN_DOT — confirm the peek cursor
        // semantics make this harmless.
        if (stream_peek_char(stream) == '.' && stream_peek_char(stream) == '.') {
            type = TOKEN_ELLIPSIS;
            goto triple_char;
        }
        type = TOKEN_DOT; break;
    case '?':
        type = TOKEN_COND; break;
    case '\v': case '\r': case '\f':
    case ' ': case '\t':
        type = TOKEN_BLANK; break;
    case '\n':
        // you need to flush a newline or blank
        stream_next_char(stream);
        lexer_next_line(lexer);
        // FIXME some error
        token->type = TOKEN_BLANK;
        goto END;
    case '#':
        // preprocessor line marker; reported to callers as blank
        parse_line(lexer, token);
        token->type = TOKEN_BLANK;
        goto END;
    case '\0':
    case lexer_stream_eof:
        // EOF
        type = TOKEN_EOF;
        break;
    case '\'':
        parse_char(lexer, token);
        goto END;
    case '"':
        parse_string(lexer, token);
        goto END;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        parse_number(lexer, token);
        goto END;
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z':
    case '_':
        // TOKEN_IDENT
        // TODO
        // if ((ch == 'L' && ch == '\'') || (ch == 'L' && ch == '"')) {
        //     LEX_ERROR("unsupport wide-character char literal by `L` format");
        // }
        // accumulate [A-Za-z0-9_]+ then decide identifier vs keyword
        cstring_t str = cstring_new();
        while (1) {
            ch = stream_peek_char(stream);
            if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
                (ch == '_') || (ch >= '0' && ch <= '9')) {
                stream_next_char(stream);
                lexer_next_pos(lexer);
                cstring_push(&str, ch);
                continue;
            }
            break;
        }
        int res = keyword_cmp((const char*)str.data, str.len);
        if (res == -1) {
            // not a keyword: the token owns the identifier's buffer
            token->value.cstr.data = (char*)cstring_as_cstr(&str);
            token->value.cstr.len = cstring_len(&str);
            type = TOKEN_IDENT; break;
        } else {
            type = keywords[res].tok; break;
        }
    default:
        LEX_ERROR("unsupport char in sourse code `%c`", ch);
        break;
    }
    // consumption ladder: fall through to eat 3, 2 or 1 characters
    goto once_char;
triple_char:
    stream_next_char(stream);
    lexer_next_pos(lexer);
double_char:
    stream_next_char(stream);
    lexer_next_pos(lexer);
once_char:
    stream_next_char(stream);
    lexer_next_pos(lexer);
    token->type = type;
END:
    LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(token->type),
        token->loc.name, token->loc.line, token->loc.column);
}
/* Fetch the next token the parser cares about, silently discarding
 * whitespace and comment tokens. An invalid token triggers AssertFmt. */
void lexer_get_valid_token(smcc_lexer_t* lexer, lexer_tok_t* token) {
    for (;;) {
        lexer_get_token(lexer, token);
        token_subtype_t subtype = get_tok_subtype(token->type);
        AssertFmt(subtype != TK_BASIC_INVALID, "Invalid token: `%s` at %s:%d:%d",
            get_tok_name(token->type), token->loc.name, token->loc.line, token->loc.column);
        if (subtype != TK_BASIC_EMPTYSPACE && subtype != TK_BASIC_COMMENT) {
            break;
        }
    }
}

View File

@@ -0,0 +1,7 @@
#include <lexer_log.h>
// Module-wide logger instance used by the LEX_* logging macros.
logger_t __smcc_lexer_log = {
    .name = "lexer",
    .level = LOG_LEVEL_ALL,       // log everything by default
    .handler = log_default_handler,
};

101
libs/lexer/src/mem_stream.c Normal file
View File

@@ -0,0 +1,101 @@
#include <lexer_stream.h>
#include <lexer_log.h>
#include <libcore.h>
// Memory-stream implementation of the lexer_stream_t interface.
// Copy up to `count` bytes from the stream into `buffer`, starting at the
// current read position. Returns the number of bytes actually copied
// (less than `count` near the end of the data, 0 at EOF).
static usize read_buf(lexer_stream_t* _stream, char* buffer, usize count) {
    // BUGFIX: the original asserted `buffer != null` twice and never
    // validated the stream pointer.
    Assert(_stream != null && buffer != null);
    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
    usize remaining = stream->data_length - stream->curr_pos;
    usize to_read = (remaining < count) ? remaining : count;
    if (to_read > 0) {
        smcc_memcpy(buffer, stream->data + stream->curr_pos, to_read);
        stream->curr_pos += to_read;
    } else {
        LEX_WARN("Reading past end of stream [maybe count is too large or negative?]");
    }
    return to_read;
}
// Look ahead one character without consuming it. Each call advances the
// independent peek cursor; reset_char() rewinds it to the read cursor.
static int peek_char(lexer_stream_t* _stream) {
    Assert(_stream != null);
    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
    // return EOF once the peek cursor is past the end of the data
    if (stream->peek_pos >= stream->data_length) {
        return lexer_stream_eof; // EOF
    }
    return (int)(unsigned char)stream->data[stream->peek_pos++];
}
// Consume and return one character, advancing the read cursor. The peek
// cursor is dragged forward so it never lags behind consumed input.
static int next_char(lexer_stream_t* _stream) {
    Assert(_stream != NULL);
    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
    // return EOF once the read cursor is past the end of the data
    if (stream->curr_pos >= stream->data_length) {
        return lexer_stream_eof; // EOF
    }
    unsigned char ch = stream->data[stream->curr_pos++];
    if (stream->peek_pos < stream->curr_pos) {
        stream->peek_pos = stream->curr_pos;
    }
    return (int)ch;
}
/* Discard lookahead: the next peek_char() restarts at the read cursor. */
static void reset_char(lexer_stream_t* _stream) {
    Assert(_stream != NULL);
    lexer_mem_stream_t* ms = (lexer_mem_stream_t*)_stream;
    ms->peek_pos = ms->curr_pos;
}
// Release the backing buffer if this stream owns it (i.e. it was copied at
// init time). Safe to call more than once: the data pointer is cleared so a
// second call cannot double-free.
static void free_stream(lexer_stream_t* _stream) {
    Assert(_stream != null);
    lexer_mem_stream_t* stream = (lexer_mem_stream_t*)_stream;
    if (stream->owned && stream->data != null) {
        smcc_free((void*)stream->data);
        stream->data = null;       // BUGFIX: guard against double-free
        stream->data_length = 0;
    }
}
// Initialise a memory-backed lexer stream over data[0..length).
// If `need_copy` is true the bytes are duplicated and owned by the stream
// (released by free_stream); otherwise the caller retains ownership and
// must keep `data` alive for the stream's lifetime.
// Returns the embedded lexer_stream_t vtable, or null on bad arguments/OOM.
lexer_stream_t* lexer_mem_stream_init(lexer_mem_stream_t* stream, const char* data, usize length, cbool need_copy) {
    // consistency: use `null` throughout (the original mixed null and NULL)
    if (stream == null || data == null || length == 0) {
        LEX_ERROR("param error");
        return null;
    }
    stream->owned = need_copy;
    if (need_copy) {
        char* buf = (char*)smcc_malloc(length);
        if (buf == null) {
            LEX_ERROR("malloc error");
            return null;
        }
        smcc_memcpy(buf, data, length);
        stream->data = buf;
    } else {
        stream->data = data;
    }
    stream->data_length = length;
    stream->curr_pos = 0;
    stream->peek_pos = 0;
    // default display name; callers may override stream.name afterwards
    static const char name[] = "mem_stream";
    stream->stream.name = name;
    stream->stream.name_len = sizeof(name) - 1;
    stream->stream.read_buf = read_buf;
    stream->stream.peek_char = peek_char;
    stream->stream.next_char = next_char;
    stream->stream.reset_char = reset_char;
    stream->stream.free_stream = free_stream;
    return (void*)stream;
}

30
libs/lexer/src/token.c Normal file
View File

@@ -0,0 +1,30 @@
#include <lexer_token.h>
// 生成字符串映射(根据需求选择#str或#name
static const char* token_strings[] = {
#define X(str, subtype, tok) [tok] = #str,
TOKEN_TABLE
#undef X
#define X(str, subtype, tok, std) [tok] = #str,
KEYWORD_TABLE
#undef X
};
// Token-id → coarse subtype mapping, generated from the same X-macro tables.
static token_subtype_t token_subtypes[] = {
#define X(str, subtype, tok) [tok] = subtype,
TOKEN_TABLE
#undef X
#define X(str, subtype, tok, std) [tok] = subtype,
KEYWORD_TABLE
#undef X
};
// Return the coarse subtype for a token id.
// NOTE(review): no bounds check — `type` must be a valid token_type_t value.
token_subtype_t get_tok_subtype(token_type_t type) {
    return token_subtypes[type];
}
// Return the display name for a token id (no bounds check; see above).
const char* get_tok_name(token_type_t type) {
    return token_strings[type];
}

View File

@@ -0,0 +1,4 @@
// Placeholder entry point; the real driver is not wired up yet.
int main() {
}

View File

@@ -0,0 +1,83 @@
#include <lexer.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
/// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
/*
tok_tConstant {
int have;
union {
char ch;
int i;
float f;
double d;
long long ll;
char* str;
};
};
*/
int g_num;
int g_num_arr[3];
// Ad-hoc lexer driver: read the given file (default: this source file) into
// memory, lex it, and print every valid token until EOF.
// Usage: test_lexer [file] [-nodebug]
int main(int argc, char* argv[]) {
    if (argc == 3 && strcmp(argv[2], "-nodebug") == 0) {
        log_set_level(NULL, LOG_LEVEL_ALL);
    }
    const char* file_name = __FILE__;
    if (argc == 2) {
        file_name = argv[1];
    }
    FILE* fp = fopen(file_name, "rb");
    if (fp == NULL) {
        perror("open file failed");
        return 1;
    }
    printf("open file success\n");
    if (fseek(fp, 0, SEEK_END) != 0) {
        perror("fseek failed");
        fclose(fp); // BUGFIX: do not leak the handle on early exit
        return 1;
    }
    usize fsize = ftell(fp);
    LOG_INFO("file size: %zu", fsize);
    if (fseek(fp, 0, SEEK_SET)) {
        perror("fseek failed");
        fclose(fp);
        return 1;
    }
    char* buffer = (char*) malloc(fsize);
    if (buffer == NULL) { // BUGFIX: malloc result was used unchecked
        perror("malloc failed");
        fclose(fp);
        return 1;
    }
    usize read_ret = fread(buffer, 1, fsize, fp);
    fclose(fp);
    if (read_ret != fsize) {
        // BUGFIX: %zu is the correct conversion for size-typed values
        LOG_FATAL("fread failed read_ret %zu != fsize %zu", read_ret, fsize);
        free(buffer);
        return 1;
    }
    smcc_lexer_t lexer;
    lexer_mem_stream_t mem_stream = {0};
    // need_copy=false: the stream borrows `buffer`, which we free ourselves
    lexer_stream_t* stream = lexer_mem_stream_init(&mem_stream, buffer, fsize, false);
    Assert(stream != null);
    stream->name = __FILE__;
    stream->name_len = strlen(__FILE__);
    lexer_init(&lexer, stream);
    lexer_tok_t tok;
    while (1) {
        lexer_get_valid_token(&lexer, &tok);
        if (tok.type == TOKEN_EOF) {
            break;
        }
        LOG_INFO("token `%s` at %s:%u:%u", get_tok_name(tok.type), tok.loc.name, tok.loc.line, tok.loc.column);
        Assert(tok.loc.offset <= fsize);
    }
    free(buffer);
    return 0;
}

View File

@@ -1,30 +0,0 @@
# (legacy, removed) top-level Makefile; superseded by cbuild.py.
# NOTE(review): "CLFAGS" below is a typo for CFLAGS — the sanitizer flag
# was never actually applied to the build.
CC = gcc
CFLAGS = -g -Wall -I..
LIBS = -Lccompiler -lcc -Lassembler/riscv32 -lasm -Llinker -llk -Lmcode -lmc -L../lib -lcore
CLFAGS += -fsanitize=address
all: smcc
smcc: cc asm lib mc lk
	$(CC) $(CFLAGS) smcc.c $(LIBS) -o smcc
lib:
	make -C ../lib
asm:
	make -C assembler/riscv32
mc:
	make -C mcode
cc:
	make -C ccompiler
lk:
	make -C linker
clean:
	make -C ../lib clean
	make -C assembler/riscv32 clean
	make -C ccompiler clean
	make -C mcode clean
	make -C linker clean
View File

@@ -1,144 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Kinds of entities an assembler symbol table entry can describe.
enum asm_symbol_type{
    DATA_BYTE,
    DATA_WORD,
    DATA_HALFWORD,
    DATA_DOUBLEWORD,
    DATA_STRING,
    DATA_SYMBLE,    // NOTE(review): likely a misspelling of DATA_SYMBOL
    TEXT_LABEL,
    UNKNOWN,
};
// One symbol-table entry: a name, its kind, and the associated datum.
struct asm_symbol_table {
    const char* name;
    enum asm_symbol_type type;
    union {
        long long data_doubleword;
        long data_word;
        short data_halfword;
        char data_byte;
        char* data_string;
    } data;
};
// Characters that begin a line comment in this assembly dialect.
static const char comments[] = {
    '#',
};
/* True (1) for the standard C whitespace characters, 0 otherwise. */
static inline int is_blank(char ch) {
    switch (ch) {
    case ' ': case '\t': case '\n': case '\v': case '\f': case '\r':
        return 1;
    default:
        return 0;
    }
}
// Trim leading blanks and (intended) trailing blanks/comments from one line,
// returning the span via *out_start / *out_end.
// NOTE(review): several apparent defects in this (now deleted) code:
//  - the leading loop also skips '\0', so it can run past the terminator;
//  - the inner `break` in the comment scan only exits the `for`, so comment
//    characters never actually stop the scan;
//  - `*out_end` is compared at the `if` below before it has been assigned.
void parse_line_without_blank(const char* in, const char** out_start, const char** out_end) {
    while (*in == '\0' || is_blank(*in)) {
        in ++;
    }
    *out_start = in;
    while (*in != '\0') {
        if (*in == '\r' || *in == '\n') {
            break;
        }
        for (int i = 0; i < sizeof(comments); i ++) {
            if (*in == comments[i]) {
                break; // exits the for-loop only, not the outer while
            }
        }
        in ++;
    }
    if (*out_end == *out_start) { // NOTE(review): *out_end is still uninitialised here
        goto END;
    }
    while (is_blank(*in)) {
        in --;
    }
END:
    *out_end = in;
    return;
}
// Parse one .data-section symbol definition into *table.
// NOTE(review): unfinished — the loop body never advances `start`, so it
// spins forever whenever start < end; the type is always left as UNKNOWN.
void parse_data_symbol(const char* start, const char* end, struct asm_symbol_table* table) {
    table->name = start;
    while(start < end) {
        if (*start == ':') {
        }
    }
    table->type = UNKNOWN;
}
// Maximum length of one assembly source line the parser will buffer.
#define TMP_BUFF_SIZE 1024
// Which section the line scanner is currently inside.
enum parse_state {
    IN_DATA,     // after a .data directive
    IN_TEXT,     // after a .text directive
    IN_UNKNOWN,  // before any recognised directive
};
// First assembler pass: scan the file line by line, tracking .data/.text
// sections and collecting symbols from the data section.
// NOTE(review): parse_data_symbol is invoked with two arguments below but
// is declared with three — this deleted code cannot have compiled as-is.
void get_symbol_table(FILE* in) {
    enum parse_state state = IN_UNKNOWN;
    fseek(in, 0, SEEK_SET);
    char buf[TMP_BUFF_SIZE];
    int current_line = 0;
    while (1) {
        current_line ++;
        char *start = fgets(buf, sizeof(buf), in);
        if (start == NULL) {
            return; // EOF (or read error)
        }
        char *end;
        parse_line_without_blank(buf, &start, &end);
        if (start == end) {
            continue; // blank / comment-only line
        }
        if (start[0] == '.') {
            // .data .text and so on
            if (strcmp(start, ".data") == 0) {
                state = IN_DATA;
            } else if (strcmp(start, ".text") == 0) {
                state = IN_TEXT;
            } else {
                printf("unknown directive at line %d\n", current_line);
                state = IN_UNKNOWN;
            }
            continue;
        }
        switch (state) {
        case IN_DATA:
            parse_data_symbol(start, end); // NOTE(review): missing 3rd argument
            break;
        case IN_TEXT:
            break;
        case IN_UNKNOWN:
            break;
        }
    }
}
// Main assembler entry (skeleton): reads one line and classifies it.
// NOTE(review): abandoned stub — `out` and `start_symble` are unused and
// only the first line is ever inspected.
void assembler(FILE* in, FILE* out, char *start_symble) {
    char buf[TMP_BUFF_SIZE];
    char *res = fgets(buf, sizeof(buf), in);
    if (res == NULL) {
        return;
    }
    if (res[0] == '.') {
        // maybe .data .text and so on
    } else if (res[0] == ' ') {
    }
}

View File

@@ -1,18 +0,0 @@
#ifndef __SMCC_ASM_H__
#define __SMCC_ASM_H__
// typedef unsigned long long rt_size_t;
// extern void* _syscall_fopen (const char* path);
// extern int _syscall_fread (void* handle, char* buf, rt_size_t size);
// extern int _syscall_fwrite (void* handle, const char* buf, rt_size_t size);
// extern void _syscall_fseek (void* handle, long offset);
#include <lib/core.h>
#include "riscv32/riscv32_asm.h"
#include "riscv32/riscv32_asm.h"
// Architecture-dispatch container for an assembled program; one member per
// supported target (currently only RISC-V 32).
typedef union asm_prog {
    rv32_prog_t rv32;
} asm_prog_t;
#endif

View File

@@ -1,17 +0,0 @@
# (legacy, removed) static-library Makefile for the riscv32 assembler;
# superseded by the cbuild.py build system. test*.c files are excluded.
CC = gcc
AR = ar
CFLAGS = -g -Wall -I../../..
EXCLUDE = test*.c
SRCS = $(filter-out $(EXCLUDE), $(wildcard *.c))
OBJS = $(SRCS:.c=.o)
libasm.a: $(OBJS)
	$(AR) rcs $@ $^
%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<
clean:
	rm -f libasm.a $(OBJS)

View File

@@ -1,29 +0,0 @@
#include "riscv32_asm.h"
// Initialise an rv32 program container. A NULL strpool allocates a fresh
// pool owned by the program; otherwise the caller's pool is shared.
void init_rv32_prog(rv32_prog_t* prog, strpool_t* strpool) {
    if (strpool == NULL) {
        prog->strpool = salloc_alloc(sizeof(strpool_t));
        // NOTE(review): salloc_alloc result is not checked before use
        init_strpool(prog->strpool);
    } else {
        prog->strpool = strpool;
    }
    prog->data_base_address = 0;
    vector_init(prog->data);
    prog->text_base_address = 0;
    init_symtab_asm(&prog->symtab);
    init_rv32_mcode(&prog->mcode, sizeof(symasm_entry_t));
}
// Assemble a source file into *prog.
// NOTE(review): unfinished stub — the file handle is never closed, the read
// buffer is never parsed, and files larger than 1 KiB are truncated.
void asm_from_file(const char* file_name, rv32_prog_t* prog) {
    init_rv32_prog(prog, NULL);
    rt_file_t* fp = rt.fopen(file_name, "r");
    if (fp == NULL) {
        LOG_FATAL("Failed to open file %s", file_name);
    }
    char buf[1024];
    rt.fread(buf, 1, sizeof(buf), fp);
}

View File

@@ -1,18 +0,0 @@
#ifndef __SMCC_RISCV32_ASM_H__
#define __SMCC_RISCV32_ASM_H__
#include <lib/utils/strpool/strpool.h>
#include <src/mcode/riscv32/riscv32_mcode.h>
#include "symtab_asm.h"
// In-memory representation of an rv32 program being assembled.
typedef struct rv32_prog {
    strpool_t* strpool;          // interned strings (owned iff created by init)
    symtab_asm_t symtab;         // label/symbol → address table
    u32_t text_base_address;     // load address of the .text section
    u32_t data_base_address;     // load address of the .data section
    VECTOR_HEADER(data, iptr_t); // raw .data contents
    mcode_rv32_t mcode;          // emitted machine code
} rv32_prog_t;
void init_rv32_prog(rv32_prog_t* prog, strpool_t* strpool);
#endif

View File

@@ -1,48 +0,0 @@
#include "symtab_asm.h"
// /* append label */
// static inline int
// rv32_append_label(mcode_rv32_t* prog, void* label, u32_t offset) {
// // prog->symtab
// symtab_asm_put(&prog->symtab, label, offset);
// return 0;
// }
// Hash a symbol entry by its name string.
static u32_t hash_func(const symasm_entry_t* key) {
    return rt_strhash(key->name);
}
// Compare two symbol entries by name (strcmp semantics).
static int cmp_func(const symasm_entry_t* k1, const symasm_entry_t* k2) {
    return rt_strcmp(k1->name, k2->name);
}
// Initialise the assembler symbol table: a hashtable keyed by symbol name.
void init_symtab_asm(symtab_asm_t* symtab) {
    init_hashtable(&symtab->symtab);
    symtab->symtab.hash_func = (u32_t(*)(const void*))hash_func;
    symtab->symtab.key_cmp = (int(*)(const void*, const void*))cmp_func;
}
// Insert a copy of *_entry mapped to `address`. A duplicate symbol is
// reported but the table keeps whichever value hashtable_set left in place.
void symtab_asm_put(symtab_asm_t* symtab, symasm_entry_t* _entry, u32_t address) {
    // FIXME maybe memory leak: on a duplicate, the freshly allocated
    // entry/addr pair is never released
    u32_t* addr = salloc_alloc(sizeof(u32_t));
    if (addr == NULL) {
        LOG_FATAL("salloc_alloc failure");
    }
    symasm_entry_t* entry = salloc_alloc(sizeof(symasm_entry_t));
    if (entry == NULL) LOG_FATAL("malloc failure");
    *entry = *_entry;   // shallow copy; entry->name is still caller-owned
    *addr = address;
    void* ret = hashtable_set(&symtab->symtab, entry, addr);
    if (ret != NULL) {
        LOG_ERROR("Symbol %s already exists", entry->name);
    }
}
// Look up a symbol's address; returns NULL when the symbol is unknown.
u32_t* symtab_asm_get(symtab_asm_t* symtab, symasm_entry_t* entry) {
    u32_t* addr = hashtable_get(&symtab->symtab, entry);
    return addr;
}
// Tear down the table. NOTE(review): the allocated entry/addr pairs are not
// freed here — presumably salloc is arena-style; confirm before reuse.
void symtab_asm_destroy(symtab_asm_t* symtab) {
    hashtable_destory(&symtab->symtab);
}

View File

@@ -1,26 +0,0 @@
#ifndef __SMCC_SYMTAB_ASM_H__
#define __SMCC_SYMTAB_ASM_H__
#include <lib/core.h>
#include <lib/utils/ds/hashtable.h>
// Symbol visibility within the assembled object.
typedef enum symasm_attr {
    GLOBAL,
    LOCAL,
} symasm_attr_t;
// One assembler symbol: its name and visibility.
typedef struct symasm_entry {
    const char* name;
    symasm_attr_t attr;
} symasm_entry_t;
// Symbol table: a hashtable of symasm_entry_t → u32_t address.
typedef struct symtab_asm {
    hash_table_t symtab;
} symtab_asm_t;
void init_symtab_asm(symtab_asm_t* symtab);
void symtab_asm_put(symtab_asm_t* symtab, symasm_entry_t* entry, u32_t address);
u32_t* symtab_asm_get(symtab_asm_t* symtab, symasm_entry_t* entry);
void symtab_asm_destroy(symtab_asm_t* symtab);
#endif

View File

@@ -1,62 +0,0 @@
# This example shows an implementation of the mathematical
# factorial function (! function) to find the factorial value of !7 = 5040.
# (Removed test fixture; targets the Ripes-style RISC-V syscall ABI:
#  a7=1 print int, a7=4 print string, a7=10 exit.)
.data
argument: .word 7
str1: .string "Factorial value of "
str2: .string " is "
.text
main:
lw a0, argument # Load argument from static data
jal ra, fact # Jump-and-link to the 'fact' label
# Print the result to console
mv a1, a0
lw a0, argument
jal ra, printResult
# Exit program
li a7, 10
ecall
# fact: recursive factorial, argument/result in a0
fact:
addi sp, sp, -16
sw ra, 8(sp)
sw a0, 0(sp)
addi t0, a0, -1
bge t0, zero, nfact
addi a0, zero, 1
addi sp, sp, 16
jr x1
nfact:
addi a0, a0, -1
jal ra, fact
addi t1, a0, 0
lw a0, 0(sp)
lw ra, 8(sp)
addi sp, sp, 16
mul a0, a0, t1
ret
# --- printResult ---
# a0: Value which factorial number was computed from
# a1: Factorial result
printResult:
mv t0, a0
mv t1, a1
la a0, str1
li a7, 4
ecall
mv a0, t0
li a7, 1
ecall
la a0, str2
li a7, 4
ecall
mv a0, t1
li a7, 1
ecall
ret

View File

@@ -1,57 +0,0 @@
# (legacy, removed) compiler-library Makefile; superseded by cbuild.py.
# all: cc
# # run: ccompiler
# # 	./ccompiler test.c flat.bin
# # simple_test:
# # 	make -C tests/simple
# cc: frontend middleend backend ccompiler.c test_main.c
# 	gcc -g ccompiler.c test_main.c -I../ -L./frontend -lfrontend -L./middleend -lmiddleend -L./backend -lbackend -L../lib -lcore -o cc
# frontend:
# 	make -C ./frontend
# middleend:
# 	make -C ./middleend
# backend:
# 	make -C ./backend
# clean:
# 	rm -f cc
# 	make -C ./frontend clean
# 	make -C ./middleend clean
# 	make -C ./backend clean
# Top-level Makefile revision: collect every submodule's sources into libcc.a
CC = gcc
AR = ar
CFLAGS = -g -Wall -I.. -I../..
MODULES = frontend middleend backend
FRONTEND_SUBDIRS = lexer parser parser/ast parser/symtab
MODULES += $(addprefix frontend/, $(FRONTEND_SUBDIRS))
MIDDLEEND_SUBDIRS = ir
MODULES += $(addprefix middleend/, $(MIDDLEEND_SUBDIRS))
BACKEND_SUBDIRS = riscv32
MODULES += $(addprefix backend/, $(BACKEND_SUBDIRS))
# Auto-collect all submodule sources, excluding tests
EXCLUDE = test*.c
SRCS = $(filter-out $(EXCLUDE), $(wildcard $(addsuffix /*.c,$(MODULES))))
SRCS += ccompiler.c
OBJS = $(SRCS:.c=.o)
libcc.a: $(OBJS)
	$(AR) rcs $@ $^
%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<
clean:
	rm -f libcc.a $(OBJS)

View File

@@ -1,30 +0,0 @@
# Compiler settings
CC = gcc
AR = ar
CFLAGS = -g -Wall -I../..
RISCV32_DIR = ./riscv32

# Source file list
SRCS = \
	backend.c \
	$(RISCV32_DIR)/riscv32.c

# Derived object file list
OBJS = $(SRCS:.c=.o)

# Final target: the backend static library
TARGET = libbackend.a

all: $(TARGET)

$(TARGET): $(OBJS)
	$(AR) rcs $@ $^

%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<

clean:
	rm -f $(OBJS) $(TARGET)

.PHONY: all clean

View File

@@ -1,23 +0,0 @@
#include "backend.h"
// Dispatch IR lowering to the code generator selected by `arch`.
// Only RISC-V 32 is implemented; any other architecture panics.
int gen_asm_from_ir(ir_prog_t* ir, cc_arch_t arch, asm_prog_t* out_asm) {
    if (arch == CC_ARCH_RISCV32) {
        // TODO: select the backend with a compile-time macro instead
        init_rv32_prog(&(out_asm->rv32), NULL);
        gen_rv32_from_ir(ir, &(out_asm->rv32));
    } else {
        // covers CC_ARCH_X86_32 and anything unknown
        Panic("Unsupported arch");
    }
    return 0;
}
// Allocate the output program and lower `ir` for the configured arch.
// TODO: honor more of the backend configuration than just `arch`.
asm_prog_t* cc_backend(ir_prog_t* ir, cc_backend_conf_t* conf) {
    asm_prog_t* prog = (asm_prog_t*)salloc_alloc(sizeof(asm_prog_t));
    gen_asm_from_ir(ir, conf->arch, prog);
    return prog;
}

View File

@@ -1,30 +0,0 @@
#ifndef __SMCC_CC_BACKEND_H__
#define __SMCC_CC_BACKEND_H__

// TODO: use a macro to choose the target architecture at build time
#ifndef __SMCC_CC_NO_RISCV32__
#include "riscv32/riscv32.h"
#endif
// #ifndef __SMCC_CC_NO_X86_32__
// #include "x86_32/x86_32.h"
// #endif

// TODO: unify the assembler interface
#include <src/assembler/assembler.h>
#include "../middleend/ir/ir.h"

// Target architectures the backend can (eventually) emit code for.
typedef enum cc_arch {
    CC_ARCH_RISCV32,
    CC_ARCH_X86_32
} cc_arch_t;

typedef union asm_prog asm_prog_t;

// Lower `ir` into `asm_prog` for the given architecture; returns 0 on success.
int gen_asm_from_ir(ir_prog_t* ir, cc_arch_t arch, asm_prog_t* asm_prog);

// Backend configuration (currently only the target architecture).
typedef struct cc_backend_conf {
    cc_arch_t arch;
} cc_backend_conf_t;

// Allocate and return the assembly program generated from `ir`.
asm_prog_t* cc_backend(ir_prog_t* ir, cc_backend_conf_t* conf);
#endif

View File

@@ -1,41 +0,0 @@
# 后端代码生成
## riscv32i
> 仿照ripes的syscall实现了rv32-vm
### syscall ecall 系统调用
```c
// ecall 系统调用函数实现
#define ECALL_PNT_INT(num) \
ADDI(REG_A0, REG_X0, num), \
ADDI(REG_A7, REG_X0, 0x1), \
ECALL(),
#define ECALL_PNT_STR(str) \
ADDI(REG_A0, REG_X0, str), \
ADDI(REG_A7, REG_X0, 0x4), \
ECALL(),
#define ECALL_EXIT(errno) \
ADDI(REG_A0, REG_X0, errno), \
ADDI(REG_A7, REG_X0, 10), \
ECALL(),
#define ECALL_SCAN_INT(int) \
ADDI(REG_A7, REG_X0, (1025 + 4)), \
ECALL(),
#define ECALL_SCAN_STR(str) \
ADDI(REG_A0, REG_X0, str), \
ADDI(REG_A7, REG_X0, (1025 + 5)), \
ECALL(),
// 函数声明
void ecall_pnt_int(int num);
void ecall_pnt_str(char *str);
void ecall_exit(int errno);
int ecall_scani();
void ecall_scans(char *str);
```

View File

@@ -1,302 +0,0 @@
#include "riscv32.h"
#include <src/mcode/riscv32/riscv32_instr.h>
// Per-function code-generation context.
typedef struct {
    ir_func_t* func;   // function currently being lowered
    int stack_offset;  // total frame size in bytes (saved ra + spill slots)
    int stack_base;    // byte offset of the first spill slot (after saved ra)
    int func_idx;      // index of the current function — appears unused, TODO confirm
    int block_idx;     // index of the current basic block — appears unused, TODO confirm
} gen_ctx_t;
// Return the sp-relative byte offset of the spill slot assigned to IR node
// `ptr`. Every instruction of the function owns one 4-byte slot, laid out
// in basic-block order starting at ctx->stack_base. Panics if `ptr` is not
// an instruction of the current function.
static inline int stack_pos(ir_node_t* ptr, gen_ctx_t *ctx) {
    int offset = ctx->stack_base;
    for (int i = 0; i < ctx->func->bblocks.size; i++) {
        ir_bblock_t* block = vector_at(ctx->func->bblocks, i);
        // FIX: the inner index used to shadow the outer `i`; use a distinct
        // name so the two loops cannot be confused (behavior unchanged).
        for (int j = 0; j < block->instrs.size; j++) {
            if (vector_at(block->instrs, j) == ptr) {
                offset += j * 4;
                Assert(offset >= 0 && offset < ctx->stack_offset);
                return offset;
            }
        }
        // skip the whole block's worth of slots
        offset += block->instrs.size * 4;
    }
    Panic("stack pos got error");
    return 0;
}
// Map a builtin runtime function name to its ecall number.
// Returns -1 when `name` is not a recognized system function.
static int system_func(const char* name) {
    static const struct {
        const char* name;
        int ecall_num;
    } defined_func[] = {
        {"ecall_pnt_int", 1},
        {"ecall_pnt_char", 11},
        {"ecall_scan_int", 1025 + 4},
    };
    const int count = (int)(sizeof(defined_func) / sizeof(defined_func[0]));
    int idx = 0;
    while (idx < count) {
        if (rt_strcmp(name, defined_func[idx].name) == 0) {
            return defined_func[idx].ecall_num;
        }
        idx++;
    }
    return -1;
}
// Materialize the value of IR node `ptr` into register `reg`: integer
// constants are loaded immediately, anything else is reloaded from the
// node's stack slot.
// NOTE(review): `len` is never incremented, so this always returns 0 even
// though every caller accumulates the return value — confirm intent.
static int get_node_val(mcode_rv32_t* out_asm, gen_ctx_t* ctx, ir_node_t* ptr, int reg) {
    int len = 0;
    switch (ptr->tag) {
        case IR_NODE_CONST_INT: {
            // TODO
            rv32_li(out_asm, reg, ptr->data.const_int.val);
            // emit_rv32_instr(out_asm, RV_ADDI, reg, reg, 0, ptr->data.const_int.val);
            break;
        }
        default: {
            // non-constant: the value lives in this node's spill slot
            int offset = stack_pos(ptr, ctx);
            rv32_lw(out_asm, reg, REG_SP, offset);
            break;
        }
    }
    return len;
}
// Lower one IR instruction into RV32 machine code. Value-producing
// instructions (ops, calls) store their result into the node's own stack
// slot so later uses can reload it via get_node_val/stack_pos.
// Returns the count accumulated from get_node_val (currently always 0).
static int gen_instr(rv32_prog_t* _out_asm, gen_ctx_t* ctx, ir_node_t* instr) {
    mcode_rv32_t* out_asm = &_out_asm->mcode;
    int idx = 0;
    int offset;
    char buf[1024];            // scratch for mangled local label names
    symasm_entry_t label;
    switch (instr->tag) {
        case IR_NODE_ALLOC: {
            // TODO
            break;
        }
        case IR_NODE_LOAD: {
            offset = stack_pos(instr->data.load.target, ctx);
            // t0 = M[sp + offset]
            rv32_lw(out_asm, REG_T0, REG_SP, offset);
            break;
        }
        case IR_NODE_STORE: {
            idx += get_node_val(out_asm, ctx, instr->data.store.value, REG_T0);
            offset = stack_pos(instr->data.store.target, ctx);
            // M[sp + offset] = t0
            rv32_sw(out_asm, REG_T0, REG_SP, offset);
            break;
        }
        case IR_NODE_RET: {
            // A0 = S0
            if (instr->data.ret.ret_val != NULL) {
                idx += get_node_val(out_asm, ctx, instr->data.ret.ret_val, REG_A0);
            }
            // ra = M[sp + 0]
            rv32_lw(out_asm, REG_RA, REG_SP, 0);
            // sp = sp + stack_offset (pop the whole frame)
            rv32_addi(out_asm, REG_SP, REG_SP, ctx->stack_offset);
            // ret == JALR(REG_X0, REG_RA, 0)
            rv32_ret(out_asm);
            break;
        }
        case IR_NODE_OP: {
            // binary op: lhs -> t1, rhs -> t2, result computed into t0
            idx += get_node_val(out_asm, ctx, instr->data.op.lhs, REG_T1);
            idx += get_node_val(out_asm, ctx, instr->data.op.rhs, REG_T2);
            rv32_instr_t _instr = {
                .rd = REG_T0,
                .rs1 = REG_T1,
                .rs2 = REG_T2,
                .imm = 0
            };
#define GEN_BIN_OP(type) _instr.instr_type = type, \
    emit_rv32_instr(out_asm, &_instr, EMIT_PUSH_BACK, NULL)
            switch (instr->data.op.op) {
                case IR_OP_ADD:
                    GEN_BIN_OP(RV_ADD);
                    break;
                case IR_OP_SUB:
                    GEN_BIN_OP(RV_SUB);
                    break;
                case IR_OP_MUL:
                    GEN_BIN_OP(RV_MUL);
                    break;
                case IR_OP_DIV:
                    GEN_BIN_OP(RV_DIV);
                    break;
                case IR_OP_MOD:
                    GEN_BIN_OP(RV_REM);
                    break;
                case IR_OP_EQ:
                    // eq: t0 = (t1 ^ t2) == 0
                    GEN_BIN_OP(RV_XOR);
                    rv32_seqz(out_asm, REG_T0, REG_T0);
                    break;
                case IR_OP_GE:
                    // ge: !(t1 < t2)
                    GEN_BIN_OP(RV_SLT);
                    rv32_seqz(out_asm, REG_T0, REG_T0);
                    break;
                case IR_OP_GT:
                    // SGT(rd, rs1, rs2) SLT(rd, rs2, rs1)
                    // GENCODE(SGT(REG_T0, REG_T1, REG_T2));
                    rv32_slt(out_asm, REG_T0, REG_T2, REG_T1);
                    break;
                case IR_OP_LE:
                    // le: !(t2 < t1)
                    // GENCODE(SGT(REG_T0, REG_T1, REG_T2));
                    rv32_slt(out_asm, REG_T0, REG_T2, REG_T1);
                    rv32_seqz(out_asm, REG_T0, REG_T0);
                    break;
                case IR_OP_LT:
                    rv32_slt(out_asm, REG_T0, REG_T1, REG_T2);
                    break;
                case IR_OP_NEQ:
                    // neq: (t1 ^ t2) != 0 — nonzero result is truthy
                    GEN_BIN_OP(RV_XOR);
                    break;
                default:
                    LOG_ERROR("ERROR gen_instr op in riscv");
                    break;
            }
            // spill the result into this node's slot
            offset = stack_pos(instr, ctx);
            rv32_sw(out_asm, REG_T0, REG_SP, offset);
            break;
        }
        case IR_NODE_BRANCH: {
            // conditional: bne t0, x0, true-label; else fall to jal false-label
            get_node_val(out_asm, ctx, instr->data.branch.cond, REG_T0);
            rt.snprintf(buf, sizeof(buf), "L%s%p", instr->data.branch.true_bblock->label, instr->data.branch.true_bblock);
            label.name = strpool_intern(_out_asm->strpool, buf);
            label.attr = LOCAL;
            rv32_bne_l(out_asm, REG_T0, REG_X0, &label);
            rt.snprintf(buf, sizeof(buf), "L%s%p", instr->data.branch.false_bblock->label, instr->data.branch.false_bblock);
            label.name = strpool_intern(_out_asm->strpool, buf);
            label.attr = LOCAL;
            rv32_jal_l(out_asm, REG_X0, &label);
            break;
        }
        case IR_NODE_JUMP: {
            // TODO
            rt.snprintf(buf, sizeof(buf), "L%s%p", instr->data.jump.target_bblock->label, instr->data.jump.target_bblock);
            label.name = strpool_intern(_out_asm->strpool, buf);
            label.attr = LOCAL;
            rv32_jal_l(out_asm, REG_X0, &label);
            break;
        }
        case IR_NODE_CALL: {
            // arguments go in a0..a7 (RISC-V calling convention, max 8)
            if (instr->data.call.args.size > 8) {
                LOG_ERROR("can't add so much params");
            }
            int param_regs[8] = {
                REG_A0, REG_A1, REG_A2, REG_A3,
                REG_A4, REG_A5, REG_A6, REG_A7
            };
            for (int i = 0; i < instr->data.call.args.size; i++) {
                ir_node_t* param = vector_at(instr->data.call.args, i);
                idx += get_node_val(out_asm, ctx, param, param_regs[i]);
            }
            // builtin runtime functions become raw ecalls instead of calls
            int system_func_idx = system_func(instr->data.call.callee->name);
            if (system_func_idx != -1) {
                rv32_li(out_asm, REG_A7, system_func_idx);
                rv32_ecall(out_asm);
                goto CALL_END;
            }
            /*
            // GENCODES(CALL(0));
            //     AUIPC(REG_X1, REG_X0), \
            //     JALR(REG_X1, REG_X1, offset)
            */
            // TODO CALL
            label.name = strpool_intern(_out_asm->strpool, instr->data.call.callee->name);
            label.attr = GLOBAL;
            rv32_call_l(out_asm, &label);
CALL_END:
            // the return value (a0) is spilled like any other result
            offset = stack_pos(instr, ctx);
            rv32_sw(out_asm, REG_A0, REG_SP, offset);
            break;
        }
        default:
            LOG_ERROR("ERROR gen_instr in riscv");
    }
    return idx;
}
// Lower one basic block: register its local label ("L<name><ptr>") at the
// current code offset, then emit every instruction in order.
static int gen_block(rv32_prog_t* out_asm, gen_ctx_t* ctx, ir_bblock_t* block) {
    char name_buf[1024];
    rt.snprintf(name_buf, sizeof(name_buf), "L%s%p", block->label, block);
    symasm_entry_t label = {
        .name = strpool_intern(out_asm->strpool, name_buf),
        .attr = LOCAL,
    };
    symtab_asm_put(&out_asm->symtab, &label, out_asm->mcode.code.size);
    int i = 0;
    while (i < block->instrs.size) {
        gen_instr(out_asm, ctx, vector_at(block->instrs, i));
        i++;
    }
    return 0;
}
// Lower one IR function: publish its global label, size the stack frame
// (slot 0 holds the saved ra, then one 4-byte slot per instruction),
// spill the incoming register parameters, then lower every basic block.
static int gen_func(rv32_prog_t* out_asm, ir_func_t* func) {
    gen_ctx_t ctx;
    symasm_entry_t label = {
        .name = strpool_intern(out_asm->strpool, func->name),
        .attr = GLOBAL,
    };
    symtab_asm_put(&out_asm->symtab, &label, out_asm->mcode.code.size);
    int stack_base = 4;      // slot 0 is reserved for the saved ra
    int stack_offset = stack_base;
    for (int i = 0; i < func->bblocks.size; i++) {
        // TODO every instr push ret val to stack
        stack_offset += 4 * (*vector_at(func->bblocks, i)).instrs.size;
    }
    ctx.func = func;
    ctx.stack_base = stack_base;
    ctx.stack_offset = stack_offset;
    ctx.func_idx = 0;
    ctx.block_idx = 0;
    // TODO Alignment by 16
    // prologue: sp = sp - stack_offset;
    rv32_addi(&out_asm->mcode, REG_SP, REG_SP, -stack_offset);
    // M[sp] = ra;
    rv32_sw(&out_asm->mcode, REG_RA, REG_SP, 0);
    // parameters arrive in a0..a7 (max 8, per the calling convention)
    int param_regs[8] = {
        REG_A0, REG_A1, REG_A2, REG_A3,
        REG_A4, REG_A5, REG_A6, REG_A7
    };
    if (func->params.size > 8) {
        LOG_ERROR("can't add so much params");
    }
    for (int i = 0; i < func->params.size; i++) {
        int offset = stack_pos(vector_at(func->params, i), &ctx);
        // M[sp + offset] = param[idx];
        rv32_sw(&out_asm->mcode, param_regs[i], REG_SP, offset);
    }
    for(int i = 0; i < func->bblocks.size; i ++) {
        gen_block(out_asm, &ctx ,vector_at(func->bblocks, i));
    }
    return 0;
}
// Lower a whole IR program into a freshly (re)initialized RV32 program.
// Returns 0 on success.
int gen_rv32_from_ir(ir_prog_t* ir, rv32_prog_t* out_asm) {
    init_rv32_prog(out_asm, NULL);
    int i = 0;
    for (; i < ir->funcs.size; i++) {
        gen_func(out_asm, vector_at(ir->funcs, i));
    }
    return 0;
    // TODO: locate `main` and record the program entry point once the
    // linker/loader needs it.
}

View File

@@ -1,9 +0,0 @@
#ifndef __SMCC_CC_RISCV32_H__
#define __SMCC_CC_RISCV32_H__
#include <src/assembler/assembler.h>
#include "../../middleend/ir/ir.h"
// Lower a whole IR program into `out_asm` (RV32 target); returns 0 on success.
int gen_rv32_from_ir(ir_prog_t* ir, rv32_prog_t* out_asm);
#endif

View File

@@ -1,11 +0,0 @@
#include "ccompiler.h"
// Run the full compiler pipeline: source -> AST -> IR -> assembly program.
// Returns the assembly program produced by the backend.
asm_prog_t* smcc_cc(smcc_cc_t* cc) {
    ast_node_t* root = cc_frontend(cc->file, cc->stream, cc->sread);
    // TODO add config
    ir_prog_t* prog = cc_middleend(root, &cc->midend_conf);
    // TODO add config
    asm_prog_t* asm_prog = cc_backend(prog, &cc->backend_conf);
    // BUG FIX: the result was computed but never returned — falling off the
    // end of a non-void function is undefined behavior for the caller.
    return asm_prog;
}

View File

@@ -1,21 +0,0 @@
#ifndef __SMCC_CC_H__
#define __SMCC_CC_H__
// TODO
#include "frontend/frontend.h"
#include "middleend/middleend.h"
#include "backend/backend.h"
// Top-level compiler driver configuration: the input stream plus the
// middle-end and back-end options.
typedef struct smcc_cc {
    const char *file;               // source file name (used for diagnostics)
    void *stream;                   // opaque input stream handle
    sread_fn sread;                 // reader callback for `stream`
    cc_midend_conf_t midend_conf;   // middle-end configuration
    cc_backend_conf_t backend_conf; // back-end configuration
} smcc_cc_t;
typedef union asm_prog asm_prog_t;
// Run the whole pipeline and return the generated assembly program.
asm_prog_t* smcc_cc(smcc_cc_t* cc);
#endif

View File

@@ -1,43 +0,0 @@
# Compiler settings
CC = gcc
AR = ar
CFLAGS = -g -Wall -I../..

# Source directories
LEXER_DIR = ./lexer
PARSER_DIR = ./parser
AST_DIR = ./parser/ast

# Source file list
SRCS = \
	frontend.c \
	$(LEXER_DIR)/lexer.c \
	$(LEXER_DIR)/token.c \
	$(PARSER_DIR)/parser.c \
	$(PARSER_DIR)/ast.c \
	$(AST_DIR)/block.c \
	$(AST_DIR)/decl.c \
	$(AST_DIR)/expr.c \
	$(AST_DIR)/func.c \
	$(AST_DIR)/program.c \
	$(AST_DIR)/stmt.c \
	$(AST_DIR)/term.c \

# Derived object file list
OBJS = $(SRCS:.c=.o)

# Final target: the frontend static library
TARGET = libfrontend.a

all: $(TARGET)

$(TARGET): $(OBJS)
	$(AR) rcs $@ $^

%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<

clean:
	rm -f $(OBJS) $(TARGET)

.PHONY: all clean

View File

@@ -1,23 +0,0 @@
#include <lib/core.h>
#include "frontend.h"
// Run the compiler front end over one translation unit: initialize the
// runtime, lex and parse the stream, and return the AST root.
// NOTE(review): strpool/symtab/parser live on this stack frame, but the
// returned AST may still reference their storage after return — verify
// object lifetimes before relying on the tree.
ast_node_t* cc_frontend(const char* file, void* stream, sread_fn sread) {
    init_lib_core();
    strpool_t strpool;
    init_strpool(&strpool);
    cc_lexer_t lexer;
    init_lexer(&lexer, file, stream, sread, &strpool);
    symtab_t symtab;
    init_symtab(&symtab);
    // TODO global scope
    symtab_enter_scope(&symtab);
    parser_t parser;
    init_parser(&parser, &lexer, &symtab);
    parse_prog(&parser);
    // TODO Free the resourse
    return parser.root;
}

View File

@@ -1,9 +0,0 @@
#ifndef __SMCC_CC_FRONTEND_H__
#define __SMCC_CC_FRONTEND_H__
#include "lexer/lexer.h"
#include "parser/parser.h"
// Reader callback: fill dst_buf with up to elem_size*count bytes from
// `stream`; returns the number of elements actually read.
typedef int (*sread_fn)(void *dst_buf, int elem_size, int count, void *stream);
// Lex and parse one translation unit; returns the AST root.
ast_node_t* cc_frontend(const char* file, void* stream, sread_fn sread);
#endif

View File

@@ -1,525 +0,0 @@
/**
* 仿照LCCompiler的词法分析部分
*
* 如下为LCC的README in 2025.2
This hierarchy is the distribution for lcc version 4.2.
lcc version 3.x is described in the book "A Retargetable C Compiler:
Design and Implementation" (Addison-Wesley, 1995, ISBN 0-8053-1670-1).
There are significant differences between 3.x and 4.x, most notably in
the intermediate code. For details, see
https://drh.github.io/lcc/documents/interface4.pdf.
VERSION 4.2 IS INCOMPATIBLE WITH EARLIER VERSIONS OF LCC. DO NOT
UNLOAD THIS DISTRIBUTION ON TOP OF A 3.X DISTRIBUTION.
LCC is a C89 ("ANSI C") compiler designed to be highly retargetable.
LOG describes the changes since the last release.
CPYRIGHT describes the conditions under you can use, copy, modify, and
distribute lcc or works derived from lcc.
doc/install.html is an HTML file that gives a complete description of
the distribution and installation instructions.
Chris Fraser / cwf@aya.yale.edu
David Hanson / drh@drhanson.net
*/
#include <lib/core.h>
#include "lexer_log.h"
#include "token.h"
#include "lexer.h"
// Keyword lookup table, expanded from KEYWORD_TABLE. The table is listed
// in ascending name order so keyword_cmp() can binary-search it.
static const struct {
    const char* name;               // keyword spelling
    enum CSTD_KEYWORD std_type;     // which C standard introduced it
    cc_tktype_t tok;                // token type to emit
} keywords[] = {
#define X(name, std_type, tok, ...) { #name, std_type, tok },
    KEYWORD_TABLE
#undef X
};
// by using binary search to find the keyword
// `name` points at `len` identifier characters (not necessarily
// NUL-terminated). Returns the index into keywords[], or -1 if the
// identifier is not a keyword.
static inline int keyword_cmp(const char* name, int len) {
    int low = 0;
    int high = sizeof(keywords) / sizeof(keywords[0]) - 1;
    while (low <= high) {
        int mid = (low + high) / 2;
        const char *key = keywords[mid].name;
        int cmp = 0;
        // custom length-bounded string comparison
        for (int i = 0; i < len; i++) {
            if (name[i] != key[i]) {
                cmp = (unsigned char)name[i] - (unsigned char)key[i];
                break;
            }
            if (name[i] == '\0') break; // stop early at a terminator
        }
        if (cmp == 0) {
            // the first `len` chars matched; exact match only if the
            // keyword also ends here
            if (key[len] == '\0') return mid;
            cmp = -1; // keyword is longer than the input
        }
        if (cmp < 0) {
            high = mid - 1;
        } else {
            low = mid + 1;
        }
    }
    return -1; // Not a keyword.
}
// Prepare a lexer over `stream`: position tracking starts at line 1,
// column 1, and the buffer starts empty (cur == end means "nothing
// buffered yet"; the first token fetch triggers a refill).
void init_lexer(cc_lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread, strpool_t* strpool) {
    lexer->strpool = strpool;
    lexer->cur_ptr = (char*)&(lexer->buffer);
    lexer->end_ptr = lexer->cur_ptr;
    lexer->loc.fname = strpool_intern(strpool, file_name);
    lexer->loc.line = 1;
    lexer->loc.col = 1;
    lexer->stream = stream;
    lexer->sread = sread;
    rt_memset(lexer->buffer, 0, sizeof(lexer->buffer));
}
// Slide the unread tail of the buffer to the front, then refill the rest
// of the buffer from the input stream. On EOF a '\0' sentinel is appended
// after the last byte read.
static void flush_buffer(cc_lexer_t* lexer) {
    int num = lexer->end_ptr - lexer->cur_ptr;
    for (int i = 0; i < num; i++) {
        lexer->buffer[i] = lexer->cur_ptr[i];
    }
    lexer->cur_ptr = lexer->buffer;
    // BUG FIX: end_ptr must follow the moved tail. The old code left it at
    // its previous fill position and then advanced it by got_size, so every
    // refill after the first pushed end_ptr past the end of the buffer.
    lexer->end_ptr = lexer->buffer + num;
    int read_size = LEXER_BUFFER_SIZE - num;
    // TODO rt_size_t to int maybe lose precision
    int got_size = lexer->sread(lexer->buffer + num, 1, read_size, lexer->stream);
    if (got_size < 0) {
        LEX_ERROR("lexer read error");
    } else if (got_size < read_size) {
        // short read: the stream is exhausted, mark EOF
        lexer->end_ptr += got_size;
        lexer->end_ptr[0] = '\0'; // EOF
        lexer->end_ptr++;
    } else if (got_size == read_size) {
        lexer->end_ptr += got_size;
    } else {
        LEX_ERROR("lexer read error imposible got_size > read_size maybe overflow?");
    }
}
// Advance cur_ptr to the next '\n' (or the '\0' EOF sentinel), refilling
// the buffer as needed. Used to skip line comments and '#' lines.
static void goto_newline(cc_lexer_t* lexer) {
    do {
        if (lexer->cur_ptr == lexer->end_ptr) {
            // refill resets cur_ptr to the buffer start; step back one so
            // the unconditional ++ below lands on buffer[0], not buffer[1]
            flush_buffer(lexer);
            lexer->cur_ptr--;
        }
        lexer->cur_ptr++;
    } while (*lexer->cur_ptr != '\n' && *lexer->cur_ptr != '\0');
}
// Skip a /* ... */ block comment: advance cur_ptr just past the closing
// "*/" (or stop at the '\0' EOF sentinel), counting newlines for the
// location tracker and refilling the buffer as needed.
static void goto_block_comment(cc_lexer_t* lexer) {
    while (1) {
        if (lexer->end_ptr - lexer->cur_ptr < 2) {
            // need two bytes of lookahead to match "*/"
            flush_buffer(lexer);
        }
        if (lexer->cur_ptr[0] == '\0') {
            break; // EOF inside the comment
        } else if (lexer->cur_ptr[0] == '*' && lexer->cur_ptr[1] == '/') {
            lexer->cur_ptr += 2;
            break;
        } else {
            if (lexer->cur_ptr[0] == '\n') lexer->loc.line++;
            lexer->cur_ptr++;
        }
    }
}
// TODO escape character not enough
// TODO escape character not enough
// Translate the character after a backslash into its escaped value.
// Unknown escapes are reported via LEX_ERROR and map to -1.
static char got_slash(char* peek) {
    static const char esc_in[]  = { '\\', '\'', '"',  '?',  '0',  'b',  'f',  'n',  'r',  't',  'v' };
    static const char esc_out[] = { '\\', '\'', '"',  '?',  '\0', '\b', '\f', '\n', '\r', '\t', '\v' };
    for (unsigned i = 0; i < sizeof(esc_in); i++) {
        if (*peek == esc_in[i]) {
            return esc_out[i];
        }
    }
    LEX_ERROR("Unknown escape character");
    return -1;
}
// Scan a character literal; cur_ptr points at the opening quote on entry
// and is left just past the closing quote. The decoded value goes into
// token->val.ch.
static void parse_char_literal(cc_lexer_t* lexer, tok_t* token) {
    char val = 0;
    char* peek = lexer->cur_ptr + 1; // skip the opening '
    if (*peek == '\\') {
        peek++;
        val = got_slash(peek);
        peek++;
    } else {
        val = *peek++;
    }
    if (*peek++ != '\'') LEX_ERROR("Unclosed character literal");
    lexer->cur_ptr = peek;
    token->val.ch = val;
}
// Scan a string literal into a static scratch buffer, decode escapes, and
// intern the result. cur_ptr points at the opening quote on entry and is
// left just past the closing quote.
// NOTE(review): flush_buffer() slides buffer contents, which invalidates
// `peek`, yet peek is not recomputed after the refill — verify literals
// that straddle a buffer boundary.
static void parse_string_literal(cc_lexer_t* lexer, tok_t* token) {
    char* peek = lexer->cur_ptr + 1;
    // TODO string literal size check
    static char dest[LEXER_MAX_TOKEN_SIZE + 1]; // scratch; contents interned below
    int len = 0;
    while (*peek != '"') {
        if (peek >= lexer->end_ptr) flush_buffer(lexer);
        if (*peek == '\\') { // decode escape sequences in place
            peek++;
            *peek = got_slash(peek);
        }
        if (len >= LEXER_MAX_TOKEN_SIZE) LEX_ERROR("String too long");
        dest[len++] = *peek++;
    }
    dest[len] = '\0';
    lexer->cur_ptr = peek + 1; // 1 is `"`
    lexer->loc.len = len + 2; // 2 is `"` `"`
    token->val.str = strpool_intern(lexer->strpool, dest);
}
// Scan an integer or floating-point literal starting at cur_ptr. Supports
// decimal, octal (leading 0) and hexadecimal (0x/0X) integers, plus
// decimal floats with optional fraction and exponent. Fills token->val and
// token->sub_type and advances cur_ptr past the literal.
// NOTE(review): the exponent is parsed but never applied (pow() is not
// available here), so scientific-notation values are currently wrong.
static void parse_number(cc_lexer_t* lexer, tok_t* token) {
    char* peek = lexer->cur_ptr;
    int base = 10;
    int is_float = 0;
    long long int_val = 0;
    double float_val = 0.0;
    double fraction = 1.0;
    // determine the base from the prefix
    if (*peek == '0') {
        peek++;
        if (*peek == 'x' || *peek == 'X') {
            // BUG FIX: the old switch fell through its case labels, leaving
            // base == 8 and the 'x' unconsumed, so "0x1F" lexed as 0
            // followed by the identifier "x1F".
            base = 16;
            peek++; // consume the 'x'/'X'
        } else {
            base = 8;
        }
    }
    // integer part
    while (1) {
        int digit = -1;
        if (*peek >= '0' && *peek <= '9') {
            digit = *peek - '0';
        } else if (base == 16) {
            if (*peek >= 'a' && *peek <= 'f') digit = *peek - 'a' + 10;
            else if (*peek >= 'A' && *peek <= 'F') digit = *peek - 'A' + 10;
        }
        if (digit < 0 || digit >= base) break;
        if (!is_float) {
            int_val = int_val * base + digit;
        } else {
            float_val = float_val * base + digit;
            fraction *= base;
        }
        peek++;
    }
    // fractional part (decimal floats only)
    if (*peek == '.' && base == 10) {
        is_float = 1;
        float_val = int_val;
        peek++;
        while (*peek >= '0' && *peek <= '9') {
            float_val = float_val * 10.0 + (*peek - '0');
            fraction *= 10.0;
            peek++;
        }
        float_val /= fraction;
    }
    // exponent part (parsed but not applied — see NOTE above)
    if ((*peek == 'e' || *peek == 'E') && base == 10) {
        is_float = 1;
        peek++;
        // int exp_sign = 1;
        int exponent = 0;
        if (*peek == '+') peek++;
        else if (*peek == '-') {
            // exp_sign = -1;
            peek++;
        }
        while (*peek >= '0' && *peek <= '9') {
            exponent = exponent * 10 + (*peek - '0');
            peek++;
        }
        // float_val *= pow(10.0, exp_sign * exponent);
    }
    // store the result and advance the lexer
    lexer->loc.len = peek - lexer->cur_ptr;
    lexer->cur_ptr = peek;
    if (is_float) {
        token->val.f32 = float_val;
        token->sub_type = TOKEN_FLOAT_LITERAL;
    } else {
        token->val.i = int_val;
        token->sub_type = TOKEN_INT_LITERAL;
    }
}
#define GOT_ONE_TOKEN_BUF_SIZE 64
// /zh/c/language/operator_arithmetic.html
// Scan one raw token at the current position and fill `token` with its
// type, literal value and source location. Whitespace and comments are
// returned as tokens too; use get_valid_token() to skip them.
void get_token(cc_lexer_t* lexer, tok_t* token) {
    // keep enough buffered lookahead so one token can be scanned safely
    if (lexer->end_ptr - lexer->cur_ptr < GOT_ONE_TOKEN_BUF_SIZE) {
        flush_buffer(lexer);
    }
    register char* peek = lexer->cur_ptr;
    cc_tktype_t tk_type = TOKEN_INIT;
    ctype_t literal = { 0 };
    // dispatch on the first character, then look ahead as needed
    switch (*peek++) {
    case '=':
        switch (*peek++) {
        case '=': tk_type = TOKEN_EQ; break;
        default: peek--, tk_type = TOKEN_ASSIGN; break;
        } break;
    case '+':
        switch (*peek++) {
        case '+': tk_type = TOKEN_ADD_ADD; break;
        case '=': tk_type = TOKEN_ASSIGN_ADD; break;
        default: peek--, tk_type = TOKEN_ADD; break;
        } break;
    case '-':
        switch (*peek++) {
        case '-': tk_type = TOKEN_SUB_SUB; break;
        case '=': tk_type = TOKEN_ASSIGN_SUB; break;
        case '>': tk_type = TOKEN_DEREF; break;
        default: peek--, tk_type = TOKEN_SUB; break;
        } break;
    case '*':
        switch (*peek++) {
        case '=': tk_type = TOKEN_ASSIGN_MUL; break;
        default: peek--, tk_type = TOKEN_MUL; break;
        } break;
    case '/':
        switch (*peek++) {
        case '=': tk_type = TOKEN_ASSIGN_DIV; break;
        case '/': {
            goto_newline(lexer);
            tk_type = TOKEN_LINE_COMMENT;
            goto END;
        }
        case '*': {
            lexer->cur_ptr = peek;
            goto_block_comment(lexer);
            tk_type = TOKEN_BLOCK_COMMENT;
            goto END;
        }
        default: peek--, tk_type = TOKEN_DIV; break;
        } break;
    case '%':
        switch (*peek++) {
        case '=': tk_type = TOKEN_ASSIGN_MOD; break;
        default: peek--, tk_type = TOKEN_MOD; break;
        } break;
    case '&':
        switch (*peek++) {
        case '&': tk_type = TOKEN_AND_AND; break;
        case '=': tk_type = TOKEN_ASSIGN_AND; break;
        default: peek--, tk_type = TOKEN_AND; break;
        } break;
    case '|':
        switch (*peek++) {
        case '|': tk_type = TOKEN_OR_OR; break;
        case '=': tk_type = TOKEN_ASSIGN_OR; break;
        default: peek--, tk_type = TOKEN_OR; break;
        } break;
    case '^':
        switch (*peek++) {
        case '=': tk_type = TOKEN_ASSIGN_XOR; break;
        default: peek--, tk_type = TOKEN_XOR; break;
        } break;
    case '<':
        switch (*peek++) {
        case '=': tk_type = TOKEN_LE; break;
        case '<': tk_type = (*peek == '=') ? (peek++, TOKEN_ASSIGN_L_SH) : TOKEN_L_SH; break;
        default: peek--, tk_type = TOKEN_LT; break;
        } break;
    case '>':
        switch (*peek++) {
        case '=': tk_type = TOKEN_GE; break;
        case '>': tk_type = (*peek == '=') ? (peek++, TOKEN_ASSIGN_R_SH) : TOKEN_R_SH; break;
        default: peek--, tk_type = TOKEN_GT; break;
        } break;
    case '~':
        tk_type = TOKEN_BIT_NOT; break;
    case '!':
        switch (*peek++) {
        case '=': tk_type = TOKEN_NEQ; break;
        default: peek--, tk_type = TOKEN_NOT; break;
        } break;
    case '[':
        tk_type = TOKEN_L_BRACKET; break;
    case ']':
        tk_type = TOKEN_R_BRACKET; break;
    case '(':
        tk_type = TOKEN_L_PAREN; break;
    case ')':
        tk_type = TOKEN_R_PAREN; break;
    case '{':
        tk_type = TOKEN_L_BRACE; break;
    case '}':
        tk_type = TOKEN_R_BRACE; break;
    case ';':
        tk_type = TOKEN_SEMICOLON; break;
    case ',':
        tk_type = TOKEN_COMMA; break;
    case ':':
        tk_type = TOKEN_COLON; break;
    case '.':
        if (peek[0] == '.' && peek[1] == '.') {
            peek += 2;
            tk_type = TOKEN_ELLIPSIS;
        } else {
            tk_type = TOKEN_DOT;
        }
        break;
    case '?':
        tk_type = TOKEN_COND; break;
    case '\v': case '\r': case '\f':
    case ' ': case '\t':
        tk_type = TOKEN_BLANK; break;
    case '\n':
        // you need to flush a newline or blank
        lexer->loc.line += 1;
        lexer->loc.col = -1;
        lexer->loc.len = 1;
        tk_type = TOKEN_BLANK;
        break;
    case '#':
        // TODO: macros belong to the preprocessor, not the lexer;
        // the whole line is ignored here
        LEX_WARN("Maroc does not support in lexer rather in preprocessor, it will be ignored");
        goto_newline(lexer);
        tk_type = TOKEN_BLANK;
        goto END;
    case '\0':
        // EOF
        tk_type = TOKEN_EOF;
        goto END;
    case '\'':
        parse_char_literal(lexer, token);
        literal = token->val;
        tk_type = TOKEN_CHAR_LITERAL;
        goto END; break;
    case '"':
        parse_string_literal(lexer, token);
        literal = token->val;
        tk_type = TOKEN_STRING_LITERAL;
        goto END; break;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        parse_number(lexer, token);
        // TODO Make it easy
        literal = token->val;
        tk_type = token->sub_type;
        goto END; break;
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':case 'Y': case 'Z':
    case '_':
        // TOKEN_IDENT
        // BUG FIX: the old check compared *peek against both 'L' and a
        // quote at once, which is always false. The consumed first char is
        // peek[-1]; reject L'...' / L"..." wide literals explicitly.
        if (peek[-1] == 'L' && (*peek == '\'' || *peek == '"')) {
            LEX_ERROR("unsupport wide-character char literal by `L` format");
        }
        while (1) {
            if (peek == lexer->end_ptr) {
                LEX_ERROR("unsupport outof 64 length identifier");
            }
            if ((*peek >= 'a' && *peek <= 'z') || (*peek >= 'A' && *peek <= 'Z') ||
                (*peek == '_') || (*peek >= '0' && *peek <= '9')) {
                peek++;
                continue;
            }
            break;
        }
        // renamed from `strlen` to avoid shadowing the stdlib name
        int len = peek - lexer->cur_ptr;
        int res = keyword_cmp((const char*)lexer->cur_ptr, len);
        if (res == -1) {
            // temporarily NUL-terminate in place to intern the identifier
            char prev = lexer->cur_ptr[len];
            lexer->cur_ptr[len] = '\0';
            literal.str = strpool_intern(lexer->strpool, lexer->cur_ptr);
            lexer->cur_ptr[len] = prev;
            tk_type = TOKEN_IDENT; break;
        } else {
            tk_type = keywords[res].tok; break;
        }
    default:
        LEX_ERROR("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
        break;
    }
    lexer->loc.len = peek - lexer->cur_ptr;
    lexer->cur_ptr = peek;
END:
    lexer->loc.col += lexer->loc.len;
    lexer->loc.len = 0;
    token->val = literal;
    token->sub_type = tk_type;
    token->loc = lexer->loc;
    // map each concrete token type to its basic category
    static const tok_basic_type_t tok_type_map[] = {
        // ordinary tokens use #str
#define X(str, basic, tok) [tok] = basic,
        TOKEN_TABLE
#undef X
        // keywords use #name
#define X(name, std, tok) [tok] = TK_BASIC_KEYWORD,
        KEYWORD_TABLE
#undef X
    };
    token->type = tok_type_map[tk_type];
    LEX_DEBUG("get token `%s` in %s:%d:%d", get_tok_name(tk_type),
        token->loc.fname, token->loc.line, token->loc.col);
}
// Fetch tokens until one that matters to the parser appears, skipping
// whitespace and comments (invalid tokens trip the assertion).
void get_valid_token(cc_lexer_t* lexer, tok_t* token) {
    while (1) {
        get_token(lexer, token);
        Assert(token->type != TK_BASIC_INVALID);
        if (token->type != TK_BASIC_WHITESPACE && token->type != TK_BASIC_COMMENT) {
            return;
        }
    }
}

View File

@@ -1,76 +0,0 @@
/**
 * @file lexer.h
 * @brief Core data structures and interface of the C lexer.
 */
#ifndef __SMCC_CC_LEXER_H__
#define __SMCC_CC_LEXER_H__
#include <lib/core.h>
#include "token.h"

#ifndef LEXER_MAX_TOKEN_SIZE
#define LEXER_MAX_TOKEN_SIZE 63 ///< maximum length of a single token
#endif

#ifndef LEXER_BUFFER_SIZE
#define LEXER_BUFFER_SIZE 4095 ///< size of the lexing buffer
#endif

/**
 * @brief Stream reader prototype.
 * @param dst_buf destination buffer
 * @param elem_size element size in bytes
 * @param count number of elements to read
 * @param stream input stream pointer
 * @return number of elements actually read
 */
typedef int (*lexer_sread_fn)(void *dst_buf, int elem_size, int count, void *stream);

/**
 * @brief Core lexer state.
 *
 * Holds the scan position, the lookahead buffer, and the stream hookup.
 */
typedef struct cc_lexer {
    loc_t loc;         ///< current source location (file name, line, column)
    char* cur_ptr;     ///< scan pointer (first unprocessed character)
    char* end_ptr;     ///< buffer end (one past the last valid character)
    char buffer[LEXER_BUFFER_SIZE+1]; ///< character buffer (incl. NUL terminator)
    lexer_sread_fn sread; ///< stream reader callback
    void* stream;      ///< opaque input stream handle
    strpool_t* strpool; ///< string pool for identifiers and literals
} cc_lexer_t;

/**
 * @brief Initialize a lexer.
 * @param[out] lexer lexer instance to initialize
 * @param[in] file_name name of the source file being lexed
 * @param[in] stream input stream handle
 * @param[in] sread custom stream reader
 * @param[in] strpool string-pool instance
 */
void init_lexer(cc_lexer_t* lexer, const char* file_name, void* stream,
    lexer_sread_fn sread, strpool_t* strpool);

/**
 * @brief Get the next raw token.
 * @param[in] lexer lexer instance
 * @param[out] token output token
 *
 * Returns every token kind, including whitespace and comments.
 */
void get_token(cc_lexer_t* lexer, tok_t* token);

/**
 * @brief Get the next meaningful token.
 * @param[in] lexer lexer instance
 * @param[out] token output token
 *
 * Skips whitespace and comments, returning only tokens that matter to the
 * parser.
 */
void get_valid_token(cc_lexer_t* lexer, tok_t* token);

#endif

View File

@@ -1,46 +0,0 @@
// Leveled logging macros for the lexer. LEX_LOG_LEVEL selects the lowest
// level that is compiled in (1 = NOTSET ... 6 = FATAL); anything below the
// threshold expands to nothing.
#ifndef __SMCC_LEXER_LOG_H__
#define __SMCC_LEXER_LOG_H__
#include <lib/rt/rt.h>

#ifndef LEX_LOG_LEVEL
#define LEX_LOG_LEVEL 4
#endif

#if LEX_LOG_LEVEL <= 1
#define LEX_NOTSET( fmt, ...) LOG_NOTSET("LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_NOTSET( fmt, ...)
#endif

#if LEX_LOG_LEVEL <= 2
#define LEX_DEBUG( fmt, ...) LOG_DEBUG( "LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_DEBUG( fmt, ...)
#endif

#if LEX_LOG_LEVEL <= 3
#define LEX_INFO( fmt, ...) LOG_INFO( "LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_INFO( fmt, ...)
#endif

#if LEX_LOG_LEVEL <= 4
#define LEX_WARN( fmt, ...) LOG_WARN( "LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_WARN( fmt, ...)
#endif

#if LEX_LOG_LEVEL <= 5
#define LEX_ERROR( fmt, ...) LOG_ERROR("LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_ERROR( fmt, ...)
#endif

#if LEX_LOG_LEVEL <= 6
#define LEX_FATAL( fmt, ...) LOG_FATAL("LEXER: " fmt, ##__VA_ARGS__)
#else
#define LEX_FATAL( fmt, ...)
#endif

#endif // __SMCC_LEXER_LOG_H__

View File

@@ -1,86 +0,0 @@
#include <lib/core.h>
#include "lexer_log.h"
#include "token.h"
// Wrap an index back into the ring buffer's capacity.
#define ROUND_IDX(idx) ((idx) % tokbuf->cap)

// Consume and return the oldest buffered token, or NULL (with an error
// logged) when nothing is buffered. The returned pointer stays valid until
// the slot is overwritten by a later fill.
tok_t* pop_tok(tok_stream_t* tokbuf) {
    if (tokbuf->size == 0) {
        LEX_ERROR("no token to pop");
        return NULL;
    }
    int idx = tokbuf->cur;
    tokbuf->cur = ROUND_IDX(idx + 1);
    tokbuf->size -= 1;
    return tokbuf->buf + idx;
}
// Rewind the peek cursor so peeking restarts at the next unconsumed token.
void flush_peek_tok(tok_stream_t* tokbuf) {
    tokbuf->peek = tokbuf->cur;
}
// Reset a token stream to empty and attach its producer callback. The
// caller supplies the backing buffer (buf/cap) separately.
void init_tokbuf(tok_stream_t *tokbuf, void *stream, tok_stream_get_func gettok) {
    tokbuf->buf = NULL;
    tokbuf->cap = 0;
    tokbuf->size = 0;
    tokbuf->cur = 0;
    tokbuf->peek = 0;
    tokbuf->end = 0;
    tokbuf->stream = stream;
    tokbuf->gettok = gettok;
}
// Return the token at the peek cursor without consuming it, pulling a new
// token from the producer when the cursor passes the buffered data.
// Returns NULL (with an error logged) when the ring is full or no producer
// is attached.
tok_t *peek_tok(tok_stream_t *tokbuf) {
    Assert(tokbuf->size <= tokbuf->cap);
    int idx = tokbuf->peek;
    tokbuf->peek = ROUND_IDX(idx + 1);
    if (idx == tokbuf->end) {
        // the peek cursor caught up with buffered data: fetch one more token
        if (tokbuf->size == tokbuf->cap) {
            LEX_ERROR("peek_tok buffer overflow");
            return NULL;
        }
        if (tokbuf->gettok == NULL) {
            LEX_ERROR("peek_tok can not got tok");
            return NULL;
        }
        tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx]));
        tokbuf->size++;
        tokbuf->end = tokbuf->peek;
    }
    return &(tokbuf->buf[idx]);
}
// Peek the next token and report only its concrete (sub) type.
cc_tktype_t peek_tok_type(tok_stream_t* tokbuf) {
    tok_t* tok = peek_tok(tokbuf);
    return tok->sub_type;
}
// Pop the next token only if its concrete type matches `type`; on mismatch
// the token is left in the stream and an error is logged.
// TODO: the return value is 0 on both paths, so callers cannot detect
// failure — decide on an error convention before changing it.
int expect_pop_tok(tok_stream_t* tokbuf, cc_tktype_t type) {
    flush_peek_tok(tokbuf);
    tok_t* tok = peek_tok(tokbuf);
    if (tok->sub_type != type) {
        // BUG FIX: report the token's concrete sub_type. `tok->type` is the
        // basic-category enum and indexes the cc_tktype_t name table with
        // the wrong value, printing a misleading name.
        LEX_ERROR("expected tok `%s` but got `%s`", get_tok_name(type), get_tok_name(tok->sub_type));
        return 0;
    } else {
        pop_tok(tokbuf);
    }
    return 0;
}
// Diagnostic name table built from the X-macro tables: ordinary tokens use
// their #str spelling, keywords use their #name identifier.
static const char* token_strings[] = {
    // ordinary tokens use #str
#define X(str, basic, tok) [tok] = #str,
    TOKEN_TABLE
#undef X
    // keywords use #name
#define X(name, std, tok) [tok] = #name,
    KEYWORD_TABLE
#undef X
};

// Return a printable name for a concrete token type.
const char* get_tok_name(cc_tktype_t type) {
    return token_strings[type];
}

View File

@@ -1,142 +0,0 @@
#ifndef __SMCC_CC_TOKEN_H__
#define __SMCC_CC_TOKEN_H__
#include <lib/utils/utils.h>
enum CSTD_KEYWORD {
CSTD_C89,
CSTD_C99,
CEXT_ASM,
};
// Using Binary Search To Fast Find Keyword
#define KEYWORD_TABLE \
X(asm , CEXT_ASM, TOKEN_ASM) \
X(break , CSTD_C89, TOKEN_BREAK) \
X(case , CSTD_C89, TOKEN_CASE) \
X(char , CSTD_C89, TOKEN_CHAR) \
X(const , CSTD_C89, TOKEN_CONST) \
X(continue , CSTD_C89, TOKEN_CONTINUE) \
X(default , CSTD_C89, TOKEN_DEFAULT) \
X(do , CSTD_C89, TOKEN_DO) \
X(double , CSTD_C89, TOKEN_DOUBLE) \
X(else , CSTD_C89, TOKEN_ELSE) \
X(enum , CSTD_C89, TOKEN_ENUM) \
X(extern , CSTD_C89, TOKEN_EXTERN) \
X(float , CSTD_C89, TOKEN_FLOAT) \
X(for , CSTD_C89, TOKEN_FOR) \
X(goto , CSTD_C89, TOKEN_GOTO) \
X(if , CSTD_C89, TOKEN_IF) \
X(inline , CSTD_C99, TOKEN_INLINE) \
X(int , CSTD_C89, TOKEN_INT) \
X(long , CSTD_C89, TOKEN_LONG) \
X(register , CSTD_C89, TOKEN_REGISTER) \
X(restrict , CSTD_C99, TOKEN_RESTRICT) \
X(return , CSTD_C89, TOKEN_RETURN) \
X(short , CSTD_C89, TOKEN_SHORT) \
X(signed , CSTD_C89, TOKEN_SIGNED) \
X(sizeof , CSTD_C89, TOKEN_SIZEOF) \
X(static , CSTD_C89, TOKEN_STATIC) \
X(struct , CSTD_C89, TOKEN_STRUCT) \
X(switch , CSTD_C89, TOKEN_SWITCH) \
X(typedef , CSTD_C89, TOKEN_TYPEDEF) \
X(union , CSTD_C89, TOKEN_UNION) \
X(unsigned , CSTD_C89, TOKEN_UNSIGNED) \
X(void , CSTD_C89, TOKEN_VOID) \
X(volatile , CSTD_C89, TOKEN_VOLATILE) \
X(while , CSTD_C89, TOKEN_WHILE) \
// KEYWORD_TABLE
#define TOKEN_TABLE \
X(init , TK_BASIC_INVALID, TOKEN_INIT) \
X(EOF , TK_BASIC_EOF, TOKEN_EOF) \
X(blank , TK_BASIC_WHITESPACE, TOKEN_BLANK) \
X("==" , TK_BASIC_OPERATOR, TOKEN_EQ) \
X("=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN) \
X("++" , TK_BASIC_OPERATOR, TOKEN_ADD_ADD) \
X("+=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_ADD) \
X("+" , TK_BASIC_OPERATOR, TOKEN_ADD) \
X("--" , TK_BASIC_OPERATOR, TOKEN_SUB_SUB) \
X("-=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_SUB) \
X("->" , TK_BASIC_OPERATOR, TOKEN_DEREF) \
X("-" , TK_BASIC_OPERATOR, TOKEN_SUB) \
X("*=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MUL) \
X("*" , TK_BASIC_OPERATOR, TOKEN_MUL) \
X("/=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_DIV) \
X("/" , TK_BASIC_OPERATOR, TOKEN_DIV) \
X("//" , TK_BASIC_COMMENT , TOKEN_LINE_COMMENT) \
X("/* */" , TK_BASIC_COMMENT , TOKEN_BLOCK_COMMENT) \
X("%=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_MOD) \
X("%" , TK_BASIC_OPERATOR, TOKEN_MOD) \
X("&&" , TK_BASIC_OPERATOR, TOKEN_AND_AND) \
X("&=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_AND) \
X("&" , TK_BASIC_OPERATOR, TOKEN_AND) \
X("||" , TK_BASIC_OPERATOR, TOKEN_OR_OR) \
X("|=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_OR) \
X("|" , TK_BASIC_OPERATOR, TOKEN_OR) \
X("^=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_XOR) \
X("^" , TK_BASIC_OPERATOR, TOKEN_XOR) \
X("<<=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_L_SH) \
X("<<" , TK_BASIC_OPERATOR, TOKEN_L_SH) \
X("<=" , TK_BASIC_OPERATOR, TOKEN_LE) \
X("<" , TK_BASIC_OPERATOR, TOKEN_LT) \
X(">>=" , TK_BASIC_OPERATOR, TOKEN_ASSIGN_R_SH) \
X(">>" , TK_BASIC_OPERATOR, TOKEN_R_SH) \
X(">=" , TK_BASIC_OPERATOR, TOKEN_GE) \
X(">" , TK_BASIC_OPERATOR, TOKEN_GT) \
X("!" , TK_BASIC_OPERATOR, TOKEN_NOT) \
X("!=" , TK_BASIC_OPERATOR, TOKEN_NEQ) \
X("~" , TK_BASIC_OPERATOR, TOKEN_BIT_NOT) \
X("[" , TK_BASIC_OPERATOR, TOKEN_L_BRACKET) \
X("]" , TK_BASIC_OPERATOR, TOKEN_R_BRACKET) \
X("(" , TK_BASIC_OPERATOR, TOKEN_L_PAREN) \
X(")" , TK_BASIC_OPERATOR, TOKEN_R_PAREN) \
X("{" , TK_BASIC_OPERATOR, TOKEN_L_BRACE) \
X("}" , TK_BASIC_OPERATOR, TOKEN_R_BRACE) \
X(";" , TK_BASIC_OPERATOR, TOKEN_SEMICOLON) \
X("," , TK_BASIC_OPERATOR, TOKEN_COMMA) \
X(":" , TK_BASIC_OPERATOR, TOKEN_COLON) \
X("." , TK_BASIC_OPERATOR, TOKEN_DOT) \
X("..." , TK_BASIC_OPERATOR, TOKEN_ELLIPSIS) \
X("?" , TK_BASIC_OPERATOR, TOKEN_COND) \
X(ident , TK_BASIC_IDENTIFIER, TOKEN_IDENT) \
X(int_literal , TK_BASIC_LITERAL, TOKEN_INT_LITERAL) \
X(float_literal , TK_BASIC_LITERAL, TOKEN_FLOAT_LITERAL) \
X(char_literal , TK_BASIC_LITERAL, TOKEN_CHAR_LITERAL) \
X(string_literal , TK_BASIC_LITERAL, TOKEN_STRING_LITERAL) \
// END
// Token-type enumeration, generated from the X-macro tables so the enum
// values stay in sync with the token-name strings used by the lexer.
typedef enum cc_tktype {
    // Ordinary tokens (operators, comments, literals, identifiers).
    #define X(str, basic, tok) tok,
    TOKEN_TABLE
    #undef X
    // Keyword tokens (same X-macro format as TOKEN_TABLE).
    #define X(name, std, tok) tok,
    KEYWORD_TABLE
    #undef X
} cc_tktype_t;
// Buffered token stream with multi-token lookahead.
typedef struct tok_stream {
    int cur;    // index of the next token to pop from buf
    int end;    // one past the last buffered token
    int peek;   // lookahead cursor; advanced by each peek_tok(), reset by flush_peek_tok()
    int size;   // number of tokens currently stored in buf
    int cap;    // capacity of buf
    tok_t* buf; // token storage (caller- or init-provided)
    void* stream;                                // opaque source handed back to gettok
    void (*gettok)(void* stream, tok_t* token);  // callback producing the next raw token
} tok_stream_t;
// Convenience alias for the token-producer callback stored in tok_stream_t.
typedef void(*tok_stream_get_func)(void* stream, tok_t* token);
// Token-stream API (implementations elsewhere).
void init_tokbuf(tok_stream_t* tokbuf, void* stream, tok_stream_get_func gettok);
tok_t* peek_tok(tok_stream_t* tokbuf);       // look at the next token, advancing only the peek cursor
tok_t* pop_tok(tok_stream_t* tokbuf);        // consume and return the next token
void flush_peek_tok(tok_stream_t* tokbuf);   // reset the lookahead cursor
cc_tktype_t peek_tok_type(tok_stream_t* tokbuf);            // type of the next peeked token
int expect_pop_tok(tok_stream_t* tokbuf, cc_tktype_t type); // pop one token, checking its type
const char* get_tok_name(cc_tktype_t type);  // printable name for a token type
#endif

View File

@@ -1,172 +0,0 @@
#include "ast.h"
/* Allocate a fresh AST node through the runtime allocator and reset it
 * to the NT_INIT state before handing it to the caller. */
ast_node_t* new_ast_node(void) {
    ast_node_t* fresh = rt._malloc(sizeof(*fresh));
    init_ast_node(fresh);
    return fresh;
}
/* Reset *node to a pristine state: type NT_INIT and every child slot NULL. */
void init_ast_node(ast_node_t* node) {
    const int slot_count = (int)(sizeof(node->children) / sizeof(node->children[0]));
    node->type = NT_INIT;
    for (int slot = 0; slot < slot_count; ++slot) {
        node->children[slot] = NULL;
    }
}
// ast_node_t* find_ast_node(ast_node_t* node, ast_type_t type) {
// }
#include <stdio.h>
/* Emit two spaces per nesting level so AST dumps line up. */
static void pnt_depth(int depth) {
    int remaining = depth;
    while (remaining-- > 0) {
        printf("  ");
    }
}
// void pnt_ast(ast_node_t* node, int depth) {
// if (!node) return;
// pnt_depth(depth);
// switch (node->type) {
// case NT_ROOT:
// for (int i = 0; i < node->root.child_size; i++) {
// pnt_ast(node->root.children[i], depth);
// }
// return;
// case NT_ADD : printf("+ \n"); break; // (expr) + (expr)
// case NT_SUB : printf("- \n"); break; // (expr) - (expr)
// case NT_MUL : printf("* \n"); break; // (expr) * (expr)
// case NT_DIV : printf("/ \n"); break; // (expr) / (expr)
// case NT_MOD : printf("%%\n"); break; // (expr) % (expr)
// case NT_AND : printf("& \n"); break; // (expr) & (expr)
// case NT_OR : printf("| \n"); break; // (expr) | (expr)
// case NT_XOR : printf("^ \n"); break; // (expr) ^ (expr)
// case NT_L_SH : printf("<<\n"); break; // (expr) << (expr)
// case NT_R_SH : printf(">>\n"); break; // (expr) >> (expr)
// case NT_EQ : printf("==\n"); break; // (expr) == (expr)
// case NT_NEQ : printf("!=\n"); break; // (expr) != (expr)
// case NT_LE : printf("<=\n"); break; // (expr) <= (expr)
// case NT_GE : printf(">=\n"); break; // (expr) >= (expr)
// case NT_LT : printf("< \n"); break; // (expr) < (expr)
// case NT_GT : printf("> \n"); break; // (expr) > (expr)
// case NT_AND_AND : printf("&&\n"); break; // (expr) && (expr)
// case NT_OR_OR : printf("||\n"); break; // (expr) || (expr)
// case NT_NOT : printf("! \n"); break; // ! (expr)
// case NT_BIT_NOT : printf("~ \n"); break; // ~ (expr)
// case NT_COMMA : printf(", \n"); break; // expr, expr 逗号运算符
// case NT_ASSIGN : printf("= \n"); break; // (expr) = (expr)
// // case NT_COND : // (expr) ? (expr) : (expr)
// case NT_STMT_EMPTY : // ;
// printf(";\n");
// break;
// case NT_STMT_IF : // if (cond) { ... } [else {...}]
// printf("if");
// pnt_ast(node->if_stmt.cond, depth+1);
// pnt_ast(node->if_stmt.if_stmt, depth+1);
// if (node->if_stmt.else_stmt) {
// pnt_depth(depth);
// printf("else");
// pnt_ast(node->if_stmt.else_stmt, depth+1);
// }
// break;
// case NT_STMT_WHILE : // while (cond) { ... }
// printf("while\n");
// pnt_ast(node->while_stmt.cond, depth+1);
// pnt_ast(node->while_stmt.body, depth+1);
// break;
// case NT_STMT_DOWHILE : // do {...} while (cond)
// printf("do-while\n");
// pnt_ast(node->do_while_stmt.body, depth+1);
// pnt_ast(node->do_while_stmt.cond, depth+1);
// break;
// case NT_STMT_FOR : // for (init; cond; iter) {...}
// printf("for\n");
// if (node->for_stmt.init)
// pnt_ast(node->for_stmt.init, depth+1);
// if (node->for_stmt.cond)
// pnt_ast(node->for_stmt.cond, depth+1);
// if (node->for_stmt.iter)
// pnt_ast(node->for_stmt.iter, depth+1);
// pnt_ast(node->for_stmt.body, depth+1);
// break;
// case NT_STMT_SWITCH : // switch (expr) { case ... }
// case NT_STMT_BREAK : // break;
// case NT_STMT_CONTINUE : // continue;
// case NT_STMT_GOTO : // goto label;
// case NT_STMT_CASE : // case const_expr:
// case NT_STMT_DEFAULT : // default:
// case NT_STMT_LABEL : // label:
// break;
// case NT_STMT_BLOCK : // { ... }
// printf("{\n");
// for (int i = 0; i < node->block.child_size; i++) {
// pnt_ast(node->block.children[i], depth+1);
// }
// pnt_depth(depth);
// printf("}\n");
// break;
// case NT_STMT_RETURN : // return expr;
// printf("return");
// if (node->return_stmt.expr_stmt) {
// printf(" ");
// pnt_ast(node->return_stmt.expr_stmt, depth+1);
// } else {
// printf("\n");
// }
// break;
// case NT_STMT_EXPR : // expr;
// printf("stmt\n");
// pnt_ast(node->expr_stmt.expr_stmt, depth);
// pnt_depth(depth);
// printf(";\n");
// break;
// case NT_DECL_VAR : // type name; or type name = expr;
// printf("decl_val\n");
// break;
// case NT_DECL_FUNC: // type func_name(param_list);
// printf("decl func %s\n", node->func.name->syms.tok.val.str);
// break;
// case NT_FUNC : // type func_name(param_list) {...}
// printf("def func %s\n", node->func.name->syms.tok.val.str);
// // pnt_ast(node->child.func.params, depth);
// pnt_ast(node->func.body, depth);
// // pnt_ast(node->child.func.ret, depth);
// break;
// case NT_PARAM : // 函数形参
// printf("param\n");
// case NT_ARG_LIST : // 实参列表需要与NT_CALL配合
// printf("arg_list\n");
// case NT_TERM_CALL : // func (expr)
// printf("call\n");
// break;
// case NT_TERM_IDENT:
// printf("%s\n", node->syms.tok.val.str);
// break;
// case NT_TERM_VAL : // Terminal Symbols like constant, identifier, keyword
// tok_t * tok = &node->syms.tok;
// switch (tok->type) {
// case TOKEN_CHAR_LITERAL:
// printf("%c\n", tok->val.ch);
// break;
// case TOKEN_INT_LITERAL:
// printf("%d\n", tok->val.i);
// break;
// case TOKEN_STRING_LITERAL:
// printf("%s\n", tok->val.str);
// break;
// default:
// printf("unknown term val\n");
// break;
// }
// default:
// break;
// }
// // 通用子节点递归处理
// if (node->type <= NT_ASSIGN) { // 表达式类统一处理子节点
// if (node->expr.left) pnt_ast(node->expr.left, depth+1);
// if (node->expr.right) pnt_ast(node->expr.right, depth + 1);
// }
// }

View File

@@ -1,190 +0,0 @@
#ifndef __AST_H__
#define __AST_H__
#include "../lexer/lexer.h"
#include <lib/utils/ds/vector.h>
#include "symtab/symtab.h"
#include "type.h"
// AST node kinds.  Ordering matters: node types up to NT_ASSIGN are treated
// as generic expressions by traversal code (see the commented-out pnt_ast,
// which tests `type <= NT_ASSIGN`).
typedef enum {
    NT_INIT,
    NT_ROOT,      // global scope in root node
    NT_ADD,       // (expr) + (expr)
    NT_SUB,       // (expr) - (expr)
    NT_MUL,       // (expr) * (expr)
    NT_DIV,       // (expr) / (expr)
    NT_MOD,       // (expr) % (expr)
    NT_AND,       // (expr) & (expr)
    NT_OR,        // (expr) | (expr)
    NT_XOR,       // (expr) ^ (expr)
    NT_L_SH,      // (expr) << (expr)
    NT_R_SH,      // (expr) >> (expr)
    NT_EQ,        // (expr) == (expr)
    NT_NEQ,       // (expr) != (expr)
    NT_LE,        // (expr) <= (expr)
    NT_GE,        // (expr) >= (expr)
    NT_LT,        // (expr) < (expr)
    NT_GT,        // (expr) > (expr)
    NT_AND_AND,   // (expr) && (expr)
    NT_OR_OR,     // (expr) || (expr)
    NT_NOT,       // ! (expr)
    NT_BIT_NOT,   // ~ (expr)
    NT_COND,      // (expr) ? (expr) : (expr)
    NT_COMMA,     // expr, expr  (comma operator)
    NT_ASSIGN,    // (expr) = (expr)
    NT_ADDRESS,   // &expr (address-of)
    NT_DEREF,     // *expr (dereference)
    NT_INDEX,     // arr[index] (array subscript)
    NT_MEMBER,    // struct.member
    NT_PTR_MEMBER,// ptr->member
    NT_CAST,      // (type)expr  explicit type conversion
    NT_SIZEOF,    // sizeof(type|expr)
    // NT_ALIGNOF, // _Alignof(type) (C11)
    NT_STMT_EMPTY,    // ;
    NT_STMT_IF,       // if (cond) { ... } [else {...}]
    NT_STMT_WHILE,    // while (cond) { ... }
    NT_STMT_DOWHILE,  // do {...} while (cond)
    NT_STMT_FOR,      // for (init; cond; iter) {...}
    NT_STMT_SWITCH,   // switch (expr) { case ... }
    NT_STMT_BREAK,    // break;
    NT_STMT_CONTINUE, // continue;
    NT_STMT_GOTO,     // goto label;
    NT_STMT_CASE,     // case const_expr:
    NT_STMT_DEFAULT,  // default:
    NT_STMT_LABEL,    // label:
    NT_STMT_BLOCK,    // { ... }
    NT_STMT_RETURN,   // return expr;
    NT_STMT_EXPR,     // expr;
    NT_BLOCK,
    // NT_TYPE_BASE,  // base type node
    // NT_TYPE_PTR,   // pointer type
    // NT_TYPE_ARRAY, // array type
    // NT_TYPE_FUNC,  // function type
    // NT_TYPE_QUAL,  // qualifier node
    NT_DECL_VAR,  // type name; or type name = expr;
    NT_DECL_FUNC, // type func_name(param_list);
    NT_FUNC,      // type func_name(param_list) {...}
    NT_PARAM,     // function formal parameter
    NT_ARG_LIST,  // actual-argument list (used together with NT_TERM_CALL)
    NT_TERM_CALL, // func (expr)
    NT_TERM_VAL,
    NT_TERM_IDENT,
    NT_TERM_TYPE,
} ast_type_t;
// Tagged AST node.  `type` selects which member of the anonymous union is
// active.
// NOTE(review): init_ast_node() NULLs only the 6 slots of `children`, but
// members such as `syms` embed a tok_t and may be larger — confirm that the
// uncovered tail never needs zero-initialization.
typedef struct ast_node {
    ast_type_t type;
    union {
        void *children[6];  // raw pointer view used by init_ast_node()
        struct {
            VECTOR_HEADER(children, struct ast_node *);
        } root;             // NT_ROOT: top-level declarations/definitions
        struct {
            VECTOR_HEADER(children, struct ast_node *);
        } block;            // NT_BLOCK: statement/declaration list
        struct {
            symtab_key_t key;            // symbol-table key (scope uid + interned name)
            struct ast_node * decl_node; // resolved declaration, filled at lookup time
            tok_t tok;                   // originating token (literal or identifier)
        } syms;             // NT_TERM_VAL / NT_TERM_IDENT terminals
        struct {
            VECTOR_HEADER(params, struct ast_node *);
        } params;           // parameter / argument list container
        struct {
            struct ast_node * name;      // callee identifier node
            struct ast_node * params;    // argument list (params vector node)
            struct ast_node * func_decl; // resolved NT_DECL_FUNC of the callee
        } call;             // NT_TERM_CALL
        struct {
            struct ast_node *type;
            struct ast_node *name;
            struct ast_node *expr_stmt; // optional initializer statement
            void* data;                 // semantic/back-end payload
        } decl_val;         // NT_DECL_VAR
        struct {
            struct ast_node *ret;
            struct ast_node *name;
            struct ast_node *params; // array of params
            struct ast_node *def;    // NT_FUNC definition; NULL while only declared
        } decl_func;        // NT_DECL_FUNC
        struct {
            struct ast_node *decl;
            struct ast_node *body; // optional
            void* data;            // semantic/back-end payload
        } func;             // NT_FUNC
        struct {
            struct ast_node *left;
            struct ast_node *right;
            struct ast_node *optional; // optional third operand (e.g. ternary)
        } expr;             // generic expression operands
        struct {
            struct ast_node *cond;
            struct ast_node *if_stmt;
            struct ast_node *else_stmt; // optional
        } if_stmt;
        struct {
            struct ast_node *cond;
            struct ast_node *body;
        } switch_stmt;
        struct {
            struct ast_node *cond;
            struct ast_node *body;
        } while_stmt;
        struct {
            struct ast_node *body;
            struct ast_node *cond;
        } do_while_stmt;
        struct {
            struct ast_node *init;
            struct ast_node *cond; // optional
            struct ast_node *iter; // optional
            struct ast_node *body;
        } for_stmt;
        struct {
            struct ast_node *expr_stmt; // optional
        } return_stmt;
        struct {
            struct ast_node *label;
        } goto_stmt;
        struct {
            struct ast_node *label;
        } label_stmt;
        struct {
            struct ast_node *block;
        } block_stmt;
        struct {
            struct ast_node *expr_stmt;
        } expr_stmt;
    };
} ast_node_t;
// Node construction / debug printing.
ast_node_t* new_ast_node(void);
void init_ast_node(ast_node_t* node);
void pnt_ast(ast_node_t* node, int depth); // debug pretty-printer
// Recursive-descent entry points (implemented across ast/*.c).
typedef struct parser parser_t;
typedef ast_node_t* (*parse_func_t) (parser_t*);
void parse_prog(parser_t* parser);
ast_node_t* parse_decl(parser_t* parser);
ast_node_t* parse_decl_val(parser_t* parser);
ast_node_t* parse_block(parser_t* parser);
ast_node_t* parse_stmt(parser_t* parser);
ast_node_t* parse_expr(parser_t* parser);
ast_node_t* parse_type(parser_t* parser);
// Token helpers shared by the parsers.
ast_node_t* new_ast_ident_node(tok_t* tok);
ast_node_t* expect_pop_ident(tok_stream_t* tokbuf);
int peek_decl(tok_stream_t* tokbuf);
#endif

View File

@@ -1,18 +0,0 @@
- ast.c 作为抽象语法树的定义
- block.c 作为块的实现主要用于处理作用域,需要符号表
- decl.c 作为声明的实现,其中主要携带变量声明,函数声明见 func.c ,需要符号表
- func.c 作为函数的实现,其中主要携带函数声明,以及函数定义,需要符号表
- expr.c 作为表达式的实现。需要符号表
- stmt.c 作为语句的实现。需要表达式类型判断合法性
- term.c 作为终结符的实现。需要表达式类型判断合法性
- program.c 作为词法分析、语义分析的入口函数,可以根据 parser 结构生成 AST
其中stmt参考cppreference
其中expr参考AI以及CParser

View File

@@ -1,49 +0,0 @@
#include "../ast.h"
#include "../parser.h"
#ifndef BLOCK_MAX_NODE
#define BLOCK_MAX_NODE (1024)
#endif
/* Build an empty NT_BLOCK node with an initialized child vector. */
ast_node_t* new_ast_node_block() {
    ast_node_t* blk = new_ast_node();
    vector_init(blk->block.children);
    blk->type = NT_BLOCK;
    return blk;
}
/**
 * Parse a braced block: '{' (declaration | statement)* '}'.
 * Enters a new symbol-table scope for the block body and leaves it on exit.
 */
ast_node_t* parse_block(parser_t* parser) {
    symtab_enter_scope(parser->symtab);
    tok_stream_t *tokbuf = &parser->tokbuf;
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype;
    ast_node_t* node = new_ast_node_block();
    expect_pop_tok(tokbuf, TOKEN_L_BRACE);
    ast_node_t* child = NULL;
    while (1) {
        // Declarations take priority; peek_decl() only moves the peek cursor.
        if (peek_decl(tokbuf)) {
            child = parse_decl(parser);
            vector_push(node->block.children, child);
            continue;
        }
        flush_peek_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        switch (ttype) {
        case TOKEN_R_BRACE: {
            pop_tok(tokbuf); // consume '}' and finish the block
            goto END;
        }
        default: {
            child = parse_stmt(parser);
            vector_push(node->block.children, child);
            break;
        }
        }
    }
END:
    symtab_leave_scope(parser->symtab);
    return node;
}

View File

@@ -1,98 +0,0 @@
#include "../ast.h"
#include "../parser.h"
/**
 * Look ahead to decide whether the coming tokens start a declaration.
 *
 * Returns 1 when a type keyword (void/char/short/int/long/float/double) is
 * next, 0 otherwise.  Storage-class specifiers (static/extern/register/
 * typedef) are recognized but not yet supported.  On success the peek
 * cursor is left just past the type keyword (parse_decl relies on this);
 * on failure the cursor is re-flushed.
 */
int peek_decl(tok_stream_t* tokbuf) {
    flush_peek_tok(tokbuf);
    switch (peek_tok_type(tokbuf)) {
    case TOKEN_STATIC:
    case TOKEN_EXTERN:
    case TOKEN_REGISTER:
    case TOKEN_TYPEDEF:
        // Storage-class specifiers are detected but not handled yet.
        LOG_ERROR("not implemented");
        break;
    default:
        flush_peek_tok(tokbuf);
    }
    switch (peek_tok_type(tokbuf)) {
    case TOKEN_VOID:
    case TOKEN_CHAR:
    case TOKEN_SHORT:
    case TOKEN_INT:
    case TOKEN_LONG:
    case TOKEN_FLOAT:
    case TOKEN_DOUBLE:
        // FIXME Ptr: pointer declarators are not detected yet.
        return 1;
    default:
        flush_peek_tok(tokbuf);
    }
    return 0;
}
/**
 * Parse a variable declaration: `type name ;` or `type name = expr ;`.
 * Registers the name in the current symbol-table scope and returns the
 * NT_DECL_VAR node.  The identifier is only peeked here; on the '='
 * path it stays unconsumed so parse_stmt() sees `name = expr ;` and
 * produces the initializer as an expression statement.
 */
ast_node_t* parse_decl_val(parser_t* parser) {
    tok_stream_t* tokbuf = &parser->tokbuf;
    cc_tktype_t ttype;
    flush_peek_tok(tokbuf);
    ast_node_t* node;
    ast_node_t* type_node = parse_type(parser);
    flush_peek_tok(tokbuf);
    ast_node_t* name_node = new_ast_ident_node(peek_tok(tokbuf));
    node = new_ast_node();
    node->decl_val.type = type_node;
    node->decl_val.name = name_node;
    node->type = NT_DECL_VAR;
    // NOTE(review): the symbol key is stored on type_node rather than the
    // identifier node — confirm this is intentional (other sites key off
    // the identifier's syms).
    type_node->syms.key.uid = parser->symtab->cur_scope->uid;
    type_node->syms.key.strp_name = name_node->syms.tok.val.str;
    symtab_add(parser->symtab, &type_node->syms.key, node);
    ttype = peek_tok_type(tokbuf);
    if (ttype == TOKEN_ASSIGN) {
        node->decl_val.expr_stmt = parse_stmt(parser);
        if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) {
            LOG_ERROR("parser_decl_val want stmt_expr");
        }
    } else if (ttype == TOKEN_SEMICOLON) {
        // pop the still-pending identifier, then the ';'
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
    } else {
        LOG_ERROR("parser_decl_val syntax error");
    }
    return node;
}
/**
 * Parse one declaration.  Returns the NT_DECL_VAR node for a variable
 * declaration, or NULL when the tokens actually form a function
 * declaration/definition (the caller then invokes parse_func, which
 * re-parses from the type keyword).  Relies on peek_decl() leaving the
 * lookahead cursor just past the type keyword.
 */
ast_node_t* parse_decl(parser_t* parser) {
    tok_stream_t* tokbuf = &parser->tokbuf;
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype;
    ast_node_t* node;
    if (peek_decl(tokbuf) == 0) {
        LOG_ERROR("syntax error expect decl_val TYPE");
    }
    // The peek cursor now sits after the type keyword: expect the name.
    if (peek_tok_type(tokbuf) != TOKEN_IDENT) {
        LOG_ERROR("syntax error expect decl_val IDENT");
    }
    // The token after the identifier decides the declaration form.
    ttype = peek_tok_type(tokbuf);
    switch (ttype) {
    case TOKEN_L_PAREN: // '(' => function: handled by parse_func
        return NULL;
        break;
    case TOKEN_ASSIGN:
    case TOKEN_SEMICOLON:
        node = parse_decl_val(parser);
        break;
    default:
        LOG_ERROR("syntax error expect decl_val ASSIGN or SEMICOLON");
        return NULL;
    }
    return node;
}

View File

@@ -1,425 +0,0 @@
#include "../ast.h"
#include "../parser.h"
// Copy from `CParse`
/**
* Operator precedence classes
*/
// Operator precedence levels, lowest (PREC_BOTTOM) to highest (PREC_TOP).
// parse_subexpression() keeps folding infix operators while the table
// precedence of the pending token is strictly greater than its own level.
enum Precedence {
    PREC_BOTTOM,
    PREC_EXPRESSION,     /* ,                                  left to right */
    PREC_ASSIGNMENT,     /* = += -= *= /= %= <<= >>= &= ^= |=  right to left */
    PREC_CONDITIONAL,    /* ?:                                 right to left */
    PREC_LOGICAL_OR,     /* ||                                 left to right */
    PREC_LOGICAL_AND,    /* &&                                 left to right */
    PREC_OR,             /* |                                  left to right */
    PREC_XOR,            /* ^                                  left to right */
    PREC_AND,            /* &                                  left to right */
    PREC_EQUALITY,       /* == !=                              left to right */
    PREC_RELATIONAL,     /* < <= > >=                          left to right */
    PREC_SHIFT,          /* << >>                              left to right */
    PREC_ADDITIVE,       /* + -                                left to right */
    PREC_MULTIPLICATIVE, /* * / %                              left to right */
    PREC_CAST,           /* (type)                             right to left */
    PREC_UNARY,          /* ! ~ ++ -- + - * & sizeof           right to left */
    PREC_POSTFIX,        /* () [] -> .                         left to right */
    PREC_PRIMARY,
    PREC_TOP
};
// How an expr_table entry participates in parsing: INFIX_PARSER handles an
// operator between two operands; PREFIX_PARSER would start an expression
// (currently all prefix entries carry NULL parsers).
enum ParseType {
    INFIX_PARSER,
    PREFIX_PARSER,
};
static ast_node_t *parse_subexpression(tok_stream_t* tokbuf, symtab_t *symtab, enum Precedence prec);
#define NEXT(prec) parse_subexpression(tokbuf, symtab, prec)
/**
 * Build a binary-expression node of the given AST type from two
 * already-parsed operands.  No type checking is performed here (yet).
 */
static ast_node_t* gen_node2(ast_node_t* left, ast_node_t* right,
                             ast_type_t type) {
    ast_node_t* node = new_ast_node();
    node->type = type;
    node->expr.left = left;
    node->expr.right = right;
    return node;
}
/**
 * Infix parser for the comma operator: evaluate left, then right.
 * NOTE(review): unlike the other infix parsers this never pops the ','
 * token before recursing — verify against the tok_stream peek/pop
 * semantics that the comma is actually consumed.
 */
static ast_node_t* parse_comma(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    ast_node_t* node = new_ast_node();
    node->type = NT_COMMA;
    node->expr.left = left;
    node->expr.right = NEXT(PREC_EXPRESSION);
    return node;
}
/**
 * Infix parser for '=' and all compound assignments.  `left` is the target;
 * `x op= e` is desugared into NT_ASSIGN(x, NT_op(x, e)), so the target node
 * is shared between the assignment LHS and the desugared operand.
 * NOTE(review): the RHS is parsed at PREC_ASSIGNMENT + 1, which makes
 * assignment group left-to-right here; C assignment is right-associative
 * (`a = b = c` should group as `a = (b = c)`) — confirm intent.
 */
static ast_node_t* parse_assign(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    pop_tok(tokbuf); // consume the assignment operator
    ast_node_t* node = new_ast_node();
    node->type = NT_ASSIGN;
    // stash the assignment target
    node->expr.left = left;
    enum Precedence next = PREC_ASSIGNMENT + 1;
    switch (ttype) {
    case TOKEN_ASSIGN :
        left = NEXT(next);
        break;
    case TOKEN_ASSIGN_ADD :
        left = gen_node2(left, NEXT(next), NT_ADD);
        break;
    case TOKEN_ASSIGN_SUB :
        left = gen_node2(left, NEXT(next), NT_SUB);
        break;
    case TOKEN_ASSIGN_MUL :
        left = gen_node2(left, NEXT(next), NT_MUL);
        break;
    case TOKEN_ASSIGN_DIV :
        left = gen_node2(left, NEXT(next), NT_DIV);
        break;
    case TOKEN_ASSIGN_MOD :
        left = gen_node2(left, NEXT(next), NT_MOD);
        break;
    case TOKEN_ASSIGN_L_SH :
        left = gen_node2(left, NEXT(next), NT_L_SH);
        break;
    case TOKEN_ASSIGN_R_SH :
        left = gen_node2(left, NEXT(next), NT_R_SH);
        break;
    case TOKEN_ASSIGN_AND :
        left = gen_node2(left, NEXT(next), NT_AND);
        break;
    case TOKEN_ASSIGN_OR :
        left = gen_node2(left, NEXT(next), NT_OR);
        break;
    case TOKEN_ASSIGN_XOR :
        left = gen_node2(left, NEXT(next), NT_XOR);
        break;
    default:
        LOG_ERROR("unsupported operator");
        break;
    }
    // `left` now holds the (possibly desugared) right-hand expression.
    node->expr.right = left;
    return node;
}
/**
 * Infix parser for comparison operators.  Pops the pending operator and
 * parses the RHS at that operator's own level; equal precedence breaks the
 * Pratt loop, yielding left-to-right associativity.
 */
static ast_node_t* parse_cmp(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    pop_tok(tokbuf); // consume the comparison operator
    ast_node_t* node = new_ast_node();
    // stash the already-parsed left operand
    node->expr.left = left;
    switch (ttype) {
    case TOKEN_EQ:
        node->type = NT_EQ;
        node->expr.right = NEXT(PREC_EQUALITY);
        break;
    case TOKEN_NEQ:
        node->type = NT_NEQ;
        node->expr.right = NEXT(PREC_EQUALITY);
        break;
    case TOKEN_LT:
        node->type = NT_LT;
        node->expr.right = NEXT(PREC_RELATIONAL);
        break;
    case TOKEN_GT:
        node->type = NT_GT;
        node->expr.right = NEXT(PREC_RELATIONAL);
        break;
    case TOKEN_LE:
        node->type = NT_LE;
        node->expr.right = NEXT(PREC_RELATIONAL);
        break;
    case TOKEN_GE:
        node->type = NT_GE;
        node->expr.right = NEXT(PREC_RELATIONAL);
        break;
    default:
        LOG_ERROR("invalid operator");
    }
    return node;
}
/**
 * Infix parser shared by the plain binary operators (arithmetic, bitwise,
 * shift, logical).  Pops the pending operator and parses the RHS at the
 * operator's own precedence level (left-to-right associativity via the
 * `<=` cutoff in parse_subexpression).
 */
static ast_node_t* parse_cal(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    pop_tok(tokbuf); // consume the operator token
    ast_node_t* node = new_ast_node();
    node->expr.left = left;
    switch (ttype) {
    case TOKEN_OR_OR:
        node->type = NT_OR_OR;
        node->expr.right = NEXT(PREC_LOGICAL_OR);
        break;
    case TOKEN_AND_AND:
        node->type = NT_AND_AND;
        node->expr.right = NEXT(PREC_LOGICAL_AND);
        break;
    case TOKEN_OR:
        node->type = NT_OR;
        node->expr.right = NEXT(PREC_OR);
        break;
    case TOKEN_XOR:
        node->type = NT_XOR;
        node->expr.right = NEXT(PREC_XOR);
        break;
    case TOKEN_AND:
        node->type = NT_AND;
        node->expr.right = NEXT(PREC_AND);
        break;
    case TOKEN_L_SH:
        node->type = NT_L_SH;
        node->expr.right = NEXT(PREC_SHIFT);
        break;
    case TOKEN_R_SH:
        node->type = NT_R_SH;
        node->expr.right = NEXT(PREC_SHIFT);
        break;
    case TOKEN_ADD:
        node->type = NT_ADD;
        node->expr.right = NEXT(PREC_ADDITIVE);
        break;
    case TOKEN_SUB:
        node->type = NT_SUB;
        node->expr.right = NEXT(PREC_ADDITIVE);
        break;
    case TOKEN_MUL:
        node->type = NT_MUL;
        node->expr.right = NEXT(PREC_MULTIPLICATIVE);
        break;
    case TOKEN_DIV:
        node->type = NT_DIV;
        node->expr.right = NEXT(PREC_MULTIPLICATIVE);
        break;
    case TOKEN_MOD:
        node->type = NT_MOD;
        node->expr.right = NEXT(PREC_MULTIPLICATIVE);
        break;
    default:
        // Unreachable via expr_table routing; leaves node->type as NT_INIT.
        break;
    }
    return node;
}
/**
 * Parse a call expression `ident ( arg {, arg} )`.  The identifier node has
 * already been produced; '(' is the pending token on entry.  Arguments are
 * collected into a params vector and the callee must already be declared in
 * the symbol table (LOG_FATAL otherwise).
 */
static ast_node_t* parse_call(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* ident) {
    ast_node_t* node = new_ast_node();
    node->type = NT_TERM_CALL;
    node->call.name = ident;
    node->call.params = new_ast_node();
    vector_init(node->call.params->params.params);
    pop_tok(tokbuf); // skip '('
    cc_tktype_t ttype;
    while (1) {
        flush_peek_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype == TOKEN_R_PAREN) {
            break;
        }
        ast_node_t* param = NEXT(PREC_EXPRESSION);
        vector_push(node->call.params->params.params, param);
        flush_peek_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype == TOKEN_COMMA) pop_tok(tokbuf); // separator between args
    }
    pop_tok(tokbuf); // skip ')'
    ast_node_t* sym = symtab_get(symtab, &ident->syms.key);
    // TODO check func is match
    if (sym == NULL || sym->type != NT_DECL_FUNC) {
        LOG_FATAL("function not decl %s", ident->syms.key.strp_name);
    }
    node->call.name = ident; // NOTE(review): redundant — already set above
    node->call.func_decl = sym;
    return node;
}
/**
 * Parse a parenthesized subexpression: '(' expr ')'.
 * The incoming `left` is ignored and replaced by the inner expression.
 */
static ast_node_t* parse_paren(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_L_PAREN);
    left = NEXT(PREC_EXPRESSION);
    flush_peek_tok(tokbuf);
    expect_pop_tok(tokbuf, TOKEN_R_PAREN);
    return left;
}
// Signature shared by all table-driven expression parsers.
typedef ast_node_t* (*parse_expr_fun_t)(tok_stream_t*, symtab_t* , ast_node_t*);
// Pratt-style dispatch table indexed by token type: which parser handles a
// token in expression position, at what precedence, and whether it acts as
// an infix or prefix operator.  Unlisted tokens are zero-initialized
// ({NULL, PREC_BOTTOM, INFIX_PARSER}) and terminate the Pratt loop.
static struct expr_prec_table_t {
    parse_expr_fun_t parser;
    enum Precedence prec;
    enum ParseType ptype;
} expr_table [256] = {
    [TOKEN_COMMA] = {parse_comma, PREC_EXPRESSION, INFIX_PARSER},
    [TOKEN_ASSIGN] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_ADD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_SUB] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_MUL] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_DIV] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_MOD] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_L_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_R_SH] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_AND] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_OR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_ASSIGN_XOR] = {parse_assign, PREC_ASSIGNMENT, INFIX_PARSER},
    [TOKEN_OR_OR] = {parse_cal, PREC_LOGICAL_OR , INFIX_PARSER},
    [TOKEN_AND_AND] = {parse_cal, PREC_LOGICAL_AND, INFIX_PARSER},
    [TOKEN_OR] = {parse_cal, PREC_OR , INFIX_PARSER},
    [TOKEN_XOR] = {parse_cal, PREC_XOR , INFIX_PARSER},
    [TOKEN_AND] = {parse_cal, PREC_AND , INFIX_PARSER},
    [TOKEN_EQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
    [TOKEN_NEQ] = {parse_cmp, PREC_EQUALITY, INFIX_PARSER},
    [TOKEN_LT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
    [TOKEN_LE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
    [TOKEN_GT] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
    [TOKEN_GE] = {parse_cmp, PREC_RELATIONAL, INFIX_PARSER},
    [TOKEN_L_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
    [TOKEN_R_SH] = {parse_cal, PREC_SHIFT , INFIX_PARSER},
    [TOKEN_ADD] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
    [TOKEN_SUB] = {parse_cal, PREC_ADDITIVE , INFIX_PARSER},
    [TOKEN_MUL] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
    [TOKEN_DIV] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
    [TOKEN_MOD] = {parse_cal, PREC_MULTIPLICATIVE , INFIX_PARSER},
    // Prefix operators are registered but have no parsers yet.
    [TOKEN_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
    [TOKEN_BIT_NOT] = {NULL, PREC_UNARY, PREFIX_PARSER},
    [TOKEN_ADD_ADD] = {NULL, PREC_UNARY, PREFIX_PARSER},
    [TOKEN_SUB_SUB] = {NULL, PREC_UNARY, PREFIX_PARSER},
    // + - * & sizeof
    [TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
};
/**
 * Parse a primary expression: literal, identifier, or function call.
 * Returns NULL (with nothing consumed) for tokens that cannot start a
 * primary expression.
 */
static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symtab) {
    flush_peek_tok(tokbuf);
    tok_t* tok = peek_tok(tokbuf);
    ast_node_t *node = new_ast_node();
    node->type = NT_TERM_VAL;
    node->syms.tok = *tok;
    switch (tok->sub_type) {
    case TOKEN_INT_LITERAL:
        // node->data.data_type = TYPE_INT;
        break;
    case TOKEN_FLOAT_LITERAL:
        LOG_WARN("float not supported");
        break;
    case TOKEN_CHAR_LITERAL:
        // node->data.data_type = TYPE_CHAR;
        break;
    case TOKEN_STRING_LITERAL:
        // node->data.data_type = TYPE_POINTER;
        // NOTE(review): no break here — a string literal falls through into
        // the identifier path below; confirm this is intentional.
    case TOKEN_IDENT:
        // NOTE(review): expect_pop_ident() allocates a fresh node, so the
        // `node` allocated above is leaked on this path.
        node = expect_pop_ident(tokbuf);
        cc_tktype_t ttype = peek_tok_type(tokbuf);
        node->syms.key.uid = 0;
        node->syms.key.strp_name = tok->val.str;
        if (ttype == TOKEN_L_PAREN) {
            node = parse_call(tokbuf, symtab, node);
        } else {
            // Plain identifier: must already exist in the symbol table.
            void *sym = symtab_get(symtab, &node->syms.key);
            if (sym == NULL) {
                LOG_ERROR("undefined symbol but use %s", tok->val.str);
            }
            node->type = NT_TERM_IDENT;
            node->syms.decl_node = sym;
        }
        goto END;
    default:
        return NULL;
    }
    pop_tok(tokbuf); // consume the literal token itself
END:
    return node;
}
/**
 * Core Pratt loop: look at the pending token, dispatch through expr_table,
 * and keep folding infix operators while their precedence is strictly
 * greater than `prec`.  Stops on ';' / ')' or on an operator at or below
 * the current level.
 * NOTE(review): `left` can be read before any assignment when the first
 * token routes to an infix entry, and all prefix entries carry NULL
 * parsers (they fall into parse_primary_expression, which returns NULL for
 * them) — both paths look unfinished; confirm.
 */
static ast_node_t *parse_subexpression(tok_stream_t* tokbuf, symtab_t *symtab, enum Precedence prec) {
    cc_tktype_t ttype;
    struct expr_prec_table_t* work;
    ast_node_t* left;
    while (1) {
        flush_peek_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        work = &expr_table[ttype];
        // FIXME: hard-coded expression terminators
        if (ttype == TOKEN_SEMICOLON || ttype == TOKEN_R_PAREN) {
            break;
        }
        // `work` points into a static array, so the NULL test never fires.
        if (work == NULL || work->parser == NULL || work->ptype == PREFIX_PARSER) {
            if (work->parser != NULL) {
                left = work->parser(tokbuf, symtab, NULL);
            } else {
                left = parse_primary_expression(tokbuf, symtab);
            }
        } else if (work->ptype == INFIX_PARSER) {
            if (work->parser == NULL)
                break;
            if (work->prec <= prec)
                break;
            left = work->parser(tokbuf, symtab, left);
        }
        // assert(left != NULL);
    }
    return left;
}
/**
 * Public expression entry point: checks that the pending token can start an
 * expression, then delegates to the Pratt parser at the lowest level.
 *
 * Returns the parsed expression tree, or NULL after reporting an error on a
 * token that cannot begin an expression.  (The explicit NULL return fixes
 * the original falling off the end of a value-returning function, which is
 * undefined behavior in C when the caller uses the result.)
 */
ast_node_t* parse_expr(parser_t* parser) {
    tok_stream_t* tokbuf = &(parser->tokbuf);
    symtab_t *symtab = parser->symtab;
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    switch (ttype) {
    case TOKEN_NOT:
    case TOKEN_AND:
    case TOKEN_L_PAREN:
    case TOKEN_MUL:
    case TOKEN_ADD:
    case TOKEN_SUB:
    case TOKEN_BIT_NOT:
    case TOKEN_AND_AND:
    case TOKEN_CHAR_LITERAL:
    case TOKEN_INT_LITERAL:
    case TOKEN_STRING_LITERAL:
    case TOKEN_ADD_ADD:
    case TOKEN_SUB_SUB:
    case TOKEN_SIZEOF:
    case TOKEN_IDENT:
        return NEXT(PREC_EXPRESSION);
    default:
        LOG_ERROR("Want expr but not got %s", get_tok_name(ttype));
        break;
    }
    return NULL;
}

View File

@@ -1,176 +0,0 @@
#include "../ast.h"
#include "../parser.h"
#ifndef FUNC_PARAM_CACHE_SIZE
#define FUNC_PARAM_CACHE_SIZE 32 // 合理初始值可覆盖99%常见情况
#endif
// TODO semantic analysis: push parameters into the symbol table properly.
/**
 * Parse the buffered parameter-list tokens (collected by
 * check_is_func_decl) into node->decl_func.params.  Each identifier becomes
 * an NT_DECL_VAR registered in the current (function) scope.  `depth`
 * tracks nested parens so the list's own closing ')' ends the loop.
 */
static void parse_params(parser_t* parser, tok_stream_t* cache, ast_node_t* node) {
    flush_peek_tok(cache);
    cc_tktype_t ttype;
    ast_node_t *params = new_ast_node();
    node->decl_func.params = params;
    vector_init(params->params.params);
    int depth = 1;
    while (depth) {
        ttype = peek_tok_type(cache);
        switch (ttype) {
        case TOKEN_COMMA:
            break; // separator: nothing to build
        case TOKEN_ELLIPSIS:
            ttype = peek_tok_type(cache);
            if (ttype != TOKEN_R_PAREN) {
                LOG_ERROR("... must be a last parameter list (expect ')')");
            }
            // TODO variadic parameters
            LOG_ERROR("not implement");
            break;
        case TOKEN_IDENT:
            // TODO static arrays
            flush_peek_tok(cache);
            ast_node_t* id_node = new_ast_ident_node(peek_tok(cache));
            // NOTE(review): this local `node` shadows the function parameter
            // of the same name.
            ast_node_t* node = new_ast_node();
            node->type = NT_DECL_VAR;
            node->decl_val.name = id_node;
            // TODO typing sys
            node->decl_val.type = NULL;
            node->decl_val.expr_stmt = NULL;
            node->decl_val.data = NULL;
            vector_push(params->params.params, node);
            id_node->syms.key.uid = parser->symtab->cur_scope->uid;
            id_node->syms.key.strp_name = id_node->syms.tok.val.str;
            symtab_add(parser->symtab, &id_node->syms.key, node);
            break;
        case TOKEN_L_PAREN: {
            depth++;
            break;
        }
        case TOKEN_R_PAREN: {
            depth--;
            break;
        }
        default:
            break;
            // TODO type parsing driven from the cache
            // parse_type(parser);
            // TODO type parse
            // ttype = peekcachetype(cache);
            // ttype = peekcachetype(cache);
            // if (ttype != TOKEN_IDENT) {
            // node->node_type = NT_DECL_FUNC;
            // flush_peek_tok(tokbuf);
            // continue;
            // }
            // LOG_ERROR("function expected ')' or ','\n");
        }
        pop_tok(cache);
    }
}
/**
 * Buffer the '(' ... ')' parameter tokens into `cache` and classify what
 * follows: ';' => NT_DECL_FUNC (prototype; the ';' is consumed), '{' =>
 * NT_FUNC (definition; the '{' is left for parse_block).
 * NOTE(review): on any other follower LOG_ERROR fires and control falls off
 * the end without returning a value — undefined behavior if LOG_ERROR
 * returns; confirm it aborts.
 */
ast_type_t check_is_func_decl(tok_stream_t* tokbuf, tok_stream_t* cache) {
    expect_pop_tok(tokbuf, TOKEN_L_PAREN);
    int depth = 1; // paren nesting; the list's closing ')' brings it to 0
    while (depth) {
        tok_t* tok = peek_tok(tokbuf);
        pop_tok(tokbuf);
        if (cache->size >= cache->cap - 1) {
            LOG_ERROR("function parameter list too long");
        }
        cache->buf[cache->size++] = *tok; // copy token into the fixed cache
        switch (tok->sub_type) {
        case TOKEN_L_PAREN:
            depth++;
            break;
        case TOKEN_R_PAREN:
            depth--;
            break;
        default:
            break;
        }
    }
    cache->end = cache->size;
    switch (peek_tok_type(tokbuf)) {
    case TOKEN_SEMICOLON:
        pop_tok(tokbuf);
        return NT_DECL_FUNC;
    case TOKEN_L_BRACE:
        return NT_FUNC;
        break;
    default:
        LOG_ERROR("function define or decl need '{' or ';' but you don't got");
    }
}
/* Create an NT_DECL_FUNC node wired to its return-type and name nodes;
 * the definition slot starts out empty. */
static ast_node_t* new_ast_node_funcdecl(ast_node_t* ret, ast_node_t* name) {
    ast_node_t* decl = new_ast_node();
    decl->type = NT_DECL_FUNC;
    decl->decl_func.def = NULL;
    decl->decl_func.name = name;
    decl->decl_func.ret = ret;
    return decl;
}
/**
 * Parse a function declaration or definition starting at the return type.
 * Prototypes are recorded in the symbol table; a definition is attached to
 * its (possibly pre-existing) declaration via decl_func.def.
 */
void parse_func(parser_t* parser) {
    tok_stream_t* tokbuf = &(parser->tokbuf);
    flush_peek_tok(tokbuf);
    ast_node_t* ret_node = parse_type(parser);
    ast_node_t* name_node = expect_pop_ident(tokbuf);
    const char* func_name = name_node->syms.tok.val.str;
    ast_node_t* decl = new_ast_node_funcdecl(ret_node, name_node);
    // Parameter tokens are buffered into a fixed-size stack cache so the
    // declaration/definition split can be decided before parsing them.
    tok_stream_t cache;
    init_tokbuf(&cache, NULL, NULL);
    cache.cap = FUNC_PARAM_CACHE_SIZE;
    tok_t buf[FUNC_PARAM_CACHE_SIZE];
    cache.buf = buf;
    ast_type_t type = check_is_func_decl(&(parser->tokbuf), &cache);
    name_node->syms.key.uid = parser->symtab->cur_scope->uid;
    name_node->syms.key.strp_name = func_name;
    ast_node_t* prev = symtab_get(parser->symtab, &name_node->syms.key);
    // TODO Change something
    if (prev != NULL) {
        if (prev->type != NT_DECL_FUNC) {
            LOG_ERROR("the symbol duplicate old is %d, new is func", prev->type);
        }
        // TODO check redeclare func is match
        if (type == NT_FUNC) {
            // Reuse the earlier declaration node for this definition.
            // TODO Free decl; (only the node itself is freed — its child
            // nodes ret_node/name_node are not released here)
            rt._free(decl);
            decl = prev;
            goto FUNC;
        }
        return; // repeated prototype: nothing more to do
    }
    symtab_add(parser->symtab, &name_node->syms.key, decl);
    vector_push(parser->root->root.children, decl);
    if (type == NT_DECL_FUNC) {
        return; // prototype only
    }
    FUNC:
    // decl_func.def doubles as a redefinition guard.
    if (decl->decl_func.def != NULL) {
        LOG_ERROR("redefinition of function %s", func_name);
    }
    ast_node_t* node = new_ast_node();
    node->type = NT_FUNC;
    node->func.decl = decl;
    node->func.data = NULL;
    decl->decl_func.def = node;
    symtab_enter_scope(parser->symtab);
    parse_params(parser, &cache, decl);
    node->func.body = parse_block(parser);
    symtab_leave_scope(parser->symtab);
    vector_push(parser->root->root.children, node);
}

View File

@@ -1,34 +0,0 @@
#include "../ast.h"
#include "../parser.h"
#ifndef PROG_MAX_NODE_SIZE
#define PROG_MAX_NODE_SIZE (1024 * 4)
#endif
void parse_func(parser_t* parser); // implemented in func.c
/**
 * Parse a whole translation unit into parser->root (NT_ROOT).
 *
 * Program := (Declaration | Definition)*
 *
 * parse_decl() returns NULL when the tokens form a function, in which case
 * parse_func() takes over (it pushes its own nodes onto the root vector).
 */
void parse_prog(parser_t* parser) {
    tok_stream_t *tokbuf = &(parser->tokbuf);
    parser->root = new_ast_node();
    ast_node_t* node;
    parser->root->type = NT_ROOT;
    vector_init(parser->root->root.children);
    while (1) {
        flush_peek_tok(tokbuf);
        if (peek_tok_type(tokbuf) == TOKEN_EOF) {
            break;
        }
        node = parse_decl(parser);
        if (node == NULL) {
            parse_func(parser); // function declaration/definition path
        } else {
            vector_push(parser->root->root.children, node);
        }
    }
    return;
}

View File

@@ -1,246 +0,0 @@
#include "../ast.h"
#include "../parser.h"
/**
 * Parse a single C statement and return its AST node.
 *
 * Dispatches on the first token: control-flow statements (if/switch/while/
 * do/for), jumps (break/continue/return/goto), labels, blocks, the empty
 * statement, and expression statements.  The returned node's `type` field
 * identifies which statement was parsed.
 *
 * FIX: the TOKEN_GOTO case used to expect ';' *before* consuming the label
 * identifier, so `goto L;` could never parse; the two steps are now in the
 * correct order.
 */
ast_node_t* parse_stmt(parser_t* parser) {
    tok_stream_t* tokbuf = &parser->tokbuf;
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    ast_node_t* node = new_ast_node();
    switch (ttype) {
    case TOKEN_IF: {
        /**
         * if (exp) stmt
         * if (exp) stmt else stmt
         */
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_L_PAREN);
        node->if_stmt.cond = parse_expr(parser);
        flush_peek_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        node->if_stmt.if_stmt = parse_stmt(parser);
        ttype = peek_tok_type(tokbuf);
        if (ttype == TOKEN_ELSE) {
            pop_tok(tokbuf);
            node->if_stmt.else_stmt = parse_stmt(parser);
        } else {
            node->if_stmt.else_stmt = NULL;
        }
        node->type = NT_STMT_IF;
        break;
    }
    case TOKEN_SWITCH: {
        /**
         * switch (exp) stmt
         */
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_L_PAREN);
        node->switch_stmt.cond = parse_expr(parser);
        expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        node->switch_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_SWITCH;
        break;
    }
    case TOKEN_WHILE: {
        /**
         * while (exp) stmt
         */
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_L_PAREN);
        node->while_stmt.cond = parse_expr(parser);
        expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        node->while_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_WHILE;
        break;
    }
    case TOKEN_DO: {
        /**
         * do stmt while (exp)
         */
        pop_tok(tokbuf);
        node->do_while_stmt.body = parse_stmt(parser);
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_WHILE) {
            LOG_ERROR("expected while after do");
        }
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_L_PAREN);
        node->do_while_stmt.cond = parse_expr(parser);
        expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        node->type = NT_STMT_DOWHILE;
        break;
    }
    case TOKEN_FOR: {
        /**
         * for (init; [cond]; [iter]) stmt
         */
        pop_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_L_PAREN) {
            LOG_ERROR("expected ( after for");
        }
        pop_tok(tokbuf);
        // init clause: either a declaration or an expression
        // TODO need add this feature
        if (peek_decl(tokbuf)) {
            node->for_stmt.init = parse_decl_val(parser);
        } else {
            node->for_stmt.init = parse_expr(parser);
            expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        }
        // cond clause: expression or empty
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_SEMICOLON) {
            node->for_stmt.cond = parse_expr(parser);
            expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        } else {
            node->for_stmt.cond = NULL;
            pop_tok(tokbuf);
        }
        // iter clause: expression or empty
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_R_PAREN) {
            node->for_stmt.iter = parse_expr(parser);
            expect_pop_tok(tokbuf, TOKEN_R_PAREN);
        } else {
            node->for_stmt.iter = NULL;
            pop_tok(tokbuf);
        }
        node->for_stmt.body = parse_stmt(parser);
        node->type = NT_STMT_FOR;
        break;
    }
    case TOKEN_BREAK: {
        /**
         * break ;
         */
        // TODO check: must terminate the innermost enclosing
        // for/while/do-while loop or switch statement.
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        node->type = NT_STMT_BREAK;
        break;
    }
    case TOKEN_CONTINUE: {
        /**
         * continue ;
         */
        // TODO check: must skip the remainder of the enclosing
        // for/while/do-while loop body.
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        node->type = NT_STMT_CONTINUE;
        break;
    }
    case TOKEN_RETURN: {
        /**
         * return [exp] ;
         */
        // Terminates the current function, optionally yielding a value.
        pop_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_SEMICOLON) {
            node->return_stmt.expr_stmt = parse_expr(parser);
            flush_peek_tok(tokbuf);
            expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        } else {
            node->return_stmt.expr_stmt = NULL;
            pop_tok(tokbuf);
        }
        node->type = NT_STMT_RETURN;
        break;
    }
    case TOKEN_GOTO: {
        /**
         * goto label ;
         */
        // TODO check label: transfers control unconditionally to the
        // labelled statement (resolve against the symbol table later).
        pop_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_IDENT) {
            LOG_ERROR("expect identifier after goto");
        }
        // FIX: consume the label identifier *before* the ';'.  The
        // original expected the semicolon first, which always failed.
        // TODO filling label
        node->goto_stmt.label = expect_pop_ident(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
        node->type = NT_STMT_GOTO;
        break;
    }
    case TOKEN_SEMICOLON: {
        /**
         * ;
         * empty stmt used by:
         *   while () ;   if () ;   for () ;
         */
        pop_tok(tokbuf);
        node->type = NT_STMT_EMPTY;
        break;
    }
    case TOKEN_L_BRACE: {
        /**
         * stmt_block like: { (decl_var | stmt) ... }
         */
        node->block_stmt.block = parse_block(parser);
        node->type = NT_STMT_BLOCK;
        break;
    }
    case TOKEN_IDENT: {
        // Lookahead 2: `ident :` is a label; anything else falls back to
        // an expression statement (TODO: goto label resolution).
        if (peek_tok_type(tokbuf) != TOKEN_COLON) {
            goto EXP;
        }
        node->label_stmt.label = expect_pop_ident(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_COLON);
        node->type = NT_STMT_LABEL;
        break;
    }
    case TOKEN_CASE: {
        // TODO switch case label
        pop_tok(tokbuf);
        LOG_ERROR("unimplemented switch label");
        node->label_stmt.label = parse_expr(parser);
        // TODO the expression must be a constant integer
        expect_pop_tok(tokbuf, TOKEN_COLON);
        node->type = NT_STMT_CASE;
        break;
    }
    case TOKEN_DEFAULT: {
        // TODO switch default label
        pop_tok(tokbuf);
        expect_pop_tok(tokbuf, TOKEN_COLON);
        node->type = NT_STMT_DEFAULT;
        break;
    }
    default: {
        /**
         * exp ;
         */
        EXP:
        node->expr_stmt.expr_stmt = parse_expr(parser);
        flush_peek_tok(tokbuf);
        ttype = peek_tok_type(tokbuf);
        if (ttype != TOKEN_SEMICOLON) {
            LOG_ERROR("exp must end with \";\"");
        }
        pop_tok(tokbuf);
        node->type = NT_STMT_EXPR;
        break;
    }
    }
    return node;
}

View File

@@ -1,51 +0,0 @@
#include "../ast.h"
#include "../parser.h"
#include "../type.h"
/* Build an identifier terminal node from a token; the token must be an
 * identifier.  The declaration link is resolved later (starts NULL). */
ast_node_t* new_ast_ident_node(tok_t* tok) {
    if (tok->sub_type != TOKEN_IDENT) {
        LOG_ERROR("syntax error: want identifier but got %d", tok->sub_type);
    }
    ast_node_t* ident = new_ast_node();
    ident->type = NT_TERM_IDENT;
    ident->syms.decl_node = NULL;
    ident->syms.tok = *tok;
    return ident;
}
/* Consume the next token, which must be an identifier, and wrap it in an
 * identifier AST node (errors are reported by new_ast_ident_node). */
ast_node_t* expect_pop_ident(tok_stream_t* tokbuf) {
    flush_peek_tok(tokbuf);
    ast_node_t* ident = new_ast_ident_node(peek_tok(tokbuf));
    pop_tok(tokbuf);
    return ident;
}
/**
 * Parse a type specifier (one base-type keyword, optionally followed by a
 * single `*`) and return an NT_TERM_TYPE node.
 *
 * NOTE(review): the resolved data_type_t is currently discarded — the node
 * does not record it yet (pending the typing system).
 */
ast_node_t* parse_type(parser_t* parser) {
    tok_stream_t* tokbuf = &parser->tokbuf;
    flush_peek_tok(tokbuf);
    cc_tktype_t ttype = peek_tok_type(tokbuf);
    data_type_t dtype;
    switch(ttype) {
    case TOKEN_VOID:   dtype = TYPE_VOID;   break;
    case TOKEN_CHAR:   dtype = TYPE_CHAR;   break;
    case TOKEN_SHORT:  dtype = TYPE_SHORT;  break;
    case TOKEN_INT:    dtype = TYPE_INT;    break;
    case TOKEN_LONG:   dtype = TYPE_LONG;   break;
    case TOKEN_FLOAT:  dtype = TYPE_FLOAT;  break;
    case TOKEN_DOUBLE: dtype = TYPE_DOUBLE; break;
    default:
        LOG_ERROR("无效的类型说明符");
    }
    ast_node_t* node = new_ast_node();
    node->type = NT_TERM_TYPE;
    // FIX: suppress the unused-variable warning idiomatically instead of
    // mutating the value (`dtype += 1`); typing system will consume it later.
    (void)dtype;
    pop_tok(tokbuf);
    // Accept at most one pointer declarator for now.
    if (peek_tok_type(tokbuf) == TOKEN_MUL) {
        pop_tok(tokbuf);
    }
    return node;
}

View File

@@ -1,136 +0,0 @@
// #include "../parser.h"
// #include "../type.h"
// enum TypeParseState {
// TPS_BASE_TYPE, // 解析基础类型 (int/char等)
// TPS_QUALIFIER, // 解析限定符 (const/volatile)
// TPS_POINTER, // 解析指针 (*)
// TPS_ARRAY, // 解析数组维度 ([n])
// TPS_FUNC_PARAMS // 解析函数参数列表
// };
// ast_node_t* parse_type(parser_t* p) {
// ast_node_t* type_root = new_ast_node();
// ast_node_t* current = type_root;
// current->type = NT_TYPE_BASE;
// enum TypeParseState state = TPS_QUALIFIER;
// int pointer_level = 0;
// while (1) {
// tok_type_t t = peektoktype(p);
// switch (state) {
// // 基础类型解析 (int, char等)
// case TPS_BASE_TYPE:
// if (is_base_type(t)) {
// // current->data.data_type = token_to_datatype(t);
// pop_tok(p);
// state = TPS_POINTER;
// } else {
// error("Expected type specifier");
// }
// break;
// // 类型限定符 (const/volatile)
// case TPS_QUALIFIER:
// if (t == TOKEN_CONST || t == TOKEN_VOLATILE) {
// ast_node_t* qual_node = new_ast_node();
// qual_node->type = NT_TYPE_QUAL;
// qual_node->data.data_type = t; // 复用data_type字段存储限定符
// current->child.decl.type = qual_node;
// current = qual_node;
// pop_tok(p);
// } else {
// state = TPS_BASE_TYPE;
// }
// break;
// // 指针解析 (*)
// case TPS_POINTER:
// if (t == TOKEN_MUL) {
// ast_node_t* ptr_node = new_ast_node();
// ptr_node->type = NT_TYPE_PTR;
// current->child.decl.type = ptr_node;
// current = ptr_node;
// pointer_level++;
// pop_tok(p);
// } else {
// state = TPS_ARRAY;
// }
// break;
// // 数组维度 ([n])
// case TPS_ARRAY:
// if (t == TOKEN_L_BRACKET) {
// pop_tok(p); // 吃掉[
// ast_node_t* arr_node = new_ast_node();
// arr_node->type = NT_TYPE_ARRAY;
// // 解析数组大小(仅语法检查)
// if (peektoktype(p) != TOKEN_R_BRACKET) {
// parse_expr(p); // 不计算实际值
// }
// expecttok(p, TOKEN_R_BRACKET);
// current->child.decl.type = arr_node;
// current = arr_node;
// } else {
// state = TPS_FUNC_PARAMS;
// }
// break;
// // 函数参数列表
// case TPS_FUNC_PARAMS:
// if (t == TOKEN_L_PAREN) {
// ast_node_t* func_node = new_ast_node();
// func_node->type = NT_TYPE_FUNC;
// current->child.decl.type = func_node;
// // 解析参数列表(仅结构,不验证类型)
// parse_param_list(p, func_node);
// current = func_node;
// } else {
// return type_root; // 类型解析结束
// }
// break;
// }
// }
// }
// // 判断是否是基础类型
// static int is_base_type(tok_type_t t) {
// return t >= TOKEN_VOID && t <= TOKEN_DOUBLE;
// }
// // // 转换token到数据类型简化版
// // static enum DataType token_to_datatype(tok_type_t t) {
// // static enum DataType map[] = {
// // [TOKEN_VOID] = DT_VOID,
// // [TOKEN_CHAR] = DT_CHAR,
// // [TOKEN_INT] = DT_INT,
// // // ...其他类型映射
// // };
// // return map[t];
// // }
// // 解析参数列表(轻量级)
// static void parse_param_list(parser_t* p, ast_node_t* func) {
// expecttok(p, TOKEN_L_PAREN);
// while (peektoktype(p) != TOKEN_R_PAREN) {
// ast_node_t* param = parse_type(p); // 递归解析类型
// // 允许可选参数名(仅语法检查)
// if (peektoktype(p) == TOKEN_IDENT) {
// pop_tok(p); // 吃掉参数名
// }
// if (peektoktype(p) == TOKEN_COMMA) {
// pop_tok(p);
// }
// }
// expecttok(p, TOKEN_R_PAREN);
// }

View File

@@ -1,20 +0,0 @@
#include <lib/core.h>
#include "parser.h"
#include "type.h"
/**
 * Wire a parser to its lexer and symbol table and prime the token stream.
 *
 * NOTE(review): init_tokbuf runs *before* cap/buf are assigned below —
 * this appears intentional (init_tokbuf presumably leaves them untouched),
 * but confirm before reordering.
 */
void init_parser(parser_t* parser, cc_lexer_t* lexer, symtab_t* symtab) {
    init_lib_core();
    parser->cur_node = NULL;
    parser->root = NULL;
    parser->lexer = lexer;
    parser->symtab = symtab;
    init_tokbuf(&parser->tokbuf, lexer, (tok_stream_get_func)get_valid_token);
    // Back the token stream with the fixed-size buffer embedded in parser_t.
    parser->tokbuf.cap = sizeof(parser->TokenBuffer) / sizeof(parser->TokenBuffer[0]);
    parser->tokbuf.buf = parser->TokenBuffer;
}
/* Entry point: parse the whole translation unit into parser->root. */
void run_parser(parser_t* parser) {
    parse_prog(parser);
}

View File

@@ -1,23 +0,0 @@
#ifndef __PARSER_H__
#define __PARSER_H__

#include "../lexer/lexer.h"
#include "symtab/symtab.h"
#include "ast.h"

/* Capacity of the embedded token lookahead buffer. */
#define PARSER_MAX_TOKEN_QUEUE 16

/* Recursive-descent parser state: AST under construction, token source,
 * symbol table, and a fixed-size token lookahead queue. */
typedef struct parser {
    ast_node_t* root;       /* root of the AST (NT_ROOT) */
    ast_node_t* cur_node;   /* node currently being built */
    cc_lexer_t* lexer;      /* token source */
    symtab_t* symtab;       /* scoped symbol table */
    tok_stream_t tokbuf;    /* peek/pop stream over TokenBuffer */
    tok_t TokenBuffer[PARSER_MAX_TOKEN_QUEUE];
    int err_level;          /* accumulated error severity */
} parser_t;

void init_parser(parser_t* parser, cc_lexer_t* lexer, symtab_t* symtab);
void run_parser(parser_t* parser);

#endif

View File

@@ -1,62 +0,0 @@
#include "symtab.h"
/* Hash a symtab key by its interned name string. */
static u32_t hash_func(const void* raw_key) {
    return rt_strhash(((symtab_key_t*)raw_key)->strp_name);
}
static int key_cmp(const void* _key1, const void* _key2) {
const symtab_key_t* key1 = (symtab_key_t*)_key1;
const symtab_key_t* key2 = (symtab_key_t*)_key2;
if (rt_strcmp(key1->strp_name, key2->strp_name) == 0) {
return 0;
}
return 1;
}
void init_symtab(symtab_t* symtab) {
symtab->cur_scope = NULL;
symtab->gid = 1;
init_hashtable(&symtab->global_table);
symtab->global_table.hash_func = hash_func;
symtab->global_table.key_cmp = key_cmp;
init_hashtable(&symtab->local_table);
symtab->local_table.hash_func = hash_func;
symtab->local_table.key_cmp = key_cmp;
}
/* Tear down a symbol table.
 * TODO: currently a stub — open scopes and both hash tables are leaked. */
void symtab_destroy(symtab_t* symtab) {
    // TODO
}
/* Push a fresh lexical scope with a unique id onto the scope chain. */
void symtab_enter_scope(symtab_t* symtab) {
    scope_t* inner = (scope_t*)salloc_alloc(sizeof(scope_t));
    inner->uid = symtab->gid++;
    inner->parent = symtab->cur_scope;
    init_hashtable(&inner->table);
    inner->table.hash_func = hash_func;
    inner->table.key_cmp = key_cmp;
    symtab->cur_scope = inner;
}
/* Pop the innermost scope, releasing its table and storage. */
void symtab_leave_scope(symtab_t* symtab) {
    Assert(symtab->cur_scope != NULL);
    scope_t* dying = symtab->cur_scope;
    symtab->cur_scope = dying->parent;
    hashtable_destory(&dying->table);
    salloc_free(dying);
}
/* Look `key` up from the innermost scope outward; NULL when absent. */
void* symtab_get(symtab_t* symtab, symtab_key_t* key) {
    scope_t* scope = symtab->cur_scope;
    while (scope != NULL) {
        void* found = hashtable_get(&scope->table, key);
        if (found != NULL) {
            return found;
        }
        scope = scope->parent;
    }
    return NULL;
}
/* Bind key -> val in the *current* scope only.
 * WARNING: the caller owns the storage for both key and val. */
void* symtab_add(symtab_t* symtab, symtab_key_t* key, void* val) {
    return hashtable_set(&symtab->cur_scope->table, key, val);
}

View File

@@ -1,39 +0,0 @@
#ifndef __SMCC_SYMTABL_H__
#define __SMCC_SYMTABL_H__

#include <lib/core.h>
#include <lib/utils/ds/hashtable.h>
#include <lib/utils/strpool/strpool.h>

// FIXME: there may be a better architectural solution for this
/* Lookup key: interned name plus a scope-unique id. */
typedef struct symtab_key {
    const char* strp_name;  /* interned (string-pool) name */
    int uid;                /* scope id the name belongs to */
} symtab_key_t;

/* One lexical scope: its own table plus a link to the enclosing scope. */
typedef struct scope {
    int uid;
    struct scope* parent;
    hash_table_t table;
} scope_t;

/* Scoped symbol table: global + local tables and a chain of open scopes. */
typedef struct symtab {
    hash_table_t global_table;
    hash_table_t local_table;
    scope_t* cur_scope;     /* innermost open scope, NULL at file scope */
    int gid; // global id for generating unique scope id
} symtab_t;

void init_symtab(symtab_t* symtab);
void symtab_destroy(symtab_t* symtab);
void symtab_enter_scope(symtab_t* symtab);
void symtab_leave_scope(symtab_t* symtab);
void* symtab_get(symtab_t* symtab, symtab_key_t* key);
// WARNING key and val need you save, especially val
void* symtab_add(symtab_t* symtab, symtab_key_t* key, void* val);

#endif

View File

@@ -1,35 +0,0 @@
#ifndef __TYPE_H__
#define __TYPE_H__

#include "../lexer/token.h"

/* Kinds of C data types recognised by the frontend.  Basic types first,
 * then sign prefixes, then derived/aggregate kinds. */
typedef enum {
    TYPE_VOID,
    TYPE_CHAR,
    TYPE_SHORT,
    TYPE_INT,
    TYPE_LONG,
    TYPE_LONG_LONG,
    TYPE_FLOAT,
    TYPE_DOUBLE,
    TYPE_LONG_DOUBLE,

    // prefix
    TYPE_SIGNED,
    TYPE_UNSIGNED,

    // TYPE_BOOL,
    // TYPE_COMPLEX,
    // TYPE_IMAGINARY,

    TYPE_ENUM,
    TYPE_ARRAY,
    TYPE_STRUCT,
    TYPE_UNION,
    TYPE_FUNCTION,
    TYPE_POINTER,
    TYPE_ATOMIC,
    TYPE_TYPEDEF,
} data_type_t;

#endif

View File

@@ -1,33 +0,0 @@
# Toolchain settings
CC = gcc
AR = ar
CFLAGS = -g -Wall -I../..
IR_DIR = ./ir

# Source file list
SRCS = \
	middleend.c \
	$(IR_DIR)/ir.c \
	$(IR_DIR)/ir_ast.c \
	$(IR_DIR)/ir_lib.c \
	$(IR_DIR)/ir_type.c

# Derived object file list
OBJS = $(SRCS:.c=.o)

# Final target: static archive of the middleend
TARGET = libmiddleend.a

all: $(TARGET)

$(TARGET): $(OBJS)
	$(AR) rcs $@ $^

%.o: %.c
	$(CC) $(CFLAGS) -c -o $@ $<

clean:
	rm -f $(OBJS) $(TARGET)

.PHONY: all clean

View File

@@ -1,152 +0,0 @@
// ir_core.h — core IR data structures: types, basic blocks, functions,
// programs, and the tagged instruction node.
#ifndef IR_CORE_H
#define IR_CORE_H

#include <lib/utils/ds/vector.h>

// Error codes
typedef enum {
    IR_EC_SUCCESS = 0,      // success
    IR_EC_MEMORY_ERROR,     // allocation failure
    IR_EC_TYPE_MISMATCH,    // type mismatch
    IR_EC_INVALID_OPERAND,  // invalid operand
    IR_EC_DUPLICATE_SYMBOL, // symbol redefinition
} ir_ecode_t;

/* Tagged IR type descriptor.
 * NOTE(review): the struct is anonymous, so the `struct ir_type*` members
 * below reference a tag that is never completed — confirm this is intended. */
typedef struct {
    enum {
        IR_TYPE_INT32,
        IR_TYPE_PTR,
        IR_TYPE_ARRAY,
        IR_TYPE_FUNC,
        IR_TYPE_VOID,
    } tag;
    union {
        struct {
            struct ir_type *base;   // element type
            rt_size_t len;          // element count
        } arr;
        struct {
            struct ir_type *ret;    // return type
            struct ir_type **params;
            rt_size_t param_cnt;
        } func;
    };
} ir_type_t;

typedef struct ir_node ir_node_t;

/* Basic block: a label plus an ordered list of instructions. */
typedef struct ir_bblock {
    const char *label;              // may be NULL for anonymous blocks
    VECTOR_HEADER(instrs, ir_node_t*);
    // ir_arr_t used_by;
} ir_bblock_t; // basic block

/* Function: name, type, parameter nodes, and its basic blocks. */
typedef struct {
    const char *name;
    ir_type_t *type;
    VECTOR_HEADER(params, ir_node_t*);
    VECTOR_HEADER(bblocks, ir_bblock_t*);
} ir_func_t;

/* Whole program: globals, defined functions, and external declarations. */
typedef struct {
    VECTOR_HEADER(global, ir_node_t*);
    VECTOR_HEADER(funcs, ir_func_t*);
    VECTOR_HEADER(extern_funcs, ir_func_t*);
} ir_prog_t;

/* Instruction/value kinds carried by ir_node_t.tag. */
typedef enum ir_node_tag {
    IR_NODE_NULL,
    IR_NODE_CONST_INT,
    IR_NODE_ALLOC,
    IR_NODE_LOAD,
    IR_NODE_STORE,
    IR_NODE_GET_PTR,
    IR_NODE_OP,
    IR_NODE_BRANCH,
    IR_NODE_JUMP,
    IR_NODE_CALL,
    IR_NODE_RET,
} ir_node_tag_t;

/* One IR value/instruction.  `data` is discriminated by `tag`;
 * `used_by` records def-use edges. */
struct ir_node {
    const ir_type_t* type;
    const char* name;
    VECTOR_HEADER(used_by, ir_node_t*);
    ir_node_tag_t tag;
    union {
        struct {
            int32_t val;
        } const_int;
        struct {
            ir_node_t* target;
        } load;
        struct {
            ir_node_t* target;
            ir_node_t* value;
        } store;
        struct {
            ir_node_t* src_addr;
            ir_node_t* offset;
        } get_ptr;
        struct {
            enum {
                /// Not equal to.
                IR_OP_NEQ,
                /// Equal to.
                IR_OP_EQ,
                /// Greater than.
                IR_OP_GT,
                /// Less than.
                IR_OP_LT,
                /// Greater than or equal to.
                IR_OP_GE,
                /// Less than or equal to.
                IR_OP_LE,
                /// Addition.
                IR_OP_ADD,
                /// Subtraction.
                IR_OP_SUB,
                /// Multiplication.
                IR_OP_MUL,
                /// Division.
                IR_OP_DIV,
                /// Modulo.
                IR_OP_MOD,
                /// Bitwise AND.
                IR_OP_AND,
                /// Bitwise OR.
                IR_OP_OR,
                /// Bitwise XOR.
                IR_OP_XOR,
                /// Bitwise NOT.
                IR_OP_NOT,
                /// Shift left logical.
                IR_OP_SHL,
                /// Shift right logical.
                IR_OP_SHR,
                /// Shift right arithmetic.
                IR_OP_SAR,
            } op;
            ir_node_t* lhs;
            ir_node_t* rhs;
        } op;
        struct {
            ir_node_t* cond;
            ir_bblock_t* true_bblock;
            ir_bblock_t* false_bblock;
        } branch;
        struct {
            ir_bblock_t* target_bblock;
        } jump;
        struct {
            ir_func_t* callee;
            VECTOR_HEADER(args, ir_node_t*);
        } call;
        struct {
            ir_node_t* ret_val;
        } ret;
    } data;
};

#endif // IR_CORE_H

View File

@@ -1,446 +0,0 @@
#include "ir.h"
#include "ir_lib.h"
#include "ir_type.h"
#include "../../frontend/frontend.h"
#include "../../frontend/parser/ast.h"
// Generation context: tracks where new instructions are emitted.
typedef struct {
    ir_func_t* cur_func;     // function currently being lowered
    ir_bblock_t* cur_block;  // basic block receiving new instructions
} IRGenContext;

// NOTE(review): file-global state — lowering is single-threaded by design.
IRGenContext ctx;
ir_prog_t prog;
void _gen_ir_from_ast(ast_node_t* node);
/* Append `node` to `block`, or to the context's current block when
 * `block` is NULL. */
static void emit_instr(ir_bblock_t* block, ir_node_t* node) {
    ir_bblock_t* target = (block != NULL) ? block : ctx.cur_block;
    vector_push(target->instrs, node);
}
/* Create a conditional branch on `cond` and emit it into the current block. */
static ir_node_t* emit_br(ir_node_t* cond, ir_bblock_t* trueb, ir_bblock_t* falseb) {
    ir_node_t* br = new_ir_node(NULL, IR_NODE_BRANCH);
    br->data.branch.cond = cond;
    br->data.branch.true_bblock = trueb;
    br->data.branch.false_bblock = falseb;
    emit_instr(NULL, br);
    return br;
}
static ir_node_t* gen_ir_expr(ast_node_t* node);
/* Lower a terminal AST node (literal / identifier / call) to an IR value. */
static ir_node_t* gen_ir_term(ast_node_t* node) {
    switch (node->type) {
    case NT_TERM_VAL: {
        // integer literal -> fresh constant node
        ir_node_t* ir = new_ir_node(NULL, IR_NODE_CONST_INT);
        ir->data.const_int.val = node->syms.tok.val.i;
        return ir;
    }
    case NT_TERM_IDENT: {
        // identifier resolves to the IR value cached on its declaration
        ir_node_t* decl = node->syms.decl_node->decl_val.data;
        return decl;
    }
    case NT_TERM_CALL: {
        // lower each argument expression, then emit the call
        ir_node_t* call = new_ir_node(NULL, IR_NODE_CALL);
        call->data.call.callee = node->call.func_decl->decl_func.def->func.data;
        for (int i = 0; i < node->call.params->params.params.size; i++) {
            ast_node_t* param = vector_at(node->call.params->params.params, i);
            ir_node_t *tmp = gen_ir_expr(param);
            vector_push(call->data.call.args, tmp);
        }
        emit_instr(NULL, call);
        return call;
    }
    default: {
        Panic("gen_ir_expr: unknown node type");
    }
    }
    // unreachable unless Panic returns
    TODO();
    return NULL;
}
/**
 * Lower an expression subtree to IR, returning the node that holds its value.
 *
 * Fixes over the original:
 *  - NT_GT now emits IR_OP_GT (it emitted IR_OP_GE, so `a > b` compiled
 *    as `a >= b`).
 *  - def-use edges (`used_by`) are recorded *after* the instruction is
 *    created; the original pushed a NULL placeholder before the switch.
 *  - nothing is emitted and NULL is returned when no instruction was built
 *    (the original emitted NULL and returned an uninitialised `ret`).
 */
static ir_node_t* gen_ir_expr(ast_node_t* node) {
    // terminal nodes delegate to gen_ir_term
    switch (node->type) {
    case NT_TERM_VAL:
    case NT_TERM_IDENT:
    case NT_TERM_CALL:
        return gen_ir_term(node);
    default:
        break;
    }

    ir_node_t* lhs = gen_ir_expr(node->expr.left);
    ir_node_t* rhs = node->expr.right ? gen_ir_expr(node->expr.right) : NULL;

    if (node->type == NT_COMMA) {
        // comma: value is the right operand; both sides already emitted
        return rhs;
    }

    ir_node_t* instr = NULL;
    ir_node_t* ret = NULL;

#define BINOP(operand) do { \
    instr = new_ir_node(NULL, IR_NODE_OP); \
    instr->data.op.op = operand;  \
    instr->data.op.lhs = lhs;  \
    instr->data.op.rhs = rhs;  \
    ret = instr; \
} while (0)

    switch (node->type) {
    case NT_ADD: { BINOP(IR_OP_ADD); break; }   // (expr) + (expr)
    case NT_SUB: { BINOP(IR_OP_SUB); break; }   // (expr) - (expr)
    case NT_MUL: { BINOP(IR_OP_MUL); break; }   // (expr) * (expr)
    case NT_DIV: { BINOP(IR_OP_DIV); break; }   // (expr) / (expr)
    case NT_MOD: { BINOP(IR_OP_MOD); break; }   // (expr) % (expr)
    case NT_AND: { BINOP(IR_OP_AND); break; }   // (expr) & (expr)
    case NT_OR:  { BINOP(IR_OP_OR);  break; }   // (expr) | (expr)
    case NT_XOR: { BINOP(IR_OP_XOR); break; }   // (expr) ^ (expr)
    case NT_BIT_NOT: {
        // ~ (expr)
        // TODO unary NOT needs a dedicated lowering (IR_OP_NOT is unary)
        break;
    }
    case NT_L_SH: {
        // (expr) << (expr)
        BINOP(IR_OP_SHL);
        break;
    }
    case NT_R_SH: {
        // (expr) >> (expr)
        BINOP(IR_OP_SHR); // Shift right logical.
        // TODO signed operands need IR_OP_SAR (arithmetic shift)
        break;
    }
    case NT_EQ:  { BINOP(IR_OP_EQ);  break; }   // (expr) == (expr)
    case NT_NEQ: { BINOP(IR_OP_NEQ); break; }   // (expr) != (expr)
    case NT_LE:  { BINOP(IR_OP_LE);  break; }   // (expr) <= (expr)
    case NT_GE:  { BINOP(IR_OP_GE);  break; }   // (expr) >= (expr)
    case NT_LT:  { BINOP(IR_OP_LT);  break; }   // (expr) < (expr)
    case NT_GT: {
        // (expr) > (expr)
        // FIX: was IR_OP_GE, which compiled `>` as `>=`
        BINOP(IR_OP_GT);
        break;
    }
    case NT_AND_AND: // (expr) && (expr)
        LOG_ERROR("unimpliment");
        break;
    case NT_OR_OR:   // (expr) || (expr)
        LOG_ERROR("unimpliment");
        break;
    case NT_NOT: {
        // ! (expr)  ->  (0 == expr)
        instr = new_ir_node(NULL, IR_NODE_OP);
        instr->data.op.op = IR_OP_EQ;
        instr->data.op.lhs = &node_zero;
        instr->data.op.rhs = lhs;
        ret = instr;
        break;
    }
    case NT_ASSIGN: {
        // (expr) = (expr); the expression's value is the stored rhs
        instr = new_ir_node(NULL, IR_NODE_STORE);
        instr->data.store.target = lhs;
        instr->data.store.value = rhs;
        ret = rhs;
        break;
    }
    // case NT_COND: // (expr) ? (expr) : (expr)
    default: {
        LOG_ERROR("Unsupported IR generation for AST node type %d", node->type);
        break;
    }
    }

    if (instr != NULL) {
        // record def-use edges now that the instruction exists
        vector_push(lhs->used_by, instr);
        if (rhs) { vector_push(rhs->used_by, instr); }
        emit_instr(NULL, instr);
    }
    return ret;
}
/* Lower a function definition: create its entry block, materialise one
 * alloc per parameter, then lower the body with ctx pointed at this
 * function.  The previous context is restored on exit (nested safety). */
static void gen_ir_func(ast_node_t* node, ir_func_t* func) {
    Assert(node->type == NT_FUNC);
    ir_bblock_t *entry = new_ir_bblock("entry");
    vector_push(func->bblocks, entry);
    vector_push(prog.funcs, func);
    IRGenContext prev_ctx = ctx;
    ctx.cur_func = func;
    ctx.cur_block = entry;
    ast_node_t* params = node->func.decl->decl_func.params;
    for (int i = 0; i < params->params.params.size; i ++) {
        ast_node_t* param = params->params.params.data[i];
        // each parameter gets a named stack slot in the entry block
        ir_node_t* decl = new_ir_node(param->decl_val.name->syms.tok.val.str, IR_NODE_ALLOC);
        emit_instr(entry, decl);
        vector_push(func->params, decl);
        // TODO Typing system — everything is i32 for now
        decl->type = &type_i32;
        // cache the IR slot on the AST declaration for identifier lookup
        param->decl_val.data = decl;
    }
    _gen_ir_from_ast(node->func.body);
    ctx = prev_ctx;
}
/**
 * Lower control-flow statements (if / while / do-while / for) into basic
 * blocks and branches.  Three fresh blocks are allocated up front; their
 * roles depend on the statement kind.
 *
 * FIX: in the for-loop case with no condition, the unconditional jump to
 * the body was created but never emitted, leaving the entry block empty
 * and the body unreachable.
 */
void gen_ir_jmp(ast_node_t* node) {
    ir_bblock_t *bblocks[3];
    for (int i = 0; i < sizeof(bblocks)/sizeof(bblocks[0]); i++) {
        bblocks[i] = new_ir_bblock(NULL);
        vector_push(ctx.cur_func->bblocks, bblocks[i]);
    }

#define NEW_IR_JMP(name, block) do { \
    name = new_ir_node(NULL, IR_NODE_JUMP); \
    name->data.jump.target_bblock = block; \
} while (0)

    switch (node->type) {
    case NT_STMT_IF: {
        ir_bblock_t* trueb = bblocks[0];
        trueb->label = "if_true";
        ir_bblock_t* falseb = bblocks[1];
        falseb->label = "if_false";
        ir_bblock_t* endb = bblocks[2];
        endb->label = "if_end";
        ir_node_t* jmp;
        // cond
        ir_node_t *cond = gen_ir_expr(node->if_stmt.cond);
        emit_br(cond, trueb, falseb);
        // true block
        ctx.cur_block = trueb;
        _gen_ir_from_ast(node->if_stmt.if_stmt);
        // else block (falseb doubles as the join block when absent)
        if (node->if_stmt.else_stmt != NULL) {
            ctx.cur_block = falseb;
            _gen_ir_from_ast(node->if_stmt.else_stmt);
            ir_node_t* jmp;
            ctx.cur_block = endb;
            NEW_IR_JMP(jmp, ctx.cur_block);
            emit_instr(falseb, jmp);
        } else {
            ctx.cur_block = falseb;
        }
        // terminate the true block with a jump to the join block
        NEW_IR_JMP(jmp, ctx.cur_block);
        emit_instr(trueb, jmp);
        break;
    }
    case NT_STMT_WHILE: {
        ir_bblock_t* entryb = bblocks[0];   // condition check
        ir_bblock_t* bodyb = bblocks[1];    // loop body
        ir_bblock_t* endb = bblocks[2];     // after the loop
        ir_node_t* entry;
        NEW_IR_JMP(entry, entryb);
        emit_instr(NULL, entry);
        // Entry:
        ctx.cur_block = entryb;
        ir_node_t *cond = gen_ir_expr(node->while_stmt.cond);
        emit_br(cond, bodyb, endb);
        // Body:
        ir_node_t* jmp;
        ctx.cur_block = bodyb;
        _gen_ir_from_ast(node->while_stmt.body);
        NEW_IR_JMP(jmp, entryb);
        emit_instr(NULL, jmp);
        // End:
        ctx.cur_block = endb;
        break;
    }
    case NT_STMT_DOWHILE: {
        ir_bblock_t* entryb = bblocks[0];   // condition check
        ir_bblock_t* bodyb = bblocks[1];    // loop body (runs first)
        ir_bblock_t* endb = bblocks[2];
        ir_node_t* entry;
        NEW_IR_JMP(entry, bodyb);
        emit_instr(NULL, entry);
        // Body:
        ctx.cur_block = bodyb;
        _gen_ir_from_ast(node->do_while_stmt.body);
        ir_node_t* jmp;
        NEW_IR_JMP(jmp, entryb);
        emit_instr(NULL, jmp);
        // Entry:
        ctx.cur_block = entryb;
        ir_node_t *cond = gen_ir_expr(node->do_while_stmt.cond);
        emit_br(cond, bodyb, endb);
        // End:
        ctx.cur_block = endb;
        break;
    }
    case NT_STMT_FOR: {
        ir_bblock_t* entryb = bblocks[0];   // condition check
        ir_bblock_t* bodyb = bblocks[1];    // body + iter
        ir_bblock_t* endb = bblocks[2];
        if (node->for_stmt.init) {
            _gen_ir_from_ast(node->for_stmt.init);
        }
        ir_node_t* entry;
        NEW_IR_JMP(entry, entryb);
        emit_instr(NULL, entry);
        // Entry:
        ctx.cur_block = entryb;
        if (node->for_stmt.cond) {
            ir_node_t *cond = gen_ir_expr(node->for_stmt.cond);
            emit_br(cond, bodyb, endb);
        } else {
            // no condition: fall straight into the body
            ir_node_t* jmp;
            NEW_IR_JMP(jmp, bodyb);
            // FIX: the jump was created but never emitted
            emit_instr(NULL, jmp);
        }
        // Body:
        ctx.cur_block = bodyb;
        _gen_ir_from_ast(node->for_stmt.body);
        if (node->for_stmt.iter) {
            gen_ir_expr(node->for_stmt.iter);
        }
        ir_node_t* jmp;
        NEW_IR_JMP(jmp, entryb);
        emit_instr(NULL, jmp);
        // End:
        ctx.cur_block = endb;
        break;
    }
    default:
        LOG_ERROR("ir jmp can't hit here");
    }
}
/* Public entry point: lower every top-level declaration under the AST root
 * and return the (file-global) IR program. */
ir_prog_t* gen_ir_from_ast(ast_node_t* root) {
    Assert(root->type == NT_ROOT);
    for (int i = 0; i < root->root.children.size; i ++) {
        _gen_ir_from_ast(root->root.children.data[i]);
    }
    // _gen_ir_from_ast(root);
    return &prog;
}
/* Recursive lowering dispatcher for declarations, statements, and blocks.
 * Expressions are delegated to gen_ir_expr; structured control flow to
 * gen_ir_jmp. */
void _gen_ir_from_ast(ast_node_t* node) {
    switch (node->type) {
    case NT_DECL_FUNC: {
        // function declaration: create the ir_func_t shell.  When there is
        // no definition, synthesise an empty def node and register the
        // function as external.
        // NOTE(review): return type is hard-coded to i32 — pending typing.
        ir_func_t* func = new_ir_func(node->decl_func.name->syms.tok.val.str, &type_i32);
        if (node->decl_func.def == NULL) {
            ast_node_t* def = new_ast_node();
            def->func.body = NULL;
            def->func.decl = node;
            node->decl_func.def = def;
            vector_push(prog.extern_funcs, func);
        }
        node->decl_func.def->func.data = func;
        break;
    }
    case NT_FUNC: {
        // function definition body
        gen_ir_func(node, node->func.data);
        break;
    }
    case NT_STMT_RETURN: {
        ir_node_t* ret = NULL;
        if (node->return_stmt.expr_stmt != NULL) {
            ret = gen_ir_expr(node->return_stmt.expr_stmt);
        }
        ir_node_t* ir = new_ir_node(NULL, IR_NODE_RET);
        ir->data.ret.ret_val = ret;
        emit_instr(NULL, ir);
        // anything after `return` goes into a fresh (unreachable) block
        ir_bblock_t* block = new_ir_bblock(NULL);
        ctx.cur_block = block;
        vector_push(ctx.cur_func->bblocks, block);
        break;
    }
    case NT_STMT_BLOCK: {
        _gen_ir_from_ast(node->block_stmt.block);
        break;
    }
    case NT_BLOCK: {
        // lower each child of a compound statement in order
        for (int i = 0; i < node->block.children.size; i ++) {
            _gen_ir_from_ast(node->block.children.data[i]);
        }
        break;
    }
    case NT_STMT_IF:
    case NT_STMT_WHILE:
    case NT_STMT_DOWHILE:
    case NT_STMT_FOR:
        gen_ir_jmp(node);
        break;
    case NT_DECL_VAR: {
        // local variable: allocate a named slot, cache it on the AST node
        ir_node_t* ir = new_ir_node(node->decl_val.name->syms.tok.val.str, IR_NODE_ALLOC);
        emit_instr(NULL, ir);
        // TODO Typing system
        ir->type = &type_i32;
        node->decl_val.data = ir;
        if (node->decl_val.expr_stmt != NULL) {
            // NOTE(review): the initialiser is routed through the statement
            // dispatcher, not gen_ir_expr — confirm expr_stmt's node type.
            _gen_ir_from_ast(node->decl_val.expr_stmt);
        }
        break;
    }
    case NT_STMT_EXPR: {
        gen_ir_expr(node->expr_stmt.expr_stmt);
        break;
    }
    case NT_STMT_EMPTY: {
        break;
    }
    default:
        // TODO: proper error handling / diagnostics
        LOG_ERROR("unknown node type");
        break;
    }
}

View File

@@ -1,8 +0,0 @@
#ifndef __IR_AST_H__
#define __IR_AST_H__

#include "ir.h"

/* Forward declaration keeps this header independent of the frontend AST. */
typedef struct ast_node ast_node_t;

/* Lower an AST (rooted at NT_ROOT) into an IR program. */
ir_prog_t* gen_ir_from_ast(ast_node_t* node);

#endif //

View File

@@ -1,76 +0,0 @@
#include "ir.h"
#include "ir_lib.h"
#include "ir_type.h"
#include <stdio.h>
#include <assert.h>
/* Dump context: the stream IR text is written to. */
typedef struct ir_dump {
    FILE* fp;
} ir_dump_t;
/**
 * Print one IR instruction to dump->fp, one line per node.
 *
 * FIX: the original body was a copy-paste of the constructor's field
 * initialisation — "dumping" a node zeroed its fields, destroying the IR.
 * This version is strictly read-only.  IR_NODE_GET_PTR also no longer
 * falls through into assert(0).
 */
void dump_ir_node(ir_node_t* node, ir_dump_t* dump) {
    FILE* fp = dump->fp;
    fprintf(fp, "%%%p", (void*)node);
    switch (node->tag) {
    case IR_NODE_ALLOC:
        fprintf(fp, " = alloc %s", node->name ? node->name : "");
        break;
    case IR_NODE_CONST_INT:
        fprintf(fp, " = const %d", node->data.const_int.val);
        break;
    case IR_NODE_LOAD:
        fprintf(fp, " = load %%%p", (void*)node->data.load.target);
        break;
    case IR_NODE_STORE:
        fprintf(fp, " store %%%p -> %%%p",
                (void*)node->data.store.value,
                (void*)node->data.store.target);
        break;
    case IR_NODE_GET_PTR:
        fprintf(fp, " = getptr %%%p + %%%p",
                (void*)node->data.get_ptr.src_addr,
                (void*)node->data.get_ptr.offset);
        break;
    case IR_NODE_OP:
        fprintf(fp, " = op.%d %%%p, %%%p", (int)node->data.op.op,
                (void*)node->data.op.lhs, (void*)node->data.op.rhs);
        break;
    case IR_NODE_BRANCH: {
        const char* tl = node->data.branch.true_bblock
            ? node->data.branch.true_bblock->label : NULL;
        const char* fl = node->data.branch.false_bblock
            ? node->data.branch.false_bblock->label : NULL;
        fprintf(fp, " br %%%p ? %s : %s",
                (void*)node->data.branch.cond,
                tl ? tl : "<bb>", fl ? fl : "<bb>");
        break;
    }
    case IR_NODE_JUMP: {
        const char* tl = node->data.jump.target_bblock
            ? node->data.jump.target_bblock->label : NULL;
        fprintf(fp, " jmp %s", tl ? tl : "<bb>");
        break;
    }
    case IR_NODE_CALL:
        fprintf(fp, " = call %s/%d",
                node->data.call.callee ? node->data.call.callee->name : "<fn>",
                (int)node->data.call.args.size);
        break;
    case IR_NODE_RET:
        fprintf(fp, " ret %%%p", (void*)node->data.ret.ret_val);
        break;
    default:
        // unknown tag: the IR is corrupt
        assert(0);
    }
    fputc('\n', fp);
}
/* Print a basic block. TODO: stub — intentionally does nothing yet. */
void dump_ir_bblock(ir_bblock_t* block) {
}
/* Print a function. TODO: stub — intentionally does nothing yet. */
void dump_ir_func(ir_func_t* func) {
}
/* Print a whole program. TODO: stub — intentionally does nothing yet. */
void dump_ir_prog(ir_prog_t* prog) {
}

View File

@@ -1,118 +0,0 @@
#include "ir.h"
// FIXME using stdlib.h
#include <stdlib.h>
// Allocation counter.
// NOTE(review): never incremented anywhere in this file — dead for now.
static int total_alloc = 0;

/* Single allocation unit sized to fit any IR object, so one allocator
 * serves nodes, blocks, functions, and programs alike. */
typedef union ir_alloc_item {
    ir_node_t node;
    ir_bblock_t bblock;
    ir_func_t func;
    ir_prog_t prog;
} ir_alloc_item_t;
/* Allocate one IR object slot.
 * NOTE(review): malloc failure is not checked — callers assume non-NULL. */
ir_alloc_item_t* alloc_item() {
    return malloc(sizeof(ir_alloc_item_t));
}
/* Release one IR object slot.
 * FIX: `return free(item);` returned a void expression, which is a
 * constraint violation in strict C. */
void free_item(ir_alloc_item_t* item) {
    free(item);
}
/**
 * Allocate an IR node and zero-initialise the union arm selected by `tag`.
 *
 * Fixes over the original:
 *  - IR_NODE_GET_PTR fell through into `default` and terminated the
 *    process — constructing a get_ptr node was impossible.
 *  - the invalid-tag path called exit(0), reporting *success*; it now
 *    exits with a failure status.
 */
ir_node_t* new_ir_node(const char* name, ir_node_tag_t tag) {
    ir_node_t* node = (ir_node_t*)alloc_item();
    node->name = name;
    node->type = NULL;
    node->tag = tag;
    switch (tag) {
    case IR_NODE_ALLOC: {
        node->type = NULL;
        break;
    }
    case IR_NODE_BRANCH: {
        node->data.branch.cond = NULL;
        node->data.branch.true_bblock = NULL;
        node->data.branch.false_bblock = NULL;
        break;
    }
    case IR_NODE_CALL: {
        vector_init(node->data.call.args);
        node->data.call.callee = NULL;
        break;
    }
    case IR_NODE_CONST_INT: {
        node->data.const_int.val = 0;
        break;
    }
    case IR_NODE_JUMP: {
        node->data.jump.target_bblock = NULL;
        break;
    }
    case IR_NODE_LOAD: {
        node->data.load.target = NULL;
        break;
    }
    case IR_NODE_STORE: {
        node->data.store.target = NULL;
        node->data.store.value = NULL;
        break;
    }
    case IR_NODE_OP: {
        node->data.op.op = 0;
        node->data.op.lhs = NULL;
        node->data.op.rhs = NULL;
        break;
    }
    case IR_NODE_RET: {
        node->data.ret.ret_val = NULL;
        break;
    }
    case IR_NODE_GET_PTR: {
        // FIX: used to fall through into default and exit the process
        node->data.get_ptr.src_addr = NULL;
        node->data.get_ptr.offset = NULL;
        break;
    }
    default: {
        // invalid/unhandled tag (includes IR_NODE_NULL)
        // FIX: exit(0) signalled success on this error path
        exit(1);
    }
    }
    vector_init(node->used_by);
    return node;
}
/* TODO: stub — nodes are never released (leaks by design for now). */
void free_irnode() {
}
/* Allocate a basic block with the given label (may be NULL) and an empty
 * instruction list. */
ir_bblock_t* new_ir_bblock(const char* name) {
    ir_bblock_t* bb = (ir_bblock_t*)alloc_item();
    vector_init(bb->instrs);
    bb->label = name;
    return bb;
}
/* TODO: stub — blocks are never released. */
void free_irbblock() {
}
/* Allocate a function shell: name and type set, params and block lists
 * start empty. */
ir_func_t* new_ir_func(const char* name, ir_type_t* type) {
    ir_func_t* fn = (ir_func_t*)alloc_item();
    vector_init(fn->params);
    vector_init(fn->bblocks);
    fn->name = name;
    fn->type = type;
    return fn;
}
/* TODO: stub — functions are never released. */
void free_irfunc() {
}
/* Allocate an empty program: no globals, no functions, no externs. */
ir_prog_t* new_ir_prog() {
    ir_prog_t* program = (ir_prog_t*)alloc_item();
    vector_init(program->global);
    vector_init(program->funcs);
    vector_init(program->extern_funcs);
    return program;
}
/* TODO: stub — programs are never released. */
void free_irprog() {
}

Some files were not shown because too many files have changed in this diff Show More