feat(lex_parser): 提取字符判断函数并增强解析器断言

将 `is_next_line` 内联函数重命名为 `lex_parse_is_endline` 并新增 `lex_parse_is_whitespace` 函数，统一用于词法解析中的字符分类。同时加强多个解析函数的输入参数断言，提升代码健壮性。此外，修正了 `lex_parse_skip_whitespace` 中的逻辑错误，并优化部分注释和控制流结构。 feat(pprocessor): 初始化预处理器模块并添加基础功能实现。新增预处理器模块 `pprocessor`，包括宏定义、条件编译状态管理以及基本的指令解析框架。实现了标识符解析、空白跳过、关键字查找等功能，并初步支持 `#define` 指令的对象类宏替换。该提交还引入了一组测试用例，覆盖多种宏展开场景及边界情况，确保预处理器的核心行为符合预期。
2025-11-24 22:44:08 +08:00
parent 871d031ceb
commit e6a76e7a86
58 changed files with 1429 additions and 9 deletions
--- a/libs/lex_parser/include/lex_parser.h
+++ b/libs/lex_parser/include/lex_parser.h
@@ -3,6 +3,14 @@

 #include <libcore.h>

+/**
+ * @brief Report whether a character is a line-terminator character.
+ * @param[in] ch Character code to classify.
+ * @return Non-zero for '\n' or '\r', zero for anything else.
+ */
+static inline cbool lex_parse_is_endline(int ch) {
+    const int is_line_feed = (ch == '\n');
+    const int is_carriage_return = (ch == '\r');
+    return is_line_feed || is_carriage_return;
+}
+
+/**
+ * @brief Report whether a character is horizontal blank space.
+ * @param[in] ch Character code to classify.
+ * @return Non-zero for ' ' or '\t', zero for anything else (line
+ *         terminators are not treated as whitespace here).
+ */
+static inline cbool lex_parse_is_whitespace(int ch) {
+    const int is_space = (ch == ' ');
+    const int is_tab = (ch == '\t');
+    return is_space || is_tab;
+}
+
 int lex_parse_char(core_stream_t *input, core_pos_t *pos);
 cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
                       cstring_t *output);
--- a/libs/lex_parser/src/lex_parser.c
+++ b/libs/lex_parser/src/lex_parser.c
@@ -1,8 +1,7 @@
 #include <lex_parser.h>

-static inline cbool is_next_line(int ch) { return ch == '\n' || ch == '\r'; }
-
 void lex_parse_skip_endline(core_stream_t *input, core_pos_t *pos) {
+    Assert(input != null && pos != null);
    core_stream_reset_char(input);
    int ch = core_stream_peek_char(input);
    if (ch == '\r') {
@@ -60,7 +59,7 @@ static inline int got_simple_escape(int ch) {

 void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos) {
    core_stream_t *stream = input;
-    Assert(stream != null);
+    Assert(stream != null && pos != null);
    core_stream_reset_char(stream);
    while (1) {
        int ch = core_stream_peek_char(stream);
@@ -70,7 +69,7 @@ void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos) {
        }

        // TODO endline
-        if (is_next_line(ch)) {
+        if (lex_parse_is_endline(ch)) {
            lex_parse_skip_endline(stream, pos);
            return;
        } else {
@@ -82,6 +81,7 @@ void lex_parse_skip_line(core_stream_t *input, core_pos_t *pos) {

 void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) {
    core_stream_t *stream = input;
+    Assert(stream != null && pos != null);
    int ch;
    core_stream_reset_char(stream);
    ch = core_stream_next_char(stream);
@@ -102,7 +102,7 @@ void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) {
            return;
        }

-        if (is_next_line(ch)) {
+        if (lex_parse_is_endline(ch)) {
            lex_parse_skip_endline(stream, pos);
            continue;
        }
@@ -121,15 +121,16 @@ void lex_parse_skip_block_comment(core_stream_t *input, core_pos_t *pos) {

 void lex_parse_skip_whitespace(core_stream_t *input, core_pos_t *pos) {
    core_stream_t *stream = input;
-    Assert(stream != null);
+    Assert(stream != null && pos != null);
    core_stream_reset_char(stream);
    while (1) {
-        int ch = core_stream_next_char(stream);
+        int ch = core_stream_peek_char(stream);

-        if (ch == core_stream_eof) {
+        if (!lex_parse_is_whitespace(ch)) {
            return;
        }

+        core_stream_next_char(stream);
        core_pos_next(pos);
    }
 }
@@ -188,6 +189,7 @@ static inline cbool _lex_parse_uint(core_stream_t *input, core_pos_t *pos,
 */
 int lex_parse_char(core_stream_t *input, core_pos_t *pos) {
    core_stream_t *stream = input;
+    Assert(stream != null && pos != null);
    core_stream_reset_char(stream);
    int ch = core_stream_peek_char(stream);
    int ret = core_stream_eof;
@@ -258,6 +260,7 @@ ERR:
 cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
                       cstring_t *output) {
    core_stream_t *stream = input;
+    Assert(stream != null && pos != null && output != null);
    core_stream_reset_char(stream);
    int ch = core_stream_peek_char(stream);

@@ -279,7 +282,7 @@ cbool lex_parse_string(core_stream_t *input, core_pos_t *pos,
        if (ch == core_stream_eof) {
            LOG_ERROR("Unexpected EOF at string literal");
            goto ERR;
-        } else if (is_next_line(ch)) {
+        } else if (lex_parse_is_endline(ch)) {
            LOG_ERROR("Unexpected newline at string literal");
            goto ERR;
        } else if (ch == '\\') {
@@ -322,6 +325,7 @@ ERR:
 */
 cbool lex_parse_number(core_stream_t *input, core_pos_t *pos, usize *output) {
    core_stream_t *stream = input;
+    Assert(stream != null && pos != null && output != null);
    core_stream_reset_char(stream);
    int ch = core_stream_peek_char(stream);
    int base = 0;
@@ -380,6 +384,7 @@ ERR:
 */
 cbool lex_parse_identifier(core_stream_t *input, core_pos_t *pos,
                           cstring_t *output) {
+    Assert(input != null && pos != null && output != null);
    Assert(cstring_is_empty(output));
    core_stream_t *stream = input;
    core_stream_reset_char(stream);
--- a/libs/pprocessor/README.md
+++ b/libs/pprocessor/README.md
@@ -0,0 +1,4 @@
+
+tests/pp 复制了 TinyCC 的 tests/pp 测试
+
+详情见 [README](tests/pp/README)
--- a/libs/pprocessor/cbuild.toml
+++ b/libs/pprocessor/cbuild.toml
@@ -0,0 +1,8 @@
+[package]
+name = "smcc_pprocesser"
+
+dependencies = [
+    { name = "libcore", path = "../../runtime/libcore" },
+    { name = "libutils", path = "../../runtime/libutils" },
+    { name = "smcc_lex_parser", path = "../lex_parser" },
+]
--- a/libs/pprocessor/include/pp_token.h
+++ b/libs/pprocessor/include/pp_token.h
@@ -0,0 +1,30 @@
+#ifndef __SMCC_PP_TOKEN_H__
+#define __SMCC_PP_TOKEN_H__
+
+/* clang-format off */
+/// Directive table. Each row is X(spelling, standard tag, enum constant).
+/// #elifdef and #elifndef were both introduced by C23, so both carry PP_C23.
+/// The constant PP_TOK_PRAMA keeps its existing spelling because it is
+/// referenced by name from pprocessor.c.
+/// Reference: https://cppreference.cn/w/c/preprocessor
+#define PP_INST_TOKEN \
+    X(define    , PP_STD, PP_TOK_DEFINE    ) \
+    X(undef     , PP_STD, PP_TOK_UNDEF     ) \
+    X(include   , PP_STD, PP_TOK_INCLUDE   ) \
+    X(if        , PP_STD, PP_TOK_IF        ) \
+    X(ifdef     , PP_STD, PP_TOK_IFDEF     ) \
+    X(ifndef    , PP_STD, PP_TOK_IFNDEF    ) \
+    X(else      , PP_STD, PP_TOK_ELSE      ) \
+    X(elif      , PP_STD, PP_TOK_ELIF      ) \
+    X(elifdef   , PP_C23, PP_TOK_ELIFDEF   ) \
+    X(elifndef  , PP_C23, PP_TOK_ELIFNDEF  ) \
+    X(endif     , PP_STD, PP_TOK_ENDIF     ) \
+    X(line      , PP_STD, PP_TOK_LINE      ) \
+    X(embed     , PP_C23, PP_TOK_EMBED     ) \
+    X(error     , PP_STD, PP_TOK_ERROR     ) \
+    X(warning   , PP_C23, PP_TOK_WARNING   ) \
+    X(pragma    , PP_STD, PP_TOK_PRAMA     ) \
+    // END
+/* clang-format on */
+
+#define X(name, type, tok) tok, // keep only the enum constant of each table row
+typedef enum pp_token { PP_INST_TOKEN } pp_token_t; // constants follow the table order
+#undef X
+
+#endif /* __SMCC_PP_TOKEN_H__ */
--- a/libs/pprocessor/include/pprocessor.h
+++ b/libs/pprocessor/include/pprocessor.h
@@ -0,0 +1,72 @@
+// pprocessor.h - 更新后的头文件
+/**
+ * @file pprocessor.h
+ * @brief C语言预处理器核心数据结构与接口
+ */
+
+#ifndef __SMCC_PP_H__
+#define __SMCC_PP_H__
+
+#include <libcore.h>
+#include <libutils.h>
+
+// Kind of macro definition
+typedef enum {
+    MACRO_OBJECT,   // object-like macro
+    MACRO_FUNCTION, // function-like macro
+} macro_type_t;
+
+typedef VEC(cstring_t) macro_list_t; // vector of strings; used for a macro's replacement and parameter lists
+
+// Macro definition record
+typedef struct smcc_macro {
+    cstring_t name;        // macro name
+    macro_type_t type;     // macro kind
+    macro_list_t replaces; // replacement list
+    macro_list_t params;   // parameter list (function-like macros only)
+} smcc_macro_t;
+
+// Conditional-compilation state
+typedef enum {
+    IFState_NONE,  // not inside a conditional block
+    IFState_TRUE,  // condition is true
+    IFState_FALSE, // condition is false
+    IFState_ELSE   // the #else branch has already been taken
+} if_state_t;
+
+// Entry of the conditional-compilation stack
+typedef struct if_stack_item {
+    if_state_t state;
+    int skip; // whether the current section is skipped
+} if_stack_item_t;
+
+// Preprocessor state
+typedef struct smcc_preprocessor {
+    core_stream_t *stream;         // output stream
+    strpool_t strpool;             // string pool
+    hashmap_t macros;              // macro definition table
+    VEC(if_stack_item_t) if_stack; // conditional-compilation stack
+} smcc_pp_t;
+
+/**
+ * @brief Initialize the preprocessor
+ * @param[out] pp preprocessor instance to initialize
+ * @param[in] input pointer to the input stream object
+ * @return pointer to the output stream object
+ */
+core_stream_t *pp_init(smcc_pp_t *pp, core_stream_t *input);
+
+/**
+ * @brief Run preprocessing
+ * @param[in] pp preprocessor instance
+ * @return processing result
+ */
+int pp_process(smcc_pp_t *pp);
+
+/**
+ * @brief Destroy the preprocessor
+ * @param[in] pp preprocessor instance
+ */
+void pp_drop(smcc_pp_t *pp);
+
+#endif /* __SMCC_PP_H__ */
--- a/libs/pprocessor/src/pprocessor.c
+++ b/libs/pprocessor/src/pprocessor.c
@@ -0,0 +1,427 @@
+/**
+ * @file pprocessor.c
+ * @brief C语言预处理器实现
+ */
+
+#include <lex_parser.h>
+#include <pp_token.h>
+#include <pprocessor.h>
+#define PPROCESSER_BUFFER_SIZE (1024)
+
+/**
+ * @brief Hash callback for the macro table.
+ *
+ * The key is only read, so it is taken as a pointer to const; this matches
+ * hash_cmp and the `const void *` callback type the function is cast to in
+ * pp_init.
+ *
+ * @param[in] string Key string.
+ * @return Value produced by smcc_strhash32 for the key's character data.
+ */
+static u32 hash_func(const cstring_t *string) {
+    return smcc_strhash32(cstring_as_cstr(string));
+}
+
+/**
+ * @brief Key comparison callback for the macro table.
+ *
+ * Keys of different size are ordered by size. The sizes are compared
+ * directly instead of being subtracted: narrowing a size difference to int
+ * can yield the wrong sign, or even 0 for keys that differ.
+ *
+ * @param[in] str1 First key.
+ * @param[in] str2 Second key.
+ * @return Negative, zero or positive; zero only when both the sizes and the
+ *         contents (per smcc_strcmp) are equal.
+ */
+static int hash_cmp(const cstring_t *str1, const cstring_t *str2) {
+    if (str1->size != str2->size) {
+        return (str1->size < str2->size) ? -1 : 1;
+    }
+
+    return smcc_strcmp(cstring_as_cstr(str1), cstring_as_cstr(str2));
+}
+
+/**
+ * @brief Register a macro definition in the preprocessor's macro table.
+ *
+ * The name and the two lists are copied by value (struct copy) into a newly
+ * allocated record, which is then stored under its own name field as key.
+ * NOTE(review): the copies are shallow, so the record presumably takes over
+ * the storage behind them; confirm against the cstring and vec
+ * implementations.
+ *
+ * @param[in,out] pp Preprocessor whose table receives the macro.
+ * @param[in] name Macro name.
+ * @param[in] replaces Replacement list, or NULL for an empty one.
+ * @param[in] params Parameter list, or NULL for an empty one.
+ * @param[in] type Macro kind.
+ */
+static void add_macro(smcc_pp_t *pp, const cstring_t *name,
+                      const macro_list_t *replaces, const macro_list_t *params,
+                      macro_type_t type) {
+    smcc_macro_t *macro = smcc_malloc(sizeof(*macro));
+    if (macro == null) {
+        // Same policy as pp_stream_init: report and never touch the pointer.
+        LOG_FATAL("Failed to allocate memory for macro definition");
+        return;
+    }
+    macro->name = *name;
+    macro->type = type;
+
+    if (replaces) {
+        macro->replaces = *replaces;
+    } else {
+        vec_init(macro->replaces);
+    }
+
+    if (params) {
+        macro->params = *params;
+    } else {
+        vec_init(macro->params);
+    }
+
+    hashmap_set(&pp->macros, &macro->name, macro);
+}
+
+// Look up a macro by name; the result is whatever hashmap_get yields for
+// the key in the preprocessor's macro table.
+static smcc_macro_t *find_macro(smcc_pp_t *pp, cstring_t *name) {
+    smcc_macro_t *found = hashmap_get(&pp->macros, name);
+    return found;
+}
+
+// Conditional-compilation scaffolding
+/**
+ * @brief Push a new entry on the conditional-compilation stack.
+ *
+ * Expression evaluation is not implemented yet, so the condition is treated
+ * as false. This keeps the pushed entry well defined instead of reading an
+ * uninitialized variable.
+ *
+ * @param[in,out] pp Preprocessor whose if_stack receives the entry.
+ * @param[in] condition Condition text; currently unused.
+ */
+static void handle_if(smcc_pp_t *pp, const char *condition) {
+    if_stack_item_t item;
+    int cond_value = 0;
+    (void)condition;
+    // TODO: cond_value = evaluate_condition(pp, condition);
+
+    item.state = cond_value ? IFState_TRUE : IFState_FALSE;
+    item.skip = !cond_value;
+    vec_push(pp->if_stack, item);
+}
+
+/**
+ * @brief Flip the innermost conditional entry into its #else branch.
+ *
+ * Does nothing when the stack is empty (no matching #if) or when the
+ * innermost entry has already seen an #else.
+ *
+ * @param[in,out] pp Preprocessor whose if_stack is updated.
+ */
+static void handle_else(smcc_pp_t *pp) {
+    if (pp->if_stack.size != 0) {
+        if_stack_item_t *frame = &vec_at(pp->if_stack, pp->if_stack.size - 1);
+        if (frame->state != IFState_ELSE) {
+            frame->skip = !frame->skip;
+            frame->state = IFState_ELSE;
+        }
+        // else: error, #else appeared twice
+    }
+    // else: error, no matching #if
+}
+
+static void handle_include(smcc_pp_t *pp, const char *filename,
+                           int system_header) {
+    // Not implemented yet. Planned steps: resolve the file path,
+    // create a new input stream,
+    // and process the included file recursively.
+}
+
+/**
+ * @brief Read an identifier of the form [A-Za-z_][A-Za-z0-9_]* from a stream.
+ *
+ * Characters are consumed only while they belong to the identifier; the
+ * first character that does not is left in the stream.
+ *
+ * @param[in,out] stream Stream to read from.
+ * @return A new string holding the identifier. It is empty, and a warning is
+ *         logged, when the next character cannot start an identifier.
+ */
+static cstring_t parse_identifier(core_stream_t *stream) {
+    cstring_t result = cstring_new();
+    core_stream_reset_char(stream);
+    int c = core_stream_peek_char(stream);
+
+    // An identifier starts with a letter or an underscore.
+    const int starts_identifier =
+        (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
+    if (!starts_identifier) {
+        LOG_WARN("Invalid identifier");
+        return result;
+    }
+
+    for (;;) {
+        cstring_push(&result, (char)c);
+        core_stream_next_char(stream); // consume the character just stored
+        c = core_stream_peek_char(stream);
+
+        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+            (c >= '0' && c <= '9') || c == '_') {
+            continue;
+        }
+        break;
+    }
+
+    return result;
+}
+
+// Skip blank characters (' ' and '\t'); stops at end of input or at the
+// first character that is neither, leaving that character unread.
+static void skip_whitespace(core_stream_t *stream) {
+    core_stream_reset_char(stream);
+    for (;;) {
+        const int c = core_stream_peek_char(stream);
+        if (c == core_stream_eof || (c != ' ' && c != '\t')) {
+            return;
+        }
+        core_stream_next_char(stream);
+    }
+}
+
+#define X(name, type, tok) SMCC_STR(name), // one entry per row of PP_INST_TOKEN
+static const char *token_strings[] = {PP_INST_TOKEN};
+#undef X
+
+static const struct {
+    const char *name; // directive spelling
+    pp_token_t tok;   // enum constant of the same table row
+} keywords[] = {
+#define X(name, type, tok) {#name, tok}, // stringized spelling paired with its token
+    PP_INST_TOKEN
+#undef X
+};
+
+/**
+ * @brief Find a directive spelling in the `keywords` table.
+ *
+ * The table follows the row order of PP_INST_TOKEN, which is not
+ * alphabetical, so a binary search cannot be used on it: with that order a
+ * spelling such as "undef" is never reached. The table is small, so it is
+ * scanned linearly instead.
+ *
+ * @param[in] name Candidate spelling; at most @p len characters are read.
+ * @param[in] len Number of characters in @p name.
+ * @return Index of the matching table entry, or -1 when the spelling is not
+ *         a directive name.
+ */
+static inline int keyword_cmp(const char *name, int len) {
+    const int count = (int)(sizeof(keywords) / sizeof(keywords[0]));
+
+    if (name == null || len <= 0) {
+        return -1;
+    }
+
+    for (int idx = 0; idx < count; idx++) {
+        const char *key = keywords[idx].name;
+        int i = 0;
+
+        // Walk the common prefix without reading past the end of the key.
+        while (i < len && key[i] != '\0' && key[i] == name[i]) {
+            i++;
+        }
+
+        // Match only when the whole input was consumed and the key ends too.
+        if (i == len && key[i] == '\0') {
+            return idx;
+        }
+    }
+    return -1; // Not a keyword.
+}
+
+typedef struct pp_stream {
+    core_stream_t stream; // first member: pp_stream_init returns this object cast to core_stream_t *
+    core_stream_t *input; // stream the preprocessor reads from
+    smcc_pp_t *self;      // preprocessor whose macro table is consulted
+
+    usize size; // number of bytes currently stored in buffer
+    usize pos;  // index of the next buffered byte handed out by next_char
+    char buffer[PPROCESSER_BUFFER_SIZE]; // pending output text
+} pp_stream_t;
+
+/**
+ * @brief Collect a macro parameter list or replacement list from the input.
+ *
+ * In parameter mode (@p is_param true) blanks are skipped, ',' ends the
+ * current entry and ')' ends the list without being consumed. In
+ * replacement mode blanks end the current entry and a line terminator ends
+ * the list without being consumed. In both modes every other character is
+ * appended to the current entry, and the entry in progress is pushed when
+ * the loop ends.
+ *
+ * @param[in,out] _stream Preprocessor stream; characters come from its input.
+ * @param[out] list Vector that receives the entries; initialized here.
+ * @param[in] is_param Selects parameter mode or replacement mode.
+ * @return false when a line terminator appears inside a parameter list,
+ *         true otherwise. On false, entries already pushed stay in @p list.
+ */
+static cbool parse_list(pp_stream_t *_stream, macro_list_t *list,
+                        cbool is_param) {
+    Assert(_stream != null);
+    Assert(list != null);
+    core_stream_t *stream = _stream->input;
+    Assert(stream != null);
+    core_stream_reset_char(stream);
+
+    vec_init(*list);
+    int ch;
+    cstring_t str = cstring_new();
+    // The position is only scratch space for the lex_parse helpers, but they
+    // update it, so it must not be left uninitialized.
+    core_pos_t pos = {0};
+
+    while ((ch = core_stream_peek_char(stream)) != core_stream_eof) {
+        if (is_param) {
+            // ( param ) ( param, ... ) ( ... )
+            if (lex_parse_is_whitespace(ch)) {
+                // TODO #define ( A A , B ) need ERROR
+                lex_parse_skip_whitespace(stream, &pos);
+                core_stream_reset_char(stream);
+                // Re-read the next character; falling through would consume
+                // it and store the blank in its place.
+                continue;
+            } else if (ch == ',') {
+                vec_push(*list, str);
+                str = cstring_new();
+                core_stream_next_char(stream);
+                continue;
+            } else if (ch == ')') {
+                break;
+            } else if (lex_parse_is_endline(ch)) {
+                LOG_ERROR("Invalid parameter list");
+                cstring_free(&str);
+                return false;
+            }
+        } else {
+            // Replacement list
+            if (lex_parse_is_whitespace(ch)) {
+                lex_parse_skip_whitespace(stream, &pos);
+                vec_push(*list, str);
+                str = cstring_new();
+                core_stream_reset_char(stream);
+                continue;
+            } else if (lex_parse_is_endline(ch)) {
+                break;
+            }
+        }
+        core_stream_next_char(stream);
+        cstring_push(&str, (char)ch);
+    }
+    vec_push(*list, str);
+    return true;
+}
+
+/**
+ * @brief Parse one preprocessing directive from the stream's input.
+ *
+ * Expects the input to be positioned at '#'. Only object-like `#define` is
+ * handled: the macro is registered through add_macro and the rest of the
+ * line is skipped. Function-like `#define` and parse errors end in the ERR
+ * path; every other known directive hits TODO().
+ *
+ * @param[in,out] _stream Preprocessor stream whose input is consumed and
+ *                        whose preprocessor receives new macros.
+ */
+static void parse_directive(pp_stream_t *_stream) {
+    Assert(_stream != null);
+    core_stream_t *stream = _stream->input;
+    Assert(stream != null);
+
+    // Scratch position for lex_parse_skip_line; it is updated there, so it
+    // must start from a defined value.
+    core_pos_t pos = {0};
+    core_stream_reset_char(stream);
+    // Skip '#' and the blanks that follow it
+    if (core_stream_peek_char(stream) != '#') {
+        LOG_WARN("Invalid directive");
+        return;
+    }
+    core_stream_next_char(stream);
+
+    // TODO allow the null directive ('#' followed by a newline), no effect
+    skip_whitespace(stream);
+    // Parse the directive name
+    cstring_t directive = parse_identifier(stream);
+    if (cstring_is_empty(&directive)) {
+        LOG_ERROR("expected identifier");
+        goto ERR;
+    }
+    skip_whitespace(stream);
+    core_stream_reset_char(stream);
+
+    pp_token_t token =
+        keyword_cmp(cstring_as_cstr(&directive), cstring_len(&directive));
+    switch (token) {
+    case PP_TOK_DEFINE: {
+        cstring_t name = parse_identifier(stream);
+        if (cstring_is_empty(&name)) {
+            LOG_ERROR("expected identifier");
+            goto ERR;
+        }
+        skip_whitespace(stream);
+        core_stream_reset_char(stream);
+
+        int ch = core_stream_peek_char(stream);
+        if (ch == '(') {
+            // Function-like macros are not supported yet: the parameter list
+            // is read and then the directive is rejected.
+            macro_list_t params;
+            parse_list(_stream, &params, true);
+            ch = core_stream_next_char(stream);
+            if (ch != ')') {
+                // TODO report the unterminated parameter list
+            }
+            // TODO release the strings collected in params
+            cstring_free(&name);
+            goto ERR;
+        }
+        macro_list_t replacement;
+        parse_list(_stream, &replacement, false);
+        // name and replacement are copied by value into the macro record.
+        add_macro(_stream->self, &name, &replacement, NULL, MACRO_OBJECT);
+        break;
+    }
+    case PP_TOK_UNDEF:
+    case PP_TOK_INCLUDE:
+    case PP_TOK_IF:
+    case PP_TOK_IFDEF:
+    case PP_TOK_IFNDEF:
+    case PP_TOK_ELSE:
+    case PP_TOK_ELIF:
+    case PP_TOK_ELIFDEF:
+    case PP_TOK_ELIFNDEF:
+    case PP_TOK_ENDIF:
+    case PP_TOK_LINE:
+    case PP_TOK_EMBED:
+    case PP_TOK_ERROR:
+    case PP_TOK_WARNING:
+    case PP_TOK_PRAMA:
+        TODO();
+        break;
+    default:
+        LOG_WARN("Unknown preprocessor directive: %s",
+                 cstring_as_cstr(&directive));
+    }
+
+    // TODO: win \r\n linux \n mac \r => all need transport to \n
+    core_stream_reset_char(stream);
+    lex_parse_skip_line(stream, &pos);
+
+    cstring_free(&directive);
+    return;
+ERR:
+    // TODO skip line
+    cstring_free(&directive);
+    LOG_FATAL("Unhandled preprocessor directive");
+}
+
+/**
+ * @brief Append a string to the stream's pending-output buffer.
+ *
+ * The bytes are written after the data already buffered, and exactly the
+ * string's length is copied. Copying to the start of the buffer with the
+ * accumulated size would overwrite earlier output and read past the end of
+ * the source string.
+ *
+ * @param[in,out] stream Stream whose buffer grows.
+ * @param[in] str String to append.
+ */
+static inline void stream_push_string(pp_stream_t *stream, cstring_t *str) {
+    const usize len = cstring_len(str);
+    // Checked before the copy, in a form that cannot wrap around.
+    Assert(stream->size <= PPROCESSER_BUFFER_SIZE);
+    Assert(len <= PPROCESSER_BUFFER_SIZE - stream->size);
+    smcc_memcpy(stream->buffer + stream->size, cstring_as_cstr(str), len);
+    stream->size += len;
+}
+
+/**
+ * @brief Append one character to the stream's pending-output buffer.
+ *
+ * The capacity check runs before the store, so a full buffer is reported
+ * before anything is written past its end.
+ *
+ * @param[in,out] stream Stream whose buffer grows by one byte.
+ * @param[in] ch Character to append.
+ */
+static inline void stream_push_char(pp_stream_t *stream, int ch) {
+    Assert(stream->size < PPROCESSER_BUFFER_SIZE);
+    stream->buffer[stream->size++] = (char)ch;
+}
+
+/**
+ * @brief next_char callback of the preprocessor output stream.
+ *
+ * Buffered text is handed out first. When the buffer is exhausted the input
+ * is inspected: a '#' is parsed as a directive and the read is retried; an
+ * identifier is read in full and then either buffered unchanged (no macro
+ * of that name) or replaced by the object-like macro's replacement entries
+ * joined with single spaces. Any other character is passed through from the
+ * input unchanged.
+ *
+ * Buffered bytes are returned through unsigned char so that bytes >= 0x80
+ * are never returned as negative values.
+ * NOTE(review): this assumes core_stream_eof is negative; confirm.
+ *
+ * @param[in,out] _stream The stream object; must be the core_stream_t
+ *                        embedded as first member of a pp_stream_t.
+ * @return The next output character, or whatever the input stream returns
+ *         when the character is passed through.
+ */
+static int next_char(core_stream_t *_stream) {
+    pp_stream_t *stream = (pp_stream_t *)_stream;
+    Assert(stream != null);
+READ_BUF:
+    if (stream->size != 0) {
+        if (stream->pos < stream->size) {
+            return (unsigned char)stream->buffer[stream->pos++];
+        } else {
+            // Buffer drained: reset it and fall through to the input.
+            stream->size = 0;
+            stream->pos = 0;
+        }
+    }
+RETRY:
+    core_stream_reset_char(stream->input);
+    int ch = core_stream_peek_char(stream->input);
+    if (ch == '#') {
+        parse_directive(stream);
+        goto RETRY;
+    } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+               ch == '_') {
+        cstring_t identifier = parse_identifier(stream->input);
+        smcc_macro_t *macro = find_macro(stream->self, &identifier);
+        if (macro == null) {
+            // Not a macro: emit the identifier text as it was read.
+            stream_push_string(stream, &identifier);
+            cstring_free(&identifier);
+            goto READ_BUF;
+        } else {
+            cstring_free(&identifier);
+        }
+        if (macro->type == MACRO_OBJECT) {
+            for (usize i = 0; i < macro->replaces.size; ++i) {
+                stream_push_string(stream, &vec_at(macro->replaces, i));
+                // usize never using `-`
+                if (i + 1 < macro->replaces.size)
+                    stream_push_char(stream, ' ');
+            }
+            goto READ_BUF;
+        } else if (macro->type == MACRO_FUNCTION) {
+            TODO();
+        }
+        UNREACHABLE();
+    }
+    return core_stream_next_char(stream->input);
+}
+
+/**
+ * @brief Create the preprocessor's output stream.
+ *
+ * The arguments are validated before allocating, so a null @p self no
+ * longer leaks the stream. After a failed allocation the function returns
+ * null right after logging instead of continuing.
+ *
+ * @param[in] self Preprocessor that owns the stream.
+ * @param[in] input Stream the preprocessor reads from.
+ * @return The new stream viewed as core_stream_t, or null when @p self is
+ *         null or the allocation fails.
+ */
+static core_stream_t *pp_stream_init(smcc_pp_t *self, core_stream_t *input) {
+    if (self == null) {
+        return null;
+    }
+
+    pp_stream_t *stream = smcc_malloc(sizeof(*stream));
+    if (stream == null) {
+        LOG_FATAL("Failed to allocate memory for output stream");
+        return null;
+    }
+
+    stream->self = self;
+    stream->input = input;
+    stream->size = 0;
+    stream->pos = 0;
+
+    // Only next_char is provided; the other callbacks stay unset.
+    stream->stream.name = cstring_from_cstr("pipe_stream");
+    stream->stream.free_stream = null;
+    stream->stream.next_char = next_char;
+    stream->stream.peek_char = null;
+    stream->stream.reset_char = null;
+    stream->stream.read_buf = null;
+    return (core_stream_t *)stream;
+}
+
+/**
+ * @brief Initialize a preprocessor and create its output stream.
+ *
+ * Sets up the output stream, the macro table with its hash and compare
+ * callbacks, and the conditional-compilation stack. The string pool is not
+ * touched here.
+ *
+ * @param[out] pp Preprocessor instance to initialize.
+ * @param[in] input Stream to read source text from.
+ * @return The output stream, or null when @p pp or @p input is null.
+ */
+core_stream_t *pp_init(smcc_pp_t *pp, core_stream_t *input) {
+    if (pp == null || input == null) {
+        return null;
+    }
+
+    // pp_stream_init allocates the stream and reports allocation failure
+    // itself; no separate allocation is needed here.
+    pp->stream = pp_stream_init(pp, input);
+    Assert(pp->stream != null);
+
+    hashmap_init(&pp->macros);
+    pp->macros.hash_func = (u32 (*)(const void *))hash_func;
+    pp->macros.key_cmp = (int (*)(const void *, const void *))hash_cmp;
+
+    // handle_if pushes onto this stack, so it must start out initialized.
+    vec_init(pp->if_stack);
+    return pp->stream;
+}
+
+// Destroy the preprocessor
+void pp_drop(smcc_pp_t *pp) {
+    if (pp == NULL)
+        return;
+
+    // Release all macro definitions
+    // NOTE: this needs hashmap iteration and cleanup functions to be implemented
+    hashmap_drop(&pp->macros);
+
+    // Release the string pool (not done yet)
+    // strpool_destroy(&pp->strpool);
+
+    // Release the conditional-compilation stack (not done yet)
+    // Resources held by each stack element would need releasing, if any
+    // vec_free(pp->if_stack);
+
+    // Release the stream's name string (the stream object itself is not freed here)
+    cstring_free(&pp->stream->name);
+}
--- a/libs/pprocessor/tests/pp/01.c
+++ b/libs/pprocessor/tests/pp/01.c
@@ -0,0 +1,6 @@
+#define hash_hash # ## #
+#define mkstr(a) # a
+#define in_between(a) mkstr(a)
+#define join(c, d) in_between(c hash_hash d)
+char p[] = join(x, y);
+// char p[] = "x ## y";
--- a/libs/pprocessor/tests/pp/01.expect
+++ b/libs/pprocessor/tests/pp/01.expect
@@ -0,0 +1 @@
+char p[] = "x ## y";
--- a/libs/pprocessor/tests/pp/02.c
+++ b/libs/pprocessor/tests/pp/02.c
@@ -0,0 +1,28 @@
+#define x 3
+#define f(a) f(x * (a))
+#undef x
+#define x 2
+#define g f
+#define z z[0]
+#define h g(~
+#define m(a) a(w)
+#define w 0,1
+#define t(a) a
+#define p() int
+#define q(x) x
+#define r(x,y) x ## y
+#define str(x) # x
+f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
+g(x+(3,4)-w) | h 5) & m
+(f)^m(m);
+char c[2][6] = { str(hello), str() };
+/*
+ * f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
+ * f(2 * (2+(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);
+ * char c[2][6] = { "hello", "" };
+ */
+#define L21 f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
+#define L22 g(x+(3,4)-w) | h 5) & m\
+(f)^m(m);
+L21
+L22
--- a/libs/pprocessor/tests/pp/02.expect
+++ b/libs/pprocessor/tests/pp/02.expect
@@ -0,0 +1,5 @@
+f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
+f(2 * (2 +(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);
+char c[2][6] = { "hello", "" };
+f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
+f(2 * (2 +(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);
--- a/libs/pprocessor/tests/pp/03.c
+++ b/libs/pprocessor/tests/pp/03.c
@@ -0,0 +1,15 @@
+#define str(s) # s
+#define xstr(s) str(s)
+#define debug(s, t) printf("x" # s "= %d, x" # t "= %s", \
+	x ## s, x ## t)
+#define INCFILE(n) vers ## n
+#define glue(a, b) a ## b
+#define xglue(a, b) glue(a, b)
+#define HIGHLOW "hello"
+#define LOW LOW ", world"
+debug(1, 2);
+fputs(str(strncmp("abc\0d", "abc", '\4') // this goes away
+	== 0) str(: @\n), s);
+\#include xstr(INCFILE(2).h)
+glue(HIGH, LOW);
+xglue(HIGH, LOW)
--- a/libs/pprocessor/tests/pp/03.expect
+++ b/libs/pprocessor/tests/pp/03.expect
@@ -0,0 +1,5 @@
+printf("x" "1" "= %d, x" "2" "= %s", x1, x2);
+fputs("strncmp(\"abc\\0d\", \"abc\", '\\4') == 0" ": @\n", s);
+\#include "vers2.h"
+"hello";
+"hello" ", world"
--- a/libs/pprocessor/tests/pp/04.c
+++ b/libs/pprocessor/tests/pp/04.c
@@ -0,0 +1,4 @@
+#define foobar 1
+#define C(x,y) x##y
+#define D(x) (C(x,bar))
+D(foo)
--- a/libs/pprocessor/tests/pp/04.expect
+++ b/libs/pprocessor/tests/pp/04.expect
@@ -0,0 +1 @@
+(1)
--- a/libs/pprocessor/tests/pp/05.c
+++ b/libs/pprocessor/tests/pp/05.c
@@ -0,0 +1,7 @@
+#define t(x,y,z) x ## y ## z
+#define xxx(s) int s[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,), \
+        t(10,,), t(,11,), t(,,12), t(,,) };
+
+int j[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,),
+	t(10,,), t(,11,), t(,,12), t(,,) };
+xxx(j)
--- a/libs/pprocessor/tests/pp/05.expect
+++ b/libs/pprocessor/tests/pp/05.expect
@@ -0,0 +1,3 @@
+int j[] = { 123, 45, 67, 89,
+ 10, 11, 12, };
+int j[] = { 123, 45, 67, 89, 10, 11, 12, };
--- a/libs/pprocessor/tests/pp/06.c
+++ b/libs/pprocessor/tests/pp/06.c
@@ -0,0 +1,5 @@
+#define X(a,b, \
+	c,d) \
+	foo
+
+X(1,2,3,4)
--- a/libs/pprocessor/tests/pp/06.expect
+++ b/libs/pprocessor/tests/pp/06.expect
@@ -0,0 +1 @@
+foo
--- a/libs/pprocessor/tests/pp/07.c
+++ b/libs/pprocessor/tests/pp/07.c
@@ -0,0 +1,4 @@
+#define a() YES
+#define b() a
+b()
+b()()
--- a/libs/pprocessor/tests/pp/07.expect
+++ b/libs/pprocessor/tests/pp/07.expect
@@ -0,0 +1,2 @@
+a
+YES
--- a/libs/pprocessor/tests/pp/08.c
+++ b/libs/pprocessor/tests/pp/08.c
@@ -0,0 +1,4 @@
+// test macro expansion in arguments
+#define s_pos              s_s.s_pos
+#define foo(x) (x)
+foo(hej.s_pos)
--- a/libs/pprocessor/tests/pp/08.expect
+++ b/libs/pprocessor/tests/pp/08.expect
@@ -0,0 +1 @@
+(hej.s_s.s_pos)
--- a/libs/pprocessor/tests/pp/09.c
+++ b/libs/pprocessor/tests/pp/09.c
@@ -0,0 +1,4 @@
+#define C(a,b,c) a##b##c
+#define N(x,y) C(x,_,y)
+#define A_O aaaaoooo
+N(A,O)
--- a/libs/pprocessor/tests/pp/09.expect
+++ b/libs/pprocessor/tests/pp/09.expect
@@ -0,0 +1 @@
+aaaaoooo
--- a/libs/pprocessor/tests/pp/10.c
+++ b/libs/pprocessor/tests/pp/10.c
@@ -0,0 +1,10 @@
+#define f(x) x
+#define g(x) f(x) f(x
+#define i(x) g(x)) g(x
+#define h(x) i(x))) i(x
+#define k(x) i(x))) i(x))))
+f(x)
+g(x))
+i(x)))
+h(x))))
+k(x))))
--- a/libs/pprocessor/tests/pp/10.expect
+++ b/libs/pprocessor/tests/pp/10.expect
@@ -0,0 +1,5 @@
+x
+x x
+x x x x
+x x x x x x x x
+x x x x x x x x))))
--- a/libs/pprocessor/tests/pp/11.c
+++ b/libs/pprocessor/tests/pp/11.c
@@ -0,0 +1,36 @@
+#define D1(s, ...) s
+#define D2(s, ...) s D1(__VA_ARGS__)
+#define D3(s, ...) s D2(__VA_ARGS__)
+#define D4(s, ...) s D3(__VA_ARGS__)
+
+D1(a)
+D2(a, b)
+D3(a, b, c)
+D4(a, b, c, d)
+
+x D4(a, b, c, d) y
+x D4(a, b, c) y
+x D4(a, b) y
+x D4(a) y
+x D4() y
+
+#define GNU_COMMA(X,Y...) X,##Y
+
+x GNU_COMMA(A,B,C) y
+x GNU_COMMA(A,B) y
+x GNU_COMMA(A) y
+x GNU_COMMA() y
+
+#define __sun_attr___noreturn__ __attribute__((__noreturn__))
+#define ___sun_attr_inner(__a) __sun_attr_##__a
+#define __sun_attr__(__a) ___sun_attr_inner __a
+#define __NORETURN __sun_attr__((__noreturn__))
+__NORETURN
+#define X(...)
+#define Y(...)  1 __VA_ARGS__ 2
+Y(X X() ())
+
+#define DDD(A, B) D_ ## B ## _D_ ## A
+#define CCC(X, ...) DDD(X,## __VA_ARGS__)
+/* must be D_B_D_A (not D_B _D_A) */
+CCC(A,B)
--- a/libs/pprocessor/tests/pp/11.expect
+++ b/libs/pprocessor/tests/pp/11.expect
@@ -0,0 +1,16 @@
+a
+a b
+a b c
+a b c d
+x a b c d y
+x a b c y
+x a b y
+x a y
+x  y
+x A,B,C y
+x A,B y
+x A y
+x  y
+__attribute__((__noreturn__))
+1 2
+D_B_D_A
--- a/libs/pprocessor/tests/pp/12.S
+++ b/libs/pprocessor/tests/pp/12.S
@@ -0,0 +1,8 @@
+#define SRC(y...)                       \
+        9999: y;                        \
+        .section __ex_table, "a";       \
+        .long 9999b, 6001f      ;       \
+        // .previous
+
+    SRC(1: movw (%esi), %bx)
+6001:
--- a/libs/pprocessor/tests/pp/12.expect
+++ b/libs/pprocessor/tests/pp/12.expect
@@ -0,0 +1,2 @@
+    9999: 1: movw (%esi), %bx; .section __ex_table, "a"; .long 9999b, 6001f ;
+6001:
--- a/libs/pprocessor/tests/pp/13.S
+++ b/libs/pprocessor/tests/pp/13.S
@@ -0,0 +1,6 @@
+# `modelist' label. Each video mode record looks like:
+#ifdef AAA
+# modelist' label. Each video mode record looks like:
+#endif
+.text
+endtext:
--- a/libs/pprocessor/tests/pp/13.expect
+++ b/libs/pprocessor/tests/pp/13.expect
@@ -0,0 +1,2 @@
+.text
+endtext:
--- a/libs/pprocessor/tests/pp/14.c
+++ b/libs/pprocessor/tests/pp/14.c
@@ -0,0 +1,13 @@
+#define W Z
+#define Z(X) W(X,2)
+#define Y(X) Z(X)
+#define X Y
+return X(X(1));
+
+#define P Q
+#define Q(n) P(n,2)
+return P(1);
+
+#define A (B * B)
+#define B (A + A)
+return A + B;
--- a/libs/pprocessor/tests/pp/14.expect
+++ b/libs/pprocessor/tests/pp/14.expect
@@ -0,0 +1,3 @@
+return Z(Z(1,2),2);
+return Q(1,2);
+return ((A + A) * (A + A)) + ((B * B) + (B * B));
--- a/libs/pprocessor/tests/pp/15.c
+++ b/libs/pprocessor/tests/pp/15.c
@@ -0,0 +1,18 @@
+// insert a space between two tokens if otherwise they
+// would form a single token when read back
+
+#define n(x) x
+
+return (n(long)n(double))d;
+return n(A)n(++)n(+)n(B);
+return n(A)n(+)n(++)n(B);
+return n(A)n(++)n(+)n(+)n(B);
+
+// not a hex float
+return n(0x1E)n(-1);
+
+// unlike gcc but correct
+// XXX: return n(x)+n(x)-n(1)+n(1)-2;
+
+// unlike gcc, but cannot appear in valid C
+// XXX: return n(x)n(x)n(1)n(2)n(x);
--- a/libs/pprocessor/tests/pp/15.expect
+++ b/libs/pprocessor/tests/pp/15.expect
@@ -0,0 +1,5 @@
+return (long double)d;
+return A+++B;
+return A+ ++B;
+return A+++ +B;
+return 0x1E -1;
--- a/libs/pprocessor/tests/pp/16.c
+++ b/libs/pprocessor/tests/pp/16.c
@@ -0,0 +1,3 @@
+/* The following should warn */
+#define A ...
+#define A <<=
--- a/libs/pprocessor/tests/pp/16.expect
+++ b/libs/pprocessor/tests/pp/16.expect
@@ -0,0 +1,2 @@
+
+16.c:3: warning: A redefined
--- a/libs/pprocessor/tests/pp/17.c
+++ b/libs/pprocessor/tests/pp/17.c
@@ -0,0 +1,14 @@
+#define STR1(u) # u
+#define pass(a) a
+#define __ASM_REG(reg)         STR1(one##reg)
+#define _ASM_DX         __ASM_REG(tok)
+X162 pass(__ASM_REG(tok))
+X161 pass(_ASM_DX)
+X163 pass(STR1(one##tok))
+
+X170 pass(x ## y)
+X171 pass(x pass(##) y)
+
+#define Y(x) Z(x)
+#define X Y
+X180 return X(X(1));
--- a/libs/pprocessor/tests/pp/17.expect
+++ b/libs/pprocessor/tests/pp/17.expect
@@ -0,0 +1,6 @@
+X162 "onetok"
+X161 "onetok"
+X163 "one##tok"
+X170 x ## y
+X171 x ## y
+X180 return Z(Z(1));
--- a/libs/pprocessor/tests/pp/18.c
+++ b/libs/pprocessor/tests/pp/18.c
@@ -0,0 +1,30 @@
+#define M_RETI_ARG27(x,y,z,aa, ...)    aa
+#define M_RET_ARG27(...)        M_RETI_ARG27(__VA_ARGS__)
+#define M_COMMA_P(...)          M_RET_ARG27(__VA_ARGS__, 1, 1, 0, useless)
+#define M_EMPTYI_DETECT(...)    0, 1,
+#define M_EMPTYI_P_C1(...)      M_COMMA_P(M_EMPTYI_DETECT __VA_ARGS__ () )
+#define EX
+#define empty(x)
+#define fnlike(x) yeah x
+/* If the following macro is called with empty arg (X183), the use
+   of 'x' between fnlike and '(' doesn't hinder the recognition of this
+   being a further fnlike macro invocation.  */
+#define usefnlike(x) fnlike x (x)
+X181 M_EMPTYI_P_C1()
+X182 M_EMPTYI_P_C1(x)
+X183 usefnlike()
+
+#define ABC(x) ABC : x
+#define A(a,b,c) a ## b ## c
+#define B(a,b,c) A(a,b,c)
+#define C(a,b,c) a b c
+B(
+    C(A,C(,,),),
+    C(B(,,),B,B(,,)),
+    C(B(,,),B(,,),C)
+    )/* */(a b c)
+
+#define TEST(x) TEST : x
+#define K(a,b) a ## b
+#define L(a,b) K( TE  a , b  ST )
+L(,)(t e s t)
--- a/libs/pprocessor/tests/pp/18.expect
+++ b/libs/pprocessor/tests/pp/18.expect
@@ -0,0 +1,5 @@
+X181 1
+X182 0
+X183 yeah
+ABC : a b c
+TEST : t e s t
--- a/libs/pprocessor/tests/pp/19.c
+++ b/libs/pprocessor/tests/pp/19.c
@@ -0,0 +1,101 @@
+#define M_C2I(a, ...)       a ## __VA_ARGS__
+#define M_C(a, ...)         M_C2I(a, __VA_ARGS__)
+#define M_C3I(a, b, ...)    a ## b ## __VA_ARGS__
+#define M_C3(a, b, ...)     M_C3I(a ,b, __VA_ARGS__)
+
+#define M_RETI_ARG2(a, b, ...)  b
+#define M_RET_ARG2(...)         M_RETI_ARG2(__VA_ARGS__)
+#define M_RETI_ARG27(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,aa, ...)    aa
+#define M_RET_ARG27(...)        M_RETI_ARG27(__VA_ARGS__)
+
+#define M_TOBOOLI_0                 1, 0,
+#define M_BOOL(x)                   M_RET_ARG2(M_C(M_TOBOOLI_, x), 1, useless)
+
+#define M_IFI_0(true_macro, ...)    __VA_ARGS__
+#define M_IFI_1(true_macro, ...)    true_macro
+#define M_IF(c)                     M_C(M_IFI_, M_BOOL(c))
+
+#define M_FLAT(...)                 __VA_ARGS__
+#define M_INVI_0                    1
+#define M_INVI_1                    0
+#define M_INV(x)                    M_C(M_INVI_, x)
+
+#define M_ANDI_00                   0
+#define M_ANDI_01                   0
+#define M_ANDI_10                   0
+#define M_ANDI_11                   1
+#define M_AND(x,y)                  M_C3(M_ANDI_, x, y)
+
+#define M_ORI_00                    0
+#define M_ORI_01                    1
+#define M_ORI_10                    1
+#define M_ORI_11                    1
+#define M_OR(x,y)                   M_C3(M_ORI_, x, y)
+
+#define M_COMMA_P(...)              M_RET_ARG27(__VA_ARGS__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, useless)
+
+#define M_EMPTYI_DETECT(...)        0, 1,
+#define M_EMPTYI_P_C1(...)          M_COMMA_P(M_EMPTYI_DETECT __VA_ARGS__ ())
+#define M_EMPTYI_P_C2(...)          M_COMMA_P(M_EMPTYI_DETECT __VA_ARGS__)
+#define M_EMPTYI_P_C3(...)          M_COMMA_P(__VA_ARGS__ () )
+#define M_EMPTY_P(...)              M_AND(M_EMPTYI_P_C1(__VA_ARGS__), M_INV(M_OR(M_OR(M_EMPTYI_P_C2(__VA_ARGS__), M_COMMA_P(__VA_ARGS__)),M_EMPTYI_P_C3(__VA_ARGS__))))
+#define M_APPLY_FUNC2B(func, arg1, arg2)        \
+  M_IF(M_EMPTY_P(arg2))(,func(arg1, arg2))
+#define M_MAP2B_0(func, data, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z,...) \
+  M_APPLY_FUNC2B(func, data, a) M_APPLY_FUNC2B(func, data, b) M_APPLY_FUNC2B(func, data, c) \
+  M_APPLY_FUNC2B(func, data, d) M_APPLY_FUNC2B(func, data, e) M_APPLY_FUNC2B(func, data, f) \
+  M_APPLY_FUNC2B(func, data, g) M_APPLY_FUNC2B(func, data, h) M_APPLY_FUNC2B(func, data, i) \
+  M_APPLY_FUNC2B(func, data, j) M_APPLY_FUNC2B(func, data, k) M_APPLY_FUNC2B(func, data, l) \
+  M_APPLY_FUNC2B(func, data, m) M_APPLY_FUNC2B(func, data, n) M_APPLY_FUNC2B(func, data, o) \
+  M_APPLY_FUNC2B(func, data, p) M_APPLY_FUNC2B(func, data, q) M_APPLY_FUNC2B(func, data, r) \
+  M_APPLY_FUNC2B(func, data, s) M_APPLY_FUNC2B(func, data, t) M_APPLY_FUNC2B(func, data, u) \
+  M_APPLY_FUNC2B(func, data, v) M_APPLY_FUNC2B(func, data, w) M_APPLY_FUNC2B(func, data, x) \
+  M_APPLY_FUNC2B(func, data, y) M_APPLY_FUNC2B(func, data, z)
+#define M_MAP2B(f, ...) M_MAP2B_0(f, __VA_ARGS__, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , )
+#define M_INIT_INIT(a)           ,a,
+
+#define M_GET_METHOD(method, method_default, ...)                       \
+  M_RET_ARG2 (M_MAP2B(M_C, M_C3(M_, method, _), __VA_ARGS__), method_default,)
+
+#define M_TEST_METHOD_P(method, oplist)                         \
+  M_BOOL(M_GET_METHOD (method, 0, M_FLAT oplist))
+
+#define TRUE 1
+#define TEST1(n)                                \
+  M_IF(n)(ok,nok)
+#define TEST2(op)                               \
+  M_TEST_METHOD_P(INIT, op)
+#define TEST3(op)                               \
+  M_IF(M_TEST_METHOD_P(INIT, op))(ok, nok)
+#define TEST4(op) \
+  TEST1(TEST2(op))
+#define KO(a) ((void)1)
+
+/* This checks that the various expansions that ultimately lead to
+   something like 'KO(arg,arg)', where 'KO' comes from a macro
+   expansion reducing from a large macro chain, are not regarded
+   as funclike macro invocation of KO.  E.g. X93 and X94 expand to 'KO',
+   but X95 must not consume the (a,b) arguments outside the M_IF()
+   invocation to reduce the 'KO' macro to an invocation.  Instead
+   X95 should reduce via M_IF(KO)(a,b) to 'a'. 
+   
+   The other lines here are variations on this scheme, with X1 to
+   X6 coming from the bug report at
+   http://lists.nongnu.org/archive/html/tinycc-devel/2017-07/msg00017.html */
+X92 M_IF(KO)
+X93 M_GET_METHOD(INIT, 0, INIT(KO))
+X94 M_GET_METHOD(INIT, 0, M_FLAT (INIT(KO)))
+X95 M_IF(M_GET_METHOD(INIT, 0, INIT(KO)))(a,b)
+X96 M_IF(M_GET_METHOD(INIT, 0, M_FLAT (INIT(KO))))
+X97 M_IF(M_GET_METHOD(INIT, 0, M_FLAT (INIT(KO))))(ok,nok)
+X98 (M_TEST_METHOD_P(INIT, (INIT(KO))))(ok, nok)
+X99 M_IF(M_TEST_METHOD_P(INIT, (INIT(KO))))(ok, nok)
+// test begins
+X1 TEST1(TRUE)          // ==> expect ok, get ok
+// First test with a token which is not a macro
+X2 TEST2((INIT(ok)))    // ==> expect 1, get 1
+X3 TEST3((INIT(ok)))    // ==> expect ok, get ok
+// Then test with a token which is a macro, but should not be expanded.
+X4 TEST2((INIT(KO)))    // ==> expect 1, get 1
+X5 TEST4(INIT(KO))
+X6 TEST3((INIT(KO)))    // ==> expect ok, get "error: macro 'KO' used with too many args"
--- a/libs/pprocessor/tests/pp/19.expect
+++ b/libs/pprocessor/tests/pp/19.expect
@@ -0,0 +1,14 @@
+X92 M_IFI_1
+X93 KO
+X94 KO
+X95 a
+X96 M_IFI_1
+X97 ok
+X98 (1)(ok, nok)
+X99 ok
+X1 ok
+X2 1
+X3 ok
+X4 1
+X5 nok
+X6 ok
--- a/libs/pprocessor/tests/pp/20.c
+++ b/libs/pprocessor/tests/pp/20.c
@@ -0,0 +1,13 @@
+/* Various things I encountered while hacking the pre processor */
+#define wrap(x) x
+#define pr_warning(fmt, ...) printk(KERN_WARNING fmt, ##__VA_ARGS__)
+#define pr_warn(x,y) pr_warning(x,y)
+#define net_ratelimited_function(function, ...) function(__VA_ARGS__)
+X1 net_ratelimited_function(pr_warn, "pipapo", bla);
+X2 net_ratelimited_function(wrap(pr_warn), "bla", foo);
+#define two m n
+#define chain4(a,b,c,d) a ## b ## c ## d
+X2 chain4(two,o,p,q)
+X3 chain4(o,two,p,q)
+X4 chain4(o,p,two,q)
+X5 chain4(o,p,q,two)
--- a/libs/pprocessor/tests/pp/20.expect
+++ b/libs/pprocessor/tests/pp/20.expect
@@ -0,0 +1,6 @@
+X1 printk(KERN_WARNING "pipapo",bla);
+X2 printk(KERN_WARNING "bla",foo);
+X2 twoopq
+X3 otwopq
+X4 optwoq
+X5 opqtwo
--- a/libs/pprocessor/tests/pp/21.c
+++ b/libs/pprocessor/tests/pp/21.c
@@ -0,0 +1,41 @@
+/* accept 'defined' as result of substitution */
+
+----- 1 ------
+#define AAA 2
+#define BBB
+#define CCC (defined ( AAA ) && AAA > 1 && !defined BBB)
+#if !CCC
+OK
+#else
+NOT OK
+#endif
+
+----- 2 ------
+#undef BBB
+#if CCC
+OK
+#else
+NOT OK
+#endif
+
+----- 3 ------
+#define DEFINED defined
+#define DDD (DEFINED ( AAA ) && AAA > 1 && !DEFINED BBB)
+#if (DDD)
+OK
+#else
+NOT OK
+#endif
+
+----- 4 ------
+#undef AAA
+#if !(DDD)
+OK
+#else
+NOT OK
+#endif
+
+----- 5 ------
+line __LINE__
+#define __LINE__ # ## #
+line __LINE__
--- a/libs/pprocessor/tests/pp/21.expect
+++ b/libs/pprocessor/tests/pp/21.expect
@@ -0,0 +1,11 @@
+----- 1 ------
+OK
+----- 2 ------
+OK
+----- 3 ------
+OK
+----- 4 ------
+OK
+----- 5 ------
+line 39
+line ##
--- a/libs/pprocessor/tests/pp/22.c
+++ b/libs/pprocessor/tests/pp/22.c
@@ -0,0 +1,12 @@
+#define TRACE(a,b,c) X a X b X c X
+#define rettrue(x) 1
+A rettrue(bla) B
+TRACE(
+      ARG_1,
+#if rettrue(bla)
+      ARG_2,
+#else
+      ARG_2_wrong,
+#endif
+      ARG_3
+);
--- a/libs/pprocessor/tests/pp/22.expect
+++ b/libs/pprocessor/tests/pp/22.expect
@@ -0,0 +1,2 @@
+A 1 B
+X ARG_1 X ARG_2 X ARG_3 X;
--- a/libs/pprocessor/tests/pp/Makefile
+++ b/libs/pprocessor/tests/pp/Makefile
@@ -0,0 +1,55 @@
+#
+# credits: 01..13.c from the pcc cpp-tests suite
+#
+
+TOP = ../..
+include $(TOP)/Makefile
+SRC = $(TOPSRC)/tests/pp
+VPATH = $(SRC)
+
+files = $(patsubst %.$1,%.test,$(notdir $(wildcard $(SRC)/*.$1)))
+TESTS = $(call files,c) $(call files,S)
+
+all test testspp.all: $(sort $(TESTS))
+
+DIFF_OPTS = -Nu -b
+
+# Filter source directory in warnings/errors (out-of-tree builds)
+FILTER = 2>&1 | sed 's,$(SRC)/,,g'
+
+%.test: %.c %.expect
+	@echo PPTest $* ...
+	-@$(TCC) -E -P $< $(FILTER) >$*.output 2>&1 ; \
+	    diff $(DIFF_OPTS) $(SRC)/$*.expect $*.output \
+	    && rm -f $*.output
+
+%.test: %.S %.expect
+	@echo PPTest $* ...
+	-@$(TCC) -E -P $< $(FILTER) >$*.output 2>&1 ; \
+	    diff $(DIFF_OPTS) $(SRC)/$*.expect $*.output \
+	    && rm -f $*.output
+
+testspp.%: %.test ;
+
+# generate .expect file with tcc, e.g. "make testspp.17+"
+testspp.%+: %.c
+	$(TCC) -E -P $*.[cS] -o $*.expect
+
+# automatically generate .expect files with gcc:
+%.expect: # %.c
+	gcc -E -P $*.[cS] >$*.expect 2>&1
+
+# tell make not to delete
+.PRECIOUS: %.expect
+
+clean:
+	rm -f *.output
+
+02.test : DIFF_OPTS += -w
+# 15.test : DIFF_OPTS += -I"^XXX:"
+
+# diff options:
+# -b ignore space changes
+# -w ignore all whitespace
+# -B ignore blank lines
+# -I <RE> ignore lines matching RE
--- a/libs/pprocessor/tests/pp/README
+++ b/libs/pprocessor/tests/pp/README
@@ -0,0 +1,96 @@
+Tiny C Compiler - C Scripting Everywhere - The Smallest ANSI C compiler
+-----------------------------------------------------------------------
+
+Features:
+--------
+
+- SMALL! You can compile and execute C code everywhere, for example on
+  rescue disks.
+
+- FAST! tcc generates optimized x86 code. No byte code
+  overhead. Compile, assemble and link about 7 times faster than 'gcc
+  -O0'.
+
+- UNLIMITED! Any C dynamic library can be used directly. TCC is
+  heading toward full ISOC99 compliance. TCC can of course compile
+  itself.
+
+- SAFE! tcc includes an optional memory and bound checker. Bound
+  checked code can be mixed freely with standard code.
+
+- Compile and execute C source directly. No linking or assembly
+  necessary. Full C preprocessor included.
+
+- C script supported : just add '#!/usr/local/bin/tcc -run' at the first
+  line of your C source, and execute it directly from the command
+  line.
+
+Documentation:
+-------------
+
+1) Installation on a i386/x86_64/arm/aarch64/riscv64
+   Linux/macOS/FreeBSD/NetBSD/OpenBSD hosts.
+
+   ./configure
+   make
+   make test
+   make install
+
+   Notes: For FreeBSD, NetBSD and OpenBSD, gmake should be used instead of make.
+   For Windows read tcc-win32.txt.
+
+makeinfo must be installed to compile the doc.  By default, tcc is
+installed in /usr/local/bin.  ./configure --help  shows configuration
+options.
+
+
+2) Introduction
+
+We assume here that you know ANSI C. Look at the example ex1.c to know
+what the programs look like.
+
+The include file <tcclib.h> can be used if you want a small basic libc
+include support (especially useful for floppy disks). Of course, you
+can also use standard headers, although they are slower to compile.
+
+You can begin your C script with '#!/usr/local/bin/tcc -run' on the first
+line and set its execute bits (chmod a+x your_script). Then, you can
+launch the C code as a shell or perl script :-) The command line
+arguments are put in 'argc' and 'argv' of the main functions, as in
+ANSI C.
+
+3) Examples
+
+ex1.c: simplest example (hello world). Can also be launched directly
+as a script: './ex1.c'.
+
+ex2.c: more complicated example: find a number with the four
+operations given a list of numbers (benchmark).
+
+ex3.c: compute fibonacci numbers (benchmark).
+
+ex4.c: more complicated: X11 program. Very complicated test in fact
+because standard headers are being used ! As for ex1.c, can also be launched
+directly as a script: './ex4.c'.
+
+ex5.c: 'hello world' with standard glibc headers.
+
+tcc.c: TCC can of course compile itself. Used to check the code
+generator.
+
+tcctest.c: auto test for TCC which tests many subtle possible bugs. Used
+when doing 'make test'.
+
+4) Full Documentation
+
+Please read tcc-doc.html to have all the features of TCC.
+
+Additional information is available for the Windows port in tcc-win32.txt.
+
+License:
+-------
+
+TCC is distributed under the GNU Lesser General Public License (see
+COPYING file).
+
+Fabrice Bellard.
--- a/libs/pprocessor/tests/pp/pp-counter.c
+++ b/libs/pprocessor/tests/pp/pp-counter.c
@@ -0,0 +1,27 @@
+X1 __COUNTER__
+X2 __COUNTER__
+#if __COUNTER__
+X3 __COUNTER__
+#endif
+#define pass(x) x
+#define a x __COUNTER__ y
+#define a2 pass(__COUNTER__)
+#define f(c) c __COUNTER__
+#define apply(d) d d __COUNTER__ x2 f(d) y2 __COUNTER__
+#define _paste(a,b) a ## b
+#define paste(a,b) _paste(a,b)
+#define _paste3(a,b,c) a ## b ## c
+#define doublepaste(a,b) _paste3(a,b,b)
+#define str(x) #x
+X4 a
+X5 f(a)
+X6 f(b)
+X7 f(__COUNTER__)
+X8 apply(a)
+X9 apply(f(a))
+X10 apply(__COUNTER__)
+X11 apply(a2)
+X12 str(__COUNTER__)
+X13 paste(x,__COUNTER__)
+X14 _paste(x,__COUNTER__)
+X15 doublepaste(x,__COUNTER__)
--- a/libs/pprocessor/tests/pp/pp-counter.expect
+++ b/libs/pprocessor/tests/pp/pp-counter.expect
@@ -0,0 +1,15 @@
+X1 0
+X2 1
+X3 3
+X4 x 4 y
+X5 x 5 y 6
+X6 b 7
+X7 8 9
+X8 x 10 y x 10 y 11 x2 x 10 y 12 y2 13
+X9 x 14 y 15 x 14 y 15 16 x2 x 14 y 15 17 y2 18
+X10 19 19 20 x2 19 21 y2 22
+X11 23 23 24 x2 23 25 y2 26
+X12 "__COUNTER__"
+X13 x27
+X14 x__COUNTER__
+X15 x2828
--- a/libs/pprocessor/tests/test_pp.c
+++ b/libs/pprocessor/tests/test_pp.c
@@ -0,0 +1,67 @@
+#include <assert.h>
+#include <pprocessor.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <utest/acutest.h>
+
+/* Slurp fp into a heap-backed memory stream; closes fp; NULL on failure. */
+static core_stream_t *from_file_stream(FILE *fp) {
+    char *buffer = NULL;
+    core_mem_stream_t *mem_stream = NULL;
+    long fsize = -1; /* signed on purpose: ftell() reports failure as -1 */
+    if (fseek(fp, 0, SEEK_END) == 0 && (fsize = ftell(fp)) >= 0 &&
+        fseek(fp, 0, SEEK_SET) == 0) {
+        buffer = malloc((usize)fsize + 1); /* +1: never ask for 0 bytes */
+        mem_stream = malloc(sizeof *mem_stream);
+    }
+    if (buffer == NULL || mem_stream == NULL) {
+        perror("from_file_stream failed");
+        free(buffer);
+        free(mem_stream);
+        fclose(fp);
+        return NULL; /* seek, tell or allocation failed */
+    }
+    usize read_len = fread(buffer, 1, (usize)fsize, fp); /* may be < fsize */
+    fclose(fp);
+    return core_mem_stream_init(mem_stream, buffer, read_len, true);
+}
+
+/* Preprocess pp/<name>.c and compare it char-by-char with pp/<name>.expect. */
+static void test_file(const char *name) {
+    char src_fname[1024], expected_fname[1024];
+    int dir = 0; /* "<file>/../pp" is ENOTDIR on POSIX; use the dir part */
+    for (int i = 0; __FILE__[i] != '\0'; ++i) {
+        if (__FILE__[i] == '/' || __FILE__[i] == '\\') { dir = i + 1; }
+    }
+    snprintf(src_fname, sizeof(src_fname), "%.*spp/%s.c", dir, __FILE__, name);
+    snprintf(expected_fname, sizeof(expected_fname), "%.*spp/%s.expect", dir,
+             __FILE__, name);
+    FILE *fsrc = fopen(src_fname, "r");
+    FILE *fexpect = fopen(expected_fname, "r");
+    assert(fsrc != NULL && fexpect != NULL);
+    smcc_pp_t pp;
+    core_stream_t *output_stream = pp_init(&pp, from_file_stream(fsrc));
+    core_stream_t *expect_stream = from_file_stream(fexpect);
+    assert(expect_stream != NULL);
+    int output_ch, expect_ch;
+    do { /* the preprocessor's EOF ends the comparison */
+        output_ch = core_stream_next_char(output_stream);
+        expect_ch = core_stream_next_char(expect_stream);
+        TEST_CHECK(output_ch == expect_ch);
+        TEST_MSG("output: %c, expect: %c", output_ch, expect_ch);
+    } while (output_ch != core_stream_eof);
+    pp_drop(&pp);
+}
+
+static void test_basic(void) { // fixture sweep below is currently disabled
+    char name[32]; // buffer for the two-digit fixture name; unused for now
+    // for (int i = 1; i <= 22; ++i) {
+    //     snprintf(name, sizeof(name), "%02d", i);
+    //     test_file(name);
+    // }
+}
+
+TEST_LIST = { // acutest registration table: {test name, test function}
+    {"basic", test_basic},
+    {NULL, NULL}, // sentinel entry terminating the list
+};
--- a/libs/pprocessor/tests/test_run.c
+++ b/libs/pprocessor/tests/test_run.c
@@ -0,0 +1,24 @@
+#include <pprocessor.h>
+#include <stdio.h>
+
+/* Demo driver: preprocess a fixed snippet and echo the result to stdout. */
+int main(void) {
+    smcc_pp_t pp;
+    core_mem_stream_t input;
+
+    /* sizeof(buf) - 1 keeps the terminating NUL out of the input stream. */
+    const char buf[] = "#define A 123 \"asd\"\nA A A\n";
+    core_stream_t *source =
+        core_mem_stream_init(&input, buf, sizeof(buf) - 1, false);
+    core_stream_t *output = pp_init(&pp, source);
+
+    /* Copy preprocessed characters to stdout until the stream reports EOF. */
+    int ch;
+    while ((ch = core_stream_next_char(output)) != core_stream_eof) {
+        putc(ch, stdout);
+    }
+
+    /* Tear the preprocessor down, matching the other test drivers. */
+    pp_drop(&pp);
+    return 0;
+}
--- a/libs/pprocessor/tests/test_unit.c
+++ b/libs/pprocessor/tests/test_unit.c
@@ -0,0 +1,101 @@
+#include <assert.h>
+#include <pprocessor.h>
+#include <string.h>
+#include <utest/acutest.h>
+
+/* Run `input` through the preprocessor and collect everything it emits in
+ * `*output`, which is created here with cstring_new().
+ * Always returns true. */
+static cbool process_input(const char *input, cstring_t *output) {
+    smcc_pp_t pp;
+    core_mem_stream_t mem_stream;
+
+    // Initialise the preprocessor on a memory stream over the input text.
+    core_stream_t *source =
+        core_mem_stream_init(&mem_stream, input, strlen(input), false);
+    core_stream_t *output_stream = pp_init(&pp, source);
+
+    // Collect the output one character at a time until EOF.
+    *output = cstring_new();
+    int ch;
+    while ((ch = core_stream_next_char(output_stream)) != core_stream_eof) {
+        cstring_push(output, (char)ch);
+    }
+
+    // Release the preprocessor's resources.
+    pp_drop(&pp);
+
+    return true;
+}
+
+#define CHECK_PP_OUTPUT_EXACT(input, expect)                                   \
+    do { /* preprocess input; output must equal expect exactly */              \
+        cstring_t output; /* NOTE(review): never freed by this macro */        \
+        process_input(input, &output);                                         \
+        assert(output.data != NULL);                                           \
+        TEST_CHECK(strcmp(output.data, expect) == 0);                          \
+    } while (0)
+
+#define CHECK_PP_OUTPUT_CONTAIN(input, expect)                                 \
+    do { /* preprocess input; output must contain expect */                    \
+        cstring_t output; /* NOTE(review): never freed by this macro */        \
+        process_input(input, &output);                                         \
+        assert(output.data != NULL);                                           \
+        TEST_CHECK(strstr(output.data, expect) != NULL);                       \
+    } while (0)
+
+static void test_define_simple_object_macro(void) {
+    TEST_CASE("simple object-like macro");
+    CHECK_PP_OUTPUT_EXACT("#define MAX 100\nMAX\n", "100\n"); // LF after #define
+    CHECK_PP_OUTPUT_EXACT("#define NAME test\r\nNAME\n", "test\n"); // CRLF
+}
+
+static void test_define_complex_object_macro(void) {
+    TEST_CASE("complex object-like macro"); // multi-token and float bodies
+    CHECK_PP_OUTPUT_EXACT("#define VALUE (100 + 50)\nVALUE\n", "(100 + 50)\n");
+    CHECK_PP_OUTPUT_EXACT("#define PI 3.14159\nPI\n", "3.14159\n");
+}
+
+static void test_define_function_macro(void) {
+    TEST_CASE("function-like macro"); // arguments replace the parameters
+    CHECK_PP_OUTPUT_EXACT("#define ADD(a,b) a + b\nADD(1, 2)\n", "1 + 2\n");
+    CHECK_PP_OUTPUT_EXACT(
+        "#define MAX(a,b) ((a) > (b) ? (a) : (b))\nMAX(10, 20)\n",
+        "((10) > (20) ? (10) : (20))\n"); // each parameter is used twice
+}
+
+static void test_define_stringify_operator(void) {
+    TEST_CASE("stringify operator (#)"); // #x yields the argument in quotes
+    CHECK_PP_OUTPUT_EXACT("#define STRINGIFY(x) #x\nSTRINGIFY(hello)\n",
+                          "\"hello\"\n");
+    CHECK_PP_OUTPUT_EXACT("#define STR(x) #x\nSTR(test value)\n",
+                          "\"test value\"\n"); // two-token argument
+}
+
+static void test_define_concat_operator(void) {
+    TEST_CASE("concatenation operator (##)"); // a##b joins both arguments
+    CHECK_PP_OUTPUT_EXACT("#define CONCAT(a,b) a##b\nCONCAT(hello,world)\n",
+                          "helloworld\n");
+    CHECK_PP_OUTPUT_EXACT("#define JOIN(pre,suf) pre##suf\nJOIN(var,123)\n",
+                          "var123\n"); // identifier joined with a number
+}
+
+static void test_define_nested_macros(void) {
+    TEST_CASE("nested macros"); // macro names inside a body expand as well
+    CHECK_PP_OUTPUT_EXACT(
+        "#define MAX 100\n#define TWICE_MAX (MAX * 2)\nTWICE_MAX\n",
+        "(100 * 2)\n"); // one level of nesting
+    CHECK_PP_OUTPUT_EXACT(
+        "#define A 1\n#define B (A + 1)\n#define C (B + 1)\nC\n",
+        "((1 + 1) + 1)\n"); // two levels of nesting
+}
+
+TEST_LIST = { // acutest registration table: {test name, test function}
+    {"test_define_simple_object_macro", test_define_simple_object_macro},
+    {"test_define_complex_object_macro", test_define_complex_object_macro},
+    {"test_define_function_macro", test_define_function_macro},
+    {"test_define_stringify_operator", test_define_stringify_operator},
+    {"test_define_concat_operator", test_define_concat_operator},
+    {"test_define_nested_macros", test_define_nested_macros},
+    {NULL, NULL}, // sentinel entry terminating the list
+};