refactor: 重构前端代码并添加日志功能

- 重命名和重构了多个文件,包括 lexer、parser 和 AST 相关代码
- 添加了日志功能,使用 LOG_* 宏替代原有的 error 和 warn 函数
- 优化了错误处理和内存分配方式
- 调整了代码结构,提高了模块化和可读性
This commit is contained in:
ZZY
2025-03-19 12:22:55 +08:00
parent 172d72b0a0
commit 05c637e594
76 changed files with 1479 additions and 310 deletions

View File

@ -3,8 +3,11 @@ all: ccompiler
run: ccompiler
./ccompiler test.c flat.bin
simple_test:
make -C tests/simple
ccompiler: frontend ir
gcc -g rv32ima_codegen.c -L../../frontend -lfrontend -L../../middleend -lir -o ccompiler
gcc -g rv32.c -I../../.. -L../../frontend -lfrontend -L../../middleend -lmiddleend -L../../../lib -lcore -o ccompiler
frontend:
make -C ../../frontend

View File

@ -1,16 +1,18 @@
#define RISCV_VM_BUILDIN_ECALL
#include "rv32gen.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
// 指令编码联合体(自动处理小端序)
typedef union rv32code {
uint32_t code;
uint8_t bytes[4];
u8_t bytes[4];
} rv32code_t;
#include "../../frontend/frontend.h"
#include "../../middleend/ir.h"
#include "../../middleend/middleend.h"
typedef struct {
int code_pos;
int to_idx;
@ -200,7 +202,7 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
GENCODE(XOR(REG_T0, REG_T1, REG_T2));
break;
default:
error("ERROR gen_instr op in riscv");
LOG_ERROR("ERROR gen_instr op in riscv");
break;
}
offset = stack_offset(instr);
@ -213,7 +215,7 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
int fidx = block_idx(instr->data.branch.false_bblock);
int cidx = block_idx(ctx.cur_block);
jmp_t* jmp;
jmp = xmalloc(sizeof(jmp_t));
jmp = rt._malloc(sizeof(jmp_t));
*jmp = (jmp_t) {
.base_offset = 8,
.code_pos = ctx.codes.size,
@ -223,7 +225,7 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
};
vector_push(ctx.jmp, jmp);
GENCODE(BNEZ(REG_T0, 0));
jmp = xmalloc(sizeof(jmp_t));
jmp = rt._malloc(sizeof(jmp_t));
*jmp = (jmp_t) {
.base_offset = 4,
.code_pos = ctx.codes.size,
@ -237,7 +239,7 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
}
case IR_NODE_JUMP: {
int idx = block_idx(instr->data.jump.target_bblock);
jmp_t* jmp = xmalloc(sizeof(jmp_t));
jmp_t* jmp = rt._malloc(sizeof(jmp_t));
*jmp = (jmp_t) {
.base_offset = 4,
.code_pos = ctx.codes.size,
@ -251,7 +253,7 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
}
case IR_NODE_CALL: {
if (instr->data.call.args.size > 8) {
error("can't add so much params");
LOG_ERROR("can't add so much params");
}
int param_regs[8] = {
REG_A0, REG_A1, REG_A2, REG_A3,
@ -272,7 +274,7 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
goto CALL_END;
}
jmp_t* jmp = xmalloc(sizeof(jmp_t));
jmp_t* jmp = rt._malloc(sizeof(jmp_t));
*jmp = (jmp_t) {
.base_offset = ctx.cur_func_offset + ctx.cur_block_offset + len,
.code_pos = ctx.codes.size,
@ -289,7 +291,7 @@ static int gen_instr(ir_bblock_t* block, ir_node_t* instr) {
break;
}
default:
error("ERROR gen_instr in riscv");
LOG_ERROR("ERROR gen_instr in riscv");
}
return len;
}
@ -320,7 +322,7 @@ static int gen_func(ir_func_t* func) {
REG_A4, REG_A5, REG_A6, REG_A7
};
if (func->params.size > 8) {
error("can't add so much params");
LOG_ERROR("can't add so much params");
}
for (int i = 0; i < func->params.size; i++) {
int offset = stack_offset(vector_at(func->params, i));
@ -362,7 +364,7 @@ static int gen_code(ir_prog_t* prog) {
for (int i = 0; i < prog->extern_funcs.size; i++) {
if (system_func(prog->extern_funcs.data[i]->name) == -1) {
error("func %s not defined and not a system func", prog->extern_funcs.data[i]->name);
LOG_ERROR("func %s not defined and not a system func", prog->extern_funcs.data[i]->name);
}
}
@ -397,11 +399,15 @@ static int gen_code(ir_prog_t* prog) {
return jmp_cache[i];
}
}
error("main not found");
LOG_ERROR("main not found");
}
int main(int argc, char** argv) {
// gcc rv32ima_codegen.c -o rv32gen.exe
init_lib_core();
log_set_level(NULL, LOG_LEVEL_NOTSET);
const char* infilename = "test.c";
const char* outfilename = "flat.bin";
if (argc >= 2) {
@ -417,9 +423,9 @@ int main(int argc, char** argv) {
return 1;
}
struct ASTNode* root = frontend(infilename, in, (sread_fn)fread_s);
gen_ir_from_ast(root);
int main_pos = gen_code(&prog);
ast_node_t* root = frontend(infilename, in, (sread_fn)fread_s);
ir_prog_t* prog = gen_ir_from_ast(root);
int main_pos = gen_code(prog);
#define CRT_CODE_SIZE 16
rv32code_t gcodes[] = {

View File

@ -0,0 +1,8 @@
# Builds the rv32-vm emulator binary from ripes-vm.c.
CC = gcc
CFLAGS = -g -Wall
# NOTE(review): this quoting presumably targets a specific shell; confirm that
# DEFAULT_FILE expands to the C string "flat.bin" on the build host.
CFLAGS += -DDEFAULT_FILE='\"flat.bin\"'

# `all` must be a target (it was previously a variable assignment, so
# `make all` failed with "no rule to make target").
.PHONY: all
all: rv32-vm

# Rebuild whenever the source changes; a forward-slash-free relative path works
# with both POSIX shells and Windows shells.
rv32-vm: ripes-vm.c
	$(CC) $(CFLAGS) -o rv32-vm ripes-vm.c

View File

@ -0,0 +1,520 @@
// Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "default64mbdtc.h"
// Size of emulated RAM in bytes. Defaults to 64MB; main() overwrites it when -m is given.
uint32_t ram_amt = 64*1024*1024;
// When non-zero (set by -d in main()), the MINIRV32_POSTEXEC hook below prints "FAULT"
// and returns 3 from the step function instead of calling HandleException().
int fail_on_all_faults = 0;
// Forward declarations for helpers defined later in this file. They must be visible
// before "mini-rv32ima.h" is included because the hook macros below expand to calls to them.
static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber );
static uint64_t GetTimeMicroseconds();
static void ResetKeyboardInput();
static void CaptureKeyboardInput();
static uint32_t HandleException( uint32_t ir, uint32_t retval );
static uint32_t HandleControlStore( uint32_t addy, uint32_t val );
static uint32_t HandleControlLoad( uint32_t addy );
static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value );
static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno );
static void MiniSleep();
static int IsKBHit();
static int ReadKBByte();
// This is the functionality we want to override in the emulator.
// Think of this as the way the emulator's processor is connected to the outside world.
// Each macro below replaces a default that mini-rv32ima.h would otherwise define.
#define MINIRV32WARN( x... ) printf( x );
#define MINIRV32_DECORATE static
#define MINI_RV32_RAM_SIZE ram_amt
// Requests the function body of MiniRV32IMAStep, not just its prototype.
#define MINIRV32_IMPLEMENTATION
// Runs after each executed instruction; note that it can return from the step function.
#define MINIRV32_POSTEXEC( pc, ir, retval ) { if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } }
// A non-zero result from HandleControlStore() makes the step function return the stored value.
#define MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val ) if( HandleControlStore( addy, val ) ) return val;
#define MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval ) rval = HandleControlLoad( addy );
// `image` in the two CSR hooks refers to the step function's RAM image parameter.
#define MINIRV32_OTHERCSR_WRITE( csrno, value ) HandleOtherCSRWrite( image, csrno, value );
#define MINIRV32_OTHERCSR_READ( csrno, value ) value = HandleOtherCSRRead( image, csrno );
#include "mini-rv32ima.h"
// Host buffer backing the emulated RAM; allocated in main().
uint8_t * ram_image = 0;
// Processor state; main() points it at the last sizeof(struct MiniRV32IMAState) bytes of ram_image.
struct MiniRV32IMAState * core;
// Optional kernel command line (-k); copied into the default DTB by main().
const char * kernel_command_line = 0;
static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image );
/*
 * Emulator entry point.
 *
 * Parses the command line, allocates ram_amt bytes of RAM, loads the image given
 * with -f at offset 0, places a device tree (file given with -b, or the built-in
 * default unless -b disable) just below the processor state at the end of RAM,
 * then repeatedly calls MiniRV32IMAStep() until the instruction budget (-c) is
 * used up or the guest powers off.
 *
 * Returns 0 on power-off or when the run loop ends, 1 on usage errors, and a
 * negative value when RAM cannot be set up or a file cannot be loaded.
 */
int main( int argc, char ** argv )
{
    int i;
    long long instct = -1;
    int show_help = 0;
    int time_divisor = 1;
    int fixed_update = 0;
    int do_sleep = 1;
    int single_step = 0;
    int dtb_ptr = 0;
    const char * image_file_name = 0;
    const char * dtb_file_name = 0;

    for( i = 1; i < argc; i++ )
    {
        const char * param = argv[i];
        int param_continue = 0; // Can combine parameters, like -lpt x
        do
        {
            if( param[0] == '-' || param_continue )
            {
                switch( param[1] )
                {
                case 'm': if( ++i < argc ) ram_amt = SimpleReadNumberInt( argv[i], ram_amt ); break;
                case 'c': if( ++i < argc ) instct = SimpleReadNumberInt( argv[i], -1 ); break;
                case 'k': if( ++i < argc ) kernel_command_line = argv[i]; break;
                case 'f': image_file_name = (++i<argc)?argv[i]:0; break;
                case 'b': dtb_file_name = (++i<argc)?argv[i]:0; break;
                case 'l': param_continue = 1; fixed_update = 1; break;
                case 'p': param_continue = 1; do_sleep = 0; break;
                case 's': param_continue = 1; single_step = 1; break;
                case 'd': param_continue = 1; fail_on_all_faults = 1; break;
                case 't': if( ++i < argc ) time_divisor = SimpleReadNumberInt( argv[i], 1 ); break;
                default:
                    if( param_continue )
                        param_continue = 0;
                    else
                        show_help = 1;
                    break;
                }
            }
            else
            {
                show_help = 1;
                break;
            }
            param++;
        } while( param_continue );
    }

    if( show_help || image_file_name == 0 || time_divisor <= 0 )
    {
        fprintf( stderr, "./mini-rv32imaf [parameters]\n\t-m [ram amount]\n\t-f [running image]\n\t-k [kernel command line]\n\t-b [dtb file, or 'disable']\n\t-c instruction count\n\t-s single step with full processor state\n\t-t time divion base\n\t-l lock time base to instruction count\n\t-p disable sleep when wfi\n\t-d fail out immediately on all faults\n" );
        return 1;
    }

    // The processor state is stored in the last bytes of RAM, so RAM must be able to
    // hold it; every "ram_amt - sizeof(state)" below relies on this check.
    if( ram_amt < sizeof( struct MiniRV32IMAState ) )
    {
        fprintf( stderr, "Error: RAM amount (%u bytes) cannot hold the processor state.\n", (unsigned)ram_amt );
        return -4;
    }

    ram_image = malloc( ram_amt );
    if( !ram_image )
    {
        fprintf( stderr, "Error: could not allocate system image.\n" );
        return -4;
    }

restart:
    {
        FILE * f = fopen( image_file_name, "rb" );
        if( !f || ferror( f ) )
        {
            fprintf( stderr, "Error: \"%s\" not found\n", image_file_name );
            if( f ) fclose( f );
            return -5;
        }
        fseek( f, 0, SEEK_END );
        long flen = ftell( f );
        fseek( f, 0, SEEK_SET );
        if( flen < 0 )
        {
            // ftell() failed; the length cannot be trusted as a read size.
            fprintf( stderr, "Error: Could not load image.\n" );
            fclose( f );
            return -7;
        }
        if( (unsigned long)flen > ram_amt )
        {
            fprintf( stderr, "Error: Could not fit RAM image (%ld bytes) into %u\n", flen, (unsigned)ram_amt );
            fclose( f );
            return -6;
        }

        memset( ram_image, 0, ram_amt );
        if( fread( ram_image, flen, 1, f ) != 1)
        {
            fprintf( stderr, "Error: Could not load image.\n" );
            fclose( f );
            return -7;
        }
        fclose( f );

        if( dtb_file_name )
        {
            if( strcmp( dtb_file_name, "disable" ) == 0 )
            {
                // No DTB reading.
            }
            else
            {
                f = fopen( dtb_file_name, "rb" );
                if( !f || ferror( f ) )
                {
                    fprintf( stderr, "Error: \"%s\" not found\n", dtb_file_name );
                    if( f ) fclose( f );
                    return -5;
                }
                fseek( f, 0, SEEK_END );
                long dtblen = ftell( f );
                fseek( f, 0, SEEK_SET );
                // The DTB is placed directly below the processor state. Without this
                // check an oversized DTB makes dtb_ptr negative and the fread() below
                // writes before the start of ram_image.
                if( dtblen < 0 || (unsigned long)dtblen > ram_amt - sizeof( struct MiniRV32IMAState ) )
                {
                    fprintf( stderr, "Error: Could not fit dtb \"%s\" into RAM\n", dtb_file_name );
                    fclose( f );
                    return -9;
                }
                dtb_ptr = ram_amt - dtblen - sizeof( struct MiniRV32IMAState );
                if( fread( ram_image + dtb_ptr, dtblen, 1, f ) != 1 )
                {
                    fprintf( stderr, "Error: Could not open dtb \"%s\"\n", dtb_file_name );
                    fclose( f );
                    return -9;
                }
                fclose( f );
            }
        }
        else
        {
            // Load a default dtb. Same placement rule as above, so it must fit below
            // the processor state (e.g. a very small -m value would not).
            if( sizeof( default64mbdtb ) > ram_amt - sizeof( struct MiniRV32IMAState ) )
            {
                fprintf( stderr, "Error: Could not fit default dtb (%u bytes) into %u\n", (unsigned)sizeof( default64mbdtb ), (unsigned)ram_amt );
                return -6;
            }
            dtb_ptr = ram_amt - sizeof(default64mbdtb) - sizeof( struct MiniRV32IMAState );
            memcpy( ram_image + dtb_ptr, default64mbdtb, sizeof( default64mbdtb ) );
            if( kernel_command_line )
            {
                // Writes at most 54 bytes into the DTB at offset 0xc0.
                strncpy( (char*)( ram_image + dtb_ptr + 0xc0 ), kernel_command_line, 54 );
            }
        }
    }

    CaptureKeyboardInput();

    // The core lives at the end of RAM.
    core = (struct MiniRV32IMAState *)(ram_image + ram_amt - sizeof( struct MiniRV32IMAState ));
    core->pc = MINIRV32_RAM_IMAGE_OFFSET;
    core->regs[10] = 0x00; //hart ID
    core->regs[11] = dtb_ptr?(dtb_ptr+MINIRV32_RAM_IMAGE_OFFSET):0; //dtb_pa (Must be valid pointer) (Should be pointer to dtb)
    core->extraflags |= 3; // Machine-mode.

    if( dtb_file_name == 0 )
    {
        // Update system ram size in DTB (but if and only if we're using the default DTB)
        // Warning - this will need to be updated if the skeleton DTB is ever modified.
        uint32_t * dtb = (uint32_t*)(ram_image + dtb_ptr);
        if( dtb[0x13c/4] == 0x00c0ff03 )
        {
            // Store the usable RAM size byte-swapped.
            uint32_t validram = dtb_ptr;
            dtb[0x13c/4] = (validram>>24) | ((( validram >> 16 ) & 0xff) << 8 ) | (((validram>>8) & 0xff ) << 16 ) | ( ( validram & 0xff) << 24 );
        }
    }

    // Image is loaded.
    uint64_t rt;
    uint64_t lastTime = (fixed_update)?0:(GetTimeMicroseconds()/time_divisor);
    int instrs_per_flip = single_step?1:1024;
    for( rt = 0; rt < instct+1 || instct < 0; rt += instrs_per_flip )
    {
        uint64_t * this_ccount = ((uint64_t*)&core->cyclel);
        uint32_t elapsedUs = 0;
        if( fixed_update )
            elapsedUs = *this_ccount / time_divisor - lastTime;
        else
            elapsedUs = GetTimeMicroseconds()/time_divisor - lastTime;
        lastTime += elapsedUs;

        if( single_step )
            DumpState( core, ram_image);

        int ret = MiniRV32IMAStep( core, ram_image, 0, elapsedUs, instrs_per_flip ); // Execute upto 1024 cycles before breaking out.
        switch( ret )
        {
        case 0: break;
        case 1: if( do_sleep ) MiniSleep(); *this_ccount += instrs_per_flip; break;
        case 3: instct = 0; break;
        case 0x7777: goto restart; //syscon code for restart
        case 0x5555: printf( "POWEROFF@0x%08x%08x\n", core->cycleh, core->cyclel ); return 0; //syscon code for power-off
        default: printf( "Unknown failure\n" ); break;
        }
    }

    DumpState( core, ram_image);
    return 0;
}
//////////////////////////////////////////////////////////////////////////
// Platform-specific functionality
//////////////////////////////////////////////////////////////////////////
#if defined(WINDOWS) || defined(WIN32) || defined(_WIN32)
#include <windows.h>
#include <conio.h>
#define strtoll _strtoi64
// Windows: prepare the console for the VM. The empty system() call is a poorly
// documented trick used to enable VT100 mode.
static void CaptureKeyboardInput()
{
    (void)system( "" );
}

// Windows: nothing has to be restored on exit.
static void ResetKeyboardInput()
{
    /* intentionally empty */
}

// Windows: yield the host CPU for one millisecond.
static void MiniSleep()
{
    Sleep( 1 );
}
// Windows: returns the performance counter converted to microseconds.
// The counter frequency is queried once and cached in a static.
static uint64_t GetTimeMicroseconds()
{
    static LARGE_INTEGER lpf;
    LARGE_INTEGER li;

    if( !lpf.QuadPart )
        QueryPerformanceFrequency( &lpf );
    QueryPerformanceCounter( &li );

    uint64_t freq = (uint64_t)lpf.QuadPart;
    uint64_t ticks = (uint64_t)li.QuadPart;

    // Convert whole seconds and the sub-second remainder separately. Multiplying
    // the raw counter by 1000000 first overflows 64 bits once the counter gets
    // large, while this form yields the same value without the overflow.
    return ( ticks / freq ) * 1000000ULL + ( ( ticks % freq ) * 1000000ULL ) / freq;
}
// Windows: non-zero when a key press is waiting in the console input buffer.
static int IsKBHit()
{
    return _kbhit();
}

// Windows: read one key and translate it to what a VT100 terminal would send.
// An arrow/navigation key arrives from _getch() as 224 followed by a scan code;
// it is handed to the caller over three calls as ESC, '[', and a letter.
static int ReadKBByte()
{
    // 0 = idle, 1 = ESC was returned ('[' is next), 2 = '[' was returned (letter is next).
    static int is_escape_sequence = 0;

    if( is_escape_sequence == 1 )
    {
        is_escape_sequence = 2;
        return '[';
    }

    int key = _getch();

    if( !is_escape_sequence )
    {
        if( key == 13 ) return 10; // cr->lf
        if( key == 224 )
        {
            // Start of an extended key: emit ESC now.
            is_escape_sequence = 1;
            return 27;
        }
        return key;
    }

    // Final byte of the sequence: map the Windows scan code to the VT100 letter.
    is_escape_sequence = 0;
    if( key == 'H' ) return 'A'; // Up
    if( key == 'P' ) return 'B'; // Down
    if( key == 'K' ) return 'D'; // Left
    if( key == 'M' ) return 'C'; // Right
    if( key == 'G' ) return 'H'; // Home
    if( key == 'O' ) return 'F'; // End
    return key;                  // Unknown code.
}
#else
#include <sys/ioctl.h>
#include <termios.h>
#include <unistd.h>
#include <signal.h>
#include <sys/time.h>
// SIGINT handler: dump the processor state, then exit with status 0.
// A handler installed with signal() is called with the signal number, so the
// definition must take an int parameter; it is not otherwise used.
// NOTE(review): DumpState() uses printf, which is not async-signal-safe.
static void CtrlC( int sig )
{
    (void)sig;
    DumpState( core, ram_image);
    exit( 0 );
}
// Override keyboard, so we can capture all keyboard input for the VM.
// Registers ResetKeyboardInput() to run at exit, installs the SIGINT handler,
// and turns off canonical mode and echo on file descriptor 0.
static void CaptureKeyboardInput()
{
    // Hook exit, because we want to re-enable keyboard.
    atexit(ResetKeyboardInput);
    signal(SIGINT, CtrlC);

    struct termios term;
    // If the attributes cannot be read (e.g. stdin is not a terminal) `term` is
    // uninitialized, so there is nothing valid to modify and write back.
    if( tcgetattr(0, &term) != 0 )
        return;
    term.c_lflag &= ~(ICANON | ECHO); // Disable echo as well
    tcsetattr(0, TCSANOW, &term);
}
// Re-enable canonical mode and echo on file descriptor 0.
static void ResetKeyboardInput()
{
    struct termios term;
    // If the attributes cannot be read (e.g. stdin is not a terminal) `term` is
    // uninitialized, so there is nothing valid to modify and write back.
    if( tcgetattr(0, &term) != 0 )
        return;
    term.c_lflag |= ICANON | ECHO;
    tcsetattr(0, TCSANOW, &term);
}
// POSIX: pause the host thread for 500 microseconds.
static void MiniSleep()
{
    (void)usleep( 500 );
}
// POSIX: current gettimeofday() time expressed as a single microsecond count.
static uint64_t GetTimeMicroseconds()
{
    struct timeval now;
    gettimeofday( &now, 0 );

    uint64_t whole_seconds_us = ((uint64_t)(now.tv_sec)) * 1000000LL;
    return now.tv_usec + whole_seconds_us;
}
// Set to 1 by IsKBHit() once stdin is considered to be at end-of-file; after
// that both helpers below report -1 without touching stdin again.
static int is_eofd;

// Read one byte from stdin.
// Returns the byte as a value in 0..255, or -1 when nothing could be read or
// end-of-file was detected earlier.
static int ReadKBByte()
{
    if( is_eofd ) return -1;
    // Must be unsigned: with a plain (possibly signed) char, bytes >= 0x80 are
    // returned as negative numbers and 0xFF cannot be told apart from the -1 error.
    unsigned char rxchar = 0;
    int rread = read(fileno(stdin), &rxchar, 1);

    if( rread > 0 ) // Tricky: getchar can't be used with arrow keys.
        return rxchar;
    else
        return -1;
}

// Report whether input is waiting on stdin.
// Returns 1 if bytes are pending, 0 if not, and -1 once end-of-file is detected.
static int IsKBHit()
{
    if( is_eofd ) return -1;
    int byteswaiting = 0;
    // If the query fails the count is not written; treat that as "nothing waiting"
    // instead of reading an uninitialized value.
    if( ioctl(0, FIONREAD, &byteswaiting) != 0 )
        byteswaiting = 0;
    // With no bytes pending, a failing zero-length write on stdin's descriptor is
    // taken as the end-of-file indication.
    if( !byteswaiting && write( fileno(stdin), 0, 0 ) != 0 ) { is_eofd = 1; return -1; }
    return !!byteswaiting;
}
#endif
//////////////////////////////////////////////////////////////////////////
// Rest of functions functionality
//////////////////////////////////////////////////////////////////////////
// Hook called (through MINIRV32_POSTEXEC) when an instruction produced a trap code.
// Currently every code is passed back unchanged; `ir` is not inspected.
static uint32_t HandleException( uint32_t ir, uint32_t code )
{
    (void)ir;

    switch( code )
    {
    case 3:
        // Weird opcode emitted by duktape on exit.
        // Could handle other opcodes here.
        break;
    default:
        break;
    }

    return code;
}
// Handle a guest store to a memory-mapped control address.
// Returns non-zero only for the SYSCON address, in which case the stored value
// is returned; every other address returns 0.
static uint32_t HandleControlStore( uint32_t addy, uint32_t val )
{
    switch( addy )
    {
    case 0x10000000: // UART 8250 / 16550 Data Buffer
        printf( "%c", val );
        fflush( stdout );
        return 0;

    case 0x11004004: // CLNT
        core->timermatchh = val;
        return 0;

    case 0x11004000: // CLNT
        core->timermatchl = val;
        return 0;

    case 0x11100000: // SYSCON (reboot, poweroff, etc.)
        core->pc = core->pc + 4;
        return val; // NOTE: PC will be PC of Syscon.

    default:
        return 0;
    }
}
// Handle a guest load from a memory-mapped control address and return the value read.
// Addresses that are not handled here read as 0.
static uint32_t HandleControlLoad( uint32_t addy )
{
    // Emulating a 8250 / 16550 UART
    switch( addy )
    {
    case 0x10000005:
        return 0x60 | IsKBHit();

    case 0x10000000:
        // Only consume a byte when one is reported as available.
        if( IsKBHit() )
            return ReadKBByte();
        break;

    case 0x1100bffc: // https://chromitem-soc.readthedocs.io/en/latest/clint.html
        return core->timerh;

    case 0x1100bff8:
        return core->timerl;

    default:
        break;
    }
    return 0;
}
// Handle writes to CSR numbers the core does not implement itself.
// 0x136 prints the value as decimal, 0x137 as 8-digit hex, 0x138 prints the
// NUL-terminated string at guest address `value`, 0x139 prints one character.
// Other CSR numbers are ignored.
static void HandleOtherCSRWrite( uint8_t * image, uint16_t csrno, uint32_t value )
{
    switch( csrno )
    {
    case 0x136:
        printf( "%d", value );
        fflush( stdout );
        break;

    case 0x137:
        printf( "%08x", value );
        fflush( stdout );
        break;

    case 0x138:
    {
        //Print "string"
        uint32_t first = value - MINIRV32_RAM_IMAGE_OFFSET;
        uint32_t last = first;

        if( first >= ram_amt )
            printf( "DEBUG PASSED INVALID PTR (%08x)\n", value );

        // Scan forward to the terminating NUL, never leaving RAM.
        for( ; last < ram_amt && image[last] != 0; last++ )
        {
        }

        if( last != first )
            fwrite( image + first, last - first, 1, stdout );
        break;
    }

    case 0x139:
        putchar( value );
        fflush( stdout );
        break;

    default:
        break;
    }
}
// Handle reads of CSR numbers the core does not implement itself.
// CSR 0x140 yields the next keyboard byte, or -1 when IsKBHit() reports none;
// every other CSR number reads as 0. `image` is not used.
static int32_t HandleOtherCSRRead( uint8_t * image, uint16_t csrno )
{
    if( csrno != 0x140 )
        return 0;

    return IsKBHit() ? ReadKBByte() : -1;
}
// Parse an integer from a command-line string.
//
// A leading "0x"/"0X" selects hexadecimal, "0b"/"0B" binary, any other leading
// '0' octal; everything else is parsed as decimal. Trailing characters after
// the digits are ignored.
//
// Returns the parsed value, or defaultNumber when `number` is NULL, empty, or
// contains no digits valid for the selected radix.
static int64_t SimpleReadNumberInt( const char * number, int64_t defaultNumber )
{
    if( !number || !number[0] ) return defaultNumber;

    int radix = 10;
    if( number[0] == '0' )
    {
        char nc = number[1];
        if( nc == 0 ) return 0; // The string is exactly "0".
        else if( nc == 'x' || nc == 'X' ) { radix = 16; number += 2; }
        else if( nc == 'b' || nc == 'B' ) { radix = 2; number += 2; }
        else { radix = 8; number += 1; } // Only skip the leading zero.
    }

    char * endptr;
    int64_t ret = strtoll( number, &endptr, radix );
    if( endptr == number )
    {
        // No digits were consumed.
        return defaultNumber;
    }
    return ret;
}
// Print the program counter, the instruction word at the program counter (when
// it lies inside RAM) and all 32 integer registers to stdout.
static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image )
{
    uint32_t pc = core->pc;
    uint32_t pc_offset = pc - MINIRV32_RAM_IMAGE_OFFSET;
    uint32_t ir = 0;

    printf( "PC: %08x ", pc );
    // pc_offset is unsigned, so a pc below the RAM base wraps to a huge offset and
    // is rejected here. Checking ram_amt first keeps "ram_amt - 4" from wrapping.
    if( ram_amt >= 4 && pc_offset <= ram_amt - 4 )
    {
        // memcpy avoids a misaligned / type-punned 32-bit read from the byte image.
        memcpy( &ir, ram_image + pc_offset, sizeof( ir ) );
        printf( "[0x%08x] ", ir );
    }
    else
        printf( "[xxxxxxxxxx] " );

    uint32_t * regs = core->regs;
    printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ",
        regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7],
        regs[8], regs[9], regs[10], regs[11], regs[12], regs[13], regs[14], regs[15] );
    printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n",
        regs[16], regs[17], regs[18], regs[19], regs[20], regs[21], regs[22], regs[23],
        regs[24], regs[25], regs[26], regs[27], regs[28], regs[29], regs[30], regs[31] );
}

View File

@ -0,0 +1,547 @@
// Copyright 2022 Charles Lohr, you may use this file or any portions herein under any of the BSD, MIT, or CC0 licenses.
#ifndef _MINI_RV32IMAH_H
#define _MINI_RV32IMAH_H
/**
To use mini-rv32ima.h for the bare minimum, the following:
#define MINI_RV32_RAM_SIZE ram_amt
#define MINIRV32_IMPLEMENTATION
#include "mini-rv32ima.h"
Though, that's not _that_ interesting. You probably want I/O!
Notes:
* There is a dedicated CLNT at 0x10000000.
* There is free MMIO from there to 0x12000000.
* You can put things like a UART, or whatever there.
* Feel free to override any of the functionality with macros.
*/
// Default definitions for every hook the includer did not define before
// including this header. Most defaults expand to an empty statement.

// Warning output; discarded by default.
#ifndef MINIRV32WARN
#define MINIRV32WARN( x... );
#endif
// Storage class / attributes placed in front of MiniRV32IMAStep.
#ifndef MINIRV32_DECORATE
#define MINIRV32_DECORATE static
#endif
// Guest address that corresponds to offset 0 of the RAM image.
#ifndef MINIRV32_RAM_IMAGE_OFFSET
#define MINIRV32_RAM_IMAGE_OFFSET 0x80000000
#endif
// True for guest addresses that are routed to the MMIO load/store hooks.
#ifndef MINIRV32_MMIO_RANGE
#define MINIRV32_MMIO_RANGE(n) (0x10000000 <= (n) && (n) < 0x12000000)
#endif
// Invoked as MINIRV32_POSTEXEC( pc, ir, trap ) by the step function.
#ifndef MINIRV32_POSTEXEC
#define MINIRV32_POSTEXEC(...);
#endif
// Invoked as ( address, value ) for stores inside MINIRV32_MMIO_RANGE.
#ifndef MINIRV32_HANDLE_MEM_STORE_CONTROL
#define MINIRV32_HANDLE_MEM_STORE_CONTROL(...);
#endif
// Invoked as ( address, result variable ) for loads inside MINIRV32_MMIO_RANGE.
#ifndef MINIRV32_HANDLE_MEM_LOAD_CONTROL
#define MINIRV32_HANDLE_MEM_LOAD_CONTROL(...);
#endif
// Invoked as ( csr number, value ) for writes to CSRs the step function does not handle.
#ifndef MINIRV32_OTHERCSR_WRITE
#define MINIRV32_OTHERCSR_WRITE(...);
#endif
// Invoked as ( csr number, result variable ) for reads of CSRs the step function does not handle.
#ifndef MINIRV32_OTHERCSR_READ
#define MINIRV32_OTHERCSR_READ(...);
#endif
// Default memory bus: direct accesses into the host buffer named `image`, which
// must be in scope where these macros are used. `ofs` is a byte offset into that
// buffer. Arguments and expansions are parenthesized so that expression
// arguments (e.g. `a ? b : c`, `x + y`) keep their meaning.
// Define MINIRV32_CUSTOM_MEMORY_BUS and all eight macros to replace them.
#ifndef MINIRV32_CUSTOM_MEMORY_BUS
    #define MINIRV32_STORE4( ofs, val ) (*(uint32_t*)(image + (ofs)) = (val))
    #define MINIRV32_STORE2( ofs, val ) (*(uint16_t*)(image + (ofs)) = (val))
    #define MINIRV32_STORE1( ofs, val ) (*(uint8_t*)(image + (ofs)) = (val))
    #define MINIRV32_LOAD4( ofs ) (*(uint32_t*)(image + (ofs)))
    #define MINIRV32_LOAD2( ofs ) (*(uint16_t*)(image + (ofs)))
    #define MINIRV32_LOAD1( ofs ) (*(uint8_t*)(image + (ofs)))
    #define MINIRV32_LOAD2_SIGNED( ofs ) (*(int16_t*)(image + (ofs)))
    #define MINIRV32_LOAD1_SIGNED( ofs ) (*(int8_t*)(image + (ofs)))
#endif
// Complete processor state for one hart.
//
// As a note: the fields are kept in groups of four because, in HLSL, we will be
// operating with uint4's. We are going to uint4 data to/from system RAM.
//
// We're going to try to keep the full processor state to 12 x uint4
// (32 registers + 16 further words = 48 x uint32_t).
struct MiniRV32IMAState
{
// Integer registers x0..x31.
uint32_t regs[32];
// Program counter (guest address).
uint32_t pc;
uint32_t mstatus;
// Low / high halves of the cycle counter.
uint32_t cyclel;
uint32_t cycleh;
// Low / high halves of the timer, advanced by the elapsed time passed to the step function.
uint32_t timerl;
uint32_t timerh;
// Low / high halves of the timer compare value; the timer interrupt is disabled while both are 0.
uint32_t timermatchl;
uint32_t timermatchh;
uint32_t mscratch;
uint32_t mtvec;
uint32_t mie;
uint32_t mip;
uint32_t mepc;
uint32_t mtval;
uint32_t mcause;
// Note: only a few bits are used. (Machine = 3, User = 0)
// Bits 0..1 = privilege.
// Bit 2 = WFI (Wait for interrupt)
// Bit 3+ = Load/Store reservation LSBs.
uint32_t extraflags;
};
// Prototype of the step function. Define MINIRV32_STEPPROTO to supply a
// different signature; it is then used verbatim in front of the function body below.
#ifndef MINIRV32_STEPPROTO
MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count );
#endif
#ifdef MINIRV32_IMPLEMENTATION
// Accessors for the processor state. Define MINIRV32_CUSTOM_INTERNALS to
// provide your own CSR / SETCSR / REG / REGSET.
#ifndef MINIRV32_CUSTOM_INTERNALS
#define CSR( x ) state->x
#define SETCSR( x, val ) { state->x = val; }
#define REG( x ) state->regs[x]
#define REGSET( x, val ) { state->regs[x] = val; }
#endif
// Advance the emulated processor.
//
//   state        processor state, read and updated in place
//   image        host buffer holding guest RAM; offset 0 is guest address MINIRV32_RAM_IMAGE_OFFSET
//   vProcAddress not referenced in this body
//   elapsedUs    amount added to the timer before executing
//   count        maximum number of instructions to execute in this call
//
// First the timer is advanced and the timer-interrupt-pending bit (bit 7 of mip)
// is updated. Then either a pending, enabled timer interrupt is taken, or up to
// `count` instructions are executed; execution stops early at the first trap,
// which is then delivered through mtvec.
//
// Returns 0 normally and 1 when the processor is waiting for an interrupt (WFI).
// The hook macros (MINIRV32_POSTEXEC, MINIRV32_HANDLE_MEM_STORE_CONTROL, ...) are
// expanded inside this function, so an includer-supplied hook that contains a
// `return` adds further return values.
#ifndef MINIRV32_STEPPROTO
MINIRV32_DECORATE int32_t MiniRV32IMAStep( struct MiniRV32IMAState * state, uint8_t * image, uint32_t vProcAddress, uint32_t elapsedUs, int count )
#else
MINIRV32_STEPPROTO
#endif
{
// Add the elapsed time to the 64-bit timer, carrying into the high word on wrap.
uint32_t new_timer = CSR( timerl ) + elapsedUs;
if( new_timer < CSR( timerl ) ) CSR( timerh )++;
CSR( timerl ) = new_timer;
// Handle Timer interrupt.
// The interrupt is pending while timer > timermatch and timermatch is not zero.
if( ( CSR( timerh ) > CSR( timermatchh ) || ( CSR( timerh ) == CSR( timermatchh ) && CSR( timerl ) > CSR( timermatchl ) ) ) && ( CSR( timermatchh ) || CSR( timermatchl ) ) )
{
CSR( extraflags ) &= ~4; // Clear WFI
CSR( mip ) |= 1<<7; //MTIP of MIP // https://stackoverflow.com/a/61916199/2926815 Fire interrupt.
}
else
CSR( mip ) &= ~(1<<7);
// If WFI, don't run processor.
if( CSR( extraflags ) & 4 )
return 1;
// `trap` holds 0 for "no trap", (exception cause + 1) for an exception, or a value
// with the top bit set for an interrupt. `rval` is the value destined for register rdid.
uint32_t trap = 0;
uint32_t rval = 0;
// pc and cycle are worked on as locals and written back to the state at the end.
uint32_t pc = CSR( pc );
uint32_t cycle = CSR( cyclel );
if( ( CSR( mip ) & (1<<7) ) && ( CSR( mie ) & (1<<7) /*mtie*/ ) && ( CSR( mstatus ) & 0x8 /*mie*/) )
{
// Timer interrupt.
trap = 0x80000007;
pc -= 4;
}
else // No timer interrupt? Execute a bunch of instructions.
for( int icount = 0; icount < count; icount++ )
{
uint32_t ir = 0;
rval = 0;
cycle++;
uint32_t ofs_pc = pc - MINIRV32_RAM_IMAGE_OFFSET;
if( ofs_pc >= MINI_RV32_RAM_SIZE )
{
trap = 1 + 1; // Handle access violation on instruction read.
break;
}
else if( ofs_pc & 3 )
{
trap = 1 + 0; //Handle PC-misaligned access
break;
}
else
{
ir = MINIRV32_LOAD4( ofs_pc );
// Destination register index; cases that must not write a register set it to 0.
uint32_t rdid = (ir >> 7) & 0x1f;
// Dispatch on the 7-bit opcode. Control-transfer cases store (target - 4) in pc
// because pc is advanced by 4 after every instruction at the bottom of the loop.
switch( ir & 0x7f )
{
case 0x37: // LUI (0b0110111)
rval = ( ir & 0xfffff000 );
break;
case 0x17: // AUIPC (0b0010111)
rval = pc + ( ir & 0xfffff000 );
break;
case 0x6F: // JAL (0b1101111)
{
int32_t reladdy = ((ir & 0x80000000)>>11) | ((ir & 0x7fe00000)>>20) | ((ir & 0x00100000)>>9) | ((ir&0x000ff000));
if( reladdy & 0x00100000 ) reladdy |= 0xffe00000; // Sign extension.
rval = pc + 4;
pc = pc + reladdy - 4;
break;
}
case 0x67: // JALR (0b1100111)
{
uint32_t imm = ir >> 20;
int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0);
rval = pc + 4;
pc = ( (REG( (ir >> 15) & 0x1f ) + imm_se) & ~1) - 4;
break;
}
case 0x63: // Branch (0b1100011)
{
uint32_t immm4 = ((ir & 0xf00)>>7) | ((ir & 0x7e000000)>>20) | ((ir & 0x80) << 4) | ((ir >> 31)<<12);
if( immm4 & 0x1000 ) immm4 |= 0xffffe000;
int32_t rs1 = REG((ir >> 15) & 0x1f);
int32_t rs2 = REG((ir >> 20) & 0x1f);
immm4 = pc + immm4 - 4;
rdid = 0;
switch( ( ir >> 12 ) & 0x7 )
{
// BEQ, BNE, BLT, BGE, BLTU, BGEU
case 0: if( rs1 == rs2 ) pc = immm4; break;
case 1: if( rs1 != rs2 ) pc = immm4; break;
case 4: if( rs1 < rs2 ) pc = immm4; break;
case 5: if( rs1 >= rs2 ) pc = immm4; break; //BGE
case 6: if( (uint32_t)rs1 < (uint32_t)rs2 ) pc = immm4; break; //BLTU
case 7: if( (uint32_t)rs1 >= (uint32_t)rs2 ) pc = immm4; break; //BGEU
default: trap = (2+1);
}
break;
}
case 0x03: // Load (0b0000011)
{
uint32_t rs1 = REG((ir >> 15) & 0x1f);
uint32_t imm = ir >> 20;
int32_t imm_se = imm | (( imm & 0x800 )?0xfffff000:0);
uint32_t rsval = rs1 + imm_se;
rsval -= MINIRV32_RAM_IMAGE_OFFSET;
// Outside RAM: either an MMIO access handled by the hook, or a load access fault.
if( rsval >= MINI_RV32_RAM_SIZE-3 )
{
rsval += MINIRV32_RAM_IMAGE_OFFSET;
if( MINIRV32_MMIO_RANGE( rsval ) ) // UART, CLNT
{
MINIRV32_HANDLE_MEM_LOAD_CONTROL( rsval, rval );
}
else
{
trap = (5+1);
rval = rsval;
}
}
else
{
switch( ( ir >> 12 ) & 0x7 )
{
//LB, LH, LW, LBU, LHU
case 0: rval = MINIRV32_LOAD1_SIGNED( rsval ); break;
case 1: rval = MINIRV32_LOAD2_SIGNED( rsval ); break;
case 2: rval = MINIRV32_LOAD4( rsval ); break;
case 4: rval = MINIRV32_LOAD1( rsval ); break;
case 5: rval = MINIRV32_LOAD2( rsval ); break;
default: trap = (2+1);
}
}
break;
}
case 0x23: // Store 0b0100011
{
uint32_t rs1 = REG((ir >> 15) & 0x1f);
uint32_t rs2 = REG((ir >> 20) & 0x1f);
uint32_t addy = ( ( ir >> 7 ) & 0x1f ) | ( ( ir & 0xfe000000 ) >> 20 );
if( addy & 0x800 ) addy |= 0xfffff000;
addy += rs1 - MINIRV32_RAM_IMAGE_OFFSET;
rdid = 0;
// Outside RAM: either an MMIO access handled by the hook, or a store access fault.
if( addy >= MINI_RV32_RAM_SIZE-3 )
{
addy += MINIRV32_RAM_IMAGE_OFFSET;
if( MINIRV32_MMIO_RANGE( addy ) )
{
MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, rs2 );
}
else
{
trap = (7+1); // Store access fault.
rval = addy;
}
}
else
{
switch( ( ir >> 12 ) & 0x7 )
{
//SB, SH, SW
case 0: MINIRV32_STORE1( addy, rs2 ); break;
case 1: MINIRV32_STORE2( addy, rs2 ); break;
case 2: MINIRV32_STORE4( addy, rs2 ); break;
default: trap = (2+1);
}
}
break;
}
case 0x13: // Op-immediate 0b0010011
case 0x33: // Op 0b0110011
{
uint32_t imm = ir >> 20;
imm = imm | (( imm & 0x800 )?0xfffff000:0);
uint32_t rs1 = REG((ir >> 15) & 0x1f);
// Bit 5 of the opcode distinguishes register-register (0x33) from immediate (0x13) forms.
uint32_t is_reg = !!( ir & 0x20 );
uint32_t rs2 = is_reg ? REG(imm & 0x1f) : imm;
if( is_reg && ( ir & 0x02000000 ) )
{
switch( (ir>>12)&7 ) //0x02000000 = RV32M
{
case 0: rval = rs1 * rs2; break; // MUL
#ifndef CUSTOM_MULH // If compiling on a system that doesn't natively, or via libgcc support 64-bit math.
case 1: rval = ((int64_t)((int32_t)rs1) * (int64_t)((int32_t)rs2)) >> 32; break; // MULH
case 2: rval = ((int64_t)((int32_t)rs1) * (uint64_t)rs2) >> 32; break; // MULHSU
case 3: rval = ((uint64_t)rs1 * (uint64_t)rs2) >> 32; break; // MULHU
#else
CUSTOM_MULH
#endif
// Division by zero and the INT32_MIN / -1 overflow case are given fixed results
// below instead of being evaluated by the host.
case 4: if( rs2 == 0 ) rval = -1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? rs1 : ((int32_t)rs1 / (int32_t)rs2); break; // DIV
case 5: if( rs2 == 0 ) rval = 0xffffffff; else rval = rs1 / rs2; break; // DIVU
case 6: if( rs2 == 0 ) rval = rs1; else rval = ((int32_t)rs1 == INT32_MIN && (int32_t)rs2 == -1) ? 0 : ((uint32_t)((int32_t)rs1 % (int32_t)rs2)); break; // REM
case 7: if( rs2 == 0 ) rval = rs1; else rval = rs1 % rs2; break; // REMU
}
}
else
{
switch( (ir>>12)&7 ) // These could be either op-immediate or op commands. Be careful.
{
case 0: rval = (is_reg && (ir & 0x40000000) ) ? ( rs1 - rs2 ) : ( rs1 + rs2 ); break;
case 1: rval = rs1 << (rs2 & 0x1F); break;
case 2: rval = (int32_t)rs1 < (int32_t)rs2; break;
case 3: rval = rs1 < rs2; break;
case 4: rval = rs1 ^ rs2; break;
case 5: rval = (ir & 0x40000000 ) ? ( ((int32_t)rs1) >> (rs2 & 0x1F) ) : ( rs1 >> (rs2 & 0x1F) ); break;
case 6: rval = rs1 | rs2; break;
case 7: rval = rs1 & rs2; break;
}
}
break;
}
case 0x0f: // 0b0001111
rdid = 0; // fencetype = (ir >> 12) & 0b111; We ignore fences in this impl.
break;
case 0x73: // Zifencei+Zicsr (0b1110011)
{
uint32_t csrno = ir >> 20;
uint32_t microop = ( ir >> 12 ) & 0x7;
if( (microop & 3) ) // It's a Zicsr function.
{
int rs1imm = (ir >> 15) & 0x1f;
uint32_t rs1 = REG(rs1imm);
uint32_t writeval = rs1;
// https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf
// Generally, support for Zicsr
// Step 1: read the current CSR value into rval.
switch( csrno )
{
case 0x340: rval = CSR( mscratch ); break;
case 0x305: rval = CSR( mtvec ); break;
case 0x304: rval = CSR( mie ); break;
case 0xC00: rval = cycle; break;
case 0x344: rval = CSR( mip ); break;
case 0x341: rval = CSR( mepc ); break;
case 0x300: rval = CSR( mstatus ); break; //mstatus
case 0x342: rval = CSR( mcause ); break;
case 0x343: rval = CSR( mtval ); break;
case 0xf11: rval = 0xff0ff0ff; break; //mvendorid
case 0x301: rval = 0x40401101; break; //misa (XLEN=32, IMA+X)
//case 0x3B0: rval = 0; break; //pmpaddr0
//case 0x3a0: rval = 0; break; //pmpcfg0
//case 0xf12: rval = 0x00000000; break; //marchid
//case 0xf13: rval = 0x00000000; break; //mimpid
//case 0xf14: rval = 0x00000000; break; //mhartid
default:
MINIRV32_OTHERCSR_READ( csrno, rval );
break;
}
// Step 2: compute the value to write from the operation type.
switch( microop )
{
case 1: writeval = rs1; break; //CSRRW
case 2: writeval = rval | rs1; break; //CSRRS
case 3: writeval = rval & ~rs1; break; //CSRRC
case 5: writeval = rs1imm; break; //CSRRWI
case 6: writeval = rval | rs1imm; break; //CSRRSI
case 7: writeval = rval & ~rs1imm; break; //CSRRCI
}
// Step 3: write the new value back to the CSR.
switch( csrno )
{
case 0x340: SETCSR( mscratch, writeval ); break;
case 0x305: SETCSR( mtvec, writeval ); break;
case 0x304: SETCSR( mie, writeval ); break;
case 0x344: SETCSR( mip, writeval ); break;
case 0x341: SETCSR( mepc, writeval ); break;
case 0x300: SETCSR( mstatus, writeval ); break; //mstatus
case 0x342: SETCSR( mcause, writeval ); break;
case 0x343: SETCSR( mtval, writeval ); break;
//case 0x3a0: break; //pmpcfg0
//case 0x3B0: break; //pmpaddr0
//case 0xf11: break; //mvendorid
//case 0xf12: break; //marchid
//case 0xf13: break; //mimpid
//case 0xf14: break; //mhartid
//case 0x301: break; //misa
default:
MINIRV32_OTHERCSR_WRITE( csrno, writeval );
break;
}
}
else if( microop == 0x0 ) // "SYSTEM" 0b000
{
rdid = 0;
if( ( ( csrno & 0xff ) == 0x02 ) ) // MRET
{
//https://raw.githubusercontent.com/riscv/virtual-memory/main/specs/663-Svpbmt.pdf
//Table 7.6. MRET then in mstatus/mstatush sets MPV=0, MPP=0, MIE=MPIE, and MPIE=1. La
// Should also update mstatus to reflect correct mode.
uint32_t startmstatus = CSR( mstatus );
uint32_t startextraflags = CSR( extraflags );
SETCSR( mstatus , (( startmstatus & 0x80) >> 4) | ((startextraflags&3) << 11) | 0x80 );
SETCSR( extraflags, (startextraflags & ~3) | ((startmstatus >> 11) & 3) );
pc = CSR( mepc ) -4;
} else {
switch (csrno) {
case 0:
// When ECALL_HANDLER is defined, ECALL is passed to it with the state pointer
// and no trap is raised.
#ifndef ECALL_HANDLER
trap = ( CSR( extraflags ) & 3) ? (11+1) : (8+1); // ECALL; 8 = "Environment call from U-mode"; 11 = "Environment call from M-mode"
#else
ECALL_HANDLER(state);
trap = 0;
#endif
break;
case 1:
trap = (3+1); break; // EBREAK 3 = "Breakpoint"
case 0x105: //WFI (Wait for interrupts)
CSR( mstatus ) |= 8; //Enable interrupts
CSR( extraflags ) |= 4; //Inform environment we want to go to sleep.
SETCSR( pc, pc + 4 );
return 1;
default:
trap = (2+1); break; // Illegal opcode.
}
}
}
else
trap = (2+1); // Note microop 0b100 == undefined.
break;
}
case 0x2f: // RV32A (0b00101111)
{
uint32_t rs1 = REG((ir >> 15) & 0x1f);
uint32_t rs2 = REG((ir >> 20) & 0x1f);
uint32_t irmid = ( ir>>27 ) & 0x1f;
rs1 -= MINIRV32_RAM_IMAGE_OFFSET;
// We don't implement load/store from UART or CLNT with RV32A here.
if( rs1 >= MINI_RV32_RAM_SIZE-3 )
{
trap = (7+1); //Store/AMO access fault
rval = rs1 + MINIRV32_RAM_IMAGE_OFFSET;
}
else
{
// rval receives the old memory value; rs2 is turned into the value to store.
rval = MINIRV32_LOAD4( rs1 );
// Referenced a little bit of https://github.com/franzflasch/riscv_em/blob/master/src/core/core.c
uint32_t dowrite = 1;
switch( irmid )
{
case 2: //LR.W (0b00010)
dowrite = 0;
// Remember the reserved address in the bits above the three flag bits.
CSR( extraflags ) = (CSR( extraflags ) & 0x07) | (rs1<<3);
break;
case 3: //SC.W (0b00011) (Make sure we have a slot, and, it's valid)
rval = ( CSR( extraflags ) >> 3 != ( rs1 & 0x1fffffff ) ); // Validate that our reservation slot is OK.
dowrite = !rval; // Only write if slot is valid.
break;
case 1: break; //AMOSWAP.W (0b00001)
case 0: rs2 += rval; break; //AMOADD.W (0b00000)
case 4: rs2 ^= rval; break; //AMOXOR.W (0b00100)
case 12: rs2 &= rval; break; //AMOAND.W (0b01100)
case 8: rs2 |= rval; break; //AMOOR.W (0b01000)
case 16: rs2 = ((int32_t)rs2<(int32_t)rval)?rs2:rval; break; //AMOMIN.W (0b10000)
case 20: rs2 = ((int32_t)rs2>(int32_t)rval)?rs2:rval; break; //AMOMAX.W (0b10100)
case 24: rs2 = (rs2<rval)?rs2:rval; break; //AMOMINU.W (0b11000)
case 28: rs2 = (rs2>rval)?rs2:rval; break; //AMOMAXU.W (0b11100)
default: trap = (2+1); dowrite = 0; break; //Not supported.
}
if( dowrite ) MINIRV32_STORE4( rs1, rs2 );
}
break;
}
default: trap = (2+1); // Fault: Invalid opcode.
}
// If there was a trap, do NOT allow register writeback.
if( trap ) {
SETCSR( pc, pc );
MINIRV32_POSTEXEC( pc, ir, trap );
break;
}
// Register 0 is never written, which also implements the rdid = 0 "no writeback" convention.
if( rdid )
{
REGSET( rdid, rval ); // Write back register.
}
}
MINIRV32_POSTEXEC( pc, ir, trap );
pc += 4;
}
// Handle traps and interrupts.
if( trap )
{
if( trap & 0x80000000 ) // If prefixed with 1 in MSB, it's an interrupt, not a trap.
{
SETCSR( mcause, trap );
SETCSR( mtval, 0 );
pc += 4; // PC needs to point to where the PC will return to.
}
else
{
// Exceptions were encoded as (cause + 1); memory faults (6..8) report the address in mtval.
SETCSR( mcause, trap - 1 );
SETCSR( mtval, (trap > 5 && trap <= 8)? rval : pc );
}
SETCSR( mepc, pc ); //TRICKY: The kernel advances mepc automatically.
//CSR( mstatus ) & 8 = MIE, & 0x80 = MPIE
// On an interrupt, the system moves current MIE into MPIE
SETCSR( mstatus, (( CSR( mstatus ) & 0x08) << 4) | (( CSR( extraflags ) & 3 ) << 11) );
pc = (CSR( mtvec ) - 4);
// If trapping, always enter machine mode.
CSR( extraflags ) |= 3;
trap = 0;
pc += 4;
}
// Write the working copies back, carrying into cycleh if the low cycle word wrapped.
if( CSR( cyclel ) > cycle ) CSR( cycleh )++;
SETCSR( cyclel, cycle );
SETCSR( pc, pc );
return 0;
}
#endif
#endif

View File

@ -0,0 +1,192 @@
# riscv_emufun (mini-rv32ima)
Click below for the YouTube video introducing this project:
[![Writing a Really Tiny RISC-V Emulator](https://img.youtube.com/vi/YT5vB3UqU_E/0.jpg)](https://www.youtube.com/watch?v=YT5vB3UqU_E) [![But Will It Run Doom?](https://img.youtube.com/vi/uZMNK17VCMU/0.jpg)](https://www.youtube.com/watch?v=uZMNK17VCMU)
## What
mini-rv32ima is a single-file-header, [mini-rv32ima.h](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.h), in the [STB Style library](https://github.com/nothings/stb) that:
* Implements a RISC-V **rv32ima/Zifencei†+Zicsr** (and partial su), with CLINT and MMIO.
* Is about **400 lines** of actual code.
* Has **no dependencies**, not even libc.
* Is **easily extensible**. So you can easily add CSRs, instructions, MMIO, etc!
* Is pretty **performant**. (~450 coremark on my laptop, about 1/2 the speed of QEMU)
* Is human-readable and in **basic C** code.
* Is "**incomplete**" in that it doesn't implement the many parts of the spec that Linux doesn't (and you shouldn't) use.
* Is trivially **embeddable** in applications.
It has a [demo wrapper](https://github.com/cnlohr/riscv_emufun/blob/master/mini-rv32ima/mini-rv32ima.c) that:
* Implements a CLI, SYSCON, UART, DTB and Kernel image loading.
* And is only around **250 lines** of code itself.
* Compiles down to a **~18kB executable** and only relies on libc.
†: Zifencei+RV32A are stubbed, so tweaks will need to be made if you want to emulate a multiprocessor system with this emulator.
Just see the `mini-rv32ima` folder.
It's "fully functional" now in that I can run Linux, apps, etc. Compile flat binaries and drop them in an image.
## Why
I'm working on a really really simple C Risc-V emulator. So simple it doesn't even have an MMU (Memory Management Unit). I have a few goals, they include:
* Furthering RV32-NOMMU work to improve Linux support for RV32-NOMMU. (Imagine if we could run Linux on the $1 ESP32-C3)
* Learning more about RV32 and writing emulators.
* Being further inspired by @pimaker's amazing work on [Running Linux in a Pixel Shader](https://blog.pimaker.at/texts/rvc1/) and having the sneaking suspicion performance could be even better!
* Hoping to port it to some weird places.
* Understand the *most simplistic* system you can run Linux on and trying to push that boundary.
* Continue to include my [education of people about assembly language](https://www.youtube.com/watch?v=Gelf0AyVGy4).
## How
Windows instructions (Just playing with the image)
* Clone this repo.
* Install or have TinyCC. [Powershell Installer](https://github.com/cntools/Install-TCC) or [Regular Windows Installer](https://github.com/cnlohr/tinycc-win64-installer/releases/tag/v0_0.9.27)
* Run `winrun.ps` in the `windows` folder.
WSL (for full toolchain and image build):
* You will need to remove all spaces from your path i.e. `export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/mnt/c/Windows/system32:/snap/bin` and continue the instructions. P.S. What in the world was Windows thinking, putting a space between "Program" and "Files"??!?
Linux instructions (both):
* Clone this repo.
* Install `git build-essential` and/or whatever other requirements are in place for [buildroot](https://buildroot.org/).
* `make testdlimage`
* It automatically downloads the image (~1MB) and runs the emulator.
* Should be up and running in about 2.5s depending on internet speed.
You can do in-depth work on Linux by:
* `make everything`
If you want to play with the bare metal system, see below, or if you have the toolchain installed, just:
* `make testbare`
If you just want to play emdoom, and use the prebuilt image:
* On Windows, run `windows\winrundoom.ps1`
* On Linux, `cd mini-rv32ima`, and type `make testdoom`
## Questions?
* Why not rv64?
* Because then I can't run it as easily in a pixel shader if I ever hope to.
* Can I add an MMU?
* Yes. It actually probably wouldn't be too difficult.
* Should I add an MMU?
* No. It is important to further support for nommu systems to empower minimal Risc-V designs!
Everything else: Contact us on my Discord: https://discord.com/invite/CCeyWyZ
## How do I use this in my own project?
You should not need to modify `mini-rv32ima.h`, but instead, use `mini-rv32ima.c` as a template for what you are trying to do in your own project.
You can override all functionality by defining the following macros. Here are examples of what `mini-rv32ima.c` does with them. You can see the definition of the functions, or augment their definitions, by altering `mini-rv32ima.c`.
| Macro | Definition / Comment |
| --- | --- |
| `MINIRV32WARN( x... )` | `printf( x );` <br> Warnings emitted from mini-rv32ima.h |
| `MINIRV32_DECORATE` | `static` <br> How to decorate the functions. |
| `MINI_RV32_RAM_SIZE` | `ram_amt` <br> A variable, how big is system RAM? |
| `MINIRV32_IMPLEMENTATION` | If using mini-rv32ima.h, need to define this. |
| `MINIRV32_POSTEXEC( pc, ir, retval )` | `{ if( retval > 0 ) { if( fail_on_all_faults ) { printf( "FAULT\n" ); return 3; } else retval = HandleException( ir, retval ); } }` <br> If you want to execute something every time slice. |
| `MINIRV32_HANDLE_MEM_STORE_CONTROL( addy, val )` | `if( HandleControlStore( addy, val ) ) return val;` <br> Called on non-RAM memory access. |
| `MINIRV32_HANDLE_MEM_LOAD_CONTROL( addy, rval )` | `rval = HandleControlLoad( addy );` <br> Called on non-RAM memory access return a value. |
| `MINIRV32_OTHERCSR_WRITE( csrno, value )` | `HandleOtherCSRWrite( image, csrno, value );` <br> You can use CSRs for control requests. |
| `MINIRV32_OTHERCSR_READ( csrno, value )` | `value = HandleOtherCSRRead( image, csrno );` <br> You can use CSRs for control requests. |
## Hopeful goals?
* Further drive down needed features to run Linux.
* Remove need for RV32A extension on systems with only one CPU.
* Support for relocatable ELF executables.
* Add support for an unreal UART. One that's **much** simpler than the current 8250 driver.
* Maybe run this in a pixelshader too!
* Get opensbi working with this.
* Be able to "embed" rv32 emulators in random projects.
* Can I use early console to be a full system console?
* Can I increase the maximum contiguous memory allocatable?
## Special Thanks
* For @regymm and their [patches to buildroot](https://github.com/regymm/buildroot) and help!
* callout: Regymm's [quasiSoC project](https://github.com/regymm/quasiSoC/).
* Buildroot (For being so helpful).
* @vowstar and their team working on [k210-linux-nommu](https://github.com/vowstar/k210-linux-nommu).
* This [guide](https://jborza.com/emulation/2020/04/09/riscv-environment.html)
* [rvcodecjs](https://luplab.gitlab.io/rvcodecjs/) I probably went through over 1,000 codes here.
* @splinedrive from the [KianV RISC-V noMMU SoC](https://github.com/splinedrive/kianRiscV/tree/master/linux_socs/kianv_harris_mcycle_edition?s=09) project.
## More details
If you want to build the kernel yourself:
* `make everything`
* About 20 minutes. (Or 4+ hours if you're on [Windows Subsystem for Linux 2](https://github.com/microsoft/WSL/issues/4197))
* And you should be dropped into a Linux busybox shell with some little tools that were compiled here.
## Emdoom notes
* Emdoom building is in the `experiments/emdoom` folder
* You *MUST* build your kernel with `MAX_ORDER` set to >12 in `buildroot/output/build/linux-5.19/include/linux/mmzone.h` if you are building your own image.
* You CAN use the pre-existing image that is described above.
* On Windows, it will be very slow. Not sure why.
If you want to use bare metal to build your binaries so you don't need buildroot, you can use the rv64 gcc in 32-bit mode built into Ubuntu 20.04 and up.
```
sudo apt-get install gcc-multilib gcc-riscv64-unknown-elf make
```
## Links
* "Hackaday Supercon 2022: Charles Lohr - Assembly in 2022: Yes! We Still Use it and Here's Why" : https://www.youtube.com/watch?v=Gelf0AyVGy4
## Attic
## General notes:
* https://github.com/cnlohr/riscv_emufun/commit/2f09cdeb378dc0215c07eb63f5a6fb43dbbf1871#diff-b48ccd795ae9aced07d022bf010bf9376232c4d78210c3113d90a8d349c59b3dL440
(These things don't currently work)
### Building Tests
(This does not work, now)
```
cd riscv-tests
export CROSS_COMPILE=riscv64-linux-gnu-
export PLATFORM_RISCV_XLEN=32
CC=riscv64-linux-gnu-gcc ./configure
make XLEN=32 RISCV_PREFIX=riscv64-unknown-elf- RISCV_GCC_OPTS="-g -O1 -march=rv32imaf -mabi=ilp32f -I/usr/include"
```
### Building OpenSBI
(This does not currently work!)
```
cd opensbi
export CROSS_COMPILE=riscv64-unknown-elf-
export PLATFORM_RISCV_XLEN=32
make
```
### Extra links
* Clear outline of CSRs: https://five-embeddev.com/riscv-isa-manual/latest/priv-csrs.html
* Fonts used in videos: https://audiolink.dev/
### Using custom build
Where yminpatch is the patch from the mailing list.
```
rm -rf buildroot
git clone git://git.buildroot.net/buildroot
cd buildroot
git am < ../yminpatch.txt
make qemu_riscv32_nommu_virt_defconfig
make
# Or use our configs.
```
Note: For emdoom you will need to modify include/linux/mmzone.h and change MAX_ORDER to 13.
### Buildroot Notes
Add this:
https://github.com/cnlohr/buildroot/pull/1/commits/bc890f74354e7e2f2b1cf7715f6ef334ff6ed1b2
Use this:
https://github.com/cnlohr/buildroot/commit/e97714621bfae535d947817e98956b112eb80a75

View File

@ -0,0 +1,143 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
// Forward declarations so the ECALL hook can be named before the emulator
// header (which defines struct MiniRV32IMAState) is included.
struct MiniRV32IMAState;
void ecall_handler(struct MiniRV32IMAState *state);
// Hook macro routing to ecall_handler(); presumably expanded by
// mini-rv32ima.h when the guest executes ECALL -- TODO confirm against the header.
#define ECALL_HANDLER(state) ecall_handler(state)
// Warnings emitted from mini-rv32ima.h go to stdout.
// NOTE: `x...` is the GNU named-variadic macro form, not ISO C.
#define MINIRV32WARN( x... ) printf( x );
// Give the emulator's functions internal linkage in this translation unit.
#define MINIRV32_DECORATE static
// Guest RAM size in bytes (32 MiB).
#define MINI_RV32_RAM_SIZE (32 * 1024 * 1024)
// Must be defined before the include to pull in the implementation,
// not just the declarations.
#define MINIRV32_IMPLEMENTATION
// Guest address that maps to byte 0 of the RAM image; the emulator subtracts
// this from guest addresses before touching RAM. Here RAM starts at address 0.
#define MINIRV32_RAM_IMAGE_OFFSET 0x0
#include "mini-rv32ima.h"
// Service numbers private to this VM start at 1025 so they cannot collide
// with the low-numbered print/exit services handled in ecall_handler().
// The argument is parenthesized so expressions such as SYSCALL(a | b)
// expand with the intended precedence.
#define SYSCALL(num) (1025 + (num))
/*
 * Host-side base of the guest RAM image.
 *
 * The ECALL hook only receives the CPU state, so main() publishes the image
 * here to let ecall_handler() translate guest addresses into host pointers.
 */
static uint8_t *vm_ram_image = NULL;

/*
 * Translate a guest address into a host pointer inside the RAM image.
 *
 * addr  - guest address taken from a register.
 * avail - out: number of bytes from that address to the end of guest RAM.
 *
 * Returns NULL (and leaves *avail untouched) when the image is not set up or
 * the address lies outside guest RAM.
 */
static uint8_t *guest_ram_ptr(uint32_t addr, uint32_t *avail) {
    /* Same translation the emulator applies before its own RAM accesses. */
    uint32_t ofs = addr - MINIRV32_RAM_IMAGE_OFFSET;

    if (vm_ram_image == NULL || ofs >= (uint32_t)MINI_RV32_RAM_SIZE) {
        return NULL;
    }
    *avail = (uint32_t)MINI_RV32_RAM_SIZE - ofs;
    return vm_ram_image + ofs;
}

/* Report an unusable guest pointer passed to a service and stop the VM. */
static void bad_guest_address(const char *service, uint32_t addr) {
    fprintf(stderr, "\n%s: invalid guest address 0x%08x\n", service, (unsigned)addr);
    exit(1);
}

/*
 * Environment-call services offered to the guest program.
 *
 * The service number is read from x17 (a7) and the argument from x10 (a0);
 * results are written back to x10. Unknown service numbers terminate the VM.
 */
void ecall_handler(struct MiniRV32IMAState *state) {
    uint32_t a0 = REG(10);

    switch (state->regs[17]) { // x17 | a7
    case 1:
        // PrintInt
        printf("%d", (int)a0);
        break;
    case 4: {
        // PrintString: a0 is a guest address, so it must be mapped into the
        // RAM image and the terminator must be found inside guest RAM.
        uint32_t avail = 0;
        const uint8_t *str = guest_ram_ptr(a0, &avail);
        const uint8_t *nul = (str != NULL) ? memchr(str, '\0', avail) : NULL;

        if (nul == NULL) {
            bad_guest_address("PrintString", a0);
        }
        fwrite(str, 1, (size_t)(nul - str), stdout);
        break;
    }
    case 10:
        fprintf(stderr, "\nexit: %d\n", (int)a0);
        exit((int)a0);
    case 11:
        // PrintChar
        printf("%c", (int)a0);
        break;
    case 93:
        fprintf(stderr, "\nmain return code: %d\n", (int)a0);
        exit((int)a0);
    case SYSCALL(0):
        // getchar: result (or EOF) is returned in a0.
        REGSET(10, getchar());
        break; // previously missing: fell through into putchar
    case SYSCALL(1):
        // putchar
        putchar((int)a0);
        break;
    case SYSCALL(4): {
        // input int: a0 keeps its previous value when no number can be read.
        int value = 0;

        if (scanf("%d", &value) == 1) {
            REGSET(10, (uint32_t)value);
        }
        break;
    }
    case SYSCALL(5): {
        // input string: read one whitespace-delimited token into the guest
        // buffer at a0, bounded by the RAM remaining after that address.
        uint32_t avail = 0;
        char *dst = (char *)guest_ram_ptr(a0, &avail);
        char fmt[16];

        if (dst == NULL || avail < 2) {
            bad_guest_address("InputString", a0);
        }
        // Build "%<width>s" so scanf cannot write past the end of guest RAM
        // (width excludes the terminating NUL).
        snprintf(fmt, sizeof fmt, "%%%us", (unsigned)(avail - 1));
        (void)scanf(fmt, dst); // on failure the guest buffer is left untouched
        REGSET(10, a0);
        break;
    }
    default:
        MINIRV32WARN("Unhandled ECALL: %d\n", (int)state->regs[17]);
        exit(1);
    }
}

/*
 * Load a flat binary into guest RAM at offset 0 and single-step the emulator
 * until MiniRV32IMAStep() reports a non-zero result, which becomes the exit
 * status.
 *
 * Usage: <program> [flatbin_file]   (defaults to DEFAULT_FILE)
 */
int main(int argc, char *argv[]) {
    // gcc -DDEFAULT_FILE='\"flat.bin\"' .\ripes-vm.c -o rv32-vm.exe
    struct MiniRV32IMAState state;
    FILE *file = NULL;
    long flen = 0;
    int ret = 1;

    // Guest memory: calloc hands back zero-filled RAM.
    uint8_t *image = calloc(MINI_RV32_RAM_SIZE, 1);
    if (image == NULL) {
        fprintf(stderr, "Failed to allocate %d bytes of guest RAM\n", MINI_RV32_RAM_SIZE);
        return 1;
    }
    vm_ram_image = image;

    // Initialize CPU state.
    memset(&state, 0, sizeof(state));
    state.pc = 0;               // program counter starts at 0
    state.mstatus = 0x80000000; // intended to select machine mode (per original note)
    state.mtvec = 0x1000;
    state.mie = 0x7;            // enable all interrupts (per original note)

#ifndef DEFAULT_FILE
#define DEFAULT_FILE "../ccompiler/backend/test_rv.bin"
#endif
    const char *filename = DEFAULT_FILE;
    // Load the flatbin file, optionally named on the command line.
    if (argc == 2) {
        filename = argv[1];
    }

    file = fopen(filename, "rb");
    if (!file) {
        fprintf(stderr, "Usage: %s <flatbin_file>\n", argv[0]);
        printf("Failed to open file %s\n", filename);
        goto out;
    }

    if (fseek(file, 0, SEEK_END) != 0 || (flen = ftell(file)) < 0 ||
        fseek(file, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to determine size of %s\n", filename);
        goto out_close;
    }
    if (flen > MINI_RV32_RAM_SIZE) {
        fprintf(stderr, "Flatbin file is too large\n");
        goto out_close;
    }
    // An empty file is allowed; otherwise a short read is an error.
    if (flen > 0 && fread(image, 1, (size_t)flen, file) != (size_t)flen) {
        fprintf(stderr, "Failed to read %s\n", filename);
        goto out_close;
    }
    fclose(file);
    file = NULL;

    // Run the emulator one instruction at a time until it reports a fault.
    do {
        ret = MiniRV32IMAStep(&state, image, MINIRV32_RAM_IMAGE_OFFSET, 0, 1);
    } while (ret == 0);
    printf("Exception or interrupt occurred at PC: %d\n", (int)state.pc);
    goto out;

out_close:
    fclose(file);
out:
    vm_ram_image = NULL;
    free(image);
    return ret;
}
// static void DumpState( struct MiniRV32IMAState * core, uint8_t * ram_image )
// {
// uint32_t pc = core->pc;
// uint32_t pc_offset = pc - MINIRV32_RAM_IMAGE_OFFSET;
// uint32_t ir = 0;
// printf( "PC: %08x ", pc );
// if( pc_offset >= 0 && pc_offset < ram_amt - 3 )
// {
// ir = *((uint32_t*)(&((uint8_t*)ram_image)[pc_offset]));
// printf( "[0x%08x] ", ir );
// }
// else
// printf( "[xxxxxxxxxx] " );
// uint32_t * regs = core->regs;
// printf( "Z:%08x ra:%08x sp:%08x gp:%08x tp:%08x t0:%08x t1:%08x t2:%08x s0:%08x s1:%08x a0:%08x a1:%08x a2:%08x a3:%08x a4:%08x a5:%08x ",
// regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7],
// regs[8], regs[9], regs[10], regs[11], regs[12], regs[13], regs[14], regs[15] );
// printf( "a6:%08x a7:%08x s2:%08x s3:%08x s4:%08x s5:%08x s6:%08x s7:%08x s8:%08x s9:%08x s10:%08x s11:%08x t3:%08x t4:%08x t5:%08x t6:%08x\n",
// regs[16], regs[17], regs[18], regs[19], regs[20], regs[21], regs[22], regs[23],
// regs[24], regs[25], regs[26], regs[27], regs[28], regs[29], regs[30], regs[31] );
// }

View File

@ -1,28 +1,30 @@
VM := ../../rv32-vm
CC := ../../ccompiler
STD_CC := gcc
# VM := ../../rv32-vm
# CC := ../../ccompiler
# STD_CC := gcc
TESTS := $(wildcard *.c)
# TESTS := $(wildcard *.c)
# 定义所有测试目标
TEST_TARGETS := $(patsubst %.c, %_test, $(TESTS))
# # 定义所有测试目标
# TEST_TARGETS := $(patsubst %.c, %_test, $(TESTS))
all: $(TEST_TARGETS)
# all: $(TEST_TARGETS)
%_test: %.c
@$(STD_CC) -g -o $@ $<
@$(CC) $< flat.bin
@./$@ ; ret_gcc=$$?
@$(VM) flat.bin ; ret_vm=$$?
@echo "Testing $@"
@if [ $$ret_gcc -eq $$ret_vm ]; then \
echo "$@ passed"; \
else \
echo "$@ failed: GCC returned $$ret_gcc, VM returned $$ret_vm"; \
exit 1; \
fi
# %_test: %.c
# @$(STD_CC) -g -o $@ $<
# @$(CC) $< flat.bin
# @./$@ ; ret_gcc=$$?
# @$(VM) flat.bin ; ret_vm=$$?
# @echo "Testing $@"
# @if [ $$ret_gcc -eq $$ret_vm ]; then \
# echo "$@ passed"; \
# else \
# echo "$@ failed: GCC returned $$ret_gcc, VM returned $$ret_vm"; \
# exit 1; \
# fi
clean:
rm -f $(TEST_TARGETS) flat.bin
# clean:
# rm -f $(TEST_TARGETS) flat.bin
.PHONY: all clean
# .PHONY: all clean
all:
python test.py

View File

@ -1,7 +1,7 @@
# 编译器设置
CC = gcc
AR = ar
CFLAGS = -g -Wall
CFLAGS = -g -Wall -I../..
# 源文件路径
LEXER_DIR = ./lexer
@ -15,7 +15,7 @@ SRCS = \
$(LEXER_DIR)/lexer.c \
$(LEXER_DIR)/token.c \
$(PARSER_DIR)/parser.c \
$(AST_DIR)/ast.c \
$(PARSER_DIR)/ast.c \
$(AST_DIR)/block.c \
$(AST_DIR)/decl.c \
$(AST_DIR)/expr.c \

View File

@ -1,8 +1,10 @@
#include "lexer/lexer.h"
#include "parser/symtab/symtab.h"
#include <lib/core.h>
#include "frontend.h"
#include "parser/symtab/symtab.h"
ast_node_t* frontend(const char* file, void* stream, sread_fn sread) {
init_lib_core();
struct ASTNode* frontend(const char* file, void* stream, sread_fn sread) {
lexer_t lexer;
init_lexer(&lexer, file, stream, sread);

View File

@ -1,27 +1,9 @@
#ifndef __FRONTEND_H__
#define __FRONTEND_H__
#ifndef __SMCC_FRONTEND_H__
#define __SMCC_FRONTEND_H__
#ifndef error
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define STD_LIBRARY
#define error(...) do { fprintf(stderr, __VA_ARGS__); assert(0); } while (0)
#endif
#ifndef warn
#include <stdio.h>
#define STD_LIBRARY
#define warn(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#endif
#define xmalloc(size) malloc(size)
#ifndef FRONTEND_IMPLEMENTATION
#include "lexer/lexer.h"
#include "parser/parser.h"
#include "parser/ast/ast.h"
typedef int (*sread_fn)(void *dst_buf, int dst_size, int elem_size, int count, void *stream);
struct ASTNode* frontend(const char* file, void* stream, sread_fn sread);
#endif
ast_node_t* frontend(const char* file, void* stream, sread_fn sread);
#endif
#endif

View File

@ -26,8 +26,8 @@ the distribution and installation instructions.
Chris Fraser / cwf@aya.yale.edu
David Hanson / drh@drhanson.net
*/
#define FRONTEND_IMPLEMENTATION
#include "../frontend.h"
#include <lib/core.h>
#include "lexer_log.h"
#include "token.h"
#include "lexer.h"
@ -74,8 +74,9 @@ static inline int keyword_cmp(const char* name, int len) {
return -1; // Not a keyword.
}
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread)
{
void init_lexer(lexer_t* lexer, const char* file_name, void* stream, lexer_sread_fn sread) {
init_lib_core();
lexer->cur_ptr = lexer->end_ptr = (unsigned char*)&(lexer->buffer);
lexer->index = 1;
lexer->line = 1;
@ -96,10 +97,10 @@ static void flush_buffer(lexer_t* lexer) {
lexer->cur_ptr = (unsigned char*)lexer->buffer;
int read_size = LEXER_BUFFER_SIZE - num;
// TODO size_t to int maybe lose precision
// TODO rt_size_t to int maybe lose precision
int got_size = lexer->sread(lexer->buffer + num, read_size, 1, read_size, lexer->stream);
if (got_size < 0) {
error("lexer read error");
LEX_ERROR("lexer read error");
} else if (got_size < read_size) {
lexer->end_ptr += got_size;
lexer->end_ptr[0] = '\0'; // EOF
@ -107,7 +108,7 @@ static void flush_buffer(lexer_t* lexer) {
} else if (got_size == read_size) {
lexer->end_ptr += got_size;
} else {
error("lexer read error imposible got_size > read_size maybe overflow?");
LEX_ERROR("lexer read error imposible got_size > read_size maybe overflow?");
}
}
@ -153,8 +154,10 @@ static char got_slash(unsigned char* peek) {
case 'r': return '\r';
case 't': return '\t';
case 'v': return '\v';
default: error("Unknown escape character");
default: break;
}
LEX_ERROR("Unknown escape character");
return -1;
}
static void parse_char_literal(lexer_t* lexer, tok_t* token) {
@ -168,7 +171,7 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) {
val = *peek++;
}
if (*peek++ != '\'') error("Unclosed character literal");
if (*peek++ != '\'') LEX_ERROR("Unclosed character literal");
token->val.ch = val;
lexer->cur_ptr = peek;
token->val.have = 1;
@ -178,7 +181,7 @@ static void parse_char_literal(lexer_t* lexer, tok_t* token) {
static void parse_string_literal(lexer_t* lexer, tok_t* token) {
unsigned char* peek = lexer->cur_ptr + 1;
// TODO string literal size check
char* dest = token->val.str = xmalloc(LEXER_MAX_TOKEN_SIZE + 1);
char* dest = token->val.str = rt._malloc(LEXER_MAX_TOKEN_SIZE + 1);
int len = 0;
while (*peek != '"') {
@ -189,7 +192,7 @@ static void parse_string_literal(lexer_t* lexer, tok_t* token) {
*peek = got_slash(peek);
}
if (len >= LEXER_MAX_TOKEN_SIZE) error("String too long");
if (len >= LEXER_MAX_TOKEN_SIZE) LEX_ERROR("String too long");
dest[len++] = *peek++;
}
dest[len] = '\0';
@ -431,7 +434,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
lexer->line++;
tok = TOKEN_FLUSH; break;
case '#':
warn("TODO: #define\n");
LEX_WARN("Marroc does not support in lexer rather in preprocessor, it will be ignored");
goto_newline(lexer);
tok = TOKEN_FLUSH;
goto END;
@ -458,14 +461,14 @@ void get_token(lexer_t* lexer, tok_t* token) {
case '_':
// TOKEN_IDENT
if ((*peek == 'L' && *peek == '\'') || (*peek == 'L' && *peek == '"')) {
error("unsupport wide-character char literal by `L` format");
LEX_ERROR("unsupport wide-character char literal by `L` format");
}
while (1) {
if (peek == lexer->end_ptr) {
error("unsupport outof 64 length identifier");
LEX_ERROR("unsupport outof 64 length identifier");
}
if ((*peek >= 'a' && *peek <= 'z') || (*peek >= 'A' && *peek <= 'Z') ||
(*peek == '_') || (*peek >= '0' && *peek <= '9')) {
(*peek == '_') || (*peek >= '0' && *peek <= '9')) {
peek++;
continue;
}
@ -475,7 +478,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
int res = keyword_cmp((const char*)lexer->cur_ptr, peek - (lexer->cur_ptr));
if (res == -1) {
int strlen = peek - lexer->cur_ptr;
unsigned char* str = xmalloc(strlen + 1);
unsigned char* str = rt._malloc(strlen + 1);
constant.have = 1;
constant.str = (char*)str;
for (int i = 0; i < strlen; i++) {
@ -489,7 +492,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
tok = keywords[res].tok; break;
}
default:
error("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
LEX_ERROR("unsupport char in sourse code `%c`", *(lexer->cur_ptr));
break;
}
@ -497,6 +500,7 @@ void get_token(lexer_t* lexer, tok_t* token) {
END:
token->val = constant;
token->type = tok;
LEX_DEBUG("get token `%s` (ch: %c, int: %d)", get_tok_name(token->type), token->val.ch, token->val.i);
}
// get_token maybe got invalid (with parser)
@ -507,4 +511,3 @@ void get_valid_token(lexer_t* lexer, tok_t* token) {
type = token->type;
} while (type == TOKEN_FLUSH || type == TOKEN_LINE_COMMENT || type == TOKEN_BLOCK_COMMENT);
}

View File

@ -1,6 +1,7 @@
#ifndef __LEXER_H__
#define __LEXER_H__
#ifndef __SMCC_LEXER_H__
#define __SMCC_LEXER_H__
#include <lib/core.h>
#include "token.h"
#ifndef LEXER_MAX_TOKEN_SIZE
#define LEXER_MAX_TOKEN_SIZE 63

View File

@ -0,0 +1,13 @@
// Lexer-scoped logging helpers: one LEX_<LEVEL> macro per LOG_<LEVEL> macro,
// each prepending the tag "LEXER: " to the message.
#ifndef __SMCC_LEXER_LOG_H__
#define __SMCC_LEXER_LOG_H__
// Presumably the header that supplies the LOG_* macros used below -- TODO confirm.
#include <lib/rt/rt.h>
// `fmt` must be a string literal: the tag is attached by adjacent
// string-literal concatenation, so passing a char* variable will not compile.
// `##__VA_ARGS__` is the GNU extension that drops the preceding comma when no
// variadic arguments are supplied.
#define LEX_NOTSET( fmt, ...) LOG_NOTSET("LEXER: " fmt, ##__VA_ARGS__)
#define LEX_DEBUG( fmt, ...) LOG_DEBUG("LEXER: " fmt, ##__VA_ARGS__)
#define LEX_INFO( fmt, ...) LOG_INFO("LEXER: " fmt, ##__VA_ARGS__)
#define LEX_WARN( fmt, ...) LOG_WARN("LEXER: " fmt, ##__VA_ARGS__)
#define LEX_ERROR( fmt, ...) LOG_ERROR("LEXER: " fmt, ##__VA_ARGS__)
#define LEX_FATAL( fmt, ...) LOG_FATAL("LEXER: " fmt, ##__VA_ARGS__)
#endif // __SMCC_LEXER_LOG_H__

View File

@ -1,6 +1,7 @@
CC = gcc
CFLAGS = -g -Wall
CFLAGS = -g -Wall -I../../../..
SRC = ../lexer.c ../token.c
LIB = -L../../../../lib -lcore
all = test_all
@ -8,10 +9,10 @@ test_all: test
./test
run:
$(CC) $(CFLAGS) $(SRC) run.c -o run
$(CC) $(CFLAGS) $(SRC) run.c $(LIB) -o run
test:
$(CC) $(CFLAGS) $(SRC) -o test test.c
$(CC) $(CFLAGS) $(SRC) $(LIB) -o test test.c
clean:
rm -f test run

View File

@ -1,5 +1,6 @@
#include "../lexer.h"
#include <stdio.h>
#include <string.h>
// gcc -g ../lexer.c ../token.c test_lexer.c -o test_lexer
/*
tok_tConstant {
@ -18,9 +19,14 @@ tok_tConstant {
int g_num;
int g_num_arr[3];
int main(int argc, char* argv[]) {
int num = 0;
// int num = 0;
// You Must Be Call
init_lib_core();
if (argc == 3 && strcmp(argv[2], "-nodebug") == 0) {
log_set_level(NULL, LOG_LEVEL_ALL & ~LOG_LEVEL_DEBUG);
}
const char* file_name = "test_lexer.c";
const char* file_name = "run.c";
if (argc == 2) {
file_name = argv[1];
}
@ -32,7 +38,7 @@ int main(int argc, char* argv[]) {
printf("open file success\n");
lexer_t lexer;
init_lexer(&lexer, "test_lexter.c", fp, (lexer_sread_fn)fread_s);
init_lexer(&lexer, file_name, fp, (lexer_sread_fn)fread_s);
tok_t tok;
while (1) {
@ -40,7 +46,7 @@ int main(int argc, char* argv[]) {
if (tok.type == TOKEN_EOF) {
break;
}
printf("line: %d, column: %d, type: %3d, typename: %s\n",
lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
// printf("line: %d, column: %d, type: %3d, typename: %s\n",
// lexer.line, lexer.index, tok.type, get_tok_name(tok.type));
}
}

View File

@ -1,12 +1,12 @@
#define FRONTEND_IMPLEMENTATION
#include "../frontend.h"
#include <lib/core.h>
#include "lexer_log.h"
#include "token.h"
#define ROUND_IDX(idx) ((idx) % tokbuf->cap)
tok_t* pop_tok(tok_buf_t* tokbuf) {
tok_t* pop_tok(tok_stream_t* tokbuf) {
if (tokbuf->size == 0) {
error("no token to pop");
LEX_ERROR("no token to pop");
return NULL;
}
int idx = tokbuf->cur;
@ -15,11 +15,11 @@ tok_t* pop_tok(tok_buf_t* tokbuf) {
return tokbuf->buf + idx;
}
void flush_peek_tok(tok_buf_t* tokbuf) {
void flush_peek_tok(tok_stream_t* tokbuf) {
tokbuf->peek = tokbuf->cur;
}
void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) {
void init_tokbuf(tok_stream_t *tokbuf, void *stream, tok_stream_get_func gettok) {
tokbuf->cur = 0;
tokbuf->end = 0;
tokbuf->peek = 0;
@ -30,18 +30,19 @@ void init_tokbuf(tok_buf_t *tokbuf, void *stream, get_tokbuf_func gettok) {
tokbuf->cap = 0;
}
tok_t *peek_tok(tok_buf_t *tokbuf) {
tok_t *peek_tok(tok_stream_t *tokbuf) {
Assert(tokbuf->size <= tokbuf->cap);
int idx = tokbuf->peek;
tokbuf->peek = ROUND_IDX(idx + 1);
if (tokbuf->size >= tokbuf->cap) {
error("peek too deep, outof array size");
}
if (idx == tokbuf->end) {
if (tokbuf->size == tokbuf->cap) {
error("peek_tok buffer overflow");
LEX_ERROR("peek_tok buffer overflow");
return NULL;
}
if (tokbuf->gettok == NULL) {
error("peek_tok can not got tok");
LEX_ERROR("peek_tok can not got tok");
return NULL;
}
tokbuf->gettok(tokbuf->stream, &(tokbuf->buf[idx]));
tokbuf->size++;
@ -51,15 +52,16 @@ tok_t *peek_tok(tok_buf_t *tokbuf) {
return &(tokbuf->buf[idx]);
}
tok_type_t peek_tok_type(tok_buf_t* tokbuf) {
tok_type_t peek_tok_type(tok_stream_t* tokbuf) {
return peek_tok(tokbuf)->type;
}
int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type) {
int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type) {
flush_peek_tok(tokbuf);
tok_t* tok = peek_tok(tokbuf);
if (tok->type != type) {
error("expected tok: %s, got %s", get_tok_name(type), get_tok_name(tok->type));
LEX_ERROR("expected tok `%s` but got `%s`", get_tok_name(type), get_tok_name(tok->type));
return 0;
} else {
pop_tok(tokbuf);
}

View File

@ -51,10 +51,10 @@ enum CSTD_KEYWORD {
X(flush , TOKEN_FLUSH) \
X("==" , TOKEN_EQ) \
X("=" , TOKEN_ASSIGN) \
X("++" , TOKEN_ADD_ADD) \
X("++" , TOKEN_ADD_ADD) \
X("+=" , TOKEN_ASSIGN_ADD) \
X("+" , TOKEN_ADD) \
X("--" , TOKEN_SUB_SUB) \
X("--" , TOKEN_SUB_SUB) \
X("-=" , TOKEN_ASSIGN_SUB) \
X("->" , TOKEN_DEREF) \
X("-" , TOKEN_SUB) \
@ -134,7 +134,7 @@ typedef struct tok {
tok_val_t val;
} tok_t;
typedef struct tok_buf {
typedef struct tok_stream {
int cur;
int end;
int peek;
@ -143,15 +143,15 @@ typedef struct tok_buf {
tok_t* buf;
void* stream;
void (*gettok)(void* stream, tok_t* token);
} tok_buf_t;
} tok_stream_t;
typedef void(*get_tokbuf_func)(void* stream, tok_t* token);
void init_tokbuf(tok_buf_t* tokbuf, void* stream, get_tokbuf_func gettok);
tok_t* peek_tok(tok_buf_t* tokbuf);
tok_t* pop_tok(tok_buf_t* tokbuf);
void flush_peek_tok(tok_buf_t* tokbuf);
tok_type_t peek_tok_type(tok_buf_t* tokbuf);
int expect_pop_tok(tok_buf_t* tokbuf, tok_type_t type);
typedef void(*tok_stream_get_func)(void* stream, tok_t* token);
void init_tokbuf(tok_stream_t* tokbuf, void* stream, tok_stream_get_func gettok);
tok_t* peek_tok(tok_stream_t* tokbuf);
tok_t* pop_tok(tok_stream_t* tokbuf);
void flush_peek_tok(tok_stream_t* tokbuf);
tok_type_t peek_tok_type(tok_stream_t* tokbuf);
int expect_pop_tok(tok_stream_t* tokbuf, tok_type_t type);
const char* get_tok_name(tok_type_t type);
#endif
#endif

View File

@ -1,12 +1,11 @@
#include "ast.h"
#include "../parser.h"
struct ASTNode* new_ast_node(void) {
struct ASTNode* node = xmalloc(sizeof(struct ASTNode));
ast_node_t* new_ast_node(void) {
ast_node_t* node = rt._malloc(sizeof(ast_node_t));
init_ast_node(node);
return node;
}
void init_ast_node(struct ASTNode* node) {
void init_ast_node(ast_node_t* node) {
node->type = NT_INIT;
for (int i = 0; i < sizeof(node->children) / sizeof(node->children[0]); i++) {
@ -14,7 +13,7 @@ void init_ast_node(struct ASTNode* node) {
}
}
// struct ASTNode* find_ast_node(struct ASTNode* node, ast_type_t type) {
// ast_node_t* find_ast_node(ast_node_t* node, ast_type_t type) {
// }
@ -25,7 +24,7 @@ static void pnt_depth(int depth) {
}
}
// void pnt_ast(struct ASTNode* node, int depth) {
// void pnt_ast(ast_node_t* node, int depth) {
// if (!node) return;
// pnt_depth(depth);
// switch (node->type) {

View File

@ -1,10 +1,9 @@
#ifndef __AST_H__
#define __AST_H__
#include "../../frontend.h"
#include "../../lexer/lexer.h"
#include "../../../../libcore/vector.h"
#include "../type.h"
#include <lib/utils/ds/vector.h>
#include "../lexer/lexer.h"
#include "type.h"
typedef enum {
NT_INIT,
@ -78,98 +77,98 @@ typedef enum {
NT_TERM_TYPE,
} ast_type_t;
typedef struct ASTNode {
typedef struct ast_node {
ast_type_t type;
union {
void *children[6];
struct {
vector_header(children, struct ASTNode*);
vector_header(children, struct ast_node *);
} root;
struct {
vector_header(children, struct ASTNode*);
vector_header(children, struct ast_node *);
} block;
struct {
struct ASTNode* decl_node;
struct ast_node * decl_node;
tok_t tok;
} syms;
struct {
vector_header(params, struct ASTNode*);
vector_header(params, struct ast_node *);
} params;
struct {
struct ASTNode* name;
struct ASTNode* params;
struct ASTNode* func_decl;
struct ast_node * name;
struct ast_node * params;
struct ast_node * func_decl;
} call;
struct {
struct ASTNode *type;
struct ASTNode *name;
struct ASTNode *expr_stmt; // optional
struct ast_node *type;
struct ast_node *name;
struct ast_node *expr_stmt; // optional
void* data;
} decl_val;
struct {
struct ASTNode *ret;
struct ASTNode *name;
struct ASTNode *params; // array of params
struct ASTNode *def;
struct ast_node *ret;
struct ast_node *name;
struct ast_node *params; // array of params
struct ast_node *def;
} decl_func;
struct {
struct ASTNode *decl;
struct ASTNode *body; // optional
struct ast_node *decl;
struct ast_node *body; // optional
void* data;
} func;
struct {
struct ASTNode *left;
struct ASTNode *right;
struct ASTNode *optional; // optional
struct ast_node *left;
struct ast_node *right;
struct ast_node *optional; // optional
} expr;
struct {
struct ASTNode *cond;
struct ASTNode *if_stmt;
struct ASTNode *else_stmt; // optional
struct ast_node *cond;
struct ast_node *if_stmt;
struct ast_node *else_stmt; // optional
} if_stmt;
struct {
struct ASTNode *cond;
struct ASTNode *body;
struct ast_node *cond;
struct ast_node *body;
} switch_stmt;
struct {
struct ASTNode *cond;
struct ASTNode *body;
struct ast_node *cond;
struct ast_node *body;
} while_stmt;
struct {
struct ASTNode *body;
struct ASTNode *cond;
struct ast_node *body;
struct ast_node *cond;
} do_while_stmt;
struct {
struct ASTNode *init;
struct ASTNode *cond; // optional
struct ASTNode *iter; // optional
struct ASTNode *body;
struct ast_node *init;
struct ast_node *cond; // optional
struct ast_node *iter; // optional
struct ast_node *body;
} for_stmt;
struct {
struct ASTNode *expr_stmt; // optional
struct ast_node *expr_stmt; // optional
} return_stmt;
struct {
struct ASTNode *label;
struct ast_node *label;
} goto_stmt;
struct {
struct ASTNode *label;
struct ast_node *label;
} label_stmt;
struct {
struct ASTNode *block;
struct ast_node *block;
} block_stmt;
struct {
struct ASTNode *expr_stmt;
struct ast_node *expr_stmt;
} expr_stmt;
};
} ast_node_t;
struct ASTNode* new_ast_node(void);
void init_ast_node(struct ASTNode* node);
void pnt_ast(struct ASTNode* node, int depth);
ast_node_t* new_ast_node(void);
void init_ast_node(ast_node_t* node);
void pnt_ast(ast_node_t* node, int depth);
typedef struct parser parser_t;
typedef struct ASTNode* (*parse_func_t) (parser_t*);
typedef ast_node_t* (*parse_func_t) (parser_t*);
void parse_prog(parser_t* parser);
ast_node_t* parse_decl(parser_t* parser);
@ -182,8 +181,8 @@ ast_node_t* parse_expr(parser_t* parser);
ast_node_t* parse_type(parser_t* parser);
ast_node_t* new_ast_ident_node(tok_t* tok);
ast_node_t* expect_pop_ident(tok_buf_t* tokbuf);
ast_node_t* expect_pop_ident(tok_stream_t* tokbuf);
int peek_decl(tok_buf_t* tokbuf);
int peek_decl(tok_stream_t* tokbuf);
#endif

View File

@ -1,5 +1,5 @@
#include "ast.h"
#include "../ast.h"
#include "../parser.h"
#include "../symtab/symtab.h"
@ -17,7 +17,7 @@ ast_node_t* new_ast_node_block() {
ast_node_t* parse_block(parser_t* parser) {
symtab_enter_scope(parser->symtab);
tok_buf_t *tokbuf = &parser->tokbuf;
tok_stream_t *tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype;
ast_node_t* node = new_ast_node_block();

View File

@ -1,19 +1,19 @@
#include "../ast.h"
#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"
/**
* 0 false
* 1 true
*/
int peek_decl(tok_buf_t* tokbuf) {
int peek_decl(tok_stream_t* tokbuf) {
flush_peek_tok(tokbuf);
switch (peek_tok_type(tokbuf)) {
case TOKEN_STATIC:
case TOKEN_EXTERN:
case TOKEN_REGISTER:
case TOKEN_TYPEDEF:
error("not impliment");
LOG_ERROR("not impliment");
break;
default:
flush_peek_tok(tokbuf);
@ -36,7 +36,7 @@ int peek_decl(tok_buf_t* tokbuf) {
}
ast_node_t* parse_decl_val(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
tok_stream_t* tokbuf = &parser->tokbuf;
tok_type_t ttype;
flush_peek_tok(tokbuf);
@ -55,28 +55,28 @@ ast_node_t* parse_decl_val(parser_t* parser) {
if (ttype == TOKEN_ASSIGN) {
node->decl_val.expr_stmt = parse_stmt(parser);
if (node->decl_val.expr_stmt->type != NT_STMT_EXPR) {
error("parser_decl_val want stmt_expr");
LOG_ERROR("parser_decl_val want stmt_expr");
}
} else if (ttype == TOKEN_SEMICOLON) {
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
} else {
error("parser_decl_val syntax error");
LOG_ERROR("parser_decl_val syntax error");
}
return node;
}
ast_node_t* parse_decl(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
tok_stream_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype;
ast_node_t* node;
if (peek_decl(tokbuf) == 0) {
error("syntax error expect decl_val TYPE");
LOG_ERROR("syntax error expect decl_val TYPE");
}
if (peek_tok_type(tokbuf) != TOKEN_IDENT) {
error("syntax error expect decl_val IDENT");
LOG_ERROR("syntax error expect decl_val IDENT");
}
ttype = peek_tok_type(tokbuf);
@ -89,7 +89,7 @@ ast_node_t* parse_decl(parser_t* parser) {
node = parse_decl_val(parser);
break;
default:
error("syntax error expect decl_val ASSIGN or SEMICOLON");
LOG_ERROR("syntax error expect decl_val ASSIGN or SEMICOLON");
return NULL;
}
return node;

View File

@ -1,5 +1,5 @@
#include "../ast.h"
#include "../parser.h"
#include "ast.h"
#include "../symtab/symtab.h"
// Copy from `CParse`
@ -33,7 +33,7 @@ enum ParseType {
PREFIX_PARSER,
};
static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec);
static ast_node_t *parse_subexpression(tok_stream_t* tokbuf, symtab_t *symtab, enum Precedence prec);
#define NEXT(prec) parse_subexpression(tokbuf, symtab, prec)
static ast_node_t* gen_node2(ast_node_t* left, ast_node_t* right,
@ -72,7 +72,7 @@ static ast_node_t* gen_node2(ast_node_t* left, ast_node_t* right,
// }
}
static ast_node_t* parse_comma(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
static ast_node_t* parse_comma(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
ast_node_t* node = new_ast_node();
node->type = NT_COMMA;
node->expr.left = left;
@ -80,7 +80,7 @@ static ast_node_t* parse_comma(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t*
return node;
}
static ast_node_t* parse_assign(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
static ast_node_t* parse_assign(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
@ -124,14 +124,14 @@ static ast_node_t* parse_assign(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t*
left = gen_node2(left, NEXT(next), NT_XOR);
break;
default:
error("unsupported operator");
LOG_ERROR("unsupported operator");
break;
}
node->expr.right = left;
return node;
}
static ast_node_t* parse_cmp(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
static ast_node_t* parse_cmp(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
@ -164,12 +164,12 @@ static ast_node_t* parse_cmp(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* le
node->expr.right = NEXT(PREC_RELATIONAL);
break;
default:
error("invalid operator");
LOG_ERROR("invalid operator");
}
return node;
}
static ast_node_t* parse_cal(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
static ast_node_t* parse_cal(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
pop_tok(tokbuf);
@ -230,7 +230,7 @@ static ast_node_t* parse_cal(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* le
return node;
}
static ast_node_t* parse_call(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* ident) {
static ast_node_t* parse_call(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* ident) {
ast_node_t* node = new_ast_node();
node->type = NT_TERM_CALL;
node->call.name = ident;
@ -257,14 +257,14 @@ static ast_node_t* parse_call(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* i
ast_node_t* sym = symtab_lookup_symbol(symtab, name);
// TODO check func is match
if (sym == NULL || sym->type != NT_DECL_FUNC) {
error("function not decl %s", name);
LOG_ERROR("function not decl %s", name);
}
node->call.name = ident;
node->call.func_decl = sym;
return node;
}
static ast_node_t* parse_paren(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
static ast_node_t* parse_paren(tok_stream_t* tokbuf, symtab_t *symtab, ast_node_t* left) {
flush_peek_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
left = NEXT(PREC_EXPRESSION);
@ -273,7 +273,7 @@ static ast_node_t* parse_paren(tok_buf_t* tokbuf, symtab_t *symtab, ast_node_t*
return left;
}
typedef ast_node_t* (*parse_expr_fun_t)(tok_buf_t*, symtab_t* , ast_node_t*);
typedef ast_node_t* (*parse_expr_fun_t)(tok_stream_t*, symtab_t* , ast_node_t*);
static struct expr_prec_table_t {
parse_expr_fun_t parser;
enum Precedence prec;
@ -322,7 +322,7 @@ static struct expr_prec_table_t {
[TOKEN_L_PAREN] = {parse_paren, PREC_POSTFIX, INFIX_PARSER},
};
static ast_node_t *parse_primary_expression(tok_buf_t* tokbuf, symtab_t *symtab) {
static ast_node_t *parse_primary_expression(tok_stream_t* tokbuf, symtab_t *symtab) {
flush_peek_tok(tokbuf);
tok_t* tok = peek_tok(tokbuf);
@ -335,7 +335,7 @@ static ast_node_t *parse_primary_expression(tok_buf_t* tokbuf, symtab_t *symtab)
// node->data.data_type = TYPE_INT;
break;
case TOKEN_FLOAT_LITERAL:
warn("float not supported");
LOG_WARN("float not supported");
break;
case TOKEN_CHAR_LITERAL:
// node->data.data_type = TYPE_CHAR;
@ -350,7 +350,7 @@ static ast_node_t *parse_primary_expression(tok_buf_t* tokbuf, symtab_t *symtab)
} else {
void *sym = symtab_lookup_symbol(symtab, tok->val.str);
if (sym == NULL) {
error("undefined symbol but use %s", tok->val.str);
LOG_ERROR("undefined symbol but use %s", tok->val.str);
}
node->type = NT_TERM_IDENT;
node->syms.decl_node = sym;
@ -364,7 +364,7 @@ END:
return node;
}
static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum Precedence prec) {
static ast_node_t *parse_subexpression(tok_stream_t* tokbuf, symtab_t *symtab, enum Precedence prec) {
tok_type_t ttype;
struct expr_prec_table_t* work;
ast_node_t* left;
@ -397,7 +397,7 @@ static ast_node_t *parse_subexpression(tok_buf_t* tokbuf, symtab_t *symtab, enum
}
ast_node_t* parse_expr(parser_t* parser) {
tok_buf_t* tokbuf = &(parser->tokbuf);
tok_stream_t* tokbuf = &(parser->tokbuf);
symtab_t *symtab = parser->symtab;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
@ -419,7 +419,7 @@ ast_node_t* parse_expr(parser_t* parser) {
case TOKEN_IDENT:
return NEXT(PREC_EXPRESSION);
default:
error("Want expr but not got %s", get_tok_name(ttype));
LOG_ERROR("Want expr but not got %s", get_tok_name(ttype));
break;
}
}

View File

@ -1,13 +1,13 @@
#include "../ast.h"
#include "../parser.h"
#include "../symtab/symtab.h"
#include "ast.h"
#ifndef FUNC_PARAM_CACHE_SIZE
#define FUNC_PARAM_CACHE_SIZE 32 // 合理初始值可覆盖99%常见情况
#endif
// TODO 语义分析压入符号表
static void parse_params(parser_t* parser, tok_buf_t* cache, ast_node_t* node) {
static void parse_params(parser_t* parser, tok_stream_t* cache, ast_node_t* node) {
flush_peek_tok(cache);
tok_type_t ttype;
ast_node_t *params = new_ast_node();
@ -23,10 +23,10 @@ static void parse_params(parser_t* parser, tok_buf_t* cache, ast_node_t* node) {
case TOKEN_ELLIPSIS:
ttype = peek_tok_type(cache);
if (ttype != TOKEN_R_PAREN) {
error("... must be a last parameter list (expect ')')");
LOG_ERROR("... must be a last parameter list (expect ')')");
}
// TODO
error("not implement");
LOG_ERROR("not implement");
break;
case TOKEN_IDENT:
// TODO 静态数组
@ -62,13 +62,13 @@ static void parse_params(parser_t* parser, tok_buf_t* cache, ast_node_t* node) {
// flush_peek_tok(tokbuf);
// continue;
// }
// error("function expected ')' or ','\n");
// LOG_ERROR("function expected ')' or ','\n");
}
pop_tok(cache);
}
}
ast_type_t check_is_func_decl(tok_buf_t* tokbuf, tok_buf_t* cache) {
ast_type_t check_is_func_decl(tok_stream_t* tokbuf, tok_stream_t* cache) {
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
int depth = 1;
@ -76,7 +76,7 @@ ast_type_t check_is_func_decl(tok_buf_t* tokbuf, tok_buf_t* cache) {
tok_t* tok = peek_tok(tokbuf);
pop_tok(tokbuf);
if (cache->size >= cache->cap - 1) {
error("function parameter list too long");
LOG_ERROR("function parameter list too long");
}
cache->buf[cache->size++] = *tok;
switch (tok->type) {
@ -100,7 +100,7 @@ ast_type_t check_is_func_decl(tok_buf_t* tokbuf, tok_buf_t* cache) {
return NT_FUNC;
break;
default:
error("function define or decl need '{' or ';' but you don't got");
LOG_ERROR("function define or decl need '{' or ';' but you don't got");
}
}
@ -114,14 +114,14 @@ static ast_node_t* new_ast_node_funcdecl(ast_node_t* ret, ast_node_t* name) {
}
void parse_func(parser_t* parser) {
tok_buf_t* tokbuf = &(parser->tokbuf);
tok_stream_t* tokbuf = &(parser->tokbuf);
flush_peek_tok(tokbuf);
ast_node_t* ret_node = parse_type(parser);
ast_node_t* name_node = expect_pop_ident(tokbuf);
const char* func_name = name_node->syms.tok.val.str;
ast_node_t* decl = new_ast_node_funcdecl(ret_node, name_node);
tok_buf_t cache;
tok_stream_t cache;
init_tokbuf(&cache, NULL, NULL);
cache.cap = FUNC_PARAM_CACHE_SIZE;
tok_t buf[FUNC_PARAM_CACHE_SIZE];
@ -132,12 +132,12 @@ void parse_func(parser_t* parser) {
ast_node_t* prev = symtab_add_symbol(parser->symtab, func_name, decl, 1);
if (prev != NULL) {
if (prev->type != NT_DECL_FUNC) {
error("the symbol duplicate old is %d, new is func", prev->type);
LOG_ERROR("the symbol duplicate old is %d, new is func", prev->type);
}
// TODO check redeclare func is match
if (type == NT_FUNC) {
// TODO Free decl;
free(decl);
rt._free(decl);
decl = prev;
goto FUNC;
}
@ -151,7 +151,7 @@ void parse_func(parser_t* parser) {
FUNC:
// 该data临时用于判断是否重复定义
if (decl->decl_func.def != NULL) {
error("redefinition of function %s", func_name);
LOG_ERROR("redefinition of function %s", func_name);
}
ast_node_t* node = new_ast_node();

View File

@ -1,5 +1,5 @@
#include "../ast.h"
#include "../parser.h"
#include "ast.h"
#ifndef PROG_MAX_NODE_SIZE
#define PROG_MAX_NODE_SIZE (1024 * 4)
@ -13,7 +13,7 @@ void parse_prog(parser_t* parser) {
* same as
* Program := Declaration* Definition*
*/
tok_buf_t *tokbuf = &(parser->tokbuf);
tok_stream_t *tokbuf = &(parser->tokbuf);
parser->root = new_ast_node();
ast_node_t* node;
parser->root->type = NT_ROOT;

View File

@ -1,8 +1,8 @@
#include "../ast.h"
#include "../parser.h"
#include "ast.h"
ast_node_t* parse_stmt(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
tok_stream_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
ast_node_t* node = new_ast_node();
@ -66,7 +66,7 @@ ast_node_t* parse_stmt(parser_t* parser) {
node->do_while_stmt.body = parse_stmt(parser);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_WHILE) {
error("expected while after do");
LOG_ERROR("expected while after do");
}
pop_tok(tokbuf);
expect_pop_tok(tokbuf, TOKEN_L_PAREN);
@ -83,7 +83,7 @@ ast_node_t* parse_stmt(parser_t* parser) {
pop_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_L_PAREN) {
error("expected ( after for");
LOG_ERROR("expected ( after for");
}
pop_tok(tokbuf);
@ -170,7 +170,7 @@ ast_node_t* parse_stmt(parser_t* parser) {
// find symbol table
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_IDENT) {
error("expect identifier after goto");
LOG_ERROR("expect identifier after goto");
}
expect_pop_tok(tokbuf, TOKEN_SEMICOLON);
// TODO filling label
@ -211,7 +211,7 @@ ast_node_t* parse_stmt(parser_t* parser) {
case TOKEN_CASE: {
// TODO label switch
pop_tok(tokbuf);
error("unimplemented switch label");
LOG_ERROR("unimplemented switch label");
node->label_stmt.label = parse_expr(parser);
// TODO 该表达式为const int
expect_pop_tok(tokbuf, TOKEN_COLON);
@ -234,7 +234,7 @@ ast_node_t* parse_stmt(parser_t* parser) {
flush_peek_tok(tokbuf);
ttype = peek_tok_type(tokbuf);
if (ttype != TOKEN_SEMICOLON) {
error("exp must end with \";\"");
LOG_ERROR("exp must end with \";\"");
}
pop_tok(tokbuf);
node->type = NT_STMT_EXPR;

View File

@ -1,10 +1,10 @@
#include "../ast.h"
#include "../parser.h"
#include "../type.h"
#include "ast.h"
ast_node_t* new_ast_ident_node(tok_t* tok) {
if (tok->type != TOKEN_IDENT) {
error("syntax error: want identifier but got %d", tok->type);
LOG_ERROR("syntax error: want identifier but got %d", tok->type);
}
ast_node_t* node = new_ast_node();
node->type = NT_TERM_IDENT;
@ -13,7 +13,7 @@ ast_node_t* new_ast_ident_node(tok_t* tok) {
return node;
}
ast_node_t* expect_pop_ident(tok_buf_t* tokbuf) {
ast_node_t* expect_pop_ident(tok_stream_t* tokbuf) {
flush_peek_tok(tokbuf);
tok_t* tok = peek_tok(tokbuf);
ast_node_t* node = new_ast_ident_node(tok);
@ -22,7 +22,7 @@ ast_node_t* expect_pop_ident(tok_buf_t* tokbuf) {
}
ast_node_t* parse_type(parser_t* parser) {
tok_buf_t* tokbuf = &parser->tokbuf;
tok_stream_t* tokbuf = &parser->tokbuf;
flush_peek_tok(tokbuf);
tok_type_t ttype = peek_tok_type(tokbuf);
data_type_t dtype;
@ -35,7 +35,7 @@ ast_node_t* parse_type(parser_t* parser) {
case TOKEN_FLOAT: dtype = TYPE_FLOAT; break;
case TOKEN_DOUBLE: dtype = TYPE_DOUBLE; break;
default:
error("无效的类型说明符");
LOG_ERROR("无效的类型说明符");
}
ast_node_t* node = new_ast_node();

View File

@ -1,13 +1,16 @@
#include <lib/core.h>
#include "parser.h"
#include "type.h"
void init_parser(parser_t* parser, lexer_t* lexer, symtab_t* symtab) {
init_lib_core();
parser->cur_node = NULL;
parser->root = NULL;
parser->lexer = lexer;
parser->symtab = symtab;
init_tokbuf(&parser->tokbuf, lexer, (get_tokbuf_func)get_valid_token);
init_tokbuf(&parser->tokbuf, lexer, (tok_stream_get_func)get_valid_token);
parser->tokbuf.cap = sizeof(parser->TokenBuffer) / sizeof(parser->TokenBuffer[0]);
parser->tokbuf.buf = parser->TokenBuffer;
}

View File

@ -1,20 +1,19 @@
#ifndef __PARSER_H__
#define __PARSER_H__
#include "../frontend.h"
#include "../lexer/lexer.h"
#include "ast.h"
typedef struct lexer lexer_t;
typedef struct symtab symtab_t;
#define PARSER_MAX_TOKEN_QUEUE 16
typedef struct parser {
struct ASTNode* root;
struct ASTNode* cur_node;
ast_node_t* root;
ast_node_t* cur_node;
lexer_t* lexer;
symtab_t* symtab;
tok_buf_t tokbuf;
tok_stream_t tokbuf;
tok_t TokenBuffer[PARSER_MAX_TOKEN_QUEUE];
int err_level;
} parser_t;

View File

@ -1,5 +1,6 @@
// symtab.c
#include "../../frontend.h"
#include <lib/core.h>
#include "scope.h"
#include "symtab.h"
@ -24,7 +25,7 @@ void symtab_enter_scope(symtab_t* symtab) {
void symtab_leave_scope(symtab_t* symtab) {
Scope * scope = symtab->cur_scope;
if (scope == NULL) {
error("cannot leave NULL scope or global scope");
LOG_ERROR("cannot leave NULL scope or global scope");
}
symtab->cur_scope = symtab->cur_scope->parent;
scope_destroy(scope);
@ -35,7 +36,7 @@ void* symtab_add_symbol(symtab_t* symtab, const char* name, void* ast_node, int
void* node = scope_lookup_current(scope, name);
if (node != NULL) {
if (!can_duplicate) {
error("duplicate symbol %s", name);
LOG_ERROR("duplicate symbol %s", name);
}
return node;
}

View File

@ -1,20 +1,23 @@
# 编译器设置
CC = gcc
AR = ar
CFLAGS = -g -Wall
CFLAGS = -g -Wall -I../..
IR_DIR = ./ir
# 源文件列表
SRCS = \
ir.c \
ir_ast.c \
ir_lib.c \
ir_type.c
middleend.c \
$(IR_DIR)/ir.c \
$(IR_DIR)/ir_ast.c \
$(IR_DIR)/ir_lib.c \
$(IR_DIR)/ir_type.c
# 生成目标文件列表
OBJS = $(SRCS:.c=.o)
# 最终目标
TARGET = libir.a
TARGET = libmiddleend.a
all: $(TARGET)
@ -27,4 +30,4 @@ $(TARGET): $(OBJS)
clean:
rm -f $(OBJS) $(TARGET)
.PHONY: all clean
.PHONY: all clean

View File

@ -2,9 +2,7 @@
#ifndef IR_CORE_H
#define IR_CORE_H
#include "../../libcore/vector.h"
#include <stddef.h>
#include <stdint.h>
#include <lib/utils/ds/vector.h>
// 错误码定义
typedef enum {
@ -26,12 +24,12 @@ typedef struct {
union {
struct {
struct ir_type *base;
size_t len;
rt_size_t len;
} arr;
struct {
struct ir_type *ret;
struct ir_type **params;
size_t param_cnt;
rt_size_t param_cnt;
} func;
};
} ir_type_t;
@ -151,9 +149,4 @@ struct ir_node {
} data;
};
extern ir_prog_t prog;
struct ASTNode;
void gen_ir_from_ast(struct ASTNode* node);
#endif // IR_CORE_H

View File

@ -1,7 +1,8 @@
#include "ir.h"
#include "ir_lib.h"
#include "ir_type.h"
#include "../frontend/frontend.h"
#include "../../frontend/frontend.h"
#include "../../frontend/parser/ast.h"
// 上下文结构,记录生成过程中的状态
typedef struct {
@ -10,6 +11,7 @@ typedef struct {
} IRGenContext;
IRGenContext ctx;
ir_prog_t prog;
void _gen_ir_from_ast(ast_node_t* node);
static void emit_instr(ir_bblock_t* block, ir_node_t* node) {
if (block == NULL) block = ctx.cur_block;
@ -53,7 +55,7 @@ static ir_node_t* gen_ir_term(ast_node_t* node) {
return call;
}
default: {
assert(0);
Panic("gen_ir_expr: unknown node type");
}
}
}
@ -164,10 +166,10 @@ static ir_node_t* gen_ir_expr(ast_node_t* node) {
BINOP(IR_OP_GE); break;
}
case NT_AND_AND:// (expr) && (expr)
error("unimpliment");
LOG_ERROR("unimpliment");
break;
case NT_OR_OR:// (expr) || (expr)
error("unimpliment");
LOG_ERROR("unimpliment");
break;
case NT_NOT: {
// ! (expr)
@ -189,7 +191,7 @@ static ir_node_t* gen_ir_expr(ast_node_t* node) {
// case NT_COND: // (expr) ? (expr) : (expr)
default: {
// TODO self error msg
error("Unsupported IR generation for AST node type %d", node->type);
LOG_ERROR("Unsupported IR generation for AST node type %d", node->type);
break;
}
}
@ -198,7 +200,7 @@ static ir_node_t* gen_ir_expr(ast_node_t* node) {
}
static void gen_ir_func(ast_node_t* node, ir_func_t* func) {
assert(node->type == NT_FUNC);
Assert(node->type == NT_FUNC);
ir_bblock_t *entry = new_ir_bblock("entry");
vector_push(func->bblocks, entry);
@ -217,7 +219,7 @@ static void gen_ir_func(ast_node_t* node, ir_func_t* func) {
decl->type = &type_i32;
param->decl_val.data = decl;
}
gen_ir_from_ast(node->func.body);
_gen_ir_from_ast(node->func.body);
ctx = prev_ctx;
}
@ -248,13 +250,13 @@ void gen_ir_jmp(ast_node_t* node) {
// true block
vector_push(ctx.cur_func->bblocks, trueb);
ctx.cur_block = trueb;
gen_ir_from_ast(node->if_stmt.if_stmt);
_gen_ir_from_ast(node->if_stmt.if_stmt);
// else block
if (node->if_stmt.else_stmt != NULL) {
vector_push(ctx.cur_func->bblocks, falseb);
ctx.cur_block = falseb;
gen_ir_from_ast(node->if_stmt.else_stmt);
_gen_ir_from_ast(node->if_stmt.else_stmt);
ir_node_t* jmp;
ctx.cur_block = endb;
@ -285,7 +287,7 @@ void gen_ir_jmp(ast_node_t* node) {
// Body:
ir_node_t* jmp;
ctx.cur_block = bodyb;
gen_ir_from_ast(node->while_stmt.body);
_gen_ir_from_ast(node->while_stmt.body);
NEW_IR_JMP(jmp, entryb);
emit_instr(NULL, jmp);
@ -304,7 +306,7 @@ void gen_ir_jmp(ast_node_t* node) {
// Body:
ctx.cur_block = bodyb;
gen_ir_from_ast(node->do_while_stmt.body);
_gen_ir_from_ast(node->do_while_stmt.body);
ir_node_t* jmp;
NEW_IR_JMP(jmp, entryb);
emit_instr(NULL, jmp);
@ -324,7 +326,7 @@ void gen_ir_jmp(ast_node_t* node) {
ir_bblock_t* endb = bblocks[2];
if (node->for_stmt.init) {
gen_ir_from_ast(node->for_stmt.init);
_gen_ir_from_ast(node->for_stmt.init);
}
ir_node_t* entry;
NEW_IR_JMP(entry, entryb);
@ -342,7 +344,7 @@ void gen_ir_jmp(ast_node_t* node) {
// Body:
ctx.cur_block = bodyb;
gen_ir_from_ast(node->for_stmt.body);
_gen_ir_from_ast(node->for_stmt.body);
if (node->for_stmt.iter) {
gen_ir_expr(node->for_stmt.iter);
}
@ -355,18 +357,21 @@ void gen_ir_jmp(ast_node_t* node) {
break;
}
default:
error("ir jmp can't hit here");
LOG_ERROR("ir jmp can't hit here");
}
}
void gen_ir_from_ast(ast_node_t* node) {
/**
 * Entry point of AST -> IR lowering.
 *
 * Asserts that `root` is the NT_ROOT node, hands each of its direct children
 * to _gen_ir_from_ast() in order, and returns the address of the file-level
 * `prog` object.
 */
ir_prog_t* gen_ir_from_ast(ast_node_t* root) {
    Assert(root->type == NT_ROOT);

    int idx = 0;
    while (idx < root->root.children.size) {
        _gen_ir_from_ast(root->root.children.data[idx]);
        idx++;
    }

    return &prog;
}
void _gen_ir_from_ast(ast_node_t* node) {
switch (node->type) {
case NT_ROOT: {
for (int i = 0; i < node->root.children.size; i ++) {
gen_ir_from_ast(node->root.children.data[i]);
}
break;
}
case NT_DECL_FUNC: {
ir_func_t* func = new_ir_func(node->decl_func.name->syms.tok.val.str, &type_i32);
if (node->decl_func.def == NULL) {
@ -398,12 +403,12 @@ void gen_ir_from_ast(ast_node_t* node) {
break;
}
case NT_STMT_BLOCK: {
gen_ir_from_ast(node->block_stmt.block);
_gen_ir_from_ast(node->block_stmt.block);
break;
}
case NT_BLOCK: {
for (int i = 0; i < node->block.children.size; i ++) {
gen_ir_from_ast(node->block.children.data[i]);
_gen_ir_from_ast(node->block.children.data[i]);
}
break;
}
@ -420,7 +425,7 @@ void gen_ir_from_ast(ast_node_t* node) {
ir->type = &type_i32;
node->decl_val.data = ir;
if (node->decl_val.expr_stmt != NULL) {
gen_ir_from_ast(node->decl_val.expr_stmt);
_gen_ir_from_ast(node->decl_val.expr_stmt);
}
break;
}
@ -433,7 +438,7 @@ void gen_ir_from_ast(ast_node_t* node) {
}
default:
// TODO: 错误处理
error("unknown node type");
LOG_ERROR("unknown node type");
break;
}
}

View File

@ -0,0 +1,7 @@
#ifndef __IR_AST_H__
#define __IR_AST_H__
#include "ir.h"
// Builds IR from the given AST node and returns a pointer to the IR program.
// NOTE(review): ast_node_t is not declared by the include visible here;
// presumably callers include the frontend AST header first -- confirm.
ir_prog_t* gen_ir_from_ast(ast_node_t* node);
#endif // __IR_AST_H__

View File

@ -0,0 +1,76 @@
#include "ir.h"
#include "ir_lib.h"
#include "ir_type.h"
#include <stdio.h>
#include <assert.h>
// State for the IR dumper: carries the output stream that dump_ir_node()
// writes to.
typedef struct ir_dump {
FILE* fp; // destination stream handed to fprintf
} ir_dump_t;
/**
 * Print a one-line, human-readable description of an IR node.
 *
 * The line starts with "%<address of node>", followed by the node kind and
 * the operands stored in the variant selected by node->tag, and ends with a
 * newline. Operands that are other nodes/blocks are printed by address.
 *
 * The node is only read: dumping must never modify the IR. (The previous
 * body reset every field to NULL/0 and re-initialised the call argument
 * vector, destroying the node it was asked to print.)
 *
 * @param node  node to print; must not be NULL.
 * @param dump  dump context; dump->fp must be an open, writable stream.
 */
void dump_ir_node(ir_node_t* node, ir_dump_t* dump) {
    /* %p requires a void* argument; passing any other pointer type is UB. */
    fprintf(dump->fp, "%%%p", (void*)node);
    switch (node->tag) {
    case IR_NODE_ALLOC: {
        fprintf(dump->fp, " = alloc\n");
        break;
    }
    case IR_NODE_BRANCH: {
        fprintf(dump->fp, " = branch %%%p, true: %p, false: %p\n",
                (void*)node->data.branch.cond,
                (void*)node->data.branch.true_bblock,
                (void*)node->data.branch.false_bblock);
        break;
    }
    case IR_NODE_CALL: {
        /* TODO: also print node->data.call.args once the vector layout is
         * settled; only the callee is shown for now. */
        fprintf(dump->fp, " = call %p\n", (void*)node->data.call.callee);
        break;
    }
    case IR_NODE_CONST_INT: {
        /* Widen so the format stays correct whatever integer type val has. */
        fprintf(dump->fp, " = const %lld\n",
                (long long)node->data.const_int.val);
        break;
    }
    case IR_NODE_JUMP: {
        fprintf(dump->fp, " = jump %p\n",
                (void*)node->data.jump.target_bblock);
        break;
    }
    case IR_NODE_LOAD: {
        fprintf(dump->fp, " = load %%%p\n", (void*)node->data.load.target);
        break;
    }
    case IR_NODE_STORE: {
        fprintf(dump->fp, " = store %%%p, %%%p\n",
                (void*)node->data.store.target,
                (void*)node->data.store.value);
        break;
    }
    case IR_NODE_OP: {
        fprintf(dump->fp, " = op(%d) %%%p, %%%p\n",
                (int)node->data.op.op,
                (void*)node->data.op.lhs,
                (void*)node->data.op.rhs);
        break;
    }
    case IR_NODE_RET: {
        fprintf(dump->fp, " = ret %%%p\n", (void*)node->data.ret.ret_val);
        break;
    }
    case IR_NODE_GET_PTR: {
        /* Previously fell through into assert(0); operands are not printed
         * yet, but a valid tag must not abort the dumper. */
        fprintf(dump->fp, " = get_ptr\n");
        break;
    }
    default: {
        /* Unknown tag: the IR is corrupt or this switch is out of date. */
        assert(0);
        break;
    }
    }
}
// Placeholder for dumping a basic block.
// TODO: not implemented yet -- the body is empty and `block` is unused.
void dump_ir_bblock(ir_bblock_t* block) {
}
// Placeholder for dumping a function.
// TODO: not implemented yet -- the body is empty and `func` is unused.
void dump_ir_func(ir_func_t* func) {
}
// Placeholder for dumping a whole IR program.
// TODO: not implemented yet -- the body is empty and `prog` is unused.
void dump_ir_prog(ir_prog_t* prog) {
}

View File

@ -77,10 +77,6 @@ ir_node_t* new_ir_node(const char* name, ir_node_tag_t tag) {
return node;
}
void dump_ir_node(ir_node_t* node) {
}
void free_irnode() {
}

View File

View File

@ -0,0 +1,7 @@
// Umbrella header for the middle end: including it pulls in the IR
// definitions (ir/ir.h) and the AST-to-IR interface (ir/ir_ast.h).
#ifndef __SMCC_MIDDLEEND_H__
#define __SMCC_MIDDLEEND_H__
#include "ir/ir.h"
#include "ir/ir_ast.h"
#endif // __SMCC_MIDDLEEND_H__