From 71330800b89cb4c6ae8716a7e78bdcf574fe04ab Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Thu, 6 Feb 2014 01:36:26 -0800 Subject: [PATCH] Clean up benchmarks; support uarch-specific counters --- benchmarks/Makefile | 4 +- benchmarks/common/crt-mt.S | 113 ----------- benchmarks/common/crt.S | 94 ++++----- benchmarks/common/syscalls.c | 178 +++++++++++++----- benchmarks/common/test-mt.ld | 45 ----- benchmarks/common/util.h | 128 +++++++++---- benchmarks/dgemm/bmark.mk | 3 +- benchmarks/dgemm/dgemm_main.c | 84 +-------- benchmarks/dhrystone/bmark.mk | 1 + benchmarks/dhrystone/dhrystone.h | 8 +- benchmarks/dhrystone/dhrystone_main.c | 12 +- benchmarks/median/bmark.mk | 3 +- benchmarks/median/median_main.c | 80 +------- benchmarks/mt-matmul/bmark.mk | 5 +- benchmarks/mt-matmul/mt-matmul.c | 51 ++--- benchmarks/mt-vvadd/bmark.mk | 4 +- benchmarks/mt-vvadd/mt-vvadd.c | 58 ++---- benchmarks/multiply/bmark.mk | 3 +- benchmarks/multiply/multiply_main.c | 90 +-------- benchmarks/qsort/bmark.mk | 3 +- benchmarks/qsort/qsort_main.c | 75 +------- benchmarks/spmv/bmark.mk | 3 +- benchmarks/spmv/spmv_main.c | 64 +------ benchmarks/towers/bmark.mk | 3 +- benchmarks/towers/towers_main.c | 44 +---- benchmarks/vec-cmplxmult/vec_cmplxmult_main.c | 3 +- benchmarks/vec-matmul/vec_matmul_main.c | 59 +----- benchmarks/vec-vvadd/vec_vvadd_main.c | 59 +----- benchmarks/vvadd/bmark.mk | 3 +- benchmarks/vvadd/vvadd_main.c | 88 +-------- env | 2 +- 31 files changed, 348 insertions(+), 1022 deletions(-) delete mode 100644 benchmarks/common/crt-mt.S delete mode 100644 benchmarks/common/test-mt.ld diff --git a/benchmarks/Makefile b/benchmarks/Makefile index cff904a..8f580a4 100644 --- a/benchmarks/Makefile +++ b/benchmarks/Makefile @@ -51,10 +51,10 @@ HOST_OPTS = -std=gnu99 -DPREALLOCATE=0 -DHOST_DEBUG=1 HOST_COMP = gcc $(HOST_OPTS) RISCV_GCC = riscv-gcc -RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -nostdlib -nostartfiles -ffast-math +RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -ffast-math RISCV_LINK = riscv-gcc -T $(bmarkdir)/common/test.ld $(incs) RISCV_LINK_MT = riscv-gcc -T $(bmarkdir)/common/test-mt.ld -RISCV_LINK_OPTS = -lc +RISCV_LINK_OPTS = -nostdlib -nostartfiles -ffast-math -lc RISCV_OBJDUMP = riscv-objdump --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.data RISCV_SIM = spike diff --git a/benchmarks/common/crt-mt.S b/benchmarks/common/crt-mt.S deleted file mode 100644 index 6cedec0..0000000 --- a/benchmarks/common/crt-mt.S +++ /dev/null @@ -1,113 +0,0 @@ -#include "encoding.h" - - .data - .globl _heapend - .globl environ -_heapend: - .word 0 -environ: - .word 0 - - .text - .globl _start - -_start: - li x1, 0 - li x2, 0 - li x3, 0 - li x4, 0 - li x5, 0 - li x6, 0 - li x7, 0 - li x8, 0 - li x9, 0 - li x10,0 - li x11,0 - li x12,0 - li x13,0 - li x14,0 - li x15,0 - li x16,0 - li x17,0 - li x18,0 - li x19,0 - li x20,0 - li x21,0 - li x22,0 - li x23,0 - li x24,0 - li x25,0 - li x26,0 - li x27,0 - li x28,0 - li x29,0 - li x30,0 - li x31,0 - - # enable fp and accelerator - li a0, SR_EF | SR_EA - csrs status, a0 - - ## if that didn't stick, we don't have an FPU, so don't initialize it - csrr t0, status - and t0, t0, SR_EF - beqz t0, 1f - - fssr x0 - fmv.s.x f0, x0 - fmv.s.x f1, x0 - fmv.s.x f2, x0 - fmv.s.x f3, x0 - fmv.s.x f4, x0 - fmv.s.x f5, x0 - fmv.s.x f6, x0 - fmv.s.x f7, x0 - fmv.s.x f8, x0 - fmv.s.x f9, x0 - fmv.s.x f10,x0 - fmv.s.x f11,x0 - fmv.s.x f12,x0 - fmv.s.x f13,x0 - fmv.s.x f14,x0 - fmv.s.x f15,x0 - fmv.s.x f16,x0 - fmv.s.x f17,x0 - fmv.s.x f18,x0 - fmv.s.x f19,x0 - fmv.s.x f20,x0 - fmv.s.x f21,x0 - fmv.s.x f22,x0 - fmv.s.x f23,x0 - fmv.s.x f24,x0 - fmv.s.x f25,x0 - fmv.s.x f26,x0 - fmv.s.x f27,x0 - fmv.s.x f28,x0 - fmv.s.x f29,x0 - fmv.s.x f30,x0 - fmv.s.x f31,x0 -1: - - - # get core id and number of cores - csrr a0, hartid - lw a1, 4(zero) - - slli a2, a0, 13 - la sp, stacktop - sub sp, sp, a2 - - la tp, tlstop - sub tp, tp, a2 - - jal thread_entry - - .bss - .globl stacktop - .globl tlstop - - .align 4 - .skip 32768 -stacktop: - .skip 65536 -tlstop: diff --git a/benchmarks/common/crt.S b/benchmarks/common/crt.S index ae8706b..fb2cc25 100644 --- a/benchmarks/common/crt.S +++ b/benchmarks/common/crt.S @@ -93,48 +93,52 @@ _start: fmv.s.x f31,x0 1: - lui a0, %hi(trap_entry) - add a0, a0, %lo(trap_entry) - csrw evec, a0 - - lui a0, %hi(main) - add a0, a0, %lo(main) - csrw epc, a0 - - # only allow core 0 to proceed -1:csrr a0, hartid - bnez a0, 1b - - la sp,stacktop - - # jmp to main as a user program - sret -1:b 1b - -.align 4 -.globl trap_entry -trap_entry: # only check for SYS_exit, otherwise crash out - li a3, 1337 # magic "bad things" happened error code - csrr a1, cause - li a2, 6 # syscall exception number - bne a1, a2, exit_error -handle_syscall: - li a1, 93 # SYS_exit number - bne v0, a1, exit_error - li a1, 1 # successful exit code - move a3, a0 - bne a3, a1, exit_error - csrw tohost, a1 # exit successfully (tohost == 1) -1:b 1b -exit_error: - sll a3, a3, 1 - or a3, a3, 1 - csrw tohost, a3 -1:b 1b - - .bss - .globl stacktop - - .align 4 - .skip 131072 -stacktop: + la t0, trap_entry + csrw evec, t0 + + la tp, _end + 63 + and tp, tp, -64 + + # get core id and number of cores + csrr a0, hartid + lw a1, 4(zero) + + # give each core a 1KB TLS and a 127KB stack +#define STKSHIFT 17 + sll a2, a0, STKSHIFT + add tp, tp, a2 + add sp, a0, 1 + sll sp, sp, STKSHIFT + add sp, sp, tp + add tp, tp, 1024 + + jal _init + unimp + +trap_entry: + csrw sup0, t0 + csrw sup1, t1 + la t0, uarch_insn + lw t0, (t0) + csrr t1, epc + and t1, t1, ~3 + lw t1, (t1) + and t1, t1, t0 + beq t1, t0, handle_uarch_insn + + # a trap occurred that shouldn't have. + li t0, 1337 + csrw tohost, t0 +1:j 1b + +handle_uarch_insn: + # we trapped on an illegal uarch-specific CSR. just skip over it. + csrr t1, epc + add t1, t1, 4 + csrw epc, t1 + csrr t0, sup0 + csrr t1, sup1 + sret + +uarch_insn: + csrr x0, uarch0 diff --git a/benchmarks/common/syscalls.c b/benchmarks/common/syscalls.c index 4154ba4..1a53349 100644 --- a/benchmarks/common/syscalls.c +++ b/benchmarks/common/syscalls.c @@ -1,50 +1,106 @@ #include #include #include +#include +#include #include #include "encoding.h" -void exit(int code) +#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } + +void syscall(long which, long arg0, long arg1, long arg2) { - volatile uint64_t magic_mem[8] = {0}; - magic_mem[0] = SYS_exit; - magic_mem[1] = code; + volatile uint64_t magic_mem[8] __attribute__((aligned(64))); + magic_mem[0] = which; + magic_mem[1] = arg0; + magic_mem[2] = arg1; + magic_mem[3] = arg2; __sync_synchronize(); write_csr(tohost, (long)magic_mem); - while(1); + while (swap_csr(fromhost, 0) == 0); +} + +void exit(int code) +{ + write_csr(tohost, (code << 1) | 1); + while (1); } void printstr(const char* s) { - volatile uint64_t magic_mem[8] = {0}; - magic_mem[0] = SYS_write; - magic_mem[1] = 1; - magic_mem[2] = (unsigned long)s; - magic_mem[3] = strlen(s); - __sync_synchronize(); - write_csr(tohost, (long)magic_mem); - while (swap_csr(fromhost, 0) == 0); + syscall(SYS_write, 1, (long)s, strlen(s)); +} + +// In setStats, we might trap reading uarch-specific counters. +// The trap handler will skip over the instruction, but we want +// to pretend as though we read the value 0 in this case. +#define read_csr_safe(reg) ({ long __tmp = 0; \ + asm volatile ("csrr %0, " #reg : "+r"(__tmp)); \ + __tmp; }) + +#define NUM_COUNTERS 18 +static long counters[NUM_COUNTERS]; +static char* counter_names[NUM_COUNTERS]; +void setStats(int enable) +{ + int i = 0; +#define READ_CTR(name) do { \ + if (i >= NUM_COUNTERS) exit(-1); \ + long csr = read_csr_safe(name); \ + if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \ + counters[i++] = csr; \ + } while (0) + READ_CTR(cycle); READ_CTR(instret); + READ_CTR(uarch0); READ_CTR(uarch1); READ_CTR(uarch2); READ_CTR(uarch3); + READ_CTR(uarch4); READ_CTR(uarch5); READ_CTR(uarch6); READ_CTR(uarch7); + READ_CTR(uarch8); READ_CTR(uarch9); READ_CTR(uarch10); READ_CTR(uarch11); + READ_CTR(uarch12); READ_CTR(uarch13); READ_CTR(uarch14); READ_CTR(uarch15); +#undef READ_CTR +} + +void __attribute__((weak)) thread_entry(int cid, int nc) +{ + // multi-threaded programs override this function. + // for the case of single-threaded programs, only let core 0 proceed. + while (cid != 0); +} + +int __attribute__((weak)) main(int argc, char** argv) +{ + // single-threaded programs override this function. + printstr("Implement main(), foo!\n"); + return -1; } +void _init(int cid, int nc) +{ + thread_entry(cid, nc); + + // only single-threaded programs should ever get here. + int ret = main(0, 0); + + char buf[NUM_COUNTERS * 32] __attribute__((aligned(64))); + char* pbuf = buf; + for (int i = 0; i < NUM_COUNTERS; i++) + if (counters[i]) + pbuf += sprintf(pbuf, "%s = %d\n", counter_names[i], counters[i]); + if (pbuf != buf) + printstr(buf); + + exit(ret); +} + +#undef putchar int putchar(int ch) { - static char buf[64]; + static char buf[64] __attribute__((aligned(64))); static int buflen = 0; - if(ch != -1) - buf[buflen++] = ch; + buf[buflen++] = ch; - if(ch == -1 || buflen == sizeof(buf)) + if (ch == '\n' || buflen == sizeof(buf)) { - volatile uint64_t magic_mem[8] = {0}; - magic_mem[0] = SYS_write; - magic_mem[1] = 1; - magic_mem[2] = (long)buf; - magic_mem[3] = buflen; - __sync_synchronize(); - write_csr(tohost, (long)magic_mem); - while (swap_csr(fromhost, 0) == 0); - + syscall(SYS_write, 1, (long)buf, buflen); buflen = 0; } @@ -65,15 +121,25 @@ void printhex(uint64_t x) printstr(str); } -static void printnum(void (*putch)(int, void**), void **putdat, - unsigned long long num, unsigned base, int width, int padc) +static inline void printnum(void (*putch)(int, void**), void **putdat, + unsigned long long num, unsigned base, int width, int padc) { - if (num >= base) - printnum(putch, putdat, num / base, base, width - 1, padc); - else while (--width > 0) + unsigned digs[sizeof(num)*CHAR_BIT]; + int pos = 0; + + while (1) + { + digs[pos++] = num % base; + if (num < base) + break; + num /= base; + } + + while (width-- > pos) putch(padc, putdat); - putch("0123456789abcdef"[num % base], putdat); + while (pos-- > 0) + putch(digs[pos] + (digs[pos] >= 10 ? 'a' - 10 : '0'), putdat); } static unsigned long long getuint(va_list *ap, int lflag) @@ -96,7 +162,7 @@ static long long getint(va_list *ap, int lflag) return va_arg(*ap, int); } -void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap) +static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap) { register const char* p; const char* last_fmt; @@ -188,10 +254,7 @@ void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_li for (width -= strnlen(p, precision); width > 0; width--) putch(padc, putdat); for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) { - if (altflag && (ch < ' ' || ch > '~')) - putch('?', putdat); - else - putch(ch, putdat); + putch(ch, putdat); p++; } for (; width > 0; width--) @@ -206,35 +269,33 @@ void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_li num = -(long long) num; } base = 10; - goto number; + goto signed_number; // unsigned decimal case 'u': - num = getuint(&ap, lflag); base = 10; - goto number; + goto unsigned_number; // (unsigned) octal case 'o': // should do something with padding so it's always 3 octits - num = getuint(&ap, lflag); base = 8; - goto number; + goto unsigned_number; // pointer case 'p': + static_assert(sizeof(long) == sizeof(void*)); + lflag = 1; putch('0', putdat); putch('x', putdat); - num = (unsigned long long) - (uintptr_t) va_arg(ap, void *); - base = 16; - goto number; + /* fall through to 'x' */ // (unsigned) hexadecimal case 'x': - num = getuint(&ap, lflag); base = 16; - number: + unsigned_number: + num = getuint(&ap, lflag); + signed_number: printnum(putch, putdat, num, base, width, padc); break; @@ -258,8 +319,27 @@ int printf(const char* fmt, ...) va_start(ap, fmt); vprintfmt((void*)putchar, 0, fmt, ap); - putchar(-1); va_end(ap); return 0; // incorrect return value, but who cares, anyway? } + +int sprintf(char* str, const char* fmt, ...) +{ + va_list ap; + char* str0 = str; + va_start(ap, fmt); + + void sprintf_putch(int ch, void** data) + { + char** pstr = (char**)data; + **pstr = ch; + (*pstr)++; + } + + vprintfmt(sprintf_putch, (void**)&str, fmt, ap); + *str = 0; + + va_end(ap); + return str - str0; +} diff --git a/benchmarks/common/test-mt.ld b/benchmarks/common/test-mt.ld deleted file mode 100644 index 5523032..0000000 --- a/benchmarks/common/test-mt.ld +++ /dev/null @@ -1,45 +0,0 @@ -/*======================================================================*/ -/* Proxy kernel linker script */ -/*======================================================================*/ -/* This is the linker script used when building the proxy kernel. */ - -/*----------------------------------------------------------------------*/ -/* Setup */ -/*----------------------------------------------------------------------*/ - -/* The OUTPUT_ARCH command specifies the machine architecture where the - argument is one of the names used in the BFD library. More - specifically one of the entires in bfd/cpu-mips.c */ - -OUTPUT_ARCH( "riscv" ) - -/* The ENTRY command specifies the entry point (ie. first instruction - to execute). The symbol _start should be defined in each test. */ - -ENTRY( _start ) - -/*----------------------------------------------------------------------*/ -/* Sections */ -/*----------------------------------------------------------------------*/ - -SECTIONS -{ - - /* text: test code section */ - . = 0x00002000; - .text : - { - crt-mt.o(.text) - *(.text) - } - - /* data: Initialized data segment */ - .data : - { - *(.data) - } - - /* End of uninitalized data segement */ - _end = .; -} - diff --git a/benchmarks/common/util.h b/benchmarks/common/util.h index 10f3169..1f0c73c 100644 --- a/benchmarks/common/util.h +++ b/benchmarks/common/util.h @@ -1,16 +1,99 @@ -// helpful utility and synch functions - -// relies on defining "ncores" before including this file... - #ifndef __UTIL_H #define __UTIL_H -#include +//-------------------------------------------------------------------------- +// Macros + +// Set HOST_DEBUG to 1 if you are going to compile this for a host +// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG +// to 0 if you are compiling with the smips-gcc toolchain. + +#ifndef HOST_DEBUG +#define HOST_DEBUG 0 +#endif + +// Set PREALLOCATE to 1 if you want to preallocate the benchmark +// function before starting stats. If you have instruction/data +// caches and you don't want to count the overhead of misses, then +// you will need to use preallocation. + +#ifndef PREALLOCATE +#define PREALLOCATE 0 +#endif + +// Set SET_STATS to 1 if you want to carve out the piece that actually +// does the computation. + +#ifndef SET_STATS +#define SET_STATS 0 +#endif + +#if HOST_DEBUG +#include +static void setStats(int enable) {} +#else +extern void setStats(int enable); +#endif + +static void printArray(const char name[], int n, const int arr[]) +{ +#if HOST_DEBUG + int i; + printf( " %10s :", name ); + for ( i = 0; i < n; i++ ) + printf( " %3d ", arr[i] ); + printf( "\n" ); +#endif +} + +static void printDoubleArray(const char name[], int n, const double arr[]) +{ +#if HOST_DEBUG + int i; + printf( " %10s :", name ); + for ( i = 0; i < n; i++ ) + printf( " %g ", arr[i] ); + printf( "\n" ); +#endif +} + +static int verify(int n, const int test[], const int verify[]) +{ + int i; + // Unrolled for faster verification + for (i = 0; i < n/2*2; i+=2) + { + int t0 = test[i], t1 = test[i+1]; + int v0 = verify[i], v1 = verify[i+1]; + if (t0 != v0) return i+1; + if (t1 != v1) return i+2; + } + if (n % 2 != 0 && test[n-1] != verify[n-1]) + return n; + return 0; +} -#define rdcycle() ({ unsigned long _c; asm volatile ("rdcycle %0" : "=r"(_c) :: "memory"); _c; }) -#define rdinstret() ({ unsigned long _c; asm volatile ("rdinstret %0" : "=r"(_c) :: "memory"); _c; }) - -void __attribute__((noinline)) barrier() +static int verifyDouble(int n, const double test[], const double verify[]) +{ + int i; + // Unrolled for faster verification + for (i = 0; i < n/2*2; i+=2) + { + double t0 = test[i], t1 = test[i+1]; + double v0 = verify[i], v1 = verify[i+1]; + int eq1 = t0 == v0, eq2 = t1 == v1; + if (!(eq1 & eq2)) return i+1+eq1; + } + if (n % 2 != 0 && test[n-1] != verify[n-1]) + return n; + return 0; +} + +#ifndef ncores +#define ncores 1 +#endif + +static void __attribute__((noinline)) barrier() { static volatile int sense; static volatile int count; @@ -30,31 +113,8 @@ void __attribute__((noinline)) barrier() __sync_synchronize(); } - - - - -void finishTest(int test_result) -{ -#if HOST_DEBUG - if ( test_result == 1 ) - printf( "*** PASSED ***\n" ); - else - printf( "*** FAILED *** (tohost = %d)\n", test_result); - exit(0); -#else - { - // perform exit syscall - asm volatile( - "move a0,%0 ;" - "li a1,0 ;" - "li a2,0 ;" - "li a3,0 ;" - "li v0,%1 ;" - "scall" : : "r"(test_result) , "i"(SYS_exit)); - } +#ifdef __riscv +#include "encoding.h" #endif -} #endif //__UTIL_H - diff --git a/benchmarks/dgemm/bmark.mk b/benchmarks/dgemm/bmark.mk index 5a26242..11d8656 100644 --- a/benchmarks/dgemm/bmark.mk +++ b/benchmarks/dgemm/bmark.mk @@ -10,6 +10,7 @@ dgemm_c_src = \ dgemm_main.c \ + syscalls.c \ dgemm_riscv_src = \ crt.S \ @@ -23,7 +24,7 @@ $(dgemm_host_bin) : $(dgemm_c_src) dgemm_riscv_bin = dgemm.riscv $(dgemm_riscv_bin) : $(dgemm_c_objs) $(dgemm_riscv_objs) - $(RISCV_LINK) $(dgemm_c_objs) $(dgemm_riscv_objs) -o $(dgemm_riscv_bin) + $(RISCV_LINK) $(dgemm_c_objs) $(dgemm_riscv_objs) -o $(dgemm_riscv_bin) $(RISCV_LINK_OPTS) junk += $(dgemm_c_objs) $(dgemm_riscv_objs) \ $(dgemm_host_bin) $(dgemm_riscv_bin) diff --git a/benchmarks/dgemm/dgemm_main.c b/benchmarks/dgemm/dgemm_main.c index 7fd7dc2..9f28c07 100644 --- a/benchmarks/dgemm/dgemm_main.c +++ b/benchmarks/dgemm/dgemm_main.c @@ -2,75 +2,13 @@ // Double-precision general matrix multiplication benchmark //-------------------------------------------------------------------------- -int ncores = 1; #include "util.h" -//-------------------------------------------------------------------------- -// Macros - -// Set HOST_DEBUG to 1 if you are going to compile this for a host -// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG -// to 0 if you are compiling with the smips-gcc toolchain. - -#ifndef HOST_DEBUG -#define HOST_DEBUG 0 -#endif - -// Set PREALLOCATE to 1 if you want to preallocate the benchmark -// function before starting stats. If you have instruction/data -// caches and you don't want to count the overhead of misses, then -// you will need to use preallocation. - -#ifndef PREALLOCATE -#define PREALLOCATE 0 -#endif - -// Set SET_STATS to 1 if you want to carve out the piece that actually -// does the computation. - -#ifndef SET_STATS -#define SET_STATS 0 -#endif - //-------------------------------------------------------------------------- // Input/Reference Data #include "dataset1.h" -//-------------------------------------------------------------------------- -// Helper functions - -int verify( long n, const double test[], const double correct[] ) -{ - int i; - for ( i = 0; i < n; i++ ) { - if ( test[i] != correct[i] ) { - return 2; - } - } - return 1; -} - -#if HOST_DEBUG -#include -#include -void printArray( char name[], long n, const double arr[] ) -{ - int i; - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %8.1f ", arr[i] ); - printf( "\n" ); -} -#endif - -void setStats( int enable ) -{ -#if ( !HOST_DEBUG && SET_STATS ) - asm( "mtpcr %0, cr10" : : "r" (enable) ); -#endif -} - //-------------------------------------------------------------------------- // square_dgemm function @@ -162,33 +100,23 @@ int main( int argc, char* argv[] ) double results_data[DATA_SIZE*DATA_SIZE]; // Output the input array - -#if HOST_DEBUG - printArray( "input1", DATA_SIZE*DATA_SIZE, input1_data ); - printArray( "input2", DATA_SIZE*DATA_SIZE, input2_data ); - printArray( "verify", DATA_SIZE*DATA_SIZE, verify_data ); -#endif - - // If needed we preallocate everything in the caches + printDoubleArray( "input1", DATA_SIZE*DATA_SIZE, input1_data ); + printDoubleArray( "input2", DATA_SIZE*DATA_SIZE, input2_data ); + printDoubleArray( "verify", DATA_SIZE*DATA_SIZE, verify_data ); #if PREALLOCATE + // If needed we preallocate everything in the caches square_dgemm( DATA_SIZE, input1_data, input2_data, results_data ); #endif // Do the dgemm - setStats(1); square_dgemm( DATA_SIZE, input1_data, input2_data, results_data ); setStats(0); // Print out the results - -#if HOST_DEBUG - printArray( "results", DATA_SIZE*DATA_SIZE, results_data ); -#endif + printDoubleArray( "results", DATA_SIZE*DATA_SIZE, results_data ); // Check the results - - finishTest(verify( DATA_SIZE*DATA_SIZE, results_data, verify_data )); - + return verifyDouble( DATA_SIZE*DATA_SIZE, results_data, verify_data ); } diff --git a/benchmarks/dhrystone/bmark.mk b/benchmarks/dhrystone/bmark.mk index 6e45706..06b4ab8 100644 --- a/benchmarks/dhrystone/bmark.mk +++ b/benchmarks/dhrystone/bmark.mk @@ -11,6 +11,7 @@ dhrystone_c_src = \ dhrystone_main.c \ dhrystone.c \ + syscalls.c \ dhrystone_riscv_src = \ crt.S \ diff --git a/benchmarks/dhrystone/dhrystone.h b/benchmarks/dhrystone/dhrystone.h index 8abb874..f274c57 100644 --- a/benchmarks/dhrystone/dhrystone.h +++ b/benchmarks/dhrystone/dhrystone.h @@ -381,13 +381,9 @@ extern clock_t clock(); #define HZ 976563 #define Too_Small_Time 50 -#define rdcycle() ({ \ - long __x; \ - asm volatile("rdcycle %0; srl %0, %0, 10" : "=r"(__x)); \ - __x; }) #define CLOCK_TYPE "rdcycle()" -#define Start_Timer() Begin_Time = rdcycle() -#define Stop_Timer() End_Time = rdcycle() +#define Start_Timer() Begin_Time = rdcycle()/1024 +#define Stop_Timer() End_Time = rdcycle()/1024 #else /* Use times(2) time function unless */ diff --git a/benchmarks/dhrystone/dhrystone_main.c b/benchmarks/dhrystone/dhrystone_main.c index 93b79da..a755743 100644 --- a/benchmarks/dhrystone/dhrystone_main.c +++ b/benchmarks/dhrystone/dhrystone_main.c @@ -8,9 +8,6 @@ #include "dhrystone.h" -int ncores = 1; -#include "util.h" - //-------------------------------------------------------------------------- // Macros @@ -47,12 +44,7 @@ int __attribute__((noinline)) do_fprintf(FILE* f, const char* str, ...) } #endif -void setStats( int enable ) -{ -#if ( !HOST_DEBUG && SET_STATS ) - asm( "mtpcr %0, cr10" : : "r" (enable) ); -#endif -} +#include "util.h" #include @@ -299,7 +291,7 @@ int main (int argc, char** argv) do_fprintf (stdout, "\n"); #endif - finishTest(1); + return 0; } diff --git a/benchmarks/median/bmark.mk b/benchmarks/median/bmark.mk index 31c853a..b489a67 100644 --- a/benchmarks/median/bmark.mk +++ b/benchmarks/median/bmark.mk @@ -11,6 +11,7 @@ median_c_src = \ median_main.c \ median.c \ + syscalls.c \ median_riscv_src = \ crt.S \ @@ -24,7 +25,7 @@ $(median_host_bin): $(median_c_src) median_riscv_bin = median.riscv $(median_riscv_bin): $(median_c_objs) $(median_riscv_objs) - $(RISCV_LINK) $(median_c_objs) $(median_riscv_objs) -o $(median_riscv_bin) + $(RISCV_LINK) $(median_c_objs) $(median_riscv_objs) -o $(median_riscv_bin) $(RISCV_LINK_OPTS) junk += $(median_c_objs) $(median_riscv_objs) \ $(median_host_bin) $(median_riscv_bin) diff --git a/benchmarks/median/median_main.c b/benchmarks/median/median_main.c index 0691bec..7d50f76 100644 --- a/benchmarks/median/median_main.c +++ b/benchmarks/median/median_main.c @@ -8,75 +8,15 @@ // dataset1.h You should not change anything except the // HOST_DEBUG and PREALLOCATE macros for your timing run. -#include "median.h" - -int ncores = 1; #include "util.h" -//-------------------------------------------------------------------------- -// Macros - -// Set HOST_DEBUG to 1 if you are going to compile this for a host -// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG -// to 0 if you are compiling with the smips-gcc toolchain. - -#ifndef HOST_DEBUG -#define HOST_DEBUG 0 -#endif - -// Set PREALLOCATE to 1 if you want to preallocate the benchmark -// function before starting stats. If you have instruction/data -// caches and you don't want to count the overhead of misses, then -// you will need to use preallocation. - -#ifndef PREALLOCATE -#define PREALLOCATE 0 -#endif - -// Set SET_STATS to 1 if you want to carve out the piece that actually -// does the computation. - -#ifndef SET_STATS -#define SET_STATS 0 -#endif +#include "median.h" //-------------------------------------------------------------------------- // Input/Reference Data #include "dataset1.h" -//-------------------------------------------------------------------------- -// Helper functions - -int verify( int n, int test[], int correct[] ) -{ - int i; - for ( i = 0; i < n; i++ ) { - if ( test[i] != correct[i] ) { - return 2; - } - } - return 1; -} - -#if HOST_DEBUG -void printArray( char name[], int n, int arr[] ) -{ - int i; - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3d ", arr[i] ); - printf( "\n" ); -} -#endif - -void setStats( int enable ) -{ -#if ( !HOST_DEBUG && SET_STATS ) - asm( "mtpcr %0, cr10" : : "r" (enable) ); -#endif -} - //-------------------------------------------------------------------------- // Main @@ -85,36 +25,22 @@ int main( int argc, char* argv[] ) int results_data[DATA_SIZE]; // Output the input array - -#if HOST_DEBUG printArray( "input", DATA_SIZE, input_data ); printArray( "verify", DATA_SIZE, verify_data ); -#endif +#if PREALLOCATE // If needed we preallocate everything in the caches - -#if ( !HOST_DEBUG && PREALLOCATE ) median( DATA_SIZE, input_data, results_data ); #endif // Do the filter - -#if HOST_DEBUG - median( DATA_SIZE, input_data, results_data ); -#else setStats(1); median( DATA_SIZE, input_data, results_data ); setStats(0); -#endif // Print out the results - -#if HOST_DEBUG printArray( "results", DATA_SIZE, results_data ); -#endif // Check the results - - finishTest(verify( DATA_SIZE, results_data, verify_data )); - + return verify( DATA_SIZE, results_data, verify_data ); } diff --git a/benchmarks/mt-matmul/bmark.mk b/benchmarks/mt-matmul/bmark.mk index 7749951..4b7fcb7 100644 --- a/benchmarks/mt-matmul/bmark.mk +++ b/benchmarks/mt-matmul/bmark.mk @@ -10,9 +10,10 @@ mt_matmul_c_src = \ mt-matmul.c \ + syscalls.c \ mt_matmul_riscv_src = \ - crt-mt.S \ + crt.S \ mt_matmul_c_objs = $(patsubst %.c, %.o, $(mt_matmul_c_src)) mt_matmul_riscv_objs = $(patsubst %.S, %.o, $(mt_matmul_riscv_src)) @@ -23,7 +24,7 @@ $(mt_matmul_host_bin) : $(mt_matmul_c_src) mt_matmul_riscv_bin = mt-matmul.riscv $(mt_matmul_riscv_bin) : $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) - $(RISCV_LINK_MT) $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_matmul_riscv_bin) + $(RISCV_LINK) $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_matmul_riscv_bin) junk += $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) \ $(mt_matmul_host_bin) $(mt_matmul_riscv_bin) diff --git a/benchmarks/mt-matmul/mt-matmul.c b/benchmarks/mt-matmul/mt-matmul.c index 93f8ea9..e795b50 100644 --- a/benchmarks/mt-matmul/mt-matmul.c +++ b/benchmarks/mt-matmul/mt-matmul.c @@ -25,7 +25,7 @@ //-------------------------------------------------------------------------- // Input/Reference Data -typedef float data_t; +typedef double data_t; #include "dataset.h" @@ -34,6 +34,7 @@ typedef float data_t; __thread unsigned long coreid; unsigned long ncores; +#define ncores ncores #include "util.h" @@ -48,41 +49,6 @@ unsigned long ncores; stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ } while(0) - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n", - i, (long)test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - //-------------------------------------------------------------------------- // matmul function @@ -139,8 +105,11 @@ void thread_entry(int cid, int nc) // verify - verify(ARRAY_SIZE, results_data, verify_data); - + int res = verifyDouble(ARRAY_SIZE, results_data, verify_data); + if (res) + exit(res); + +#if 0 // clear results from the first trial size_t i; if (coreid == 0) @@ -159,9 +128,11 @@ void thread_entry(int cid, int nc) #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); + res = verify(ARRAY_SIZE, results_data, verify_data); + if (res) + exit(res); barrier(); +#endif exit(0); } - diff --git a/benchmarks/mt-vvadd/bmark.mk b/benchmarks/mt-vvadd/bmark.mk index 1f8b3ed..72b2d34 100644 --- a/benchmarks/mt-vvadd/bmark.mk +++ b/benchmarks/mt-vvadd/bmark.mk @@ -13,7 +13,7 @@ mt_vvadd_c_src = \ syscalls.c \ mt_vvadd_riscv_src = \ - crt-mt.S \ + crt.S \ mt_vvadd_c_objs = $(patsubst %.c, %.o, $(mt_vvadd_c_src)) mt_vvadd_riscv_objs = $(patsubst %.S, %.o, $(mt_vvadd_riscv_src)) @@ -24,7 +24,7 @@ $(mt_vvadd_host_bin) : $(mt_vvadd_c_src) mt_vvadd_riscv_bin = mt-vvadd.riscv $(mt_vvadd_riscv_bin) : $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) - $(RISCV_LINK_MT) $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_vvadd_riscv_bin) + $(RISCV_LINK) $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_vvadd_riscv_bin) junk += $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) \ $(mt_vvadd_host_bin) $(mt_vvadd_riscv_bin) diff --git a/benchmarks/mt-vvadd/mt-vvadd.c b/benchmarks/mt-vvadd/mt-vvadd.c index 497b9bb..60aa2e7 100644 --- a/benchmarks/mt-vvadd/mt-vvadd.c +++ b/benchmarks/mt-vvadd/mt-vvadd.c @@ -24,7 +24,7 @@ //-------------------------------------------------------------------------- // Input/Reference Data -typedef float data_t; +typedef double data_t; #include "dataset.h" @@ -33,6 +33,7 @@ typedef float data_t; __thread unsigned long coreid; unsigned long ncores; +#define ncores ncores #include "util.h" @@ -47,41 +48,6 @@ unsigned long ncores; stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ } while(0) - -//-------------------------------------------------------------------------- -// Helper functions - -void printArray( char name[], int n, data_t arr[] ) -{ - int i; - if (coreid != 0) - return; - - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %4ld ", (long) arr[i] ); - printf( "\n" ); -} - -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) -{ - if (coreid != 0) - return; - - size_t i; - for (i = 0; i < n; i++) - { - if (test[i] != correct[i]) - { - printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n", - i, (long) test[i], i, (long)correct[i]); - exit(-1); - } - } - - return; -} - //-------------------------------------------------------------------------- // vvadd function @@ -136,8 +102,11 @@ void thread_entry(int cid, int nc) // verify - verify(DATA_SIZE, results_data, verify_data); - + int res = verifyDouble(DATA_SIZE, results_data, verify_data); + if (res) + exit(res); + +#if 0 // reset results from the first trial if (coreid == 0) { @@ -145,21 +114,22 @@ void thread_entry(int cid, int nc) results_data[i] = input1_data[i]; } barrier(); - - + // Execute your faster vvadd barrier(); stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printDoubleArray("results: ", DATA_SIZE, results_data); + printDoubleArray("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); + res = verifyDouble(DATA_SIZE, results_data, verify_data); + if (res) + exit(res); barrier(); +#endif exit(0); } - diff --git a/benchmarks/multiply/bmark.mk b/benchmarks/multiply/bmark.mk index d6114a1..93ba67f 100644 --- a/benchmarks/multiply/bmark.mk +++ b/benchmarks/multiply/bmark.mk @@ -11,6 +11,7 @@ multiply_c_src = \ multiply_main.c \ multiply.c \ + syscalls.c \ multiply_riscv_src = \ crt.S \ @@ -24,7 +25,7 @@ $(multiply_host_bin): $(multiply_c_src) multiply_riscv_bin = multiply.riscv $(multiply_riscv_bin): $(multiply_c_objs) $(multiply_riscv_objs) - $(RISCV_LINK) $(multiply_c_objs) $(multiply_riscv_objs) -o $(multiply_riscv_bin) + $(RISCV_LINK) $(multiply_c_objs) $(multiply_riscv_objs) -o $(multiply_riscv_bin) $(RISCV_LINK_OPTS) junk += $(multiply_c_objs) $(multiply_riscv_objs) \ $(multiply_host_bin) $(multiply_riscv_bin) diff --git a/benchmarks/multiply/multiply_main.c b/benchmarks/multiply/multiply_main.c index ca359eb..037de2f 100644 --- a/benchmarks/multiply/multiply_main.c +++ b/benchmarks/multiply/multiply_main.c @@ -8,84 +8,15 @@ // dataset1.h You should not change anything except the // HOST_DEBUG and VERIFY macros for your timing run. -#include "multiply.h" - -int ncores = 1; #include "util.h" -//-------------------------------------------------------------------------- -// Macros - -// Set HOST_DEBUG to 1 if you are going to compile this for a host -// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG -// to 0 if you are compiling with the smips-gcc toolchain. - -#ifndef HOST_DEBUG -#define HOST_DEBUG 0 -#endif - -// Set PREALLOCATE to 1 if you want to preallocate the benchmark -// function before starting stats. If you have instruction/data -// caches and you don't want to count the overhead of misses, then -// you will need to use preallocation. - -#ifndef PREALLOCATE -#define PREALLOCATE 0 -#endif - -// Set VERIFY to 1 if you want the program to check that the sort -// function returns the right answer. When you are doing your -// benchmarking you should set this to 0 so that the verification -// is not included in your timing. - -#ifndef VERIFY -#define VERIFY 1 -#endif - -// Set SET_STATS to 1 if you want to carve out the piece that actually -// does the computation. - -#ifndef SET_STATS -#define SET_STATS 0 -#endif +#include "multiply.h" //-------------------------------------------------------------------------- // Input/Reference Data #include "dataset1.h" -//-------------------------------------------------------------------------- -// Helper functions - -int verify( int n, int test[], int correct[] ) -{ - int i; - for ( i = 0; i < n; i++ ) { - if ( test[i] != correct[i] ) { - return 2; - } - } - return 1; -} - -#if HOST_DEBUG -void printArray( char name[], int n, int arr[] ) -{ - int i; - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3d ", arr[i] ); - printf( "\n" ); -} -#endif - -void setStats( int enable ) -{ -#if ( !HOST_DEBUG && SET_STATS ) - asm( "mtpcr %0, cr10" : : "r" (enable) ); -#endif -} - //-------------------------------------------------------------------------- // Main @@ -95,42 +26,27 @@ int main( int argc, char* argv[] ) int results_data[DATA_SIZE]; // Output the input arrays - -#if HOST_DEBUG printArray( "input1", DATA_SIZE, input_data1 ); printArray( "input2", DATA_SIZE, input_data2 ); printArray( "verify", DATA_SIZE, verify_data ); -#endif -#if ( !HOST_DEBUG && PREALLOCATE ) +#if PREALLOCATE for (i = 0; i < DATA_SIZE; i++) { results_data[i] = multiply( input_data1[i], input_data2[i] ); } #endif -#if HOST_DEBUG - for (i = 0; i < DATA_SIZE; i++) - { - results_data[i] = multiply( input_data1[i], input_data2[i] ); - } -#else setStats(1); for (i = 0; i < DATA_SIZE; i++) { results_data[i] = multiply( input_data1[i], input_data2[i] ); } setStats(0); -#endif // Print out the results - -#if HOST_DEBUG printArray( "results", DATA_SIZE, results_data ); -#endif // Check the results - - finishTest(verify( DATA_SIZE, results_data, verify_data )); - + return verify( DATA_SIZE, results_data, verify_data ); } diff --git a/benchmarks/qsort/bmark.mk b/benchmarks/qsort/bmark.mk index cdc0545..4b39d96 100644 --- a/benchmarks/qsort/bmark.mk +++ b/benchmarks/qsort/bmark.mk @@ -10,6 +10,7 @@ qsort_c_src = \ qsort_main.c \ + syscalls.c \ qsort_riscv_src = \ crt.S \ @@ -23,7 +24,7 @@ $(qsort_host_bin) : $(qsort_c_src) qsort_riscv_bin = qsort.riscv $(qsort_riscv_bin) : $(qsort_c_objs) $(qsort_riscv_objs) - $(RISCV_LINK) $(qsort_c_objs) $(qsort_riscv_objs) -o $(qsort_riscv_bin) + $(RISCV_LINK) $(qsort_c_objs) $(qsort_riscv_objs) -o $(qsort_riscv_bin) $(RISCV_LINK_OPTS) junk += $(qsort_c_objs) $(qsort_riscv_objs) \ $(qsort_host_bin) $(qsort_riscv_bin) diff --git a/benchmarks/qsort/qsort_main.c b/benchmarks/qsort/qsort_main.c index e61eef2..9633356 100644 --- a/benchmarks/qsort/qsort_main.c +++ b/benchmarks/qsort/qsort_main.c @@ -11,36 +11,8 @@ // processor simulator itself. You should not change anything except // the HOST_DEBUG and PREALLOCATE macros for your timing run. -int ncores = 1; #include "util.h" -//-------------------------------------------------------------------------- -// Macros - -// Set HOST_DEBUG to 1 if you are going to compile this for a host -// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG -// to 0 if you are compiling with the smips-gcc toolchain. - -#ifndef HOST_DEBUG -#define HOST_DEBUG 0 -#endif - -// Set PREALLOCATE to 1 if you want to preallocate the benchmark -// function before starting stats. If you have instruction/data -// caches and you don't want to count the overhead of misses, then -// you will need to use preallocation. - -#ifndef PREALLOCATE -#define PREALLOCATE 0 -#endif - -// Set SET_STATS to 1 if you want to carve out the piece that actually -// does the computation. - -#ifndef SET_STATS -#define SET_STATS 0 -#endif - // The INSERTION_THRESHOLD is the size of the subarray when the // algorithm switches to using an insertion sort instead of // quick sort. @@ -61,38 +33,6 @@ int ncores = 1; #include "dataset1.h" -//-------------------------------------------------------------------------- -// Helper functions - -int verify( int n, int test[], int correct[] ) -{ - int i; - for ( i = 0; i < n; i++ ) { - if ( test[i] != correct[i] ) { - return 2; - } - } - return 1; -} - -#if HOST_DEBUG -void printArray( char name[], int n, int arr[] ) -{ - int i; - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3d ", arr[i] ); - printf( "\n" ); -} -#endif - -void setStats( int enable ) -{ -#if ( !HOST_DEBUG && SET_STATS ) - asm( "mtpcr %0, cr10" : : "r" (enable) ); -#endif -} - //-------------------------------------------------------------------------- // Quicksort function @@ -195,34 +135,23 @@ void sort( int n, int arr[] ) int main( int argc, char* argv[] ) { - // Output the input array - -#if HOST_DEBUG printArray( "input", DATA_SIZE, input_data ); printArray( "verify", DATA_SIZE, verify_data ); -#endif - - // If needed we preallocate everything in the caches #if PREALLOCATE + // If needed we preallocate everything in the caches sort( DATA_SIZE, input_data ); #endif // Do the sort - setStats(1); sort( DATA_SIZE, input_data ); setStats(0); // Print out the results - -#if HOST_DEBUG printArray( "test", DATA_SIZE, input_data ); -#endif // Check the results - - finishTest(verify( DATA_SIZE, input_data, verify_data )); - + return verify( DATA_SIZE, input_data, verify_data ); } diff --git a/benchmarks/spmv/bmark.mk b/benchmarks/spmv/bmark.mk index 5ab9cc0..dcfdb19 100644 --- a/benchmarks/spmv/bmark.mk +++ b/benchmarks/spmv/bmark.mk @@ -10,6 +10,7 @@ spmv_c_src = \ spmv_main.c \ + syscalls.c \ spmv_riscv_src = \ crt.S \ @@ -23,7 +24,7 @@ $(spmv_host_bin) : $(spmv_c_src) spmv_riscv_bin = spmv.riscv $(spmv_riscv_bin) : $(spmv_c_objs) $(spmv_riscv_objs) - $(RISCV_LINK) $(spmv_c_objs) $(spmv_riscv_objs) -o $(spmv_riscv_bin) + $(RISCV_LINK) $(spmv_c_objs) $(spmv_riscv_objs) -o $(spmv_riscv_bin) $(RISCV_LINK_OPTS) junk += $(spmv_c_objs) $(spmv_riscv_objs) \ $(spmv_host_bin) $(spmv_riscv_bin) diff --git a/benchmarks/spmv/spmv_main.c b/benchmarks/spmv/spmv_main.c index d765ca2..44cdc99 100644 --- a/benchmarks/spmv/spmv_main.c +++ b/benchmarks/spmv/spmv_main.c @@ -2,75 +2,13 @@ // Double-precision general matrix multiplication benchmark //-------------------------------------------------------------------------- -int ncores = 1; #include "util.h" -//-------------------------------------------------------------------------- -// Macros - -// Set HOST_DEBUG to 1 if you are going to compile this for a host -// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG -// to 0 if you are compiling with the smips-gcc toolchain. - -#ifndef HOST_DEBUG -#define HOST_DEBUG 0 -#endif - -// Set PREALLOCATE to 1 if you want to preallocate the benchmark -// function before starting stats. If you have instruction/data -// caches and you don't want to count the overhead of misses, then -// you will need to use preallocation. - -#ifndef PREALLOCATE -#define PREALLOCATE 0 -#endif - -// Set SET_STATS to 1 if you want to carve out the piece that actually -// does the computation. - -#ifndef SET_STATS -#define SET_STATS 0 -#endif - //-------------------------------------------------------------------------- // Input/Reference Data #include "dataset1.h" -//-------------------------------------------------------------------------- -// Helper functions - -int verify( long n, const double test[], const double correct[] ) -{ - int i; - for ( i = 0; i < n; i++ ) { - if ( test[i] != correct[i] ) { - return 2; - } - } - return 1; -} - -#if HOST_DEBUG -#include -#include -void printArray( char name[], long n, const double arr[] ) -{ - int i; - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %8.1f ", arr[i] ); - printf( "\n" ); -} -#endif - -void setStats( int enable ) -{ -#if ( !HOST_DEBUG && SET_STATS ) - asm( "mtpcr %0, cr10" : : "r" (enable) ); -#endif -} - void spmv(int r, const double* val, const int* idx, const double* x, const int* ptr, double* y) { @@ -108,5 +46,5 @@ int main( int argc, char* argv[] ) spmv(R, val, idx, x, ptr, y); setStats(0); - finishTest(verify(R, y, verify_data)); + return verifyDouble(R, y, verify_data); } diff --git a/benchmarks/towers/bmark.mk b/benchmarks/towers/bmark.mk index 0c16a81..b16bf04 100644 --- a/benchmarks/towers/bmark.mk +++ b/benchmarks/towers/bmark.mk @@ -10,6 +10,7 @@ towers_c_src = \ towers_main.c \ + syscalls.c \ towers_riscv_src = \ crt.S \ @@ -23,7 +24,7 @@ $(towers_host_bin) : $(towers_c_src) towers_riscv_bin = towers.riscv $(towers_riscv_bin) : $(towers_c_objs) $(towers_riscv_objs) - $(RISCV_LINK) $(towers_c_objs) $(towers_riscv_objs) -o $(towers_riscv_bin) + $(RISCV_LINK) $(towers_c_objs) $(towers_riscv_objs) -o $(towers_riscv_bin) $(RISCV_LINK_OPTS) junk += $(towers_c_objs) $(towers_riscv_objs) \ $(towers_host_bin) $(towers_riscv_bin) diff --git a/benchmarks/towers/towers_main.c b/benchmarks/towers/towers_main.c index 724b73b..9f60017 100644 --- a/benchmarks/towers/towers_main.c +++ b/benchmarks/towers/towers_main.c @@ -16,50 +16,12 @@ // smips processor simulator itself. You should not change anything except // the HOST_DEBUG and PREALLOCATE macros for your timing run. -int ncores = 1; #include "util.h" -//-------------------------------------------------------------------------- -// Macros - -// Set HOST_DEBUG to 1 if you are going to compile this for a host -// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG -// to 0 if you are compiling with the smips-gcc toolchain. - -#ifndef HOST_DEBUG -#define HOST_DEBUG 0 -#endif - -// Set PREALLOCATE to 1 if you want to preallocate the benchmark -// function before starting stats. If you have instruction/data -// caches and you don't want to count the overhead of misses, then -// you will need to use preallocation. - -#ifndef PREALLOCATE -#define PREALLOCATE 0 -#endif - -// Set SET_STATS to 1 if you want to carve out the piece that actually -// does the computation. - -#ifndef SET_STATS -#define SET_STATS 0 -#endif - // This is the number of discs in the puzzle. #define NUM_DISCS 7 -//-------------------------------------------------------------------------- -// Helper functions - -void setStats( int enable ) -{ -#if ( !HOST_DEBUG && SET_STATS ) - asm( "mtpcr %0, cr10" : : "r" (enable) ); -#endif -} - //-------------------------------------------------------------------------- // List data structure and functions @@ -278,7 +240,7 @@ int towers_verify( struct Towers* this ) return 6; } - return 1; + return 0; } //-------------------------------------------------------------------------- @@ -323,8 +285,6 @@ int main( int argc, char* argv[] ) #endif // Check the results - - finishTest( towers_verify( &towers ) ); - + return towers_verify( &towers ); } diff --git a/benchmarks/vec-cmplxmult/vec_cmplxmult_main.c b/benchmarks/vec-cmplxmult/vec_cmplxmult_main.c index 3251ef9..6715c45 100644 --- a/benchmarks/vec-cmplxmult/vec_cmplxmult_main.c +++ b/benchmarks/vec-cmplxmult/vec_cmplxmult_main.c @@ -53,9 +53,10 @@ #include #else void printstr(const char*); -void exit(); #endif +#include "util.h" + //-------------------------------------------------------------------------- // Complex Value Structs diff --git a/benchmarks/vec-matmul/vec_matmul_main.c b/benchmarks/vec-matmul/vec_matmul_main.c index 5de377e..6613902 100644 --- a/benchmarks/vec-matmul/vec_matmul_main.c +++ b/benchmarks/vec-matmul/vec_matmul_main.c @@ -5,13 +5,9 @@ // This benchmark multiplies two 2-D arrays together and writes the results to // a third vector. The input data (and reference data) should be generated // using the matmul_gendata.pl perl script and dumped to a file named -// dataset.h. The riscv-gcc toolchain does not support system calls so printf's -// can only be used on a host system, not on the riscv-v processor simulator -// itself. -// -// HOWEVER: printstr() and printhex() are provided, for a primitive form of -// printing strings and hexadecimal values to stdout. +// dataset.h. +#include "util.h" // Choose which implementation you wish to test... but leave only one on! // (only the first one will be executed). @@ -19,45 +15,6 @@ //#define SCALAR_ASM #define VT_ASM -//-------------------------------------------------------------------------- -// Macros - -// Set HOST_DEBUG to 1 if you are going to compile this for a host -// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG -// to 0 if you are compiling with the smips-gcc toolchain. - -#ifndef HOST_DEBUG -#define HOST_DEBUG 0 -#endif - -// Set PREALLOCATE to 1 if you want to preallocate the benchmark -// function before starting stats. If you have instruction/data -// caches and you don't want to count the overhead of misses, then -// you will need to use preallocation. - -#ifndef PREALLOCATE -#define PREALLOCATE 0 -#endif - -// Set SET_STATS to 1 if you want to carve out the piece that actually -// does the computation. - -#ifndef SET_STATS -#define SET_STATS 0 -#endif - -//-------------------------------------------------------------------------- -// Host Platform Includes - -#if HOST_DEBUG - #include - #include -#else -void printstr(const char*); -void exit(); -#endif - - //-------------------------------------------------------------------------- // Input/Reference Data @@ -86,18 +43,6 @@ int verify( int n, float test[], float correct[] ) return 1; } -#if HOST_DEBUG -void printArray( char name[], int n, float arr[] ) -{ - int i; - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %03.2f ", arr[i] ); - printf( "\n" ); -} -#endif - - void finishTest( int correct, long long num_cycles, long long num_retired ) { int toHostValue = correct; diff --git a/benchmarks/vec-vvadd/vec_vvadd_main.c b/benchmarks/vec-vvadd/vec_vvadd_main.c index c08e41a..850bec6 100644 --- a/benchmarks/vec-vvadd/vec_vvadd_main.c +++ b/benchmarks/vec-vvadd/vec_vvadd_main.c @@ -4,13 +4,7 @@ // // This benchmark uses adds to vectors and writes the results to a third // vector. The input data (and reference data) should be generated using the -// vvadd_gendata.pl perl script and dumped to a file named dataset.h. The -// riscv-gcc toolchain does not support system calls so printf's can only be -// used on a host system, not on the riscv-v processor simulator itself. -// -// HOWEVER: printstr() and printhex() are provided, for a primitive form of -// printing strings and hexadecimal values to stdout. - +// vvadd_gendata.pl perl script and dumped to a file named dataset.h. // Choose which implementation you wish to test... but leave only one on! // (only the first one will be executed). @@ -18,45 +12,6 @@ //#define SCALAR_ASM #define VT_ASM -//-------------------------------------------------------------------------- -// Macros - -// Set HOST_DEBUG to 1 if you are going to compile this for a host -// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG -// to 0 if you are compiling with the smips-gcc toolchain. - -#ifndef HOST_DEBUG -#define HOST_DEBUG 0 -#endif - -// Set PREALLOCATE to 1 if you want to preallocate the benchmark -// function before starting stats. If you have instruction/data -// caches and you don't want to count the overhead of misses, then -// you will need to use preallocation. - -#ifndef PREALLOCATE -#define PREALLOCATE 0 -#endif - -// Set SET_STATS to 1 if you want to carve out the piece that actually -// does the computation. - -#ifndef SET_STATS -#define SET_STATS 0 -#endif - -//-------------------------------------------------------------------------- -// Platform Specific Includes - -#if HOST_DEBUG - #include - #include -#else -void printstr(const char*); -void exit(); -#endif - - //-------------------------------------------------------------------------- // Input/Reference Data @@ -86,18 +41,6 @@ int verify( int n, float test[], float correct[] ) } return 1; } - -#if HOST_DEBUG -void printArray( char name[], int n, float arr[] ) -{ - int i; - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %03.2f ", arr[i] ); - printf( "\n" ); -} -#endif - void finishTest( int correct, long long num_cycles, long long num_retired ) { diff --git a/benchmarks/vvadd/bmark.mk b/benchmarks/vvadd/bmark.mk index d03cb96..5ab99de 100644 --- a/benchmarks/vvadd/bmark.mk +++ b/benchmarks/vvadd/bmark.mk @@ -10,6 +10,7 @@ vvadd_c_src = \ vvadd_main.c \ + syscalls.c \ vvadd_riscv_src = \ crt.S \ @@ -23,7 +24,7 @@ $(vvadd_host_bin) : $(vvadd_c_src) vvadd_riscv_bin = vvadd.riscv $(vvadd_riscv_bin) : $(vvadd_c_objs) $(vvadd_riscv_objs) - $(RISCV_LINK) $(vvadd_c_objs) $(vvadd_riscv_objs) -o $(vvadd_riscv_bin) + $(RISCV_LINK) $(vvadd_c_objs) $(vvadd_riscv_objs) -o $(vvadd_riscv_bin) $(RISCV_LINK_OPTS) junk += $(vvadd_c_objs) $(vvadd_riscv_objs) \ $(vvadd_host_bin) $(vvadd_riscv_bin) diff --git a/benchmarks/vvadd/vvadd_main.c b/benchmarks/vvadd/vvadd_main.c index 0be3051..9c47617 100644 --- a/benchmarks/vvadd/vvadd_main.c +++ b/benchmarks/vvadd/vvadd_main.c @@ -11,87 +11,13 @@ // anything except the HOST_DEBUG and PREALLOCATE macros for your timing // runs. -int ncores = 1; #include "util.h" - -//-------------------------------------------------------------------------- -// Macros - -// Set HOST_DEBUG to 1 if you are going to compile this for a host -// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG -// to 0 if you are compiling with the smips-gcc toolchain. - -#ifndef HOST_DEBUG -#define HOST_DEBUG 0 -#endif - -// Set PREALLOCATE to 1 if you want to preallocate the benchmark -// function before starting stats. If you have instruction/data -// caches and you don't want to count the overhead of misses, then -// you will need to use preallocation. - -#ifndef PREALLOCATE -#define PREALLOCATE 0 -#endif - -// Set SET_STATS to 1 if you want to carve out the piece that actually -// does the computation. - -#ifndef SET_STATS -#define SET_STATS 0 -#endif //-------------------------------------------------------------------------- // Input/Reference Data #include "dataset1.h" -//-------------------------------------------------------------------------- -// Helper functions - -int verify( int n, int test[], int correct[] ) -{ - int i; - for ( i = 0; i < n; i++ ) { - if ( test[i] != correct[i] ) { - return 2; - } - } - return 1; -} - -#if HOST_DEBUG -void printArray( char name[], int n, int arr[] ) -{ - int i; - printf( " %10s :", name ); - for ( i = 0; i < n; i++ ) - printf( " %3d ", arr[i] ); - printf( "\n" ); -} -#endif - -//void finishTest( int toHostValue ) -//{ -//#if HOST_DEBUG -// if ( toHostValue == 1 ) -// printf( "*** PASSED ***\n" ); -// else -// printf( "*** FAILED *** (tohost = %d)\n", toHostValue ); -// exit(0); -//#else -// asm( "mtpcr %0, tohost" : : "r" (toHostValue) ); -// while ( 1 ) { } -//#endif -//} - -void setStats( int enable ) -{ -#if ( !HOST_DEBUG && SET_STATS ) - asm( "mtpcr %0, cr10" : : "r" (enable) ); -#endif -} - //-------------------------------------------------------------------------- // vvadd function @@ -110,33 +36,23 @@ int main( int argc, char* argv[] ) int results_data[DATA_SIZE]; // Output the input array - -#if HOST_DEBUG printArray( "input1", DATA_SIZE, input1_data ); printArray( "input2", DATA_SIZE, input2_data ); printArray( "verify", DATA_SIZE, verify_data ); -#endif - - // If needed we preallocate everything in the caches #if PREALLOCATE + // If needed we preallocate everything in the caches vvadd( DATA_SIZE, input1_data, input2_data, results_data ); #endif // Do the vvadd - setStats(1); vvadd( DATA_SIZE, input1_data, input2_data, results_data ); setStats(0); // Print out the results - -#if HOST_DEBUG printArray( "results", DATA_SIZE, results_data ); -#endif // Check the results - - finishTest(verify( DATA_SIZE, results_data, verify_data )); - + return verify( DATA_SIZE, results_data, verify_data ); } diff --git a/env b/env index d4b98ba..35d0a98 160000 --- a/env +++ b/env @@ -1 +1 @@ -Subproject commit d4b98bac3c304e3bed612c79680fc2226ddb7e9a +Subproject commit 35d0a989a24fc0b9913760dcbead775eb8c0f29e -- 2.30.2