HOST_COMP = gcc $(HOST_OPTS)
RISCV_GCC = riscv-gcc
-RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -nostdlib -nostartfiles -ffast-math
+RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -ffast-math
RISCV_LINK = riscv-gcc -T $(bmarkdir)/common/test.ld $(incs)
RISCV_LINK_MT = riscv-gcc -T $(bmarkdir)/common/test-mt.ld
-RISCV_LINK_OPTS = -lc
+RISCV_LINK_OPTS = -nostdlib -nostartfiles -ffast-math -lc
RISCV_OBJDUMP = riscv-objdump --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.data
RISCV_SIM = spike
+++ /dev/null
-#include "encoding.h"
-
- .data
- .globl _heapend
- .globl environ
-_heapend:
- .word 0
-environ:
- .word 0
-
- .text
- .globl _start
-
-_start:
- li x1, 0
- li x2, 0
- li x3, 0
- li x4, 0
- li x5, 0
- li x6, 0
- li x7, 0
- li x8, 0
- li x9, 0
- li x10,0
- li x11,0
- li x12,0
- li x13,0
- li x14,0
- li x15,0
- li x16,0
- li x17,0
- li x18,0
- li x19,0
- li x20,0
- li x21,0
- li x22,0
- li x23,0
- li x24,0
- li x25,0
- li x26,0
- li x27,0
- li x28,0
- li x29,0
- li x30,0
- li x31,0
-
- # enable fp and accelerator
- li a0, SR_EF | SR_EA
- csrs status, a0
-
- ## if that didn't stick, we don't have an FPU, so don't initialize it
- csrr t0, status
- and t0, t0, SR_EF
- beqz t0, 1f
-
- fssr x0
- fmv.s.x f0, x0
- fmv.s.x f1, x0
- fmv.s.x f2, x0
- fmv.s.x f3, x0
- fmv.s.x f4, x0
- fmv.s.x f5, x0
- fmv.s.x f6, x0
- fmv.s.x f7, x0
- fmv.s.x f8, x0
- fmv.s.x f9, x0
- fmv.s.x f10,x0
- fmv.s.x f11,x0
- fmv.s.x f12,x0
- fmv.s.x f13,x0
- fmv.s.x f14,x0
- fmv.s.x f15,x0
- fmv.s.x f16,x0
- fmv.s.x f17,x0
- fmv.s.x f18,x0
- fmv.s.x f19,x0
- fmv.s.x f20,x0
- fmv.s.x f21,x0
- fmv.s.x f22,x0
- fmv.s.x f23,x0
- fmv.s.x f24,x0
- fmv.s.x f25,x0
- fmv.s.x f26,x0
- fmv.s.x f27,x0
- fmv.s.x f28,x0
- fmv.s.x f29,x0
- fmv.s.x f30,x0
- fmv.s.x f31,x0
-1:
-
-
- # get core id and number of cores
- csrr a0, hartid
- lw a1, 4(zero)
-
- slli a2, a0, 13
- la sp, stacktop
- sub sp, sp, a2
-
- la tp, tlstop
- sub tp, tp, a2
-
- jal thread_entry
-
- .bss
- .globl stacktop
- .globl tlstop
-
- .align 4
- .skip 32768
-stacktop:
- .skip 65536
-tlstop:
fmv.s.x f31,x0
1:
- lui a0, %hi(trap_entry)
- add a0, a0, %lo(trap_entry)
- csrw evec, a0
-
- lui a0, %hi(main)
- add a0, a0, %lo(main)
- csrw epc, a0
-
- # only allow core 0 to proceed
-1:csrr a0, hartid
- bnez a0, 1b
-
- la sp,stacktop
-
- # jmp to main as a user program
- sret
-1:b 1b
-
-.align 4
-.globl trap_entry
-trap_entry: # only check for SYS_exit, otherwise crash out
- li a3, 1337 # magic "bad things" happened error code
- csrr a1, cause
- li a2, 6 # syscall exception number
- bne a1, a2, exit_error
-handle_syscall:
- li a1, 93 # SYS_exit number
- bne v0, a1, exit_error
- li a1, 1 # successful exit code
- move a3, a0
- bne a3, a1, exit_error
- csrw tohost, a1 # exit successfully (tohost == 1)
-1:b 1b
-exit_error:
- sll a3, a3, 1
- or a3, a3, 1
- csrw tohost, a3
-1:b 1b
-
- .bss
- .globl stacktop
-
- .align 4
- .skip 131072
-stacktop:
+ la t0, trap_entry
+ csrw evec, t0
+
+ la tp, _end + 63
+ and tp, tp, -64
+
+ # get core id and number of cores
+ csrr a0, hartid
+ lw a1, 4(zero)
+
+ # give each core a 1KB TLS and a 127KB stack
+#define STKSHIFT 17
+ sll a2, a0, STKSHIFT
+ add tp, tp, a2
+ add sp, a0, 1
+ sll sp, sp, STKSHIFT
+ add sp, sp, tp
+ add tp, tp, 1024
+
+ jal _init
+ unimp
+
+trap_entry:
+ csrw sup0, t0
+ csrw sup1, t1
+ la t0, uarch_insn
+ lw t0, (t0)
+ csrr t1, epc
+ and t1, t1, ~3
+ lw t1, (t1)
+ and t1, t1, t0
+ beq t1, t0, handle_uarch_insn
+
+ # a trap occurred that shouldn't have.
+ li t0, 1337
+ csrw tohost, t0
+1:j 1b
+
+handle_uarch_insn:
+ # we trapped on an illegal uarch-specific CSR. just skip over it.
+ csrr t1, epc
+ add t1, t1, 4
+ csrw epc, t1
+ csrr t0, sup0
+ csrr t1, sup1
+ sret
+
+uarch_insn:
+ csrr x0, uarch0
#include <stdint.h>
#include <string.h>
#include <stdarg.h>
+#include <stdio.h>
+#include <limits.h>
#include <machine/syscall.h>
#include "encoding.h"
-void exit(int code)
+#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }
+
+void syscall(long which, long arg0, long arg1, long arg2)
{
- volatile uint64_t magic_mem[8] = {0};
- magic_mem[0] = SYS_exit;
- magic_mem[1] = code;
+ volatile uint64_t magic_mem[8] __attribute__((aligned(64)));
+ magic_mem[0] = which;
+ magic_mem[1] = arg0;
+ magic_mem[2] = arg1;
+ magic_mem[3] = arg2;
__sync_synchronize();
write_csr(tohost, (long)magic_mem);
- while(1);
+ while (swap_csr(fromhost, 0) == 0);
+}
+
+// Terminate the program: write (code << 1) | 1 to the tohost CSR and
+// then spin forever. This function never returns.
+void exit(int code)
+{
+  // Do the shift in an unsigned type. Left-shifting a negative int is
+  // undefined behavior, and READ_CTR calls exit(-1). The resulting bit
+  // pattern is the same one a two's-complement shift would produce.
+  write_csr(tohost, ((unsigned long)code << 1) | 1);
+  while (1);
}
void printstr(const char* s)
{
- volatile uint64_t magic_mem[8] = {0};
- magic_mem[0] = SYS_write;
- magic_mem[1] = 1;
- magic_mem[2] = (unsigned long)s;
- magic_mem[3] = strlen(s);
- __sync_synchronize();
- write_csr(tohost, (long)magic_mem);
- while (swap_csr(fromhost, 0) == 0);
+ syscall(SYS_write, 1, (long)s, strlen(s));
+}
+
+// In setStats, we might trap reading uarch-specific counters.
+// The trap handler will skip over the instruction, but we want
+// to pretend as though we read the value 0 in this case.
+#define read_csr_safe(reg) ({ long __tmp = 0; \
+ asm volatile ("csrr %0, " #reg : "+r"(__tmp)); \
+ __tmp; })
+
+#define NUM_COUNTERS 18
+static long counters[NUM_COUNTERS];
+static char* counter_names[NUM_COUNTERS];
+// Sample the cycle, instret and uarch0..uarch15 counters.
+// enable != 0: store the current value of each counter in counters[].
+// enable == 0: replace each stored value with (current - stored) and
+//              record the counter's name in counter_names[].
+void setStats(int enable)
+{
+ int i = 0;
+// READ_CTR(name) handles one counter and advances i; it calls exit(-1)
+// if more than NUM_COUNTERS counters are read.
+#define READ_CTR(name) do { \
+ if (i >= NUM_COUNTERS) exit(-1); \
+ long csr = read_csr_safe(name); \
+ if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \
+ counters[i++] = csr; \
+ } while (0)
+ READ_CTR(cycle); READ_CTR(instret);
+ READ_CTR(uarch0); READ_CTR(uarch1); READ_CTR(uarch2); READ_CTR(uarch3);
+ READ_CTR(uarch4); READ_CTR(uarch5); READ_CTR(uarch6); READ_CTR(uarch7);
+ READ_CTR(uarch8); READ_CTR(uarch9); READ_CTR(uarch10); READ_CTR(uarch11);
+ READ_CTR(uarch12); READ_CTR(uarch13); READ_CTR(uarch14); READ_CTR(uarch15);
+#undef READ_CTR
+}
+
+void __attribute__((weak)) thread_entry(int cid, int nc)
+{
+ // multi-threaded programs override this function.
+ // for the case of single-threaded programs, only let core 0 proceed.
+ while (cid != 0);
+}
+
+int __attribute__((weak)) main(int argc, char** argv)
+{
+ // single-threaded programs override this function.
+ printstr("Implement main(), foo!\n");
+ return -1;
}
+// C-level entry point; the startup code reaches it with `jal _init`.
+// cid and nc are forwarded unchanged to thread_entry(). If that call
+// returns, main() is run, every non-zero entry of counters[] is printed
+// as "name = value", and the program terminates through exit() with
+// main's return value.
+void _init(int cid, int nc)
+{
+  thread_entry(cid, nc);
+
+  // only single-threaded programs should ever get here.
+  int ret = main(0, 0);
+
+  char buf[NUM_COUNTERS * 32] __attribute__((aligned(64)));
+  char* pbuf = buf;
+  for (int i = 0; i < NUM_COUNTERS; i++)
+    if (counters[i])
+      // counters[] holds long values, so the conversion needs the 'l'
+      // length modifier; a plain %d would fetch only an int.
+      pbuf += sprintf(pbuf, "%s = %ld\n", counter_names[i], counters[i]);
+  if (pbuf != buf)
+    printstr(buf);
+
+  exit(ret);
+}
+
+#undef putchar
int putchar(int ch)
{
- static char buf[64];
+ static char buf[64] __attribute__((aligned(64)));
static int buflen = 0;
- if(ch != -1)
- buf[buflen++] = ch;
+ buf[buflen++] = ch;
- if(ch == -1 || buflen == sizeof(buf))
+ if (ch == '\n' || buflen == sizeof(buf))
{
- volatile uint64_t magic_mem[8] = {0};
- magic_mem[0] = SYS_write;
- magic_mem[1] = 1;
- magic_mem[2] = (long)buf;
- magic_mem[3] = buflen;
- __sync_synchronize();
- write_csr(tohost, (long)magic_mem);
- while (swap_csr(fromhost, 0) == 0);
-
+ syscall(SYS_write, 1, (long)buf, buflen);
buflen = 0;
}
printstr(str);
}
-static void printnum(void (*putch)(int, void**), void **putdat,
- unsigned long long num, unsigned base, int width, int padc)
+static inline void printnum(void (*putch)(int, void**), void **putdat,
+ unsigned long long num, unsigned base, int width, int padc)
{
- if (num >= base)
- printnum(putch, putdat, num / base, base, width - 1, padc);
- else while (--width > 0)
+ unsigned digs[sizeof(num)*CHAR_BIT];
+ int pos = 0;
+
+ while (1)
+ {
+ digs[pos++] = num % base;
+ if (num < base)
+ break;
+ num /= base;
+ }
+
+ while (width-- > pos)
putch(padc, putdat);
- putch("0123456789abcdef"[num % base], putdat);
+ while (pos-- > 0)
+ putch(digs[pos] + (digs[pos] >= 10 ? 'a' - 10 : '0'), putdat);
}
static unsigned long long getuint(va_list *ap, int lflag)
return va_arg(*ap, int);
}
-void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap)
+static void vprintfmt(void (*putch)(int, void**), void **putdat, const char *fmt, va_list ap)
{
register const char* p;
const char* last_fmt;
for (width -= strnlen(p, precision); width > 0; width--)
putch(padc, putdat);
for (; (ch = *p) != '\0' && (precision < 0 || --precision >= 0); width--) {
- if (altflag && (ch < ' ' || ch > '~'))
- putch('?', putdat);
- else
- putch(ch, putdat);
+ putch(ch, putdat);
p++;
}
for (; width > 0; width--)
num = -(long long) num;
}
base = 10;
- goto number;
+ goto signed_number;
// unsigned decimal
case 'u':
- num = getuint(&ap, lflag);
base = 10;
- goto number;
+ goto unsigned_number;
// (unsigned) octal
case 'o':
// should do something with padding so it's always 3 octits
- num = getuint(&ap, lflag);
base = 8;
- goto number;
+ goto unsigned_number;
// pointer
case 'p':
+ static_assert(sizeof(long) == sizeof(void*));
+ lflag = 1;
putch('0', putdat);
putch('x', putdat);
- num = (unsigned long long)
- (uintptr_t) va_arg(ap, void *);
- base = 16;
- goto number;
+ /* fall through to 'x' */
// (unsigned) hexadecimal
case 'x':
- num = getuint(&ap, lflag);
base = 16;
- number:
+ unsigned_number:
+ num = getuint(&ap, lflag);
+ signed_number:
printnum(putch, putdat, num, base, width, padc);
break;
va_start(ap, fmt);
vprintfmt((void*)putchar, 0, fmt, ap);
- putchar(-1);
va_end(ap);
return 0; // incorrect return value, but who cares, anyway?
}
+
+// Output callback used by sprintf(): data points at the caller's write
+// cursor (a char*); store ch through it and advance it by one byte.
+static void sprintf_putch(int ch, void** data)
+{
+  char** pstr = (char**)data;
+  **pstr = ch;
+  (*pstr)++;
+}
+
+// Format fmt and its arguments into str using vprintfmt(), then append a
+// terminating NUL. Returns the number of characters written, not counting
+// the NUL. No bounds checking is performed: the caller must supply a
+// buffer large enough for the formatted output.
+int sprintf(char* str, const char* fmt, ...)
+{
+  va_list ap;
+  char* str0 = str;
+  va_start(ap, fmt);
+
+  // The callback is a file-scope static function rather than a GNU nested
+  // function; it captures nothing, so no compiler extension is needed.
+  vprintfmt(sprintf_putch, (void**)&str, fmt, ap);
+  *str = 0;
+
+  va_end(ap);
+  return str - str0;
+}
+++ /dev/null
-/*======================================================================*/
-/* Proxy kernel linker script */
-/*======================================================================*/
-/* This is the linker script used when building the proxy kernel. */
-
-/*----------------------------------------------------------------------*/
-/* Setup */
-/*----------------------------------------------------------------------*/
-
-/* The OUTPUT_ARCH command specifies the machine architecture where the
- argument is one of the names used in the BFD library. More
- specifically one of the entires in bfd/cpu-mips.c */
-
-OUTPUT_ARCH( "riscv" )
-
-/* The ENTRY command specifies the entry point (ie. first instruction
- to execute). The symbol _start should be defined in each test. */
-
-ENTRY( _start )
-
-/*----------------------------------------------------------------------*/
-/* Sections */
-/*----------------------------------------------------------------------*/
-
-SECTIONS
-{
-
- /* text: test code section */
- . = 0x00002000;
- .text :
- {
- crt-mt.o(.text)
- *(.text)
- }
-
- /* data: Initialized data segment */
- .data :
- {
- *(.data)
- }
-
- /* End of uninitalized data segement */
- _end = .;
-}
-
-// helpful utility and synch functions
-
-// relies on defining "ncores" before including this file...
-
#ifndef __UTIL_H
#define __UTIL_H
-#include <machine/syscall.h>
+//--------------------------------------------------------------------------
+// Macros
+
+// Set HOST_DEBUG to 1 if you are going to compile this for a host
+// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
+// to 0 if you are compiling with the smips-gcc toolchain.
+
+#ifndef HOST_DEBUG
+#define HOST_DEBUG 0
+#endif
+
+// Set PREALLOCATE to 1 if you want to preallocate the benchmark
+// function before starting stats. If you have instruction/data
+// caches and you don't want to count the overhead of misses, then
+// you will need to use preallocation.
+
+#ifndef PREALLOCATE
+#define PREALLOCATE 0
+#endif
+
+// Set SET_STATS to 1 if you want to carve out the piece that actually
+// does the computation.
+
+#ifndef SET_STATS
+#define SET_STATS 0
+#endif
+
+#if HOST_DEBUG
+#include <stdio.h>
+static void setStats(int enable) {}
+#else
+extern void setStats(int enable);
+#endif
+
+static void printArray(const char name[], int n, const int arr[])
+{
+#if HOST_DEBUG
+ int i;
+ printf( " %10s :", name );
+ for ( i = 0; i < n; i++ )
+ printf( " %3d ", arr[i] );
+ printf( "\n" );
+#endif
+}
+
+static void printDoubleArray(const char name[], int n, const double arr[])
+{
+#if HOST_DEBUG
+ int i;
+ printf( " %10s :", name );
+ for ( i = 0; i < n; i++ )
+ printf( " %g ", arr[i] );
+ printf( "\n" );
+#endif
+}
+
+// Compare the first n elements of test[] against verify[].
+// Returns 0 when every element matches; otherwise returns the 1-based
+// index of the first mismatching element.
+static int verify(int n, const int test[], const int verify[])
+{
+ int i;
+ // Unrolled for faster verification
+ // n/2*2 rounds n down to an even count so the loop can step in pairs
+ for (i = 0; i < n/2*2; i+=2)
+ {
+ int t0 = test[i], t1 = test[i+1];
+ int v0 = verify[i], v1 = verify[i+1];
+ if (t0 != v0) return i+1;
+ if (t1 != v1) return i+2;
+ }
+ // odd n: the paired loop skipped the last element, check it here
+ if (n % 2 != 0 && test[n-1] != verify[n-1])
+ return n;
+ return 0;
+}
-#define rdcycle() ({ unsigned long _c; asm volatile ("rdcycle %0" : "=r"(_c) :: "memory"); _c; })
-#define rdinstret() ({ unsigned long _c; asm volatile ("rdinstret %0" : "=r"(_c) :: "memory"); _c; })
-
-void __attribute__((noinline)) barrier()
+// Compare the first n elements of test[] against verify[] using exact
+// (==) double comparison.
+// Returns 0 when every element matches; otherwise returns the 1-based
+// index of the first mismatching element.
+static int verifyDouble(int n, const double test[], const double verify[])
+{
+ int i;
+ // Unrolled for faster verification
+ // n/2*2 rounds n down to an even count so the loop can step in pairs
+ for (i = 0; i < n/2*2; i+=2)
+ {
+ double t0 = test[i], t1 = test[i+1];
+ double v0 = verify[i], v1 = verify[i+1];
+ int eq1 = t0 == v0, eq2 = t1 == v1;
+ // eq1 == 0: element i differs        -> return i+1
+ // eq1 == 1: only element i+1 differs -> return i+2
+ if (!(eq1 & eq2)) return i+1+eq1;
+ }
+ // odd n: the paired loop skipped the last element, check it here
+ if (n % 2 != 0 && test[n-1] != verify[n-1])
+ return n;
+ return 0;
+}
+
+#ifndef ncores
+#define ncores 1
+#endif
+
+static void __attribute__((noinline)) barrier()
{
static volatile int sense;
static volatile int count;
__sync_synchronize();
}
-
-
-
-
-void finishTest(int test_result)
-{
-#if HOST_DEBUG
- if ( test_result == 1 )
- printf( "*** PASSED ***\n" );
- else
- printf( "*** FAILED *** (tohost = %d)\n", test_result);
- exit(0);
-#else
- {
- // perform exit syscall
- asm volatile(
- "move a0,%0 ;"
- "li a1,0 ;"
- "li a2,0 ;"
- "li a3,0 ;"
- "li v0,%1 ;"
- "scall" : : "r"(test_result) , "i"(SYS_exit));
- }
+#ifdef __riscv
+#include "encoding.h"
#endif
-}
#endif //__UTIL_H
-
dgemm_c_src = \
dgemm_main.c \
+ syscalls.c \
dgemm_riscv_src = \
crt.S \
dgemm_riscv_bin = dgemm.riscv
$(dgemm_riscv_bin) : $(dgemm_c_objs) $(dgemm_riscv_objs)
- $(RISCV_LINK) $(dgemm_c_objs) $(dgemm_riscv_objs) -o $(dgemm_riscv_bin)
+ $(RISCV_LINK) $(dgemm_c_objs) $(dgemm_riscv_objs) -o $(dgemm_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(dgemm_c_objs) $(dgemm_riscv_objs) \
$(dgemm_host_bin) $(dgemm_riscv_bin)
// Double-precision general matrix multiplication benchmark
//--------------------------------------------------------------------------
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
//--------------------------------------------------------------------------
// Input/Reference Data
#include "dataset1.h"
-//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( long n, const double test[], const double correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-#include <stdio.h>
-#include <stdlib.h>
-void printArray( char name[], long n, const double arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %8.1f ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
//--------------------------------------------------------------------------
// square_dgemm function
double results_data[DATA_SIZE*DATA_SIZE];
// Output the input array
-
-#if HOST_DEBUG
- printArray( "input1", DATA_SIZE*DATA_SIZE, input1_data );
- printArray( "input2", DATA_SIZE*DATA_SIZE, input2_data );
- printArray( "verify", DATA_SIZE*DATA_SIZE, verify_data );
-#endif
-
- // If needed we preallocate everything in the caches
+ printDoubleArray( "input1", DATA_SIZE*DATA_SIZE, input1_data );
+ printDoubleArray( "input2", DATA_SIZE*DATA_SIZE, input2_data );
+ printDoubleArray( "verify", DATA_SIZE*DATA_SIZE, verify_data );
#if PREALLOCATE
+ // If needed we preallocate everything in the caches
square_dgemm( DATA_SIZE, input1_data, input2_data, results_data );
#endif
// Do the dgemm
-
setStats(1);
square_dgemm( DATA_SIZE, input1_data, input2_data, results_data );
setStats(0);
// Print out the results
-
-#if HOST_DEBUG
- printArray( "results", DATA_SIZE*DATA_SIZE, results_data );
-#endif
+ printDoubleArray( "results", DATA_SIZE*DATA_SIZE, results_data );
// Check the results
-
- finishTest(verify( DATA_SIZE*DATA_SIZE, results_data, verify_data ));
-
+ return verifyDouble( DATA_SIZE*DATA_SIZE, results_data, verify_data );
}
dhrystone_c_src = \
dhrystone_main.c \
dhrystone.c \
+ syscalls.c \
dhrystone_riscv_src = \
crt.S \
#define HZ 976563
#define Too_Small_Time 50
-#define rdcycle() ({ \
- long __x; \
- asm volatile("rdcycle %0; srl %0, %0, 10" : "=r"(__x)); \
- __x; })
#define CLOCK_TYPE "rdcycle()"
-#define Start_Timer() Begin_Time = rdcycle()
-#define Stop_Timer() End_Time = rdcycle()
+#define Start_Timer() Begin_Time = rdcycle()/1024
+#define Stop_Timer() End_Time = rdcycle()/1024
#else
/* Use times(2) time function unless */
#include "dhrystone.h"
-int ncores = 1;
-#include "util.h"
-
//--------------------------------------------------------------------------
// Macros
}
#endif
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
+#include "util.h"
#include <alloca.h>
do_fprintf (stdout, "\n");
#endif
- finishTest(1);
+ return 0;
}
median_c_src = \
median_main.c \
median.c \
+ syscalls.c \
median_riscv_src = \
crt.S \
median_riscv_bin = median.riscv
$(median_riscv_bin): $(median_c_objs) $(median_riscv_objs)
- $(RISCV_LINK) $(median_c_objs) $(median_riscv_objs) -o $(median_riscv_bin)
+ $(RISCV_LINK) $(median_c_objs) $(median_riscv_objs) -o $(median_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(median_c_objs) $(median_riscv_objs) \
$(median_host_bin) $(median_riscv_bin)
// dataset1.h You should not change anything except the
// HOST_DEBUG and PREALLOCATE macros for your timing run.
-#include "median.h"
-
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
+#include "median.h"
//--------------------------------------------------------------------------
// Input/Reference Data
#include "dataset1.h"
-//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( int n, int test[], int correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, int arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3d ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
//--------------------------------------------------------------------------
// Main
int results_data[DATA_SIZE];
// Output the input array
-
-#if HOST_DEBUG
printArray( "input", DATA_SIZE, input_data );
printArray( "verify", DATA_SIZE, verify_data );
-#endif
+#if PREALLOCATE
// If needed we preallocate everything in the caches
-
-#if ( !HOST_DEBUG && PREALLOCATE )
median( DATA_SIZE, input_data, results_data );
#endif
// Do the filter
-
-#if HOST_DEBUG
- median( DATA_SIZE, input_data, results_data );
-#else
setStats(1);
median( DATA_SIZE, input_data, results_data );
setStats(0);
-#endif
// Print out the results
-
-#if HOST_DEBUG
printArray( "results", DATA_SIZE, results_data );
-#endif
// Check the results
-
- finishTest(verify( DATA_SIZE, results_data, verify_data ));
-
+ return verify( DATA_SIZE, results_data, verify_data );
}
mt_matmul_c_src = \
mt-matmul.c \
+ syscalls.c \
mt_matmul_riscv_src = \
- crt-mt.S \
+ crt.S \
mt_matmul_c_objs = $(patsubst %.c, %.o, $(mt_matmul_c_src))
mt_matmul_riscv_objs = $(patsubst %.S, %.o, $(mt_matmul_riscv_src))
mt_matmul_riscv_bin = mt-matmul.riscv
$(mt_matmul_riscv_bin) : $(mt_matmul_c_objs) $(mt_matmul_riscv_objs)
- $(RISCV_LINK_MT) $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_matmul_riscv_bin)
+ $(RISCV_LINK) $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_matmul_riscv_bin)
junk += $(mt_matmul_c_objs) $(mt_matmul_riscv_objs) \
$(mt_matmul_host_bin) $(mt_matmul_riscv_bin)
//--------------------------------------------------------------------------
// Input/Reference Data
-typedef float data_t;
+typedef double data_t;
#include "dataset.h"
__thread unsigned long coreid;
unsigned long ncores;
+#define ncores ncores
#include "util.h"
stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
} while(0)
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %3ld, correct[%d]= %3ld\n",
- i, (long)test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
//--------------------------------------------------------------------------
// matmul function
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
-
+ int res = verifyDouble(ARRAY_SIZE, results_data, verify_data);
+ if (res)
+ exit(res);
+
+#if 0
// clear results from the first trial
size_t i;
if (coreid == 0)
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ res = verify(ARRAY_SIZE, results_data, verify_data);
+ if (res)
+ exit(res);
barrier();
+#endif
exit(0);
}
-
syscalls.c \
mt_vvadd_riscv_src = \
- crt-mt.S \
+ crt.S \
mt_vvadd_c_objs = $(patsubst %.c, %.o, $(mt_vvadd_c_src))
mt_vvadd_riscv_objs = $(patsubst %.S, %.o, $(mt_vvadd_riscv_src))
mt_vvadd_riscv_bin = mt-vvadd.riscv
$(mt_vvadd_riscv_bin) : $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs)
- $(RISCV_LINK_MT) $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_vvadd_riscv_bin)
+ $(RISCV_LINK) $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) $(RISCV_LINK_OPTS) -o $(mt_vvadd_riscv_bin)
junk += $(mt_vvadd_c_objs) $(mt_vvadd_riscv_objs) \
$(mt_vvadd_host_bin) $(mt_vvadd_riscv_bin)
//--------------------------------------------------------------------------
// Input/Reference Data
-typedef float data_t;
+typedef double data_t;
#include "dataset.h"
__thread unsigned long coreid;
unsigned long ncores;
+#define ncores ncores
#include "util.h"
stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
} while(0)
-
-//--------------------------------------------------------------------------
-// Helper functions
-
-void printArray( char name[], int n, data_t arr[] )
-{
- int i;
- if (coreid != 0)
- return;
-
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %4ld ", (long) arr[i] );
- printf( "\n" );
-}
-
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
-{
- if (coreid != 0)
- return;
-
- size_t i;
- for (i = 0; i < n; i++)
- {
- if (test[i] != correct[i])
- {
- printf("FAILED test[%d]= %4ld, correct[%d]= %4ld\n",
- i, (long) test[i], i, (long)correct[i]);
- exit(-1);
- }
- }
-
- return;
-}
-
//--------------------------------------------------------------------------
// vvadd function
// verify
- verify(DATA_SIZE, results_data, verify_data);
-
+ int res = verifyDouble(DATA_SIZE, results_data, verify_data);
+ if (res)
+ exit(res);
+
+#if 0
// reset results from the first trial
if (coreid == 0)
{
results_data[i] = input1_data[i];
}
barrier();
-
-
+
// Execute your faster vvadd
barrier();
stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printDoubleArray("results: ", DATA_SIZE, results_data);
+ printDoubleArray("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ res = verifyDouble(DATA_SIZE, results_data, verify_data);
+ if (res)
+ exit(res);
barrier();
+#endif
exit(0);
}
-
multiply_c_src = \
multiply_main.c \
multiply.c \
+ syscalls.c \
multiply_riscv_src = \
crt.S \
multiply_riscv_bin = multiply.riscv
$(multiply_riscv_bin): $(multiply_c_objs) $(multiply_riscv_objs)
- $(RISCV_LINK) $(multiply_c_objs) $(multiply_riscv_objs) -o $(multiply_riscv_bin)
+ $(RISCV_LINK) $(multiply_c_objs) $(multiply_riscv_objs) -o $(multiply_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(multiply_c_objs) $(multiply_riscv_objs) \
$(multiply_host_bin) $(multiply_riscv_bin)
// dataset1.h You should not change anything except the
// HOST_DEBUG and VERIFY macros for your timing run.
-#include "multiply.h"
-
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set VERIFY to 1 if you want the program to check that the sort
-// function returns the right answer. When you are doing your
-// benchmarking you should set this to 0 so that the verification
-// is not included in your timing.
-
-#ifndef VERIFY
-#define VERIFY 1
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
+#include "multiply.h"
//--------------------------------------------------------------------------
// Input/Reference Data
#include "dataset1.h"
-//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( int n, int test[], int correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, int arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3d ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
//--------------------------------------------------------------------------
// Main
int results_data[DATA_SIZE];
// Output the input arrays
-
-#if HOST_DEBUG
printArray( "input1", DATA_SIZE, input_data1 );
printArray( "input2", DATA_SIZE, input_data2 );
printArray( "verify", DATA_SIZE, verify_data );
-#endif
-#if ( !HOST_DEBUG && PREALLOCATE )
+#if PREALLOCATE
for (i = 0; i < DATA_SIZE; i++)
{
results_data[i] = multiply( input_data1[i], input_data2[i] );
}
#endif
-#if HOST_DEBUG
- for (i = 0; i < DATA_SIZE; i++)
- {
- results_data[i] = multiply( input_data1[i], input_data2[i] );
- }
-#else
setStats(1);
for (i = 0; i < DATA_SIZE; i++)
{
results_data[i] = multiply( input_data1[i], input_data2[i] );
}
setStats(0);
-#endif
// Print out the results
-
-#if HOST_DEBUG
printArray( "results", DATA_SIZE, results_data );
-#endif
// Check the results
-
- finishTest(verify( DATA_SIZE, results_data, verify_data ));
-
+ return verify( DATA_SIZE, results_data, verify_data );
}
qsort_c_src = \
qsort_main.c \
+ syscalls.c \
qsort_riscv_src = \
crt.S \
qsort_riscv_bin = qsort.riscv
$(qsort_riscv_bin) : $(qsort_c_objs) $(qsort_riscv_objs)
- $(RISCV_LINK) $(qsort_c_objs) $(qsort_riscv_objs) -o $(qsort_riscv_bin)
+ $(RISCV_LINK) $(qsort_c_objs) $(qsort_riscv_objs) -o $(qsort_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(qsort_c_objs) $(qsort_riscv_objs) \
$(qsort_host_bin) $(qsort_riscv_bin)
// processor simulator itself. You should not change anything except
// the HOST_DEBUG and PREALLOCATE macros for your timing run.
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
// The INSERTION_THRESHOLD is the size of the subarray when the
// algorithm switches to using an insertion sort instead of
// quick sort.
#include "dataset1.h"
-//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( int n, int test[], int correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, int arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3d ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
//--------------------------------------------------------------------------
// Quicksort function
int main( int argc, char* argv[] )
{
-
// Output the input array
-
-#if HOST_DEBUG
printArray( "input", DATA_SIZE, input_data );
printArray( "verify", DATA_SIZE, verify_data );
-#endif
-
- // If needed we preallocate everything in the caches
#if PREALLOCATE
+ // If needed we preallocate everything in the caches
sort( DATA_SIZE, input_data );
#endif
// Do the sort
-
setStats(1);
sort( DATA_SIZE, input_data );
setStats(0);
// Print out the results
-
-#if HOST_DEBUG
printArray( "test", DATA_SIZE, input_data );
-#endif
// Check the results
-
- finishTest(verify( DATA_SIZE, input_data, verify_data ));
-
+ return verify( DATA_SIZE, input_data, verify_data );
}
spmv_c_src = \
spmv_main.c \
+ syscalls.c \
spmv_riscv_src = \
crt.S \
spmv_riscv_bin = spmv.riscv
$(spmv_riscv_bin) : $(spmv_c_objs) $(spmv_riscv_objs)
- $(RISCV_LINK) $(spmv_c_objs) $(spmv_riscv_objs) -o $(spmv_riscv_bin)
+ $(RISCV_LINK) $(spmv_c_objs) $(spmv_riscv_objs) -o $(spmv_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(spmv_c_objs) $(spmv_riscv_objs) \
$(spmv_host_bin) $(spmv_riscv_bin)
// Double-precision general matrix multiplication benchmark
//--------------------------------------------------------------------------
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
//--------------------------------------------------------------------------
// Input/Reference Data
#include "dataset1.h"
-//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( long n, const double test[], const double correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-#include <stdio.h>
-#include <stdlib.h>
-void printArray( char name[], long n, const double arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %8.1f ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
void spmv(int r, const double* val, const int* idx, const double* x,
const int* ptr, double* y)
{
spmv(R, val, idx, x, ptr, y);
setStats(0);
- finishTest(verify(R, y, verify_data));
+ return verifyDouble(R, y, verify_data);
}
towers_c_src = \
towers_main.c \
+ syscalls.c \
towers_riscv_src = \
crt.S \
towers_riscv_bin = towers.riscv
$(towers_riscv_bin) : $(towers_c_objs) $(towers_riscv_objs)
- $(RISCV_LINK) $(towers_c_objs) $(towers_riscv_objs) -o $(towers_riscv_bin)
+ $(RISCV_LINK) $(towers_c_objs) $(towers_riscv_objs) -o $(towers_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(towers_c_objs) $(towers_riscv_objs) \
$(towers_host_bin) $(towers_riscv_bin)
// smips processor simulator itself. You should not change anything except
// the HOST_DEBUG and PREALLOCATE macros for your timing run.
-int ncores = 1;
#include "util.h"
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
// This is the number of discs in the puzzle.
#define NUM_DISCS 7
-//--------------------------------------------------------------------------
-// Helper functions
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
//--------------------------------------------------------------------------
// List data structure and functions
return 6;
}
- return 1;
+ return 0;
}
//--------------------------------------------------------------------------
#endif
// Check the results
-
- finishTest( towers_verify( &towers ) );
-
+ return towers_verify( &towers );
}
#include <stdlib.h>
#else
void printstr(const char*);
-void exit();
#endif
+#include "util.h"
+
//--------------------------------------------------------------------------
// Complex Value Structs
// This benchmark multiplies two 2-D arrays together and writes the results to
// a third array. The input data (and reference data) should be generated
// using the matmul_gendata.pl perl script and dumped to a file named
-// dataset.h. The riscv-gcc toolchain does not support system calls so printf's
-// can only be used on a host system, not on the riscv-v processor simulator
-// itself.
-//
-// HOWEVER: printstr() and printhex() are provided, for a primitive form of
-// printing strings and hexadecimal values to stdout.
+// dataset.h.
+#include "util.h"
// Choose which implementation you wish to test... but leave only one on!
// (only the first one will be executed).
//#define SCALAR_ASM
#define VT_ASM
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
-//--------------------------------------------------------------------------
-// Host Platform Includes
-
-#if HOST_DEBUG
- #include <stdio.h>
- #include <stdlib.h>
-#else
-void printstr(const char*);
-void exit();
-#endif
-
-
//--------------------------------------------------------------------------
// Input/Reference Data
return 1;
}
-#if HOST_DEBUG
-void printArray( char name[], int n, float arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %03.2f ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-
void finishTest( int correct, long long num_cycles, long long num_retired )
{
int toHostValue = correct;
//
// This benchmark adds two vectors and writes the results to a third
// vector. The input data (and reference data) should be generated using the
-// vvadd_gendata.pl perl script and dumped to a file named dataset.h. The
-// riscv-gcc toolchain does not support system calls so printf's can only be
-// used on a host system, not on the riscv-v processor simulator itself.
-//
-// HOWEVER: printstr() and printhex() are provided, for a primitive form of
-// printing strings and hexadecimal values to stdout.
-
+// vvadd_gendata.pl perl script and dumped to a file named dataset.h.
// Choose which implementation you wish to test... but leave only one on!
// (only the first one will be executed).
//#define SCALAR_ASM
#define VT_ASM
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
-
-//--------------------------------------------------------------------------
-// Platform Specific Includes
-
-#if HOST_DEBUG
- #include <stdio.h>
- #include <stdlib.h>
-#else
-void printstr(const char*);
-void exit();
-#endif
-
-
//--------------------------------------------------------------------------
// Input/Reference Data
}
return 1;
}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, float arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %03.2f ", arr[i] );
- printf( "\n" );
-}
-#endif
-
void finishTest( int correct, long long num_cycles, long long num_retired )
{
vvadd_c_src = \
vvadd_main.c \
+ syscalls.c \
vvadd_riscv_src = \
crt.S \
vvadd_riscv_bin = vvadd.riscv
$(vvadd_riscv_bin) : $(vvadd_c_objs) $(vvadd_riscv_objs)
- $(RISCV_LINK) $(vvadd_c_objs) $(vvadd_riscv_objs) -o $(vvadd_riscv_bin)
+ $(RISCV_LINK) $(vvadd_c_objs) $(vvadd_riscv_objs) -o $(vvadd_riscv_bin) $(RISCV_LINK_OPTS)
junk += $(vvadd_c_objs) $(vvadd_riscv_objs) \
$(vvadd_host_bin) $(vvadd_riscv_bin)
// anything except the HOST_DEBUG and PREALLOCATE macros for your timing
// runs.
-int ncores = 1;
#include "util.h"
-
-//--------------------------------------------------------------------------
-// Macros
-
-// Set HOST_DEBUG to 1 if you are going to compile this for a host
-// machine (ie Athena/Linux) for debug purposes and set HOST_DEBUG
-// to 0 if you are compiling with the smips-gcc toolchain.
-
-#ifndef HOST_DEBUG
-#define HOST_DEBUG 0
-#endif
-
-// Set PREALLOCATE to 1 if you want to preallocate the benchmark
-// function before starting stats. If you have instruction/data
-// caches and you don't want to count the overhead of misses, then
-// you will need to use preallocation.
-
-#ifndef PREALLOCATE
-#define PREALLOCATE 0
-#endif
-
-// Set SET_STATS to 1 if you want to carve out the piece that actually
-// does the computation.
-
-#ifndef SET_STATS
-#define SET_STATS 0
-#endif
//--------------------------------------------------------------------------
// Input/Reference Data
#include "dataset1.h"
-//--------------------------------------------------------------------------
-// Helper functions
-
-int verify( int n, int test[], int correct[] )
-{
- int i;
- for ( i = 0; i < n; i++ ) {
- if ( test[i] != correct[i] ) {
- return 2;
- }
- }
- return 1;
-}
-
-#if HOST_DEBUG
-void printArray( char name[], int n, int arr[] )
-{
- int i;
- printf( " %10s :", name );
- for ( i = 0; i < n; i++ )
- printf( " %3d ", arr[i] );
- printf( "\n" );
-}
-#endif
-
-//void finishTest( int toHostValue )
-//{
-//#if HOST_DEBUG
-// if ( toHostValue == 1 )
-// printf( "*** PASSED ***\n" );
-// else
-// printf( "*** FAILED *** (tohost = %d)\n", toHostValue );
-// exit(0);
-//#else
-// asm( "mtpcr %0, tohost" : : "r" (toHostValue) );
-// while ( 1 ) { }
-//#endif
-//}
-
-void setStats( int enable )
-{
-#if ( !HOST_DEBUG && SET_STATS )
- asm( "mtpcr %0, cr10" : : "r" (enable) );
-#endif
-}
-
//--------------------------------------------------------------------------
// vvadd function
int results_data[DATA_SIZE];
// Output the input array
-
-#if HOST_DEBUG
printArray( "input1", DATA_SIZE, input1_data );
printArray( "input2", DATA_SIZE, input2_data );
printArray( "verify", DATA_SIZE, verify_data );
-#endif
-
- // If needed we preallocate everything in the caches
#if PREALLOCATE
+ // If needed we preallocate everything in the caches
vvadd( DATA_SIZE, input1_data, input2_data, results_data );
#endif
// Do the vvadd
-
setStats(1);
vvadd( DATA_SIZE, input1_data, input2_data, results_data );
setStats(0);
// Print out the results
-
-#if HOST_DEBUG
printArray( "results", DATA_SIZE, results_data );
-#endif
// Check the results
-
- finishTest(verify( DATA_SIZE, results_data, verify_data ));
-
+ return verify( DATA_SIZE, results_data, verify_data );
}
-Subproject commit d4b98bac3c304e3bed612c79680fc2226ddb7e9a
+Subproject commit 35d0a989a24fc0b9913760dcbead775eb8c0f29e