#--------------------------------------------------------------------
RISCV_GCC = riscv-gcc
-RISCV_GCC_OPTS = -std=gnu99 -T common/test.ld -O3 -nostdlib -nostartfiles -funroll-all-loops
-RISCV_LINK = riscv-gcc -T $(common)/test.ld
-RISCV_LINK_MT = riscv-gcc -T $(common)/test-mt.ld
-RISCV_LINK_OPTS = -lc
-RISCV_LINK_SYSCALL = -I$(bmarkdir)/../env $(common)/syscalls.c -lc
+RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -ffast-math
+RISCV_LINK = riscv-gcc -T $(common)/test.ld $(incs)
+RISCV_LINK_OPTS = -nostdlib -nostartfiles -ffast-math -lc
RISCV_OBJDUMP = riscv-objdump --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.data
RISCV_SIM = spike -p2
%.hex: %
elf2hex 16 32768 $< > $@
-$(bmarks_riscv_bin): %.riscv: %.o crt-mt.o
- $(RISCV_LINK_MT) crt-mt.o $< $(RISCV_LINK_SYSCALL) -o $@
+$(bmarks_riscv_bin): %.riscv: %.o syscalls.o crt.o
+ $(RISCV_LINK) $< syscalls.o crt.o $(RISCV_LINK_OPTS) -o $@
$(bmarks_riscv_dump): %.riscv.dump: %.riscv
$(RISCV_OBJDUMP) $< > $@
$(RISCV_SIM) $< > $@
%.o: %.c
- $(RISCV_GCC) $(RISCV_GCC_OPTS) $(bmarks_defs) \
+ $(RISCV_GCC) $(RISCV_GCC_OPTS) $(bmarks_defs) -D__ASSEMBLY__=1 \
-c $(incs) $< -o $@
%.o: %.S
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
}
}
}
- barrier();
+ barrier(ncores);
for ( i = 0; i < lda; i+=4 ) {
for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) {
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
}
}
}
- barrier();
+ barrier(nc);
for ( int x = 0; x < ncores; x++) {
//split the i values into two chunks so the threads don't interfere on the B loads
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
C[(j+3)*lda + i + 1] += sum;
}
- barrier();
+ barrier(ncores);
}
}
}
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
C[(j+3)*lda + i + 1] += sum;
}
- barrier();
+ barrier(nc);
}
}
}
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
//----------MSI--------------
///*
int i,j,k;
- barrier();
+ barrier(ncores);
for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
for(i = 0; i < lda; i+=4) {
data_t Cval0 = 0;
//------------------MI-------------------
/*
int i,j,k;
- barrier();
+ barrier(nc);
for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
for(i = 0; i < lda; i+=4) {
data_t Cval0 = 0;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
//----------MSI--------------
/*
int i,j,k;
- barrier();
+ barrier(nc);
for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
for(i = 0; i < lda; i+=4) {
data_t Cval0 = 0;
//------------------MI-------------------
int i,j,k;
- barrier();
+ barrier(nc);
for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
for(i = 0; i < lda; i+=4) {
data_t Cval0 = 0;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
+++ /dev/null
-matmul.c
\ No newline at end of file
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
int i, j, k, n, m;
- //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 957424 cycles, 29.2 cycles/iter, 3.6 CPI
- //matmul(32, input1_data, input2_data, results_data); barrier(): 340408 cycles, 10.3 cycles/iter, 1.8 CPI
+ //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 957424 cycles, 29.2 cycles/iter, 3.6 CPI
+ //matmul(32, input1_data, input2_data, results_data); barrier(nc): 340408 cycles, 10.3 cycles/iter, 1.8 CPI
for (n = 0; n < lda; n += 1) {
for (m = 0; m < lda; m += 1) {
bTranspose[lda*n + m] = B[lda*m + n];
}
}
- barrier();
+ barrier(ncores);
for ( j = coreid; j < lda; j += 2*ncores ) {
for ( i = 0; i < lda; i += 1 ){
c1 += A[j * lda + k] * bTranspose[i*lda + k];
c2 += A[(j+2) * lda + k] * bTranspose[i*lda + k];
- //barrier();
+ //barrier(nc);
}
C[i + j * lda] = c1;
C[i + (j+2) * lda] = c2;
- barrier();
+ barrier(ncores);
}
- //barrier();
+ //barrier(nc);
}
- //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 983609 cycles, 30.0 cycles/iter, 3.7 CPI
- //matmul(32, input1_data, input2_data, results_data); barrier(): 389942 cycles, 11.9 cycles/iter, 2.5 CPI
+ //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 983609 cycles, 30.0 cycles/iter, 3.7 CPI
+ //matmul(32, input1_data, input2_data, results_data); barrier(nc): 389942 cycles, 11.9 cycles/iter, 2.5 CPI
/*
for ( j = coreid; j < lda; j += 2*ncores ) {
c1 += A[j * lda + k] * B[k*lda + i];
c2 += A[(j+2) * lda + k] * B[k*lda + i];
- //barrier();
+ //barrier(nc);
}
C[i + j * lda] = c1;
C[i + (j+2) * lda] = c2;
- barrier();
+ barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
*/
- // matmul_naive(32, input1_data, input2_data, results_data); barrier(): 973781 cycles, 29.7 cycles/iter, 3.7 CPI
- // matmul(32, input1_data, input2_data, results_data); barrier(): 461066 cycles, 14.0 cycles/iter, 3.5 CPI
+ // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 973781 cycles, 29.7 cycles/iter, 3.7 CPI
+ // matmul(32, input1_data, input2_data, results_data); barrier(nc): 461066 cycles, 14.0 cycles/iter, 3.5 CPI
// for ( k = 0; k < lda; k += 1 ) {
// for ( j = coreid; j < lda; j += 2*ncores ) {
// for ( i = 0; i < lda; i += 1 ){
// C[i + j * lda] += A[j * lda + k] * B[k*lda + i];
// C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i];
- // //barrier();
+ // //barrier(nc);
// }
- // barrier();
+ // barrier(nc);
// }
- // //barrier();
+ // //barrier(nc);
// }
- // matmul_naive(32, input1_data, input2_data, results_data); barrier(): 965136 cycles, 29.4 cycles/iter, 3.7 CPI
- // matmul(32, input1_data, input2_data, results_data); barrier(): 513779 cycles, 15.6 cycles/iter, 3.2 CPI
+ // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 965136 cycles, 29.4 cycles/iter, 3.7 CPI
+ // matmul(32, input1_data, input2_data, results_data); barrier(nc): 513779 cycles, 15.6 cycles/iter, 3.2 CPI
// for ( j = coreid; j < lda; j += 2*ncores ) {
// for ( i = 0; i < lda; i += 1 ){
// C[i + j * lda] += A[j * lda + k] * B[k*lda + i];
// C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i];
- // //barrier();
+ // //barrier(nc);
// }
- // barrier();
+ // barrier(nc);
// }
- // //barrier();
+ // //barrier(nc);
//}
- // matmul_naive(32, input1_data, input2_data, results_data); barrier(): 937892 cycles, 28.6 cycles/iter, 3.6 CPI
- // matmul(32, input1_data, input2_data, results_data); barrier(): 576478 cycles, 17.5 cycles/iter, 3.5 CPI
+ // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 937892 cycles, 28.6 cycles/iter, 3.6 CPI
+ // matmul(32, input1_data, input2_data, results_data); barrier(nc): 576478 cycles, 17.5 cycles/iter, 3.5 CPI
// for ( i = 0; i < lda; i += 1 ){
// for ( j = coreid; j < lda; j += 2*ncores ) {
// C[i + j * lda] += A[j * lda + k] * B[k*lda + i];
// C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i];
- // //barrier();
+ // //barrier(nc);
// }
- // barrier();
+ // barrier(nc);
// }
- // //barrier();
+ // //barrier(nc);
// }
//for ( i = coreid; i < lda; i += ncores ){
// for ( k = coreid; k < lda; k += ncores ) {
// C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
// }
- //barrier();
+ //barrier(nc);
// }
//}
}
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
int i, j, k, n, m, c1, c2;
- //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 952596 cycles, 29.0 cycles/iter, 3.6 CPI
- //matmul(32, input1_data, input2_data, results_data); barrier(): 570135 cycles, 17.3 cycles/iter, 3.4 CPI
+ //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 952596 cycles, 29.0 cycles/iter, 3.6 CPI
+ //matmul(32, input1_data, input2_data, results_data); barrier(nc): 570135 cycles, 17.3 cycles/iter, 3.4 CPI
for ( j = coreid; j < lda; j += 2*ncores ) {
for ( i = 0; i < lda; i += 1 ){
c1 += A[j * lda + k] * B[k*lda + i];
c2 += A[(j+2) * lda + k] * B[k*lda + i];
- //barrier();
+ //barrier(nc);
}
C[i + j * lda] = c1;
C[i + (j+2) * lda] = c2;
- barrier();
+ barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
for (i = coreid; i < n; i += 2*ncores) {
x[i] = x[i] + y[i];
x[i+2] = x[i+2] + y[i+2];
- //barrier();
+ //barrier(nc);
}
- barrier(); //adding a barrier so there aren't any OOB errors due to faster threads
+ barrier(ncores); //adding a barrier so there aren't any OOB errors due to faster threads
}
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------\r
// Helper functions\r
\r
-void printArray( char name[], int n, data_t arr[] )\r
+void printArrayMT( char name[], int n, data_t arr[] )\r
{\r
int i;\r
if (coreid != 0)\r
printf( "\n" );\r
}\r
\r
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)\r
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
{\r
if (coreid != 0)\r
return;\r
\r
\r
//// Execute the provided, naive matmul\r
- //barrier();\r
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());\r
+ //barrier(nc);\r
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
\r
//\r
//// verify\r
- //verify(ARRAY_SIZE, results_data, verify_data);\r
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);\r
//\r
//// clear results from the first trial\r
//size_t i;\r
//if (coreid == 0) \r
// for (i=0; i < ARRAY_SIZE; i++)\r
// results_data[i] = 0;\r
- //barrier();\r
+ //barrier(nc);\r
\r
\r
// Execute your faster matmul\r
- barrier();\r
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());\r
+ barrier(nc);\r
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
\r
#ifdef DEBUG\r
- printArray("results:", ARRAY_SIZE, results_data);\r
- printArray("verify :", ARRAY_SIZE, verify_data);\r
+ printArrayMT("results:", ARRAY_SIZE, results_data);\r
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);\r
#endif\r
\r
// verify\r
- verify(ARRAY_SIZE, results_data, verify_data);\r
- barrier();\r
+ verifyMT(ARRAY_SIZE, results_data, verify_data);\r
+ barrier(nc);\r
\r
exit(0);\r
}\r
//--------------------------------------------------------------------------\r
// Helper functions\r
\r
-void printArray( char name[], int n, data_t arr[] )\r
+void printArrayMT( char name[], int n, data_t arr[] )\r
{\r
int i;\r
if (coreid != 0)\r
printf( "\n" );\r
}\r
\r
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)\r
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
{\r
if (coreid != 0)\r
return;\r
\r
\r
//// Execute the provided, naive matmul\r
- //barrier();\r
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());\r
+ //barrier(nc);\r
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
\r
//\r
//// verify\r
- //verify(ARRAY_SIZE, results_data, verify_data);\r
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);\r
//\r
//// clear results from the first trial\r
//size_t i;\r
//if (coreid == 0) \r
// for (i=0; i < ARRAY_SIZE; i++)\r
// results_data[i] = 0;\r
- //barrier();\r
+ //barrier(nc);\r
\r
\r
// Execute your faster matmul\r
- barrier();\r
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());\r
+ barrier(nc);\r
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
\r
#ifdef DEBUG\r
- printArray("results:", ARRAY_SIZE, results_data);\r
- printArray("verify :", ARRAY_SIZE, verify_data);\r
+ printArrayMT("results:", ARRAY_SIZE, results_data);\r
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);\r
#endif\r
\r
// verify\r
- verify(ARRAY_SIZE, results_data, verify_data);\r
- barrier();\r
+ verifyMT(ARRAY_SIZE, results_data, verify_data);\r
+ barrier(nc);\r
\r
exit(0);\r
}\r
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
//// Execute the provided, naive matmul
- //barrier();
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ //barrier(nc);
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//// verify
- //verify(ARRAY_SIZE, results_data, verify_data);
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
//// clear results from the first trial
//size_t i;
//if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
- //barrier();
+ //barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
//// Execute the provided, naive matmul
- //barrier();
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ //barrier(nc);
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//// verify
- //verify(ARRAY_SIZE, results_data, verify_data);
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
//// clear results from the first trial
//size_t i;
//if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
- //barrier();
+ //barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
}
}
- barrier();
+ barrier(ncores);
curhalf++;
curhalf %= ncores;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
}
}
- barrier();
+ barrier(nc);
curhalf++;
curhalf %= ncores;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// /* // Execute the provided, naive matmul */
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
}
}
}
- /* barrier(); */
+ /* barrier(nc); */
/* kk_start= (coreid == 1 ? 0 : LDA/2); */
/* kk_end = (coreid == 1 ? LDA/2 : LDA); */
}
- //barrier();
+ //barrier(nc);
for (jj = start; jj < end; jj += BLOCK_J) {
int kk_start= (coreid != 0 ? 0 : LDA/2), kk_end = (coreid != 0 ? LDA/2 : LDA);
for (kk = kk_start; kk < kk_end; kk += BLOCK_K) {
// /* // Execute the provided, naive matmul */
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
pos_B += (lda*step_k) ;
pos_A += step_k;
}
- //barrier();
+ //barrier(nc);
C[(pos_C + 0)] = temp10;
C[(pos_C + 1)] = temp11;
C[(pos_C + 5)] = temp15;
C[(pos_C + 6)] = temp16;
C[(pos_C + 7)] = temp17;
- //barrier();
+ //barrier(nc);
pos_C = i + j*lda;
//pos_C -= lda;
C[(pos_C + 5)] = temp05;
C[(pos_C + 6)] = temp06;
C[(pos_C + 7)] = temp07;
- //barrier();
+ //barrier(nc);
//pos_C += step_j * lda;
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
//printf("input1_data");
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
pos_B += (lda*step_k) ;
pos_A += step_k;
}
- //barrier();
+ //barrier(nc);
C[(pos_C + 0)] = temp10;
C[(pos_C + 1)] = temp11;
C[(pos_C + 5)] = temp15;
C[(pos_C + 6)] = temp16;
C[(pos_C + 7)] = temp17;
- //barrier();
+ //barrier(nc);
pos_C = i + j*lda;
//pos_C -= lda;
C[(pos_C + 5)] = temp05;
C[(pos_C + 6)] = temp06;
C[(pos_C + 7)] = temp07;
- //barrier();
+ //barrier(nc);
//pos_C += step_j * lda;
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
//printf("input1_data");
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------\r
// Helper functions\r
\r
-void printArray( char name[], int n, data_t arr[] )\r
+void printArrayMT( char name[], int n, data_t arr[] )\r
{\r
int i;\r
if (coreid != 0)\r
printf( "\n" );\r
}\r
\r
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)\r
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
{\r
if (coreid != 0)\r
return;\r
\r
\r
// // Execute the provided, naive matmul\r
-// barrier();\r
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());\r
+// barrier(nc);\r
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
// \r
// \r
// // verify\r
-// verify(ARRAY_SIZE, results_data, verify_data);\r
+// verifyMT(ARRAY_SIZE, results_data, verify_data);\r
// \r
// // clear results from the first trial\r
// size_t i;\r
// if (coreid == 0)\r
// for (i=0; i < ARRAY_SIZE; i++)\r
// results_data[i] = 0;\r
-// barrier();\r
+// barrier(nc);\r
\r
\r
// Execute your faster matmul\r
- barrier();\r
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());\r
+ barrier(nc);\r
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
\r
#ifdef DEBUG\r
- printArray("results:", ARRAY_SIZE, results_data);\r
- printArray("verify :", ARRAY_SIZE, verify_data);\r
+ printArrayMT("results:", ARRAY_SIZE, results_data);\r
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);\r
#endif\r
\r
// verify\r
- verify(ARRAY_SIZE, results_data, verify_data);\r
- barrier();\r
+ verifyMT(ARRAY_SIZE, results_data, verify_data);\r
+ barrier(nc);\r
\r
exit(0);\r
}\r
//--------------------------------------------------------------------------\r
// Helper functions\r
\r
-void printArray( char name[], int n, data_t arr[] )\r
+void printArrayMT( char name[], int n, data_t arr[] )\r
{\r
int i;\r
if (coreid != 0)\r
printf( "\n" );\r
}\r
\r
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)\r
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
{\r
if (coreid != 0)\r
return;\r
\r
\r
// // Execute the provided, naive matmul\r
-// barrier();\r
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());\r
+// barrier(nc);\r
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
// \r
// \r
// // verify\r
-// verify(ARRAY_SIZE, results_data, verify_data);\r
+// verifyMT(ARRAY_SIZE, results_data, verify_data);\r
// \r
// // clear results from the first trial\r
// size_t i;\r
// if (coreid == 0)\r
// for (i=0; i < ARRAY_SIZE; i++)\r
// results_data[i] = 0;\r
-// barrier();\r
+// barrier(nc);\r
\r
\r
// Execute your faster matmul\r
- barrier();\r
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());\r
+ barrier(nc);\r
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));\r
\r
#ifdef DEBUG\r
- printArray("results:", ARRAY_SIZE, results_data);\r
- printArray("verify :", ARRAY_SIZE, verify_data);\r
+ printArrayMT("results:", ARRAY_SIZE, results_data);\r
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);\r
#endif\r
\r
// verify\r
- verify(ARRAY_SIZE, results_data, verify_data);\r
- barrier();\r
+ verifyMT(ARRAY_SIZE, results_data, verify_data);\r
+ barrier(nc);\r
\r
exit(0);\r
}\r
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
data_t temp2_3=0;
data_t temp3_3=0;
data_t tempB_3=0;
- barrier();
+ barrier(nc);
if (coreid!=ncores-1){
for (i=space*coreid;i<max/4*4;i+=4)
{
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------\r
// Helper functions\r
\r
-void printArray( char name[], int n, data_t arr[] )\r
+void printArrayMT( char name[], int n, data_t arr[] )\r
{\r
int i;\r
if (coreid != 0)\r
printf( "\n" );\r
}\r
\r
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)\r
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)\r
{\r
if (coreid != 0)\r
return;\r
\r
\r
// Execute the provided, terrible vvadd\r
- barrier();\r
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());\r
+ barrier(nc);\r
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));\r
\r
\r
// verify\r
- verify(DATA_SIZE, results_data, verify_data);\r
+ verifyMT(DATA_SIZE, results_data, verify_data);\r
\r
// reset results from the first trial\r
if (coreid == 0) \r
for (i=0; i < DATA_SIZE; i++)\r
results_data[i] = input1_data[i];\r
}\r
- barrier();\r
+ barrier(nc);\r
\r
\r
// Execute your faster vvadd\r
- barrier();\r
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());\r
+ barrier(nc);\r
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));\r
\r
#ifdef DEBUG\r
- printArray("results: ", DATA_SIZE, results_data);\r
- printArray("verify : ", DATA_SIZE, verify_data);\r
+ printArrayMT("results: ", DATA_SIZE, results_data);\r
+ printArrayMT("verify : ", DATA_SIZE, verify_data);\r
#endif\r
\r
// verify\r
- verify(DATA_SIZE, results_data, verify_data);\r
- barrier();\r
+ verifyMT(DATA_SIZE, results_data, verify_data);\r
+ barrier(nc);\r
\r
exit(0);\r
}\r
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// //verify(ARRAY_SIZE, results_data, verify_data);
+// //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// //verify(ARRAY_SIZE, results_data, verify_data);
+// //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
//
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// //verify(ARRAY_SIZE, results_data, verify_data);
+// //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// //verify(ARRAY_SIZE, results_data, verify_data);
+// //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1];
}
}
- barrier();
+ barrier(ncores);
// compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k]
for ( j2 = 0; j2 < lda; j2 += jBLOCK )
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1];
}
}
- barrier();
+ barrier(nc);
// compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k]
for ( j2 = 0; j2 < lda; j2 += jBLOCK )
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
pos_B += (lda*step_k) ;
pos_A += step_k;
}
- //barrier();
+ //barrier(nc);
C[(pos_C + 0)] = temp10;
C[(pos_C + 1)] = temp11;
C[(pos_C + 5)] = temp15;
C[(pos_C + 6)] = temp16;
C[(pos_C + 7)] = temp17;
- //barrier();
+ //barrier(nc);
pos_C = i + j*lda;
//pos_C -= lda;
C[(pos_C + 5)] = temp05;
C[(pos_C + 6)] = temp06;
C[(pos_C + 7)] = temp07;
- //barrier();
+ //barrier(nc);
//pos_C += step_j * lda;
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
//printf("input1_data");
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
pos_B += (lda*step_k) ;
pos_A += step_k;
}
- //barrier();
+ //barrier(nc);
C[(pos_C + 0)] = temp10;
C[(pos_C + 1)] = temp11;
C[(pos_C + 5)] = temp15;
C[(pos_C + 6)] = temp16;
C[(pos_C + 7)] = temp17;
- //barrier();
+ //barrier(nc);
pos_C = i + j*lda;
//pos_C -= lda;
C[(pos_C + 5)] = temp05;
C[(pos_C + 6)] = temp06;
C[(pos_C + 7)] = temp07;
- //barrier();
+ //barrier(nc);
//pos_C += step_j * lda;
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
//printf("input1_data");
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}